author     Chia-I Wu <olvaffe@gmail.com>              2019-10-29 11:00:00 -0700
committer  Marge Bot <eric+marge@anholt.net>          2021-04-08 17:15:37 +0000
commit     247232d5969235b7f24b5ab9fbfc9dc5570c578e (patch)
tree       c4d32ca2c095510198e79b36ca9da633c2f943bb
parent     b5653e3414b43669bd50e0912c1d829f17cc1124 (diff)
venus: add experimental renderers
This adds two renderers: a virtio-gpu renderer and a vtest renderer. The
vtest renderer must be enabled with VN_DEBUG=vtest.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Ryan Neph <ryanneph@google.com>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5800>
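Of note, renderer selection is implemented as a static inline helper in
vn_renderer.h (added below): vtest is tried only when VN_DEBUG=vtest is set,
and virtio-gpu is used otherwise or as the fallback when the vtest renderer
fails to initialize. Condensed from the patch:

static inline VkResult
vn_renderer_create(struct vn_instance *instance,
                   const VkAllocationCallbacks *alloc,
                   struct vn_renderer **renderer)
{
   /* prefer vtest only when explicitly requested via VN_DEBUG=vtest */
   if (VN_DEBUG(VTEST)) {
      VkResult result = vn_renderer_create_vtest(instance, alloc, renderer);
      if (result == VK_SUCCESS)
         return VK_SUCCESS;
   }

   /* otherwise, or when the vtest connection fails, use virtio-gpu */
   return vn_renderer_create_virtgpu(instance, alloc, renderer);
}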
-rw-r--r--  src/virtio/vulkan/meson.build               3
-rw-r--r--  src/virtio/vulkan/vn_common.c               1
-rw-r--r--  src/virtio/vulkan/vn_common.h               5
-rw-r--r--  src/virtio/vulkan/vn_renderer.h           526
-rw-r--r--  src/virtio/vulkan/vn_renderer_virtgpu.c  1470
-rw-r--r--  src/virtio/vulkan/vn_renderer_vtest.c    1045
6 files changed, 3050 insertions, 0 deletions
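The bulk of the change is vn_renderer.h, which defines the vn_renderer,
vn_renderer_bo, and vn_renderer_sync abstractions as per-object ops tables
plus static inline wrappers. For orientation only, here is a hypothetical
caller of the BO helpers declared in that header; example_use_bo and the
4096-byte size are illustrative and not part of the patch:

#include <string.h> /* memset */
#include "vn_renderer.h"

/* Illustrative only: allocate a CPU-visible BO, map and clear it, flush the
 * written range, then drop the last reference, which invokes ops.destroy.
 */
static VkResult
example_use_bo(struct vn_renderer *renderer)
{
   struct vn_renderer_bo *bo;
   VkResult result = vn_renderer_bo_create_cpu(renderer, 4096, &bo);
   if (result != VK_SUCCESS)
      return result;

   void *ptr = vn_renderer_bo_map(bo);
   if (!ptr) {
      vn_renderer_bo_unref(bo);
      return VK_ERROR_MEMORY_MAP_FAILED;
   }

   memset(ptr, 0, 4096);
   vn_renderer_bo_flush(bo, 0, 4096);

   vn_renderer_bo_unref(bo);
   return VK_SUCCESS;
}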
diff --git a/src/virtio/vulkan/meson.build b/src/virtio/vulkan/meson.build index 4b66a2d7bc7..c17787a1c6f 100644 --- a/src/virtio/vulkan/meson.build +++ b/src/virtio/vulkan/meson.build @@ -34,9 +34,12 @@ libvn_files = files( 'vn_common.c', 'vn_device.c', 'vn_icd.c', + 'vn_renderer_virtgpu.c', + 'vn_renderer_vtest.c', ) vn_deps = [ + dep_libdrm, dep_thread, idep_mesautil, idep_vulkan_util, diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index e167d85cb7c..f5754a5f222 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -25,6 +25,7 @@ static const struct debug_control vn_debug_options[] = { { "init", VN_DEBUG_INIT }, { "result", VN_DEBUG_RESULT }, + { "vtest", VN_DEBUG_VTEST }, { NULL, 0 }, }; diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h index df226173413..614aa5f00bb 100644 --- a/src/virtio/vulkan/vn_common.h +++ b/src/virtio/vulkan/vn_common.h @@ -54,9 +54,14 @@ struct vn_device; struct vn_queue; struct vn_command_buffer; +struct vn_renderer; +struct vn_renderer_bo; +struct vn_renderer_sync; + enum vn_debug { VN_DEBUG_INIT = 1ull << 0, VN_DEBUG_RESULT = 1ull << 1, + VN_DEBUG_VTEST = 1ull << 2, }; typedef uint64_t vn_object_id; diff --git a/src/virtio/vulkan/vn_renderer.h b/src/virtio/vulkan/vn_renderer.h new file mode 100644 index 00000000000..08a83cc664e --- /dev/null +++ b/src/virtio/vulkan/vn_renderer.h @@ -0,0 +1,526 @@ +/* + * Copyright 2019 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef VN_RENDERER_H +#define VN_RENDERER_H + +#include "vn_common.h" + +struct vn_renderer_bo_ops { + void (*destroy)(struct vn_renderer_bo *bo); + + /* allocate a CPU shared memory as the storage */ + VkResult (*init_cpu)(struct vn_renderer_bo *bo, VkDeviceSize size); + + /* import a VkDeviceMemory as the storage */ + VkResult (*init_gpu)(struct vn_renderer_bo *bo, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles); + + /* import a dmabuf as the storage */ + VkResult (*init_dmabuf)(struct vn_renderer_bo *bo, + VkDeviceSize size, + int fd, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles); + + int (*export_dmabuf)(struct vn_renderer_bo *bo); + + /* map is not thread-safe */ + void *(*map)(struct vn_renderer_bo *bo); + + void (*flush)(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size); + void (*invalidate)(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size); +}; + +struct vn_renderer_bo { + atomic_int refcount; + + uint32_t res_id; + + struct vn_renderer_bo_ops ops; +}; + +enum vn_renderer_sync_flags { + VN_RENDERER_SYNC_SHAREABLE = 1u << 0, + VN_RENDERER_SYNC_BINARY = 1u << 1, +}; + +struct vn_renderer_sync_ops { + void (*destroy)(struct vn_renderer_sync *sync); + + /* a sync can be initialized/released multiple times */ + VkResult (*init)(struct vn_renderer_sync *sync, + uint64_t initial_val, + uint32_t flags); + VkResult (*init_syncobj)(struct vn_renderer_sync *sync, + int fd, + bool sync_file); + void (*release)(struct vn_renderer_sync *sync); + + int (*export_syncobj)(struct vn_renderer_sync *sync, bool sync_file); + + /* reset the counter */ + VkResult (*reset)(struct vn_renderer_sync *sync, uint64_t initial_val); + + /* read the current value from the counter */ + VkResult (*read)(struct vn_renderer_sync *sync, uint64_t *val); + + /* write a new value (larger than the current one) to the counter */ + VkResult (*write)(struct vn_renderer_sync *sync, uint64_t 
val); +}; + +/* + * A sync consists of a uint64_t counter. The counter can be updated by CPU + * or by GPU. It can also be waited on by CPU or by GPU until it reaches + * certain values. + * + * This models after timeline VkSemaphore rather than timeline drm_syncobj. + * The main difference is that drm_syncobj can have unsignaled value 0. + */ +struct vn_renderer_sync { + uint32_t sync_id; + + struct vn_renderer_sync_ops ops; +}; + +struct vn_renderer_info { + struct { + uint16_t vendor_id; + uint16_t device_id; + + bool has_bus_info; + uint16_t domain; + uint8_t bus; + uint8_t device; + uint8_t function; + } pci; + + bool has_dmabuf_import; + bool has_cache_management; + bool has_timeline_sync; + bool has_external_sync; + + uint32_t max_sync_queue_count; + + /* hw capset */ + uint32_t wire_format_version; + uint32_t vk_xml_version; + uint32_t vk_ext_command_serialization_spec_version; + uint32_t vk_mesa_venus_protocol_spec_version; +}; + +struct vn_renderer_submit_batch { + const void *cs_data; + size_t cs_size; + + /* + * Submit cs to the virtual sync queue identified by sync_queue_index. The + * virtual queue is assumed to be associated with the physical VkQueue + * identified by vk_queue_id. After the execution completes on the + * VkQueue, the virtual sync queue is signaled. + * + * sync_queue_index must be less than max_sync_queue_count. + * + * vk_queue_id specifies the object id of a VkQueue. + * + * When sync_queue_cpu is true, it specifies the special CPU sync queue, + * and sync_queue_index/vk_queue_id are ignored. TODO revisit this later + */ + uint32_t sync_queue_index; + bool sync_queue_cpu; + vn_object_id vk_queue_id; + + /* syncs to update when the virtual sync queue is signaled */ + struct vn_renderer_sync *const *syncs; + /* TODO allow NULL when syncs are all binary? */ + const uint64_t *sync_values; + uint32_t sync_count; +}; + +struct vn_renderer_submit { + /* BOs to pin and to fence implicitly + * + * TODO track all bos and automatically pin them. We don't do it yet + * because each vn_command_buffer owns a bo. We can probably make do by + * returning the bos to a bo cache and exclude bo cache from pinning. + */ + struct vn_renderer_bo *const *bos; + uint32_t bo_count; + + const struct vn_renderer_submit_batch *batches; + uint32_t batch_count; +}; + +struct vn_renderer_wait { + bool wait_any; + uint64_t timeout; + + struct vn_renderer_sync *const *syncs; + /* TODO allow NULL when syncs are all binary? */ + const uint64_t *sync_values; + uint32_t sync_count; +}; + +struct vn_renderer_ops { + void (*destroy)(struct vn_renderer *renderer, + const VkAllocationCallbacks *alloc); + + void (*get_info)(struct vn_renderer *renderer, + struct vn_renderer_info *info); + + VkResult (*submit)(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit); + + /* + * On success, returns VK_SUCCESS or VK_TIMEOUT. On failure, returns + * VK_ERROR_DEVICE_LOST or out of device/host memory. 
+ */ + VkResult (*wait)(struct vn_renderer *renderer, + const struct vn_renderer_wait *wait); + + struct vn_renderer_bo *(*bo_create)(struct vn_renderer *renderer); + + struct vn_renderer_sync *(*sync_create)(struct vn_renderer *renderer); +}; + +struct vn_renderer { + struct vn_renderer_ops ops; +}; + +VkResult +vn_renderer_create_virtgpu(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + struct vn_renderer **renderer); + +VkResult +vn_renderer_create_vtest(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + struct vn_renderer **renderer); + +static inline VkResult +vn_renderer_create(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + struct vn_renderer **renderer) +{ + if (VN_DEBUG(VTEST)) { + VkResult result = vn_renderer_create_vtest(instance, alloc, renderer); + if (result == VK_SUCCESS) + return VK_SUCCESS; + } + + return vn_renderer_create_virtgpu(instance, alloc, renderer); +} + +static inline void +vn_renderer_destroy(struct vn_renderer *renderer, + const VkAllocationCallbacks *alloc) +{ + renderer->ops.destroy(renderer, alloc); +} + +static inline void +vn_renderer_get_info(struct vn_renderer *renderer, + struct vn_renderer_info *info) +{ + renderer->ops.get_info(renderer, info); +} + +static inline VkResult +vn_renderer_submit(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit) +{ + return renderer->ops.submit(renderer, submit); +} + +static inline VkResult +vn_renderer_submit_simple(struct vn_renderer *renderer, + const void *cs_data, + size_t cs_size) +{ + const struct vn_renderer_submit submit = { + .batches = + &(const struct vn_renderer_submit_batch){ + .cs_data = cs_data, + .cs_size = cs_size, + }, + .batch_count = 1, + }; + return vn_renderer_submit(renderer, &submit); +} + +static inline VkResult +vn_renderer_wait(struct vn_renderer *renderer, + const struct vn_renderer_wait *wait) +{ + return renderer->ops.wait(renderer, wait); +} + +static inline VkResult +vn_renderer_bo_create_cpu(struct vn_renderer *renderer, + VkDeviceSize size, + struct vn_renderer_bo **_bo) +{ + struct vn_renderer_bo *bo = renderer->ops.bo_create(renderer); + if (!bo) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = bo->ops.init_cpu(bo, size); + if (result != VK_SUCCESS) { + bo->ops.destroy(bo); + return result; + } + + atomic_init(&bo->refcount, 1); + + *_bo = bo; + return VK_SUCCESS; +} + +static inline VkResult +vn_renderer_bo_create_gpu(struct vn_renderer *renderer, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles, + struct vn_renderer_bo **_bo) +{ + struct vn_renderer_bo *bo = renderer->ops.bo_create(renderer); + if (!bo) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = + bo->ops.init_gpu(bo, size, mem_id, flags, external_handles); + if (result != VK_SUCCESS) { + bo->ops.destroy(bo); + return result; + } + + atomic_init(&bo->refcount, 1); + + *_bo = bo; + return VK_SUCCESS; +} + +static inline VkResult +vn_renderer_bo_create_dmabuf(struct vn_renderer *renderer, + VkDeviceSize size, + int fd, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles, + struct vn_renderer_bo **_bo) +{ + struct vn_renderer_bo *bo = renderer->ops.bo_create(renderer); + if (!bo) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = + bo->ops.init_dmabuf(bo, size, fd, flags, external_handles); + if (result != VK_SUCCESS) { + bo->ops.destroy(bo); + return result; + } + + atomic_init(&bo->refcount, 1); + + *_bo = 
bo; + return VK_SUCCESS; +} + +static inline struct vn_renderer_bo * +vn_renderer_bo_ref(struct vn_renderer_bo *bo) +{ + const int old = + atomic_fetch_add_explicit(&bo->refcount, 1, memory_order_relaxed); + assert(old >= 1); + + return bo; +} + +static inline bool +vn_renderer_bo_unref(struct vn_renderer_bo *bo) +{ + const int old = + atomic_fetch_sub_explicit(&bo->refcount, 1, memory_order_release); + assert(old >= 1); + + if (old == 1) { + atomic_thread_fence(memory_order_acquire); + bo->ops.destroy(bo); + return true; + } + + return false; +} + +static inline int +vn_renderer_bo_export_dmabuf(struct vn_renderer_bo *bo) +{ + return bo->ops.export_dmabuf(bo); +} + +static inline void * +vn_renderer_bo_map(struct vn_renderer_bo *bo) +{ + return bo->ops.map(bo); +} + +static inline void +vn_renderer_bo_flush(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize end) +{ + bo->ops.flush(bo, offset, end); +} + +static inline void +vn_renderer_bo_invalidate(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + bo->ops.invalidate(bo, offset, size); +} + +static inline VkResult +vn_renderer_sync_create_cpu(struct vn_renderer *renderer, + struct vn_renderer_sync **_sync) +{ + struct vn_renderer_sync *sync = renderer->ops.sync_create(renderer); + if (!sync) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + const uint64_t initial_val = 0; + const uint32_t flags = 0; + VkResult result = sync->ops.init(sync, initial_val, flags); + if (result != VK_SUCCESS) { + sync->ops.destroy(sync); + return result; + } + + *_sync = sync; + return VK_SUCCESS; +} + +static inline VkResult +vn_renderer_sync_create_fence(struct vn_renderer *renderer, + bool signaled, + VkExternalFenceHandleTypeFlags external_handles, + struct vn_renderer_sync **_sync) +{ + struct vn_renderer_sync *sync = renderer->ops.sync_create(renderer); + if (!sync) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + const uint64_t initial_val = signaled; + const uint32_t flags = VN_RENDERER_SYNC_BINARY | + (external_handles ? VN_RENDERER_SYNC_SHAREABLE : 0); + VkResult result = sync->ops.init(sync, initial_val, flags); + if (result != VK_SUCCESS) { + sync->ops.destroy(sync); + return result; + } + + *_sync = sync; + return VK_SUCCESS; +} + +static inline VkResult +vn_renderer_sync_create_semaphore( + struct vn_renderer *renderer, + VkSemaphoreType type, + uint64_t initial_val, + VkExternalSemaphoreHandleTypeFlags external_handles, + struct vn_renderer_sync **_sync) +{ + struct vn_renderer_sync *sync = renderer->ops.sync_create(renderer); + if (!sync) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + const uint32_t flags = + (external_handles ? VN_RENDERER_SYNC_SHAREABLE : 0) | + (type == VK_SEMAPHORE_TYPE_BINARY ? 
VN_RENDERER_SYNC_BINARY : 0); + VkResult result = sync->ops.init(sync, initial_val, flags); + if (result != VK_SUCCESS) { + sync->ops.destroy(sync); + return result; + } + + *_sync = sync; + return VK_SUCCESS; +} + +static inline VkResult +vn_renderer_sync_create_empty(struct vn_renderer *renderer, + struct vn_renderer_sync **_sync) +{ + struct vn_renderer_sync *sync = renderer->ops.sync_create(renderer); + if (!sync) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + /* no init */ + + *_sync = sync; + return VK_SUCCESS; +} + +static inline void +vn_renderer_sync_destroy(struct vn_renderer_sync *sync) +{ + sync->ops.destroy(sync); +} + +static inline VkResult +vn_renderer_sync_init_signaled(struct vn_renderer_sync *sync) +{ + const uint64_t initial_val = 1; + const uint32_t flags = VN_RENDERER_SYNC_BINARY; + return sync->ops.init(sync, initial_val, flags); +} + +static inline VkResult +vn_renderer_sync_init_syncobj(struct vn_renderer_sync *sync, + int fd, + bool sync_file) +{ + return sync->ops.init_syncobj(sync, fd, sync_file); +} + +static inline void +vn_renderer_sync_release(struct vn_renderer_sync *sync) +{ + sync->ops.release(sync); +} + +static inline int +vn_renderer_sync_export_syncobj(struct vn_renderer_sync *sync, bool sync_file) +{ + return sync->ops.export_syncobj(sync, sync_file); +} + +static inline VkResult +vn_renderer_sync_reset(struct vn_renderer_sync *sync, uint64_t initial_val) +{ + return sync->ops.reset(sync, initial_val); +} + +static inline VkResult +vn_renderer_sync_read(struct vn_renderer_sync *sync, uint64_t *val) +{ + return sync->ops.read(sync, val); +} + +static inline VkResult +vn_renderer_sync_write(struct vn_renderer_sync *sync, uint64_t val) +{ + return sync->ops.write(sync, val); +} + +#endif /* VN_RENDERER_H */ diff --git a/src/virtio/vulkan/vn_renderer_virtgpu.c b/src/virtio/vulkan/vn_renderer_virtgpu.c new file mode 100644 index 00000000000..c9d4184ef55 --- /dev/null +++ b/src/virtio/vulkan/vn_renderer_virtgpu.c @@ -0,0 +1,1470 @@ +/* + * Copyright 2020 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <xf86drm.h> + +#include "drm-uapi/virtgpu_drm.h" +#define VIRGL_RENDERER_UNSTABLE_APIS +#include "virtio-gpu/virglrenderer_hw.h" + +#include "vn_renderer.h" + +/* XXX WIP kernel uapi */ +#ifndef VIRTGPU_PARAM_CONTEXT_INIT +#define VIRTGPU_PARAM_CONTEXT_INIT 6 +#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 +struct drm_virtgpu_context_set_param { + __u64 param; + __u64 value; +}; +struct drm_virtgpu_context_init { + __u32 num_params; + __u32 pad; + __u64 ctx_set_params; +}; +#define DRM_VIRTGPU_CONTEXT_INIT 0xb +#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ + struct drm_virtgpu_context_init) +#endif /* VIRTGPU_PARAM_CONTEXT_INIT */ +#ifndef VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT +#define VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT 100 +#endif /* VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT */ + +/* XXX comment these out to really use kernel uapi */ +#define SIMULATE_BO_SIZE_FIX 1 +//#define SIMULATE_CONTEXT_INIT 1 +#define SIMULATE_SYNCOBJ 1 +#define SIMULATE_SUBMIT 1 + +#define VIRTGPU_PCI_VENDOR_ID 0x1af4 +#define VIRTGPU_PCI_DEVICE_ID 0x1050 + +struct virtgpu; + +struct virtgpu_bo { + struct vn_renderer_bo base; + struct virtgpu *gpu; + + uint32_t blob_flags; + VkDeviceSize size; + + uint32_t gem_handle; + void *gem_ptr; +}; + +struct virtgpu_sync { + struct 
vn_renderer_sync base; + struct virtgpu *gpu; + + /* + * drm_syncobj is in one of these states + * + * - value N: drm_syncobj has a signaled fence chain with seqno N + * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M + * (which may point to another unsignaled fence chain with + * seqno between N and M, and so on) + * + * TODO Do we want to use binary drm_syncobjs? They would be + * + * - value 0: drm_syncobj has no fence + * - value 1: drm_syncobj has a signaled fence with seqno 0 + * + * They are cheaper but require special care. + */ + uint32_t syncobj_handle; +}; + +struct virtgpu { + struct vn_renderer base; + + struct vn_instance *instance; + + int fd; + int version_minor; + drmPciBusInfo bus_info; + + uint32_t max_sync_queue_count; + + struct { + enum virgl_renderer_capset id; + uint32_t version; + struct virgl_renderer_capset_venus data; + } capset; +}; + +#ifdef SIMULATE_SYNCOBJ + +#include "util/hash_table.h" +#include "util/u_idalloc.h" + +static struct { + mtx_t mutex; + struct hash_table *syncobjs; + struct util_idalloc ida; + + int signaled_fd; +} sim; + +struct sim_syncobj { + mtx_t mutex; + uint64_t point; + + int pending_fd; + uint64_t pending_point; + bool pending_cpu; +}; + +static uint32_t +sim_syncobj_create(struct virtgpu *gpu, bool signaled) +{ + struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj)); + if (!syncobj) + return 0; + + mtx_init(&syncobj->mutex, mtx_plain); + syncobj->pending_fd = -1; + + mtx_lock(&sim.mutex); + + /* initialize lazily */ + if (!sim.syncobjs) { + sim.syncobjs = _mesa_pointer_hash_table_create(NULL); + if (!sim.syncobjs) { + mtx_unlock(&sim.mutex); + return 0; + } + + util_idalloc_init(&sim.ida); + util_idalloc_resize(&sim.ida, 32); + + struct drm_virtgpu_execbuffer args = { + .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT, + }; + int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args); + if (ret || args.fence_fd < 0) { + _mesa_hash_table_destroy(sim.syncobjs, NULL); + sim.syncobjs = NULL; + mtx_unlock(&sim.mutex); + return 0; + } + + sim.signaled_fd = args.fence_fd; + } + + const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1; + _mesa_hash_table_insert(sim.syncobjs, + (const void *)(uintptr_t)syncobj_handle, syncobj); + + mtx_unlock(&sim.mutex); + + return syncobj_handle; +} + +static void +sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = NULL; + + mtx_lock(&sim.mutex); + + struct hash_entry *entry = _mesa_hash_table_search( + sim.syncobjs, (const void *)(uintptr_t)syncobj_handle); + if (entry) { + syncobj = entry->data; + _mesa_hash_table_remove(sim.syncobjs, entry); + util_idalloc_free(&sim.ida, syncobj_handle - 1); + } + + mtx_unlock(&sim.mutex); + + if (syncobj) { + if (syncobj->pending_fd >= 0) + close(syncobj->pending_fd); + mtx_destroy(&syncobj->mutex); + free(syncobj); + } +} + +static VkResult +sim_syncobj_poll(int fd, int poll_timeout) +{ + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN, + }; + int ret; + do { + ret = poll(&pollfd, 1, poll_timeout); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) { + return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY + : VK_ERROR_DEVICE_LOST; + } + + return ret ? 
VK_SUCCESS : VK_TIMEOUT; +} + +static void +sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point) +{ + syncobj->point = point; + + if (syncobj->pending_fd >= 0) { + close(syncobj->pending_fd); + syncobj->pending_fd = -1; + syncobj->pending_point = point; + } +} + +static void +sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout) +{ + if (syncobj->pending_fd >= 0) { + VkResult result; + if (syncobj->pending_cpu) { + if (poll_timeout == -1) { + const int max_cpu_timeout = 2000; + poll_timeout = max_cpu_timeout; + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + if (result == VK_TIMEOUT) { + vn_log(NULL, "cpu sync timed out after %dms; ignoring", + poll_timeout); + result = VK_SUCCESS; + } + } else { + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + } + } else { + result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout); + } + if (result == VK_SUCCESS) { + close(syncobj->pending_fd); + syncobj->pending_fd = -1; + syncobj->point = syncobj->pending_point; + } + } +} + +static struct sim_syncobj * +sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = NULL; + + mtx_lock(&sim.mutex); + struct hash_entry *entry = _mesa_hash_table_search( + sim.syncobjs, (const void *)(uintptr_t)syncobj_handle); + if (entry) + syncobj = entry->data; + mtx_unlock(&sim.mutex); + + return syncobj; +} + +static int +sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return -1; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, 0); + mtx_unlock(&syncobj->mutex); + + return 0; +} + +static int +sim_syncobj_query(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t *point) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return -1; + + mtx_lock(&syncobj->mutex); + sim_syncobj_update_point_locked(syncobj, 0); + *point = syncobj->point; + mtx_unlock(&syncobj->mutex); + + return 0; +} + +static int +sim_syncobj_signal(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t point) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return -1; + + mtx_lock(&syncobj->mutex); + sim_syncobj_set_point_locked(syncobj, point); + mtx_unlock(&syncobj->mutex); + + return 0; +} + +static int +sim_syncobj_submit(struct virtgpu *gpu, + uint32_t syncobj_handle, + int sync_fd, + uint64_t point, + bool cpu) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return -1; + + int pending_fd = dup(sync_fd); + if (pending_fd < 0) { + vn_log(gpu->instance, "failed to dup sync fd"); + return -1; + } + + mtx_lock(&syncobj->mutex); + + if (syncobj->pending_fd >= 0) { + mtx_unlock(&syncobj->mutex); + + /* TODO */ + vn_log(gpu->instance, "sorry, no simulated timeline semaphore"); + close(pending_fd); + return -1; + } + if (syncobj->point >= point) + vn_log(gpu->instance, "non-monotonic signaling"); + + syncobj->pending_fd = pending_fd; + syncobj->pending_point = point; + syncobj->pending_cpu = cpu; + + mtx_unlock(&syncobj->mutex); + + return 0; +} + +static int +timeout_to_poll_timeout(uint64_t timeout) +{ + const uint64_t ns_per_ms = 1000000; + const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms; + if (!ms && timeout) + return -1; + return ms <= INT_MAX ? 
ms : -1; +} + +static int +sim_syncobj_wait(struct virtgpu *gpu, + const struct vn_renderer_wait *wait, + bool wait_avail) +{ + if (wait_avail) + return -1; + + const int poll_timeout = timeout_to_poll_timeout(wait->timeout); + + /* TODO poll all fds at the same time */ + for (uint32_t i = 0; i < wait->sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i]; + const uint64_t point = wait->sync_values[i]; + + struct sim_syncobj *syncobj = + sim_syncobj_lookup(gpu, sync->syncobj_handle); + if (!syncobj) + return -1; + + mtx_lock(&syncobj->mutex); + + if (syncobj->point < point) + sim_syncobj_update_point_locked(syncobj, poll_timeout); + + if (syncobj->point < point) { + if (wait->wait_any && i < wait->sync_count - 1 && + syncobj->pending_fd < 0) { + mtx_unlock(&syncobj->mutex); + continue; + } + errno = ETIME; + mtx_unlock(&syncobj->mutex); + return -1; + } + + mtx_unlock(&syncobj->mutex); + + if (wait->wait_any) + break; + + /* TODO adjust poll_timeout */ + } + + return 0; +} + +static int +sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return -1; + + int fd = -1; + mtx_lock(&syncobj->mutex); + if (syncobj->pending_fd >= 0) + fd = dup(syncobj->pending_fd); + else + fd = dup(sim.signaled_fd); + mtx_unlock(&syncobj->mutex); + + return fd; +} + +static uint32_t +sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd) +{ + struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle); + if (!syncobj) + return 0; + + if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false)) + return 0; + + return syncobj_handle; +} + +#endif /* SIMULATE_SYNCOBJ */ + +#ifdef SIMULATE_SUBMIT + +static int +sim_submit_signal_syncs(struct virtgpu *gpu, + int sync_fd, + struct vn_renderer_sync *const *syncs, + const uint64_t *sync_values, + uint32_t sync_count, + bool cpu) +{ + for (uint32_t i = 0; i < sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i]; + const uint64_t pending_point = sync_values[i]; + +#ifdef SIMULATE_SYNCOBJ + int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd, + pending_point, cpu); + if (ret) + return ret; +#else + /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a + * DRM_IOCTL_SYNCOBJ_TRANSFER + */ + return -1; +#endif + } + + return 0; +} + +static uint32_t * +sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos, + uint32_t bo_count) +{ + uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count); + if (!gem_handles) + return NULL; + + for (uint32_t i = 0; i < bo_count; i++) { + struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i]; + gem_handles[i] = bo->gem_handle; + } + + return gem_handles; +} + +static int +sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit) +{ + /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */ + uint32_t *gem_handles = NULL; + if (submit->bo_count) { + gem_handles = + sim_submit_alloc_gem_handles(submit->bos, submit->bo_count); + if (!gem_handles) + return -1; + } + + int ret = 0; + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + + struct drm_virtgpu_execbuffer args = { + .flags = batch->sync_count ? 
VIRTGPU_EXECBUF_FENCE_FD_OUT : 0, + .size = batch->cs_size, + .command = (uintptr_t)batch->cs_data, + .bo_handles = (uintptr_t)gem_handles, + .num_bo_handles = submit->bo_count, + }; + + ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args); + if (ret) { + vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno)); + break; + } + + if (batch->sync_count) { + ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs, + batch->sync_values, batch->sync_count, + batch->sync_queue_cpu); + close(args.fence_fd); + if (ret) + break; + } + } + + free(gem_handles); + + return ret; +} + +#endif /* SIMULATE_SUBMIT */ + +static int +virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args) +{ + return drmIoctl(gpu->fd, request, args); +} + +static uint64_t +virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param) +{ +#ifdef SIMULATE_CONTEXT_INIT + if (param == VIRTGPU_PARAM_CONTEXT_INIT) + return 1; +#endif +#ifdef SIMULATE_SUBMIT + if (param == VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT) + return 16; +#endif + + /* val must be zeroed because kernel only writes the lower 32 bits */ + uint64_t val = 0; + struct drm_virtgpu_getparam args = { + .param = param, + .value = (uintptr_t)&val, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args); + return ret ? 0 : val; +} + +static int +virtgpu_ioctl_get_caps(struct virtgpu *gpu, + enum virgl_renderer_capset id, + uint32_t version, + void *capset, + size_t capset_size) +{ +#ifdef SIMULATE_CONTEXT_INIT + if (id == VIRGL_RENDERER_CAPSET_VENUS && version == 0) + return 0; +#endif + + struct drm_virtgpu_get_caps args = { + .cap_set_id = id, + .cap_set_ver = version, + .addr = (uintptr_t)capset, + .size = capset_size, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args); +} + +static int +virtgpu_ioctl_context_init(struct virtgpu *gpu, + enum virgl_renderer_capset capset_id) +{ +#ifdef SIMULATE_CONTEXT_INIT + if (capset_id == VIRGL_RENDERER_CAPSET_VENUS) + return 0; +#endif + + struct drm_virtgpu_context_init args = { + .num_params = 1, + .ctx_set_params = (uintptr_t) & + (struct drm_virtgpu_context_set_param){ + .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, + .value = capset_id, + }, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args); +} + +static uint32_t +virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t *res_id) +{ +#ifdef SIMULATE_BO_SIZE_FIX + blob_size = align64(blob_size, 4096); +#endif + + struct drm_virtgpu_resource_create_blob args = { + .blob_mem = blob_mem, + .blob_flags = blob_flags, + .size = blob_size, + .blob_id = blob_id, + }; + + if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) + return 0; + + *res_id = args.res_handle; + return args.bo_handle; +} + +static int +virtgpu_ioctl_resource_info(struct virtgpu *gpu, + uint32_t gem_handle, + struct drm_virtgpu_resource_info *info) +{ + *info = (struct drm_virtgpu_resource_info){ + .bo_handle = gem_handle, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info); +} + +static void +virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle) +{ + struct drm_gem_close args = { + .handle = gem_handle, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args); + assert(!ret); +} + +static int +virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu, + uint32_t gem_handle, + bool mappable) +{ + struct drm_prime_handle args = { + .handle = gem_handle, + .flags = 
DRM_CLOEXEC | (mappable ? DRM_RDWR : 0), + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + return ret ? -1 : args.fd; +} + +static uint32_t +virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd) +{ + struct drm_prime_handle args = { + .fd = fd, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + return ret ? 0 : args.handle; +} + +static void * +virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size) +{ + struct drm_virtgpu_map args = { + .handle = gem_handle, + }; + + if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) + return NULL; + + void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, + args.offset); + if (ptr == MAP_FAILED) + return NULL; + + return ptr; +} + +static uint32_t +virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled) +{ +#ifdef SIMULATE_SYNCOBJ + return sim_syncobj_create(gpu, signaled); +#endif + + struct drm_syncobj_create args = { + .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args); + return ret ? 0 : args.handle; +} + +static void +virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle) +{ +#ifdef SIMULATE_SYNCOBJ + sim_syncobj_destroy(gpu, syncobj_handle); + return; +#endif + + struct drm_syncobj_destroy args = { + .handle = syncobj_handle, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args); + assert(!ret); +} + +static int +virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu, + uint32_t syncobj_handle, + bool sync_file) +{ +#ifdef SIMULATE_SYNCOBJ + return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1; +#endif + + struct drm_syncobj_handle args = { + .handle = syncobj_handle, + .flags = + sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0, + }; + + int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args); + if (ret) + return -1; + + return args.fd; +} + +static uint32_t +virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu, + int fd, + uint32_t syncobj_handle) +{ +#ifdef SIMULATE_SYNCOBJ + return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0; +#endif + + struct drm_syncobj_handle args = { + .handle = syncobj_handle, + .flags = + syncobj_handle ? 
DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0, + .fd = fd, + }; + + int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args); + if (ret) + return 0; + + return args.handle; +} + +static int +virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle) +{ +#ifdef SIMULATE_SYNCOBJ + return sim_syncobj_reset(gpu, syncobj_handle); +#endif + + struct drm_syncobj_array args = { + .handles = (uintptr_t)&syncobj_handle, + .count_handles = 1, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args); +} + +static int +virtgpu_ioctl_syncobj_query(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t *point) +{ +#ifdef SIMULATE_SYNCOBJ + return sim_syncobj_query(gpu, syncobj_handle, point); +#endif + + struct drm_syncobj_timeline_array args = { + .handles = (uintptr_t)&syncobj_handle, + .points = (uintptr_t)point, + .count_handles = 1, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args); +} + +static int +virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu, + uint32_t syncobj_handle, + uint64_t point) +{ +#ifdef SIMULATE_SYNCOBJ + return sim_syncobj_signal(gpu, syncobj_handle, point); +#endif + + struct drm_syncobj_timeline_array args = { + .handles = (uintptr_t)&syncobj_handle, + .points = (uintptr_t)&point, + .count_handles = 1, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args); +} + +static int +virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu, + const struct vn_renderer_wait *wait, + bool wait_avail) +{ +#ifdef SIMULATE_SYNCOBJ + return sim_syncobj_wait(gpu, wait, wait_avail); +#endif + + /* always enable wait-before-submit */ + uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; + if (!wait->wait_any) + flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL; + /* wait for fences to appear instead of signaling */ + if (wait_avail) + flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE; + + /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */ + uint32_t *syncobj_handles = + malloc(sizeof(*syncobj_handles) * wait->sync_count); + if (!syncobj_handles) + return -1; + for (uint32_t i = 0; i < wait->sync_count; i++) { + struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i]; + syncobj_handles[i] = sync->syncobj_handle; + } + + struct drm_syncobj_timeline_wait args = { + .handles = (uintptr_t)syncobj_handles, + .points = (uintptr_t)wait->sync_values, + .timeout_nsec = os_time_get_absolute_timeout(wait->timeout), + .count_handles = wait->sync_count, + .flags = flags, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args); + + free(syncobj_handles); + + return ret; +} + +static int +virtgpu_ioctl_submit(struct virtgpu *gpu, + const struct vn_renderer_submit *submit) +{ +#ifdef SIMULATE_SUBMIT + return sim_submit(gpu, submit); +#endif + return -1; +} + +static VkResult +virtgpu_sync_write(struct vn_renderer_sync *_sync, uint64_t val) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + const int ret = + virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val); + + return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS; +} + +static VkResult +virtgpu_sync_read(struct vn_renderer_sync *_sync, uint64_t *val) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + const int ret = + virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val); + + return ret ? 
VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS; +} + +static VkResult +virtgpu_sync_reset(struct vn_renderer_sync *_sync, uint64_t initial_val) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle); + if (!ret) { + ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, + initial_val); + } + + return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS; +} + +static int +virtgpu_sync_export_syncobj(struct vn_renderer_sync *_sync, bool sync_file) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle, + sync_file); +} + +static void +virtgpu_sync_release(struct vn_renderer_sync *_sync) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle); + + sync->syncobj_handle = 0; + sync->base.sync_id = 0; +} + +static VkResult +virtgpu_sync_init_syncobj(struct vn_renderer_sync *_sync, + int fd, + bool sync_file) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + uint32_t syncobj_handle; + if (sync_file) { + syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false); + if (!syncobj_handle) + return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) { + virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + } else { + syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0); + if (!syncobj_handle) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + sync->syncobj_handle = syncobj_handle; + sync->base.sync_id = 0; /* TODO */ + + return VK_SUCCESS; +} + +static VkResult +virtgpu_sync_init(struct vn_renderer_sync *_sync, + uint64_t initial_val, + uint32_t flags) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + struct virtgpu *gpu = (struct virtgpu *)sync->gpu; + + /* TODO */ + if (flags & VN_RENDERER_SYNC_SHAREABLE) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* always false because we don't use binary drm_syncobjs */ + const bool signaled = false; + sync->syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, signaled); + if (!sync->syncobj_handle) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* add a signaled fence chain with seqno initial_val */ + const int ret = virtgpu_ioctl_syncobj_timeline_signal( + gpu, sync->syncobj_handle, initial_val); + if (ret) { + virtgpu_sync_release(&sync->base); + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + + /* we will have a sync_id when shareable is true and virtio-gpu associates + * a host sync object with guest drm_syncobj + */ + sync->base.sync_id = 0; + + return VK_SUCCESS; +} + +static void +virtgpu_sync_destroy(struct vn_renderer_sync *_sync) +{ + struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync; + + if (sync->syncobj_handle) + virtgpu_sync_release(&sync->base); + + free(sync); +} + +static struct vn_renderer_sync * +virtgpu_sync_create(struct vn_renderer *renderer) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + struct virtgpu_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) + return NULL; + + sync->gpu = gpu; + + sync->base.ops.destroy = virtgpu_sync_destroy; + sync->base.ops.init = virtgpu_sync_init; + sync->base.ops.init_syncobj = virtgpu_sync_init_syncobj; + 
sync->base.ops.release = virtgpu_sync_release; + sync->base.ops.export_syncobj = virtgpu_sync_export_syncobj; + sync->base.ops.reset = virtgpu_sync_reset; + sync->base.ops.read = virtgpu_sync_read; + sync->base.ops.write = virtgpu_sync_write; + + return &sync->base; +} + +static void +virtgpu_bo_invalidate(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ +} + +static void +virtgpu_bo_flush(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop because kernel makes every mapping coherent */ +} + +static void * +virtgpu_bo_map(struct vn_renderer_bo *_bo) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE; + + /* not thread-safe but is fine */ + if (!bo->gem_ptr && mappable) + bo->gem_ptr = virtgpu_ioctl_map(gpu, bo->gem_handle, bo->size); + + return bo->gem_ptr; +} + +static int +virtgpu_bo_export_dmabuf(struct vn_renderer_bo *_bo) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE; + const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE; + + return shareable + ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable) + : -1; +} + +static uint32_t +virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + uint32_t blob_flags = 0; + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE; + if (external_handles) + blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE; + if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) + blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE; + + return blob_flags; +} + +static VkResult +virtgpu_bo_init_dmabuf(struct vn_renderer_bo *_bo, + VkDeviceSize size, + int fd, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + + const uint32_t gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd); + if (!gem_handle) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + struct drm_virtgpu_resource_info info; + if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info) || + info.blob_mem != VIRTGPU_BLOB_MEM_HOST3D || info.size < size) { + virtgpu_ioctl_gem_close(gpu, gem_handle); + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + + bo->blob_flags = virtgpu_bo_blob_flags(flags, external_handles); + bo->size = size ? size : info.size; + bo->gem_handle = gem_handle; + bo->base.res_id = info.res_handle; + + return VK_SUCCESS; +} + +static VkResult +virtgpu_bo_init_gpu(struct vn_renderer_bo *_bo, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + + bo->blob_flags = virtgpu_bo_blob_flags(flags, external_handles); + bo->size = size; + + /* TODO work around KVM_SET_USER_MEMORY_REGION slot limit */ + bo->gem_handle = virtgpu_ioctl_resource_create_blob( + gpu, VIRTGPU_BLOB_MEM_HOST3D, bo->blob_flags, bo->size, mem_id, + &bo->base.res_id); + + return bo->gem_handle ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; +} + +static VkResult +virtgpu_bo_init_cpu(struct vn_renderer_bo *_bo, VkDeviceSize size) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + + bo->blob_flags = VIRTGPU_BLOB_FLAG_USE_MAPPABLE; + bo->size = size; + + bo->gem_handle = virtgpu_ioctl_resource_create_blob( + gpu, VIRTGPU_BLOB_MEM_GUEST, bo->blob_flags, bo->size, 0, + &bo->base.res_id); + + return bo->gem_handle ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY; +} + +static void +virtgpu_bo_destroy(struct vn_renderer_bo *_bo) +{ + struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo; + struct virtgpu *gpu = bo->gpu; + + if (bo->gem_handle) { + if (bo->gem_ptr) + munmap(bo->gem_ptr, bo->size); + virtgpu_ioctl_gem_close(gpu, bo->gem_handle); + } + + free(bo); +} + +static struct vn_renderer_bo * +virtgpu_bo_create(struct vn_renderer *renderer) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + struct virtgpu_bo *bo = calloc(1, sizeof(*bo)); + if (!bo) + return NULL; + + bo->gpu = gpu; + + bo->base.ops.destroy = virtgpu_bo_destroy; + bo->base.ops.init_cpu = virtgpu_bo_init_cpu; + bo->base.ops.init_gpu = virtgpu_bo_init_gpu; + bo->base.ops.init_dmabuf = virtgpu_bo_init_dmabuf; + bo->base.ops.export_dmabuf = virtgpu_bo_export_dmabuf; + bo->base.ops.map = virtgpu_bo_map; + bo->base.ops.flush = virtgpu_bo_flush; + bo->base.ops.invalidate = virtgpu_bo_invalidate; + + return &bo->base; +} + +static VkResult +virtgpu_wait(struct vn_renderer *renderer, + const struct vn_renderer_wait *wait) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false); + if (ret && errno != ETIME) + return VK_ERROR_DEVICE_LOST; + + return ret ? VK_TIMEOUT : VK_SUCCESS; +} + +static VkResult +virtgpu_submit(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + const int ret = virtgpu_ioctl_submit(gpu, submit); + return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS; +} + +static void +virtgpu_get_info(struct vn_renderer *renderer, struct vn_renderer_info *info) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + memset(info, 0, sizeof(*info)); + + info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID; + info->pci.device_id = VIRTGPU_PCI_DEVICE_ID; + + info->pci.has_bus_info = true; + info->pci.domain = gpu->bus_info.domain; + info->pci.bus = gpu->bus_info.bus; + info->pci.device = gpu->bus_info.dev; + info->pci.function = gpu->bus_info.func; + + info->has_dmabuf_import = true; + /* Kernel makes every mapping coherent. We are better off filtering + * incoherent memory types out than silently making them coherent. 
+ */ + info->has_cache_management = false; + /* TODO drm_syncobj */ + info->has_timeline_sync = false; + info->has_external_sync = false; + + info->max_sync_queue_count = gpu->max_sync_queue_count; + + const struct virgl_renderer_capset_venus *capset = &gpu->capset.data; + info->wire_format_version = capset->wire_format_version; + info->vk_xml_version = capset->vk_xml_version; + info->vk_ext_command_serialization_spec_version = + capset->vk_ext_command_serialization_spec_version; + info->vk_mesa_venus_protocol_spec_version = + capset->vk_mesa_venus_protocol_spec_version; +} + +static void +virtgpu_destroy(struct vn_renderer *renderer, + const VkAllocationCallbacks *alloc) +{ + struct virtgpu *gpu = (struct virtgpu *)renderer; + + if (gpu->fd >= 0) + close(gpu->fd); + + vk_free(alloc, gpu); +} + +static VkResult +virtgpu_init_context(struct virtgpu *gpu) +{ + assert(!gpu->capset.version); + const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id); + if (ret) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to initialize context: %s", + strerror(errno)); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + return VK_SUCCESS; +} + +static VkResult +virtgpu_init_capset(struct virtgpu *gpu) +{ + gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; + gpu->capset.version = 0; + + const int ret = + virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, + &gpu->capset.data, sizeof(gpu->capset.data)); + if (ret) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "failed to get venus v%d capset: %s", + gpu->capset.version, strerror(errno)); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + return VK_SUCCESS; +} + +static VkResult +virtgpu_init_params(struct virtgpu *gpu) +{ + const uint64_t required_params[] = { + VIRTGPU_PARAM_3D_FEATURES, VIRTGPU_PARAM_CAPSET_QUERY_FIX, + VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_HOST_VISIBLE, + VIRTGPU_PARAM_CROSS_DEVICE, VIRTGPU_PARAM_CONTEXT_INIT, + }; + uint64_t val; + for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) { + val = virtgpu_ioctl_getparam(gpu, required_params[i]); + if (!val) { + if (VN_DEBUG(INIT)) { + vn_log(gpu->instance, "required kernel param %d is missing", + (int)required_params[i]); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + } + + val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT); + if (!val) { + if (VN_DEBUG(INIT)) + vn_log(gpu->instance, "no sync queue support"); + return VK_ERROR_INITIALIZATION_FAILED; + } + gpu->max_sync_queue_count = val; + + return VK_SUCCESS; +} + +static VkResult +virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev) +{ + /* skip unless the device has our PCI vendor/device id and a render node */ + if (!(dev->available_nodes & (1 << DRM_NODE_RENDER)) || + dev->bustype != DRM_BUS_PCI || + dev->deviceinfo.pci->vendor_id != VIRTGPU_PCI_VENDOR_ID || + dev->deviceinfo.pci->device_id != VIRTGPU_PCI_DEVICE_ID) { + if (VN_DEBUG(INIT)) { + const char *name = "unknown"; + for (uint32_t i = 0; i < DRM_NODE_MAX; i++) { + if (dev->available_nodes & (1 << i)) { + name = dev->nodes[i]; + break; + } + } + vn_log(gpu->instance, "skipping DRM device %s", name); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + + const char *node_path = dev->nodes[DRM_NODE_RENDER]; + + int fd = open(node_path, O_RDWR | O_CLOEXEC); + if (fd < 0) { + if (VN_DEBUG(INIT)) + vn_log(gpu->instance, "failed to open %s", node_path); + return VK_ERROR_INITIALIZATION_FAILED; + } + + drmVersionPtr version = drmGetVersion(fd); + if (!version || strcmp(version->name, "virtio_gpu") || + 
version->version_major != 0) { + if (VN_DEBUG(INIT)) { + if (version) { + vn_log(gpu->instance, "unknown DRM driver %s version %d", + version->name, version->version_major); + } else { + vn_log(gpu->instance, "failed to get DRM driver version"); + } + } + if (version) + drmFreeVersion(version); + close(fd); + return VK_ERROR_INITIALIZATION_FAILED; + } + + gpu->fd = fd; + gpu->version_minor = version->version_minor; + gpu->bus_info = *dev->businfo.pci; + + drmFreeVersion(version); + + if (VN_DEBUG(INIT)) + vn_log(gpu->instance, "using DRM device %s", node_path); + + return VK_SUCCESS; +} + +static VkResult +virtgpu_open(struct virtgpu *gpu) +{ + drmDevicePtr devs[8]; + int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); + if (count < 0) { + if (VN_DEBUG(INIT)) + vn_log(gpu->instance, "failed to enumerate DRM devices"); + return VK_ERROR_INITIALIZATION_FAILED; + } + + VkResult result = VK_ERROR_INITIALIZATION_FAILED; + for (int i = 0; i < count; i++) { + result = virtgpu_open_device(gpu, devs[i]); + if (result == VK_SUCCESS) + break; + } + + drmFreeDevices(devs, count); + + return result; +} + +static VkResult +virtgpu_init(struct virtgpu *gpu) +{ + VkResult result = virtgpu_open(gpu); + if (result == VK_SUCCESS) + result = virtgpu_init_params(gpu); + if (result == VK_SUCCESS) + result = virtgpu_init_capset(gpu); + if (result == VK_SUCCESS) + result = virtgpu_init_context(gpu); + if (result != VK_SUCCESS) + return result; + + gpu->base.ops.destroy = virtgpu_destroy; + gpu->base.ops.get_info = virtgpu_get_info; + gpu->base.ops.submit = virtgpu_submit; + gpu->base.ops.wait = virtgpu_wait; + gpu->base.ops.bo_create = virtgpu_bo_create; + gpu->base.ops.sync_create = virtgpu_sync_create; + + return VK_SUCCESS; +} + +VkResult +vn_renderer_create_virtgpu(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + struct vn_renderer **renderer) +{ + struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!gpu) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + gpu->instance = instance; + + VkResult result = virtgpu_init(gpu); + if (result != VK_SUCCESS) { + virtgpu_destroy(&gpu->base, alloc); + return result; + } + + *renderer = &gpu->base; + + return VK_SUCCESS; +} diff --git a/src/virtio/vulkan/vn_renderer_vtest.c b/src/virtio/vulkan/vn_renderer_vtest.c new file mode 100644 index 00000000000..ff9290d55ba --- /dev/null +++ b/src/virtio/vulkan/vn_renderer_vtest.c @@ -0,0 +1,1045 @@ +/* + * Copyright 2019 Google LLC + * SPDX-License-Identifier: MIT + * + * based in part on virgl which is: + * Copyright 2014, 2015 Red Hat. 
+ */ + +#include <errno.h> +#include <netinet/in.h> +#include <poll.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include "util/os_file.h" +#include "util/u_process.h" +#define VIRGL_RENDERER_UNSTABLE_APIS +#include "virtio-gpu/virglrenderer_hw.h" +#include "vtest/vtest_protocol.h" + +#include "vn_renderer.h" + +#define VTEST_PCI_VENDOR_ID 0x1af4 +#define VTEST_PCI_DEVICE_ID 0x1050 + +struct vtest; + +struct vtest_bo { + struct vn_renderer_bo base; + struct vtest *vtest; + + uint32_t blob_flags; + VkDeviceSize size; + /* might be closed after mmap */ + int res_fd; + + void *res_ptr; +}; + +struct vtest_sync { + struct vn_renderer_sync base; + struct vtest *vtest; +}; + +struct vtest { + struct vn_renderer base; + + struct vn_instance *instance; + + mtx_t sock_mutex; + int sock_fd; + + uint32_t protocol_version; + uint32_t max_sync_queue_count; + + struct { + enum virgl_renderer_capset id; + uint32_t version; + struct virgl_renderer_capset_venus data; + } capset; +}; + +static int +vtest_connect_socket(struct vn_instance *instance, const char *path) +{ + struct sockaddr_un un; + int sock; + + sock = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sock < 0) { + vn_log(instance, "failed to create a socket"); + return -1; + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + memcpy(un.sun_path, path, strlen(path)); + + if (connect(sock, (struct sockaddr *)&un, sizeof(un)) == -1) { + vn_log(instance, "failed to connect to %s: %s", path, strerror(errno)); + close(sock); + return -1; + } + + return sock; +} + +static void +vtest_read(struct vtest *vtest, void *buf, size_t size) +{ + do { + const ssize_t ret = read(vtest->sock_fd, buf, size); + if (unlikely(ret < 0)) { + vn_log(vtest->instance, + "lost connection to rendering server on %zu read %zi %d", + size, ret, errno); + abort(); + } + + buf += ret; + size -= ret; + } while (size); +} + +static int +vtest_receive_fd(struct vtest *vtest) +{ + char cmsg_buf[CMSG_SPACE(sizeof(int))]; + char dummy; + struct msghdr msg = { + .msg_iov = + &(struct iovec){ + .iov_base = &dummy, + .iov_len = sizeof(dummy), + }, + .msg_iovlen = 1, + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), + }; + + if (recvmsg(vtest->sock_fd, &msg, 0) < 0) { + vn_log(vtest->instance, "recvmsg failed: %s", strerror(errno)); + abort(); + } + + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg || cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS) { + vn_log(vtest->instance, "invalid cmsghdr"); + abort(); + } + + return *((int *)CMSG_DATA(cmsg)); +} + +static void +vtest_write(struct vtest *vtest, const void *buf, size_t size) +{ + do { + const ssize_t ret = write(vtest->sock_fd, buf, size); + if (unlikely(ret < 0)) { + vn_log(vtest->instance, + "lost connection to rendering server on %zu write %zi %d", + size, ret, errno); + abort(); + } + + buf += ret; + size -= ret; + } while (size); +} + +static void +vtest_vcmd_create_renderer(struct vtest *vtest, const char *name) +{ + const size_t size = strlen(name) + 1; + + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = size; + vtest_hdr[VTEST_CMD_ID] = VCMD_CREATE_RENDERER; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, name, size); +} + +static bool +vtest_vcmd_ping_protocol_version(struct vtest *vtest) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_PING_PROTOCOL_VERSION_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_PING_PROTOCOL_VERSION; + + 
vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + + /* send a dummy busy wait to avoid blocking in vtest_read in case ping + * protocol version is not supported + */ + uint32_t vcmd_busy_wait[VCMD_BUSY_WAIT_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_BUSY_WAIT_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_BUSY_WAIT; + vcmd_busy_wait[VCMD_BUSY_WAIT_HANDLE] = 0; + vcmd_busy_wait[VCMD_BUSY_WAIT_FLAGS] = 0; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_busy_wait, sizeof(vcmd_busy_wait)); + + uint32_t dummy; + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + if (vtest_hdr[VTEST_CMD_ID] == VCMD_PING_PROTOCOL_VERSION) { + /* consume the dummy busy wait result */ + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_RESOURCE_BUSY_WAIT); + vtest_read(vtest, &dummy, sizeof(dummy)); + return true; + } else { + /* no ping protocol version support */ + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_RESOURCE_BUSY_WAIT); + vtest_read(vtest, &dummy, sizeof(dummy)); + return false; + } +} + +static uint32_t +vtest_vcmd_protocol_version(struct vtest *vtest) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_protocol_version[VCMD_PROTOCOL_VERSION_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_PROTOCOL_VERSION_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_PROTOCOL_VERSION; + vcmd_protocol_version[VCMD_PROTOCOL_VERSION_VERSION] = + VTEST_PROTOCOL_VERSION; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_protocol_version, sizeof(vcmd_protocol_version)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == VCMD_PROTOCOL_VERSION_SIZE); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_PROTOCOL_VERSION); + vtest_read(vtest, vcmd_protocol_version, sizeof(vcmd_protocol_version)); + + return vcmd_protocol_version[VCMD_PROTOCOL_VERSION_VERSION]; +} + +static uint32_t +vtest_vcmd_get_param(struct vtest *vtest, enum vcmd_param param) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_get_param[VCMD_GET_PARAM_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_GET_PARAM_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_GET_PARAM; + vcmd_get_param[VCMD_GET_PARAM_PARAM] = param; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_get_param, sizeof(vcmd_get_param)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == 2); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_GET_PARAM); + + uint32_t resp[2]; + vtest_read(vtest, resp, sizeof(resp)); + + return resp[0] ? 
resp[1] : 0; +} + +static bool +vtest_vcmd_get_capset(struct vtest *vtest, + enum virgl_renderer_capset id, + uint32_t version, + void *capset, + size_t capset_size) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_get_capset[VCMD_GET_CAPSET_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_GET_CAPSET_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_GET_CAPSET; + vcmd_get_capset[VCMD_GET_CAPSET_ID] = id; + vcmd_get_capset[VCMD_GET_CAPSET_VERSION] = version; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_get_capset, sizeof(vcmd_get_capset)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_GET_CAPSET); + + uint32_t valid; + vtest_read(vtest, &valid, sizeof(valid)); + if (!valid) + return false; + + size_t read_size = (vtest_hdr[VTEST_CMD_LEN] - 1) * 4; + if (capset_size >= read_size) { + vtest_read(vtest, capset, read_size); + memset(capset + read_size, 0, capset_size - read_size); + } else { + vtest_read(vtest, capset, capset_size); + + char temp[256]; + read_size -= capset_size; + while (read_size) { + const size_t temp_size = MIN2(read_size, ARRAY_SIZE(temp)); + vtest_read(vtest, temp, temp_size); + read_size -= temp_size; + } + } + + return true; +} + +static void +vtest_vcmd_context_init(struct vtest *vtest, + enum virgl_renderer_capset capset_id) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_context_init[VCMD_CONTEXT_INIT_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_CONTEXT_INIT_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_CONTEXT_INIT; + vcmd_context_init[VCMD_CONTEXT_INIT_CAPSET_ID] = capset_id; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_context_init, sizeof(vcmd_context_init)); +} + +static uint32_t +vtest_vcmd_resource_create_blob(struct vtest *vtest, + enum vcmd_blob_type type, + uint32_t flags, + VkDeviceSize size, + vn_object_id blob_id, + int *res_fd) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_CREATE_BLOB_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_CREATE_BLOB; + + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_TYPE] = type; + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_FLAGS] = flags; + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_SIZE_LO] = (uint32_t)size; + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_SIZE_HI] = + (uint32_t)(size >> 32); + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_ID_LO] = (uint32_t)blob_id; + vcmd_res_create_blob[VCMD_RES_CREATE_BLOB_ID_HI] = + (uint32_t)(blob_id >> 32); + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_res_create_blob, sizeof(vcmd_res_create_blob)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == 1); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_RESOURCE_CREATE_BLOB); + + uint32_t res_id; + vtest_read(vtest, &res_id, sizeof(res_id)); + + *res_fd = vtest_receive_fd(vtest); + + return res_id; +} + +static void +vtest_vcmd_resource_unref(struct vtest *vtest, uint32_t res_id) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_res_unref[VCMD_RES_UNREF_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_RES_UNREF_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_RESOURCE_UNREF; + vcmd_res_unref[VCMD_RES_UNREF_RES_HANDLE] = res_id; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_res_unref, sizeof(vcmd_res_unref)); +} + +static uint32_t +vtest_vcmd_sync_create(struct vtest *vtest, uint64_t initial_val) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t 
vcmd_sync_create[VCMD_SYNC_CREATE_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_SYNC_CREATE_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_SYNC_CREATE; + + vcmd_sync_create[VCMD_SYNC_CREATE_VALUE_LO] = (uint32_t)initial_val; + vcmd_sync_create[VCMD_SYNC_CREATE_VALUE_HI] = + (uint32_t)(initial_val >> 32); + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_sync_create, sizeof(vcmd_sync_create)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == 1); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_SYNC_CREATE); + + uint32_t sync_id; + vtest_read(vtest, &sync_id, sizeof(sync_id)); + + return sync_id; +} + +static void +vtest_vcmd_sync_unref(struct vtest *vtest, uint32_t sync_id) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_sync_unref[VCMD_SYNC_UNREF_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_SYNC_UNREF_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_SYNC_UNREF; + vcmd_sync_unref[VCMD_SYNC_UNREF_ID] = sync_id; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_sync_unref, sizeof(vcmd_sync_unref)); +} + +static uint64_t +vtest_vcmd_sync_read(struct vtest *vtest, uint32_t sync_id) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_sync_read[VCMD_SYNC_READ_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_SYNC_READ_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_SYNC_READ; + + vcmd_sync_read[VCMD_SYNC_READ_ID] = sync_id; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_sync_read, sizeof(vcmd_sync_read)); + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == 2); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_SYNC_READ); + + uint64_t val; + vtest_read(vtest, &val, sizeof(val)); + + return val; +} + +static void +vtest_vcmd_sync_write(struct vtest *vtest, uint32_t sync_id, uint64_t val) +{ + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + uint32_t vcmd_sync_write[VCMD_SYNC_WRITE_SIZE]; + + vtest_hdr[VTEST_CMD_LEN] = VCMD_SYNC_WRITE_SIZE; + vtest_hdr[VTEST_CMD_ID] = VCMD_SYNC_WRITE; + + vcmd_sync_write[VCMD_SYNC_WRITE_ID] = sync_id; + vcmd_sync_write[VCMD_SYNC_WRITE_VALUE_LO] = (uint32_t)val; + vcmd_sync_write[VCMD_SYNC_WRITE_VALUE_HI] = (uint32_t)(val >> 32); + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, vcmd_sync_write, sizeof(vcmd_sync_write)); +} + +static int +vtest_vcmd_sync_wait(struct vtest *vtest, + uint32_t flags, + int poll_timeout, + struct vn_renderer_sync *const *syncs, + const uint64_t *vals, + uint32_t count) +{ + const uint32_t timeout = poll_timeout >= 0 && poll_timeout <= INT32_MAX + ? 
poll_timeout + : UINT32_MAX; + + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = VCMD_SYNC_WAIT_SIZE(count); + vtest_hdr[VTEST_CMD_ID] = VCMD_SYNC_WAIT; + + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + vtest_write(vtest, &flags, sizeof(flags)); + vtest_write(vtest, &timeout, sizeof(timeout)); + for (uint32_t i = 0; i < count; i++) { + const uint64_t val = vals[i]; + const uint32_t sync[3] = { + syncs[i]->sync_id, + (uint32_t)val, + (uint32_t)(val >> 32), + }; + vtest_write(vtest, sync, sizeof(sync)); + } + + vtest_read(vtest, vtest_hdr, sizeof(vtest_hdr)); + assert(vtest_hdr[VTEST_CMD_LEN] == 0); + assert(vtest_hdr[VTEST_CMD_ID] == VCMD_SYNC_WAIT); + + return vtest_receive_fd(vtest); +} + +static void +submit_cmd2_sizes(const struct vn_renderer_submit *submit, + size_t *header_size, + size_t *cs_size, + size_t *sync_size) +{ + if (!submit->batch_count) { + *header_size = 0; + *cs_size = 0; + *sync_size = 0; + return; + } + + *header_size = sizeof(uint32_t) + + sizeof(struct vcmd_submit_cmd2_batch) * submit->batch_count; + + *cs_size = 0; + *sync_size = 0; + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + assert(batch->cs_size % sizeof(uint32_t) == 0); + *cs_size += batch->cs_size; + *sync_size += (sizeof(uint32_t) + sizeof(uint64_t)) * batch->sync_count; + } + + assert(*header_size % sizeof(uint32_t) == 0); + assert(*cs_size % sizeof(uint32_t) == 0); + assert(*sync_size % sizeof(uint32_t) == 0); +} + +static void +vtest_vcmd_submit_cmd2(struct vtest *vtest, + const struct vn_renderer_submit *submit) +{ + size_t header_size; + size_t cs_size; + size_t sync_size; + submit_cmd2_sizes(submit, &header_size, &cs_size, &sync_size); + const size_t total_size = header_size + cs_size + sync_size; + if (!total_size) + return; + + uint32_t vtest_hdr[VTEST_HDR_SIZE]; + vtest_hdr[VTEST_CMD_LEN] = total_size / sizeof(uint32_t); + vtest_hdr[VTEST_CMD_ID] = VCMD_SUBMIT_CMD2; + vtest_write(vtest, vtest_hdr, sizeof(vtest_hdr)); + + /* write batch count and batch headers */ + const uint32_t batch_count = submit->batch_count; + size_t cs_offset = header_size; + size_t sync_offset = cs_offset + cs_size; + vtest_write(vtest, &batch_count, sizeof(batch_count)); + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + struct vcmd_submit_cmd2_batch dst = { + .cmd_offset = cs_offset / sizeof(uint32_t), + .cmd_size = batch->cs_size / sizeof(uint32_t), + .sync_offset = sync_offset / sizeof(uint32_t), + .sync_count = batch->sync_count, + }; + if (!batch->sync_queue_cpu) { + dst.flags = VCMD_SUBMIT_CMD2_FLAG_SYNC_QUEUE; + dst.sync_queue_index = batch->sync_queue_index; + dst.sync_queue_id = batch->vk_queue_id; + } + vtest_write(vtest, &dst, sizeof(dst)); + + cs_offset += batch->cs_size; + sync_offset += + (sizeof(uint32_t) + sizeof(uint64_t)) * batch->sync_count; + } + + /* write cs */ + if (cs_size) { + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + if (batch->cs_size) + vtest_write(vtest, batch->cs_data, batch->cs_size); + } + } + + /* write syncs */ + for (uint32_t i = 0; i < submit->batch_count; i++) { + const struct vn_renderer_submit_batch *batch = &submit->batches[i]; + + for (uint32_t j = 0; j < batch->sync_count; j++) { + const uint64_t val = batch->sync_values[j]; + const uint32_t sync[3] = { + batch->syncs[j]->sync_id, + (uint32_t)val, + (uint32_t)(val >> 32), + }; 
+ vtest_write(vtest, sync, sizeof(sync)); + } + } +} + +static VkResult +vtest_sync_write(struct vn_renderer_sync *_sync, uint64_t val) +{ + struct vtest_sync *sync = (struct vtest_sync *)_sync; + struct vtest *vtest = sync->vtest; + + mtx_lock(&vtest->sock_mutex); + vtest_vcmd_sync_write(vtest, sync->base.sync_id, val); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static VkResult +vtest_sync_read(struct vn_renderer_sync *_sync, uint64_t *val) +{ + struct vtest_sync *sync = (struct vtest_sync *)_sync; + struct vtest *vtest = sync->vtest; + + mtx_lock(&vtest->sock_mutex); + *val = vtest_vcmd_sync_read(vtest, sync->base.sync_id); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static VkResult +vtest_sync_reset(struct vn_renderer_sync *sync, uint64_t initial_val) +{ + /* same as write */ + return vtest_sync_write(sync, initial_val); +} + +static void +vtest_sync_release(struct vn_renderer_sync *_sync) +{ + struct vtest_sync *sync = (struct vtest_sync *)_sync; + struct vtest *vtest = sync->vtest; + + mtx_lock(&vtest->sock_mutex); + vtest_vcmd_sync_unref(vtest, sync->base.sync_id); + mtx_unlock(&vtest->sock_mutex); + + sync->base.sync_id = 0; +} + +static VkResult +vtest_sync_init(struct vn_renderer_sync *_sync, + uint64_t initial_val, + uint32_t flags) +{ + struct vtest_sync *sync = (struct vtest_sync *)_sync; + struct vtest *vtest = sync->vtest; + + mtx_lock(&vtest->sock_mutex); + sync->base.sync_id = vtest_vcmd_sync_create(vtest, initial_val); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static void +vtest_sync_destroy(struct vn_renderer_sync *_sync) +{ + struct vtest_sync *sync = (struct vtest_sync *)_sync; + + if (sync->base.sync_id) + vtest_sync_release(&sync->base); + + free(sync); +} + +static struct vn_renderer_sync * +vtest_sync_create(struct vn_renderer *renderer) +{ + struct vtest *vtest = (struct vtest *)renderer; + + struct vtest_sync *sync = calloc(1, sizeof(*sync)); + if (!sync) + return NULL; + + sync->vtest = vtest; + + sync->base.ops.destroy = vtest_sync_destroy; + sync->base.ops.init = vtest_sync_init; + sync->base.ops.init_syncobj = NULL; + sync->base.ops.release = vtest_sync_release; + sync->base.ops.export_syncobj = NULL; + sync->base.ops.reset = vtest_sync_reset; + sync->base.ops.read = vtest_sync_read; + sync->base.ops.write = vtest_sync_write; + + return &sync->base; +} + +static void +vtest_bo_invalidate(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop */ +} + +static void +vtest_bo_flush(struct vn_renderer_bo *bo, + VkDeviceSize offset, + VkDeviceSize size) +{ + /* nop */ +} + +static void * +vtest_bo_map(struct vn_renderer_bo *_bo) +{ + struct vtest_bo *bo = (struct vtest_bo *)_bo; + struct vtest *vtest = bo->vtest; + const bool mappable = bo->blob_flags & VCMD_BLOB_FLAG_MAPPABLE; + const bool shareable = bo->blob_flags & VCMD_BLOB_FLAG_SHAREABLE; + + /* not thread-safe but is fine */ + if (!bo->res_ptr && mappable) { + /* We wrongly assume that mmap(dmabuf) and vkMapMemory(VkDeviceMemory) + * are equivalent when the blob type is VCMD_BLOB_TYPE_HOST3D. While we + * check for VCMD_PARAM_HOST_COHERENT_DMABUF_BLOB, we know vtest can + * lie. 
+ */ + void *ptr = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->res_fd, 0); + if (ptr == MAP_FAILED) { + vn_log(vtest->instance, + "failed to mmap %d of size %" PRIu64 " rw: %s", bo->res_fd, + bo->size, strerror(errno)); + } else { + bo->res_ptr = ptr; + /* we don't need the fd anymore */ + if (!shareable) { + close(bo->res_fd); + bo->res_fd = -1; + } + } + } + + return bo->res_ptr; +} + +static int +vtest_bo_export_dmabuf(struct vn_renderer_bo *_bo) +{ + const struct vtest_bo *bo = (struct vtest_bo *)_bo; + /* this suffices because vtest_bo_init_cpu does not set the bit */ + const bool shareable = bo->blob_flags & VCMD_BLOB_FLAG_SHAREABLE; + return shareable ? os_dupfd_cloexec(bo->res_fd) : -1; +} + +static uint32_t +vtest_bo_blob_flags(VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + uint32_t blob_flags = 0; + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + blob_flags |= VCMD_BLOB_FLAG_MAPPABLE; + if (external_handles) + blob_flags |= VCMD_BLOB_FLAG_SHAREABLE; + if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) + blob_flags |= VCMD_BLOB_FLAG_CROSS_DEVICE; + + return blob_flags; +} + +static VkResult +vtest_bo_init_gpu(struct vn_renderer_bo *_bo, + VkDeviceSize size, + vn_object_id mem_id, + VkMemoryPropertyFlags flags, + VkExternalMemoryHandleTypeFlags external_handles) +{ + struct vtest_bo *bo = (struct vtest_bo *)_bo; + struct vtest *vtest = bo->vtest; + + bo->blob_flags = vtest_bo_blob_flags(flags, external_handles); + bo->size = size; + + mtx_lock(&vtest->sock_mutex); + bo->base.res_id = vtest_vcmd_resource_create_blob( + vtest, VCMD_BLOB_TYPE_HOST3D, bo->blob_flags, bo->size, mem_id, + &bo->res_fd); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static VkResult +vtest_bo_init_cpu(struct vn_renderer_bo *_bo, VkDeviceSize size) +{ + struct vtest_bo *bo = (struct vtest_bo *)_bo; + struct vtest *vtest = bo->vtest; + + bo->blob_flags = VCMD_BLOB_FLAG_MAPPABLE; + bo->size = size; + + mtx_lock(&vtest->sock_mutex); + bo->base.res_id = vtest_vcmd_resource_create_blob( + vtest, VCMD_BLOB_TYPE_GUEST, bo->blob_flags, bo->size, 0, &bo->res_fd); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static void +vtest_bo_destroy(struct vn_renderer_bo *_bo) +{ + struct vtest_bo *bo = (struct vtest_bo *)_bo; + struct vtest *vtest = bo->vtest; + + if (bo->base.res_id) { + if (bo->res_ptr) + munmap(bo->res_ptr, bo->size); + if (bo->res_fd >= 0) + close(bo->res_fd); + + mtx_lock(&vtest->sock_mutex); + vtest_vcmd_resource_unref(vtest, bo->base.res_id); + mtx_unlock(&vtest->sock_mutex); + } + + free(bo); +} + +static struct vn_renderer_bo * +vtest_bo_create(struct vn_renderer *renderer) +{ + struct vtest *vtest = (struct vtest *)renderer; + + struct vtest_bo *bo = calloc(1, sizeof(*bo)); + if (!bo) + return NULL; + + bo->vtest = vtest; + bo->res_fd = -1; + + bo->base.ops.destroy = vtest_bo_destroy; + bo->base.ops.init_cpu = vtest_bo_init_cpu; + bo->base.ops.init_gpu = vtest_bo_init_gpu; + bo->base.ops.init_dmabuf = NULL; + bo->base.ops.export_dmabuf = vtest_bo_export_dmabuf; + bo->base.ops.map = vtest_bo_map; + bo->base.ops.flush = vtest_bo_flush; + bo->base.ops.invalidate = vtest_bo_invalidate; + + return &bo->base; +} + +static VkResult +sync_wait_poll(int fd, int poll_timeout) +{ + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN, + }; + int ret; + do { + ret = poll(&pollfd, 1, poll_timeout); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret < 0 || (ret > 0 && 
!(pollfd.revents & POLLIN))) { + return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY + : VK_ERROR_DEVICE_LOST; + } + + return ret ? VK_SUCCESS : VK_TIMEOUT; +} + +static int +timeout_to_poll_timeout(uint64_t timeout) +{ + const uint64_t ns_per_ms = 1000000; + const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms; + if (!ms && timeout) + return -1; + return ms <= INT_MAX ? ms : -1; +} + +static VkResult +vtest_wait(struct vn_renderer *renderer, const struct vn_renderer_wait *wait) +{ + struct vtest *vtest = (struct vtest *)renderer; + const uint32_t flags = wait->wait_any ? VCMD_SYNC_WAIT_FLAG_ANY : 0; + const int poll_timeout = timeout_to_poll_timeout(wait->timeout); + + /* + * vtest_vcmd_sync_wait (and some other sync commands) is executed after + * all prior commands are dispatched. That is far from ideal. + * + * In virtio-gpu, a drm_syncobj wait ioctl is executed immediately. It + * works because it uses virtio-gpu interrupts as a side channel. vtest + * needs a side channel to perform well. + * + * virtio-gpu or vtest, we should also set up a 1-byte coherent memory that + * is set to non-zero by GPU after the syncs signal. That would allow us + * to do a quick check (or spin a bit) before waiting. + */ + mtx_lock(&vtest->sock_mutex); + const int fd = + vtest_vcmd_sync_wait(vtest, flags, poll_timeout, wait->syncs, + wait->sync_values, wait->sync_count); + mtx_unlock(&vtest->sock_mutex); + + VkResult result = sync_wait_poll(fd, poll_timeout); + close(fd); + + return result; +} + +static VkResult +vtest_submit(struct vn_renderer *renderer, + const struct vn_renderer_submit *submit) +{ + struct vtest *vtest = (struct vtest *)renderer; + + mtx_lock(&vtest->sock_mutex); + vtest_vcmd_submit_cmd2(vtest, submit); + mtx_unlock(&vtest->sock_mutex); + + return VK_SUCCESS; +} + +static void +vtest_get_info(struct vn_renderer *renderer, struct vn_renderer_info *info) +{ + struct vtest *vtest = (struct vtest *)renderer; + + memset(info, 0, sizeof(*info)); + + info->pci.vendor_id = VTEST_PCI_VENDOR_ID; + info->pci.device_id = VTEST_PCI_DEVICE_ID; + + info->has_dmabuf_import = false; + info->has_cache_management = false; + info->has_timeline_sync = true; + info->has_external_sync = false; + + info->max_sync_queue_count = vtest->max_sync_queue_count; + + const struct virgl_renderer_capset_venus *capset = &vtest->capset.data; + info->wire_format_version = capset->wire_format_version; + info->vk_xml_version = capset->vk_xml_version; + info->vk_ext_command_serialization_spec_version = + capset->vk_ext_command_serialization_spec_version; + info->vk_mesa_venus_protocol_spec_version = + capset->vk_mesa_venus_protocol_spec_version; +} + +static void +vtest_destroy(struct vn_renderer *renderer, + const VkAllocationCallbacks *alloc) +{ + struct vtest *vtest = (struct vtest *)renderer; + + if (vtest->sock_fd >= 0) { + shutdown(vtest->sock_fd, SHUT_RDWR); + close(vtest->sock_fd); + } + + mtx_destroy(&vtest->sock_mutex); + + vk_free(alloc, vtest); +} + +static VkResult +vtest_init_capset(struct vtest *vtest) +{ + vtest->capset.id = VIRGL_RENDERER_CAPSET_VENUS; + vtest->capset.version = 0; + + if (!vtest_vcmd_get_capset(vtest, vtest->capset.id, vtest->capset.version, + &vtest->capset.data, + sizeof(vtest->capset.data))) { + vn_log(vtest->instance, "no venus capset"); + return VK_ERROR_INITIALIZATION_FAILED; + } + + return VK_SUCCESS; +} + +static VkResult +vtest_init_params(struct vtest *vtest) +{ + uint32_t val = + vtest_vcmd_get_param(vtest, VCMD_PARAM_MAX_SYNC_QUEUE_COUNT); + if (!val) { + 
vn_log(vtest->instance, "no sync queue support"); + return VK_ERROR_INITIALIZATION_FAILED; + } + vtest->max_sync_queue_count = val; + + return VK_SUCCESS; +} + +static VkResult +vtest_init_protocol_version(struct vtest *vtest) +{ + const uint32_t min_protocol_version = 3; + + const uint32_t ver = vtest_vcmd_ping_protocol_version(vtest) + ? vtest_vcmd_protocol_version(vtest) + : 0; + if (ver < min_protocol_version) { + vn_log(vtest->instance, "vtest protocol version (%d) too old", ver); + return VK_ERROR_INITIALIZATION_FAILED; + } + + vtest->protocol_version = ver; + + return VK_SUCCESS; +} + +static VkResult +vtest_init(struct vtest *vtest) +{ + mtx_init(&vtest->sock_mutex, mtx_plain); + vtest->sock_fd = + vtest_connect_socket(vtest->instance, VTEST_DEFAULT_SOCKET_NAME); + if (vtest->sock_fd < 0) + return VK_ERROR_INITIALIZATION_FAILED; + + const char *renderer_name = util_get_process_name(); + if (!renderer_name) + renderer_name = "venus"; + vtest_vcmd_create_renderer(vtest, renderer_name); + + VkResult result = vtest_init_protocol_version(vtest); + if (result == VK_SUCCESS) + result = vtest_init_params(vtest); + if (result == VK_SUCCESS) + result = vtest_init_capset(vtest); + if (result != VK_SUCCESS) + return result; + + vtest_vcmd_context_init(vtest, vtest->capset.id); + + vtest->base.ops.destroy = vtest_destroy; + vtest->base.ops.get_info = vtest_get_info; + vtest->base.ops.submit = vtest_submit; + vtest->base.ops.wait = vtest_wait; + vtest->base.ops.bo_create = vtest_bo_create; + vtest->base.ops.sync_create = vtest_sync_create; + + return VK_SUCCESS; +} + +VkResult +vn_renderer_create_vtest(struct vn_instance *instance, + const VkAllocationCallbacks *alloc, + struct vn_renderer **renderer) +{ + struct vtest *vtest = vk_zalloc(alloc, sizeof(*vtest), VN_DEFAULT_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!vtest) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + vtest->instance = instance; + vtest->sock_fd = -1; + + VkResult result = vtest_init(vtest); + if (result != VK_SUCCESS) { + vtest_destroy(&vtest->base, alloc); + return result; + } + + *renderer = &vtest->base; + + return VK_SUCCESS; +} |
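
Every vtest command emitted by this file follows the same wire framing: a two-dword header carrying the payload length in dwords and the command id, followed by the payload itself, all pushed through the blocking vtest_write helper. The sketch below shows that framing in a self-contained form; the HDR_* constants are local stand-ins for the VTEST_HDR_SIZE/VTEST_CMD_LEN/VTEST_CMD_ID values in vtest/vtest_protocol.h, and write_all/send_cmd are hypothetical helpers used only for illustration, not part of the driver.

/* Minimal sketch of the vtest command framing, under the assumptions above. */
#include <stdint.h>
#include <unistd.h>

#define HDR_DWORDS 2   /* stand-in for VTEST_HDR_SIZE */
#define HDR_LEN    0   /* stand-in for VTEST_CMD_LEN */
#define HDR_ID     1   /* stand-in for VTEST_CMD_ID */

/* mirrors vtest_write: keep writing until the whole buffer is on the wire */
static int
write_all(int fd, const void *buf, size_t size)
{
   const char *ptr = buf;
   while (size) {
      const ssize_t ret = write(fd, ptr, size);
      if (ret < 0)
         return -1;
      ptr += ret;
      size -= ret;
   }
   return 0;
}

/* frame one command: a (length-in-dwords, command-id) header, then the payload */
static int
send_cmd(int fd, uint32_t cmd_id, const uint32_t *payload, uint32_t payload_dwords)
{
   uint32_t hdr[HDR_DWORDS];
   hdr[HDR_LEN] = payload_dwords;
   hdr[HDR_ID] = cmd_id;
   if (write_all(fd, hdr, sizeof(hdr)) < 0)
      return -1;
   return write_all(fd, payload, payload_dwords * sizeof(uint32_t));
}

vtest_vcmd_sync_unref above, for example, is exactly this pattern with a single-dword payload holding the sync id; replies, where a command has one, come back with the same header framing and are consumed by vtest_read.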
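On the wait path, the renderer receives a file descriptor back from VCMD_SYNC_WAIT and waits for it to become readable with poll(), after converting the caller's nanosecond timeout to poll()'s millisecond granularity (rounding up, with -1 meaning an unbounded wait). The sketch below is a standalone restatement of timeout_to_poll_timeout and sync_wait_poll above, not the driver's code; wait_sync_fd is a hypothetical name used only for illustration.

/* Standalone sketch of the wait path, under the assumptions above. */
#include <errno.h>
#include <limits.h>
#include <poll.h>
#include <stdbool.h>
#include <stdint.h>

static int
ns_to_poll_timeout(uint64_t timeout_ns)
{
   const uint64_t ns_per_ms = 1000000;
   /* round up so a non-zero timeout never becomes a zero-ms (non-blocking)
    * poll; a value too large to represent becomes -1, an unbounded wait
    */
   const uint64_t ms = (timeout_ns + ns_per_ms - 1) / ns_per_ms;
   if (!ms && timeout_ns)
      return -1;
   return ms <= INT_MAX ? (int)ms : -1;
}

/* wait until the fd handed back by the sync-wait command is readable */
static bool
wait_sync_fd(int fd, uint64_t timeout_ns)
{
   struct pollfd pollfd = {
      .fd = fd,
      .events = POLLIN,
   };
   int ret;
   do {
      ret = poll(&pollfd, 1, ns_to_poll_timeout(timeout_ns));
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret > 0 && (pollfd.revents & POLLIN);
}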