1 files changed, 136 insertions, 55 deletions
diff --git a/src/mesa/main/glthread_bufferobj.c b/src/mesa/main/glthread_bufferobj.c
index 88b0b72ce02..499a480a339 100644
--- a/src/mesa/main/glthread_bufferobj.c
+++ b/src/mesa/main/glthread_bufferobj.c
@@ -32,29 +32,30 @@
 static struct gl_buffer_object *
 new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
 {
-   assert(ctx->GLThread.SupportsBufferUploads);
-
-   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
+   /* id 0 is used to avoid returning invalid binding values to apps */
+   struct gl_buffer_object *obj =
+      _mesa_bufferobj_alloc(ctx, 0);
    if (!obj)
       return NULL;
 
    obj->Immutable = true;
+   obj->GLThreadInternal = true;
 
-   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
-                               GL_WRITE_ONLY,
-                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
-                               obj)) {
-      ctx->Driver.DeleteBuffer(ctx, obj);
+   if (!_mesa_bufferobj_data(ctx, GL_ARRAY_BUFFER, size, NULL,
+                          GL_WRITE_ONLY,
+                          GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
+                          obj)) {
+      _mesa_delete_buffer_object(ctx, obj);
       return NULL;
    }
 
-   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
-                                     GL_MAP_WRITE_BIT |
-                                     GL_MAP_UNSYNCHRONIZED_BIT |
-                                     MESA_MAP_THREAD_SAFE_BIT,
-                                     obj, MAP_GLTHREAD);
+   *ptr = _mesa_bufferobj_map_range(ctx, 0, size,
+                                 GL_MAP_WRITE_BIT |
+                                 GL_MAP_UNSYNCHRONIZED_BIT |
+                                 MESA_MAP_THREAD_SAFE_BIT,
+                                 obj, MAP_GLTHREAD);
    if (!*ptr) {
-      ctx->Driver.DeleteBuffer(ctx, obj);
+      _mesa_delete_buffer_object(ctx, obj);
       return NULL;
    }
 
@@ -62,10 +63,24 @@ new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
 }
 
 void
+_mesa_glthread_release_upload_buffer(struct gl_context *ctx)
+{
+   struct glthread_state *glthread = &ctx->GLThread;
+   /* Subtract the private refcount from RefCount, then clear the reference. */
+   if (glthread->upload_buffer_private_refcount > 0) {
+      p_atomic_add(&glthread->upload_buffer->RefCount,
+                   -glthread->upload_buffer_private_refcount);
+      glthread->upload_buffer_private_refcount = 0;
+   }
+   _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
+}
+
+void
 _mesa_glthread_upload(struct gl_context *ctx, const void *data,
                       GLsizeiptr size, unsigned *out_offset,
                       struct gl_buffer_object **out_buffer,
-                      uint8_t **out_ptr)
+                      uint8_t **out_ptr,
+                      unsigned start_offset)
 {
    struct glthread_state *glthread = &ctx->GLThread;
    const unsigned default_size = 1024 * 1024;
@@ -74,22 +89,23 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
       return;
 
    /* The alignment was chosen arbitrarily. */
-   unsigned offset = align(glthread->upload_offset, 8);
+   unsigned offset = align(glthread->upload_offset, size <= 4 ? 4 : 8) + start_offset;
 
    /* Allocate a new buffer if needed. */
    if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
       /* If the size is greater than the buffer size, allocate a separate buffer
        * just for this upload.
        */
-      if (unlikely(size > default_size)) {
+      if (unlikely(start_offset + size > default_size)) {
          uint8_t *ptr;
 
          assert(*out_buffer == NULL);
-         *out_buffer = new_upload_buffer(ctx, size, &ptr);
+         *out_buffer = new_upload_buffer(ctx, size + start_offset, &ptr);
          if (!*out_buffer)
             return;
 
-         *out_offset = 0;
+         ptr += start_offset;
+         *out_offset = start_offset;
          if (data)
             memcpy(ptr, data, size);
          else
@@ -97,16 +113,12 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
          return;
       }
 
-      if (glthread->upload_buffer_private_refcount > 0) {
-         p_atomic_add(&glthread->upload_buffer->RefCount,
-                      -glthread->upload_buffer_private_refcount);
-         glthread->upload_buffer_private_refcount = 0;
-      }
-      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
+      _mesa_glthread_release_upload_buffer(ctx);
+
       glthread->upload_buffer =
          new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
       glthread->upload_offset = 0;
-      offset = 0;
+      offset = start_offset;
 
       /* Since atomic operations are very very slow when 2 threads are not
        * sharing one L3 cache (which can happen on AMD Zen), prevent using
@@ -170,7 +182,7 @@ _mesa_glthread_upload(struct gl_context *ctx, const void *data,
  * feature that if you pass a bad name, it just gens a buffer object for you,
  * so we escape without having to know if things are valid or not.
  */
-void
+static void
 _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
 {
    struct glthread_state *glthread = &ctx->GLThread;
@@ -195,16 +207,87 @@ _mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
    case GL_PIXEL_UNPACK_BUFFER:
       glthread->CurrentPixelUnpackBufferName = buffer;
       break;
+   case GL_QUERY_BUFFER:
+      glthread->CurrentQueryBufferName = buffer;
+      break;
    }
 }
 
+struct marshal_cmd_BindBuffer /* payload of DISPATCH_CMD_BindBuffer */
+{
+   struct marshal_cmd_base cmd_base;
+   GLenum16 target; /* stored clamped to 0xffff by _mesa_marshal_BindBuffer */
+   GLuint buffer;   /* may be overwritten in place when a rebind is merged */
+};
+
+uint32_t /* Replays a queued BindBuffer; returns its size in 8-byte units. */
+_mesa_unmarshal_BindBuffer(struct gl_context *ctx,
+                           const struct marshal_cmd_BindBuffer *restrict cmd)
+{
+   CALL_BindBuffer(ctx->Dispatch.Current, (cmd->target, cmd->buffer));
+   return align(sizeof(struct marshal_cmd_BindBuffer), 8) / 8;
+}
+
+void GLAPIENTRY
+_mesa_marshal_BindBuffer(GLenum target, GLuint buffer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_glthread_BindBuffer(ctx, target, buffer);
+   /* Tracking above always runs; only the queued command can be elided. */
+   struct glthread_state *glthread = &ctx->GLThread;
+   struct marshal_cmd_BindBuffer *last1 = glthread->LastBindBuffer1;
+   struct marshal_cmd_BindBuffer *last2 = glthread->LastBindBuffer2;
+   int cmd_size = sizeof(struct marshal_cmd_BindBuffer);
+
+   /* Eliminate duplicated BindBuffer calls, which are plentiful
+    * in viewperf2020/catia. glthread keeps track of the last 2 BindBuffer
+    * commands and overwrites one in place if the target matches and it
+    * binds buffer 0. In this example, the first 2 calls are eliminated:
+    *
+    *   glBindBuffer(GL_ARRAY_BUFFER, 0);
+    *   glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+    *   glBindBuffer(GL_ARRAY_BUFFER, 6);
+    *   glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 7);
+    *
+    * last2 is the most recent BindBuffer command; last1 is the one before
+    * it. Merging is only attempted when last2 is the last queued call.
+    */
+   if (_mesa_glthread_call_is_last(glthread, &last2->cmd_base,
+                                   align(cmd_size, 8) / 8)) {
+      /* If the last call has the same target and only unbinds (buffer 0),
+       * overwrite its buffer ID instead of queuing another command.
+       */
+      if (target == last2->target) {
+         /* We can't overwrite binding non-zero buffers because binding also
+          * creates the GL objects (like glCreateBuffers), which can't be skipped.
+          */
+         if (!last2->buffer) {
+            last2->buffer = buffer;
+            return;
+         }
+      } else if (last1 + 1 == last2 && target == last1->target &&
+                 !last1->buffer) { /* last1 directly precedes last2 */
+         last1->buffer = buffer;
+         return;
+      }
+   }
+
+   struct marshal_cmd_BindBuffer *cmd =
+      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BindBuffer, cmd_size);
+   cmd->target = MIN2(target, 0xffff); /* clamped to 0xffff (invalid enum) */
+   cmd->buffer = buffer;
+   /* Shift the history: last1 <- previous last2, last2 <- the new command. */
+   glthread->LastBindBuffer1 = last2;
+   glthread->LastBindBuffer2 = cmd;
+}
+
 void
 _mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
                              const GLuint *buffers)
 {
    struct glthread_state *glthread = &ctx->GLThread;
 
-   if (!buffers)
+   if (!buffers || n < 0)
       return;
 
    for (unsigned i = 0; i < n; i++) {
@@ -227,6 +310,7 @@ _mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
 struct marshal_cmd_BufferData
 {
    struct marshal_cmd_base cmd_base;
+   uint16_t num_slots;
    GLuint target_or_name;
    GLsizeiptr size;
    GLenum usage;
@@ -239,8 +323,7 @@ struct marshal_cmd_BufferData
 
 uint32_t
 _mesa_unmarshal_BufferData(struct gl_context *ctx,
-                           const struct marshal_cmd_BufferData *cmd,
-                           const uint64_t *last)
+                           const struct marshal_cmd_BufferData *restrict cmd)
 {
    const GLuint target_or_name = cmd->target_or_name;
    const GLsizei size = cmd->size;
@@ -255,22 +338,21 @@ _mesa_unmarshal_BufferData(struct gl_context *ctx,
       data = (const void *) (cmd + 1);
 
    if (cmd->ext_dsa) {
-      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
+      CALL_NamedBufferDataEXT(ctx->Dispatch.Current,
                               (target_or_name, size, data, usage));
    } else if (cmd->named) {
-      CALL_NamedBufferData(ctx->CurrentServerDispatch,
+      CALL_NamedBufferData(ctx->Dispatch.Current,
                            (target_or_name, size, data, usage));
    } else {
-      CALL_BufferData(ctx->CurrentServerDispatch,
+      CALL_BufferData(ctx->Dispatch.Current,
                       (target_or_name, size, data, usage));
    }
-   return cmd->cmd_base.cmd_size;
+   return cmd->num_slots;
 }
 
 uint32_t
 _mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
-                                const struct marshal_cmd_NamedBufferData *cmd,
-                                const uint64_t *last)
+                                const struct marshal_cmd_NamedBufferData *restrict cmd)
 {
    unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
    return 0;
@@ -278,8 +360,7 @@ _mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
 
 uint32_t
 _mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
-                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
-                                   const uint64_t *last)
+                                   const struct marshal_cmd_NamedBufferDataEXT *restrict cmd)
 {
    unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
    return 0;
@@ -300,10 +381,10 @@ _mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
                 (named && target_or_name == 0))) {
       _mesa_glthread_finish_before(ctx, func);
       if (named) {
-         CALL_NamedBufferData(ctx->CurrentServerDispatch,
+         CALL_NamedBufferData(ctx->Dispatch.Current,
                               (target_or_name, size, data, usage));
       } else {
-         CALL_BufferData(ctx->CurrentServerDispatch,
+         CALL_BufferData(ctx->Dispatch.Current,
                          (target_or_name, size, data, usage));
       }
       return;
@@ -312,7 +393,7 @@ _mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
    struct marshal_cmd_BufferData *cmd =
       _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
                                       cmd_size);
-
+   cmd->num_slots = align(cmd_size, 8) / 8;
    cmd->target_or_name = target_or_name;
    cmd->size = size;
    cmd->usage = usage;
@@ -356,6 +437,7 @@ _mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
 struct marshal_cmd_BufferSubData
 {
    struct marshal_cmd_base cmd_base;
+   uint16_t num_slots;
    GLenum target_or_name;
    GLintptr offset;
    GLsizeiptr size;
@@ -366,8 +448,7 @@ struct marshal_cmd_BufferSubData
 
 uint32_t
 _mesa_unmarshal_BufferSubData(struct gl_context *ctx,
-                              const struct marshal_cmd_BufferSubData *cmd,
-                              const uint64_t *last)
+                              const struct marshal_cmd_BufferSubData *restrict cmd)
 {
    const GLenum target_or_name = cmd->target_or_name;
    const GLintptr offset = cmd->offset;
@@ -375,22 +456,21 @@ _mesa_unmarshal_BufferSubData(struct gl_context *ctx,
    const void *data = (const void *) (cmd + 1);
 
    if (cmd->ext_dsa) {
-      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
+      CALL_NamedBufferSubDataEXT(ctx->Dispatch.Current,
                                  (target_or_name, offset, size, data));
    } else if (cmd->named) {
-      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
+      CALL_NamedBufferSubData(ctx->Dispatch.Current,
                               (target_or_name, offset, size, data));
    } else {
-      CALL_BufferSubData(ctx->CurrentServerDispatch,
+      CALL_BufferSubData(ctx->Dispatch.Current,
                          (target_or_name, offset, size, data));
    }
-   return cmd->cmd_base.cmd_size;
+   return cmd->num_slots;
 }
 
 uint32_t
 _mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
-                                   const struct marshal_cmd_NamedBufferSubData *cmd,
-                                   const uint64_t *last)
+                                   const struct marshal_cmd_NamedBufferSubData *restrict cmd)
 {
    unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
    return 0;
@@ -398,8 +478,7 @@ _mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
 
 uint32_t
 _mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
-                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
-                                      const uint64_t *last)
+                                      const struct marshal_cmd_NamedBufferSubDataEXT *restrict cmd)
 {
    unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
    return 0;
@@ -420,13 +499,14 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
     *       If offset == 0 and size == buffer_size, it's better to discard
     *       the buffer storage, but we don't know the buffer size in glthread.
     */
-   if (ctx->GLThread.SupportsBufferUploads &&
+   if (ctx->Const.AllowGLThreadBufferSubDataOpt &&
+       ctx->Dispatch.Current != ctx->Dispatch.ContextLost &&
        data && offset > 0 && size > 0) {
       struct gl_buffer_object *upload_buffer = NULL;
       unsigned upload_offset = 0;
 
       _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
-                            NULL);
+                            NULL, 0);
 
       if (upload_buffer) {
          _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
@@ -443,10 +523,10 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
                 (named && target_or_name == 0))) {
       _mesa_glthread_finish_before(ctx, func);
       if (named) {
-         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
+         CALL_NamedBufferSubData(ctx->Dispatch.Current,
                                  (target_or_name, offset, size, data));
       } else {
-         CALL_BufferSubData(ctx->CurrentServerDispatch,
+         CALL_BufferSubData(ctx->Dispatch.Current,
                             (target_or_name, offset, size, data));
       }
       return;
@@ -455,6 +535,7 @@ _mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
    struct marshal_cmd_BufferSubData *cmd =
       _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
                                       cmd_size);
+   cmd->num_slots = align(cmd_size, 8) / 8;
    cmd->target_or_name = target_or_name;
    cmd->offset = offset;
    cmd->size = size;