summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Janes <markjanes@swizzler.org>2020-09-09 15:13:30 -0700
committerMark Janes <markjanes@swizzler.org>2021-02-01 17:24:57 -0800
commit0f4143ec3787ffc851feeb3ec4ee977ef46ee461 (patch)
tree9abf2cc2d1d780e5503d90e4f671c5aa4cd3e426
parentf9960579c339774a3c549552f287824c0c4371d3 (diff)
intel: Print GPU timing data based on INTEL_MEASURE
This infrastructure collects GPU timestamps over common intervals, and generates a CSV report to show how long rendering took. The overhead of collection is limited to the flushing that is required at the interval boundaries for accurate timestamps. By default, timing data is sent to stderr. To direct output to a file: INTEL_MEASURE=file=/tmp/measure.csv {workload} To begin capturing timestamps at a particular frame: INTEL_MEASURE=file=/tmp/measure.csv,start=15 {workload} To capture only 23 frames: INTEL_MEASURE=count=23 {workload} To capture frames 15-37, stopping before frame 38: INTEL_MEASURE=start=15,count=23 {workload} Designate an asynchronous control file with: INTEL_MEASURE=control=path/to/control.fifo {workload} As the workload runs, enable capture for 5 frames with: $ echo 5 > path/to/control.fifo Enable unbounded capture: $ echo -1 > path/to/control.fifo and disable with: $ echo 0 > path/to/control.fifo Select the boundaries of each snapshot with: INTEL_MEASURE=draw : DEFAULT - Collects timings for every render INTEL_MEASURE=rt : Collects timings when the render target changes INTEL_MEASURE=batch : Collects timings when batches are submitted INTEL_MEASURE=frame : Collects timings at frame boundaries With INTEL_MEASURE=interval=5, the duration of 5 events will be combined into a single record in the output. When possible, a single start and end event will be submitted to the GPU to minimize stalling. Combined events will not span batches, except in the case of INTEL_MEASURE=frame. Acked-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7354>
-rw-r--r--src/intel/common/intel_measure.c593
-rw-r--r--src/intel/common/intel_measure.h160
-rw-r--r--src/intel/common/meson.build2
3 files changed, 755 insertions, 0 deletions
diff --git a/src/intel/common/intel_measure.c b/src/intel/common/intel_measure.c
new file mode 100644
index 00000000000..7942a240b0f
--- /dev/null
+++ b/src/intel/common/intel_measure.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file intel_measure.c
+ */
+
+#include "intel_measure.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define __STDC_FORMAT_MACROS 1
+#include <inttypes.h>
+
+#include "dev/gen_device_info.h"
+#include "util/debug.h"
+#include "util/macros.h"
+
+
+/* Maps the INTEL_MEASURE event-boundary keywords to their flag bits;
+ * consumed by parse_debug_string() in intel_measure_init().
+ */
+static const struct debug_control debug_control[] = {
+ { "draw", INTEL_MEASURE_DRAW },
+ { "rt", INTEL_MEASURE_RENDERPASS },
+ { "shader", INTEL_MEASURE_SHADER },
+ { "batch", INTEL_MEASURE_BATCH },
+ { "frame", INTEL_MEASURE_FRAME },
+ { NULL, 0 }
+};
+/* Process-wide measurement settings, populated once by intel_measure_init(). */
+static struct intel_measure_config config;
+
+void
+intel_measure_init(struct intel_measure_device *device)
+{
+ static bool once = false;
+ if (unlikely(!once)) {
+ once = true;
+ memset(&config, 0, sizeof(struct intel_measure_config));
+ const char *env = getenv("INTEL_MEASURE");
+ if (!env)
+ return;
+
+ config.file = stderr;
+ config.flags = parse_debug_string(env, debug_control);
+ if (!config.flags)
+ config.flags = INTEL_MEASURE_DRAW;
+ config.enabled = true;
+ config.event_interval = 1;
+ config.control_fh = -1;
+
+ /* Overflows of the following defaults will drop data and generate a
+ * warning on the output filehandle.
+ */
+
+ /* default batch_size allows for 8k renders in a single batch */
+ const int DEFAULT_BATCH_SIZE = 16 * 1024;
+ config.batch_size = DEFAULT_BATCH_SIZE;
+
+ /* Default buffer_size allows for 16k batches per line of output in the
+ * csv. Overflow may occur for offscreen workloads or large 'interval'
+ * settings.
+ */
+ const int DEFAULT_BUFFER_SIZE = 16 * 1024;
+ config.buffer_size = DEFAULT_BUFFER_SIZE;
+
+ const char *filename = strstr(env, "file=");
+ const char *start_frame_s = strstr(env, "start=");
+ const char *count_frame_s = strstr(env, "count=");
+ const char *control_path = strstr(env, "control=");
+ const char *interval_s = strstr(env, "interval=");
+ const char *batch_size_s = strstr(env, "batch_size=");
+ const char *buffer_size_s = strstr(env, "buffer_size=");
+ while (true) {
+ char *sep = strrchr(env, ',');
+ if (sep == NULL)
+ break;
+ *sep = '\0';
+ }
+
+ if (filename) {
+ filename += 5;
+ config.file = fopen(filename, "w");
+ if (!config.file) {
+ fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
+ filename, strerror (errno));
+ abort();
+ }
+ }
+
+ if (start_frame_s) {
+ start_frame_s += 6;
+ const int start_frame = atoi(start_frame_s);
+ if (start_frame < 0) {
+ fprintf(stderr, "INTEL_MEASURE start frame may "
+ "not be negative: %d\n", start_frame);
+ abort();
+ }
+
+ config.start_frame = start_frame;
+ config.enabled = false;
+ }
+
+ if (count_frame_s) {
+ count_frame_s += 6;
+ const int count_frame = atoi(count_frame_s);
+ if (count_frame <= 0) {
+ fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
+ count_frame);
+ abort();
+ }
+
+ config.end_frame = config.start_frame + count_frame;
+ }
+
+ if (control_path) {
+ control_path += 8;
+ if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {
+ if (errno != EEXIST) {
+ fprintf(stderr, "INTEL_MEASURE failed to create control "
+ "fifo %s: %s\n", control_path, strerror (errno));
+ abort();
+ }
+ }
+
+ config.control_fh = openat(AT_FDCWD, control_path,
+ O_RDONLY | O_NONBLOCK);
+ if (config.control_fh == -1) {
+ fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
+ "%s: %s\n", control_path, strerror (errno));
+ abort();
+ }
+
+ /* when using a control fifo, do not start until the user triggers
+ * capture
+ */
+ config.enabled = false;
+ }
+
+ if (interval_s) {
+ interval_s += 9;
+ const int event_interval = atoi(interval_s);
+ if (event_interval < 1) {
+ fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
+ "%d\n", event_interval);
+ abort();
+ }
+ config.event_interval = event_interval;
+ }
+
+ if (batch_size_s) {
+ batch_size_s += 11;
+ const int batch_size = atoi(batch_size_s);
+ if (batch_size < DEFAULT_BATCH_SIZE) {
+ fprintf(stderr, "INTEL_MEASURE minimum batch_size is 4k: "
+ "%d\n", batch_size);
+ abort();
+ }
+ if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
+ fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
+ "%d\n", batch_size);
+ abort();
+ }
+
+ config.batch_size = batch_size;
+ }
+
+ if (buffer_size_s) {
+ buffer_size_s += 12;
+ const int buffer_size = atoi(buffer_size_s);
+ if (buffer_size < DEFAULT_BUFFER_SIZE) {
+ fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
+ "%d\n", DEFAULT_BUFFER_SIZE);
+ }
+ if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
+ fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
+ "%d\n", buffer_size);
+ }
+
+ config.buffer_size = buffer_size;
+ }
+
+ fputs("draw_start,draw_end,frame,batch,"
+ "event_index,event_count,type,count,vs,tcs,tes,"
+ "gs,fs,cs,framebuffer,idle_ns,time_ns\n",
+ config.file);
+ }
+
+ device->config = NULL;
+ if (getenv("INTEL_MEASURE"))
+ device->config = &config;
+}
+
+const char *
+intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
+{
+ const char *names[] = {
+ [INTEL_SNAPSHOT_UNDEFINED] = "undefined",
+ [INTEL_SNAPSHOT_BLIT] = "blit",
+ [INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",
+ [INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",
+ [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
+ [INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",
+ [INTEL_SNAPSHOT_COMPUTE] = "compute",
+ [INTEL_SNAPSHOT_COPY] = "copy",
+ [INTEL_SNAPSHOT_DRAW] = "draw",
+ [INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",
+ [INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",
+ [INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",
+ [INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",
+ [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
+ [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
+ [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
+ [INTEL_SNAPSHOT_END] = "end",
+ };
+ assert(type < ARRAY_SIZE(names));
+ assert(names[type] != NULL);
+ assert(type != INTEL_SNAPSHOT_UNDEFINED);
+ return names[type];
+}
+
+/**
+ * Indicate to the caller whether a new snapshot should be started.
+ *
+ * Callers provide rendering state to this method to determine whether the
+ * current start event should be skipped. Depending on the configuration
+ * flags, a new snapshot may start:
+ * - at every event
+ * - when the program changes
+ * - after a batch is submitted
+ * - at frame boundaries
+ *
+ * Returns true if a snapshot should be started.
+ */
+bool
+intel_measure_state_changed(const struct intel_measure_batch *batch,
+ uintptr_t vs, uintptr_t tcs, uintptr_t tes,
+ uintptr_t gs, uintptr_t fs, uintptr_t cs)
+{
+ if (batch->index == 0) {
+ /* always record the first event */
+ return true;
+ }
+
+ const struct intel_measure_snapshot *last_snap =
+ &batch->snapshots[batch->index - 1];
+
+ if (config.flags & INTEL_MEASURE_DRAW)
+ return true;
+
+ if (batch->index % 2 == 0) {
+ /* no snapshot is running, but we have a start event */
+ return true;
+ }
+
+ if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
+ /* only start collection when index == 0, at the beginning of a batch */
+ return false;
+ }
+
+ if (config.flags & INTEL_MEASURE_RENDERPASS) {
+ return ((last_snap->framebuffer != batch->framebuffer) ||
+ /* compute workloads are always in their own renderpass */
+ (cs != 0));
+ }
+
+ /* remaining comparisons check the state of the render pipeline for
+ * INTEL_MEASURE_PROGRAM
+ */
+ assert(config.flags & INTEL_MEASURE_SHADER);
+
+ if (!vs && !tcs && !tes && !gs && !fs && !cs) {
+ /* blorp always changes program */
+ return true;
+ }
+
+ return (last_snap->vs != (uintptr_t) vs ||
+ last_snap->tcs != (uintptr_t) tcs ||
+ last_snap->tes != (uintptr_t) tes ||
+ last_snap->gs != (uintptr_t) gs ||
+ last_snap->fs != (uintptr_t) fs ||
+ last_snap->cs != (uintptr_t) cs);
+}
+
+/**
+ * Notify intel_measure that a frame is about to begin.
+ *
+ * Configuration values and the control fifo may commence measurement at frame
+ * boundaries.
+ */
+void
+intel_measure_frame_transition(unsigned frame)
+{
+ if (frame == config.start_frame)
+ config.enabled = true;
+ else if (frame == config.end_frame)
+ config.enabled = false;
+
+ /* user commands to the control fifo will override any start/count
+ * environment settings
+ */
+ if (config.control_fh != -1) {
+ while (true) {
+ const unsigned BUF_SIZE = 128;
+ char buf[BUF_SIZE];
+ ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
+ if (bytes == 0)
+ break;
+ if (bytes == -1) {
+ fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
+ strerror(errno));
+ abort();
+ }
+
+ buf[bytes] = '\0';
+ char *nptr = buf, *endptr = buf;
+ while (*nptr != '\0' && *endptr != '\0') {
+ long fcount = strtol(nptr, &endptr, 10);
+ if (nptr == endptr) {
+ config.enabled = false;
+ fprintf(stderr, "INTEL_MEASURE invalid frame count on "
+ "control fifo.\n");
+ lseek(config.control_fh, 0, SEEK_END);
+ break;
+ } else if (fcount == 0) {
+ config.enabled = false;
+ } else {
+ config.enabled = true;
+ config.end_frame = frame + fcount;
+ }
+
+ nptr = endptr + 1;
+ }
+ }
+ }
+}
+
#define TIMESTAMP_BITS 36

/* Difference between two raw GPU timestamps, compensating for a single wrap
 * of the 36-bit counter when time1 is numerically smaller than time0.
 */
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   const uint64_t wrap = (time0 > time1) ? (1ULL << TIMESTAMP_BITS) : 0;
   return wrap + time1 - time0;
}
+
+/**
+ * Submit completed snapshots for buffering.
+ *
+ * Snapshot data becomes available when asynchronous rendering completes.
+ * Depending on configuration, snapshot data may need to be collated before
+ * writing to the output file.
+ */
+void
+intel_measure_push_result(struct intel_measure_device *device,
+ struct intel_measure_batch *batch,
+ uint64_t *timestamps)
+{
+ struct intel_measure_ringbuffer *rb = device->ringbuffer;
+
+ /* Snapshots are recorded as begin/end pairs; timestamps[] shares the same
+ * indexing, so each iteration consumes one pair.
+ */
+ for (int i = 0; i < batch->index; i += 2) {
+ const struct intel_measure_snapshot *begin = &batch->snapshots[i];
+ const struct intel_measure_snapshot *end = &batch->snapshots[i+1];
+
+ assert (end->type == INTEL_SNAPSHOT_END);
+ /* End timestamp of the most recently buffered result, used below to
+ * compute the idle gap preceding this event.
+ */
+ const uint64_t prev_end_ts = rb->results[rb->head].end_ts;
+
+ /* advance ring buffer */
+ if (++rb->head == config.buffer_size)
+ rb->head = 0;
+ if (rb->head == rb->tail) {
+ /* Ring is full: warn once and drop this batch's remaining results. */
+ static bool warned = false;
+ if (unlikely(!warned)) {
+ fprintf(config.file,
+ "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
+ "Data has been dropped. "
+ "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
+ config.buffer_size);
+ warned = true;
+ }
+ return;
+ }
+
+ struct intel_measure_buffered_result *buffered_result =
+ &rb->results[rb->head];
+
+ memset(buffered_result, 0, sizeof(*buffered_result));
+ memcpy(&buffered_result->snapshot, begin,
+ sizeof(struct intel_measure_snapshot));
+ buffered_result->start_ts = timestamps[i];
+ buffered_result->end_ts = timestamps[i+1];
+ /* idle_duration: raw GPU time between the previous result's end and
+ * this result's start.
+ */
+ buffered_result->idle_duration =
+ raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
+ buffered_result->frame = batch->frame;
+ buffered_result->batch_count = batch->batch_count;
+ buffered_result->event_index = i / 2;
+ buffered_result->snapshot.event_count = end->event_count;
+ }
+}
+
+
+static unsigned
+ringbuffer_size(const struct intel_measure_ringbuffer *rb)
+{
+ unsigned head = rb->head;
+ if (head < rb->tail)
+ head += config.buffer_size;
+ return head - rb->tail;
+}
+
+static const struct intel_measure_buffered_result *
+ringbuffer_pop(struct intel_measure_ringbuffer *rb)
+{
+ if (rb->tail == rb->head) {
+ /* encountered ringbuffer overflow while processing events */
+ return NULL;
+ }
+
+ if (++rb->tail == config.buffer_size)
+ rb->tail = 0;
+ return &rb->results[rb->tail];
+}
+
+static const struct intel_measure_buffered_result *
+ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
+{
+ int result_offset = rb->tail + index + 1;
+ if (result_offset >= config.buffer_size)
+ result_offset -= config.buffer_size;
+ return &rb->results[result_offset];
+}
+
+
+/**
+ * Determine the number of buffered events that must be combined for the next
+ * line of csv output. Returns 0 if more events are needed.
+ */
+static unsigned
+buffered_event_count(struct intel_measure_device *device)
+{
+ const struct intel_measure_ringbuffer *rb = device->ringbuffer;
+ /* NOTE(review): this local deliberately shadows the function's name. */
+ const unsigned buffered_event_count = ringbuffer_size(rb);
+ if (buffered_event_count == 0) {
+ /* no events to collect */
+ return 0;
+ }
+
+ /* count the number of buffered events required to meet the configuration */
+ if (config.flags & (INTEL_MEASURE_DRAW |
+ INTEL_MEASURE_RENDERPASS |
+ INTEL_MEASURE_SHADER)) {
+ /* For these flags, every buffered event represents a line in the
+ * output. None of these events span batches. If the event interval
+ * crosses a batch boundary, then the next interval starts with the new
+ * batch.
+ */
+ return 1;
+ }
+
+ const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
+ if (config.flags & INTEL_MEASURE_BATCH) {
+ /* each buffered event is a command buffer. The number of events to
+ * process is the same as the interval, unless the interval crosses a
+ * frame boundary
+ */
+ if (buffered_event_count < config.event_interval) {
+ /* not enough events */
+ return 0;
+ }
+
+ /* Imperfect frame tracking requires us to allow for *older* frames */
+ if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
+ /* No frame transition. The next {interval} events should be combined. */
+ return config.event_interval;
+ }
+
+ /* Else a frame transition occurs within the interval. Find the
+ * transition, so the following line of output begins with the batch
+ * that starts the new frame.
+ */
+ for (int event_index = 1;
+ event_index <= config.event_interval;
+ ++event_index) {
+ if (ringbuffer_peek(rb, event_index)->frame > start_frame)
+ return event_index;
+ }
+
+ /* NOTE(review): unreachable given the checks above; in NDEBUG builds
+ * control would fall through to the INTEL_MEASURE_FRAME path below.
+ */
+ assert(false);
+ }
+
+ /* Else we need to search buffered events to find the matching frame
+ * transition for our interval.
+ */
+ assert(config.flags & INTEL_MEASURE_FRAME);
+ for (int event_index = 1;
+ event_index < buffered_event_count;
+ ++event_index) {
+ /* NOTE(review): latest_frame - start_frame promotes to unsigned in the
+ * comparison below, so an *older* frame here would also satisfy it —
+ * confirm frames are non-decreasing in the ring.
+ */
+ const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
+ if (latest_frame - start_frame >= config.event_interval)
+ return event_index;
+ }
+
+ return 0;
+}
+
+/**
+ * Take result_count events from the ringbuffer and output them as a single
+ * line.
+ */
+static void
+print_combined_results(struct intel_measure_device *measure_device,
+ int result_count,
+ struct gen_device_info *info)
+{
+ if (result_count == 0)
+ return;
+
+ struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
+ assert(ringbuffer_size(result_rb) >= result_count);
+ const struct intel_measure_buffered_result* start_result =
+ ringbuffer_pop(result_rb);
+ const struct intel_measure_buffered_result* current_result = start_result;
+
+ if (start_result == NULL)
+ return;
+ --result_count;
+
+ /* Sum only the active GPU time of each event; gaps between events do not
+ * count toward duration_ts.
+ */
+ uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
+ current_result->end_ts);
+ unsigned event_count = start_result->snapshot.event_count;
+ while (result_count-- > 0) {
+ assert(ringbuffer_size(result_rb) > 0);
+ current_result = ringbuffer_pop(result_rb);
+ if (current_result == NULL)
+ return;
+ duration_ts += raw_timestamp_delta(current_result->start_ts,
+ current_result->end_ts);
+ event_count += current_result->snapshot.event_count;
+ }
+
+ /* The combined line reports the first event's identification (frame,
+ * batch, shaders) with the summed event_count and durations. The idle and
+ * total times are converted from raw ticks by
+ * gen_device_info_timebase_scale() — presumably to nanoseconds, per the
+ * idle_ns/time_ns column names; confirm against that helper.
+ */
+ const struct intel_measure_snapshot *begin = &start_result->snapshot;
+ fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
+ "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
+ "0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",
+ start_result->start_ts, current_result->end_ts,
+ start_result->frame, start_result->batch_count,
+ start_result->event_index, event_count,
+ begin->event_name, begin->count,
+ begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
+ begin->framebuffer,
+ gen_device_info_timebase_scale(info, start_result->idle_duration),
+ gen_device_info_timebase_scale(info, duration_ts));
+}
+
/**
 * Empty the ringbuffer of events that can be printed.
 */
void
intel_measure_print(struct intel_measure_device *device,
                    struct gen_device_info *info)
{
   for (;;) {
      const int ready = buffered_event_count(device);
      if (ready == 0)
         return;
      print_combined_results(device, ready, info);
   }
}
+
diff --git a/src/intel/common/intel_measure.h b/src/intel/common/intel_measure.h
new file mode 100644
index 00000000000..2e28367748c
--- /dev/null
+++ b/src/intel/common/intel_measure.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef INTEL_MEASURE_H
+#define INTEL_MEASURE_H
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/list.h"
+
+/* Kind of rendering operation a snapshot measures. CSV names come from
+ * intel_measure_snapshot_string(). INTEL_SNAPSHOT_END closes an open
+ * begin/end pair; INTEL_SNAPSHOT_UNDEFINED must never reach the string
+ * lookup.
+ */
+enum intel_measure_snapshot_type {
+ INTEL_SNAPSHOT_UNDEFINED,
+ INTEL_SNAPSHOT_BLIT,
+ INTEL_SNAPSHOT_CCS_AMBIGUATE,
+ INTEL_SNAPSHOT_CCS_COLOR_CLEAR,
+ INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE,
+ INTEL_SNAPSHOT_CCS_RESOLVE,
+ INTEL_SNAPSHOT_COMPUTE,
+ INTEL_SNAPSHOT_COPY,
+ INTEL_SNAPSHOT_DRAW,
+ INTEL_SNAPSHOT_HIZ_AMBIGUATE,
+ INTEL_SNAPSHOT_HIZ_CLEAR,
+ INTEL_SNAPSHOT_HIZ_RESOLVE,
+ INTEL_SNAPSHOT_MCS_COLOR_CLEAR,
+ INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE,
+ INTEL_SNAPSHOT_SLOW_COLOR_CLEAR,
+ INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR,
+ INTEL_SNAPSHOT_END,
+};
+
+/* Snapshot-boundary selection bits, parsed from the INTEL_MEASURE keywords
+ * draw, rt, shader, batch and frame.
+ */
+enum intel_measure_events {
+ INTEL_MEASURE_DRAW = (1 << 0),
+ INTEL_MEASURE_RENDERPASS = (1 << 1),
+ INTEL_MEASURE_SHADER = (1 << 2),
+ INTEL_MEASURE_BATCH = (1 << 3),
+ INTEL_MEASURE_FRAME = (1 << 4),
+};
+
+/* Process-wide measurement configuration, parsed once from INTEL_MEASURE. */
+struct intel_measure_config {
+
+ /* Stderr, or optionally set with INTEL_MEASURE=file={path} */
+ FILE *file;
+
+ /* Events that will be measured. Set only one flag, with
+ * INTEL_MEASURE=[draw,rt,shader,batch,frame] */
+ enum intel_measure_events flags;
+
+ /* Optionally set with INTEL_MEASURE=start={num} */
+ unsigned start_frame;
+
+ /* Optionally calculated with INTEL_MEASURE=count={num} based on
+ * start_frame
+ */
+ unsigned end_frame;
+
+ /* Number of events to combine per line of output. Optionally set with
+ * INTEL_MEASURE=interval={num}
+ */
+ unsigned event_interval;
+
+ /* Max snapshots per batch. Set with
+ * INTEL_MEASURE=batch_size={num}. Additional snapshots will be dropped.
+ */
+ unsigned batch_size;
+
+ /* Max number of batch measurements that can be buffered, for combining
+ * snapshots into frame or interval data.
+ */
+ unsigned buffer_size;
+
+ /* Fifo which will be read to enable measurements at run-time. Set with
+ * INTEL_MEASURE=control={path}. `echo {num} > {path}` will collect num
+ * frames of measurements, beginning with the next frame boundary.
+ */
+ int control_fh;
+
+ /* true when snapshots are currently being collected */
+ bool enabled;
+};
+
+/* State captured when a snapshot is opened or closed (begin/end pair). */
+struct intel_measure_snapshot {
+ enum intel_measure_snapshot_type type;
+ /* count feeds the CSV "count" column; event_count is the number of events
+ * folded into this snapshot. */
+ unsigned count, event_count;
+ const char* event_name;
+ /* Opaque identifiers for the framebuffer and bound shader stages, compared
+ * only for equality in intel_measure_state_changed(). */
+ uintptr_t framebuffer, vs, tcs, tes, gs, fs, cs;
+};
+
+/* A completed snapshot plus bookkeeping, staged in the ringbuffer until it
+ * can be printed (possibly combined with other results).
+ */
+struct intel_measure_buffered_result {
+ struct intel_measure_snapshot snapshot;
+ /* Raw GPU timestamps; idle_duration is the raw delta from the previous
+ * result's end_ts to this start_ts. */
+ uint64_t start_ts, end_ts, idle_duration;
+ unsigned frame, batch_count, event_index;
+};
+
+struct intel_measure_ringbuffer {
+ unsigned head, tail;
+ struct intel_measure_buffered_result results[0];
+};
+
+/* Per-device measurement state shared by the driver's measurement hooks. */
+struct intel_measure_device {
+ /* Points at the process-wide config when INTEL_MEASURE is set, else NULL
+ * (see intel_measure_init()). */
+ struct intel_measure_config *config;
+ /* NOTE(review): appears to track the current frame number; confirm where
+ * the driver updates it. */
+ unsigned frame;
+
+ /* Holds the list of (iris/anv)_measure_batch snapshots that have been
+ * submitted for rendering, but have not completed.
+ */
+ pthread_mutex_t mutex;
+ struct list_head queued_snapshots;
+
+ /* Holds completed snapshots that may need to be combined before being
+ * written out
+ */
+ struct intel_measure_ringbuffer *ringbuffer;
+};
+
+struct intel_measure_batch {
+ unsigned index;
+ unsigned frame, batch_count, event_count;
+ uintptr_t framebuffer;
+ bool submitted;
+ struct intel_measure_snapshot snapshots[0];
+};
+
+/* One-time per-device setup; parses INTEL_MEASURE on first call. */
+void intel_measure_init(struct intel_measure_device *device);
+/* Name of a snapshot type, for the CSV "type" column. */
+const char * intel_measure_snapshot_string(enum intel_measure_snapshot_type type);
+/* Whether the next event should open a new snapshot, given current state. */
+bool intel_measure_state_changed(const struct intel_measure_batch *batch,
+ uintptr_t vs, uintptr_t tcs, uintptr_t tes,
+ uintptr_t gs, uintptr_t fs, uintptr_t cs);
+/* Notify measurement of a frame boundary; also services the control fifo. */
+void intel_measure_frame_transition(unsigned frame);
+/* Buffer a batch's completed begin/end timestamp pairs for later output. */
+void intel_measure_push_result(struct intel_measure_device *device,
+ struct intel_measure_batch *batch,
+ uint64_t *timestamps);
+
+struct gen_device_info;
+/* Drain and print all buffered results that are ready to be combined. */
+void intel_measure_print(struct intel_measure_device *device,
+ struct gen_device_info *info);
+
+#endif /* INTEL_MEASURE_H */
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build
index f4ebe496640..d872ead643e 100644
--- a/src/intel/common/meson.build
+++ b/src/intel/common/meson.build
@@ -40,6 +40,8 @@ files_libintel_common = files(
'gen_sample_positions.h',
'gen_uuid.c',
'gen_uuid.h',
+ 'intel_measure.c',
+ 'intel_measure.h',
)
libintel_common = static_library(