summaryrefslogtreecommitdiff
path: root/src/util/perf
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/perf')
-rw-r--r--src/util/perf/.clang-format20
-rw-r--r--src/util/perf/cpu_trace.h113
-rw-r--r--src/util/perf/gpuvis_trace_utils.h795
-rw-r--r--src/util/perf/u_gpuvis.c53
-rw-r--r--src/util/perf/u_gpuvis.h35
-rw-r--r--src/util/perf/u_perfetto.cc105
-rw-r--r--src/util/perf/u_perfetto.h78
-rw-r--r--src/util/perf/u_perfetto_renderpass.h156
-rw-r--r--src/util/perf/u_trace.c614
-rw-r--r--src/util/perf/u_trace.h186
-rw-r--r--src/util/perf/u_trace.py352
-rw-r--r--src/util/perf/u_trace_priv.h61
12 files changed, 2294 insertions, 274 deletions
diff --git a/src/util/perf/.clang-format b/src/util/perf/.clang-format
new file mode 100644
index 00000000000..11f7a8e3bd6
--- /dev/null
+++ b/src/util/perf/.clang-format
@@ -0,0 +1,20 @@
+
+BasedOnStyle: InheritParentConfig
+DisableFormat: false
+
+BinPackParameters: false
+
+ColumnLimit: 78
+
+Cpp11BracedListStyle: false
+IncludeBlocks: Regroup
+IncludeCategories:
+ - Regex: '^"(c11/|compiler/|main/|nir/|spirv/|util/|vk_|wsi_)'
+ Priority: 3
+ - Regex: '^"(perfetto)'
+ Priority: 2
+ - Regex: '.*'
+ Priority: 1
+
+SpaceAfterCStyleCast: true
+SpaceBeforeCpp11BracedList: true
diff --git a/src/util/perf/cpu_trace.h b/src/util/perf/cpu_trace.h
new file mode 100644
index 00000000000..75db0f7badd
--- /dev/null
+++ b/src/util/perf/cpu_trace.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef CPU_TRACE_H
+#define CPU_TRACE_H
+
+#include "u_perfetto.h"
+#include "u_gpuvis.h"
+
+#include "util/detect_os.h"
+#include "util/macros.h"
+
+#if defined(HAVE_PERFETTO)
+
+/* Note that util_perfetto_is_tracing_enabled always returns false until
+ * util_perfetto_init is called.
+ */
+#define _MESA_TRACE_BEGIN(name) \
+ do { \
+ if (unlikely(util_perfetto_is_tracing_enabled())) \
+ util_perfetto_trace_begin(name); \
+ } while (0)
+
+#define _MESA_TRACE_END() \
+ do { \
+ if (unlikely(util_perfetto_is_tracing_enabled())) \
+ util_perfetto_trace_end(); \
+ } while (0)
+
+/* NOTE: for now disable atrace for C++ to workaround a ndk bug with ordering
+ * between stdatomic.h and atomic.h. See:
+ *
+ * https://github.com/android/ndk/issues/1178
+ */
+#elif DETECT_OS_ANDROID && !defined(__cplusplus)
+
+#include <cutils/trace.h>
+
+#define _MESA_TRACE_BEGIN(name) \
+ atrace_begin(ATRACE_TAG_GRAPHICS, name)
+#define _MESA_TRACE_END() atrace_end(ATRACE_TAG_GRAPHICS)
+
+#else
+
+#define _MESA_TRACE_BEGIN(name)
+#define _MESA_TRACE_END()
+
+#endif /* HAVE_PERFETTO */
+
+#if defined(HAVE_GPUVIS)
+
+#define _MESA_GPUVIS_TRACE_BEGIN(name) util_gpuvis_begin(name)
+#define _MESA_GPUVIS_TRACE_END() util_gpuvis_end()
+
+#else
+
+#define _MESA_GPUVIS_TRACE_BEGIN(name)
+#define _MESA_GPUVIS_TRACE_END()
+
+#endif /* HAVE_GPUVIS */
+
+#if __has_attribute(cleanup) && __has_attribute(unused)
+
+#define _MESA_TRACE_SCOPE_VAR_CONCAT(name, suffix) name##suffix
+#define _MESA_TRACE_SCOPE_VAR(suffix) \
+ _MESA_TRACE_SCOPE_VAR_CONCAT(_mesa_trace_scope_, suffix)
+
+/* This must expand to a single non-scoped statement for
+ *
+ * if (cond)
+ * _MESA_TRACE_SCOPE(...)
+ *
+ * to work.
+ */
+#define _MESA_TRACE_SCOPE(name) \
+ int _MESA_TRACE_SCOPE_VAR(__LINE__) \
+ __attribute__((cleanup(_mesa_trace_scope_end), unused)) = \
+ _mesa_trace_scope_begin(name)
+
+static inline int
+_mesa_trace_scope_begin(const char *name)
+{ /* Scope-entry hook: used as the initializer expression in _MESA_TRACE_SCOPE. */
+ _MESA_TRACE_BEGIN(name); /* opened first, so it becomes the outer scope */
+ _MESA_GPUVIS_TRACE_BEGIN(name); /* opened second; _mesa_trace_scope_end closes it first */
+ return 0; /* placeholder value for the cleanup-tagged scope variable */
+}
+
+static inline void
+_mesa_trace_scope_end(UNUSED int *scope)
+{ /* Cleanup handler named by _MESA_TRACE_SCOPE; `scope` is never read. */
+ _MESA_GPUVIS_TRACE_END(); /* close in the reverse order of _mesa_trace_scope_begin */
+ _MESA_TRACE_END();
+}
+
+#else
+
+#define _MESA_TRACE_SCOPE(name)
+
+#endif /* __has_attribute(cleanup) && __has_attribute(unused) */
+
+#define MESA_TRACE_SCOPE(name) _MESA_TRACE_SCOPE(name)
+#define MESA_TRACE_FUNC() _MESA_TRACE_SCOPE(__func__)
+
+/**
+ * Initialize the CPU tracing backends: perfetto first, then gpuvis.
+ *
+ * Each call is an empty inline stub when the matching HAVE_PERFETTO /
+ * HAVE_GPUVIS macro is not defined.
+ */
+static inline void
+util_cpu_trace_init(void)
+{
+   util_perfetto_init();
+   util_gpuvis_init();
+}
+
+#endif /* CPU_TRACE_H */
diff --git a/src/util/perf/gpuvis_trace_utils.h b/src/util/perf/gpuvis_trace_utils.h
new file mode 100644
index 00000000000..b5120c201d3
--- /dev/null
+++ b/src/util/perf/gpuvis_trace_utils.h
@@ -0,0 +1,795 @@
+//////////////////////////////////////////////////////////////////////////////
+// gpuvis_trace_utils.h - v0.10 - public domain
+// no warranty is offered or implied; use this code at your own risk
+//
+// This is a single header file with useful utilities for gpuvis linux tracing
+//
+// ============================================================================
+// You MUST define GPUVIS_TRACE_IMPLEMENTATION in EXACTLY _one_ C or C++ file
+// that includes this header, BEFORE the include, like this:
+//
+// #define GPUVIS_TRACE_IMPLEMENTATION
+// #include "gpuvis_trace_utils.h"
+//
+// All other files should just #include "gpuvis_trace_utils.h" w/o the #define.
+// ============================================================================
+//
+// Credits
+//
+// Michael Sartain
+//
+// LICENSE
+//
+// This software is dual-licensed to the public domain and under the following
+// license: you are granted a perpetual, irrevocable license to copy, modify,
+// publish, and distribute this file as you see fit.
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// INCLUDE SECTION
+//
+
+#ifndef _GPUVIS_TRACE_UTILS_H_
+#define _GPUVIS_TRACE_UTILS_H_
+
+#include <stdarg.h>
+
+#if !defined( __linux__ )
+#define GPUVIS_TRACE_UTILS_DISABLE
+#endif
+
+#if defined( __clang__ ) || defined( __GNUC__ )
+// printf-style warnings for user functions.
+#define GPUVIS_ATTR_PRINTF( _x, _y ) __attribute__( ( __format__( __printf__, _x, _y ) ) )
+#define GPUVIS_MAY_BE_UNUSED __attribute__( ( unused ) )
+#define GPUVIS_CLEANUP_FUNC( x ) __attribute__( ( __cleanup__( x ) ) )
+#else
+#define GPUVIS_ATTR_PRINTF( _x, _y )
+#define GPUVIS_MAY_BE_UNUSED
+#define GPUVIS_CLEANUP_FUNC( x )
+#endif
+
+#if !defined( GPUVIS_TRACE_UTILS_DISABLE )
+
+#include <time.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+ #define GPUVIS_EXTERN extern "C"
+ #if __cplusplus>=201103L
+ #define THREAD_LOCAL thread_local
+ #else
+ #define THREAD_LOCAL __thread
+ #endif
+#else
+ #define GPUVIS_EXTERN extern
+#endif
+
+// From kernel/trace/trace.h
+#ifndef TRACE_BUF_SIZE
+#define TRACE_BUF_SIZE 1024
+#endif
+
+// Try to open tracefs trace_marker file for writing. Returns -1 on error.
+GPUVIS_EXTERN int gpuvis_trace_init( void );
+// Close tracefs trace_marker file.
+GPUVIS_EXTERN void gpuvis_trace_shutdown( void );
+
+// Write user event to tracefs trace_marker.
+GPUVIS_EXTERN int gpuvis_trace_printf( const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 1, 2 );
+GPUVIS_EXTERN int gpuvis_trace_vprintf( const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 1, 0 );
+
+// Write user event (with duration=XXms) to tracefs trace_marker.
+GPUVIS_EXTERN int gpuvis_trace_duration_printf( float duration, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 );
+GPUVIS_EXTERN int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 );
+
+// Write user event (with begin_ctx=XX) to tracefs trace_marker.
+GPUVIS_EXTERN int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 );
+GPUVIS_EXTERN int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 );
+
+// Write user event (with end_ctx=XX) to tracefs trace_marker.
+GPUVIS_EXTERN int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 );
+GPUVIS_EXTERN int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 );
+
+// Execute "trace-cmd start -b 2000 -D -i -e sched:sched_switch -e ..."
+GPUVIS_EXTERN int gpuvis_start_tracing( unsigned int kbuffersize );
+// Execute "trace-cmd extract"
+GPUVIS_EXTERN int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size );
+// Execute "trace-cmd reset"
+GPUVIS_EXTERN int gpuvis_stop_tracing( void );
+
+// -1: tracing not setup, 0: tracing disabled, 1: tracing enabled.
+GPUVIS_EXTERN int gpuvis_tracing_on( void );
+
+// Get tracefs directory. Ie: /sys/kernel/tracing. Returns "" on error.
+GPUVIS_EXTERN const char *gpuvis_get_tracefs_dir( void );
+
+// Get tracefs file path in buf. Ie: /sys/kernel/tracing/trace_marker. Returns NULL on error.
+GPUVIS_EXTERN const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file );
+
+// Internal function used by GPUVIS_COUNT_HOT_FUNC_CALLS macro
+GPUVIS_EXTERN void gpuvis_count_hot_func_calls_internal_( const char *func );
+
+struct GpuvisTraceBlock;
+static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock *block, const char *str );
+static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block );
+
+struct GpuvisTraceBlockf;
+static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap );
+static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 );
+static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block );
+
+#define LNAME3( _name, _line ) _name ## _line
+#define LNAME2( _name, _line ) LNAME3( _name, _line )
+#define LNAME( _name ) LNAME2( _name, __LINE__ )
+
+struct GpuvisTraceBlock
+{ // Timed scope with a fixed label; a duration event is written when it ends.
+ uint64_t m_t0; // start time from gpuvis_gettime_u64()
+ const char *m_str; // label pointer; stored as-is, not copied
+
+#ifdef __cplusplus
+ GpuvisTraceBlock( const char *str ) // C++ only: construction starts the block
+ {
+ gpuvis_trace_block_begin( this, str );
+ }
+
+ ~GpuvisTraceBlock() // C++ only: destruction emits the event
+ {
+ gpuvis_trace_block_end( this );
+ }
+#endif
+};
+
+struct GpuvisTraceBlockf
+{ // Timed scope whose label is printf-formatted into an embedded buffer.
+ uint64_t m_t0; // start time from gpuvis_gettime_u64()
+ char m_buf[ TRACE_BUF_SIZE ]; // formatted label, filled by gpuvis_trace_blockf_vbegin
+
+#ifdef __cplusplus
+ GpuvisTraceBlockf( const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 ) // index 2 because `this` counts as argument 1
+ {
+ va_list args;
+ va_start( args, fmt );
+ gpuvis_trace_blockf_vbegin( this, fmt, args );
+ va_end( args );
+ }
+
+ ~GpuvisTraceBlockf() // C++ only: destruction emits the event
+ {
+ gpuvis_trace_blockf_end( this );
+ }
+#endif
+};
+
+#ifdef __cplusplus
+
+#define GPUVIS_TRACE_BLOCK( _conststr ) GpuvisTraceBlock LNAME( gpuvistimeblock )( _conststr )
+#define GPUVIS_TRACE_BLOCKF( _fmt, ... ) GpuvisTraceBlockf LNAME( gpuvistimeblock )( _fmt, __VA_ARGS__ )
+
+#else
+
+#if defined( __clang__ ) || defined( __GNUC__ )
+
+#define GPUVIS_TRACE_BLOCKF_INIT( _unique, _fmt, ... ) \
+ ({ \
+ struct GpuvisTraceBlockf _unique; \
+ gpuvis_trace_blockf_begin( & _unique, _fmt, __VA_ARGS__ ); \
+ _unique; \
+ })
+
+#define GPUVIS_TRACE_BLOCKF( _fmt, ...) \
+ GPUVIS_CLEANUP_FUNC( gpuvis_trace_blockf_end ) GPUVIS_MAY_BE_UNUSED struct GpuvisTraceBlockf LNAME( gpuvistimeblock ) = \
+ GPUVIS_TRACE_BLOCKF_INIT( LNAME( gpuvistimeblock_init ), _fmt, __VA_ARGS__ )
+
+#define GPUVIS_TRACE_BLOCK( _conststr ) \
+ GPUVIS_CLEANUP_FUNC( gpuvis_trace_block_end ) GPUVIS_MAY_BE_UNUSED struct GpuvisTraceBlock LNAME( gpuvistimeblock ) = \
+ {\
+ .m_t0 = gpuvis_gettime_u64(), \
+ .m_str = _conststr \
+ }
+
+#else
+
+#define GPUVIS_TRACE_BLOCKF( _fmt, ... )
+#define GPUVIS_TRACE_BLOCK( _conststr )
+
+#endif // __clang__ || __GNUC__
+
+#endif // __cplusplus
+
+// Current CLOCK_MONOTONIC reading, flattened to a single nanosecond count.
+static inline uint64_t gpuvis_gettime_u64( void )
+{
+    struct timespec now;
+    uint64_t nsec;
+
+    clock_gettime( CLOCK_MONOTONIC, &now );
+
+    // Whole seconds scaled to nanoseconds, plus the sub-second remainder.
+    nsec = ( uint64_t )now.tv_sec * 1000000000LL;
+    nsec += now.tv_nsec;
+
+    return nsec;
+}
+
+// Emit the closing event for a timed block: "<str> (lduration=-<ns>)", where
+// <ns> is the time elapsed since m_t0 (a gpuvis_gettime_u64() value).
+static inline void gpuvis_trace_block_finalize( uint64_t m_t0, const char *str )
+{
+    uint64_t dt = gpuvis_gettime_u64() - m_t0;
+
+    // The cpu clock_gettime() readings seem to vary compared to the ftrace
+    // event timestamps. If we don't reduce the duration here, scopes
+    // oftentimes won't stack correctly when they're drawn.
+    if ( dt > 11000 )
+        dt -= 11000;
+
+    // uint64_t is not `unsigned long` on every ABI; cast so the argument
+    // always matches the conversion specifier.
+    gpuvis_trace_printf( "%s (lduration=-%llu)", str, ( unsigned long long )dt );
+}
+
+static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock* block, const char *str )
+{ // Start a timed block: remember the label and the start time.
+ block->m_str = str; // pointer is stored, not copied
+ block->m_t0 = gpuvis_gettime_u64(); // read back by gpuvis_trace_block_end
+}
+
+static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block )
+{ // Finish a timed block by emitting its duration event.
+ gpuvis_trace_block_finalize(block->m_t0, block->m_str);
+}
+
+static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap)
+{ // Start a timed block with a printf-formatted label.
+ vsnprintf(block->m_buf, sizeof(block->m_buf), fmt, ap); // label is formatted now; output is bounded by m_buf's size
+ block->m_t0 = gpuvis_gettime_u64(); // timestamp is taken after formatting
+}
+
+static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... )
+{ // Variadic front end for gpuvis_trace_blockf_vbegin.
+ va_list args;
+
+ va_start( args, fmt );
+ gpuvis_trace_blockf_vbegin( block, fmt, args );
+ va_end( args );
+}
+
+static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block )
+{ // Finish a formatted timed block; the label comes from the embedded buffer.
+ gpuvis_trace_block_finalize( block->m_t0, block->m_buf );
+}
+
+#define GPUVIS_COUNT_HOT_FUNC_CALLS() gpuvis_count_hot_func_calls_internal_( __func__ );
+
+#else
+
+static inline int gpuvis_trace_init() { return -1; }
+static inline void gpuvis_trace_shutdown() {}
+
+static inline int gpuvis_trace_printf( const char *fmt, ... ) { return 0; }
+static inline int gpuvis_trace_vprintf( const char *fmt, va_list ap ) { return 0; }
+
+static inline int gpuvis_trace_duration_printf( float duration, const char *fmt, ... ) { return 0; }
+static inline int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap ) { return 0; }
+
+static inline int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... ) { return 0; }
+static inline int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) { return 0; }
+
+static inline int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... ) { return 0; }
+static inline int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) { return 0; }
+
+static inline int gpuvis_start_tracing( unsigned int kbuffersize ) { return 0; }
+static inline int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size ) { return 0; }
+static inline int gpuvis_stop_tracing() { return 0; }
+
+static inline int gpuvis_tracing_on() { return -1; }
+
+static inline const char *gpuvis_get_tracefs_dir() { return ""; }
+static inline const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file ) { return NULL; }
+
+struct GpuvisTraceBlock;
+static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock *block, const char *str ) {}
+static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block ) {}
+
+struct GpuvisTraceBlockf;
+static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap ) {}
+static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... ) {}
+static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block ) {}
+
+#define GPUVIS_TRACE_BLOCK( _conststr )
+#define GPUVIS_TRACE_BLOCKF( _fmt, ... )
+
+#define GPUVIS_COUNT_HOT_FUNC_CALLS()
+
+#endif // !GPUVIS_TRACE_UTILS_DISABLE
+
+#if defined( GPUVIS_TRACE_IMPLEMENTATION ) && !defined( GPUVIS_TRACE_UTILS_DISABLE )
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// IMPLEMENTATION SECTION
+//
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#undef GPUVIS_EXTERN
+#ifdef __cplusplus
+#define GPUVIS_EXTERN extern "C"
+#else
+#define GPUVIS_EXTERN
+#endif
+
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+#define GPUVIS_STR( x ) #x
+#define GPUVIS_STR_VALUE( x ) GPUVIS_STR( x )
+
+static int g_trace_fd = -2;
+static int g_tracefs_dir_inited = 0;
+static char g_tracefs_dir[ PATH_MAX ];
+
+#ifdef __cplusplus
+#include <unordered_map>
+
+struct funcinfo_t
+{
+ uint64_t tfirst = 0;
+ uint64_t tlast = 0;
+ uint32_t count = 0;
+};
+static std::unordered_map< pid_t, std::unordered_map< const char *, funcinfo_t > > g_hotfuncs;
+#endif // __cplusplus
+
+static pid_t gpuvis_gettid()
+{ // Thread id of the caller, fetched with the raw gettid system call.
+ return ( pid_t )syscall( SYS_gettid );
+}
+
+// Run `cmd` through popen(), echo every line of its output to stdout prefixed
+// with this function's name, and return the command's exit status.
+// Returns -1 if the pipe cannot be opened or read to EOF, if pclose() fails,
+// or if the command did not terminate normally.
+static int exec_tracecmd( const char *cmd )
+{
+    char buf[ 8192 ];
+    int status;
+
+    FILE *fh = popen( cmd, "r" );
+    if ( !fh )
+    {
+        //$ TODO: popen() failed: errno
+        return -1;
+    }
+
+    while ( fgets( buf, sizeof( buf ), fh ) )
+    {
+        //$ TODO
+        printf( "%s: %s", __func__, buf );
+    }
+
+    if ( !feof( fh ) )
+    {
+        //$ TODO: Failed to read pipe to end: errno
+        pclose( fh );
+        return -1;
+    }
+
+    status = pclose( fh );
+
+    // pclose() returns -1 on failure, and WEXITSTATUS() is only meaningful
+    // for a child that exited normally; never report either as an exit code.
+    if ( status == -1 || !WIFEXITED( status ) )
+        return -1;
+
+    return WEXITSTATUS( status );
+}
+
+// Lazily open tracefs "trace_marker" for writing and cache the descriptor in
+// g_trace_fd (-2: not attempted yet, -1: failed). Returns the cached value.
+GPUVIS_EXTERN int gpuvis_trace_init()
+{
+    char marker_path[ PATH_MAX ];
+
+    // Anything other than -2 means an earlier call already settled the outcome.
+    if ( g_trace_fd != -2 )
+        return g_trace_fd;
+
+    // The "trace_marker" file allows userspace to write into the ftrace buffer.
+    g_trace_fd = gpuvis_get_tracefs_filename( marker_path, sizeof( marker_path ), "trace_marker" )
+                     ? open( marker_path, O_WRONLY )
+                     : -1;
+
+    return g_trace_fd;
+}
+
+#if !defined( __cplusplus )
+static void flush_hot_func_calls()
+{
+ //$ TODO: hot func calls for C
+}
+#else
+// Emit one summary event for every (thread id, function) entry that still has
+// calls recorded by gpuvis_count_hot_func_calls_internal_(), then drop all
+// recorded state.
+static void flush_hot_func_calls()
+{
+    if ( g_hotfuncs.empty() )
+        return;
+
+    uint64_t t0 = gpuvis_gettime_u64();
+
+    for ( auto &x : g_hotfuncs )
+    {
+        for ( auto &y : x.second )
+        {
+            if ( y.second.count )
+            {
+                pid_t tid = x.first;
+                const char *func = y.first;
+                uint64_t offset = t0 - y.second.tfirst;
+                uint64_t duration = y.second.tlast - y.second.tfirst;
+
+                // uint64_t is not `unsigned long` on every ABI; cast so the
+                // arguments match the conversion specifiers.
+                gpuvis_trace_printf( "%s calls:%u (lduration=%llu tid=%d offset=-%llu)\n",
+                                     func, y.second.count, ( unsigned long long )duration, tid,
+                                     ( unsigned long long )offset );
+            }
+        }
+    }
+
+    g_hotfuncs.clear();
+}
+
+// Record one call to `func` on the calling thread. Calls are batched: a
+// summary event is written when 3ms or more pass between consecutive calls,
+// and whatever is still pending is written by flush_hot_func_calls().
+// Entries are keyed by the `func` pointer value, not by string contents.
+// NOTE(review): g_hotfuncs is a plain global map touched without a lock here;
+// confirm callers never hit this from several threads at once.
+GPUVIS_EXTERN void gpuvis_count_hot_func_calls_internal_( const char *func )
+{
+    static THREAD_LOCAL pid_t s_tid = gpuvis_gettid();
+
+    uint64_t t0 = gpuvis_gettime_u64();
+    auto &x = g_hotfuncs[ s_tid ];
+    auto &y = x[ func ];
+
+    if ( !y.count )
+    {
+        y.count = 1;
+        y.tfirst = t0;
+        y.tlast = t0 + 1;
+    }
+    // tlast starts out as tfirst + 1, so it can be ahead of t0 when two calls
+    // see the same clock reading; guard the unsigned subtraction against
+    // wrapping around to a huge gap.
+    else if ( t0 > y.tlast && t0 - y.tlast >= 3 * 1000000 ) // 3ms
+    {
+        // uint64_t is not `unsigned long` on every ABI; cast for %llu.
+        gpuvis_trace_printf( "%s calls:%u (lduration=%llu offset=-%llu)\n",
+                             func, y.count, ( unsigned long long )( y.tlast - y.tfirst ),
+                             ( unsigned long long )( t0 - y.tfirst ) );
+
+        y.count = 1;
+        y.tfirst = t0;
+        y.tlast = t0 + 1;
+    }
+    else
+    {
+        y.tlast = t0;
+        y.count++;
+    }
+}
+#endif // __cplusplus
+
+GPUVIS_EXTERN void gpuvis_trace_shutdown()
+{ // Flush pending hot-function counts, close the marker fd and reset all cached state.
+ flush_hot_func_calls(); // must run before the fd is closed so the events can still be written
+
+ if ( g_trace_fd >= 0 )
+ close( g_trace_fd );
+ g_trace_fd = -2; // -2 makes the next gpuvis_trace_init() try to open the marker again
+
+ g_tracefs_dir_inited = 0; // forces gpuvis_get_tracefs_dir() to search again
+ g_tracefs_dir[ 0 ] = 0;
+}
+
+static int trace_printf_impl( const char *keystr, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 );
+static int trace_printf_impl( const char *keystr, const char *fmt, va_list ap )
+{ // Format fmt/ap, append keystr (may be NULL) and write the result to the trace marker fd.
+ int ret = -1; // returned unchanged when the marker is unavailable or nothing is written
+
+ if ( gpuvis_trace_init() >= 0 )
+ {
+ int n;
+ char buf[ TRACE_BUF_SIZE ];
+
+ n = vsnprintf( buf, sizeof( buf ), fmt, ap );
+
+ if ( ( n > 0 ) || ( !n && keystr ) ) // an empty message is only written when a key string was passed
+ {
+ if ( ( size_t )n >= sizeof( buf ) ) // vsnprintf reports the untruncated length
+ n = sizeof( buf ) - 1;
+
+ if ( keystr && keystr[ 0 ] )
+ {
+ int keystrlen = strlen( keystr );
+
+ if ( ( size_t )n + keystrlen >= sizeof( buf ) ) // shorten the message so the key string always fits
+ n = sizeof( buf ) - keystrlen - 1;
+
+ strcpy( buf + n, keystr );
+
+ n += keystrlen;
+ }
+
+ ret = write( g_trace_fd, buf, n ); // n bytes, terminating NUL not included
+ }
+ }
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_trace_printf( const char *fmt, ... )
+{ // Variadic front end; forwards to gpuvis_trace_vprintf and returns its result.
+ int ret;
+ va_list ap;
+
+ va_start( ap, fmt );
+ ret = gpuvis_trace_vprintf( fmt, ap );
+ va_end( ap );
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_trace_vprintf( const char *fmt, va_list ap )
+{ // Plain user event: no key string is appended.
+ return trace_printf_impl( NULL, fmt, ap );
+}
+
+GPUVIS_EXTERN int gpuvis_trace_duration_printf( float duration, const char *fmt, ... )
+{ // Variadic front end; forwards to gpuvis_trace_duration_vprintf and returns its result.
+ int ret;
+ va_list ap;
+
+ va_start( ap, fmt );
+ ret = gpuvis_trace_duration_vprintf( duration, fmt, ap );
+ va_end( ap );
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap )
+{ // User event with " (duration=<value>)" appended to the formatted message.
+ char keystr[ 128 ];
+
+ snprintf( keystr, sizeof( keystr ), " (duration=%f)", duration ); //$ TODO: Try this with more precision?
+
+ return trace_printf_impl( keystr, fmt, ap );
+}
+
+GPUVIS_EXTERN int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... )
+{ // Variadic front end; forwards to gpuvis_trace_begin_ctx_vprintf and returns its result.
+ int ret;
+ va_list ap;
+
+ va_start( ap, fmt );
+ ret = gpuvis_trace_begin_ctx_vprintf( ctx, fmt, ap );
+ va_end( ap );
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap )
+{ // User event with " (begin_ctx=<ctx>)" appended to the formatted message.
+ char keystr[ 128 ];
+
+ snprintf( keystr, sizeof( keystr ), " (begin_ctx=%u)", ctx );
+
+ return trace_printf_impl( keystr, fmt, ap );
+}
+
+GPUVIS_EXTERN int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... )
+{ // Variadic front end; forwards to gpuvis_trace_end_ctx_vprintf and returns its result.
+ int ret;
+ va_list ap;
+
+ va_start( ap, fmt );
+ ret = gpuvis_trace_end_ctx_vprintf( ctx, fmt, ap );
+ va_end( ap );
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap )
+{ // User event with " (end_ctx=<ctx>)" appended to the formatted message.
+ char keystr[ 128 ];
+
+ snprintf( keystr, sizeof( keystr ), " (end_ctx=%u)", ctx );
+
+ return trace_printf_impl( keystr, fmt, ap );
+}
+
+GPUVIS_EXTERN int gpuvis_start_tracing( unsigned int kbuffersize )
+{ // Build and run a "trace-cmd start" command line enabling the events listed below; returns exec_tracecmd()'s result.
+ static const char fmt[] =
+ "trace-cmd start -b %u -D -i "
+ // https://github.com/mikesart/gpuvis/wiki/TechDocs-Linux-Scheduler
+ " -e sched:sched_switch"
+ " -e sched:sched_process_fork"
+ " -e sched:sched_process_exec"
+ " -e sched:sched_process_exit"
+ " -e drm:drm_vblank_event"
+ " -e drm:drm_vblank_event_queued"
+ " -e drm:drm_vblank_event_delivered"
+ // https://github.com/mikesart/gpuvis/wiki/TechDocs-AMDGpu
+ " -e amdgpu:amdgpu_vm_flush"
+ " -e amdgpu:amdgpu_cs_ioctl"
+ " -e amdgpu:amdgpu_sched_run_job"
+ " -e *fence:*fence_signaled"
+ // https://github.com/mikesart/gpuvis/wiki/TechDocs-Intel
+ " -e i915:i915_flip_request"
+ " -e i915:i915_flip_complete"
+ " -e i915:intel_gpu_freq_change"
+ " -e i915:i915_gem_request_add"
+ " -e i915:i915_gem_request_submit" // Require CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS
+ " -e i915:i915_gem_request_in" // Kconfig option to be enabled.
+ " -e i915:i915_gem_request_out" //
+ " -e i915:intel_engine_notify"
+ " -e i915:i915_gem_request_wait_begin"
+ " -e i915:i915_gem_request_wait_end 2>&1";
+ char cmd[ 8192 ];
+
+ if ( !kbuffersize ) // 0 selects the default below
+ kbuffersize = 16 * 1024; // value substituted for "-b %u"
+
+ snprintf( cmd, sizeof( cmd ), fmt, kbuffersize );
+
+ return exec_tracecmd( cmd );
+}
+
+GPUVIS_EXTERN int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size )
+{ // Stop tracing, launch "trace-cmd extract" into "<exe>_<timestamp>.dat", then restart tracing.
+ int ret = -1; // returned as-is when the tracing_on check below fails
+
+ if ( filename )
+ filename[ 0 ] = 0; // empty output name unless the capture is launched successfully
+
+ flush_hot_func_calls();
+
+ if ( gpuvis_tracing_on() ) // NOTE(review): also true for -1 ("tracing not setup"); confirm that is intended
+ {
+ char datetime[ 128 ];
+ char cmd[ PATH_MAX ];
+ char exebuf[ PATH_MAX ];
+ const char *exename = NULL;
+ time_t t = time( NULL );
+ struct tm *tmp = localtime( &t ); // NOTE(review): result is not checked for NULL before strftime
+
+ strftime( datetime, sizeof( datetime ), "%Y-%m-%d_%H-%M-%S", tmp );
+ datetime[ sizeof( datetime ) - 1 ] = 0;
+
+ ssize_t cbytes = readlink( "/proc/self/exe", exebuf, sizeof( exebuf ) - 1 ); // readlink does not NUL-terminate; one byte is reserved for that
+ if ( cbytes > 0 )
+ {
+ exebuf[ cbytes ] = 0;
+ exename = strrchr( exebuf, '/' );
+ }
+ exename = exename ? ( exename + 1 ) : "trace"; // basename of the executable, or "trace" as a fallback
+
+ // Stop tracing
+ exec_tracecmd( "trace-cmd stop 2>&1" );
+
+ // Save the trace data to something like "glxgears_2017-10-13_17-52-56.dat"
+ snprintf( cmd, sizeof( cmd ),
+ "trace-cmd extract -k -o \"%s_%s.dat\" > /tmp/blah.log 2>&1 &",
+ exename, datetime );
+ cmd[ sizeof( cmd ) - 1 ] = 0;
+
+ ret = system( cmd ); // the command ends in '&', so this reflects launching the extract, not its completion
+
+ if ( filename && !ret ) // report the output name only when the launch succeeded
+ snprintf( filename, size, "%s_%s.dat", exename, datetime );
+
+ // Restart tracing
+ exec_tracecmd( "trace-cmd restart 2>&1" );
+ }
+
+ return ret;
+}
+
+GPUVIS_EXTERN int gpuvis_stop_tracing()
+{ // Flush pending hot-function counts and run "trace-cmd reset"; returns the reset command's result.
+ flush_hot_func_calls();
+
+ int ret = exec_tracecmd( "trace-cmd reset 2>&1");
+
+ // Try freeing any snapshot buffers as well
+ exec_tracecmd( "trace-cmd snapshot -f 2>&1" ); // best effort: its result is ignored
+
+ return ret;
+}
+
+// Read tracefs "tracing_on".
+// Returns -1 when the file cannot be located, opened or read; otherwise the
+// integer it contains (0: tracing disabled, 1: tracing enabled).
+GPUVIS_EXTERN int gpuvis_tracing_on()
+{
+    int ret = -1;
+    char buf[ 32 ];
+    char filename[ PATH_MAX ];
+
+    if ( gpuvis_get_tracefs_filename( filename, PATH_MAX, "tracing_on" ) )
+    {
+        int fd = open( filename, O_RDONLY );
+
+        if ( fd >= 0 )
+        {
+            // read() does not NUL-terminate and atoi() needs a C string, so
+            // keep one byte free and terminate what was actually read.
+            ssize_t len = read( fd, buf, sizeof( buf ) - 1 );
+
+            if ( len > 0 )
+            {
+                buf[ len ] = 0;
+                ret = atoi( buf );
+            }
+
+            close( fd );
+        }
+    }
+
+    return ret;
+}
+
+// Returns 1 when statfs() succeeds on `dir` and reports the tracefs
+// filesystem type, 0 otherwise.
+static int is_tracefs_dir( const char *dir )
+{
+    struct statfs fsinfo;
+
+    if ( statfs( dir, &fsinfo ) != 0 )
+        return 0;
+
+    return fsinfo.f_type == TRACEFS_MAGIC;
+}
+
+GPUVIS_EXTERN const char *gpuvis_get_tracefs_dir()
+{ // Locate the tracefs mount once and cache it in g_tracefs_dir; the result is "" when none is found.
+ if ( !g_tracefs_dir_inited )
+ {
+ size_t i;
+ static const char *tracefs_dirs[] = // well-known locations, probed in this order
+ {
+ "/sys/kernel/tracing",
+ "/sys/kernel/debug/tracing",
+ "/tracing",
+ "/trace",
+ };
+
+ for ( i = 0; i < sizeof( tracefs_dirs ) / sizeof( tracefs_dirs[ 0 ] ); i++ )
+ {
+ if ( is_tracefs_dir( tracefs_dirs[ i ] ) )
+ {
+ strncpy( g_tracefs_dir, tracefs_dirs[ i ], PATH_MAX );
+ g_tracefs_dir[ PATH_MAX - 1 ] = 0; // strncpy does not terminate on truncation
+ break;
+ }
+ }
+
+ if ( !g_tracefs_dir[ 0 ] ) // none of the fixed paths matched: scan the mount table
+ {
+ FILE *fp;
+ char type[ 128 ];
+ char dir[ PATH_MAX + 1 ]; // one extra byte for the terminator after a PATH_MAX-wide %s
+
+ fp = fopen( "/proc/mounts", "r" );
+ if ( fp )
+ {
+ while ( fscanf( fp, "%*s %" GPUVIS_STR_VALUE( PATH_MAX ) "s %127s %*s %*d %*d\n", dir, type ) == 2 ) // keeps fields 2 and 3 (mount point, fs type)
+ {
+ if ( !strcmp( type, "tracefs" ) && is_tracefs_dir( dir ) )
+ {
+ strncpy( g_tracefs_dir, dir, PATH_MAX );
+ g_tracefs_dir[ PATH_MAX - 1 ] = 0;
+ break;
+ }
+ }
+
+ fclose( fp );
+ }
+ }
+
+ g_tracefs_dir_inited = 1; // set even on failure, so the search is not repeated until shutdown resets it
+ }
+
+ return g_tracefs_dir;
+}
+
+// Build "<tracefs dir>/<file>" in buf.
+// Returns buf on success; NULL when no tracefs directory was found, when buf
+// is NULL or buflen is 0, or when the path does not fit in buflen bytes.
+GPUVIS_EXTERN const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file )
+{
+    const char *tracefs_dir;
+    int n;
+
+    // A zero-length buffer cannot hold even the terminator; the old
+    // buf[ buflen - 1 ] store would have landed far out of bounds.
+    if ( !buf || !buflen )
+        return NULL;
+
+    tracefs_dir = gpuvis_get_tracefs_dir();
+    if ( !tracefs_dir[ 0 ] )
+        return NULL;
+
+    // snprintf() always NUL-terminates when buflen > 0. A truncated path
+    // would name a different file, so treat it as an error.
+    n = snprintf( buf, buflen, "%s/%s", tracefs_dir, file );
+    if ( n < 0 || ( size_t )n >= buflen )
+        return NULL;
+
+    return buf;
+}
+
+#endif // GPUVIS_TRACE_IMPLEMENTATION
+
+#endif // _GPUVIS_TRACE_UTILS_H_
diff --git a/src/util/perf/u_gpuvis.c b/src/util/perf/u_gpuvis.c
new file mode 100644
index 00000000000..e3f14cd1f65
--- /dev/null
+++ b/src/util/perf/u_gpuvis.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2023 Bas Nieuwenhuizen
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "u_gpuvis.h"
+
+#include <threads.h>
+
+#define GPUVIS_TRACE_IMPLEMENTATION
+#include "gpuvis_trace_utils.h"
+
+/* Random base value to prevent collisions. As contexts are considered thread
+ * global by gpuvis, collisions are quite likely if we start at 0 and there
+ * are independent libraries tracing.
+ */
+static unsigned int gpuvis_base_ctx;
+
+static _Thread_local unsigned int gpuvis_current_ctx;
+
+static once_flag gpuvis_once_flag = ONCE_FLAG_INIT;
+
+/* One-time setup run through call_once(): opens the gpuvis trace marker and
+ * picks the base value added to every context id.
+ */
+static void
+util_gpuvis_init_once(void)
+{
+   gpuvis_trace_init();
+
+   /* Initialize it by address to avoid collisions between libraries using
+    * this code (e.g. GL & vulkan) */
+   gpuvis_base_ctx = (uintptr_t) util_gpuvis_init_once >> 12;
+}
+
+void
+util_gpuvis_init(void)
+{ /* Runs util_gpuvis_init_once at most once, however many times this is called. */
+ call_once(&gpuvis_once_flag, util_gpuvis_init_once);
+}
+
+void
+util_gpuvis_begin(const char *name)
+{ /* Open a gpuvis context named "mesa:<name>" on the calling thread. */
+ unsigned int ctx = gpuvis_base_ctx + ++gpuvis_current_ctx; /* pre-increment: the per-thread counter is bumped for each open scope */
+ gpuvis_trace_begin_ctx_printf(ctx, "mesa:%s", name);
+}
+
+void
+util_gpuvis_end(void)
+{ /* Close the context opened by the most recent unmatched util_gpuvis_begin on this thread. */
+ unsigned int ctx = gpuvis_base_ctx + gpuvis_current_ctx--; /* same id the matching begin computed; post-decrement pops the counter */
+
+ /* Use an empty string to avoid warnings about an empty format string. */
+ gpuvis_trace_end_ctx_printf(ctx, "%s", "");
+} \ No newline at end of file
diff --git a/src/util/perf/u_gpuvis.h b/src/util/perf/u_gpuvis.h
new file mode 100644
index 00000000000..2a2f1a2e7c6
--- /dev/null
+++ b/src/util/perf/u_gpuvis.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Bas Nieuwenhuizen
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef U_GPUVIS_H
+#define U_GPUVIS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_GPUVIS
+
+void util_gpuvis_init(void);
+
+void util_gpuvis_begin(const char *name);
+
+/* Ends the scope opened by the most recent util_gpuvis_begin() on the calling
+ * thread; the context id is tracked internally per thread. */
+void util_gpuvis_end(void);
+
+#else
+
+static inline void
+util_gpuvis_init(void)
+{
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_GPUVIS_H */ \ No newline at end of file
diff --git a/src/util/perf/u_perfetto.cc b/src/util/perf/u_perfetto.cc
new file mode 100644
index 00000000000..897e29cc4f8
--- /dev/null
+++ b/src/util/perf/u_perfetto.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "u_perfetto.h"
+
+#include <perfetto.h>
+
+#include "c11/threads.h"
+#include "util/macros.h"
+
+/* perfetto requires string literals */
+#define UTIL_PERFETTO_CATEGORY_DEFAULT_STR "mesa.default"
+
+PERFETTO_DEFINE_CATEGORIES(
+ perfetto::Category(UTIL_PERFETTO_CATEGORY_DEFAULT_STR)
+ .SetDescription("Mesa default events"));
+
+PERFETTO_TRACK_EVENT_STATIC_STORAGE();
+
+int util_perfetto_tracing_state;
+
+static void
+util_perfetto_update_tracing_state(void)
+{ /* Cache whether the default category is enabled in util_perfetto_tracing_state, the flag read by util_perfetto_is_tracing_enabled. */
+ p_atomic_set(&util_perfetto_tracing_state,
+ TRACE_EVENT_CATEGORY_ENABLED(UTIL_PERFETTO_CATEGORY_DEFAULT_STR));
+}
+
+void
+util_perfetto_trace_begin(const char *name)
+{ /* Begin a slice in the default category; `name` is a runtime string, so it is set on the event from the lambda and nullptr is passed as the static name. */
+ TRACE_EVENT_BEGIN(
+ UTIL_PERFETTO_CATEGORY_DEFAULT_STR, nullptr,
+ [&](perfetto::EventContext ctx) { ctx.event()->set_name(name); });
+}
+
+void
+util_perfetto_trace_end(void)
+{ /* End the current slice in the default category. */
+ TRACE_EVENT_END(UTIL_PERFETTO_CATEGORY_DEFAULT_STR);
+
+ util_perfetto_update_tracing_state(); /* polled here because the observer gets no callback after a session stops */
+}
+
+class UtilPerfettoObserver : public perfetto::TrackEventSessionObserver { /* Refreshes the cached tracing flag when a session starts. */
+ public:
+ UtilPerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(this); } /* registers itself on construction */
+
+ void OnStart(const perfetto::DataSourceBase::StartArgs &) override
+ {
+ util_perfetto_update_tracing_state();
+ }
+
+ /* XXX There is no PostStop callback. We have to call
+ * util_perfetto_update_tracing_state occasionally to poll.
+ */
+};
+
+static void
+util_perfetto_fini(void)
+{ /* atexit handler registered by util_perfetto_init_once. */
+ perfetto::Tracing::Shutdown();
+}
+
+static void
+util_perfetto_init_once(void)
+{ /* One-time setup run through call_once() from util_perfetto_init. */
+ // Connects to the system tracing service
+ perfetto::TracingInitArgs args;
+ args.backends = perfetto::kSystemBackend;
+ perfetto::Tracing::Initialize(args);
+
+ static UtilPerfettoObserver observer; /* function-local static: constructed here, after Initialize() */
+ perfetto::TrackEvent::Register();
+
+ atexit(&util_perfetto_fini); /* shut perfetto down at process exit */
+}
+
+static once_flag perfetto_once_flag = ONCE_FLAG_INIT;
+
+void
+util_perfetto_init(void)
+{ /* Runs util_perfetto_init_once at most once, however many times this is called. */
+ call_once(&perfetto_once_flag, util_perfetto_init_once);
+}
diff --git a/src/util/perf/u_perfetto.h b/src/util/perf/u_perfetto.h
new file mode 100644
index 00000000000..75ae00186a0
--- /dev/null
+++ b/src/util/perf/u_perfetto.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2021 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _UTIL_PERFETTO_H
+#define _UTIL_PERFETTO_H
+
+#include "util/u_atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_PERFETTO
+
+extern int util_perfetto_tracing_state;
+
+void util_perfetto_init(void);
+
+/* Returns true when the cached tracing flag is non-zero.  Only a relaxed
+ * atomic read of util_perfetto_tracing_state is performed.
+ */
+static inline bool
+util_perfetto_is_tracing_enabled(void)
+{
+   const int state = p_atomic_read_relaxed(&util_perfetto_tracing_state);
+
+   return state != 0;
+}
+
+void util_perfetto_trace_begin(const char *name);
+
+void util_perfetto_trace_end(void);
+
+#else /* HAVE_PERFETTO */
+/* Built without HAVE_PERFETTO: the functions below are empty stubs and
+ * tracing is always reported as disabled.
+ */
+static inline void
+util_perfetto_init(void)
+{
+}
+
+static inline bool
+util_perfetto_is_tracing_enabled(void)
+{
+ return false;
+}
+
+static inline void
+util_perfetto_trace_begin(const char *name)
+{
+}
+
+static inline void
+util_perfetto_trace_end(void)
+{
+}
+
+#endif /* HAVE_PERFETTO */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UTIL_PERFETTO_H */
diff --git a/src/util/perf/u_perfetto_renderpass.h b/src/util/perf/u_perfetto_renderpass.h
new file mode 100644
index 00000000000..13aad3481eb
--- /dev/null
+++ b/src/util/perf/u_perfetto_renderpass.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright © 2023 Google LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "perfetto.h"
+
+#include "util/hash_table.h"
+#include "util/perf/u_trace.h"
+#include "util/ralloc.h"
+
+using perfetto::DataSource;
+template <typename DataSourceType, typename DataSourceTraits>
+class MesaRenderpassDataSource
+ : public perfetto::DataSource<DataSourceType, DataSourceTraits> {
+ /* Common perfetto data source for GPU render-pass tracing.  It switches
+ * u_trace's perfetto mode on and off with the tracing session and keeps a
+ * table that interns debug-marker names as stage iids.  DataSourceType is
+ * expected to be the deriving class: OnStop casts `this` to it.
+ */
+ public:
+ typedef typename perfetto::DataSource<DataSourceType,
+ DataSourceTraits>::TraceContext
+ TraceContext;
+
+ void OnSetup(const perfetto::DataSourceBase::SetupArgs &) override
+ {
+ // Use this callback to apply any custom configuration to your data
+ // source based on the TraceConfig in SetupArgs.
+ debug_markers = NULL;
+ }
+ /* Creates the debug-marker table and enables u_trace's perfetto mode. */
+ void OnStart(const perfetto::DataSourceBase::StartArgs &) override
+ {
+ debug_markers = _mesa_hash_table_create(NULL, _mesa_hash_string,
+ _mesa_key_string_equal);
+ // This notification can be used to initialize the GPU driver, enable
+ // counters, etc. StartArgs will contain the DataSourceDescriptor,
+ // which can be extended.
+ u_trace_perfetto_start();
+ PERFETTO_LOG("Tracing started");
+ }
+ /* Disables u_trace's perfetto mode, emits and flushes one empty packet,
+ * then frees the debug-marker table (and the names allocated from it).
+ */
+ void OnStop(const perfetto::DataSourceBase::StopArgs &) override
+ {
+ PERFETTO_LOG("Tracing stopped");
+
+ // Undo any initialization done in OnStart.
+ u_trace_perfetto_stop();
+ // TODO we should perhaps block until queued traces are flushed?
+
+ static_cast<DataSourceType *>(this)->Trace([](auto ctx) {
+ auto packet = ctx.NewTracePacket();
+ packet->Finalize();
+ ctx.Flush();
+ });
+
+ ralloc_free(debug_markers);
+ }
+
+ /* Emits a clock sync trace event. Perfetto uses periodic clock events
+ * like this to sync up our GPU render stages with the CPU on the same
+ * timeline, since clocks always drift over time. Note that perfetto
+ * relies on gpu_ts being monotonic, and will perform badly if it goes
+ * backwards -- see tu_perfetto.cc for an example implementation of handling
+ * going backwards.
+ *
+ * The packet is stamped with cpu_ts on BUILTIN_CLOCK_BOOTTIME and carries
+ * a snapshot pairing that clock (cpu_ts) with gpu_clock_id (gpu_ts).
+ */
+ static void EmitClockSync(TraceContext &ctx,
+ uint64_t cpu_ts,
+ uint64_t gpu_ts,
+ uint32_t gpu_clock_id)
+ {
+ auto packet = ctx.NewTracePacket();
+
+ packet->set_timestamp_clock_id(
+ perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+ packet->set_timestamp(cpu_ts);
+
+ auto event = packet->set_clock_snapshot();
+
+ {
+ auto clock = event->add_clocks();
+
+ clock->set_clock_id(
+ perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
+ clock->set_timestamp(cpu_ts);
+ }
+
+ {
+ auto clock = event->add_clocks();
+
+ clock->set_clock_id(gpu_clock_id);
+ clock->set_timestamp(gpu_ts);
+ }
+ }
+
+ /* Returns a stage iid to use for a command stream or queue annotation.
+ *
+ * Using a new stage lets the annotation string show up right on the track
+ * event in the UI, rather than needing to click into the event to find the
+ * name in the metadata. Intended for use with
+ * vkCmdBeginDebugUtilsLabelEXT() and glPushDebugGroup().
+ *
+ * Note that SEQ_INCREMENTAL_STATE_CLEARED must have been set in the
+ * sequence before this is called.
+ *
+ * A name seen before returns the iid it was given earlier; a new name is
+ * assigned (1 << 32) + the current table entry count, announced in an
+ * interned-data packet, and stored under a copy of the string.
+ */
+ uint64_t debug_marker_stage(TraceContext &ctx, const char *name)
+ {
+ struct hash_entry *entry = _mesa_hash_table_search(debug_markers, name);
+ const uint64_t dynamic_iid_base = 1ull << 32;
+
+ if (entry) {
+ return dynamic_iid_base + (uint32_t) (uintptr_t) entry->data;
+ } else {
+ uint64_t iid = dynamic_iid_base + debug_markers->entries;
+
+ auto packet = ctx.NewTracePacket();
+ auto interned_data = packet->set_interned_data();
+
+ auto desc = interned_data->add_gpu_specifications();
+ desc->set_iid(iid);
+ desc->set_name(name);
+
+ /* We only track the entry count in entry->data, because the
+ * dynamic_iid_base would get lost on 32-bit builds.
+ */
+ _mesa_hash_table_insert(debug_markers,
+ ralloc_strdup(debug_markers, name),
+ (void *) (uintptr_t) debug_markers->entries);
+
+ return iid;
+ }
+ }
+
+ private:
+ /* Hash table of application generated events (string -> iid) (use
+ * tctx.GetDataSourceLocked()->debug_marker_stage() to get a stage iid)
+ */
+ struct hash_table *debug_markers;
+};
+
+/* Begin the C API section. */
diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c
index f07ae602142..6d9982cb59c 100644
--- a/src/util/perf/u_trace.c
+++ b/src/util/perf/u_trace.c
@@ -16,39 +16,54 @@
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
*/
+#include "u_trace.h"
+
#include <inttypes.h>
#include "util/list.h"
-#include "util/ralloc.h"
+#include "util/u_call_once.h"
#include "util/u_debug.h"
-#include "util/u_inlines.h"
-#include "util/u_fifo.h"
-
-#include "u_trace.h"
+#include "util/u_vector.h"
#define __NEEDS_TRACE_PRIV
#include "u_trace_priv.h"
+#define PAYLOAD_BUFFER_SIZE 0x100
#define TIMESTAMP_BUF_SIZE 0x1000
-#define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t))
+#define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t))
-#ifdef HAVE_PERFETTO
-int ut_perfetto_enabled;
+struct u_trace_state { /* process-wide u_trace configuration */
+ util_once_flag once; /* guards the one-time initialization */
+ FILE *trace_file; /* stream that printed traces are written to */
+ enum u_trace_type enabled_traces; /* flags parsed from MESA_GPU_TRACES */
+};
+static struct u_trace_state u_trace_state = { .once = UTIL_ONCE_FLAG_INIT };
+#ifdef HAVE_PERFETTO
/**
* Global list of contexts, so we can defer starting the queue until
* perfetto tracing is started.
- *
- * TODO locking
*/
-struct list_head ctx_list = { &ctx_list, &ctx_list };
+static struct list_head ctx_list = { &ctx_list, &ctx_list };
+
+static simple_mtx_t ctx_list_mutex = SIMPLE_MTX_INITIALIZER;
+/* The amount of Perfetto tracers connected */
+int _u_trace_perfetto_count;
#endif
+struct u_trace_payload_buf { /* reference-counted block of payload storage */
+ uint32_t refcount; /* the buffer is freed when this reaches zero */
+
+ uint8_t *buf; /* start of the storage area */
+ uint8_t *next; /* first unused byte; new payloads are carved from here */
+ uint8_t *end; /* one past the last byte of the storage area */
+};
+
struct u_trace_event {
const struct u_tracepoint *tp;
const void *payload;
@@ -76,17 +91,17 @@ struct u_trace_chunk {
*/
void *timestamps;
- /**
- * For trace payload, we sub-allocate from ralloc'd buffers which
- * hang off of the chunk's ralloc context, so they are automatically
- * free'd when the chunk is free'd
+ /* Array of u_trace_payload_buf referenced by traces[] elements.
*/
- uint8_t *payload_buf, *payload_end;
+ struct u_vector payloads;
+
+ /* Current payload buffer being written. */
+ struct u_trace_payload_buf *payload;
struct util_queue_fence fence;
- bool last; /* this chunk is last in batch */
- bool eof; /* this chunk is last in frame */
+ bool last; /* this chunk is last in batch */
+ bool eof; /* this chunk is last in frame */
void *flush_data; /* assigned by u_trace_flush */
@@ -97,6 +112,172 @@ struct u_trace_chunk {
bool free_flush_data;
};
+struct u_trace_printer { /* callbacks implementing one trace output format */
+ void (*start)(struct u_trace_context *utctx); /* at context init */
+ void (*end)(struct u_trace_context *utctx); /* at context fini */
+ void (*start_of_frame)(struct u_trace_context *utctx);
+ void (*end_of_frame)(struct u_trace_context *utctx);
+ void (*start_of_batch)(struct u_trace_context *utctx);
+ void (*end_of_batch)(struct u_trace_context *utctx);
+ void (*event)(struct u_trace_context *utctx, /* one event of a chunk */
+ struct u_trace_chunk *chunk,
+ const struct u_trace_event *evt,
+ uint64_t ns, /* event timestamp */
+ int32_t delta); /* printed next to ns by the text printer */
+};
+/* Empty hook: the text format prints nothing here.  txt_printer installs it
+ * for start, end and start_of_frame.
+ */
+static void
+print_txt_start(struct u_trace_context *utctx)
+{
+}
+/* Text format: print the end-of-frame marker with the current frame number. */
+static void
+print_txt_end_of_frame(struct u_trace_context *utctx)
+{
+ fprintf(utctx->out, "END OF FRAME %u\n", utctx->frame_nr);
+}
+/* Text format: print the column header that precedes a batch's events. */
+static void
+print_txt_start_of_batch(struct u_trace_context *utctx)
+{
+ fprintf(utctx->out, "+----- NS -----+ +-- Δ --+ +----- MSG -----\n");
+}
+/* Text format: print the batch's elapsed time, i.e. last_time_ns minus
+ * first_time_ns of the context.
+ */
+static void
+print_txt_end_of_batch(struct u_trace_context *utctx)
+{
+ uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns;
+ fprintf(utctx->out, "ELAPSED: %" PRIu64 " ns\n", elapsed);
+}
+
+/* Text format: print one event as "<ns> <delta>: <name>".  When the
+ * tracepoint provides a print callback, the name is followed by ": " and the
+ * callback writes the payload; otherwise the line ends right after the name.
+ */
+static void
+print_txt_event(struct u_trace_context *utctx,
+                struct u_trace_chunk *chunk,
+                const struct u_trace_event *evt,
+                uint64_t ns,
+                int32_t delta)
+{
+   /* No payload printer: emit the bare event line and stop. */
+   if (!evt->tp->print) {
+      fprintf(utctx->out, "%016" PRIu64 " %+9d: %s\n", ns, delta,
+              evt->tp->name);
+      return;
+   }
+
+   fprintf(utctx->out, "%016" PRIu64 " %+9d: %s: ", ns, delta,
+           evt->tp->name);
+   evt->tp->print(utctx->out, evt->payload);
+}
+/* Plain-text output format.  start, end and start_of_frame all point at the
+ * empty print_txt_start hook.
+ */
+static struct u_trace_printer txt_printer = {
+ .start = &print_txt_start,
+ .end = &print_txt_start,
+ .start_of_frame = &print_txt_start,
+ .end_of_frame = &print_txt_end_of_frame,
+ .start_of_batch = &print_txt_start_of_batch,
+ .end_of_batch = &print_txt_end_of_batch,
+ .event = &print_txt_event,
+};
+/* JSON format: open the top-level array. */
+static void
+print_json_start(struct u_trace_context *utctx)
+{
+ fprintf(utctx->out, "[\n");
+}
+/* JSON format: close the top-level array. */
+static void
+print_json_end(struct u_trace_context *utctx)
+{
+ fprintf(utctx->out, "\n]");
+}
+/* JSON format: open a frame object holding the frame number and the start
+ * of its "batches" array.  Every frame except number 0 is preceded by a
+ * comma separator.
+ */
+static void
+print_json_start_of_frame(struct u_trace_context *utctx)
+{
+ if (utctx->frame_nr != 0)
+ fprintf(utctx->out, ",\n");
+ fprintf(utctx->out, "{\n\"frame\": %u,\n", utctx->frame_nr);
+ fprintf(utctx->out, "\"batches\": [\n");
+}
+/* JSON format: close the "batches" array and the frame object, then flush
+ * the output stream.
+ */
+static void
+print_json_end_of_frame(struct u_trace_context *utctx)
+{
+ fprintf(utctx->out, "]\n}\n");
+ fflush(utctx->out);
+}
+/* JSON format: open a batch object and its "events" array.  Every batch
+ * except number 0 is preceded by a comma separator.
+ */
+static void
+print_json_start_of_batch(struct u_trace_context *utctx)
+{
+ if (utctx->batch_nr != 0)
+ fprintf(utctx->out, ",\n");
+ fprintf(utctx->out, "{\n\"events\": [\n");
+}
+/* JSON format: close the "events" array, write "duration_ns" (last_time_ns
+ * minus first_time_ns of the context) and close the batch object.
+ */
+static void
+print_json_end_of_batch(struct u_trace_context *utctx)
+{
+ uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns;
+ fprintf(utctx->out, "],\n");
+ fprintf(utctx->out, "\"duration_ns\": %" PRIu64 "\n", elapsed);
+ fprintf(utctx->out, "}\n");
+}
+
+/* JSON format: print one event as an object with "event" (tracepoint name),
+ * "time_ns" (zero-padded timestamp, as a string) and "params".  Every event
+ * except number 0 of a batch is preceded by a comma separator.
+ *
+ * "params" is filled by the tracepoint's print_json callback and stays an
+ * empty object when the tracepoint has none.
+ */
+static void
+print_json_event(struct u_trace_context *utctx,
+                 struct u_trace_chunk *chunk,
+                 const struct u_trace_event *evt,
+                 uint64_t ns,
+                 int32_t delta)
+{
+   if (utctx->event_nr != 0)
+      fprintf(utctx->out, ",\n");
+   fprintf(utctx->out, "{\n\"event\": \"%s\",\n", evt->tp->name);
+   fprintf(utctx->out, "\"time_ns\": \"%016" PRIu64 "\",\n", ns);
+   fprintf(utctx->out, "\"params\": {");
+   /* Test the callback that is actually invoked: checking tp->print here
+    * would call through a NULL print_json on a tracepoint that only
+    * provides the text printer.
+    */
+   if (evt->tp->print_json)
+      evt->tp->print_json(utctx->out, evt->payload);
+   fprintf(utctx->out, "}\n}\n");
+}
+/* JSON output format: one callback per hook of struct u_trace_printer. */
+static struct u_trace_printer json_printer = {
+ .start = print_json_start,
+ .end = print_json_end,
+ .start_of_frame = &print_json_start_of_frame,
+ .end_of_frame = &print_json_end_of_frame,
+ .start_of_batch = &print_json_start_of_batch,
+ .end_of_batch = &print_json_end_of_batch,
+ .event = &print_json_event,
+};
+
+/* Allocate a payload buffer: a single malloc holds the header followed by
+ * PAYLOAD_BUFFER_SIZE bytes of storage.  The buffer starts out empty with a
+ * reference count of 1.  The allocation result is not checked.
+ */
+static struct u_trace_payload_buf *
+u_trace_payload_buf_create(void)
+{
+   struct u_trace_payload_buf *pbuf =
+      malloc(sizeof(struct u_trace_payload_buf) + PAYLOAD_BUFFER_SIZE);
+
+   p_atomic_set(&pbuf->refcount, 1);
+
+   /* The storage area begins right behind the header. */
+   uint8_t *storage = (uint8_t *) (pbuf + 1);
+
+   pbuf->buf = storage;
+   pbuf->next = storage;
+   pbuf->end = storage + PAYLOAD_BUFFER_SIZE;
+
+   return pbuf;
+}
+/* Take one more reference on `payload` (atomic increment) and return the
+ * same pointer.
+ */
+static struct u_trace_payload_buf *
+u_trace_payload_buf_ref(struct u_trace_payload_buf *payload)
+{
+ p_atomic_inc(&payload->refcount);
+ return payload;
+}
+/* Drop one reference on `payload`; the buffer is freed when the count
+ * reaches zero.
+ */
+static void
+u_trace_payload_buf_unref(struct u_trace_payload_buf *payload)
+{
+ if (p_atomic_dec_zero(&payload->refcount))
+ free(payload);
+}
+
static void
free_chunk(void *ptr)
{
@@ -104,68 +285,133 @@ free_chunk(void *ptr)
chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps);
+ /* Unref payloads attached to this chunk. */
+ struct u_trace_payload_buf **payload;
+ u_vector_foreach (payload, &chunk->payloads)
+ u_trace_payload_buf_unref(*payload);
+ u_vector_finish(&chunk->payloads);
+
list_del(&chunk->node);
+ free(chunk);
}
static void
free_chunks(struct list_head *chunks)
{
while (!list_is_empty(chunks)) {
- struct u_trace_chunk *chunk = list_first_entry(chunks,
- struct u_trace_chunk, node);
- ralloc_free(chunk);
+ struct u_trace_chunk *chunk =
+ list_first_entry(chunks, struct u_trace_chunk, node);
+ free_chunk(chunk);
}
}
static struct u_trace_chunk *
-get_chunk(struct u_trace *ut)
+get_chunk(struct u_trace *ut, size_t payload_size)
{
struct u_trace_chunk *chunk;
+ assert(payload_size <= PAYLOAD_BUFFER_SIZE);
+
/* do we currently have a non-full chunk to append msgs to? */
if (!list_is_empty(&ut->trace_chunks)) {
- chunk = list_last_entry(&ut->trace_chunks,
- struct u_trace_chunk, node);
- if (chunk->num_traces < TRACES_PER_CHUNK)
- return chunk;
- /* we need to expand to add another chunk to the batch, so
- * the current one is no longer the last one of the batch:
- */
- chunk->last = false;
+ chunk = list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);
+ /* Can we store a new trace in the chunk? */
+ if (chunk->num_traces < TRACES_PER_CHUNK) {
+ /* If no payload required, nothing else to check. */
+ if (payload_size <= 0)
+ return chunk;
+
+ /* If the payload buffer has space for the payload, we're good.
+ */
+ if (chunk->payload &&
+ (chunk->payload->end - chunk->payload->next) >= payload_size)
+ return chunk;
+
+ /* If we don't have enough space in the payload buffer, can we
+ * allocate a new one?
+ */
+ struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
+ *buf = u_trace_payload_buf_create();
+ chunk->payload = *buf;
+ return chunk;
+ }
+ /* we need to expand to add another chunk to the batch, so
+ * the current one is no longer the last one of the batch:
+ */
+ chunk->last = false;
}
/* .. if not, then create a new one: */
- chunk = rzalloc_size(NULL, sizeof(*chunk));
- ralloc_set_destructor(chunk, free_chunk);
+ chunk = calloc(1, sizeof(*chunk));
chunk->utctx = ut->utctx;
- chunk->timestamps = ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE);
+ chunk->timestamps =
+ ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE);
chunk->last = true;
+ u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf *));
+ if (payload_size > 0) {
+ struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads);
+ *buf = u_trace_payload_buf_create();
+ chunk->payload = *buf;
+ }
list_addtail(&chunk->node, &ut->trace_chunks);
return chunk;
}
-DEBUG_GET_ONCE_BOOL_OPTION(trace, "GPU_TRACE", false)
-DEBUG_GET_ONCE_FILE_OPTION(trace_file, "GPU_TRACEFILE", NULL, "w")
+static const struct debug_named_value config_control[] = { /* Flag names
+ * accepted in the MESA_GPU_TRACES environment variable; "perfetto" is
+ * only offered when built with HAVE_PERFETTO. */
+ { "print", U_TRACE_TYPE_PRINT, "Enable print" },
+ { "print_json", U_TRACE_TYPE_PRINT_JSON, "Enable print in JSON" },
+#ifdef HAVE_PERFETTO
+ { "perfetto", U_TRACE_TYPE_PERFETTO_ENV, "Enable perfetto" },
+#endif
+ { "markers", U_TRACE_TYPE_MARKERS, "Enable marker trace" },
+ DEBUG_NAMED_VALUE_END
+};
-static FILE *
-get_tracefile(void)
+DEBUG_GET_ONCE_OPTION(trace_file, "MESA_GPU_TRACEFILE", NULL)
+
+static void
+trace_file_fini(void)
{
- static FILE *tracefile = NULL;
- static bool firsttime = true;
+ fclose(u_trace_state.trace_file);
+ u_trace_state.trace_file = NULL;
+}
- if (firsttime) {
- tracefile = debug_get_option_trace_file();
- if (!tracefile && debug_get_option_trace()) {
- tracefile = stdout;
+static void
+u_trace_state_init_once(void)
+{
+ u_trace_state.enabled_traces =
+ debug_get_flags_option("MESA_GPU_TRACES", config_control, 0);
+ const char *tracefile_name = debug_get_option_trace_file();
+ if (tracefile_name && __normal_user()) {
+ u_trace_state.trace_file = fopen(tracefile_name, "w");
+ if (u_trace_state.trace_file != NULL) {
+ atexit(trace_file_fini);
}
-
- firsttime = false;
}
+ if (!u_trace_state.trace_file) {
+ u_trace_state.trace_file = stdout;
+ }
+}
+/* Initialize the global u_trace state.  util_call_once on u_trace_state.once
+ * makes u_trace_state_init_once run only on the first call.
+ */
+void
+u_trace_state_init(void)
+{
+ util_call_once(&u_trace_state.once, u_trace_state_init_once);
+}
+
+bool
+u_trace_is_enabled(enum u_trace_type type)
+{
+ /* Active is only tracked in a given u_trace context, so if you're asking
+ * us if U_TRACE_TYPE_PERFETTO (_ENV | _ACTIVE) is enabled, then just check
+ * _ENV ("perfetto tracing is desired, but perfetto might not be running").
+ */
+ type &= ~U_TRACE_TYPE_PERFETTO_ACTIVE;
- return tracefile;
+ return (u_trace_state.enabled_traces & type) == type;
}
static void
@@ -174,9 +420,10 @@ queue_init(struct u_trace_context *utctx)
if (utctx->queue.jobs)
return;
- bool ret = util_queue_init(&utctx->queue, "traceq", 256, 1,
- UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY |
- UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
+ bool ret = util_queue_init(
+ &utctx->queue, "traceq", 256, 1,
+ UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | UTIL_QUEUE_INIT_RESIZE_IF_FULL,
+ NULL);
assert(ret);
if (!ret)
@@ -185,13 +432,16 @@ queue_init(struct u_trace_context *utctx)
void
u_trace_context_init(struct u_trace_context *utctx,
- void *pctx,
- u_trace_create_ts_buffer create_timestamp_buffer,
- u_trace_delete_ts_buffer delete_timestamp_buffer,
- u_trace_record_ts record_timestamp,
- u_trace_read_ts read_timestamp,
- u_trace_delete_flush_data delete_flush_data)
+ void *pctx,
+ u_trace_create_ts_buffer create_timestamp_buffer,
+ u_trace_delete_ts_buffer delete_timestamp_buffer,
+ u_trace_record_ts record_timestamp,
+ u_trace_read_ts read_timestamp,
+ u_trace_delete_flush_data delete_flush_data)
{
+ u_trace_state_init();
+
+ utctx->enabled_traces = u_trace_state.enabled_traces;
utctx->pctx = pctx;
utctx->create_timestamp_buffer = create_timestamp_buffer;
utctx->delete_timestamp_buffer = delete_timestamp_buffer;
@@ -202,32 +452,65 @@ u_trace_context_init(struct u_trace_context *utctx,
utctx->last_time_ns = 0;
utctx->first_time_ns = 0;
utctx->frame_nr = 0;
+ utctx->batch_nr = 0;
+ utctx->event_nr = 0;
+ utctx->start_of_frame = true;
list_inithead(&utctx->flushed_trace_chunks);
- utctx->out = get_tracefile();
+ if (utctx->enabled_traces & U_TRACE_TYPE_PRINT) {
+ utctx->out = u_trace_state.trace_file;
+
+ if (utctx->enabled_traces & U_TRACE_TYPE_JSON) {
+ utctx->out_printer = &json_printer;
+ } else {
+ utctx->out_printer = &txt_printer;
+ }
+ } else {
+ utctx->out = NULL;
+ utctx->out_printer = NULL;
+ }
#ifdef HAVE_PERFETTO
+ simple_mtx_lock(&ctx_list_mutex);
list_add(&utctx->node, &ctx_list);
+ if (_u_trace_perfetto_count > 0)
+ utctx->enabled_traces |= U_TRACE_TYPE_PERFETTO_ACTIVE;
+
+ queue_init(utctx);
+
+ simple_mtx_unlock(&ctx_list_mutex);
+#else
+ queue_init(utctx);
#endif
- if (!u_trace_context_tracing(utctx))
+ if (!(p_atomic_read_relaxed(&utctx->enabled_traces) &
+ U_TRACE_TYPE_REQUIRE_QUEUING))
return;
- queue_init(utctx);
+ if (utctx->out) {
+ utctx->out_printer->start(utctx);
+ }
}
void
u_trace_context_fini(struct u_trace_context *utctx)
{
#ifdef HAVE_PERFETTO
+ simple_mtx_lock(&ctx_list_mutex);
list_del(&utctx->node);
+ simple_mtx_unlock(&ctx_list_mutex);
#endif
+
+ if (utctx->out) {
+ utctx->out_printer->end(utctx);
+ fflush(utctx->out);
+ }
+
if (!utctx->queue.jobs)
return;
util_queue_finish(&utctx->queue);
util_queue_destroy(&utctx->queue);
- fflush(utctx->out);
free_chunks(&utctx->flushed_trace_chunks);
}
@@ -235,16 +518,34 @@ u_trace_context_fini(struct u_trace_context *utctx)
void
u_trace_perfetto_start(void)
{
- list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node)
+ simple_mtx_lock(&ctx_list_mutex);
+
+ list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) {
queue_init(utctx);
- ut_perfetto_enabled++;
+ p_atomic_set(&utctx->enabled_traces,
+ utctx->enabled_traces | U_TRACE_TYPE_PERFETTO_ACTIVE);
+ }
+
+ _u_trace_perfetto_count++;
+
+ simple_mtx_unlock(&ctx_list_mutex);
}
void
u_trace_perfetto_stop(void)
{
- assert(ut_perfetto_enabled > 0);
- ut_perfetto_enabled--;
+ simple_mtx_lock(&ctx_list_mutex);
+
+ assert(_u_trace_perfetto_count > 0);
+ _u_trace_perfetto_count--;
+ if (_u_trace_perfetto_count == 0) {
+ list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) {
+ p_atomic_set(&utctx->enabled_traces,
+ utctx->enabled_traces & ~U_TRACE_TYPE_PERFETTO_ACTIVE);
+ }
+ }
+
+ simple_mtx_unlock(&ctx_list_mutex);
}
#endif
@@ -254,9 +555,20 @@ process_chunk(void *job, void *gdata, int thread_index)
struct u_trace_chunk *chunk = job;
struct u_trace_context *utctx = chunk->utctx;
+ if (utctx->start_of_frame) {
+ utctx->start_of_frame = false;
+ utctx->batch_nr = 0;
+ if (utctx->out) {
+ utctx->out_printer->start_of_frame(utctx);
+ }
+ }
+
/* For first chunk of batch, accumulated times will be zerod: */
- if (utctx->out && !utctx->last_time_ns) {
- fprintf(utctx->out, "+----- NS -----+ +-- Δ --+ +----- MSG -----\n");
+ if (!utctx->last_time_ns) {
+ utctx->event_nr = 0;
+ if (utctx->out) {
+ utctx->out_printer->start_of_batch(utctx);
+ }
}
for (unsigned idx = 0; idx < chunk->num_traces; idx++) {
@@ -265,7 +577,8 @@ process_chunk(void *job, void *gdata, int thread_index)
if (!evt->tp)
continue;
- uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, chunk->flush_data);
+ uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx,
+ chunk->flush_data);
int32_t delta;
if (!utctx->first_time_ns)
@@ -283,43 +596,46 @@ process_chunk(void *job, void *gdata, int thread_index)
}
if (utctx->out) {
- if (evt->tp->print) {
- fprintf(utctx->out, "%016"PRIu64" %+9d: %s: ", ns, delta, evt->tp->name);
- evt->tp->print(utctx->out, evt->payload);
- } else {
- fprintf(utctx->out, "%016"PRIu64" %+9d: %s\n", ns, delta, evt->tp->name);
- }
+ utctx->out_printer->event(utctx, chunk, evt, ns, delta);
}
#ifdef HAVE_PERFETTO
- if (evt->tp->perfetto) {
- evt->tp->perfetto(utctx->pctx, ns, chunk->flush_data, evt->payload);
+ if (evt->tp->perfetto &&
+ (p_atomic_read_relaxed(&utctx->enabled_traces) &
+ U_TRACE_TYPE_PERFETTO_ACTIVE)) {
+ evt->tp->perfetto(utctx->pctx, ns, evt->tp->tp_idx, chunk->flush_data, evt->payload);
}
#endif
+
+ utctx->event_nr++;
}
if (chunk->last) {
if (utctx->out) {
- uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns;
- fprintf(utctx->out, "ELAPSED: %"PRIu64" ns\n", elapsed);
+ utctx->out_printer->end_of_batch(utctx);
}
+ utctx->batch_nr++;
utctx->last_time_ns = 0;
utctx->first_time_ns = 0;
}
- if (chunk->free_flush_data && utctx->delete_flush_data) {
- utctx->delete_flush_data(utctx, chunk->flush_data);
+ if (chunk->eof) {
+ if (utctx->out) {
+ utctx->out_printer->end_of_frame(utctx);
+ }
+ utctx->frame_nr++;
+ utctx->start_of_frame = true;
}
- if (utctx->out && chunk->eof) {
- fprintf(utctx->out, "END OF FRAME %u\n", utctx->frame_nr++);
+ if (chunk->free_flush_data && utctx->delete_flush_data) {
+ utctx->delete_flush_data(utctx, chunk->flush_data);
}
}
static void
cleanup_chunk(void *job, void *gdata, int thread_index)
{
- ralloc_free(job);
+ free_chunk(job);
}
void
@@ -330,32 +646,30 @@ u_trace_context_process(struct u_trace_context *utctx, bool eof)
if (list_is_empty(chunks))
return;
- struct u_trace_chunk *last_chunk = list_last_entry(chunks,
- struct u_trace_chunk, node);
+ struct u_trace_chunk *last_chunk =
+ list_last_entry(chunks, struct u_trace_chunk, node);
last_chunk->eof = eof;
while (!list_is_empty(chunks)) {
- struct u_trace_chunk *chunk = list_first_entry(chunks,
- struct u_trace_chunk, node);
+ struct u_trace_chunk *chunk =
+ list_first_entry(chunks, struct u_trace_chunk, node);
/* remove from list before enqueuing, because chunk is freed
* once it is processed by the queue:
*/
list_delinit(&chunk->node);
- util_queue_add_job(&utctx->queue, chunk, &chunk->fence,
- process_chunk, cleanup_chunk,
- TIMESTAMP_BUF_SIZE);
+ util_queue_add_job(&utctx->queue, chunk, &chunk->fence, process_chunk,
+ cleanup_chunk, TIMESTAMP_BUF_SIZE);
}
}
-
void
u_trace_init(struct u_trace *ut, struct u_trace_context *utctx)
{
ut->utctx = utctx;
+ ut->num_traces = 0;
list_inithead(&ut->trace_chunks);
- ut->enabled = u_trace_context_tracing(utctx);
}
void
@@ -365,6 +679,7 @@ u_trace_fini(struct u_trace *ut)
* have been flushed to the trace-context.
*/
free_chunks(&ut->trace_chunks);
+ ut->num_traces = 0;
}
bool
@@ -376,34 +691,48 @@ u_trace_has_points(struct u_trace *ut)
struct u_trace_iterator
u_trace_begin_iterator(struct u_trace *ut)
{
- if (!ut->enabled)
- return (struct u_trace_iterator) {NULL, NULL, 0};
+ if (list_is_empty(&ut->trace_chunks))
+ return (struct u_trace_iterator) { ut, NULL, 0 };
struct u_trace_chunk *first_chunk =
list_first_entry(&ut->trace_chunks, struct u_trace_chunk, node);
- return (struct u_trace_iterator) { ut, first_chunk, 0};
+ return (struct u_trace_iterator) { ut, first_chunk, 0 };
}
struct u_trace_iterator
u_trace_end_iterator(struct u_trace *ut)
{
- if (!ut->enabled)
- return (struct u_trace_iterator) {NULL, NULL, 0};
+ if (list_is_empty(&ut->trace_chunks))
+ return (struct u_trace_iterator) { ut, NULL, 0 };
struct u_trace_chunk *last_chunk =
list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);
- return (struct u_trace_iterator) { ut, last_chunk, last_chunk->num_traces};
+ return (struct u_trace_iterator) { ut, last_chunk,
+ last_chunk->num_traces };
+}
+
+/* An iterator taken while its u_trace had no chunks carries a NULL chunk.
+ * Once chunks exist, point such an iterator at the first chunk; any other
+ * iterator is returned unchanged.
+ */
+static struct u_trace_iterator
+sanitize_iterator(struct u_trace_iterator iter)
+{
+   /* Nothing to fix up: no owner, chunk already set, or still no chunks. */
+   if (!iter.ut || iter.chunk || list_is_empty(&iter.ut->trace_chunks))
+      return iter;
+
+   iter.chunk =
+      list_first_entry(&iter.ut->trace_chunks, struct u_trace_chunk, node);
+   return iter;
 }
bool
-u_trace_iterator_equal(struct u_trace_iterator a,
- struct u_trace_iterator b)
+u_trace_iterator_equal(struct u_trace_iterator a, struct u_trace_iterator b)
{
- return a.ut == b.ut &&
- a.chunk == b.chunk &&
- a.event_idx == b.event_idx;
+ a = sanitize_iterator(a);
+ b = sanitize_iterator(b);
+ return a.ut == b.ut && a.chunk == b.chunk && a.event_idx == b.event_idx;
}
void
@@ -413,26 +742,40 @@ u_trace_clone_append(struct u_trace_iterator begin_it,
void *cmdstream,
u_trace_copy_ts_buffer copy_ts_buffer)
{
+ begin_it = sanitize_iterator(begin_it);
+ end_it = sanitize_iterator(end_it);
+
struct u_trace_chunk *from_chunk = begin_it.chunk;
uint32_t from_idx = begin_it.event_idx;
while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) {
- struct u_trace_chunk *to_chunk = get_chunk(into);
+ struct u_trace_chunk *to_chunk = get_chunk(into, 0 /* payload_size */);
unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces,
from_chunk->num_traces - from_idx);
if (from_chunk == end_it.chunk)
to_copy = MIN2(to_copy, end_it.event_idx - from_idx);
- copy_ts_buffer(begin_it.ut->utctx, cmdstream,
- from_chunk->timestamps, from_idx,
- to_chunk->timestamps, to_chunk->num_traces,
+ copy_ts_buffer(begin_it.ut->utctx, cmdstream, from_chunk->timestamps,
+ from_idx, to_chunk->timestamps, to_chunk->num_traces,
to_copy);
memcpy(&to_chunk->traces[to_chunk->num_traces],
&from_chunk->traces[from_idx],
to_copy * sizeof(struct u_trace_event));
+ /* Take a refcount on payloads from from_chunk if needed. */
+ if (begin_it.ut != into) {
+ struct u_trace_payload_buf **in_payload;
+ u_vector_foreach (in_payload, &from_chunk->payloads) {
+ struct u_trace_payload_buf **out_payload =
+ u_vector_add(&to_chunk->payloads);
+
+ *out_payload = u_trace_payload_buf_ref(*in_payload);
+ }
+ }
+
+ into->num_traces += to_copy;
to_chunk->num_traces += to_copy;
from_idx += to_copy;
@@ -442,7 +785,8 @@ u_trace_clone_append(struct u_trace_iterator begin_it,
break;
from_idx = 0;
- from_chunk = LIST_ENTRY(struct u_trace_chunk, from_chunk->node.next, node);
+ from_chunk =
+ list_entry(from_chunk->node.next, struct u_trace_chunk, node);
}
}
}
@@ -451,14 +795,19 @@ void
u_trace_disable_event_range(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it)
{
+ begin_it = sanitize_iterator(begin_it);
+ end_it = sanitize_iterator(end_it);
+
struct u_trace_chunk *current_chunk = begin_it.chunk;
uint32_t start_idx = begin_it.event_idx;
- while(current_chunk != end_it.chunk) {
+ while (current_chunk != end_it.chunk) {
memset(&current_chunk->traces[start_idx], 0,
- (current_chunk->num_traces - start_idx) * sizeof(struct u_trace_event));
+ (current_chunk->num_traces - start_idx) *
+ sizeof(struct u_trace_event));
start_idx = 0;
- current_chunk = LIST_ENTRY(struct u_trace_chunk, current_chunk->node.next, node);
+ current_chunk =
+ list_entry(current_chunk->node.next, struct u_trace_chunk, node);
}
memset(&current_chunk->traces[start_idx], 0,
@@ -471,34 +820,33 @@ u_trace_disable_event_range(struct u_trace_iterator begin_it,
* functions.
*/
void *
-u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp)
+u_trace_appendv(struct u_trace *ut,
+ void *cs,
+ const struct u_tracepoint *tp,
+ unsigned variable_sz)
{
- struct u_trace_chunk *chunk = get_chunk(ut);
-
assert(tp->payload_sz == ALIGN_NPOT(tp->payload_sz, 8));
- if (unlikely((chunk->payload_buf + tp->payload_sz) > chunk->payload_end)) {
- const unsigned payload_chunk_sz = 0x100; /* TODO arbitrary size? */
-
- assert(tp->payload_sz < payload_chunk_sz);
-
- chunk->payload_buf = ralloc_size(chunk, payload_chunk_sz);
- chunk->payload_end = chunk->payload_buf + payload_chunk_sz;
- }
+ unsigned payload_sz = ALIGN_NPOT(tp->payload_sz + variable_sz, 8);
+ struct u_trace_chunk *chunk = get_chunk(ut, payload_sz);
+ unsigned tp_idx = chunk->num_traces++;
/* sub-allocate storage for trace payload: */
- void *payload = chunk->payload_buf;
- chunk->payload_buf += tp->payload_sz;
+ void *payload = NULL;
+ if (payload_sz > 0) {
+ payload = chunk->payload->next;
+ chunk->payload->next += payload_sz;
+ }
/* record a timestamp for the trace: */
- ut->utctx->record_timestamp(ut, cs, chunk->timestamps, chunk->num_traces);
+ ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx,
+ tp->end_of_pipe);
- chunk->traces[chunk->num_traces] = (struct u_trace_event) {
- .tp = tp,
- .payload = payload,
+ chunk->traces[tp_idx] = (struct u_trace_event) {
+ .tp = tp,
+ .payload = payload,
};
-
- chunk->num_traces++;
+ ut->num_traces++;
return payload;
}
@@ -506,7 +854,8 @@ u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp)
void
u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data)
{
- list_for_each_entry(struct u_trace_chunk, chunk, &ut->trace_chunks, node) {
+ list_for_each_entry (struct u_trace_chunk, chunk, &ut->trace_chunks,
+ node) {
chunk->flush_data = flush_data;
chunk->free_flush_data = false;
}
@@ -520,4 +869,5 @@ u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data)
/* transfer batch's log chunks to context: */
list_splicetail(&ut->trace_chunks, &ut->utctx->flushed_trace_chunks);
list_inithead(&ut->trace_chunks);
+ ut->num_traces = 0;
}
diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h
index c184a14e94d..b61b7cfb800 100644
--- a/src/util/perf/u_trace.h
+++ b/src/util/perf/u_trace.h
@@ -16,9 +16,9 @@
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
*/
#ifndef _U_TRACE_H
@@ -28,9 +28,11 @@
#include <stdint.h>
#include <stdio.h>
+#include "util/macros.h"
+#include "util/u_atomic.h"
#include "util/u_queue.h"
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
@@ -69,25 +71,26 @@ extern "C" {
struct u_trace_context;
struct u_trace;
struct u_trace_chunk;
+struct u_trace_printer;
/**
* Special reserved value to indicate that no timestamp was captured,
* and that the timestamp of the previous trace should be reused.
*/
-#define U_TRACE_NO_TIMESTAMP ((uint64_t)0)
+#define U_TRACE_NO_TIMESTAMP ((uint64_t) 0)
/**
* Driver provided callback to create a timestamp buffer which will be
* read by u_trace_read_ts function.
*/
-typedef void* (*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
- uint32_t timestamps_count);
+typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
+ uint32_t timestamps_count);
/**
* Driver provided callback to delete a timestamp buffer.
*/
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
- void *timestamps);
+ void *timestamps);
/**
* Driver provided callback to emit commands into the soecified command
@@ -98,8 +101,11 @@ typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
* a fixed rate, even as the GPU freq changes. The same source used for
* GL_TIMESTAMP queries should be appropriate.
*/
-typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs,
- void *timestamps, unsigned idx);
+typedef void (*u_trace_record_ts)(struct u_trace *ut,
+ void *cs,
+ void *timestamps,
+ unsigned idx,
+ bool end_of_pipe);
/**
* Driver provided callback to read back a previously recorded timestamp.
@@ -120,28 +126,56 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs,
* capturing the same timestamp multiple times in a row.
*/
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
- void *timestamps, unsigned idx, void *flush_data);
+ void *timestamps,
+ unsigned idx,
+ void *flush_data);
/**
* Driver provided callback to delete flush data.
*/
typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx,
- void *flush_data);
+ void *flush_data);
+
+enum u_trace_type {
+ U_TRACE_TYPE_PRINT = 1u << 0,
+ U_TRACE_TYPE_JSON = 1u << 1,
+ U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2,
+ U_TRACE_TYPE_PERFETTO_ENV = 1u << 3,
+ U_TRACE_TYPE_MARKERS = 1u << 4,
+
+ U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON,
+ U_TRACE_TYPE_PERFETTO =
+ U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV,
+
+ /*
+ * A mask of traces that require appending to the tracepoint chunk list.
+ */
+ U_TRACE_TYPE_REQUIRE_QUEUING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO,
+ /*
+ * A mask of traces that require processing the tracepoint chunk list.
+ */
+ U_TRACE_TYPE_REQUIRE_PROCESSING =
+ U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE,
+};
/**
* The trace context provides tracking for "in-flight" traces, once the
* cmdstream that records timestamps has been flushed.
*/
struct u_trace_context {
+ /* All traces enabled in this context */
+ enum u_trace_type enabled_traces;
+
void *pctx;
- u_trace_create_ts_buffer create_timestamp_buffer;
- u_trace_delete_ts_buffer delete_timestamp_buffer;
- u_trace_record_ts record_timestamp;
- u_trace_read_ts read_timestamp;
+ u_trace_create_ts_buffer create_timestamp_buffer;
+ u_trace_delete_ts_buffer delete_timestamp_buffer;
+ u_trace_record_ts record_timestamp;
+ u_trace_read_ts read_timestamp;
u_trace_delete_flush_data delete_flush_data;
FILE *out;
+ struct u_trace_printer *out_printer;
/* Once u_trace_flush() is called u_trace_chunk's are queued up to
* render tracepoints on a queue. The per-chunk queue jobs block until
@@ -161,6 +195,9 @@ struct u_trace_context {
uint64_t first_time_ns;
uint32_t frame_nr;
+ uint32_t batch_nr;
+ uint32_t event_nr;
+ bool start_of_frame;
/* list of unprocessed trace chunks in fifo order: */
struct list_head flushed_trace_chunks;
@@ -180,23 +217,24 @@ struct u_trace_context {
struct u_trace {
struct u_trace_context *utctx;
- struct list_head trace_chunks; /* list of unflushed trace chunks in fifo order */
+ uint32_t num_traces;
- bool enabled;
+ struct list_head
+ trace_chunks; /* list of unflushed trace chunks in fifo order */
};
void u_trace_context_init(struct u_trace_context *utctx,
- void *pctx,
- u_trace_create_ts_buffer create_timestamp_buffer,
- u_trace_delete_ts_buffer delete_timestamp_buffer,
- u_trace_record_ts record_timestamp,
- u_trace_read_ts read_timestamp,
- u_trace_delete_flush_data delete_flush_data);
+ void *pctx,
+ u_trace_create_ts_buffer create_timestamp_buffer,
+ u_trace_delete_ts_buffer delete_timestamp_buffer,
+ u_trace_record_ts record_timestamp,
+ u_trace_read_ts read_timestamp,
+ u_trace_delete_flush_data delete_flush_data);
void u_trace_context_fini(struct u_trace_context *utctx);
/**
- * Flush (trigger processing) of traces previously flushed to the trace-context
- * by u_trace_flush().
+ * Flush (trigger processing) of traces previously flushed to the
+ * trace-context by u_trace_flush().
*
* This should typically be called in the driver's pctx->flush().
*/
@@ -205,39 +243,37 @@ void u_trace_context_process(struct u_trace_context *utctx, bool eof);
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);
void u_trace_fini(struct u_trace *ut);
+void u_trace_state_init(void);
+bool u_trace_is_enabled(enum u_trace_type type);
+
bool u_trace_has_points(struct u_trace *ut);
-struct u_trace_iterator
-{
+struct u_trace_iterator {
struct u_trace *ut;
struct u_trace_chunk *chunk;
uint32_t event_idx;
};
-struct u_trace_iterator
-u_trace_begin_iterator(struct u_trace *ut);
+struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut);
-struct u_trace_iterator
-u_trace_end_iterator(struct u_trace *ut);
+struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut);
-bool
-u_trace_iterator_equal(struct u_trace_iterator a,
- struct u_trace_iterator b);
+bool u_trace_iterator_equal(struct u_trace_iterator a,
+ struct u_trace_iterator b);
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
- void *cmdstream,
- void *ts_from, uint32_t from_offset,
- void *ts_to, uint32_t to_offset,
- uint32_t count);
+ void *cmdstream,
+ void *ts_from,
+ uint32_t from_offset,
+ void *ts_to,
+ uint32_t to_offset,
+ uint32_t count);
/**
* Clones tracepoints range into target u_trace.
* Provides callback for driver to copy timestamps on GPU from
* one buffer to another.
*
- * The payload is shared and remains owned by the original u_trace
- * if tracepoints are being copied between different u_trace!
- *
* It allows:
* - Tracing re-usable command buffer in Vulkan, by copying tracepoints
* each time it is submitted.
@@ -255,37 +291,71 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it,
/**
* Flush traces to the parent trace-context. At this point, the expectation
- * is that all the tracepoints are "executed" by the GPU following any previously
- * flushed u_trace batch.
+ * is that all the tracepoints are "executed" by the GPU following any
+ * previously flushed u_trace batch.
*
- * flush_data is a way for driver to pass additional data, which becomes available
- * only at the point of flush, to the u_trace_read_ts callback and perfetto.
- * The typical example of such data would be a fence to wait on in u_trace_read_ts,
- * and a submission_id to pass into perfetto.
- * The destruction of the data is done via u_trace_delete_flush_data.
+ * flush_data is a way for driver to pass additional data, which becomes
+ * available only at the point of flush, to the u_trace_read_ts callback and
+ * perfetto. The typical example of such data would be a fence to wait on in
+ * u_trace_read_ts, and a submission_id to pass into perfetto. The destruction
+ * of the data is done via u_trace_delete_flush_data.
*
- * This should typically be called when the corresponding cmdstream (containing
- * the timestamp reads) is flushed to the kernel.
+ * This should typically be called when the corresponding cmdstream
+ * (containing the timestamp reads) is flushed to the kernel.
*/
void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data);
#ifdef HAVE_PERFETTO
-extern int ut_perfetto_enabled;
+static ALWAYS_INLINE bool
+u_trace_perfetto_active(struct u_trace_context *utctx)
+{
+ return p_atomic_read_relaxed(&utctx->enabled_traces) &
+ U_TRACE_TYPE_PERFETTO_ACTIVE;
+}
void u_trace_perfetto_start(void);
void u_trace_perfetto_stop(void);
#else
-# define ut_perfetto_enabled 0
+static ALWAYS_INLINE bool
+u_trace_perfetto_active(UNUSED struct u_trace_context *utctx)
+{
+ return false;
+}
#endif
-static inline bool
-u_trace_context_tracing(struct u_trace_context *utctx)
+/**
+ * Return whether u_trace is enabled at all. This can be used to gate any
+ * expensive traces.
+ */
+static ALWAYS_INLINE bool
+u_trace_enabled(struct u_trace_context *utctx)
+{
+ return p_atomic_read_relaxed(&utctx->enabled_traces) != 0;
+}
+
+/**
+ * Return whether chunks should be processed or not.
+ */
+static ALWAYS_INLINE bool
+u_trace_should_process(struct u_trace_context *utctx)
+{
+ return p_atomic_read_relaxed(&utctx->enabled_traces) &
+ U_TRACE_TYPE_REQUIRE_PROCESSING;
+}
+
+/**
+ * Return whether to emit markers into the command stream even if the queue
+ * isn't active.
+ */
+static ALWAYS_INLINE bool
+u_trace_markers_enabled(struct u_trace_context *utctx)
{
- return !!utctx->out || (ut_perfetto_enabled > 0);
+ return p_atomic_read_relaxed(&utctx->enabled_traces) &
+ U_TRACE_TYPE_MARKERS;
}
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-#endif /* _U_TRACE_H */
+#endif /* _U_TRACE_H */
diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py
index d0d99c828ee..1035b9c4cf9 100644
--- a/src/util/perf/u_trace.py
+++ b/src/util/perf/u_trace.py
@@ -23,15 +23,18 @@
from mako.template import Template
from collections import namedtuple
-from enum import Flag, auto
+from enum import IntEnum
import os
TRACEPOINTS = {}
+TRACEPOINTS_TOGGLES = {}
class Tracepoint(object):
"""Class that represents all the information about a tracepoint
"""
- def __init__(self, name, args=[], tp_struct=None, tp_print=None, tp_perfetto=None):
+ def __init__(self, name, args=[], toggle_name=None,
+ tp_struct=None, tp_print=None, tp_perfetto=None,
+ tp_markers=None, end_of_pipe=False, need_cs_param=True):
"""Parameters:
- name: the tracepoint name, a tracepoint function with the given
@@ -42,20 +45,48 @@ class Tracepoint(object):
- tp_print: (optional) array of format string followed by expressions
- tp_perfetto: (optional) driver provided callback which can generate
perfetto events
+ - tp_markers: (optional) driver provided printf-style callback which can
+ generate CS markers. This requires 'need_cs_param', because the callback
+ is passed the CS that the label should be emitted into
+ - need_cs_param: whether tracepoint functions need an additional cs
+ parameter.
"""
assert isinstance(name, str)
assert isinstance(args, list)
assert name not in TRACEPOINTS
+
self.name = name
self.args = args
if tp_struct is None:
tp_struct = args
self.tp_struct = tp_struct
+ self.has_variable_arg = False
+ for arg in self.tp_struct:
+ if arg.length_arg != None:
+ self.has_variable_arg = True
+ break
self.tp_print = tp_print
self.tp_perfetto = tp_perfetto
+ self.tp_markers = tp_markers
+ self.end_of_pipe = end_of_pipe
+ self.toggle_name = toggle_name
+ self.need_cs_param = need_cs_param
TRACEPOINTS[name] = self
+ if toggle_name is not None and toggle_name not in TRACEPOINTS_TOGGLES:
+ TRACEPOINTS_TOGGLES[toggle_name] = len(TRACEPOINTS_TOGGLES)
+
+ def can_generate_print(self):
+ return self.args is not None and len(self.args) > 0
+
+ def enabled_expr(self, trace_toggle_name):
+ if trace_toggle_name is None:
+ return "true"
+ assert self.toggle_name is not None
+ return "({0} & {1}_{2})".format(trace_toggle_name,
+ trace_toggle_name.upper(),
+ self.toggle_name.upper())
class TracepointArgStruct():
"""Represents struct that is being passed as an argument
@@ -75,7 +106,7 @@ class TracepointArgStruct():
class TracepointArg(object):
"""Class that represents either an argument being passed or a field in a struct
"""
- def __init__(self, type, var, c_format, name=None, to_prim_type=None):
+ def __init__(self, type, var, c_format, name=None, to_prim_type=None, length_arg=None, copy_func=None):
"""Parameters:
- type: argument's C type.
@@ -85,6 +116,7 @@ class TracepointArg(object):
be displayed in output or perfetto, otherwise var will be used.
- to_prim_type: (optional) C function to convert from arg's type to a type
compatible with c_format.
+ - length_arg: (optional) C expression for the length of a variable length array argument, None otherwise
"""
assert isinstance(type, str)
assert isinstance(var, str)
@@ -97,18 +129,21 @@ class TracepointArg(object):
name = var
self.name = name
self.to_prim_type = to_prim_type
+ self.length_arg = length_arg
+ self.copy_func = copy_func
HEADERS = []
-class HeaderScope(Flag):
- HEADER = auto()
- SOURCE = auto()
+class HeaderScope(IntEnum):
+ HEADER = (1 << 0)
+ SOURCE = (1 << 1)
+ PERFETTO = (1 << 2)
class Header(object):
"""Class that represents a header file dependency of generated tracepoints
"""
- def __init__(self, hdr, scope=HeaderScope.HEADER|HeaderScope.SOURCE):
+ def __init__(self, hdr, scope=HeaderScope.HEADER):
"""Parameters:
- hdr: the required header path
@@ -173,50 +208,80 @@ extern "C" {
${declaration.decl};
% endfor
+% if trace_toggle_name is not None:
+enum ${trace_toggle_name.lower()} {
+% for toggle_name, config_id in TRACEPOINTS_TOGGLES.items():
+ ${trace_toggle_name.upper()}_${toggle_name.upper()} = 1ull << ${config_id},
+% endfor
+};
+
+extern uint64_t ${trace_toggle_name};
+
+void ${trace_toggle_name}_config_variable(void);
+% endif
+
% for trace_name, trace in TRACEPOINTS.items():
+
/*
* ${trace_name}
*/
struct trace_${trace_name} {
% for arg in trace.tp_struct:
- ${arg.type} ${arg.name};
+ ${arg.type} ${arg.name}${"[0]" if arg.length_arg else ""};
% endfor
% if len(trace.args) == 0:
-#ifdef __cplusplus
- /* avoid warnings about empty struct size mis-match in C vs C++..
- * the size mis-match is harmless because (a) nothing will deref
- * the empty struct, and (b) the code that cares about allocating
- * sizeof(struct trace_${trace_name}) (and wants this to be zero
- * if there is no payload) is C
- */
- uint8_t dummy;
+#ifdef __cplusplus
+ /* avoid warnings about empty struct size mis-match in C vs C++..
+ * the size mis-match is harmless because (a) nothing will deref
+ * the empty struct, and (b) the code that cares about allocating
+ * sizeof(struct trace_${trace_name}) (and wants this to be zero
+ * if there is no payload) is C
+ */
+ uint8_t dummy;
#endif
% endif
};
% if trace.tp_perfetto is not None:
#ifdef HAVE_PERFETTO
-void ${trace.tp_perfetto}(${ctx_param}, uint64_t ts_ns, const void *flush_data, const struct trace_${trace_name} *payload);
+void ${trace.tp_perfetto}(
+ ${ctx_param},
+ uint64_t ts_ns,
+ uint16_t tp_idx,
+ const void *flush_data,
+ const struct trace_${trace_name} *payload);
#endif
% endif
-void __trace_${trace_name}(struct u_trace *ut, void *cs
+void __trace_${trace_name}(
+ struct u_trace *ut
+ , enum u_trace_type enabled_traces
+% if trace.need_cs_param:
+ , void *cs
+% endif
% for arg in trace.args:
, ${arg.type} ${arg.var}
% endfor
);
-static inline void trace_${trace_name}(struct u_trace *ut, void *cs
+static ALWAYS_INLINE void trace_${trace_name}(
+ struct u_trace *ut
+% if trace.need_cs_param:
+ , void *cs
+% endif
% for arg in trace.args:
- , ${arg.type} ${arg.var}
+ , ${arg.type} ${arg.var}
% endfor
) {
-% if trace.tp_perfetto is not None:
- if (!unlikely(ut->enabled || ut_perfetto_enabled))
-% else:
- if (!unlikely(ut->enabled))
-% endif
+ enum u_trace_type enabled_traces = p_atomic_read_relaxed(&ut->utctx->enabled_traces);
+ if (!unlikely(enabled_traces != 0 &&
+ ${trace.enabled_expr(trace_toggle_name)}))
return;
- __trace_${trace_name}(ut, cs
+ __trace_${trace_name}(
+ ut
+ , enabled_traces
+% if trace.need_cs_param:
+ , cs
+% endif
% for arg in trace.args:
- , ${arg.var}
+ , ${arg.var}
% endfor
);
}
@@ -252,92 +317,221 @@ src_template = """\
* IN THE SOFTWARE.
*/
+#include "${hdr}"
+
% for header in HEADERS:
#include "${header.hdr}"
% endfor
-#include "${hdr}"
-
#define __NEEDS_TRACE_PRIV
+#include "util/u_debug.h"
#include "util/perf/u_trace_priv.h"
-% for trace_name, trace in TRACEPOINTS.items():
+% if trace_toggle_name is not None:
+static const struct debug_control config_control[] = {
+% for toggle_name in TRACEPOINTS_TOGGLES.keys():
+ { "${toggle_name}", ${trace_toggle_name.upper()}_${toggle_name.upper()}, },
+% endfor
+ { NULL, 0, },
+};
+uint64_t ${trace_toggle_name} = 0;
+
+static void
+${trace_toggle_name}_variable_once(void)
+{
+ uint64_t default_value = 0
+% for name in trace_toggle_defaults:
+ | ${trace_toggle_name.upper()}_${name.upper()}
+% endfor
+ ;
+
+ ${trace_toggle_name} =
+ parse_enable_string(getenv("${trace_toggle_name.upper()}"),
+ default_value,
+ config_control);
+}
+
+void
+${trace_toggle_name}_config_variable(void)
+{
+ static once_flag process_${trace_toggle_name}_variable_flag = ONCE_FLAG_INIT;
+
+ call_once(&process_${trace_toggle_name}_variable_flag,
+ ${trace_toggle_name}_variable_once);
+}
+% endif
+
+% for index, (trace_name, trace) in enumerate(TRACEPOINTS.items()):
/*
* ${trace_name}
*/
-% if trace.args is not None and len(trace.args) > 0:
+ % if trace.can_generate_print():
static void __print_${trace_name}(FILE *out, const void *arg) {
const struct trace_${trace_name} *__entry =
(const struct trace_${trace_name} *)arg;
-% if trace.tp_print is not None:
+ % if trace.tp_print is not None:
fprintf(out, "${trace.tp_print[0]}\\n"
-% for arg in trace.tp_print[1:]:
+ % for arg in trace.tp_print[1:]:
, ${arg}
-% endfor
-% else:
+ % endfor
+ % else:
fprintf(out, ""
-% for arg in trace.tp_struct:
+ % for arg in trace.tp_struct:
"${arg.name}=${arg.c_format}, "
-% endfor
+ % endfor
"\\n"
-% for arg in trace.tp_struct:
- % if arg.to_prim_type:
+ % for arg in trace.tp_struct:
+ % if arg.to_prim_type:
,${arg.to_prim_type.format('__entry->' + arg.name)}
- % else:
+ % else:
,__entry->${arg.name}
- % endif
-% endfor
-%endif
+ % endif
+ % endfor
+ % endif
);
}
-% else:
+
+static void __print_json_${trace_name}(FILE *out, const void *arg) {
+ const struct trace_${trace_name} *__entry =
+ (const struct trace_${trace_name} *)arg;
+ % if trace.tp_print is not None:
+ fprintf(out, "\\"unstructured\\": \\"${trace.tp_print[0]}\\""
+ % for arg in trace.tp_print[1:]:
+ , ${arg}
+ % endfor
+ % else:
+ fprintf(out, ""
+ % for arg in trace.tp_struct:
+ "\\"${arg.name}\\": \\"${arg.c_format}\\""
+ % if arg != trace.tp_struct[-1]:
+ ", "
+ % endif
+ % endfor
+ % for arg in trace.tp_struct:
+ % if arg.to_prim_type:
+ ,${arg.to_prim_type.format('__entry->' + arg.name)}
+ % else:
+ ,__entry->${arg.name}
+ % endif
+ % endfor
+ % endif
+ );
+}
+
+ % else:
#define __print_${trace_name} NULL
-% endif
+#define __print_json_${trace_name} NULL
+ % endif
+ % if trace.tp_markers is not None:
+
+__attribute__((format(printf, 3, 4))) void ${trace.tp_markers}(struct u_trace_context *utctx, void *, const char *, ...);
+
+static void __emit_label_${trace_name}(struct u_trace_context *utctx, void *cs, struct trace_${trace_name} *entry) {
+ ${trace.tp_markers}(utctx, cs, "${trace_name}("
+ % for idx,arg in enumerate(trace.tp_struct):
+ "${"," if idx != 0 else ""}${arg.name}=${arg.c_format}"
+ % endfor
+ ")"
+ % for arg in trace.tp_struct:
+ % if arg.to_prim_type:
+ ,${arg.to_prim_type.format('entry->' + arg.name)}
+ % else:
+ ,entry->${arg.name}
+ % endif
+ % endfor
+ );
+}
+
+ % endif
static const struct u_tracepoint __tp_${trace_name} = {
ALIGN_POT(sizeof(struct trace_${trace_name}), 8), /* keep size 64b aligned */
"${trace_name}",
+ ${"true" if trace.end_of_pipe else "false"},
+ ${index},
__print_${trace_name},
-% if trace.tp_perfetto is not None:
+ __print_json_${trace_name},
+ % if trace.tp_perfetto is not None:
#ifdef HAVE_PERFETTO
- (void (*)(void *pctx, uint64_t, const void *, const void *))${trace.tp_perfetto},
+ (void (*)(void *pctx, uint64_t, uint16_t, const void *, const void *))${trace.tp_perfetto},
#endif
-% endif
+ % endif
};
-void __trace_${trace_name}(struct u_trace *ut, void *cs
-% for arg in trace.args:
- , ${arg.type} ${arg.var}
-% endfor
+void __trace_${trace_name}(
+ struct u_trace *ut
+ , enum u_trace_type enabled_traces
+ % if trace.need_cs_param:
+ , void *cs
+ % endif
+ % for arg in trace.args:
+ , ${arg.type} ${arg.var}
+ % endfor
) {
- struct trace_${trace_name} *__entry =
- (struct trace_${trace_name} *)u_trace_append(ut, cs, &__tp_${trace_name});
- (void)__entry;
-% for arg in trace.tp_struct:
- __entry->${arg.name} = ${arg.var};
-% endfor
+ struct trace_${trace_name} entry;
+ UNUSED struct trace_${trace_name} *__entry =
+ enabled_traces & U_TRACE_TYPE_REQUIRE_QUEUING ?
+ % if trace.has_variable_arg:
+ (struct trace_${trace_name} *)u_trace_appendv(ut, ${"cs," if trace.need_cs_param else "NULL,"} &__tp_${trace_name},
+ 0
+ % for arg in trace.tp_struct:
+ % if arg.length_arg is not None:
+ + ${arg.length_arg}
+ % endif
+ % endfor
+ ) :
+ % else:
+ (struct trace_${trace_name} *)u_trace_append(ut, ${"cs," if trace.need_cs_param else "NULL,"} &__tp_${trace_name}) :
+ % endif
+ &entry;
+ % for arg in trace.tp_struct:
+ % if arg.length_arg is None:
+ __entry->${arg.name} = ${arg.var};
+ % else:
+ ${arg.copy_func}(__entry->${arg.name}, ${arg.var}, ${arg.length_arg});
+ % endif
+ % endfor
+ % if trace.tp_markers is not None:
+ if (enabled_traces & U_TRACE_TYPE_MARKERS)
+ __emit_label_${trace_name}(ut->utctx, cs, __entry);
+ % endif
}
% endfor
"""
-def utrace_generate(cpath, hpath, ctx_param):
+def utrace_generate(cpath, hpath, ctx_param, trace_toggle_name=None,
+ trace_toggle_defaults=[]):
+ """Parameters:
+
+ - cpath: c file to generate.
+ - hpath: h file to generate.
+ - ctx_param: type of the first parameter to the perfetto vfuncs.
+ - trace_toggle_name: (optional) name of the environment variable
+ enabling/disabling tracepoints.
+ - trace_toggle_defaults: (optional) list of tracepoints enabled by default.
+ """
if cpath is not None:
hdr = os.path.basename(cpath).rsplit('.', 1)[0] + '.h'
- with open(cpath, 'w') as f:
+ with open(cpath, 'w', encoding='utf-8') as f:
f.write(Template(src_template).render(
hdr=hdr,
ctx_param=ctx_param,
+ trace_toggle_name=trace_toggle_name,
+ trace_toggle_defaults=trace_toggle_defaults,
HEADERS=[h for h in HEADERS if h.scope & HeaderScope.SOURCE],
- TRACEPOINTS=TRACEPOINTS))
+ TRACEPOINTS=TRACEPOINTS,
+ TRACEPOINTS_TOGGLES=TRACEPOINTS_TOGGLES))
if hpath is not None:
hdr = os.path.basename(hpath)
- with open(hpath, 'w') as f:
+ with open(hpath, 'w', encoding='utf-8') as f:
f.write(Template(hdr_template).render(
hdrname=hdr.rstrip('.h').upper(),
ctx_param=ctx_param,
+ trace_toggle_name=trace_toggle_name,
HEADERS=[h for h in HEADERS if h.scope & HeaderScope.HEADER],
FORWARD_DECLS=FORWARD_DECLS,
- TRACEPOINTS=TRACEPOINTS))
+ TRACEPOINTS=TRACEPOINTS,
+ TRACEPOINTS_TOGGLES=TRACEPOINTS_TOGGLES))
perfetto_utils_hdr_template = """\
@@ -370,40 +564,52 @@ perfetto_utils_hdr_template = """\
#include <perfetto.h>
+% for header in HEADERS:
+#include "${header.hdr}"
+% endfor
+
+UNUSED static const char *${basename}_names[] = {
+% for trace_name, trace in TRACEPOINTS.items():
+ "${trace_name}",
+% endfor
+};
+
% for trace_name, trace in TRACEPOINTS.items():
static void UNUSED
trace_payload_as_extra_${trace_name}(perfetto::protos::pbzero::GpuRenderStageEvent *event,
const struct trace_${trace_name} *payload)
{
-% if all([trace.tp_perfetto, trace.tp_struct]) and len(trace.tp_struct) > 0:
+ % if all([trace.tp_perfetto, trace.tp_struct]) and len(trace.tp_struct) > 0:
char buf[128];
-% for arg in trace.tp_struct:
+ % for arg in trace.tp_struct:
{
auto data = event->add_extra_data();
data->set_name("${arg.name}");
-% if arg.to_prim_type:
+ % if arg.to_prim_type:
sprintf(buf, "${arg.c_format}", ${arg.to_prim_type.format('payload->' + arg.name)});
-% else:
+ % else:
sprintf(buf, "${arg.c_format}", payload->${arg.name});
-% endif
+ % endif
data->set_value(buf);
}
-% endfor
+ % endfor
-% endif
+ % endif
}
% endfor
#endif /* ${guard_name} */
"""
-def utrace_generate_perfetto_utils(hpath):
+def utrace_generate_perfetto_utils(hpath,basename="tracepoint"):
if hpath is not None:
hdr = os.path.basename(hpath)
- with open(hpath, 'wb') as f:
- f.write(Template(perfetto_utils_hdr_template, output_encoding='utf-8').render(
+ with open(hpath, 'w', encoding='utf-8') as f:
+ f.write(Template(perfetto_utils_hdr_template).render(
+ basename=basename,
hdrname=hdr.rstrip('.h').upper(),
+ HEADERS=[h for h in HEADERS if h.scope & HeaderScope.PERFETTO],
TRACEPOINTS=TRACEPOINTS))
diff --git a/src/util/perf/u_trace_priv.h b/src/util/perf/u_trace_priv.h
index 331a8c84ace..a25811a48e8 100644
--- a/src/util/perf/u_trace_priv.h
+++ b/src/util/perf/u_trace_priv.h
@@ -16,45 +16,84 @@
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
*/
#ifndef __NEEDS_TRACE_PRIV
-# error "Do not use this header!"
+#error "Do not use this header!"
#endif
#ifndef _U_TRACE_PRIV_H
#define _U_TRACE_PRIV_H
-#include <stdio.h>
-
#include "u_trace.h"
+#include <stdio.h>
/*
* Internal interface used by generated tracepoints
*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
* Tracepoint descriptor.
*/
struct u_tracepoint {
unsigned payload_sz;
const char *name;
+ /**
+ * Whether this tracepoint's timestamp must be recorded as an
+ * end-of-pipe timestamp (for some GPUs the timestamp recording instruction
+ * might be different for top/end of pipe).
+ */
+ bool end_of_pipe:1;
+ /**
+ * Index of this tracepoint in <basename>_names in the generated
+ * u_trace perfetto header. By associating these names with iids in setup,
+ * tracepoints can be presented with their own names by passing that
+ * to event->set_stage_iid().
+ */
+ uint16_t tp_idx;
void (*print)(FILE *out, const void *payload);
+ void (*print_json)(FILE *out, const void *payload);
#ifdef HAVE_PERFETTO
/**
* Callback to emit a perfetto event, such as render-stage trace
*/
- void (*perfetto)(void *pctx, uint64_t ts_ns, const void *flush_data, const void *payload);
+ void (*perfetto)(void *pctx,
+ uint64_t ts_ns,
+ uint16_t tp_idx,
+ const void *flush_data,
+ const void *payload);
#endif
};
/**
- * Append a tracepoint, returning pointer that can be filled with trace
- * payload.
+ * Append a tracepoint followed by some amount of memory specified by
+ * variable_sz, returning pointer that can be filled with trace payload.
*/
-void * u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp);
+void *u_trace_appendv(struct u_trace *ut,
+ void *cs,
+ const struct u_tracepoint *tp,
+ unsigned variable_sz);
+
+/**
+ * Append a trace event, returning pointer to buffer of tp->payload_sz
+ * to be filled in with trace payload. Called by generated tracepoint
+ * functions.
+ */
+static inline void *
+u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp)
+{
+ return u_trace_appendv(ut, cs, tp, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
-#endif /* _U_TRACE_PRIV_H */
+#endif /* _U_TRACE_PRIV_H */