diff options
Diffstat (limited to 'src/util/perf')
-rw-r--r-- | src/util/perf/.clang-format | 20 | ||||
-rw-r--r-- | src/util/perf/cpu_trace.h | 113 | ||||
-rw-r--r-- | src/util/perf/gpuvis_trace_utils.h | 795 | ||||
-rw-r--r-- | src/util/perf/u_gpuvis.c | 53 | ||||
-rw-r--r-- | src/util/perf/u_gpuvis.h | 35 | ||||
-rw-r--r-- | src/util/perf/u_perfetto.cc | 105 | ||||
-rw-r--r-- | src/util/perf/u_perfetto.h | 78 | ||||
-rw-r--r-- | src/util/perf/u_perfetto_renderpass.h | 156 | ||||
-rw-r--r-- | src/util/perf/u_trace.c | 614 | ||||
-rw-r--r-- | src/util/perf/u_trace.h | 186 | ||||
-rw-r--r-- | src/util/perf/u_trace.py | 352 | ||||
-rw-r--r-- | src/util/perf/u_trace_priv.h | 61 |
12 files changed, 2294 insertions, 274 deletions
diff --git a/src/util/perf/.clang-format b/src/util/perf/.clang-format new file mode 100644 index 00000000000..11f7a8e3bd6 --- /dev/null +++ b/src/util/perf/.clang-format @@ -0,0 +1,20 @@ + +BasedOnStyle: InheritParentConfig +DisableFormat: false + +BinPackParameters: false + +ColumnLimit: 78 + +Cpp11BracedListStyle: false +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^"(c11/|compiler/|main/|nir/|spirv/|util/|vk_|wsi_)' + Priority: 3 + - Regex: '^"(perfetto)' + Priority: 2 + - Regex: '.*' + Priority: 1 + +SpaceAfterCStyleCast: true +SpaceBeforeCpp11BracedList: true diff --git a/src/util/perf/cpu_trace.h b/src/util/perf/cpu_trace.h new file mode 100644 index 00000000000..75db0f7badd --- /dev/null +++ b/src/util/perf/cpu_trace.h @@ -0,0 +1,113 @@ +/* + * Copyright 2022 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef CPU_TRACE_H +#define CPU_TRACE_H + +#include "u_perfetto.h" +#include "u_gpuvis.h" + +#include "util/detect_os.h" +#include "util/macros.h" + +#if defined(HAVE_PERFETTO) + +/* note that util_perfetto_is_tracing_enabled always returns false until + * util_perfetto_init is called + */ +#define _MESA_TRACE_BEGIN(name) \ + do { \ + if (unlikely(util_perfetto_is_tracing_enabled())) \ + util_perfetto_trace_begin(name); \ + } while (0) + +#define _MESA_TRACE_END() \ + do { \ + if (unlikely(util_perfetto_is_tracing_enabled())) \ + util_perfetto_trace_end(); \ + } while (0) + +/* NOTE: for now disable atrace for C++ to workaround a ndk bug with ordering + * between stdatomic.h and atomic.h. 
See: + * + * https://github.com/android/ndk/issues/1178 + */ +#elif DETECT_OS_ANDROID && !defined(__cplusplus) + +#include <cutils/trace.h> + +#define _MESA_TRACE_BEGIN(name) \ + atrace_begin(ATRACE_TAG_GRAPHICS, name) +#define _MESA_TRACE_END() atrace_end(ATRACE_TAG_GRAPHICS) + +#else + +#define _MESA_TRACE_BEGIN(name) +#define _MESA_TRACE_END() + +#endif /* HAVE_PERFETTO */ + +#if defined(HAVE_GPUVIS) + +#define _MESA_GPUVIS_TRACE_BEGIN(name) util_gpuvis_begin(name) +#define _MESA_GPUVIS_TRACE_END() util_gpuvis_end() + +#else + +#define _MESA_GPUVIS_TRACE_BEGIN(name) +#define _MESA_GPUVIS_TRACE_END() + +#endif /* HAVE_GPUVIS */ + +#if __has_attribute(cleanup) && __has_attribute(unused) + +#define _MESA_TRACE_SCOPE_VAR_CONCAT(name, suffix) name##suffix +#define _MESA_TRACE_SCOPE_VAR(suffix) \ + _MESA_TRACE_SCOPE_VAR_CONCAT(_mesa_trace_scope_, suffix) + +/* This must expand to a single non-scoped statement for + * + * if (cond) + * _MESA_TRACE_SCOPE(...) + * + * to work. + */ +#define _MESA_TRACE_SCOPE(name) \ + int _MESA_TRACE_SCOPE_VAR(__LINE__) \ + __attribute__((cleanup(_mesa_trace_scope_end), unused)) = \ + _mesa_trace_scope_begin(name) + +static inline int +_mesa_trace_scope_begin(const char *name) +{ + _MESA_TRACE_BEGIN(name); + _MESA_GPUVIS_TRACE_BEGIN(name); + return 0; +} + +static inline void +_mesa_trace_scope_end(UNUSED int *scope) +{ + _MESA_GPUVIS_TRACE_END(); + _MESA_TRACE_END(); +} + +#else + +#define _MESA_TRACE_SCOPE(name) + +#endif /* __has_attribute(cleanup) && __has_attribute(unused) */ + +#define MESA_TRACE_SCOPE(name) _MESA_TRACE_SCOPE(name) +#define MESA_TRACE_FUNC() _MESA_TRACE_SCOPE(__func__) + +static inline void +util_cpu_trace_init() +{ + util_perfetto_init(); + util_gpuvis_init(); +} + +#endif /* CPU_TRACE_H */ diff --git a/src/util/perf/gpuvis_trace_utils.h b/src/util/perf/gpuvis_trace_utils.h new file mode 100644 index 00000000000..b5120c201d3 --- /dev/null +++ b/src/util/perf/gpuvis_trace_utils.h @@ -0,0 +1,795 @@ 
+////////////////////////////////////////////////////////////////////////////// +// gpuvis_trace_utils.h - v0.10 - public domain +// no warranty is offered or implied; use this code at your own risk +// +// This is a single header file with useful utilities for gpuvis linux tracing +// +// ============================================================================ +// You MUST define GPUVIS_TRACE_IMPLEMENTATION in EXACTLY _one_ C or C++ file +// that includes this header, BEFORE the include, like this: +// +// #define GPUVIS_TRACE_IMPLEMENTATION +// #include "gpuvis_trace_utils.h" +// +// All other files should just #include "gpuvis_trace_utils.h" w/o the #define. +// ============================================================================ +// +// Credits +// +// Michael Sartain +// +// LICENSE +// +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. + +////////////////////////////////////////////////////////////////////////////// +// +// INCLUDE SECTION +// + +#ifndef _GPUVIS_TRACE_UTILS_H_ +#define _GPUVIS_TRACE_UTILS_H_ + +#include <stdarg.h> + +#if !defined( __linux__ ) +#define GPUVIS_TRACE_UTILS_DISABLE +#endif + +#if defined( __clang__ ) || defined( __GNUC__ ) +// printf-style warnings for user functions. 
+#define GPUVIS_ATTR_PRINTF( _x, _y ) __attribute__( ( __format__( __printf__, _x, _y ) ) ) +#define GPUVIS_MAY_BE_UNUSED __attribute__( ( unused ) ) +#define GPUVIS_CLEANUP_FUNC( x ) __attribute__( ( __cleanup__( x ) ) ) +#else +#define GPUVIS_ATTR_PRINTF( _x, _y ) +#define GPUVIS_MAY_BE_UNUSED +#define GPUVIS_CLEANUP_FUNC( x ) +#endif + +#if !defined( GPUVIS_TRACE_UTILS_DISABLE ) + +#include <time.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef __cplusplus + #define GPUVIS_EXTERN extern "C" + #if __cplusplus>=201103L + #define THREAD_LOCAL thread_local + #else + #define THREAD_LOCAL __thread + #endif +#else + #define GPUVIS_EXTERN extern +#endif + +// From kernel/trace/trace.h +#ifndef TRACE_BUF_SIZE +#define TRACE_BUF_SIZE 1024 +#endif + +// Try to open tracefs trace_marker file for writing. Returns -1 on error. +GPUVIS_EXTERN int gpuvis_trace_init( void ); +// Close tracefs trace_marker file. +GPUVIS_EXTERN void gpuvis_trace_shutdown( void ); + +// Write user event to tracefs trace_marker. +GPUVIS_EXTERN int gpuvis_trace_printf( const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 1, 2 ); +GPUVIS_EXTERN int gpuvis_trace_vprintf( const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 1, 0 ); + +// Write user event (with duration=XXms) to tracefs trace_marker. +GPUVIS_EXTERN int gpuvis_trace_duration_printf( float duration, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 ); +GPUVIS_EXTERN int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 ); + +// Write user event (with begin_ctx=XX) to tracefs trace_marker. +GPUVIS_EXTERN int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 ); +GPUVIS_EXTERN int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 ); + +// Write user event (with end_ctx=XX) to tracefs trace_marker. 
+GPUVIS_EXTERN int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 ); +GPUVIS_EXTERN int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 ); + +// Execute "trace-cmd start -b 2000 -D -i -e sched:sched_switch -e ..." +GPUVIS_EXTERN int gpuvis_start_tracing( unsigned int kbuffersize ); +// Execute "trace-cmd extract" +GPUVIS_EXTERN int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size ); +// Execute "trace-cmd reset" +GPUVIS_EXTERN int gpuvis_stop_tracing( void ); + +// -1: tracing not setup, 0: tracing disabled, 1: tracing enabled. +GPUVIS_EXTERN int gpuvis_tracing_on( void ); + +// Get tracefs directory. Ie: /sys/kernel/tracing. Returns "" on error. +GPUVIS_EXTERN const char *gpuvis_get_tracefs_dir( void ); + +// Get tracefs file path in buf. Ie: /sys/kernel/tracing/trace_marker. Returns NULL on error. +GPUVIS_EXTERN const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file ); + +// Internal function used by GPUVIS_COUNT_HOT_FUNC_CALLS macro +GPUVIS_EXTERN void gpuvis_count_hot_func_calls_internal_( const char *func ); + +struct GpuvisTraceBlock; +static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock *block, const char *str ); +static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block ); + +struct GpuvisTraceBlockf; +static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap ); +static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... 
) GPUVIS_ATTR_PRINTF( 2, 3 ); +static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block ); + +#define LNAME3( _name, _line ) _name ## _line +#define LNAME2( _name, _line ) LNAME3( _name, _line ) +#define LNAME( _name ) LNAME2( _name, __LINE__ ) + +struct GpuvisTraceBlock +{ + uint64_t m_t0; + const char *m_str; + +#ifdef __cplusplus + GpuvisTraceBlock( const char *str ) + { + gpuvis_trace_block_begin( this, str ); + } + + ~GpuvisTraceBlock() + { + gpuvis_trace_block_end( this ); + } +#endif +}; + +struct GpuvisTraceBlockf +{ + uint64_t m_t0; + char m_buf[ TRACE_BUF_SIZE ]; + +#ifdef __cplusplus + GpuvisTraceBlockf( const char *fmt, ... ) GPUVIS_ATTR_PRINTF( 2, 3 ) + { + va_list args; + va_start( args, fmt ); + gpuvis_trace_blockf_vbegin( this, fmt, args ); + va_end( args ); + } + + ~GpuvisTraceBlockf() + { + gpuvis_trace_blockf_end( this ); + } +#endif +}; + +#ifdef __cplusplus + +#define GPUVIS_TRACE_BLOCK( _conststr ) GpuvisTraceBlock LNAME( gpuvistimeblock )( _conststr ) +#define GPUVIS_TRACE_BLOCKF( _fmt, ... ) GpuvisTraceBlockf LNAME( gpuvistimeblock )( _fmt, __VA_ARGS__ ) + +#else + +#if defined( __clang__ ) || defined( __GNUC__ ) + +#define GPUVIS_TRACE_BLOCKF_INIT( _unique, _fmt, ... ) \ + ({ \ + struct GpuvisTraceBlockf _unique; \ + gpuvis_trace_blockf_begin( & _unique, _fmt, __VA_ARGS__ ); \ + _unique; \ + }) + +#define GPUVIS_TRACE_BLOCKF( _fmt, ...) \ + GPUVIS_CLEANUP_FUNC( gpuvis_trace_blockf_end ) GPUVIS_MAY_BE_UNUSED struct GpuvisTraceBlockf LNAME( gpuvistimeblock ) = \ + GPUVIS_TRACE_BLOCKF_INIT( LNAME( gpuvistimeblock_init ), _fmt, __VA_ARGS__ ) + +#define GPUVIS_TRACE_BLOCK( _conststr ) \ + GPUVIS_CLEANUP_FUNC( gpuvis_trace_block_end ) GPUVIS_MAY_BE_UNUSED struct GpuvisTraceBlock LNAME( gpuvistimeblock ) = \ + {\ + .m_t0 = gpuvis_gettime_u64(), \ + .m_str = _conststr \ + } + +#else + +#define GPUVIS_TRACE_BLOCKF( _fmt, ... 
) +#define GPUVIS_TRACE_BLOCK( _conststr ) + +#endif // __clang__ || __GNUC__ + +#endif // __cplusplus + +static inline uint64_t gpuvis_gettime_u64( void ) +{ + struct timespec ts; + + clock_gettime( CLOCK_MONOTONIC, &ts ); + return ( ( uint64_t )ts.tv_sec * 1000000000LL) + ts.tv_nsec; +} + +static inline void gpuvis_trace_block_finalize( uint64_t m_t0, const char *str ) +{ + uint64_t dt = gpuvis_gettime_u64() - m_t0; + + // The cpu clock_gettime() functions seems to vary compared to the + // ftrace event timestamps. If we don't reduce the duration here, + // scopes oftentimes won't stack correctly when they're drawn. + if ( dt > 11000 ) + dt -= 11000; + + gpuvis_trace_printf( "%s (lduration=-%lu)", str, dt ); +} + +static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock* block, const char *str ) +{ + block->m_str = str; + block->m_t0 = gpuvis_gettime_u64(); +} + +static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block ) +{ + gpuvis_trace_block_finalize(block->m_t0, block->m_str); +} + +static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap) +{ + vsnprintf(block->m_buf, sizeof(block->m_buf), fmt, ap); + block->m_t0 = gpuvis_gettime_u64(); +} + +static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... ) +{ + va_list args; + + va_start( args, fmt ); + gpuvis_trace_blockf_vbegin( block, fmt, args ); + va_end( args ); +} + +static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block ) +{ + gpuvis_trace_block_finalize( block->m_t0, block->m_buf ); +} + +#define GPUVIS_COUNT_HOT_FUNC_CALLS() gpuvis_count_hot_func_calls_internal_( __func__ ); + +#else + +static inline int gpuvis_trace_init() { return -1; } +static inline void gpuvis_trace_shutdown() {} + +static inline int gpuvis_trace_printf( const char *fmt, ... 
) { return 0; } +static inline int gpuvis_trace_vprintf( const char *fmt, va_list ap ) { return 0; } + +static inline int gpuvis_trace_duration_printf( float duration, const char *fmt, ... ) { return 0; } +static inline int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap ) { return 0; } + +static inline int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... ) { return 0; } +static inline int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) { return 0; } + +static inline int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... ) { return 0; } +static inline int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) { return 0; } + +static inline int gpuvis_start_tracing( unsigned int kbuffersize ) { return 0; } +static inline int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size ) { return 0; } +static inline int gpuvis_stop_tracing() { return 0; } + +static inline int gpuvis_tracing_on() { return -1; } + +static inline const char *gpuvis_get_tracefs_dir() { return ""; } +static inline const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file ) { return NULL; } + +struct GpuvisTraceBlock; +static inline void gpuvis_trace_block_begin( struct GpuvisTraceBlock *block, const char *str ) {} +static inline void gpuvis_trace_block_end( struct GpuvisTraceBlock *block ) {} + +struct GpuvisTraceBlockf; +static inline void gpuvis_trace_blockf_vbegin( struct GpuvisTraceBlockf *block, const char *fmt, va_list ap ) {} +static inline void gpuvis_trace_blockf_begin( struct GpuvisTraceBlockf *block, const char *fmt, ... ) {} +static inline void gpuvis_trace_blockf_end( struct GpuvisTraceBlockf *block ) {} + +#define GPUVIS_TRACE_BLOCK( _conststr ) +#define GPUVIS_TRACE_BLOCKF( _fmt, ... 
) + +#define GPUVIS_COUNT_HOT_FUNC_CALLS() + +#endif // !GPUVIS_TRACE_UTILS_DISABLE + +#if defined( GPUVIS_TRACE_IMPLEMENTATION ) && !defined( GPUVIS_TRACE_UTILS_DISABLE ) + +////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION SECTION +// + +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <limits.h> +#include <fcntl.h> +#include <sys/vfs.h> +#include <linux/magic.h> +#include <sys/syscall.h> + +#undef GPUVIS_EXTERN +#ifdef __cplusplus +#define GPUVIS_EXTERN extern "C" +#else +#define GPUVIS_EXTERN +#endif + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#define GPUVIS_STR( x ) #x +#define GPUVIS_STR_VALUE( x ) GPUVIS_STR( x ) + +static int g_trace_fd = -2; +static int g_tracefs_dir_inited = 0; +static char g_tracefs_dir[ PATH_MAX ]; + +#ifdef __cplusplus +#include <unordered_map> + +struct funcinfo_t +{ + uint64_t tfirst = 0; + uint64_t tlast = 0; + uint32_t count = 0; +}; +static std::unordered_map< pid_t, std::unordered_map< const char *, funcinfo_t > > g_hotfuncs; +#endif // __cplusplus + +static pid_t gpuvis_gettid() +{ + return ( pid_t )syscall( SYS_gettid ); +} + +static int exec_tracecmd( const char *cmd ) +{ + int ret; + + FILE *fh = popen( cmd, "r" ); + if ( !fh ) + { + //$ TODO: popen() failed: errno + ret = -1; + } + else + { + char buf[ 8192 ]; + + while ( fgets( buf, sizeof( buf ), fh ) ) + { + //$ TODO + printf( "%s: %s", __func__, buf ); + } + + if ( feof( fh ) ) + { + int pclose_ret = pclose( fh ); + + ret = WEXITSTATUS( pclose_ret ); + } + else + { + //$ TODO: Failed to read pipe to end: errno + pclose( fh ); + ret = -1; + } + } + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_init() +{ + if ( g_trace_fd == -2 ) + { + char filename[ PATH_MAX ]; + + // The "trace_marker" file allows userspace to write into the ftrace buffer. 
+ if ( !gpuvis_get_tracefs_filename( filename, sizeof( filename ), "trace_marker" ) ) + g_trace_fd = -1; + else + g_trace_fd = open( filename, O_WRONLY ); + } + + return g_trace_fd; +} + +#if !defined( __cplusplus ) +static void flush_hot_func_calls() +{ + //$ TODO: hot func calls for C +} +#else +static void flush_hot_func_calls() +{ + if ( g_hotfuncs.empty() ) + return; + + uint64_t t0 = gpuvis_gettime_u64(); + + for ( auto &x : g_hotfuncs ) + { + for ( auto &y : x.second ) + { + if ( y.second.count ) + { + pid_t tid = x.first; + const char *func = y.first; + uint64_t offset = t0 - y.second.tfirst; + uint64_t duration = y.second.tlast - y.second.tfirst; + + gpuvis_trace_printf( "%s calls:%u (lduration=%lu tid=%d offset=-%lu)\n", + func, y.second.count, duration, tid, offset ); + } + } + } + + g_hotfuncs.clear(); +} + +GPUVIS_EXTERN void gpuvis_count_hot_func_calls_internal_( const char *func ) +{ + static THREAD_LOCAL pid_t s_tid = gpuvis_gettid(); + + uint64_t t0 = gpuvis_gettime_u64(); + auto &x = g_hotfuncs[ s_tid ]; + auto &y = x[ func ]; + + if ( !y.count ) + { + y.count = 1; + y.tfirst = t0; + y.tlast = t0 + 1; + } + else if ( t0 - y.tlast >= 3 * 1000000 ) // 3ms + { + gpuvis_trace_printf( "%s calls:%u (lduration=%lu offset=-%lu)\n", + func, y.count, y.tlast - y.tfirst, t0 - y.tfirst ); + + y.count = 1; + y.tfirst = t0; + y.tlast = t0 + 1; + } + else + { + y.tlast = t0; + y.count++; + } +} +#endif // __cplusplus + +GPUVIS_EXTERN void gpuvis_trace_shutdown() +{ + flush_hot_func_calls(); + + if ( g_trace_fd >= 0 ) + close( g_trace_fd ); + g_trace_fd = -2; + + g_tracefs_dir_inited = 0; + g_tracefs_dir[ 0 ] = 0; +} + +static int trace_printf_impl( const char *keystr, const char *fmt, va_list ap ) GPUVIS_ATTR_PRINTF( 2, 0 ); +static int trace_printf_impl( const char *keystr, const char *fmt, va_list ap ) +{ + int ret = -1; + + if ( gpuvis_trace_init() >= 0 ) + { + int n; + char buf[ TRACE_BUF_SIZE ]; + + n = vsnprintf( buf, sizeof( buf ), fmt, ap ); + + if ( ( n 
> 0 ) || ( !n && keystr ) ) + { + if ( ( size_t )n >= sizeof( buf ) ) + n = sizeof( buf ) - 1; + + if ( keystr && keystr[ 0 ] ) + { + int keystrlen = strlen( keystr ); + + if ( ( size_t )n + keystrlen >= sizeof( buf ) ) + n = sizeof( buf ) - keystrlen - 1; + + strcpy( buf + n, keystr ); + + n += keystrlen; + } + + ret = write( g_trace_fd, buf, n ); + } + } + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_printf( const char *fmt, ... ) +{ + int ret; + va_list ap; + + va_start( ap, fmt ); + ret = gpuvis_trace_vprintf( fmt, ap ); + va_end( ap ); + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_vprintf( const char *fmt, va_list ap ) +{ + return trace_printf_impl( NULL, fmt, ap ); +} + +GPUVIS_EXTERN int gpuvis_trace_duration_printf( float duration, const char *fmt, ... ) +{ + int ret; + va_list ap; + + va_start( ap, fmt ); + ret = gpuvis_trace_duration_vprintf( duration, fmt, ap ); + va_end( ap ); + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_duration_vprintf( float duration, const char *fmt, va_list ap ) +{ + char keystr[ 128 ]; + + snprintf( keystr, sizeof( keystr ), " (duration=%f)", duration ); //$ TODO: Try this with more precision? + + return trace_printf_impl( keystr, fmt, ap ); +} + +GPUVIS_EXTERN int gpuvis_trace_begin_ctx_printf( unsigned int ctx, const char *fmt, ... ) +{ + int ret; + va_list ap; + + va_start( ap, fmt ); + ret = gpuvis_trace_begin_ctx_vprintf( ctx, fmt, ap ); + va_end( ap ); + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_begin_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) +{ + char keystr[ 128 ]; + + snprintf( keystr, sizeof( keystr ), " (begin_ctx=%u)", ctx ); + + return trace_printf_impl( keystr, fmt, ap ); +} + +GPUVIS_EXTERN int gpuvis_trace_end_ctx_printf( unsigned int ctx, const char *fmt, ... 
) +{ + int ret; + va_list ap; + + va_start( ap, fmt ); + ret = gpuvis_trace_end_ctx_vprintf( ctx, fmt, ap ); + va_end( ap ); + + return ret; +} + +GPUVIS_EXTERN int gpuvis_trace_end_ctx_vprintf( unsigned int ctx, const char *fmt, va_list ap ) +{ + char keystr[ 128 ]; + + snprintf( keystr, sizeof( keystr ), " (end_ctx=%u)", ctx ); + + return trace_printf_impl( keystr, fmt, ap ); +} + +GPUVIS_EXTERN int gpuvis_start_tracing( unsigned int kbuffersize ) +{ + static const char fmt[] = + "trace-cmd start -b %u -D -i " + // https://github.com/mikesart/gpuvis/wiki/TechDocs-Linux-Scheduler + " -e sched:sched_switch" + " -e sched:sched_process_fork" + " -e sched:sched_process_exec" + " -e sched:sched_process_exit" + " -e drm:drm_vblank_event" + " -e drm:drm_vblank_event_queued" + " -e drm:drm_vblank_event_delivered" + // https://github.com/mikesart/gpuvis/wiki/TechDocs-AMDGpu + " -e amdgpu:amdgpu_vm_flush" + " -e amdgpu:amdgpu_cs_ioctl" + " -e amdgpu:amdgpu_sched_run_job" + " -e *fence:*fence_signaled" + // https://github.com/mikesart/gpuvis/wiki/TechDocs-Intel + " -e i915:i915_flip_request" + " -e i915:i915_flip_complete" + " -e i915:intel_gpu_freq_change" + " -e i915:i915_gem_request_add" + " -e i915:i915_gem_request_submit" // Require CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS + " -e i915:i915_gem_request_in" // Kconfig option to be enabled. 
+ " -e i915:i915_gem_request_out" // + " -e i915:intel_engine_notify" + " -e i915:i915_gem_request_wait_begin" + " -e i915:i915_gem_request_wait_end 2>&1"; + char cmd[ 8192 ]; + + if ( !kbuffersize ) + kbuffersize = 16 * 1024; + + snprintf( cmd, sizeof( cmd ), fmt, kbuffersize ); + + return exec_tracecmd( cmd ); +} + +GPUVIS_EXTERN int gpuvis_trigger_capture_and_keep_tracing( char *filename, size_t size ) +{ + int ret = -1; + + if ( filename ) + filename[ 0 ] = 0; + + flush_hot_func_calls(); + + if ( gpuvis_tracing_on() ) + { + char datetime[ 128 ]; + char cmd[ PATH_MAX ]; + char exebuf[ PATH_MAX ]; + const char *exename = NULL; + time_t t = time( NULL ); + struct tm *tmp = localtime( &t ); + + strftime( datetime, sizeof( datetime ), "%Y-%m-%d_%H-%M-%S", tmp ); + datetime[ sizeof( datetime ) - 1 ] = 0; + + ssize_t cbytes = readlink( "/proc/self/exe", exebuf, sizeof( exebuf ) - 1 ); + if ( cbytes > 0 ) + { + exebuf[ cbytes ] = 0; + exename = strrchr( exebuf, '/' ); + } + exename = exename ? 
( exename + 1 ) : "trace"; + + // Stop tracing + exec_tracecmd( "trace-cmd stop 2>&1" ); + + // Save the trace data to something like "glxgears_2017-10-13_17-52-56.dat" + snprintf( cmd, sizeof( cmd ), + "trace-cmd extract -k -o \"%s_%s.dat\" > /tmp/blah.log 2>&1 &", + exename, datetime ); + cmd[ sizeof( cmd ) - 1 ] = 0; + + ret = system( cmd ); + + if ( filename && !ret ) + snprintf( filename, size, "%s_%s.dat", exename, datetime ); + + // Restart tracing + exec_tracecmd( "trace-cmd restart 2>&1" ); + } + + return ret; +} + +GPUVIS_EXTERN int gpuvis_stop_tracing() +{ + flush_hot_func_calls(); + + int ret = exec_tracecmd( "trace-cmd reset 2>&1"); + + // Try freeing any snapshot buffers as well + exec_tracecmd( "trace-cmd snapshot -f 2>&1" ); + + return ret; +} + +GPUVIS_EXTERN int gpuvis_tracing_on() +{ + int ret = -1; + char buf[ 32 ]; + char filename[ PATH_MAX ]; + + if ( gpuvis_get_tracefs_filename( filename, PATH_MAX, "tracing_on" ) ) + { + int fd = open( filename, O_RDONLY ); + + if ( fd >= 0 ) + { + if ( read( fd, buf, sizeof( buf ) ) > 0 ) + ret = atoi( buf ); + + close( fd ); + } + } + + return ret; +} + +static int is_tracefs_dir( const char *dir ) +{ + struct statfs stat; + + return !statfs( dir, &stat ) && ( stat.f_type == TRACEFS_MAGIC ); +} + +GPUVIS_EXTERN const char *gpuvis_get_tracefs_dir() +{ + if ( !g_tracefs_dir_inited ) + { + size_t i; + static const char *tracefs_dirs[] = + { + "/sys/kernel/tracing", + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + }; + + for ( i = 0; i < sizeof( tracefs_dirs ) / sizeof( tracefs_dirs[ 0 ] ); i++ ) + { + if ( is_tracefs_dir( tracefs_dirs[ i ] ) ) + { + strncpy( g_tracefs_dir, tracefs_dirs[ i ], PATH_MAX ); + g_tracefs_dir[ PATH_MAX - 1 ] = 0; + break; + } + } + + if ( !g_tracefs_dir[ 0 ] ) + { + FILE *fp; + char type[ 128 ]; + char dir[ PATH_MAX + 1 ]; + + fp = fopen( "/proc/mounts", "r" ); + if ( fp ) + { + while ( fscanf( fp, "%*s %" GPUVIS_STR_VALUE( PATH_MAX ) "s %127s %*s %*d %*d\n", dir, type ) == 
2 ) + { + if ( !strcmp( type, "tracefs" ) && is_tracefs_dir( dir ) ) + { + strncpy( g_tracefs_dir, dir, PATH_MAX ); + g_tracefs_dir[ PATH_MAX - 1 ] = 0; + break; + } + } + + fclose( fp ); + } + } + + g_tracefs_dir_inited = 1; + } + + return g_tracefs_dir; +} + +GPUVIS_EXTERN const char *gpuvis_get_tracefs_filename( char *buf, size_t buflen, const char *file ) +{ + const char *tracefs_dir = gpuvis_get_tracefs_dir(); + + if ( tracefs_dir[ 0 ] ) + { + snprintf( buf, buflen, "%s/%s", tracefs_dir, file ); + buf[ buflen - 1 ] = 0; + + return buf; + } + + return NULL; +} + +#endif // GPUVIS_TRACE_IMPLEMENTATION + +#endif // _GPUVIS_TRACE_UTILS_H_ diff --git a/src/util/perf/u_gpuvis.c b/src/util/perf/u_gpuvis.c new file mode 100644 index 00000000000..e3f14cd1f65 --- /dev/null +++ b/src/util/perf/u_gpuvis.c @@ -0,0 +1,53 @@ +/* + * Copyright 2023 Bas Nieuwenhuizen + * SPDX-License-Identifier: MIT + */ + +#include "u_gpuvis.h" + +#include <threads.h> + +#define GPUVIS_TRACE_IMPLEMENTATION +#include "gpuvis_trace_utils.h" + +/* Random base value to prevent collisions. As contexts are considered thread + * global by gpuvis, collisions are quite likely if we start at 0 and there + * are independent libraries tracing + */ +static unsigned int gpuvis_base_ctx; + +static _Thread_local unsigned int gpuvis_current_ctx; + +static once_flag gpuvis_once_flag = ONCE_FLAG_INIT; + +static void +util_gpuvis_init_once() +{ + gpuvis_trace_init(); + + /* Initialize it by address to avoid collisions between libraries using + * this code (e.g. 
GL & vulkan) */ + gpuvis_base_ctx = (uintptr_t) util_gpuvis_init_once >> 12; +} + +void +util_gpuvis_init(void) +{ + call_once(&gpuvis_once_flag, util_gpuvis_init_once); +} + +void +util_gpuvis_begin(const char *name) +{ + unsigned int ctx = gpuvis_base_ctx + ++gpuvis_current_ctx; + gpuvis_trace_begin_ctx_printf(ctx, "mesa:%s", name); +} + +void +util_gpuvis_end(void) +{ + unsigned int ctx = gpuvis_base_ctx + gpuvis_current_ctx--; + + /* Use an empty string to avoid warnings about an empty format string. */ + gpuvis_trace_end_ctx_printf(ctx, "%s", ""); +}
\ No newline at end of file diff --git a/src/util/perf/u_gpuvis.h b/src/util/perf/u_gpuvis.h new file mode 100644 index 00000000000..2a2f1a2e7c6 --- /dev/null +++ b/src/util/perf/u_gpuvis.h @@ -0,0 +1,35 @@ +/* + * Copyright 2023 Bas Nieuwenhuizen + * SPDX-License-Identifier: MIT + */ + +#ifndef U_GPUVIS_H +#define U_GPUVIS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_GPUVIS + +void util_gpuvis_init(void); + +void util_gpuvis_begin(const char *name); + +/* Ends the scope opened by the most recent util_gpuvis_begin() on this thread. */ +void util_gpuvis_end(void); + +#else + +static inline void +util_gpuvis_init(void) +{ +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* U_GPUVIS_H */
\ No newline at end of file diff --git a/src/util/perf/u_perfetto.cc b/src/util/perf/u_perfetto.cc new file mode 100644 index 00000000000..897e29cc4f8 --- /dev/null +++ b/src/util/perf/u_perfetto.cc @@ -0,0 +1,105 @@ +/* + * Copyright © 2021 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "u_perfetto.h" + +#include <perfetto.h> + +#include "c11/threads.h" +#include "util/macros.h" + +/* perfetto requires string literals */ +#define UTIL_PERFETTO_CATEGORY_DEFAULT_STR "mesa.default" + +PERFETTO_DEFINE_CATEGORIES( + perfetto::Category(UTIL_PERFETTO_CATEGORY_DEFAULT_STR) + .SetDescription("Mesa default events")); + +PERFETTO_TRACK_EVENT_STATIC_STORAGE(); + +int util_perfetto_tracing_state; + +static void +util_perfetto_update_tracing_state(void) +{ + p_atomic_set(&util_perfetto_tracing_state, + TRACE_EVENT_CATEGORY_ENABLED(UTIL_PERFETTO_CATEGORY_DEFAULT_STR)); +} + +void +util_perfetto_trace_begin(const char *name) +{ + TRACE_EVENT_BEGIN( + UTIL_PERFETTO_CATEGORY_DEFAULT_STR, nullptr, + [&](perfetto::EventContext ctx) { ctx.event()->set_name(name); }); +} + +void +util_perfetto_trace_end(void) +{ + TRACE_EVENT_END(UTIL_PERFETTO_CATEGORY_DEFAULT_STR); + + util_perfetto_update_tracing_state(); +} + +class UtilPerfettoObserver : public perfetto::TrackEventSessionObserver { + public: + UtilPerfettoObserver() { perfetto::TrackEvent::AddSessionObserver(this); } + + void OnStart(const perfetto::DataSourceBase::StartArgs &) override + { + util_perfetto_update_tracing_state(); + } + + /* XXX There is no PostStop callback. We have to call + * util_perfetto_update_tracing_state occasionally to poll. 
+ */ +}; + +static void +util_perfetto_fini(void) +{ + perfetto::Tracing::Shutdown(); +} + +static void +util_perfetto_init_once(void) +{ + // Connects to the system tracing service + perfetto::TracingInitArgs args; + args.backends = perfetto::kSystemBackend; + perfetto::Tracing::Initialize(args); + + static UtilPerfettoObserver observer; + perfetto::TrackEvent::Register(); + + atexit(&util_perfetto_fini); +} + +static once_flag perfetto_once_flag = ONCE_FLAG_INIT; + +void +util_perfetto_init(void) +{ + call_once(&perfetto_once_flag, util_perfetto_init_once); +} diff --git a/src/util/perf/u_perfetto.h b/src/util/perf/u_perfetto.h new file mode 100644 index 00000000000..75ae00186a0 --- /dev/null +++ b/src/util/perf/u_perfetto.h @@ -0,0 +1,78 @@ +/* + * Copyright © 2021 Google, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _UTIL_PERFETTO_H +#define _UTIL_PERFETTO_H + +#include "util/u_atomic.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_PERFETTO + +extern int util_perfetto_tracing_state; + +void util_perfetto_init(void); + +static inline bool +util_perfetto_is_tracing_enabled(void) +{ + return p_atomic_read_relaxed(&util_perfetto_tracing_state); +} + +void util_perfetto_trace_begin(const char *name); + +void util_perfetto_trace_end(void); + +#else /* HAVE_PERFETTO */ + +static inline void +util_perfetto_init(void) +{ +} + +static inline bool +util_perfetto_is_tracing_enabled(void) +{ + return false; +} + +static inline void +util_perfetto_trace_begin(const char *name) +{ +} + +static inline void +util_perfetto_trace_end(void) +{ +} + +#endif /* HAVE_PERFETTO */ + +#ifdef __cplusplus +} +#endif + +#endif /* _UTIL_PERFETTO_H */ diff --git a/src/util/perf/u_perfetto_renderpass.h b/src/util/perf/u_perfetto_renderpass.h new file mode 100644 index 00000000000..13aad3481eb --- /dev/null +++ b/src/util/perf/u_perfetto_renderpass.h @@ -0,0 +1,156 @@ +/* + * Copyright © 2023 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "perfetto.h" + +#include "util/hash_table.h" +#include "util/perf/u_trace.h" +#include "util/ralloc.h" + +using perfetto::DataSource; +template <typename DataSourceType, typename DataSourceTraits> +class MesaRenderpassDataSource + : public perfetto::DataSource<DataSourceType, DataSourceTraits> { + + public: + typedef typename perfetto::DataSource<DataSourceType, + DataSourceTraits>::TraceContext + TraceContext; + + void OnSetup(const perfetto::DataSourceBase::SetupArgs &) override + { + // Use this callback to apply any custom configuration to your data + // source based on the TraceConfig in SetupArgs. + debug_markers = NULL; + } + + void OnStart(const perfetto::DataSourceBase::StartArgs &) override + { + debug_markers = _mesa_hash_table_create(NULL, _mesa_hash_string, + _mesa_key_string_equal); + // This notification can be used to initialize the GPU driver, enable + // counters, etc. StartArgs will contain the DataSourceDescriptor, + // which can be extended. + u_trace_perfetto_start(); + PERFETTO_LOG("Tracing started"); + } + + void OnStop(const perfetto::DataSourceBase::StopArgs &) override + { + PERFETTO_LOG("Tracing stopped"); + + // Undo any initialization done in OnStart. + u_trace_perfetto_stop(); + // TODO we should perhaps block until queued traces are flushed? + + static_cast<DataSourceType *>(this)->Trace([](auto ctx) { + auto packet = ctx.NewTracePacket(); + packet->Finalize(); + ctx.Flush(); + }); + + ralloc_free(debug_markers); + } + + /* Emits a clock sync trace event. Perfetto uses periodic clock events + * like this to sync up our GPU render stages with the CPU on the same + * timeline, since clocks always drift over time. 
Note that perfetto + * relies on gpu_ts being monotonic, and will perform badly if it goes + * backwards -- see tu_perfetto.cc for an example implementation of handling + * going backwards. + */ + static void EmitClockSync(TraceContext &ctx, + uint64_t cpu_ts, + uint64_t gpu_ts, + uint32_t gpu_clock_id) + { + auto packet = ctx.NewTracePacket(); + + packet->set_timestamp_clock_id( + perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + packet->set_timestamp(cpu_ts); + + auto event = packet->set_clock_snapshot(); + + { + auto clock = event->add_clocks(); + + clock->set_clock_id( + perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME); + clock->set_timestamp(cpu_ts); + } + + { + auto clock = event->add_clocks(); + + clock->set_clock_id(gpu_clock_id); + clock->set_timestamp(gpu_ts); + } + } + + /* Returns a stage iid to use for a command stream or queue annotation. + * + * Using a new stage lets the annotation string show up right on the track + * event in the UI, rather than needing to click into the event to find the + * name in the metadata. Intended for use with + * vkCmdBeginDebugUtilsLabelEXT() and glPushDebugGroup(). + * + * Note that SEQ_INCREMENTAL_STATE_CLEARED must have been set in the + * sequence before this is called. + */ + uint64_t debug_marker_stage(TraceContext &ctx, const char *name) + { + struct hash_entry *entry = _mesa_hash_table_search(debug_markers, name); + const uint64_t dynamic_iid_base = 1ull << 32; + + if (entry) { + return dynamic_iid_base + (uint32_t) (uintptr_t) entry->data; + } else { + uint64_t iid = dynamic_iid_base + debug_markers->entries; + + auto packet = ctx.NewTracePacket(); + auto interned_data = packet->set_interned_data(); + + auto desc = interned_data->add_gpu_specifications(); + desc->set_iid(iid); + desc->set_name(name); + + /* We only track the entry count in entry->data, because the + * dynamic_iid_base would get lost on 32-bit builds. 
+ */ + _mesa_hash_table_insert(debug_markers, + ralloc_strdup(debug_markers, name), + (void *) (uintptr_t) debug_markers->entries); + + return iid; + } + } + + private: + /* Hash table of application generated events (string -> iid) (use + * tctx.GetDataSourceLocked()->debug_marker_stage() to get a stage iid) + */ + struct hash_table *debug_markers; +}; + +/* Begin the C API section. */ diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index f07ae602142..6d9982cb59c 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -16,39 +16,54 @@ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
*/ +#include "u_trace.h" + #include <inttypes.h> #include "util/list.h" -#include "util/ralloc.h" +#include "util/u_call_once.h" #include "util/u_debug.h" -#include "util/u_inlines.h" -#include "util/u_fifo.h" - -#include "u_trace.h" +#include "util/u_vector.h" #define __NEEDS_TRACE_PRIV #include "u_trace_priv.h" +#define PAYLOAD_BUFFER_SIZE 0x100 #define TIMESTAMP_BUF_SIZE 0x1000 -#define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t)) +#define TRACES_PER_CHUNK (TIMESTAMP_BUF_SIZE / sizeof(uint64_t)) -#ifdef HAVE_PERFETTO -int ut_perfetto_enabled; +struct u_trace_state { + util_once_flag once; + FILE *trace_file; + enum u_trace_type enabled_traces; +}; +static struct u_trace_state u_trace_state = { .once = UTIL_ONCE_FLAG_INIT }; +#ifdef HAVE_PERFETTO /** * Global list of contexts, so we can defer starting the queue until * perfetto tracing is started. - * - * TODO locking */ -struct list_head ctx_list = { &ctx_list, &ctx_list }; +static struct list_head ctx_list = { &ctx_list, &ctx_list }; + +static simple_mtx_t ctx_list_mutex = SIMPLE_MTX_INITIALIZER; +/* The amount of Perfetto tracers connected */ +int _u_trace_perfetto_count; #endif +struct u_trace_payload_buf { + uint32_t refcount; + + uint8_t *buf; + uint8_t *next; + uint8_t *end; +}; + struct u_trace_event { const struct u_tracepoint *tp; const void *payload; @@ -76,17 +91,17 @@ struct u_trace_chunk { */ void *timestamps; - /** - * For trace payload, we sub-allocate from ralloc'd buffers which - * hang off of the chunk's ralloc context, so they are automatically - * free'd when the chunk is free'd + /* Array of u_trace_payload_buf referenced by traces[] elements. */ - uint8_t *payload_buf, *payload_end; + struct u_vector payloads; + + /* Current payload buffer being written. 
*/ + struct u_trace_payload_buf *payload; struct util_queue_fence fence; - bool last; /* this chunk is last in batch */ - bool eof; /* this chunk is last in frame */ + bool last; /* this chunk is last in batch */ + bool eof; /* this chunk is last in frame */ void *flush_data; /* assigned by u_trace_flush */ @@ -97,6 +112,172 @@ struct u_trace_chunk { bool free_flush_data; }; +struct u_trace_printer { + void (*start)(struct u_trace_context *utctx); + void (*end)(struct u_trace_context *utctx); + void (*start_of_frame)(struct u_trace_context *utctx); + void (*end_of_frame)(struct u_trace_context *utctx); + void (*start_of_batch)(struct u_trace_context *utctx); + void (*end_of_batch)(struct u_trace_context *utctx); + void (*event)(struct u_trace_context *utctx, + struct u_trace_chunk *chunk, + const struct u_trace_event *evt, + uint64_t ns, + int32_t delta); +}; + +static void +print_txt_start(struct u_trace_context *utctx) +{ +} + +static void +print_txt_end_of_frame(struct u_trace_context *utctx) +{ + fprintf(utctx->out, "END OF FRAME %u\n", utctx->frame_nr); +} + +static void +print_txt_start_of_batch(struct u_trace_context *utctx) +{ + fprintf(utctx->out, "+----- NS -----+ +-- Δ --+ +----- MSG -----\n"); +} + +static void +print_txt_end_of_batch(struct u_trace_context *utctx) +{ + uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns; + fprintf(utctx->out, "ELAPSED: %" PRIu64 " ns\n", elapsed); +} + +static void +print_txt_event(struct u_trace_context *utctx, + struct u_trace_chunk *chunk, + const struct u_trace_event *evt, + uint64_t ns, + int32_t delta) +{ + if (evt->tp->print) { + fprintf(utctx->out, "%016" PRIu64 " %+9d: %s: ", ns, delta, + evt->tp->name); + evt->tp->print(utctx->out, evt->payload); + } else { + fprintf(utctx->out, "%016" PRIu64 " %+9d: %s\n", ns, delta, + evt->tp->name); + } +} + +static struct u_trace_printer txt_printer = { + .start = &print_txt_start, + .end = &print_txt_start, + .start_of_frame = &print_txt_start, + .end_of_frame 
= &print_txt_end_of_frame, + .start_of_batch = &print_txt_start_of_batch, + .end_of_batch = &print_txt_end_of_batch, + .event = &print_txt_event, +}; + +static void +print_json_start(struct u_trace_context *utctx) +{ + fprintf(utctx->out, "[\n"); +} + +static void +print_json_end(struct u_trace_context *utctx) +{ + fprintf(utctx->out, "\n]"); +} + +static void +print_json_start_of_frame(struct u_trace_context *utctx) +{ + if (utctx->frame_nr != 0) + fprintf(utctx->out, ",\n"); + fprintf(utctx->out, "{\n\"frame\": %u,\n", utctx->frame_nr); + fprintf(utctx->out, "\"batches\": [\n"); +} + +static void +print_json_end_of_frame(struct u_trace_context *utctx) +{ + fprintf(utctx->out, "]\n}\n"); + fflush(utctx->out); +} + +static void +print_json_start_of_batch(struct u_trace_context *utctx) +{ + if (utctx->batch_nr != 0) + fprintf(utctx->out, ",\n"); + fprintf(utctx->out, "{\n\"events\": [\n"); +} + +static void +print_json_end_of_batch(struct u_trace_context *utctx) +{ + uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns; + fprintf(utctx->out, "],\n"); + fprintf(utctx->out, "\"duration_ns\": %" PRIu64 "\n", elapsed); + fprintf(utctx->out, "}\n"); +} + +static void +print_json_event(struct u_trace_context *utctx, + struct u_trace_chunk *chunk, + const struct u_trace_event *evt, + uint64_t ns, + int32_t delta) +{ + if (utctx->event_nr != 0) + fprintf(utctx->out, ",\n"); + fprintf(utctx->out, "{\n\"event\": \"%s\",\n", evt->tp->name); + fprintf(utctx->out, "\"time_ns\": \"%016" PRIu64 "\",\n", ns); + fprintf(utctx->out, "\"params\": {"); + if (evt->tp->print) + evt->tp->print_json(utctx->out, evt->payload); + fprintf(utctx->out, "}\n}\n"); +} + +static struct u_trace_printer json_printer = { + .start = print_json_start, + .end = print_json_end, + .start_of_frame = &print_json_start_of_frame, + .end_of_frame = &print_json_end_of_frame, + .start_of_batch = &print_json_start_of_batch, + .end_of_batch = &print_json_end_of_batch, + .event = &print_json_event, +}; + 
+static struct u_trace_payload_buf * +u_trace_payload_buf_create(void) +{ + struct u_trace_payload_buf *payload = + malloc(sizeof(*payload) + PAYLOAD_BUFFER_SIZE); + + p_atomic_set(&payload->refcount, 1); + + payload->buf = (uint8_t *) (payload + 1); + payload->end = payload->buf + PAYLOAD_BUFFER_SIZE; + payload->next = payload->buf; + + return payload; +} + +static struct u_trace_payload_buf * +u_trace_payload_buf_ref(struct u_trace_payload_buf *payload) +{ + p_atomic_inc(&payload->refcount); + return payload; +} + +static void +u_trace_payload_buf_unref(struct u_trace_payload_buf *payload) +{ + if (p_atomic_dec_zero(&payload->refcount)) + free(payload); +} + static void free_chunk(void *ptr) { @@ -104,68 +285,133 @@ free_chunk(void *ptr) chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps); + /* Unref payloads attached to this chunk. */ + struct u_trace_payload_buf **payload; + u_vector_foreach (payload, &chunk->payloads) + u_trace_payload_buf_unref(*payload); + u_vector_finish(&chunk->payloads); + list_del(&chunk->node); + free(chunk); } static void free_chunks(struct list_head *chunks) { while (!list_is_empty(chunks)) { - struct u_trace_chunk *chunk = list_first_entry(chunks, - struct u_trace_chunk, node); - ralloc_free(chunk); + struct u_trace_chunk *chunk = + list_first_entry(chunks, struct u_trace_chunk, node); + free_chunk(chunk); } } static struct u_trace_chunk * -get_chunk(struct u_trace *ut) +get_chunk(struct u_trace *ut, size_t payload_size) { struct u_trace_chunk *chunk; + assert(payload_size <= PAYLOAD_BUFFER_SIZE); + /* do we currently have a non-full chunk to append msgs to? 
*/ if (!list_is_empty(&ut->trace_chunks)) { - chunk = list_last_entry(&ut->trace_chunks, - struct u_trace_chunk, node); - if (chunk->num_traces < TRACES_PER_CHUNK) - return chunk; - /* we need to expand to add another chunk to the batch, so - * the current one is no longer the last one of the batch: - */ - chunk->last = false; + chunk = list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node); + /* Can we store a new trace in the chunk? */ + if (chunk->num_traces < TRACES_PER_CHUNK) { + /* If no payload required, nothing else to check. */ + if (payload_size <= 0) + return chunk; + + /* If the payload buffer has space for the payload, we're good. + */ + if (chunk->payload && + (chunk->payload->end - chunk->payload->next) >= payload_size) + return chunk; + + /* If we don't have enough space in the payload buffer, can we + * allocate a new one? + */ + struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads); + *buf = u_trace_payload_buf_create(); + chunk->payload = *buf; + return chunk; + } + /* we need to expand to add another chunk to the batch, so + * the current one is no longer the last one of the batch: + */ + chunk->last = false; } /* .. 
if not, then create a new one: */ - chunk = rzalloc_size(NULL, sizeof(*chunk)); - ralloc_set_destructor(chunk, free_chunk); + chunk = calloc(1, sizeof(*chunk)); chunk->utctx = ut->utctx; - chunk->timestamps = ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE); + chunk->timestamps = + ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE); chunk->last = true; + u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf *)); + if (payload_size > 0) { + struct u_trace_payload_buf **buf = u_vector_add(&chunk->payloads); + *buf = u_trace_payload_buf_create(); + chunk->payload = *buf; + } list_addtail(&chunk->node, &ut->trace_chunks); return chunk; } -DEBUG_GET_ONCE_BOOL_OPTION(trace, "GPU_TRACE", false) -DEBUG_GET_ONCE_FILE_OPTION(trace_file, "GPU_TRACEFILE", NULL, "w") +static const struct debug_named_value config_control[] = { + { "print", U_TRACE_TYPE_PRINT, "Enable print" }, + { "print_json", U_TRACE_TYPE_PRINT_JSON, "Enable print in JSON" }, +#ifdef HAVE_PERFETTO + { "perfetto", U_TRACE_TYPE_PERFETTO_ENV, "Enable perfetto" }, +#endif + { "markers", U_TRACE_TYPE_MARKERS, "Enable marker trace" }, + DEBUG_NAMED_VALUE_END +}; -static FILE * -get_tracefile(void) +DEBUG_GET_ONCE_OPTION(trace_file, "MESA_GPU_TRACEFILE", NULL) + +static void +trace_file_fini(void) { - static FILE *tracefile = NULL; - static bool firsttime = true; + fclose(u_trace_state.trace_file); + u_trace_state.trace_file = NULL; +} - if (firsttime) { - tracefile = debug_get_option_trace_file(); - if (!tracefile && debug_get_option_trace()) { - tracefile = stdout; +static void +u_trace_state_init_once(void) +{ + u_trace_state.enabled_traces = + debug_get_flags_option("MESA_GPU_TRACES", config_control, 0); + const char *tracefile_name = debug_get_option_trace_file(); + if (tracefile_name && __normal_user()) { + u_trace_state.trace_file = fopen(tracefile_name, "w"); + if (u_trace_state.trace_file != NULL) { + atexit(trace_file_fini); } - - firsttime = false; } + if 
(!u_trace_state.trace_file) { + u_trace_state.trace_file = stdout; + } +} + +void +u_trace_state_init(void) +{ + util_call_once(&u_trace_state.once, u_trace_state_init_once); +} + +bool +u_trace_is_enabled(enum u_trace_type type) +{ + /* Active is only tracked in a given u_trace context, so if you're asking + * us if U_TRACE_TYPE_PERFETTO (_ENV | _ACTIVE) is enabled, then just check + * _ENV ("perfetto tracing is desired, but perfetto might not be running"). + */ + type &= ~U_TRACE_TYPE_PERFETTO_ACTIVE; - return tracefile; + return (u_trace_state.enabled_traces & type) == type; } static void @@ -174,9 +420,10 @@ queue_init(struct u_trace_context *utctx) if (utctx->queue.jobs) return; - bool ret = util_queue_init(&utctx->queue, "traceq", 256, 1, - UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | - UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL); + bool ret = util_queue_init( + &utctx->queue, "traceq", 256, 1, + UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | UTIL_QUEUE_INIT_RESIZE_IF_FULL, + NULL); assert(ret); if (!ret) @@ -185,13 +432,16 @@ queue_init(struct u_trace_context *utctx) void u_trace_context_init(struct u_trace_context *utctx, - void *pctx, - u_trace_create_ts_buffer create_timestamp_buffer, - u_trace_delete_ts_buffer delete_timestamp_buffer, - u_trace_record_ts record_timestamp, - u_trace_read_ts read_timestamp, - u_trace_delete_flush_data delete_flush_data) + void *pctx, + u_trace_create_ts_buffer create_timestamp_buffer, + u_trace_delete_ts_buffer delete_timestamp_buffer, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data) { + u_trace_state_init(); + + utctx->enabled_traces = u_trace_state.enabled_traces; utctx->pctx = pctx; utctx->create_timestamp_buffer = create_timestamp_buffer; utctx->delete_timestamp_buffer = delete_timestamp_buffer; @@ -202,32 +452,65 @@ u_trace_context_init(struct u_trace_context *utctx, utctx->last_time_ns = 0; utctx->first_time_ns = 0; utctx->frame_nr = 0; + utctx->batch_nr = 0; + 
utctx->event_nr = 0; + utctx->start_of_frame = true; list_inithead(&utctx->flushed_trace_chunks); - utctx->out = get_tracefile(); + if (utctx->enabled_traces & U_TRACE_TYPE_PRINT) { + utctx->out = u_trace_state.trace_file; + + if (utctx->enabled_traces & U_TRACE_TYPE_JSON) { + utctx->out_printer = &json_printer; + } else { + utctx->out_printer = &txt_printer; + } + } else { + utctx->out = NULL; + utctx->out_printer = NULL; + } #ifdef HAVE_PERFETTO + simple_mtx_lock(&ctx_list_mutex); list_add(&utctx->node, &ctx_list); + if (_u_trace_perfetto_count > 0) + utctx->enabled_traces |= U_TRACE_TYPE_PERFETTO_ACTIVE; + + queue_init(utctx); + + simple_mtx_unlock(&ctx_list_mutex); +#else + queue_init(utctx); #endif - if (!u_trace_context_tracing(utctx)) + if (!(p_atomic_read_relaxed(&utctx->enabled_traces) & + U_TRACE_TYPE_REQUIRE_QUEUING)) return; - queue_init(utctx); + if (utctx->out) { + utctx->out_printer->start(utctx); + } } void u_trace_context_fini(struct u_trace_context *utctx) { #ifdef HAVE_PERFETTO + simple_mtx_lock(&ctx_list_mutex); list_del(&utctx->node); + simple_mtx_unlock(&ctx_list_mutex); #endif + + if (utctx->out) { + utctx->out_printer->end(utctx); + fflush(utctx->out); + } + if (!utctx->queue.jobs) return; util_queue_finish(&utctx->queue); util_queue_destroy(&utctx->queue); - fflush(utctx->out); free_chunks(&utctx->flushed_trace_chunks); } @@ -235,16 +518,34 @@ u_trace_context_fini(struct u_trace_context *utctx) void u_trace_perfetto_start(void) { - list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) + simple_mtx_lock(&ctx_list_mutex); + + list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) { queue_init(utctx); - ut_perfetto_enabled++; + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces | U_TRACE_TYPE_PERFETTO_ACTIVE); + } + + _u_trace_perfetto_count++; + + simple_mtx_unlock(&ctx_list_mutex); } void u_trace_perfetto_stop(void) { - assert(ut_perfetto_enabled > 0); - ut_perfetto_enabled--; + 
simple_mtx_lock(&ctx_list_mutex); + + assert(_u_trace_perfetto_count > 0); + _u_trace_perfetto_count--; + if (_u_trace_perfetto_count == 0) { + list_for_each_entry (struct u_trace_context, utctx, &ctx_list, node) { + p_atomic_set(&utctx->enabled_traces, + utctx->enabled_traces & ~U_TRACE_TYPE_PERFETTO_ACTIVE); + } + } + + simple_mtx_unlock(&ctx_list_mutex); } #endif @@ -254,9 +555,20 @@ process_chunk(void *job, void *gdata, int thread_index) struct u_trace_chunk *chunk = job; struct u_trace_context *utctx = chunk->utctx; + if (utctx->start_of_frame) { + utctx->start_of_frame = false; + utctx->batch_nr = 0; + if (utctx->out) { + utctx->out_printer->start_of_frame(utctx); + } + } + /* For first chunk of batch, accumulated times will be zerod: */ - if (utctx->out && !utctx->last_time_ns) { - fprintf(utctx->out, "+----- NS -----+ +-- Δ --+ +----- MSG -----\n"); + if (!utctx->last_time_ns) { + utctx->event_nr = 0; + if (utctx->out) { + utctx->out_printer->start_of_batch(utctx); + } } for (unsigned idx = 0; idx < chunk->num_traces; idx++) { @@ -265,7 +577,8 @@ process_chunk(void *job, void *gdata, int thread_index) if (!evt->tp) continue; - uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, chunk->flush_data); + uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, + chunk->flush_data); int32_t delta; if (!utctx->first_time_ns) @@ -283,43 +596,46 @@ process_chunk(void *job, void *gdata, int thread_index) } if (utctx->out) { - if (evt->tp->print) { - fprintf(utctx->out, "%016"PRIu64" %+9d: %s: ", ns, delta, evt->tp->name); - evt->tp->print(utctx->out, evt->payload); - } else { - fprintf(utctx->out, "%016"PRIu64" %+9d: %s\n", ns, delta, evt->tp->name); - } + utctx->out_printer->event(utctx, chunk, evt, ns, delta); } #ifdef HAVE_PERFETTO - if (evt->tp->perfetto) { - evt->tp->perfetto(utctx->pctx, ns, chunk->flush_data, evt->payload); + if (evt->tp->perfetto && + (p_atomic_read_relaxed(&utctx->enabled_traces) & + U_TRACE_TYPE_PERFETTO_ACTIVE)) { 
+ evt->tp->perfetto(utctx->pctx, ns, evt->tp->tp_idx, chunk->flush_data, evt->payload); } #endif + + utctx->event_nr++; } if (chunk->last) { if (utctx->out) { - uint64_t elapsed = utctx->last_time_ns - utctx->first_time_ns; - fprintf(utctx->out, "ELAPSED: %"PRIu64" ns\n", elapsed); + utctx->out_printer->end_of_batch(utctx); } + utctx->batch_nr++; utctx->last_time_ns = 0; utctx->first_time_ns = 0; } - if (chunk->free_flush_data && utctx->delete_flush_data) { - utctx->delete_flush_data(utctx, chunk->flush_data); + if (chunk->eof) { + if (utctx->out) { + utctx->out_printer->end_of_frame(utctx); + } + utctx->frame_nr++; + utctx->start_of_frame = true; } - if (utctx->out && chunk->eof) { - fprintf(utctx->out, "END OF FRAME %u\n", utctx->frame_nr++); + if (chunk->free_flush_data && utctx->delete_flush_data) { + utctx->delete_flush_data(utctx, chunk->flush_data); } } static void cleanup_chunk(void *job, void *gdata, int thread_index) { - ralloc_free(job); + free_chunk(job); } void @@ -330,32 +646,30 @@ u_trace_context_process(struct u_trace_context *utctx, bool eof) if (list_is_empty(chunks)) return; - struct u_trace_chunk *last_chunk = list_last_entry(chunks, - struct u_trace_chunk, node); + struct u_trace_chunk *last_chunk = + list_last_entry(chunks, struct u_trace_chunk, node); last_chunk->eof = eof; while (!list_is_empty(chunks)) { - struct u_trace_chunk *chunk = list_first_entry(chunks, - struct u_trace_chunk, node); + struct u_trace_chunk *chunk = + list_first_entry(chunks, struct u_trace_chunk, node); /* remove from list before enqueuing, because chunk is freed * once it is processed by the queue: */ list_delinit(&chunk->node); - util_queue_add_job(&utctx->queue, chunk, &chunk->fence, - process_chunk, cleanup_chunk, - TIMESTAMP_BUF_SIZE); + util_queue_add_job(&utctx->queue, chunk, &chunk->fence, process_chunk, + cleanup_chunk, TIMESTAMP_BUF_SIZE); } } - void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx) { ut->utctx = utctx; + ut->num_traces = 0; 
list_inithead(&ut->trace_chunks); - ut->enabled = u_trace_context_tracing(utctx); } void @@ -365,6 +679,7 @@ u_trace_fini(struct u_trace *ut) * have been flushed to the trace-context. */ free_chunks(&ut->trace_chunks); + ut->num_traces = 0; } bool @@ -376,34 +691,48 @@ u_trace_has_points(struct u_trace *ut) struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut) { - if (!ut->enabled) - return (struct u_trace_iterator) {NULL, NULL, 0}; + if (list_is_empty(&ut->trace_chunks)) + return (struct u_trace_iterator) { ut, NULL, 0 }; struct u_trace_chunk *first_chunk = list_first_entry(&ut->trace_chunks, struct u_trace_chunk, node); - return (struct u_trace_iterator) { ut, first_chunk, 0}; + return (struct u_trace_iterator) { ut, first_chunk, 0 }; } struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut) { - if (!ut->enabled) - return (struct u_trace_iterator) {NULL, NULL, 0}; + if (list_is_empty(&ut->trace_chunks)) + return (struct u_trace_iterator) { ut, NULL, 0 }; struct u_trace_chunk *last_chunk = list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node); - return (struct u_trace_iterator) { ut, last_chunk, last_chunk->num_traces}; + return (struct u_trace_iterator) { ut, last_chunk, + last_chunk->num_traces }; +} + +/* If an iterator was created when there were no chunks and there are now + * chunks, "sanitize" it to include the first chunk. 
+ */ +static struct u_trace_iterator +sanitize_iterator(struct u_trace_iterator iter) +{ + if (iter.ut && !iter.chunk && !list_is_empty(&iter.ut->trace_chunks)) { + iter.chunk = + list_first_entry(&iter.ut->trace_chunks, struct u_trace_chunk, node); + } + + return iter; } bool -u_trace_iterator_equal(struct u_trace_iterator a, - struct u_trace_iterator b) +u_trace_iterator_equal(struct u_trace_iterator a, struct u_trace_iterator b) { - return a.ut == b.ut && - a.chunk == b.chunk && - a.event_idx == b.event_idx; + a = sanitize_iterator(a); + b = sanitize_iterator(b); + return a.ut == b.ut && a.chunk == b.chunk && a.event_idx == b.event_idx; } void @@ -413,26 +742,40 @@ u_trace_clone_append(struct u_trace_iterator begin_it, void *cmdstream, u_trace_copy_ts_buffer copy_ts_buffer) { + begin_it = sanitize_iterator(begin_it); + end_it = sanitize_iterator(end_it); + struct u_trace_chunk *from_chunk = begin_it.chunk; uint32_t from_idx = begin_it.event_idx; while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) { - struct u_trace_chunk *to_chunk = get_chunk(into); + struct u_trace_chunk *to_chunk = get_chunk(into, 0 /* payload_size */); unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces, from_chunk->num_traces - from_idx); if (from_chunk == end_it.chunk) to_copy = MIN2(to_copy, end_it.event_idx - from_idx); - copy_ts_buffer(begin_it.ut->utctx, cmdstream, - from_chunk->timestamps, from_idx, - to_chunk->timestamps, to_chunk->num_traces, + copy_ts_buffer(begin_it.ut->utctx, cmdstream, from_chunk->timestamps, + from_idx, to_chunk->timestamps, to_chunk->num_traces, to_copy); memcpy(&to_chunk->traces[to_chunk->num_traces], &from_chunk->traces[from_idx], to_copy * sizeof(struct u_trace_event)); + /* Take a refcount on payloads from from_chunk if needed. 
*/ + if (begin_it.ut != into) { + struct u_trace_payload_buf **in_payload; + u_vector_foreach (in_payload, &from_chunk->payloads) { + struct u_trace_payload_buf **out_payload = + u_vector_add(&to_chunk->payloads); + + *out_payload = u_trace_payload_buf_ref(*in_payload); + } + } + + into->num_traces += to_copy; to_chunk->num_traces += to_copy; from_idx += to_copy; @@ -442,7 +785,8 @@ u_trace_clone_append(struct u_trace_iterator begin_it, break; from_idx = 0; - from_chunk = LIST_ENTRY(struct u_trace_chunk, from_chunk->node.next, node); + from_chunk = + list_entry(from_chunk->node.next, struct u_trace_chunk, node); } } } @@ -451,14 +795,19 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it, struct u_trace_iterator end_it) { + begin_it = sanitize_iterator(begin_it); + end_it = sanitize_iterator(end_it); + struct u_trace_chunk *current_chunk = begin_it.chunk; uint32_t start_idx = begin_it.event_idx; - while(current_chunk != end_it.chunk) { + while (current_chunk != end_it.chunk) { memset(&current_chunk->traces[start_idx], 0, - (current_chunk->num_traces - start_idx) * sizeof(struct u_trace_event)); + (current_chunk->num_traces - start_idx) * + sizeof(struct u_trace_event)); start_idx = 0; - current_chunk = LIST_ENTRY(struct u_trace_chunk, current_chunk->node.next, node); + current_chunk = + list_entry(current_chunk->node.next, struct u_trace_chunk, node); } memset(&current_chunk->traces[start_idx], 0, @@ -471,34 +820,33 @@ u_trace_disable_event_range(struct u_trace_iterator begin_it, * functions. */ void * -u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp) +u_trace_appendv(struct u_trace *ut, + void *cs, + const struct u_tracepoint *tp, + unsigned variable_sz) { - struct u_trace_chunk *chunk = get_chunk(ut); - assert(tp->payload_sz == ALIGN_NPOT(tp->payload_sz, 8)); - if (unlikely((chunk->payload_buf + tp->payload_sz) > chunk->payload_end)) { - const unsigned payload_chunk_sz = 0x100; /* TODO arbitrary size? 
*/ - - assert(tp->payload_sz < payload_chunk_sz); - - chunk->payload_buf = ralloc_size(chunk, payload_chunk_sz); - chunk->payload_end = chunk->payload_buf + payload_chunk_sz; - } + unsigned payload_sz = ALIGN_NPOT(tp->payload_sz + variable_sz, 8); + struct u_trace_chunk *chunk = get_chunk(ut, payload_sz); + unsigned tp_idx = chunk->num_traces++; /* sub-allocate storage for trace payload: */ - void *payload = chunk->payload_buf; - chunk->payload_buf += tp->payload_sz; + void *payload = NULL; + if (payload_sz > 0) { + payload = chunk->payload->next; + chunk->payload->next += payload_sz; + } /* record a timestamp for the trace: */ - ut->utctx->record_timestamp(ut, cs, chunk->timestamps, chunk->num_traces); + ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, + tp->end_of_pipe); - chunk->traces[chunk->num_traces] = (struct u_trace_event) { - .tp = tp, - .payload = payload, + chunk->traces[tp_idx] = (struct u_trace_event) { + .tp = tp, + .payload = payload, }; - - chunk->num_traces++; + ut->num_traces++; return payload; } @@ -506,7 +854,8 @@ u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp) void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data) { - list_for_each_entry(struct u_trace_chunk, chunk, &ut->trace_chunks, node) { + list_for_each_entry (struct u_trace_chunk, chunk, &ut->trace_chunks, + node) { chunk->flush_data = flush_data; chunk->free_flush_data = false; } @@ -520,4 +869,5 @@ u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data) /* transfer batch's log chunks to context: */ list_splicetail(&ut->trace_chunks, &ut->utctx->flushed_trace_chunks); list_inithead(&ut->trace_chunks); + ut->num_traces = 0; } diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index c184a14e94d..b61b7cfb800 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -16,9 +16,9 @@ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND 
NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. */ #ifndef _U_TRACE_H @@ -28,9 +28,11 @@ #include <stdint.h> #include <stdio.h> +#include "util/macros.h" +#include "util/u_atomic.h" #include "util/u_queue.h" -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif @@ -69,25 +71,26 @@ extern "C" { struct u_trace_context; struct u_trace; struct u_trace_chunk; +struct u_trace_printer; /** * Special reserved value to indicate that no timestamp was captured, * and that the timestamp of the previous trace should be reused. */ -#define U_TRACE_NO_TIMESTAMP ((uint64_t)0) +#define U_TRACE_NO_TIMESTAMP ((uint64_t) 0) /** * Driver provided callback to create a timestamp buffer which will be * read by u_trace_read_ts function. */ -typedef void* (*u_trace_create_ts_buffer)(struct u_trace_context *utctx, - uint32_t timestamps_count); +typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx, + uint32_t timestamps_count); /** * Driver provided callback to delete a timestamp buffer. */ typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx, - void *timestamps); + void *timestamps); /** * Driver provided callback to emit commands into the soecified command @@ -98,8 +101,11 @@ typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx, * a fixed rate, even as the GPU freq changes. The same source used for * GL_TIMESTAMP queries should be appropriate. 
*/ -typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, - void *timestamps, unsigned idx); +typedef void (*u_trace_record_ts)(struct u_trace *ut, + void *cs, + void *timestamps, + unsigned idx, + bool end_of_pipe); /** * Driver provided callback to read back a previously recorded timestamp. @@ -120,28 +126,56 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, * capturing the same timestamp multiple times in a row. */ typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx, - void *timestamps, unsigned idx, void *flush_data); + void *timestamps, + unsigned idx, + void *flush_data); /** * Driver provided callback to delete flush data. */ typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx, - void *flush_data); + void *flush_data); + +enum u_trace_type { + U_TRACE_TYPE_PRINT = 1u << 0, + U_TRACE_TYPE_JSON = 1u << 1, + U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2, + U_TRACE_TYPE_PERFETTO_ENV = 1u << 3, + U_TRACE_TYPE_MARKERS = 1u << 4, + + U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON, + U_TRACE_TYPE_PERFETTO = + U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV, + + /* + * A mask of traces that require appending to the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_QUEUING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO, + /* + * A mask of traces that require processing the tracepoint chunk list. + */ + U_TRACE_TYPE_REQUIRE_PROCESSING = + U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE, +}; /** * The trace context provides tracking for "in-flight" traces, once the * cmdstream that records timestamps has been flushed. 
*/ struct u_trace_context { + /* All traces enabled in this context */ + enum u_trace_type enabled_traces; + void *pctx; - u_trace_create_ts_buffer create_timestamp_buffer; - u_trace_delete_ts_buffer delete_timestamp_buffer; - u_trace_record_ts record_timestamp; - u_trace_read_ts read_timestamp; + u_trace_create_ts_buffer create_timestamp_buffer; + u_trace_delete_ts_buffer delete_timestamp_buffer; + u_trace_record_ts record_timestamp; + u_trace_read_ts read_timestamp; u_trace_delete_flush_data delete_flush_data; FILE *out; + struct u_trace_printer *out_printer; /* Once u_trace_flush() is called u_trace_chunk's are queued up to * render tracepoints on a queue. The per-chunk queue jobs block until @@ -161,6 +195,9 @@ struct u_trace_context { uint64_t first_time_ns; uint32_t frame_nr; + uint32_t batch_nr; + uint32_t event_nr; + bool start_of_frame; /* list of unprocessed trace chunks in fifo order: */ struct list_head flushed_trace_chunks; @@ -180,23 +217,24 @@ struct u_trace_context { struct u_trace { struct u_trace_context *utctx; - struct list_head trace_chunks; /* list of unflushed trace chunks in fifo order */ + uint32_t num_traces; - bool enabled; + struct list_head + trace_chunks; /* list of unflushed trace chunks in fifo order */ }; void u_trace_context_init(struct u_trace_context *utctx, - void *pctx, - u_trace_create_ts_buffer create_timestamp_buffer, - u_trace_delete_ts_buffer delete_timestamp_buffer, - u_trace_record_ts record_timestamp, - u_trace_read_ts read_timestamp, - u_trace_delete_flush_data delete_flush_data); + void *pctx, + u_trace_create_ts_buffer create_timestamp_buffer, + u_trace_delete_ts_buffer delete_timestamp_buffer, + u_trace_record_ts record_timestamp, + u_trace_read_ts read_timestamp, + u_trace_delete_flush_data delete_flush_data); void u_trace_context_fini(struct u_trace_context *utctx); /** - * Flush (trigger processing) of traces previously flushed to the trace-context - * by u_trace_flush(). 
+ * Flush (trigger processing) of traces previously flushed to the + * trace-context by u_trace_flush(). * * This should typically be called in the driver's pctx->flush(). */ @@ -205,39 +243,37 @@ void u_trace_context_process(struct u_trace_context *utctx, bool eof); void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx); void u_trace_fini(struct u_trace *ut); +void u_trace_state_init(void); +bool u_trace_is_enabled(enum u_trace_type type); + bool u_trace_has_points(struct u_trace *ut); -struct u_trace_iterator -{ +struct u_trace_iterator { struct u_trace *ut; struct u_trace_chunk *chunk; uint32_t event_idx; }; -struct u_trace_iterator -u_trace_begin_iterator(struct u_trace *ut); +struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut); -struct u_trace_iterator -u_trace_end_iterator(struct u_trace *ut); +struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut); -bool -u_trace_iterator_equal(struct u_trace_iterator a, - struct u_trace_iterator b); +bool u_trace_iterator_equal(struct u_trace_iterator a, + struct u_trace_iterator b); typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx, - void *cmdstream, - void *ts_from, uint32_t from_offset, - void *ts_to, uint32_t to_offset, - uint32_t count); + void *cmdstream, + void *ts_from, + uint32_t from_offset, + void *ts_to, + uint32_t to_offset, + uint32_t count); /** * Clones tracepoints range into target u_trace. * Provides callback for driver to copy timestamps on GPU from * one buffer to another. * - * The payload is shared and remains owned by the original u_trace - * if tracepoints are being copied between different u_trace! - * * It allows: * - Tracing re-usable command buffer in Vulkan, by copying tracepoints * each time it is submitted. @@ -255,37 +291,71 @@ void u_trace_disable_event_range(struct u_trace_iterator begin_it, /** * Flush traces to the parent trace-context. 
At this point, the expectation - * is that all the tracepoints are "executed" by the GPU following any previously - * flushed u_trace batch. + * is that all the tracepoints are "executed" by the GPU following any + * previously flushed u_trace batch. * - * flush_data is a way for driver to pass additional data, which becomes available - * only at the point of flush, to the u_trace_read_ts callback and perfetto. - * The typical example of such data would be a fence to wait on in u_trace_read_ts, - * and a submission_id to pass into perfetto. - * The destruction of the data is done via u_trace_delete_flush_data. + * flush_data is a way for driver to pass additional data, which becomes + * available only at the point of flush, to the u_trace_read_ts callback and + * perfetto. The typical example of such data would be a fence to wait on in + * u_trace_read_ts, and a submission_id to pass into perfetto. The destruction + * of the data is done via u_trace_delete_flush_data. * - * This should typically be called when the corresponding cmdstream (containing - * the timestamp reads) is flushed to the kernel. + * This should typically be called when the corresponding cmdstream + * (containing the timestamp reads) is flushed to the kernel. */ void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data); #ifdef HAVE_PERFETTO -extern int ut_perfetto_enabled; +static ALWAYS_INLINE bool +u_trace_perfetto_active(struct u_trace_context *utctx) +{ + return p_atomic_read_relaxed(&utctx->enabled_traces) & + U_TRACE_TYPE_PERFETTO_ACTIVE; +} void u_trace_perfetto_start(void); void u_trace_perfetto_stop(void); #else -# define ut_perfetto_enabled 0 +static ALWAYS_INLINE bool +u_trace_perfetto_active(UNUSED struct u_trace_context *utctx) +{ + return false; +} #endif -static inline bool -u_trace_context_tracing(struct u_trace_context *utctx) +/** + * Return whether utrace is enabled at all or not, this can be used to + * gate any expensive traces. 
+ */ +static ALWAYS_INLINE bool +u_trace_enabled(struct u_trace_context *utctx) +{ + return p_atomic_read_relaxed(&utctx->enabled_traces) != 0; +} + +/** + * Return whether chunks should be processed or not. + */ +static ALWAYS_INLINE bool +u_trace_should_process(struct u_trace_context *utctx) +{ + return p_atomic_read_relaxed(&utctx->enabled_traces) & + U_TRACE_TYPE_REQUIRE_PROCESSING; +} + +/** + * Return whether to emit markers into the command stream even if the queue + * isn't active. + */ +static ALWAYS_INLINE bool +u_trace_markers_enabled(struct u_trace_context *utctx) { - return !!utctx->out || (ut_perfetto_enabled > 0); + return p_atomic_read_relaxed(&utctx->enabled_traces) & + U_TRACE_TYPE_MARKERS; } -#ifdef __cplusplus +#ifdef __cplusplus } #endif -#endif /* _U_TRACE_H */ +#endif /* _U_TRACE_H */ diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py index d0d99c828ee..1035b9c4cf9 100644 --- a/src/util/perf/u_trace.py +++ b/src/util/perf/u_trace.py @@ -23,15 +23,18 @@ from mako.template import Template from collections import namedtuple -from enum import Flag, auto +from enum import IntEnum import os TRACEPOINTS = {} +TRACEPOINTS_TOGGLES = {} class Tracepoint(object): """Class that represents all the information about a tracepoint """ - def __init__(self, name, args=[], tp_struct=None, tp_print=None, tp_perfetto=None): + def __init__(self, name, args=[], toggle_name=None, + tp_struct=None, tp_print=None, tp_perfetto=None, + tp_markers=None, end_of_pipe=False, need_cs_param=True): """Parameters: - name: the tracepoint name, a tracepoint function with the given @@ -42,20 +45,48 @@ class Tracepoint(object): - tp_print: (optional) array of format string followed by expressions - tp_perfetto: (optional) driver provided callback which can generate perfetto events + - tp_markers: (optional) driver provided printf-style callback which can + generate CS markers, this requires 'need_cs_param' as the first param + is the CS that the label should be 
emitted into + - need_cs_param: whether tracepoint functions need an additional cs + parameter. """ assert isinstance(name, str) assert isinstance(args, list) assert name not in TRACEPOINTS + self.name = name self.args = args if tp_struct is None: tp_struct = args self.tp_struct = tp_struct + self.has_variable_arg = False + for arg in self.tp_struct: + if arg.length_arg != None: + self.has_variable_arg = True + break self.tp_print = tp_print self.tp_perfetto = tp_perfetto + self.tp_markers = tp_markers + self.end_of_pipe = end_of_pipe + self.toggle_name = toggle_name + self.need_cs_param = need_cs_param TRACEPOINTS[name] = self + if toggle_name is not None and toggle_name not in TRACEPOINTS_TOGGLES: + TRACEPOINTS_TOGGLES[toggle_name] = len(TRACEPOINTS_TOGGLES) + + def can_generate_print(self): + return self.args is not None and len(self.args) > 0 + + def enabled_expr(self, trace_toggle_name): + if trace_toggle_name is None: + return "true" + assert self.toggle_name is not None + return "({0} & {1}_{2})".format(trace_toggle_name, + trace_toggle_name.upper(), + self.toggle_name.upper()) class TracepointArgStruct(): """Represents struct that is being passed as an argument @@ -75,7 +106,7 @@ class TracepointArgStruct(): class TracepointArg(object): """Class that represents either an argument being passed or a field in a struct """ - def __init__(self, type, var, c_format, name=None, to_prim_type=None): + def __init__(self, type, var, c_format, name=None, to_prim_type=None, length_arg=None, copy_func=None): """Parameters: - type: argument's C type. @@ -85,6 +116,7 @@ class TracepointArg(object): be displayed in output or perfetto, otherwise var will be used. - to_prim_type: (optional) C function to convert from arg's type to a type compatible with c_format. 
+ - length_arg: whether this argument is a variable length array """ assert isinstance(type, str) assert isinstance(var, str) @@ -97,18 +129,21 @@ class TracepointArg(object): name = var self.name = name self.to_prim_type = to_prim_type + self.length_arg = length_arg + self.copy_func = copy_func HEADERS = [] -class HeaderScope(Flag): - HEADER = auto() - SOURCE = auto() +class HeaderScope(IntEnum): + HEADER = (1 << 0) + SOURCE = (1 << 1) + PERFETTO = (1 << 2) class Header(object): """Class that represents a header file dependency of generated tracepoints """ - def __init__(self, hdr, scope=HeaderScope.HEADER|HeaderScope.SOURCE): + def __init__(self, hdr, scope=HeaderScope.HEADER): """Parameters: - hdr: the required header path @@ -173,50 +208,80 @@ extern "C" { ${declaration.decl}; % endfor +% if trace_toggle_name is not None: +enum ${trace_toggle_name.lower()} { +% for toggle_name, config_id in TRACEPOINTS_TOGGLES.items(): + ${trace_toggle_name.upper()}_${toggle_name.upper()} = 1ull << ${config_id}, +% endfor +}; + +extern uint64_t ${trace_toggle_name}; + +void ${trace_toggle_name}_config_variable(void); +% endif + % for trace_name, trace in TRACEPOINTS.items(): + /* * ${trace_name} */ struct trace_${trace_name} { % for arg in trace.tp_struct: - ${arg.type} ${arg.name}; + ${arg.type} ${arg.name}${"[0]" if arg.length_arg else ""}; % endfor % if len(trace.args) == 0: -#ifdef __cplusplus - /* avoid warnings about empty struct size mis-match in C vs C++.. - * the size mis-match is harmless because (a) nothing will deref - * the empty struct, and (b) the code that cares about allocating - * sizeof(struct trace_${trace_name}) (and wants this to be zero - * if there is no payload) is C - */ - uint8_t dummy; +#ifdef __cplusplus + /* avoid warnings about empty struct size mis-match in C vs C++.. 
+ * the size mis-match is harmless because (a) nothing will deref + * the empty struct, and (b) the code that cares about allocating + * sizeof(struct trace_${trace_name}) (and wants this to be zero + * if there is no payload) is C + */ + uint8_t dummy; #endif % endif }; % if trace.tp_perfetto is not None: #ifdef HAVE_PERFETTO -void ${trace.tp_perfetto}(${ctx_param}, uint64_t ts_ns, const void *flush_data, const struct trace_${trace_name} *payload); +void ${trace.tp_perfetto}( + ${ctx_param}, + uint64_t ts_ns, + uint16_t tp_idx, + const void *flush_data, + const struct trace_${trace_name} *payload); #endif % endif -void __trace_${trace_name}(struct u_trace *ut, void *cs +void __trace_${trace_name}( + struct u_trace *ut + , enum u_trace_type enabled_traces +% if trace.need_cs_param: + , void *cs +% endif % for arg in trace.args: , ${arg.type} ${arg.var} % endfor ); -static inline void trace_${trace_name}(struct u_trace *ut, void *cs +static ALWAYS_INLINE void trace_${trace_name}( + struct u_trace *ut +% if trace.need_cs_param: + , void *cs +% endif % for arg in trace.args: - , ${arg.type} ${arg.var} + , ${arg.type} ${arg.var} % endfor ) { -% if trace.tp_perfetto is not None: - if (!unlikely(ut->enabled || ut_perfetto_enabled)) -% else: - if (!unlikely(ut->enabled)) -% endif + enum u_trace_type enabled_traces = p_atomic_read_relaxed(&ut->utctx->enabled_traces); + if (!unlikely(enabled_traces != 0 && + ${trace.enabled_expr(trace_toggle_name)})) return; - __trace_${trace_name}(ut, cs + __trace_${trace_name}( + ut + , enabled_traces +% if trace.need_cs_param: + , cs +% endif % for arg in trace.args: - , ${arg.var} + , ${arg.var} % endfor ); } @@ -252,92 +317,221 @@ src_template = """\ * IN THE SOFTWARE. 
*/ +#include "${hdr}" + % for header in HEADERS: #include "${header.hdr}" % endfor -#include "${hdr}" - #define __NEEDS_TRACE_PRIV +#include "util/u_debug.h" #include "util/perf/u_trace_priv.h" -% for trace_name, trace in TRACEPOINTS.items(): +% if trace_toggle_name is not None: +static const struct debug_control config_control[] = { +% for toggle_name in TRACEPOINTS_TOGGLES.keys(): + { "${toggle_name}", ${trace_toggle_name.upper()}_${toggle_name.upper()}, }, +% endfor + { NULL, 0, }, +}; +uint64_t ${trace_toggle_name} = 0; + +static void +${trace_toggle_name}_variable_once(void) +{ + uint64_t default_value = 0 +% for name in trace_toggle_defaults: + | ${trace_toggle_name.upper()}_${name.upper()} +% endfor + ; + + ${trace_toggle_name} = + parse_enable_string(getenv("${trace_toggle_name.upper()}"), + default_value, + config_control); +} + +void +${trace_toggle_name}_config_variable(void) +{ + static once_flag process_${trace_toggle_name}_variable_flag = ONCE_FLAG_INIT; + + call_once(&process_${trace_toggle_name}_variable_flag, + ${trace_toggle_name}_variable_once); +} +% endif + +% for index, (trace_name, trace) in enumerate(TRACEPOINTS.items()): /* * ${trace_name} */ -% if trace.args is not None and len(trace.args) > 0: + % if trace.can_generate_print(): static void __print_${trace_name}(FILE *out, const void *arg) { const struct trace_${trace_name} *__entry = (const struct trace_${trace_name} *)arg; -% if trace.tp_print is not None: + % if trace.tp_print is not None: fprintf(out, "${trace.tp_print[0]}\\n" -% for arg in trace.tp_print[1:]: + % for arg in trace.tp_print[1:]: , ${arg} -% endfor -% else: + % endfor + % else: fprintf(out, "" -% for arg in trace.tp_struct: + % for arg in trace.tp_struct: "${arg.name}=${arg.c_format}, " -% endfor + % endfor "\\n" -% for arg in trace.tp_struct: - % if arg.to_prim_type: + % for arg in trace.tp_struct: + % if arg.to_prim_type: ,${arg.to_prim_type.format('__entry->' + arg.name)} - % else: + % else: ,__entry->${arg.name} - % 
endif -% endfor -%endif + % endif + % endfor + % endif ); } -% else: + +static void __print_json_${trace_name}(FILE *out, const void *arg) { + const struct trace_${trace_name} *__entry = + (const struct trace_${trace_name} *)arg; + % if trace.tp_print is not None: + fprintf(out, "\\"unstructured\\": \\"${trace.tp_print[0]}\\"" + % for arg in trace.tp_print[1:]: + , ${arg} + % endfor + % else: + fprintf(out, "" + % for arg in trace.tp_struct: + "\\"${arg.name}\\": \\"${arg.c_format}\\"" + % if arg != trace.tp_struct[-1]: + ", " + % endif + % endfor + % for arg in trace.tp_struct: + % if arg.to_prim_type: + ,${arg.to_prim_type.format('__entry->' + arg.name)} + % else: + ,__entry->${arg.name} + % endif + % endfor + % endif + ); +} + + % else: #define __print_${trace_name} NULL -% endif +#define __print_json_${trace_name} NULL + % endif + % if trace.tp_markers is not None: + +__attribute__((format(printf, 3, 4))) void ${trace.tp_markers}(struct u_trace_context *utctx, void *, const char *, ...); + +static void __emit_label_${trace_name}(struct u_trace_context *utctx, void *cs, struct trace_${trace_name} *entry) { + ${trace.tp_markers}(utctx, cs, "${trace_name}(" + % for idx,arg in enumerate(trace.tp_struct): + "${"," if idx != 0 else ""}${arg.name}=${arg.c_format}" + % endfor + ")" + % for arg in trace.tp_struct: + % if arg.to_prim_type: + ,${arg.to_prim_type.format('entry->' + arg.name)} + % else: + ,entry->${arg.name} + % endif + % endfor + ); +} + + % endif static const struct u_tracepoint __tp_${trace_name} = { ALIGN_POT(sizeof(struct trace_${trace_name}), 8), /* keep size 64b aligned */ "${trace_name}", + ${"true" if trace.end_of_pipe else "false"}, + ${index}, __print_${trace_name}, -% if trace.tp_perfetto is not None: + __print_json_${trace_name}, + % if trace.tp_perfetto is not None: #ifdef HAVE_PERFETTO - (void (*)(void *pctx, uint64_t, const void *, const void *))${trace.tp_perfetto}, + (void (*)(void *pctx, uint64_t, uint16_t, const void *, const void 
*))${trace.tp_perfetto}, #endif -% endif + % endif }; -void __trace_${trace_name}(struct u_trace *ut, void *cs -% for arg in trace.args: - , ${arg.type} ${arg.var} -% endfor +void __trace_${trace_name}( + struct u_trace *ut + , enum u_trace_type enabled_traces + % if trace.need_cs_param: + , void *cs + % endif + % for arg in trace.args: + , ${arg.type} ${arg.var} + % endfor ) { - struct trace_${trace_name} *__entry = - (struct trace_${trace_name} *)u_trace_append(ut, cs, &__tp_${trace_name}); - (void)__entry; -% for arg in trace.tp_struct: - __entry->${arg.name} = ${arg.var}; -% endfor + struct trace_${trace_name} entry; + UNUSED struct trace_${trace_name} *__entry = + enabled_traces & U_TRACE_TYPE_REQUIRE_QUEUING ? + % if trace.has_variable_arg: + (struct trace_${trace_name} *)u_trace_appendv(ut, ${"cs," if trace.need_cs_param else "NULL,"} &__tp_${trace_name}, + 0 + % for arg in trace.tp_struct: + % if arg.length_arg is not None: + + ${arg.length_arg} + % endif + % endfor + ) : + % else: + (struct trace_${trace_name} *)u_trace_append(ut, ${"cs," if trace.need_cs_param else "NULL,"} &__tp_${trace_name}) : + % endif + &entry; + % for arg in trace.tp_struct: + % if arg.length_arg is None: + __entry->${arg.name} = ${arg.var}; + % else: + ${arg.copy_func}(__entry->${arg.name}, ${arg.var}, ${arg.length_arg}); + % endif + % endfor + % if trace.tp_markers is not None: + if (enabled_traces & U_TRACE_TYPE_MARKERS) + __emit_label_${trace_name}(ut->utctx, cs, __entry); + % endif } % endfor """ -def utrace_generate(cpath, hpath, ctx_param): +def utrace_generate(cpath, hpath, ctx_param, trace_toggle_name=None, + trace_toggle_defaults=[]): + """Parameters: + + - cpath: c file to generate. + - hpath: h file to generate. + - ctx_param: type of the first parameter to the perfetto vfuncs. + - trace_toggle_name: (optional) name of the environment variable + enabling/disabling tracepoints. + - trace_toggle_defaults: (optional) list of tracepoints enabled by default. 
+ """ if cpath is not None: hdr = os.path.basename(cpath).rsplit('.', 1)[0] + '.h' - with open(cpath, 'w') as f: + with open(cpath, 'w', encoding='utf-8') as f: f.write(Template(src_template).render( hdr=hdr, ctx_param=ctx_param, + trace_toggle_name=trace_toggle_name, + trace_toggle_defaults=trace_toggle_defaults, HEADERS=[h for h in HEADERS if h.scope & HeaderScope.SOURCE], - TRACEPOINTS=TRACEPOINTS)) + TRACEPOINTS=TRACEPOINTS, + TRACEPOINTS_TOGGLES=TRACEPOINTS_TOGGLES)) if hpath is not None: hdr = os.path.basename(hpath) - with open(hpath, 'w') as f: + with open(hpath, 'w', encoding='utf-8') as f: f.write(Template(hdr_template).render( hdrname=hdr.rstrip('.h').upper(), ctx_param=ctx_param, + trace_toggle_name=trace_toggle_name, HEADERS=[h for h in HEADERS if h.scope & HeaderScope.HEADER], FORWARD_DECLS=FORWARD_DECLS, - TRACEPOINTS=TRACEPOINTS)) + TRACEPOINTS=TRACEPOINTS, + TRACEPOINTS_TOGGLES=TRACEPOINTS_TOGGLES)) perfetto_utils_hdr_template = """\ @@ -370,40 +564,52 @@ perfetto_utils_hdr_template = """\ #include <perfetto.h> +% for header in HEADERS: +#include "${header.hdr}" +% endfor + +UNUSED static const char *${basename}_names[] = { +% for trace_name, trace in TRACEPOINTS.items(): + "${trace_name}", +% endfor +}; + % for trace_name, trace in TRACEPOINTS.items(): static void UNUSED trace_payload_as_extra_${trace_name}(perfetto::protos::pbzero::GpuRenderStageEvent *event, const struct trace_${trace_name} *payload) { -% if all([trace.tp_perfetto, trace.tp_struct]) and len(trace.tp_struct) > 0: + % if all([trace.tp_perfetto, trace.tp_struct]) and len(trace.tp_struct) > 0: char buf[128]; -% for arg in trace.tp_struct: + % for arg in trace.tp_struct: { auto data = event->add_extra_data(); data->set_name("${arg.name}"); -% if arg.to_prim_type: + % if arg.to_prim_type: sprintf(buf, "${arg.c_format}", ${arg.to_prim_type.format('payload->' + arg.name)}); -% else: + % else: sprintf(buf, "${arg.c_format}", payload->${arg.name}); -% endif + % endif data->set_value(buf); 
} -% endfor + % endfor -% endif + % endif } % endfor #endif /* ${guard_name} */ """ -def utrace_generate_perfetto_utils(hpath): +def utrace_generate_perfetto_utils(hpath,basename="tracepoint"): if hpath is not None: hdr = os.path.basename(hpath) - with open(hpath, 'wb') as f: - f.write(Template(perfetto_utils_hdr_template, output_encoding='utf-8').render( + with open(hpath, 'w', encoding='utf-8') as f: + f.write(Template(perfetto_utils_hdr_template).render( + basename=basename, hdrname=hdr.rstrip('.h').upper(), + HEADERS=[h for h in HEADERS if h.scope & HeaderScope.PERFETTO], TRACEPOINTS=TRACEPOINTS)) diff --git a/src/util/perf/u_trace_priv.h b/src/util/perf/u_trace_priv.h index 331a8c84ace..a25811a48e8 100644 --- a/src/util/perf/u_trace_priv.h +++ b/src/util/perf/u_trace_priv.h @@ -16,45 +16,84 @@ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. */ #ifndef __NEEDS_TRACE_PRIV -# error "Do not use this header!" +#error "Do not use this header!" #endif #ifndef _U_TRACE_PRIV_H #define _U_TRACE_PRIV_H -#include <stdio.h> - #include "u_trace.h" +#include <stdio.h> /* * Internal interface used by generated tracepoints */ +#ifdef __cplusplus +extern "C" { +#endif + /** * Tracepoint descriptor. */ struct u_tracepoint { unsigned payload_sz; const char *name; + /** + * Whether this tracepoint's timestamp must be recorded with as an + * end-of-pipe timestamp (for some GPUs the recording timestamp instruction + * might be different for top/end of pipe). 
+ */ + bool end_of_pipe:1; + /** + * Index of this tracepoint in <basename>_tracepoint_names in the generated + * u_trace perfetto header. By associating these names with iids in setup, + * tracepoints can be presented with with their own names by passing that + * to event->set_stage_iid(). + */ + uint16_t tp_idx; void (*print)(FILE *out, const void *payload); + void (*print_json)(FILE *out, const void *payload); #ifdef HAVE_PERFETTO /** * Callback to emit a perfetto event, such as render-stage trace */ - void (*perfetto)(void *pctx, uint64_t ts_ns, const void *flush_data, const void *payload); + void (*perfetto)(void *pctx, + uint64_t ts_ns, + uint16_t tp_idx, + const void *flush_data, + const void *payload); #endif }; /** - * Append a tracepoint, returning pointer that can be filled with trace - * payload. + * Append a tracepoint followed by some amount of memory specified by + * variable_sz, returning pointer that can be filled with trace payload. */ -void * u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp); +void *u_trace_appendv(struct u_trace *ut, + void *cs, + const struct u_tracepoint *tp, + unsigned variable_sz); + +/** + * Append a trace event, returning pointer to buffer of tp->payload_sz + * to be filled in with trace payload. Called by generated tracepoint + * functions. + */ +static inline void * +u_trace_append(struct u_trace *ut, void *cs, const struct u_tracepoint *tp) +{ + return u_trace_appendv(ut, cs, tp, 0); +} + +#ifdef __cplusplus +} +#endif -#endif /* _U_TRACE_PRIV_H */ +#endif /* _U_TRACE_PRIV_H */ |