diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | TODO | 46 | ||||
-rw-r--r-- | collector.c | 110 | ||||
-rw-r--r-- | module/sysprof-module.c | 81 | ||||
-rw-r--r-- | module/sysprof-module.h | 12 | ||||
-rw-r--r-- | process.c | 171 | ||||
-rw-r--r-- | process.h | 3 | ||||
-rw-r--r-- | sysprof-text.c | 10 | ||||
-rw-r--r-- | sysprof.c | 19 |
9 files changed, 354 insertions, 111 deletions
@@ -1,3 +1,16 @@ +2007-09-16 Soren Sandmann <sandmann@daimi.au.dk> + + * process.c (process_lookup_kernel_symbol): Add support for + looking up kernel symbols from /proc/kallsyms + + * sysprof-text.c (dump_data): Print note that the file is being + saved. + + * module/sysprof-module.[ch] (timer_notify): Send a copy of the + kernel stack to userspace. + + * collector.c: Do kernel symbol lookups. + 2007-08-26 Soren Sandmann <sandmann@daimi.au.dk> * profile.c (profile_get_size): Compute the size by simply summing @@ -23,6 +23,15 @@ Before 1.0.4: Before 1.2: +* Performance: + Switching between descendant views is a slow: + - gtk_tree_store_get_path() is O(n^2) and accounts + for 43% of the time. + - GObject signal emission overhead accounts for 18% of + the time. + Consider adding a forked version of GtkTreeStore with + performance fixes. + * Make sure that labels look decent in case of "No Map" etc. * Elf bugs: @@ -64,12 +73,27 @@ Before 1.2: Unless of course, we store the entire stack in the stackstash. This may use way too much memory though. - - vdso - - assume its the same across processes, just look at - sysprof's own copy. - Done: vdso is done now - - send copy of it to userspace once, or for every - sample. + - Locking, possibly useful code: + + /* In principle we should use get_task_mm() but + * that will use task_lock() leading to deadlock + * if somebody already has the lock + */ + if (spin_is_locked (&current->alloc_lock)) + printk ("alreadylocked\n"); + { + struct mm_struct *mm = current->mm; + if (mm) + { + printk (KERN_ALERT "stack size: %d (%d)\n", + mm->start_stack - regs->REG_STACK_PTR, + current->pid); + + stacksize = mm->start_stack - regs->REG_STACK_PTR; + } + else + stacksize = 1; + } - regular elf - usually have eh_frame section which is mapped into memory @@ -713,7 +737,15 @@ Later: of outstanding disk requests. 
--=-=-=-=-=-=-=-=-=-=-=-=-=-=- ALREADY DONE -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- +-=-=-=-=-=-=-=-=-=-=-=-=-=-=- ALREADY DONE: -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + + +* vdso + - assume its the same across processes, just look at + sysprof's own copy. + Done: vdso is done now + - send copy of it to userspace once, or for every + sample. * Various: - decorate_node should be done lazily diff --git a/collector.c b/collector.c index 062d539..9fc207a 100644 --- a/collector.c +++ b/collector.c @@ -62,7 +62,7 @@ collector_new (CollectorFunc callback, collector->stash = NULL; collector_reset (collector); - + return collector; } @@ -92,23 +92,53 @@ add_trace_to_stash (const SysprofStackTrace *trace, gulong *addrs; Process *process = process_get_from_pid (trace->pid); int n_addresses; - + int n_kernel_words; + int a; + n_addresses = trace->n_addresses; + n_kernel_words = trace->n_kernel_words; + + addrs = g_new (gulong, n_addresses + n_kernel_words + 2); - addrs = g_new (gulong, n_addresses + 1); + a = 0; + /* Add kernel addresses */ + if (trace->n_kernel_words) + { + for (i = 0; i < trace->n_kernel_words; ++i) + { + gulong addr = (gulong)trace->kernel_stack[i]; + + if (process_is_kernel_address (addr)) + addrs[a++] = addr; + } + + /* Add kernel marker */ + addrs[a++] = 0x01; + } + + /* Add user addresses */ for (i = 0; i < n_addresses; ++i) { - process_ensure_map (process, trace->pid, - (gulong)trace->addresses[i]); + gulong addr = (gulong)trace->addresses[i]; - addrs[i] = (gulong)trace->addresses[i]; + process_ensure_map (process, trace->pid, addr); + addrs[a++] = addr; } + + /* Add process */ + addrs[a++] = (gulong)process; + +#if 0 + if (a != n_addresses) + g_print ("a: %d, n_addresses: %d, kernel words: %d\n trace->nad %d", + a, n_addresses, trace->n_kernel_words, trace->n_addresses); - addrs[i] = (gulong)process; + g_assert (a == n_addresses); +#endif stack_stash_add_trace ( - stash, addrs, n_addresses + 1, 1); + stash, addrs, a, 1); g_free (addrs); } @@ -152,7 +182,7 @@ 
on_read (gpointer data) const SysprofStackTrace *trace; trace = &(collector->map_area->traces[collector->current]); - + #if 0 { int i; @@ -263,6 +293,12 @@ collector_start (Collector *collector, if (collector->fd < 0 && !open_fd (collector, err)) return FALSE; + /* Hack to make sure we parse the kernel symbols before + * starting collection, so the parsing doesn't interfere + * with the profiling. + */ + process_is_kernel_address (0); + fd_set_read_callback (collector->fd, on_read); return TRUE; } @@ -330,15 +366,44 @@ unique_dup (GHashTable *unique_symbols, const char *sym) } static char * -lookup_symbol (Process *process, gpointer address, GHashTable *unique_symbols) +lookup_symbol (Process *process, gpointer address, + GHashTable *unique_symbols, + gboolean kernel, + gboolean first_kernel_addr) { const char *sym; g_assert (process); + + if (kernel) + { + gulong offset; + sym = process_lookup_kernel_symbol ((gulong)address, &offset); + + /* If offset is 0, it is a callback, not a return address */ + if (offset == 0 && !first_kernel_addr) + sym = NULL; + + /* If offset is greater than 4096, then what happened is most + * likely that it is the address of something in the gap between the + * kernel text and the text of the modules. Rather than assign + * this to the last function of the kernel text, we remove it here. + * + * FIXME: what we really should do is find out where this split + * is, and act accordingly. 
+ */ + if (offset > 4096) + sym = NULL; + } + else + { + sym = process_lookup_symbol (process, (gulong)address); + } - sym = process_lookup_symbol (process, (gulong)address); - - return unique_dup (unique_symbols, sym); + if (sym) + return unique_dup (unique_symbols, sym); + else + return NULL; } static void @@ -350,15 +415,28 @@ resolve_symbols (GList *trace, gint size, gpointer data) Process *process = g_list_last (trace)->data; GPtrArray *resolved_trace = g_ptr_array_new (); char *cmdline; + gboolean in_kernel = FALSE; + gboolean first_kernel_addr = TRUE; for (list = trace; list && list->next; list = list->next) { + if (list->data == GINT_TO_POINTER (0x01)) + in_kernel = TRUE; + } + + for (list = trace; list && list->next; list = list->next) + { gpointer address = list->data; char *symbol; - symbol = lookup_symbol (process, address, info->unique_symbols); - - g_ptr_array_add (resolved_trace, symbol); + if (address == GINT_TO_POINTER (0x01)) + in_kernel = FALSE; + symbol = lookup_symbol (process, address, info->unique_symbols, + in_kernel, first_kernel_addr); + first_kernel_addr = FALSE; + + if (symbol) + g_ptr_array_add (resolved_trace, symbol); } cmdline = g_hash_table_lookup (info->unique_cmdlines, diff --git a/module/sysprof-module.c b/module/sysprof-module.c index 59b7910..d2f5ab5 100644 --- a/module/sysprof-module.c +++ b/module/sysprof-module.c @@ -106,6 +106,12 @@ read_frame (void *frame_pointer, StackFrame *frame) DEFINE_PER_CPU(int, n_samples); +static int +minimum (int a, int b) +{ + return a > b ? 
b : a; +} + #ifdef OLD_PROFILE static int timer_notify(struct notifier_block * self, unsigned long val, void * data) #else @@ -123,9 +129,6 @@ timer_notify (struct pt_regs *regs) StackFrame frame; int result; static atomic_t in_timer_notify = ATOMIC_INIT(1); -#if 0 - int stacksize; -#endif int n; n = ++get_cpu_var(n_samples); @@ -135,7 +138,6 @@ timer_notify (struct pt_regs *regs) return 0; /* 0: locked, 1: unlocked */ - if (!atomic_dec_and_test(&in_timer_notify)) goto out; @@ -150,58 +152,51 @@ timer_notify (struct pt_regs *regs) memset(trace, 0, sizeof (SysprofStackTrace)); trace->pid = current->pid; - + + trace->n_kernel_words = 0; + trace->n_addresses = 0; + i = 0; if (!is_user) { - trace->addresses[i++] = (void *)0x01; - regs = (void *)current->thread.REG_STACK_PTR0 - sizeof (struct pt_regs); + int n_bytes; + char *esp; + char *eos; + + trace->kernel_stack[0] = (void *)regs->REG_INS_PTR; + trace->n_kernel_words = 1; + + /* The timer interrupt happened in kernel mode. When this + * happens the registers are pushed on the stack, _except_ + * esp. So we can't use regs->esp to copy the stack pointer. + * Instead we use the fact that the regs pointer itself + * points to the stack. 
+ */ + esp = (char *)regs + sizeof (struct pt_regs); + eos = (char *)current->thread.REG_STACK_PTR0 - sizeof (struct pt_regs); + + n_bytes = minimum ((char *)eos - esp, + sizeof (trace->kernel_stack)); + + if (n_bytes > 0) { + memcpy (&(trace->kernel_stack[1]), esp, n_bytes); + + trace->n_kernel_words += (n_bytes) / sizeof (void *); + } + + /* Now trace the user stack */ + regs = (struct pt_regs *)eos; } + i = 0; trace->addresses[i++] = (void *)regs->REG_INS_PTR; frame_pointer = (void *)regs->REG_FRAME_PTR; - - { -#if 0 - /* In principle we should use get_task_mm() but - * that will use task_lock() leading to deadlock - * if somebody already has the lock - */ - if (spin_is_locked (&current->alloc_lock)) - printk ("alreadylocked\n"); - { - struct mm_struct *mm = current->mm; - if (mm) - { - printk (KERN_ALERT "stack size: %d (%d)\n", - mm->start_stack - regs->REG_STACK_PTR, - current->pid); - - stacksize = mm->start_stack - regs->REG_STACK_PTR; - } - else - stacksize = 1; - } -#endif -#if 0 - else - printk (KERN_ALERT "could not lock on %d\n", current->pid); -#endif - } -#if 0 - if (stacksize < 100000) - goto out; -#endif - while (((result = read_frame (frame_pointer, &frame)) == 0) && i < SYSPROF_MAX_ADDRESSES && (unsigned long)frame_pointer >= regs->REG_STACK_PTR) { -#if 0 - printk ("frame pointer: %p (retaddr: %p)\n", frame_pointer, frame.return_address); -#endif trace->addresses[i++] = (void *)frame.return_address; frame_pointer = (StackFrame *)frame.next; } diff --git a/module/sysprof-module.h b/module/sysprof-module.h index 9b7e4df..d92886d 100644 --- a/module/sysprof-module.h +++ b/module/sysprof-module.h @@ -25,16 +25,18 @@ typedef struct SysprofStackInfo SysprofStackInfo; typedef struct SysprofMmapArea SysprofMmapArea; #define SYSPROF_N_TRACES 64 -#define SYSPROF_MAX_ADDRESSES 1021 /* to make it one page wide */ +#define SYSPROF_MAX_ADDRESSES 1020 /* to make it three pages wide */ struct SysprofStackTrace { + void *kernel_stack[1024]; + void 
*addresses[SYSPROF_MAX_ADDRESSES]; + int n_kernel_words; + int n_addresses; /* note: this can be 1 if the process was compiled + * with -fomit-frame-pointer or is otherwise weird + */ int pid; /* -1 if in kernel */ int truncated; - int n_addresses; /* note: this can be 1 if the process was compiled - * with -fomit-frame-pointer or is otherwise weird - */ - void *addresses[SYSPROF_MAX_ADDRESSES]; }; struct SysprofMmapArea @@ -3,7 +3,7 @@ * Copyright 2002, Kristian Rietveld * * Sysprof -- Sampling, systemwide CPU profiler - * Copyright 2004-2005 Soeren Sandmann + * Copyright 2004-2007 Soeren Sandmann * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -285,7 +285,7 @@ static int page_size (void) { static int page_size; - static gboolean has_page_size; + static gboolean has_page_size = FALSE; if (!has_page_size) { @@ -489,9 +489,15 @@ find_kernel_binary (void) return binary; } +typedef struct +{ + gulong address; + char *name; +} KernelSymbol; + static void parse_kallsym_line (const char *line, - GHashTable *table) + GArray *table) { char **tokens = g_strsplit_set (line, " \t", -1); @@ -502,19 +508,25 @@ parse_kallsym_line (const char *line, address = strtoul (tokens[0], &endptr, 16); - if (*endptr == '\0') + if (*endptr == '\0' && + (strcmp (tokens[1], "T") == 0 || + strcmp (tokens[1], "t") == 0)) { - g_hash_table_insert ( - table, GUINT_TO_POINTER (address), g_strdup (tokens[2])); + KernelSymbol sym; + + sym.address = address; + sym.name = g_strdup (tokens[2]); + + g_array_append_val (table, sym); } } g_strfreev (tokens); } -static void +static gboolean parse_kallsyms (const char *kallsyms, - GHashTable *table) + GArray *table) { const char *sol; const char *eol; @@ -532,47 +544,133 @@ parse_kallsyms (const char *kallsyms, sol = eol + 1; eol = strchr (sol, '\n'); } + + if (table->len <= 1) + return FALSE; + + return TRUE; +} + +static int +compare_syms (gconstpointer a, 
gconstpointer b) +{ + const KernelSymbol *sym_a = a; + const KernelSymbol *sym_b = b; + + if (sym_a->address > sym_b->address) + return 1; + else if (sym_a->address == sym_b->address) + return 0; + else + return -1; } -static GHashTable * +static GArray * get_kernel_symbols (void) { - static gboolean read_symbols = FALSE; - static GHashTable *kernel_syms; + static GArray *kernel_syms; + static gboolean initialized = FALSE; - if (!read_symbols) + if (!initialized) { char *kallsyms; - g_file_get_contents ("/proc/kallsyms", &kallsyms, NULL, NULL); - - if (kallsyms) + if (g_file_get_contents ("/proc/kallsyms", &kallsyms, NULL, NULL)) { - kernel_syms = g_hash_table_new_full (g_direct_hash, g_direct_equal, - NULL, g_free); - - parse_kallsyms (kallsyms, kernel_syms); - - g_free (kallsyms); - g_hash_table_destroy (kernel_syms); + if (kallsyms) + { + kernel_syms = g_array_new (TRUE, TRUE, sizeof (KernelSymbol)); + + if (parse_kallsyms (kallsyms, kernel_syms)) + { + g_array_sort (kernel_syms, compare_syms); + } + else + { + g_array_free (kernel_syms, TRUE); + kernel_syms = NULL; + } + } } - read_symbols = TRUE; + if (!kernel_syms) + g_print ("Warning: /proc/kallsyms could not be " + "read. 
Kernel symbols will not be available\n"); + + initialized = TRUE; } - return NULL; + return kernel_syms; +} + +gboolean +process_is_kernel_address (gulong address) +{ + GArray *ksyms = get_kernel_symbols (); + + if (ksyms && + address >= g_array_index (ksyms, KernelSymbol, 0).address && + address < g_array_index (ksyms, KernelSymbol, ksyms->len - 1).address) + { + return TRUE; + } + + return FALSE; +} + +static KernelSymbol * +do_lookup (KernelSymbol *symbols, + gulong address, + int first, + int last) +{ + if (address >= symbols[last].address) + { + return &(symbols[last]); + } + else if (last - first < 3) + { + while (last >= first) + { + if (address >= symbols[last].address) + return &(symbols[last]); + + last--; + } + + return NULL; + } + else + { + int mid = (first + last) / 2; + + if (symbols[mid].address > address) + return do_lookup (symbols, address, first, mid); + else + return do_lookup (symbols, address, mid, last); + } } -static const char * -lookup_kernel_symbol (gulong address) +const char * +process_lookup_kernel_symbol (gulong address, + gulong *offset) { - static const char *const kernel = "In kernel"; + GArray *ksyms = get_kernel_symbols (); + KernelSymbol *result; - return kernel; + if (ksyms->len == 0) + return NULL; + + result = do_lookup ((KernelSymbol *)ksyms->data, address, 0, ksyms->len - 1); + if (result && offset) + *offset = address - result->address; + + return result? 
result->name : NULL; } const char * process_lookup_symbol (Process *process, gulong address) { + static const char *const kernel = "kernel"; const BinSymbol *result; Map *map = process_locate_map (process, address); @@ -580,12 +678,16 @@ process_lookup_symbol (Process *process, gulong address) if (address == 0x1) { - get_kernel_symbols (); - - return lookup_kernel_symbol (address); + return kernel; } else if (!map) { + gulong offset; + const char *res = process_lookup_kernel_symbol (address, &offset); + + if (res && offset != 0) + return res; + if (!process->undefined) { process->undefined = @@ -615,11 +717,6 @@ process_lookup_symbol (Process *process, gulong address) address -= map->start; address += map->offset; - -#if 0 - address -= map->start; - address += map->offset; -#endif #if 0 if (strcmp (map->filename, "[vdso]") == 0) @@ -637,7 +734,7 @@ process_lookup_symbol (Process *process, gulong address) if (!bin_file_check_inode (map->bin_file, map->inode)) { /* If the inodes don't match, it's probably because the - * file has changed since the process started. Just return + * file has changed since the process was started. Just return * the undefined symbol in that case. 
*/ address = 0x0; @@ -56,5 +56,8 @@ const char * process_lookup_symbol (Process *process, const char * process_get_cmdline (Process *process); void process_flush_caches (void); const guint8 *process_get_vdso_bytes (gsize *length); +gboolean process_is_kernel_address (gulong address); +const char * process_lookup_kernel_symbol (gulong address, + gulong *offset); #endif diff --git a/sysprof-text.c b/sysprof-text.c index 34b9da3..19be1f7 100644 --- a/sysprof-text.c +++ b/sysprof-text.c @@ -25,6 +25,7 @@ #include <fcntl.h> #include <signal.h> #include <glib.h> +#include <stdio.h> #include "stackstash.h" #include "module/sysprof-module.h" @@ -46,18 +47,23 @@ static void dump_data (Application *app) { GError *err = NULL; - Profile *profile = collector_create_profile (app->collector); + Profile *profile; + + printf ("Saving profile in %s ... ", app->outfile); + fflush (stdout); + profile = collector_create_profile (app->collector); profile_save (profile, app->outfile, &err); if (err) { + printf ("failed\n"); fprintf (stderr, "Error saving %s: %s\n", app->outfile, err->message); exit (1); } else { - printf ("Saved profile in %s\n\n", app->outfile); + printf ("done\n\n"); } } @@ -461,6 +461,10 @@ fill_main_list (Application *app) OBJECT_NAME, object->name, OBJECT_SELF, 100.0 * object->self / profile_size, OBJECT_TOTAL, 100.0 * object->total / profile_size, +#if 0 + OBJECT_SELF, (double)object->self, + OBJECT_TOTAL, (double)object->total, +#endif OBJECT_OBJECT, object->name, -1); } @@ -493,6 +497,10 @@ add_node (GtkTreeStore *store, DESCENDANTS_NAME, node->name, DESCENDANTS_SELF, 100 * (node->self)/(double)size, DESCENDANTS_NON_RECURSE, 100 * (node->non_recursion)/(double)size, +#if 0 + DESCENDANTS_SELF, (double)node->self, + DESCENDANTS_NON_RECURSE, (double)node->non_recursion, +#endif DESCENDANTS_OBJECT, node->name, -1); @@ -560,6 +568,10 @@ add_callers (GtkListStore *list_store, CALLERS_NAME, name, CALLERS_SELF, 100.0 * callers->self / profile_size, CALLERS_TOTAL, 100.0 * 
callers->total / profile_size, +#if 0 + CALLERS_SELF, (double)callers->self, + CALLERS_TOTAL, (double)callers->total, +#endif CALLERS_OBJECT, callers->name, -1); @@ -1146,7 +1158,8 @@ update_screenshot_window (Application *app) } } - app->update_screenshot_id = g_idle_add (update_screenshot_window_idle, app); + app->update_screenshot_id = g_idle_add ( + update_screenshot_window_idle, app); } static void @@ -1675,6 +1688,10 @@ main (int argc, */ g_idle_add_full (G_PRIORITY_LOW, load_file, file_open_data, NULL); } + +#if 0 + g_idle_add (gtk_main_quit, NULL); +#endif gtk_main (); |