Diffstat (limited to 'collector.c')
 collector.c | 804 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 804 insertions(+), 0 deletions(-)
diff --git a/collector.c b/collector.c
new file mode 100644
index 0000000..86d96ed
--- /dev/null
+++ b/collector.c
@@ -0,0 +1,804 @@
+/* Sysprof -- Sampling, systemwide CPU profiler
+ * Copyright 2004, Red Hat, Inc.
+ * Copyright 2004, 2005, Soeren Sandmann
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <stdint.h>
+#include <glib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "stackstash.h"
+#include "collector.h"
+#include "watch.h"
+#include "elfparser.h"
+#include "tracker.h"
+
+#include "perf_counter.h"
+#include "util.h"
+
+#define d_print(...) /* debug printout; expands to nothing */
+
+#define N_PAGES 32 /* Number of pages in the ringbuffer (must be a power of two) */
+
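+/* Number of samples the kernel will accumulate before waking us up;
+ * also used as the batch size for invoking the client callback
+ * (see counter_new() and on_read())
+ */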
+#define N_WAKEUP_EVENTS 149
+
+typedef struct counter_t counter_t;
+typedef struct sample_event_t sample_event_t;
+typedef struct mmap_event_t mmap_event_t;
+typedef struct comm_event_t comm_event_t;
+typedef struct exit_event_t exit_event_t;
+typedef struct fork_event_t fork_event_t;
+typedef union counter_event_t counter_event_t;
+
+static void process_event (Collector *collector,
+ counter_t *counter,
+ counter_event_t *event);
+
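+/* Per-CPU performance counter. fd is the counter's file descriptor,
+ * mmap_page points to the shared metadata page, data to the event
+ * ring buffer, and tail is our current read position in that buffer.
+ */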
+struct counter_t
+{
+ Collector * collector;
+
+ int fd;
+ struct perf_counter_mmap_page * mmap_page;
+ uint8_t * data;
+
+ uint64_t tail;
+ int cpu;
+};
+
+struct sample_event_t
+{
+ struct perf_event_header header;
+ uint64_t ip;
+ uint32_t pid, tid;
+ uint64_t n_ips;
+ uint64_t ips[1];
+};
+
+struct comm_event_t
+{
+ struct perf_event_header header;
+ uint32_t pid, tid;
+ char comm[1];
+};
+
+struct mmap_event_t
+{
+ struct perf_event_header header;
+
+ uint32_t pid, tid;
+ uint64_t addr;
+ uint64_t len;
+ uint64_t pgoff;
+ char filename[1];
+};
+
+struct fork_event_t
+{
+ struct perf_event_header header;
+
+ uint32_t pid, ppid;
+ uint32_t tid, ptid;
+};
+
+struct exit_event_t
+{
+ struct perf_event_header header;
+
+ uint32_t pid, ppid;
+ uint32_t tid, ptid;
+};
+
+union counter_event_t
+{
+ struct perf_event_header header;
+ mmap_event_t mmap;
+ comm_event_t comm;
+ sample_event_t sample;
+ fork_event_t fork;
+ exit_event_t exit;
+};
+
+struct Collector
+{
+ CollectorFunc callback;
+ gpointer data;
+
+ tracker_t * tracker;
+ GTimeVal latest_reset;
+
+ int prev_samples;
+ int n_samples;
+
+ GList * counters;
+
+ gboolean use_hw_counters;
+};
+
+static int
+get_n_cpus (void)
+{
+ return sysconf (_SC_NPROCESSORS_ONLN);
+}
+
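+/* Wrapper for the perf_counter_open() system call (renamed to
+ * perf_event_open() in later kernels). There is no libc wrapper, so
+ * the syscall number has to be supplied per architecture.
+ */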
+static int
+sysprof_perf_counter_open (struct perf_counter_attr *attr,
+ pid_t pid,
+ int cpu,
+ int group_fd,
+ unsigned long flags)
+{
+#ifndef __NR_perf_counter_open
+#if defined(__i386__)
+#define __NR_perf_counter_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_counter_open 298
+#elif defined(__arm__)
+#define __NR_perf_counter_open 364
+#elif defined(__bfin__)
+#define __NR_perf_counter_open 369
+#elif defined(__frv__)
+#define __NR_perf_counter_open 336
+#elif defined(__m68k__)
+#define __NR_perf_counter_open 332
+#elif defined(__MICROBLAZE__)
+#define __NR_perf_counter_open 366
+#elif defined(__mips__) && defined(_ABIO32)
+#define __NR_perf_counter_open 4333
+#elif defined(__mips__) && defined(_ABIN32)
+#define __NR_perf_counter_open 6296
+#elif defined(__mips__) && defined(_ABI64)
+#define __NR_perf_counter_open 5292
+#elif defined(__mn10300__)
+#define __NR_perf_counter_open 337
+#elif defined(__hppa__)
+#define __NR_perf_counter_open 318
+#elif defined(__ppc__) || defined(__ppc64__)
+#define __NR_perf_counter_open 319
+#elif defined(__s390__)
+#define __NR_perf_counter_open 331
+#elif defined(__sh__) && (!defined(__SH5__) || __SH5__ == 32)
+#define __NR_perf_counter_open 336
+#elif defined(__sh__) && defined(__SH5__) && __SH5__ == 64
+#define __NR_perf_counter_open 364
+#elif defined(__sparc__) || defined(__sparc64__)
+#define __NR_perf_counter_open 327
+#endif
+#endif
+
+ attr->size = sizeof(*attr);
+
+ return syscall (__NR_perf_counter_open, attr, pid, cpu, group_fd, flags);
+}
+
+
+static double
+timeval_to_ms (const GTimeVal *timeval)
+{
+    /* Convert to double before multiplying so that tv_sec * G_USEC_PER_SEC
+     * cannot overflow on 32 bit platforms
+     */
+    return ((double)timeval->tv_sec * G_USEC_PER_SEC + timeval->tv_usec) / 1000.0;
+}
+
+static double
+time_diff (const GTimeVal *first,
+ const GTimeVal *second)
+{
+ double first_ms = timeval_to_ms (first);
+ double second_ms = timeval_to_ms (second);
+
+ return first_ms - second_ms;
+}
+
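+/* Samples arriving within this many milliseconds after a reset are
+ * discarded, since they may predate the reset
+ */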
+#define RESET_DEAD_PERIOD 250
+
+static gboolean
+in_dead_period (Collector *collector)
+{
+ GTimeVal now;
+ double diff;
+
+ g_get_current_time (&now);
+
+ diff = time_diff (&now, &collector->latest_reset);
+
+ if (diff >= 0.0 && diff < RESET_DEAD_PERIOD)
+ return TRUE;
+
+ return FALSE;
+}
+
+static int
+get_page_size (void)
+{
+ static int page_size;
+ static gboolean has_page_size = FALSE;
+
+ if (!has_page_size)
+ {
+ page_size = getpagesize();
+ has_page_size = TRUE;
+ }
+
+ return page_size;
+}
+
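+/* Drain one counter's ring buffer. The kernel advances data_head as it
+ * writes events; we read from our saved tail up to data_head (with a
+ * read barrier in between), then publish the new read position through
+ * data_tail so the kernel can reuse the space.
+ */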
+static void
+on_read (gpointer data)
+{
+ counter_t *counter = data;
+ int mask = (N_PAGES * get_page_size() - 1);
+ int n_bytes = mask + 1;
+ gboolean skip_samples;
+ Collector *collector;
+ uint64_t head, tail;
+
+ collector = counter->collector;
+
+ tail = counter->tail;
+
+ head = counter->mmap_page->data_head;
+ rmb();
+
+ if (head < tail)
+ {
+	g_warning ("sysprof fails at ring buffers (head "FMT64", tail "FMT64")\n", head, tail);
+
+ tail = head;
+ }
+
+#if 0
+    /* Verify that the double mapping works (this only holds if the data
+     * area is actually mapped twice, back to back)
+     */
+    guint x = g_random_int() & mask;
+    g_assert (*(counter->data + x) == *(counter->data + x + n_bytes));
+#endif
+
+ skip_samples = in_dead_period (collector);
+
+#if 0
+ g_print ("n bytes %d\n", head - tail);
+#endif
+
+ while (head - tail >= sizeof (struct perf_event_header))
+ {
+ struct perf_event_header *header;
+ guint8 buffer[4096];
+ guint8 *free_me;
+
+ free_me = NULL;
+
+ /* Note that:
+ *
+ * - perf events are a multiple of 64 bits
+ * - the perf event header is 64 bits
+ * - the data area is a multiple of 64 bits
+ *
+	 * which means there will always be space for a complete header, so we
+	 * can safely dereference its size field.
+ */
+ header = (struct perf_event_header *)(counter->data + (tail & mask));
+
+ if (header->size > head - tail)
+ {
+ /* The kernel did not generate a complete event.
+ * I don't think that can happen, but we may as well
+ * be paranoid.
+ */
+ break;
+ }
+
+ if (counter->data + (tail & mask) + header->size > counter->data + n_bytes)
+ {
+ int n_before, n_after;
+ guint8 *b;
+
+ if (header->size > sizeof (buffer))
+ free_me = b = g_malloc (header->size);
+ else
+ b = buffer;
+
+ n_after = (tail & mask) + header->size - n_bytes;
+ n_before = header->size - n_after;
+
+ memcpy (b, counter->data + (tail & mask), n_before);
+ memcpy (b + n_before, counter->data, n_after);
+
+ header = (struct perf_event_header *)b;
+ }
+
+ if (!skip_samples || header->type != PERF_EVENT_SAMPLE)
+ {
+ if (header->type == PERF_EVENT_SAMPLE)
+ collector->n_samples++;
+
+ process_event (collector, counter, (counter_event_t *)header);
+ }
+
+ if (free_me)
+ g_free (free_me);
+
+ tail += header->size;
+ }
+
+ counter->tail = tail;
+ counter->mmap_page->data_tail = tail;
+
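+    /* Wake up the client after each batch of N_WAKEUP_EVENTS samples */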
+ if (collector->callback)
+ {
+ if (collector->n_samples - collector->prev_samples >= N_WAKEUP_EVENTS)
+ {
+ gboolean first_sample = collector->prev_samples == 0;
+
+ collector->callback (first_sample, collector->data);
+
+ collector->prev_samples = collector->n_samples;
+ }
+ }
+}
+
+static void *
+fail (GError **err, const char *what)
+{
+ g_set_error (err, COLLECTOR_ERROR, COLLECTOR_ERROR_FAILED,
+ "%s: %s", what, g_strerror (errno));
+
+ return NULL;
+}
+
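+/* Map the counter's sampling buffer: one metadata page
+ * (struct perf_counter_mmap_page) followed by N_PAGES of event data
+ */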
+static void *
+map_buffer (counter_t *counter, GError **err)
+{
+ int n_bytes = N_PAGES * get_page_size();
+ void *address;
+
+ address = mmap (NULL, n_bytes + get_page_size(), PROT_READ | PROT_WRITE, MAP_SHARED, counter->fd, 0);
+
+ if (address == MAP_FAILED)
+ return fail (err, "mmap");
+
+ return address;
+}
+
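+/* Redirect this counter's output into another counter's ring buffer,
+ * so that the events for all CPUs can be read from a single mmap
+ */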
+static gboolean
+counter_set_output (counter_t *counter, int output)
+{
+ return ioctl (counter->fd, PERF_COUNTER_IOC_SET_OUTPUT, output) == 0;
+}
+
+static void
+counter_enable (counter_t *counter)
+{
+ ioctl (counter->fd, PERF_COUNTER_IOC_ENABLE);
+}
+
+static void
+counter_disable (counter_t *counter)
+{
+ d_print ("disable\n");
+
+ ioctl (counter->fd, PERF_COUNTER_IOC_DISABLE);
+}
+
+static counter_t *
+counter_new (Collector *collector,
+ int cpu,
+ counter_t *output,
+ GError **err)
+{
+ struct perf_counter_attr attr;
+ counter_t *counter;
+ int fd;
+
+ counter = g_new (counter_t, 1);
+
+ memset (&attr, 0, sizeof (attr));
+
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+    attr.sample_period = 1200000;   /* In number of clock cycles -
+                                     * FIXME: consider using frequency instead
+                                     */
+ attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_CALLCHAIN;
+ attr.wakeup_events = N_WAKEUP_EVENTS;
+ attr.disabled = TRUE;
+
+ attr.mmap = 1;
+ attr.comm = 1;
+ attr.task = 1;
+ attr.exclude_idle = 1;
+
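+    /* Prefer a hardware cycle counter; fall back to the software
+     * cpu-clock counter if hardware counters are not available
+     */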
+ if (!collector->use_hw_counters || (fd = sysprof_perf_counter_open (&attr, -1, cpu, -1, 0)) < 0)
+ {
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.sample_period = 1000000;
+
+ fd = sysprof_perf_counter_open (&attr, -1, cpu, -1, 0);
+ }
+
+    if (fd < 0)
+    {
+        g_free (counter);
+        return fail (err, "Could not open performance counter");
+    }
+
+ counter->collector = collector;
+ counter->fd = fd;
+ counter->cpu = cpu;
+
+ if (output && counter_set_output (counter, output->fd))
+ {
+ counter->mmap_page = NULL;
+ counter->data = NULL;
+ counter->tail = 0;
+ }
+ else
+ {
+        counter->mmap_page = map_buffer (counter, err);
+
+        if (!counter->mmap_page)
+        {
+            close (counter->fd);
+            g_free (counter);
+            return NULL;
+        }
+
+ counter->data = (uint8_t *)counter->mmap_page + get_page_size ();
+ counter->tail = 0;
+
+ fd_add_watch (fd, counter);
+
+ fd_set_read_callback (fd, on_read);
+ }
+
+ return counter;
+}
+
+static void
+counter_free (counter_t *counter)
+{
+ d_print ("munmap\n");
+
+    /* Counters redirected with counter_set_output() have no mapping of
+     * their own
+     */
+    if (counter->mmap_page)
+        munmap (counter->mmap_page, (N_PAGES + 1) * get_page_size());
+ fd_remove_watch (counter->fd);
+
+ close (counter->fd);
+
+ g_free (counter);
+}
+
+/*
+ * Collector
+ */
+static void
+enable_counters (Collector *collector)
+{
+ GList *list;
+
+ d_print ("enable\n");
+
+ for (list = collector->counters; list != NULL; list = list->next)
+ {
+ counter_t *counter = list->data;
+
+ counter_enable (counter);
+ }
+}
+
+static void
+disable_counters (Collector *collector)
+{
+ GList *list;
+
+ d_print ("disable\n");
+
+ for (list = collector->counters; list != NULL; list = list->next)
+ {
+ counter_t *counter = list->data;
+
+ counter_disable (counter);
+ }
+}
+
+void
+collector_reset (Collector *collector)
+{
+ /* Disable the counters so that we won't track
+ * the activity of tracker_free()/tracker_new()
+ *
+ * They will still record fork/mmap/etc. so
+ * we can keep an accurate log of process creation
+ */
+ if (collector->counters)
+ {
+ d_print ("disable counters\n");
+
+ disable_counters (collector);
+ }
+
+ if (collector->tracker)
+ {
+ tracker_free (collector->tracker);
+ collector->tracker = tracker_new ();
+ }
+
+ collector->n_samples = 0;
+ collector->prev_samples = 0;
+
+ g_get_current_time (&collector->latest_reset);
+
+ if (collector->counters)
+ {
+ d_print ("enable counters\n");
+
+ enable_counters (collector);
+ }
+}
+
+/* callback is invoked whenever a new batch of samples has arrived */
+Collector *
+collector_new (gboolean use_hw_counters,
+ CollectorFunc callback,
+ gpointer data)
+{
+ Collector *collector = g_new0 (Collector, 1);
+
+ collector->callback = callback;
+ collector->data = data;
+ collector->tracker = NULL;
+ collector->use_hw_counters = use_hw_counters;
+
+ collector_reset (collector);
+
+ return collector;
+}
+
+static void
+process_mmap (Collector *collector, mmap_event_t *mmap)
+{
+ tracker_add_map (collector->tracker,
+ mmap->pid,
+ mmap->addr,
+ mmap->addr + mmap->len,
+ mmap->pgoff,
+ 0, /* inode */
+ mmap->filename);
+}
+
+static void
+process_comm (Collector *collector, comm_event_t *comm)
+{
+ d_print ("pid, tid: %d %d", comm->pid, comm->tid);
+
+ tracker_add_process (collector->tracker,
+ comm->pid,
+ comm->comm);
+}
+
+static void
+process_fork (Collector *collector, fork_event_t *fork)
+{
+ d_print ("ppid: %d pid: %d ptid: %d tid %d\n",
+ fork->ppid, fork->pid, fork->ptid, fork->tid);
+
+ tracker_add_fork (collector->tracker, fork->ppid, fork->pid);
+}
+
+static void
+process_exit (Collector *collector, exit_event_t *exit)
+{
+ d_print ("for %d %d", exit->pid, exit->tid);
+
+ tracker_add_exit (collector->tracker, exit->pid);
+}
+
+static void
+process_sample (Collector *collector,
+ sample_event_t *sample)
+{
+ uint64_t *ips;
+ int n_ips;
+
+ d_print ("pid, tid: %d %d", sample->pid, sample->tid);
+
+ if (sample->n_ips == 0)
+ {
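+	/* No callchain was recorded, so synthesize a minimal one
+	 * consisting of the interrupted instruction pointer preceded
+	 * by the appropriate context marker
+	 */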
+ uint64_t trace[3];
+
+ if (sample->header.misc & PERF_EVENT_MISC_KERNEL)
+ {
+ trace[0] = PERF_CONTEXT_KERNEL;
+ trace[1] = sample->ip;
+ trace[2] = PERF_CONTEXT_USER;
+
+ ips = trace;
+ n_ips = 3;
+ }
+ else
+ {
+ trace[0] = PERF_CONTEXT_USER;
+ trace[1] = sample->ip;
+
+ ips = trace;
+ n_ips = 2;
+ }
+ }
+ else
+ {
+ ips = sample->ips;
+ n_ips = sample->n_ips;
+ }
+
+ tracker_add_sample (collector->tracker,
+ sample->pid, ips, n_ips);
+}
+
+static void
+process_event (Collector *collector,
+ counter_t *counter,
+ counter_event_t *event)
+{
+ char *name;
+
+ switch (event->header.type)
+ {
+ case PERF_EVENT_MMAP: name = "mmap"; break;
+ case PERF_EVENT_LOST: name = "lost"; break;
+ case PERF_EVENT_COMM: name = "comm"; break;
+ case PERF_EVENT_EXIT: name = "exit"; break;
+ case PERF_EVENT_THROTTLE: name = "throttle"; break;
+ case PERF_EVENT_UNTHROTTLE: name = "unthrottle"; break;
+ case PERF_EVENT_FORK: name = "fork"; break;
+ case PERF_EVENT_READ: name = "read"; break;
+ case PERF_EVENT_SAMPLE: name = "samp"; break;
+ default: name = "unknown"; break;
+ }
+
+ d_print ("cpu %d :: %s :: ", counter->cpu, name);
+
+ switch (event->header.type)
+ {
+ case PERF_EVENT_MMAP:
+ process_mmap (collector, &event->mmap);
+ break;
+
+ case PERF_EVENT_LOST:
+ g_print ("lost event\n");
+ break;
+
+ case PERF_EVENT_COMM:
+ process_comm (collector, &event->comm);
+ break;
+
+ case PERF_EVENT_EXIT:
+ process_exit (collector, &event->exit);
+ break;
+
+ case PERF_EVENT_THROTTLE:
+ g_print ("throttle\n");
+ break;
+
+ case PERF_EVENT_UNTHROTTLE:
+ g_print ("unthrottle\n");
+ break;
+
+ case PERF_EVENT_FORK:
+ process_fork (collector, &event->fork);
+ break;
+
+ case PERF_EVENT_READ:
+ break;
+
+ case PERF_EVENT_SAMPLE:
+ process_sample (collector, &event->sample);
+ break;
+
+ default:
+ g_warning ("unknown event: %d (%d)\n",
+ event->header.type, event->header.size);
+ break;
+ }
+
+ d_print ("\n");
+}
+
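+/* Open one counter per CPU. The first counter owns the ring buffer;
+ * the others redirect their output into it with counter_set_output()
+ */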
+gboolean
+collector_start (Collector *collector,
+ GError **err)
+{
+ int n_cpus = get_n_cpus ();
+ int i;
+ counter_t *output;
+
+ if (!collector->tracker)
+ collector->tracker = tracker_new ();
+
+ output = NULL;
+ for (i = 0; i < n_cpus; ++i)
+ {
+ counter_t *counter = counter_new (collector, i, output, err);
+
+        if (!counter)
+        {
+            GList *list;
+
+            for (list = collector->counters; list != NULL; list = list->next)
+                counter_free (list->data);
+
+            g_list_free (collector->counters);
+            collector->counters = NULL;
+
+            tracker_free (collector->tracker);
+            collector->tracker = NULL;
+
+            return FALSE;
+        }
+
+ collector->counters = g_list_append (collector->counters, counter);
+
+ if (!output)
+ output = counter;
+ }
+
+ enable_counters (collector);
+
+ return TRUE;
+}
+
+void
+collector_stop (Collector *collector)
+{
+ GList *list;
+
+ if (!collector->counters)
+ return;
+
+ /* Read any remaining data */
+ for (list = collector->counters; list != NULL; list = list->next)
+ {
+ counter_t *counter = list->data;
+
+ if (counter->data)
+ on_read (counter);
+
+ counter_free (counter);
+ }
+
+ g_list_free (collector->counters);
+ collector->counters = NULL;
+}
+
+int
+collector_get_n_samples (Collector *collector)
+{
+ return collector->n_samples;
+}
+
+Profile *
+collector_create_profile (Collector *collector)
+{
+ /* The collector must be stopped when you create a profile */
+ g_assert (!collector->counters);
+
+ return tracker_create_profile (collector->tracker);
+}
+
+GQuark
+collector_error_quark (void)
+{
+ static GQuark q = 0;
+
+ if (q == 0)
+ q = g_quark_from_static_string ("collector-error-quark");
+
+ return q;
+}