/* * Copyright © 2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * */ #include #include #include #include "igt_core.h" #include "igt_stats.h" #define U64_MAX ((uint64_t)~0ULL) #define sorted_value(stats, i) (stats->is_float ? stats->sorted_f[i] : stats->sorted_u64[i]) #define unsorted_value(stats, i) (stats->is_float ? stats->values_f[i] : stats->values_u64[i]) /** * SECTION:igt_stats * @short_description: Tools for statistical analysis * @title: Stats * @include: igt.h * * Various tools to make sense of data. * * #igt_stats_t is a container of data samples. igt_stats_push() is used to add * new samples and various results (mean, variance, standard deviation, ...) * can then be retrieved. 
* * |[ * igt_stats_t stats; * * igt_stats_init(&stats, 8); * * igt_stats_push(&stats, 2); * igt_stats_push(&stats, 4); * igt_stats_push(&stats, 4); * igt_stats_push(&stats, 4); * igt_stats_push(&stats, 5); * igt_stats_push(&stats, 5); * igt_stats_push(&stats, 7); * igt_stats_push(&stats, 9); * * printf("Mean: %lf\n", igt_stats_get_mean(&stats)); * * igt_stats_fini(&stats); * ]| */ static unsigned int get_new_capacity(int need) { unsigned int new_capacity; /* taken from Python's list */ new_capacity = (need >> 6) + (need < 9 ? 3 : 6); new_capacity += need; return new_capacity; } static void igt_stats_ensure_capacity(igt_stats_t *stats, unsigned int n_additional_values) { unsigned int new_n_values = stats->n_values + n_additional_values; unsigned int new_capacity; if (new_n_values <= stats->capacity) return; new_capacity = get_new_capacity(new_n_values); stats->values_u64 = realloc(stats->values_u64, sizeof(*stats->values_u64) * new_capacity); igt_assert(stats->values_u64); stats->capacity = new_capacity; free(stats->sorted_u64); stats->sorted_u64 = NULL; } /** * igt_stats_init: * @stats: An #igt_stats_t instance * * Initializes an #igt_stats_t instance. igt_stats_fini() must be called once * finished with @stats. */ void igt_stats_init(igt_stats_t *stats) { memset(stats, 0, sizeof(*stats)); igt_stats_ensure_capacity(stats, 128); stats->min = U64_MAX; stats->max = 0; } /** * igt_stats_init_with_size: * @stats: An #igt_stats_t instance * @capacity: Number of data samples @stats can contain * * Like igt_stats_init() but with a size to avoid reallocating the underlying * array(s) when pushing new values. Useful if we have a good idea of the * number of data points we want @stats to hold. * * igt_stats_fini() must be called once finished with @stats. 
*/
void igt_stats_init_with_size(igt_stats_t *stats, unsigned int capacity)
{
	/* Start from an all-zero state, then attach the requested storage. */
	memset(stats, 0, sizeof(*stats));
	igt_stats_ensure_capacity(stats, capacity);

	/* Integer extrema start inverted so any pushed sample tightens them. */
	stats->max = 0;
	stats->min = U64_MAX;

	/* Likewise for the floating point [min, max] pair. */
	stats->range[1] = -HUGE_VAL;
	stats->range[0] = HUGE_VAL;
}

/**
 * igt_stats_fini:
 * @stats: An #igt_stats_t instance
 *
 * Releases the sample array and the sorted copy owned by @stats.
 */
void igt_stats_fini(igt_stats_t *stats)
{
	free(stats->sorted_u64);
	free(stats->values_u64);
}

/**
 * igt_stats_is_population:
 * @stats: An #igt_stats_t instance
 *
 * Returns: #true if @stats represents a population, #false if only a sample.
 *
 * See igt_stats_set_population() for more details.
 */
bool igt_stats_is_population(igt_stats_t *stats)
{
	bool population = stats->is_population;

	return population;
}

/**
 * igt_stats_set_population:
 * @stats: An #igt_stats_t instance
 * @full_population: Whether we're dealing with sample data or a full
 *		     population
 *
 * In statistics, we usually deal with a subset of the full data (which may be
 * a continuous or infinite set). Data analysis is then done on a sample of
 * this population.
 *
 * This has some importance as only having a sample of the data leads to
 * [biased estimators](https://en.wikipedia.org/wiki/Bias_of_an_estimator). We
 * currently use the information given by this method to apply
 * [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 * to the variance.
 *
 * Note that even if we manage to have an unbiased variance by multiplying
 * a sample variance by the Bessel's correction, n/(n - 1), the standard
 * deviation derived from the unbiased variance isn't itself unbiased.
 * Statisticians talk about a "corrected" standard deviation.
 *
 * When giving #true to this function, the data set in @stats is considered a
 * full population. It's considered a sample of a bigger population otherwise.
 *
 * When newly created, @stats defaults to holding sample data.
*/
void igt_stats_set_population(igt_stats_t *stats, bool full_population)
{
	if (full_population == stats->is_population)
		return;

	stats->is_population = full_population;
	/* The variance divisor depends on this flag: force a recompute. */
	stats->mean_variance_valid = false;
}

/**
 * igt_stats_push:
 * @stats: An #igt_stats_t instance
 * @value: An integer value
 *
 * Adds a new value to the @stats dataset.
 *
 * If @stats has already been converted to a floating point collection, the
 * value is forwarded to igt_stats_push_float().
 */
void igt_stats_push(igt_stats_t *stats, uint64_t value)
{
	if (stats->is_float) {
		igt_stats_push_float(stats, value);
		return;
	}

	igt_stats_ensure_capacity(stats, 1);

	stats->values_u64[stats->n_values++] = value;

	/* Cached results no longer describe the dataset. */
	stats->mean_variance_valid = false;
	stats->sorted_array_valid = false;

	if (value < stats->min)
		stats->min = value;
	if (value > stats->max)
		stats->max = value;
}

/**
 * igt_stats_push_float:
 * @stats: An #igt_stats_t instance
 * @value: A floating point value
 *
 * Adds a new value to the @stats dataset and converts the igt_stats from
 * an integer collection to a floating point one.
 */
void igt_stats_push_float(igt_stats_t *stats, double value)
{
	igt_stats_ensure_capacity(stats, 1);

	if (!stats->is_float) {
		unsigned int n;

		/*
		 * Convert the existing samples in place.
		 * NOTE(review): this relies on values_f and values_u64 naming
		 * the same storage with same-sized elements (only values_u64
		 * is ever allocated in this file) -- confirm in igt_stats.h.
		 */
		for (n = 0; n < stats->n_values; n++)
			stats->values_f[n] = stats->values_u64[n];

		/*
		 * Integer pushes only maintain ->min/->max. Carry them over
		 * so the floating point range also covers earlier samples.
		 */
		if (stats->n_values) {
			if (stats->min < stats->range[0])
				stats->range[0] = stats->min;
			if (stats->max > stats->range[1])
				stats->range[1] = stats->max;
		}

		stats->is_float = true;
	}

	stats->values_f[stats->n_values++] = value;

	/* Cached results no longer describe the dataset. */
	stats->mean_variance_valid = false;
	stats->sorted_array_valid = false;

	if (value < stats->range[0])
		stats->range[0] = value;
	if (value > stats->range[1])
		stats->range[1] = value;
}

/**
 * igt_stats_push_array:
 * @stats: An #igt_stats_t instance
 * @values: (array length=n_values): A pointer to an array of data points
 * @n_values: The number of data points to add
 *
 * Adds an array of values to the @stats dataset.
*/ void igt_stats_push_array(igt_stats_t *stats, const uint64_t *values, unsigned int n_values) { unsigned int i; igt_stats_ensure_capacity(stats, n_values); for (i = 0; i < n_values; i++) igt_stats_push(stats, values[i]); } /** * igt_stats_get_min: * @stats: An #igt_stats_t instance * * Retrieves the minimal value in @stats */ uint64_t igt_stats_get_min(igt_stats_t *stats) { igt_assert(!stats->is_float); return stats->min; } /** * igt_stats_get_max: * @stats: An #igt_stats_t instance * * Retrieves the maximum value in @stats */ uint64_t igt_stats_get_max(igt_stats_t *stats) { igt_assert(!stats->is_float); return stats->max; } /** * igt_stats_get_range: * @stats: An #igt_stats_t instance * * Retrieves the range of the values in @stats. The range is the difference * between the highest and the lowest value. * * The range can be a deceiving characterization of the values, because there * can be extreme minimal and maximum values that are just anomalies. Prefer * the interquatile range (see igt_stats_get_iqr()) or an histogram. */ uint64_t igt_stats_get_range(igt_stats_t *stats) { return igt_stats_get_max(stats) - igt_stats_get_min(stats); } static int cmp_u64(const void *pa, const void *pb) { const uint64_t *a = pa, *b = pb; if (*a < *b) return -1; if (*a > *b) return 1; return 0; } static int cmp_f(const void *pa, const void *pb) { const double *a = pa, *b = pb; if (*a < *b) return -1; if (*a > *b) return 1; return 0; } static void igt_stats_ensure_sorted_values(igt_stats_t *stats) { if (stats->sorted_array_valid) return; if (!stats->sorted_u64) { /* * igt_stats_ensure_capacity() will free ->sorted when the * capacity increases, which also correspond to an invalidation * of the sorted array. We'll then reallocate it here on * demand. 
*/ stats->sorted_u64 = calloc(stats->capacity, sizeof(*stats->values_u64)); igt_assert(stats->sorted_u64); } memcpy(stats->sorted_u64, stats->values_u64, sizeof(*stats->values_u64) * stats->n_values); qsort(stats->sorted_u64, stats->n_values, sizeof(*stats->values_u64), stats->is_float ? cmp_f : cmp_u64); stats->sorted_array_valid = true; } /* * We use Tukey's hinge for our quartiles determination. * ends (end, lower_end) are exclusive. */ static double igt_stats_get_median_internal(igt_stats_t *stats, unsigned int start, unsigned int end, unsigned int *lower_end /* out */, unsigned int *upper_start /* out */) { unsigned int mid, n_values = end - start; double median; igt_stats_ensure_sorted_values(stats); /* odd number of data points */ if (n_values % 2 == 1) { /* median is the value in the middle (actual datum) */ mid = start + n_values / 2; median = sorted_value(stats, mid); /* the two halves contain the median value */ if (lower_end) *lower_end = mid + 1; if (upper_start) *upper_start = mid; /* even number of data points */ } else { /* * The middle is in between two indexes, 'mid' points at the * lower one. The median is then the average between those two * values. */ mid = start + n_values / 2 - 1; median = (sorted_value(stats, mid) + sorted_value(stats, mid+1))/2.; if (lower_end) *lower_end = mid + 1; if (upper_start) *upper_start = mid + 1; } return median; } /** * igt_stats_get_quartiles: * @stats: An #igt_stats_t instance * @q1: (out): lower or 25th quartile * @q2: (out): median or 50th quartile * @q3: (out): upper or 75th quartile * * Retrieves the [quartiles](https://en.wikipedia.org/wiki/Quartile) of the * @stats dataset. 
*/ void igt_stats_get_quartiles(igt_stats_t *stats, double *q1, double *q2, double *q3) { unsigned int lower_end, upper_start; double ret; if (stats->n_values < 3) { if (q1) *q1 = 0.; if (q2) *q2 = 0.; if (q3) *q3 = 0.; return; } ret = igt_stats_get_median_internal(stats, 0, stats->n_values, &lower_end, &upper_start); if (q2) *q2 = ret; ret = igt_stats_get_median_internal(stats, 0, lower_end, NULL, NULL); if (q1) *q1 = ret; ret = igt_stats_get_median_internal(stats, upper_start, stats->n_values, NULL, NULL); if (q3) *q3 = ret; } /** * igt_stats_get_iqr: * @stats: An #igt_stats_t instance * * Retrieves the * [interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) * (IQR) of the @stats dataset. */ double igt_stats_get_iqr(igt_stats_t *stats) { double q1, q3; igt_stats_get_quartiles(stats, &q1, NULL, &q3); return (q3 - q1); } /** * igt_stats_get_median: * @stats: An #igt_stats_t instance * * Retrieves the median of the @stats dataset. */ double igt_stats_get_median(igt_stats_t *stats) { return igt_stats_get_median_internal(stats, 0, stats->n_values, NULL, NULL); } /* * Algorithm popularised by Knuth in: * * The Art of Computer Programming, volume 2: Seminumerical Algorithms, * 3rd edn., p. 232. Boston: Addison-Wesley * * Source: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance */ static void igt_stats_knuth_mean_variance(igt_stats_t *stats) { double mean = 0., m2 = 0.; unsigned int i; if (stats->mean_variance_valid) return; for (i = 0; i < stats->n_values; i++) { double delta = unsorted_value(stats, i) - mean; mean += delta / (i + 1); m2 += delta * (unsorted_value(stats, i) - mean); } stats->mean = mean; if (stats->n_values > 1 && !stats->is_population) stats->variance = m2 / (stats->n_values - 1); else stats->variance = m2 / stats->n_values; stats->mean_variance_valid = true; } /** * igt_stats_get_mean: * @stats: An #igt_stats_t instance * * Retrieves the mean of the @stats dataset. 
*/ double igt_stats_get_mean(igt_stats_t *stats) { igt_stats_knuth_mean_variance(stats); return stats->mean; } /** * igt_stats_get_variance: * @stats: An #igt_stats_t instance * * Retrieves the variance of the @stats dataset. */ double igt_stats_get_variance(igt_stats_t *stats) { igt_stats_knuth_mean_variance(stats); return stats->variance; } /** * igt_stats_get_std_deviation: * @stats: An #igt_stats_t instance * * Retrieves the standard deviation of the @stats dataset. */ double igt_stats_get_std_deviation(igt_stats_t *stats) { igt_stats_knuth_mean_variance(stats); return sqrt(stats->variance); } /** * igt_stats_get_iqm: * @stats: An #igt_stats_t instance * * Retrieves the * [interquartile mean](https://en.wikipedia.org/wiki/Interquartile_mean) (IQM) * of the @stats dataset. * * The interquartile mean is a "statistical measure of central tendency". * It is a truncated mean that discards the lowest and highest 25% of values, * and calculates the mean value of the remaining central values. * * It's useful to hide outliers in measurements (due to cold cache etc). */ double igt_stats_get_iqm(igt_stats_t *stats) { unsigned int q1, q3, i; double mean; igt_stats_ensure_sorted_values(stats); q1 = (stats->n_values + 3) / 4; q3 = 3 * stats->n_values / 4; mean = 0; for (i = 0; i <= q3 - q1; i++) mean += (sorted_value(stats, q1 + i) - mean) / (i + 1); if (stats->n_values % 4) { double rem = .5 * (stats->n_values % 4) / 4; q1 = (stats->n_values) / 4; q3 = (3 * stats->n_values + 3) / 4; mean += rem * (sorted_value(stats, q1) - mean) / i++; mean += rem * (sorted_value(stats, q3) - mean) / i++; } return mean; } /** * igt_stats_get_trimean: * @stats: An #igt_stats_t instance * * Retrieves the [trimean](https://en.wikipedia.org/wiki/Trimean) of the @stats * dataset. * * The trimean is a the most efficient 3-point L-estimator, even more * robust than the median at estimating the average of a sample population. 
*/
double igt_stats_get_trimean(igt_stats_t *stats)
{
	double lower, median, upper;

	igt_stats_get_quartiles(stats, &lower, &median, &upper);

	/* Weighted average of the quartiles, the median counting twice. */
	return (lower + 2*median + upper) / 4;
}

/**
 * igt_mean_init:
 * @m: tracking structure
 *
 * Initializes or resets @m: every field is zeroed and the tracked extrema are
 * set so that the first added value replaces both of them.
 */
void igt_mean_init(struct igt_mean *m)
{
	memset(m, 0, sizeof(*m));

	m->min = HUGE_VAL;
	m->max = -HUGE_VAL;
}

/**
 * igt_mean_add:
 * @m: tracking structure
 * @v: value
 *
 * Adds a new value @v to @m, updating the running mean, the accumulated
 * squared deviation and the minimum/maximum seen so far.
 */
void igt_mean_add(struct igt_mean *m, double v)
{
	/* Deviation from the mean as it was before @v is accounted for. */
	const double delta = v - m->mean;

	m->count++;
	m->mean += delta / m->count;
	/* The second factor uses the mean updated just above. */
	m->sq += delta * (v - m->mean);

	if (v > m->max)
		m->max = v;
	if (v < m->min)
		m->min = v;
}

/**
 * igt_mean_get:
 * @m: tracking structure
 *
 * Returns the current mean of the samples tracked in @m.
 */
double igt_mean_get(struct igt_mean *m)
{
	return m->mean;
}

/**
 * igt_mean_get_variance:
 * @m: tracking structure
 *
 * Returns the current variance of the samples tracked in @m, i.e. the
 * accumulated squared deviation divided by the number of samples.
 */
double igt_mean_get_variance(struct igt_mean *m)
{
	return m->sq / m->count;
}