ladybird/AK/Statistics.h
Staubfinger 6b9344e86c AK: Use AK::quickselect_inplace to compute AK::Statistics::median
Quickselect is an algorithm that can find the median
of a Vector without fully sorting it.
This replaces the old, very naive implementation
of `AK::Statistics::median()` with `AK::quickselect_inplace`.
2023-02-03 19:04:15 +01:00

108 lines
2.7 KiB
C++

/*
* Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Concepts.h>
#include <AK/Math.h>
#include <AK/QuickSelect.h>
#include <AK/Vector.h>
namespace AK {
// Collects values of an arithmetic type T and derives summary statistics from them.
// Every added value is stored in insertion order (until median() reorders them),
// and a running sum is maintained alongside the stored values.
template<Arithmetic T = float>
class Statistics {
public:
    Statistics() = default;
    ~Statistics() = default;

    // Records `value` and adds it to the running sum.
    void add(T const& value)
    {
        // FIXME: Check for an overflow
        m_sum += value;
        m_values.append(value);
    }

    // Returns the running sum of all values added so far (T{} if nothing was added).
    T const sum() const { return m_sum; }

    // FIXME: Unclear Wording, average can mean a lot of different things
    //        Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
    // Returns the arithmetic mean of the stored values as a float.
    float average() const
    {
        // Let's assume the average of an empty dataset is 0
        if (size() == 0)
            return 0;

        // TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
        return static_cast<float>(sum()) / size();
    }

    // Returns the smallest stored value. The dataset must not be empty.
    T const min() const
    {
        // Let's rather fail than read past the end of the collection
        VERIFY(size() != 0);

        T minimum = m_values[0];
        for (T number : values()) {
            if (number < minimum) {
                minimum = number;
            }
        }
        return minimum;
    }

    // Returns the largest stored value. The dataset must not be empty.
    T const max() const
    {
        // Let's rather fail than read past the end of the collection
        VERIFY(size() != 0);

        T maximum = m_values[0];
        for (T number : values()) {
            if (number > maximum) {
                maximum = number;
            }
        }
        return maximum;
    }

    // Returns the median of the stored values.
    // NOTE: This is not const: quickselect_inplace() is handed m_values by name and,
    //       as its name suggests, may reorder the stored values while selecting.
    T const median()
    {
        // Let's assume the Median of an empty dataset is 0
        if (size() == 0)
            return 0;

        // If the number of values is even, the median is the arithmetic mean of the two middle values
        if (size() % 2 == 0) {
            auto index = size() / 2;
            // Each middle value is copied out right after its selection, so the second
            // selection reordering m_values cannot invalidate the first result.
            auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
            auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
            // FIXME: For integral T the addition may overflow, and the division truncates.
            return (median1 + median2) / 2;
        }

        return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
    }

    // Returns the square root of variance().
    float standard_deviation() const { return sqrt(variance()); }

    // Returns the population variance: the mean of the squared deviations from average().
    float variance() const
    {
        // Let's assume the variance of an empty dataset is 0, in line with average() and median().
        // Without this guard the final division would be 0 / 0 and yield NaN.
        if (size() == 0)
            return 0;

        float summation = 0;
        float avg = average();
        for (T number : values()) {
            float difference = static_cast<float>(number) - avg;
            summation += (difference * difference);
        }
        return summation / size();
    }

    // Read-only access to the stored values.
    Vector<T> const& values() const { return m_values; }

    // Number of values added so far.
    size_t size() const { return m_values.size(); }

private:
    Vector<T> m_values;
    T m_sum {};
};
}