2021-08-23 17:13:54 -04:00
|
|
|
/*
 * Copyright (c) 2021, Tobias Christiansen <tobyase@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2022-11-14 13:20:59 -05:00
|
|
|
#include <AK/Concepts.h>
|
2021-08-23 17:13:54 -04:00
|
|
|
#include <AK/Math.h>
|
2023-01-08 08:44:59 -05:00
|
|
|
#include <AK/QuickSelect.h>
|
2023-01-08 08:55:54 -05:00
|
|
|
#include <AK/QuickSort.h>
|
2021-08-23 17:13:54 -04:00
|
|
|
#include <AK/Vector.h>
|
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2023-01-08 08:55:54 -05:00
|
|
|
// Size thresholds consulted by Statistics::median(). A dataset holding at most this many
// values is fully sorted to locate its median; a larger one is handed to quickselect
// instead. Odd-sized and even-sized datasets each have their own threshold.
static constexpr int ODD_NAIVE_MEDIAN_CUTOFF = 200;
static constexpr int EVEN_NAIVE_MEDIAN_CUTOFF = 350;
|
|
|
|
|
2023-08-13 08:15:37 -04:00
|
|
|
template<Arithmetic T = float, typename ContainerType = Vector<T>>
|
2022-11-14 13:20:59 -05:00
|
|
|
class Statistics {
|
2021-08-23 17:13:54 -04:00
|
|
|
public:
|
|
|
|
Statistics() = default;
|
|
|
|
~Statistics() = default;
|
|
|
|
|
2023-08-13 08:15:37 -04:00
|
|
|
explicit Statistics(ContainerType&& existing_container)
|
|
|
|
: m_values(forward<ContainerType>(existing_container))
|
|
|
|
{
|
|
|
|
for (auto const& value : m_values)
|
|
|
|
m_sum += value;
|
|
|
|
}
|
|
|
|
|
2021-08-23 17:13:54 -04:00
|
|
|
void add(T const& value)
|
|
|
|
{
|
|
|
|
// FIXME: Check for an overflow
|
|
|
|
m_sum += value;
|
|
|
|
m_values.append(value);
|
|
|
|
}
|
|
|
|
|
|
|
|
T const sum() const { return m_sum; }
|
2023-01-08 08:44:59 -05:00
|
|
|
|
|
|
|
// FIXME: Unclear Wording, average can mean a lot of different things
|
|
|
|
// Median, Arithmetic Mean (which this is), Geometric Mean, Harmonic Mean etc
|
|
|
|
float average() const
|
|
|
|
{
|
|
|
|
// Let's assume the average of an empty dataset is 0
|
|
|
|
if (size() == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// TODO: sum might overflow so maybe do multiple partial sums and intermediate divisions here
|
|
|
|
return (float)sum() / size();
|
|
|
|
}
|
2021-08-23 17:13:54 -04:00
|
|
|
|
2021-10-29 08:59:52 -04:00
|
|
|
T const min() const
|
|
|
|
{
|
2023-01-08 08:44:59 -05:00
|
|
|
// Lets Rather fail than read over the end of a collection
|
|
|
|
VERIFY(size() != 0);
|
|
|
|
|
2021-10-29 08:59:52 -04:00
|
|
|
T minimum = m_values[0];
|
|
|
|
for (T number : values()) {
|
|
|
|
if (number < minimum) {
|
|
|
|
minimum = number;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return minimum;
|
|
|
|
}
|
|
|
|
|
|
|
|
T const max() const
|
|
|
|
{
|
2023-01-08 08:44:59 -05:00
|
|
|
// Lets Rather fail than read over the end of a collection
|
|
|
|
VERIFY(size() != 0);
|
|
|
|
|
2021-10-29 08:59:52 -04:00
|
|
|
T maximum = m_values[0];
|
|
|
|
for (T number : values()) {
|
|
|
|
if (number > maximum) {
|
|
|
|
maximum = number;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return maximum;
|
|
|
|
}
|
|
|
|
|
2021-08-23 17:13:54 -04:00
|
|
|
T const median()
|
|
|
|
{
|
2023-01-08 08:44:59 -05:00
|
|
|
// Let's assume the Median of an empty dataset is 0
|
|
|
|
if (size() == 0)
|
|
|
|
return 0;
|
|
|
|
|
2023-01-06 15:38:21 -05:00
|
|
|
// If the number of values is even, the median is the arithmetic mean of the two middle values
|
2023-01-08 08:55:54 -05:00
|
|
|
if (size() <= EVEN_NAIVE_MEDIAN_CUTOFF && size() % 2 == 0) {
|
|
|
|
quick_sort(m_values);
|
|
|
|
return (m_values.at(size() / 2) + m_values.at(size() / 2 - 1)) / 2;
|
|
|
|
} else if (size() <= ODD_NAIVE_MEDIAN_CUTOFF && size() % 2 == 1) {
|
|
|
|
quick_sort(m_values);
|
|
|
|
return m_values.at(m_values.size() / 2);
|
|
|
|
} else if (size() % 2 == 0) {
|
2023-01-06 15:38:21 -05:00
|
|
|
auto index = size() / 2;
|
2023-01-08 08:44:59 -05:00
|
|
|
auto median1 = m_values.at(AK::quickselect_inplace(m_values, index));
|
|
|
|
auto median2 = m_values.at(AK::quickselect_inplace(m_values, index - 1));
|
|
|
|
return (median1 + median2) / 2;
|
2023-01-06 15:38:21 -05:00
|
|
|
}
|
2023-01-08 08:44:59 -05:00
|
|
|
return m_values.at(AK::quickselect_inplace(m_values, size() / 2));
|
2021-08-23 17:13:54 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
float standard_deviation() const { return sqrt(variance()); }
|
|
|
|
float variance() const
|
|
|
|
{
|
|
|
|
float summation = 0;
|
|
|
|
float avg = average();
|
|
|
|
for (T number : values()) {
|
|
|
|
float difference = (float)number - avg;
|
|
|
|
summation += (difference * difference);
|
|
|
|
}
|
|
|
|
summation = summation / size();
|
|
|
|
return summation;
|
|
|
|
}
|
|
|
|
|
2023-08-13 08:15:37 -04:00
|
|
|
ContainerType const& values() const { return m_values; }
|
2021-08-23 17:13:54 -04:00
|
|
|
size_t size() const { return m_values.size(); }
|
|
|
|
|
|
|
|
private:
|
2023-08-13 08:15:37 -04:00
|
|
|
ContainerType m_values;
|
2021-08-23 17:13:54 -04:00
|
|
|
T m_sum {};
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
2024-05-14 12:22:39 -04:00
|
|
|
|
|
|
|
// Make Statistics usable without the AK:: prefix when AK is configured for global use.
#if USING_AK_GLOBALLY
using AK::Statistics;
#endif
|