2022-12-08 16:44:46 -05:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2022, Lucas Chollet <lucas.chollet@free.fr>
|
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <AK/CircularBuffer.h>
|
|
|
|
#include <AK/MemMem.h>
|
2023-03-29 20:25:34 -04:00
|
|
|
#include <AK/Stream.h>
|
2022-12-08 16:44:46 -05:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
|
|
|
CircularBuffer::CircularBuffer(ByteBuffer buffer)
|
|
|
|
: m_buffer(move(buffer))
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a circular buffer backed by `size` bytes of uninitialized storage.
// Any error reported by ByteBuffer::create_uninitialized() is propagated to the caller.
ErrorOr<CircularBuffer> CircularBuffer::create_empty(size_t size)
{
    auto backing_storage = TRY(ByteBuffer::create_uninitialized(size));

    return CircularBuffer { move(backing_storage) };
}
|
|
|
|
|
|
|
|
// Creates a circular buffer that takes ownership of `buffer` and treats all of its bytes as readable data.
ErrorOr<CircularBuffer> CircularBuffer::create_initialized(ByteBuffer buffer)
{
    CircularBuffer circular_buffer { move(buffer) };

    // The whole backing storage counts as used space.
    circular_buffer.m_used_space = circular_buffer.m_buffer.size();

    // write() grows the seekback limit together with the used space; do the same here. Otherwise seekback
    // reads of the initial data are rejected and `m_seekback_limit - m_used_space` underflows.
    circular_buffer.m_seekback_limit = circular_buffer.m_used_space;

    return circular_buffer;
}
|
|
|
|
|
|
|
|
// Returns how many more bytes can be written before the buffer is full.
size_t CircularBuffer::empty_space() const
{
    auto const total_bytes = capacity();
    return total_bytes - m_used_space;
}
|
|
|
|
|
|
|
|
// Returns the number of bytes that are currently stored and waiting to be read.
size_t CircularBuffer::used_space() const
{
    return m_used_space;
}
|
|
|
|
|
|
|
|
// Returns the total size of the backing storage in bytes.
size_t CircularBuffer::capacity() const
{
    return m_buffer.size();
}
|
|
|
|
|
2023-03-16 10:15:44 -04:00
|
|
|
// Returns the largest distance that is currently accepted by the seekback operations.
size_t CircularBuffer::seekback_limit() const
{
    return m_seekback_limit;
}
|
|
|
|
|
2023-06-01 18:01:15 -04:00
|
|
|
// Returns the part of the seekback limit that is not covered by the used space,
// i.e. the largest distance accepted by next_search_span().
size_t SearchableCircularBuffer::search_limit() const
{
    auto const searchable_bytes = m_seekback_limit - m_used_space;
    return searchable_bytes;
}
|
|
|
|
|
2022-12-08 16:44:46 -05:00
|
|
|
bool CircularBuffer::is_wrapping_around() const
|
|
|
|
{
|
|
|
|
return capacity() <= m_reading_head + m_used_space;
|
|
|
|
}
|
|
|
|
|
2023-01-07 13:09:20 -05:00
|
|
|
// Searches the readable data for the first occurrence of `needle`.
//
// `from` and `until` are offsets relative to the reading head that delimit the searched range [from, until).
// They default to 0 and to the amount of used space respectively, and `from` must not be larger than `until`.
// Returns the offset of the match relative to the reading head, or an empty Optional if nothing was found.
Optional<size_t> CircularBuffer::offset_of(StringView needle, Optional<size_t> from, Optional<size_t> until) const
{
    auto const read_from = from.value_or(0);
    auto const read_until = until.value_or(m_used_space);
    VERIFY(read_from <= read_until);

    Array<ReadonlyBytes, 2> spans {};
    spans[0] = next_read_span();
    auto const original_span_0_size = spans[0].size();

    if (read_from > 0)
        spans[0] = spans[0].slice(min(spans[0].size(), read_from));

    if (spans[0].size() + read_from > read_until) {
        // The requested range ends inside of the first span, the wrapped part is not needed.
        spans[0] = spans[0].trim(read_until - read_from);
    } else if (is_wrapping_around()) {
        // Both bounds are offsets relative to the reading head. In this branch the end is never located before
        // the end of the first span, and the start is clamped so that it never lies beyond the end.
        auto const second_span_end = min(read_until, m_used_space);
        auto const second_span_start = min(max(original_span_0_size, read_from), second_span_end);

        // slice() takes a start and a length, so the length has to account for a start that was moved
        // forward by `read_from`. Otherwise the span extends past `read_until` (or past the backing storage).
        spans[1] = m_buffer.span().slice(second_span_start - original_span_0_size, second_span_end - second_span_start);
    }

    auto maybe_found = AK::memmem(spans.begin(), spans.end(), needle.bytes());

    // The spans start at `read_from`, so translate the result back to an offset from the reading head.
    if (maybe_found.has_value())
        *maybe_found += read_from;

    return maybe_found;
}
|
|
|
|
|
|
|
|
void CircularBuffer::clear()
|
|
|
|
{
|
|
|
|
m_reading_head = 0;
|
|
|
|
m_used_space = 0;
|
2022-12-31 17:35:45 -05:00
|
|
|
m_seekback_limit = 0;
|
2022-12-08 16:44:46 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the next contiguous region of the backing storage that can be written to.
// The region may be smaller than empty_space() if the free area is split by the end of the storage.
Bytes CircularBuffer::next_write_span()
{
    auto const total_bytes = capacity();
    auto const unwrapped_write_head = m_reading_head + m_used_space;

    if (!is_wrapping_around()) {
        // The free area continues up to the end of the backing storage.
        return m_buffer.span().slice(unwrapped_write_head, total_bytes - unwrapped_write_head);
    }

    // The write head already wrapped, so all of the remaining free space is contiguous.
    return m_buffer.span().slice(unwrapped_write_head - total_bytes, total_bytes - m_used_space);
}
|
|
|
|
|
2023-06-01 17:39:05 -04:00
|
|
|
// Returns the next contiguous region of readable data, starting `offset` bytes after the reading head.
// The result is empty if a non-zero `offset` is not smaller than the used space.
ReadonlyBytes CircularBuffer::next_read_span(size_t offset) const
{
    if (offset == 0)
        return m_buffer.span().slice(m_reading_head, min(capacity() - m_reading_head, m_used_space));

    if (offset >= m_used_space)
        return Bytes {};

    // Skip `offset` bytes of the readable data, wrapping around the end of the backing storage if necessary.
    auto const span_start = (m_reading_head + offset) % capacity();
    auto const readable_bytes = m_used_space - offset;

    return m_buffer.span().slice(span_start, min(capacity() - span_start, readable_bytes));
}
|
|
|
|
|
2023-06-01 18:01:15 -04:00
|
|
|
// Returns the next contiguous region that starts `distance` bytes before the end of the used data.
// `distance` must not exceed the seekback limit.
ReadonlyBytes CircularBuffer::next_seekback_span(size_t distance) const
{
    auto const total_bytes = capacity();

    VERIFY(m_seekback_limit <= total_bytes);
    VERIFY(distance <= m_seekback_limit);

    // Note: The capacity is added once so that stepping backwards cannot go below zero before the modulo is applied.
    auto const span_start = (total_bytes + m_reading_head + m_used_space - distance) % total_bytes;
    auto const bytes_until_storage_end = total_bytes - span_start;

    return m_buffer.span().slice(span_start, min(bytes_until_storage_end, distance));
}
|
|
|
|
|
2023-06-01 18:01:15 -04:00
|
|
|
// Returns the next contiguous region that starts `distance` bytes before the reading head.
// `distance` must not exceed search_limit().
ReadonlyBytes SearchableCircularBuffer::next_search_span(size_t distance) const
{
    auto const total_bytes = capacity();
    auto const limit = search_limit();

    VERIFY(limit <= total_bytes);
    VERIFY(distance <= limit);

    // Note: The capacity is added once so that stepping backwards cannot go below zero before the modulo is applied.
    auto const span_start = (total_bytes + m_reading_head - distance) % total_bytes;
    auto const bytes_until_storage_end = total_bytes - span_start;

    return m_buffer.span().slice(span_start, min(bytes_until_storage_end, distance));
}
|
|
|
|
|
2022-12-08 16:44:46 -05:00
|
|
|
// Copies as much of `bytes` as fits into the buffer and returns the number of bytes that were stored.
size_t CircularBuffer::write(ReadonlyBytes bytes)
{
    size_t total_written = 0;

    while (total_written < bytes.size()) {
        auto const destination = next_write_span();

        // No free space is left, the rest of the input is dropped.
        if (destination.size() == 0)
            break;

        auto const chunk_size = bytes.slice(total_written).copy_trimmed_to(destination);

        m_used_space += chunk_size;

        // Newly written data becomes available for seekback, but never more than the buffer can hold.
        m_seekback_limit = min(m_seekback_limit + chunk_size, capacity());

        total_written += chunk_size;
    }

    return total_written;
}
|
|
|
|
|
|
|
|
// Moves readable data into `bytes` and consumes it from the buffer.
// Returns the prefix of `bytes` that was actually filled.
Bytes CircularBuffer::read(Bytes bytes)
{
    size_t total_read = 0;

    while (total_read < bytes.size()) {
        auto const source = next_read_span();

        // The buffer ran out of data.
        if (source.size() == 0)
            break;

        auto const chunk_size = source.copy_trimmed_to(bytes.slice(total_read));

        m_used_space -= chunk_size;

        // Advance the reading head and wrap it around at the end of the backing storage.
        m_reading_head += chunk_size;
        if (m_reading_head >= capacity())
            m_reading_head -= capacity();

        total_read += chunk_size;
    }

    return bytes.trim(total_read);
}
|
|
|
|
|
2023-06-02 06:57:03 -04:00
|
|
|
// Copies data into `bytes`, starting `distance` bytes before the end of the used data, without consuming anything.
// Returns the prefix of `bytes` that was filled, or an error if `distance` exceeds the seekback limit.
ErrorOr<Bytes> CircularBuffer::read_with_seekback(Bytes bytes, size_t distance) const
{
    if (distance > m_seekback_limit)
        return Error::from_string_literal("Tried a seekback read beyond the seekback limit");

    size_t total_read = 0;

    while (total_read < bytes.size()) {
        auto const source = next_seekback_span(distance);

        // A distance of zero means that the end of the used data has been reached.
        if (source.size() == 0)
            break;

        auto const chunk_size = source.copy_trimmed_to(bytes.slice(total_read));

        // Each copied byte brings us closer to the end of the used data.
        distance -= chunk_size;
        total_read += chunk_size;
    }

    return bytes.trim(total_read);
}
|
|
|
|
|
2022-12-08 16:44:46 -05:00
|
|
|
// Drops `discarding_size` bytes from the front of the readable data without copying them anywhere.
// Returns an error if the buffer contains fewer bytes than requested.
ErrorOr<void> CircularBuffer::discard(size_t discarding_size)
{
    if (discarding_size > m_used_space)
        return Error::from_string_literal("Can not discard more data than what the buffer contains");

    // Advance the reading head, wrapping around the end of the backing storage.
    m_reading_head = (m_reading_head + discarding_size) % capacity();
    m_used_space -= discarding_size;

    return {};
}
|
|
|
|
|
2023-03-29 20:25:34 -04:00
|
|
|
// Performs a single read_some() call on `stream` into the next writable region of the buffer.
// Returns the number of bytes that were added (0 if the buffer is full) or the error reported by the stream.
ErrorOr<size_t> CircularBuffer::fill_from_stream(Stream& stream)
{
    auto destination = next_write_span();
    if (destination.size() == 0)
        return 0;

    auto const received = TRY(stream.read_some(destination));
    auto const received_size = received.size();

    m_used_space += received_size;

    // Newly stored data becomes available for seekback, but never more than the buffer can hold.
    m_seekback_limit = min(m_seekback_limit + received_size, capacity());

    return received_size;
}
|
|
|
|
|
2023-04-30 20:56:20 -04:00
|
|
|
// Performs a single write_some() call on `stream` with the next readable region and consumes what was accepted.
// Returns the number of bytes that were flushed (0 if the buffer is empty) or the error reported by the stream.
ErrorOr<size_t> CircularBuffer::flush_to_stream(Stream& stream)
{
    auto source = next_read_span();
    if (source.size() == 0)
        return 0;

    auto const flushed_size = TRY(stream.write_some(source));

    // Advance the reading head and wrap it around at the end of the backing storage.
    m_reading_head += flushed_size;
    if (m_reading_head >= capacity())
        m_reading_head -= capacity();

    m_used_space -= flushed_size;

    return flushed_size;
}
|
|
|
|
|
2023-04-04 12:29:43 -04:00
|
|
|
// Appends up to `length` bytes to the buffer, copied from `distance` bytes before the end of the used data.
// If `length` is larger than `distance`, the copied data repeats.
//
// Returns the number of bytes that were appended, which is smaller than `length` if the buffer fills up.
// Returns an error if `distance` exceeds the seekback limit.
ErrorOr<size_t> CircularBuffer::copy_from_seekback(size_t distance, size_t length)
{
    if (distance > m_seekback_limit)
        return Error::from_string_literal("Tried a seekback copy beyond the seekback limit");

    auto remaining_length = length;
    while (remaining_length > 0) {
        if (empty_space() == 0)
            break;

        auto next_span = next_seekback_span(distance);
        if (next_span.size() == 0)
            break;

        auto length_written = write(next_span.trim(remaining_length));
        remaining_length -= length_written;

        // If we copied right from the end of the seekback area (i.e. our length is larger than the distance)
        // and the last copy was one complete "chunk", we can now double the distance to copy twice as much data in one go.
        // The seekback limit is capped at the capacity, so only do this while the doubled distance is still
        // within the limit; otherwise the next next_seekback_span() call would fail its precondition.
        // Keeping the distance as-is remains correct, it merely copies in smaller chunks.
        if (remaining_length > distance && length_written == distance && distance <= m_seekback_limit / 2)
            distance *= 2;
    }

    return length - remaining_length;
}
|
|
|
|
|
2023-06-01 14:48:35 -04:00
|
|
|
SearchableCircularBuffer::SearchableCircularBuffer(ByteBuffer buffer)
|
|
|
|
: CircularBuffer(move(buffer))
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates a searchable circular buffer backed by `size` bytes of uninitialized storage.
// Any error reported by ByteBuffer::create_uninitialized() is propagated to the caller.
ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_empty(size_t size)
{
    auto backing_storage = TRY(ByteBuffer::create_uninitialized(size));

    return SearchableCircularBuffer { move(backing_storage) };
}
|
|
|
|
|
|
|
|
// Creates a searchable circular buffer that takes ownership of `buffer` and treats all of its bytes as readable data.
ErrorOr<SearchableCircularBuffer> SearchableCircularBuffer::create_initialized(ByteBuffer buffer)
{
    SearchableCircularBuffer circular_buffer { move(buffer) };

    // The whole backing storage counts as used space.
    circular_buffer.m_used_space = circular_buffer.m_buffer.size();

    // search_limit() is computed as `m_seekback_limit - m_used_space`. The seekback limit has to cover the used
    // space (as it does after a write()), otherwise that subtraction underflows and searching asserts.
    circular_buffer.m_seekback_limit = circular_buffer.m_used_space;

    return circular_buffer;
}
|
|
|
|
|
2023-06-01 16:24:28 -04:00
|
|
|
// Finds copies of the upcoming readable data in the searchable area located before the reading head.
//
// All locations that match the first `minimum_length` readable bytes are collected, then the matches are
// extended byte by byte (up to `maximum_length`, clipped to the used space) and only the longest ones are kept.
// Each returned Match holds the distance from the reading head back to the copy and the matching length.
// The result is empty if there is no match or if fewer than `minimum_length` bytes are readable.
ErrorOr<Vector<SearchableCircularBuffer::Match>> SearchableCircularBuffer::find_copy_in_seekback(size_t maximum_length, size_t minimum_length) const
{
    VERIFY(minimum_length > 0);

    // Clip the maximum length to the amount of data that we actually store.
    if (maximum_length > m_used_space)
        maximum_length = m_used_space;

    if (maximum_length < minimum_length)
        return Vector<Match> {};

    Vector<Match> matches;

    // Use memmem to find the initial matches.
    size_t haystack_offset_from_start = 0;
    Vector<ReadonlyBytes, 2> haystack;
    haystack.append(next_search_span(search_limit()));
    if (haystack[0].size() < search_limit())
        haystack.append(next_search_span(search_limit() - haystack[0].size()));

    // The needle has to be exactly `minimum_length` bytes long, since every initial match is recorded with
    // that length. A single read span is shorter than that if the readable data wraps around the end of the
    // backing storage, so in that case the needle is assembled in a temporary contiguous buffer.
    ReadonlyBytes needle = next_read_span().trim(minimum_length);
    ByteBuffer wrapped_needle;
    if (needle.size() < minimum_length) {
        wrapped_needle = TRY(ByteBuffer::create_uninitialized(minimum_length));

        size_t copied = 0;
        while (copied < minimum_length) {
            auto const chunk = next_read_span(copied);

            // At least `minimum_length` bytes are readable (checked above), so there is always more data.
            VERIFY(chunk.size() > 0);

            copied += chunk.copy_trimmed_to(wrapped_needle.span().slice(copied));
        }

        needle = wrapped_needle.span();
    }

    auto memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
    while (memmem_match.has_value()) {
        auto match_offset = memmem_match.release_value();

        // Add the match to the list of matches to work with.
        // The haystack starts search_limit() bytes before the reading head, so convert the offset into a distance.
        TRY(matches.try_empend(m_seekback_limit - used_space() - haystack_offset_from_start - match_offset, minimum_length));

        auto size_to_discard = match_offset + 1;

        // Trim away the already processed bytes from the haystack.
        haystack_offset_from_start += size_to_discard;
        while (size_to_discard > 0) {
            if (haystack[0].size() < size_to_discard) {
                size_to_discard -= haystack[0].size();
                haystack.remove(0);
            } else {
                haystack[0] = haystack[0].slice(size_to_discard);
                break;
            }
        }

        if (haystack.size() == 0)
            break;

        // Try and find the next match.
        memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
    }

    // From now on, all matches that we have stored have at least a length of `minimum_length` and they all refer to the same value.
    // For the remaining part, we will keep checking the next byte incrementally and keep eliminating matches until we eliminated all of them.
    Vector<Match> next_matches;

    for (size_t offset = minimum_length; offset < maximum_length; offset++) {
        auto needle_data = m_buffer[(capacity() + m_reading_head + offset) % capacity()];

        for (auto const& match : matches) {
            auto haystack_data = m_buffer[(capacity() + m_reading_head - match.distance + offset) % capacity()];

            if (haystack_data != needle_data)
                continue;

            TRY(next_matches.try_empend(match.distance, match.length + 1));
        }

        // Nothing could be extended any further, so the current set contains the longest matches.
        if (next_matches.size() == 0)
            return matches;

        swap(matches, next_matches);
        next_matches.clear_with_capacity();
    }

    return matches;
}
|
|
|
|
|
2023-06-01 19:25:56 -04:00
|
|
|
// Checks only the given candidate `distances` (measured backwards from the reading head) for copies of the
// upcoming readable data and returns the longest match that has at least `minimum_length` bytes.
// `maximum_length` is clipped to the used space. If several candidates share the longest length, the first one wins.
Optional<SearchableCircularBuffer::Match> SearchableCircularBuffer::find_copy_in_seekback(ReadonlySpan<size_t> distances, size_t maximum_length, size_t minimum_length) const
{
    VERIFY(minimum_length > 0);

    // Clip the maximum length to the amount of data that we actually store.
    maximum_length = min(maximum_length, m_used_space);

    if (maximum_length < minimum_length)
        return Optional<Match> {};

    Optional<Match> best_match;

    for (auto distance : distances) {
        // Discard distances outside the valid range.
        auto const is_within_search_area = distance <= search_limit() && distance > 0;
        if (!is_within_search_area)
            continue;

        // TODO: This does not yet support looping repetitions.
        if (distance < minimum_length)
            continue;

        auto current_match_length = 0ul;

        // Compare one contiguous chunk at a time, since both sides may wrap around the end of the backing storage.
        while (current_match_length < maximum_length) {
            auto const remaining_length = maximum_length - current_match_length;

            auto haystack = next_search_span(distance - current_match_length).trim(remaining_length);
            auto needle = next_read_span(current_match_length).trim(remaining_length);

            auto submatch_length = haystack.matching_prefix_length(needle);

            // Either the data differs or the searchable area ended.
            if (submatch_length == 0)
                break;

            current_match_length += submatch_length;
        }

        // Discard matches that don't reach the minimum length.
        if (current_match_length < minimum_length)
            continue;

        // Only a strictly longer match replaces the current best one.
        if (best_match.has_value() && best_match->length >= current_match_length)
            continue;

        best_match = Match { distance, current_match_length };
    }

    return best_match;
}
|
|
|
|
|
2022-12-08 16:44:46 -05:00
|
|
|
}
|