mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-22 09:12:13 -05:00
AK: Construct Strings from StringBuilder without re-allocating the data
Currently, invoking StringBuilder::to_string will re-allocate the string data to construct the String. This is wasteful both in terms of memory and speed.

The goal here is to simply hand the string buffer over to String, and let String take ownership of that buffer. To do this, StringBuilder must have the same memory layout as Detail::StringData. This layout is just the members of the StringData class followed by the string itself.

So when a StringBuilder is created, we reserve sizeof(StringData) bytes at the front of the buffer. StringData can then construct itself into the buffer with placement new.

Things to note:
* StringData must now be aware of the actual capacity of its buffer, as that can be larger than the string size.
* We must take care not to pass ownership of inlined string buffers, as these live on the stack.
This commit is contained in:
parent
77eef8a8f6
commit
29879a69a4
Notes:
github-actions[bot]
2024-07-20 07:31:38 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/29879a69a4b Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/724
9 changed files with 139 additions and 28 deletions
|
@ -8,6 +8,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/Badge.h>
|
||||
#include <AK/Error.h>
|
||||
#include <AK/Span.h>
|
||||
#include <AK/Types.h>
|
||||
|
@ -301,6 +302,23 @@ public:
|
|||
operator ReadonlyBytes() const { return bytes(); }
|
||||
|
||||
ALWAYS_INLINE size_t capacity() const { return m_inline ? inline_capacity : m_outline_capacity; }
|
||||
ALWAYS_INLINE bool is_inline() const { return m_inline; }
|
||||
|
||||
struct OutlineBuffer {
|
||||
Bytes buffer;
|
||||
size_t capacity { 0 };
|
||||
};
|
||||
Optional<OutlineBuffer> leak_outline_buffer(Badge<StringBuilder>)
|
||||
{
|
||||
if (m_inline)
|
||||
return {};
|
||||
|
||||
auto buffer = bytes();
|
||||
m_inline = true;
|
||||
m_size = 0;
|
||||
|
||||
return OutlineBuffer { buffer, capacity() };
|
||||
}
|
||||
|
||||
private:
|
||||
void move_from(ByteBuffer&& other)
|
||||
|
|
|
@ -16,6 +16,8 @@ namespace AK {
|
|||
namespace Detail {
|
||||
template<size_t inline_capacity>
|
||||
class ByteBuffer;
|
||||
|
||||
class StringData;
|
||||
}
|
||||
|
||||
enum class TrailingCodePointTransformation : u8;
|
||||
|
|
|
@ -96,6 +96,23 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
|
|||
return result;
|
||||
}
|
||||
|
||||
// Builds a String from the contents of the given StringBuilder.
// Returns an error if the builder's contents are not valid UTF-8; in that case the builder is not touched.
ErrorOr<String> String::from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
{
    Utf8View utf8_view { builder.string_view() };

    if (utf8_view.validate()) {
        String string;
        string.replace_with_string_builder(builder);
        return string;
    }

    return Error::from_string_literal("String::from_string_builder: Input was not valid UTF-8");
}
|
||||
|
||||
// Builds a String from the contents of the given StringBuilder.
// No UTF-8 validation is performed on this path.
String String::from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder& builder)
{
    String string;
    string.replace_with_string_builder(builder);
    return string;
}
|
||||
|
||||
ErrorOr<String> String::repeated(u32 code_point, size_t count)
|
||||
{
|
||||
VERIFY(is_unicode(code_point));
|
||||
|
|
|
@ -57,6 +57,9 @@ public:
|
|||
|
||||
[[nodiscard]] static String from_utf8_without_validation(ReadonlyBytes);
|
||||
|
||||
static ErrorOr<String> from_string_builder(Badge<StringBuilder>, StringBuilder&);
|
||||
[[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);
|
||||
|
||||
// Creates a new String from a sequence of UTF-16 encoded code points.
|
||||
static ErrorOr<String> from_utf16(Utf16View const&);
|
||||
|
||||
|
|
|
@ -90,6 +90,19 @@ bool StringBase::operator==(StringBase const& other) const
|
|||
return bytes() == other.bytes();
|
||||
}
|
||||
|
||||
// Replaces the contents of this string with the contents of the given StringBuilder.
void StringBase::replace_with_string_builder(StringBuilder& builder)
{
    auto const byte_count = builder.length();

    if (byte_count > MAX_SHORT_STRING_BYTE_COUNT) {
        // Too long for a short string: drop the current contents, then let StringData
        // construct itself from the builder.
        destroy_string();
        m_data = &StringData::create_from_string_builder(builder).leak_ref();
        return;
    }

    // Short strings are copied out of the builder.
    replace_with_new_short_string(byte_count, [&builder](Bytes short_string_buffer) {
        builder.string_view().bytes().copy_to(short_string_buffer);
    });
}
|
||||
|
||||
ErrorOr<Bytes> StringBase::replace_with_uninitialized_buffer(size_t byte_count)
|
||||
{
|
||||
if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT)
|
||||
|
|
|
@ -92,6 +92,8 @@ protected:
|
|||
callback(buffer);
|
||||
}
|
||||
|
||||
void replace_with_string_builder(StringBuilder&);
|
||||
|
||||
// This is not a trivial operation with storage, so it does not belong here. Unfortunately, it
|
||||
// is impossible to implement it without access to StringData.
|
||||
ErrorOr<StringBase> substring_from_byte_offset_with_shared_superstring(size_t start, size_t byte_count) const;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <AK/FlyString.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringData.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <AK/Utf16View.h>
|
||||
|
@ -18,16 +19,33 @@
|
|||
|
||||
namespace AK {
|
||||
|
||||
static constexpr auto STRING_BASE_PREFIX_SIZE = sizeof(Detail::StringData);
|
||||
|
||||
// Creates a StringBuilder buffer whose first STRING_BASE_PREFIX_SIZE bytes are reserved.
// `capacity` is the requested capacity for the string itself, excluding that prefix.
static ErrorOr<StringBuilder::Buffer> create_buffer(size_t capacity)
{
    StringBuilder::Buffer storage;

    bool const exceeds_inline_capacity = capacity > StringBuilder::inline_capacity;
    if (exceeds_inline_capacity) {
        auto const total_capacity = STRING_BASE_PREFIX_SIZE + capacity;
        TRY(storage.try_ensure_capacity(total_capacity));
    }

    // The buffer always starts out holding exactly the prefix bytes.
    TRY(storage.try_resize(STRING_BASE_PREFIX_SIZE));

    return storage;
}
|
||||
|
||||
ErrorOr<StringBuilder> StringBuilder::create(size_t initial_capacity)
|
||||
{
|
||||
StringBuilder builder;
|
||||
TRY(builder.m_buffer.try_ensure_capacity(initial_capacity));
|
||||
return builder;
|
||||
auto buffer = TRY(create_buffer(initial_capacity));
|
||||
return StringBuilder { move(buffer) };
|
||||
}
|
||||
|
||||
StringBuilder::StringBuilder(size_t initial_capacity)
|
||||
: m_buffer(MUST(create_buffer(initial_capacity)))
|
||||
{
|
||||
}
|
||||
|
||||
StringBuilder::StringBuilder(Buffer buffer)
|
||||
: m_buffer(move(buffer))
|
||||
{
|
||||
m_buffer.ensure_capacity(initial_capacity);
|
||||
}
|
||||
|
||||
inline ErrorOr<void> StringBuilder::will_append(size_t size)
|
||||
|
@ -47,12 +65,12 @@ inline ErrorOr<void> StringBuilder::will_append(size_t size)
|
|||
|
||||
size_t StringBuilder::length() const
|
||||
{
|
||||
return m_buffer.size();
|
||||
return m_buffer.size() - STRING_BASE_PREFIX_SIZE;
|
||||
}
|
||||
|
||||
bool StringBuilder::is_empty() const
|
||||
{
|
||||
return m_buffer.is_empty();
|
||||
return length() == 0;
|
||||
}
|
||||
|
||||
void StringBuilder::trim(size_t count)
|
||||
|
@ -122,14 +140,18 @@ ByteString StringBuilder::to_byte_string() const
|
|||
return ByteString((char const*)data(), length());
|
||||
}
|
||||
|
||||
ErrorOr<String> StringBuilder::to_string() const
|
||||
ErrorOr<String> StringBuilder::to_string()
|
||||
{
|
||||
return String::from_utf8(string_view());
|
||||
if (m_buffer.is_inline())
|
||||
return String::from_utf8(string_view());
|
||||
return String::from_string_builder({}, *this);
|
||||
}
|
||||
|
||||
String StringBuilder::to_string_without_validation() const
|
||||
String StringBuilder::to_string_without_validation()
|
||||
{
|
||||
return String::from_utf8_without_validation(string_view().bytes());
|
||||
if (m_buffer.is_inline())
|
||||
return String::from_utf8_without_validation(string_view().bytes());
|
||||
return String::from_string_builder_without_validation({}, *this);
|
||||
}
|
||||
|
||||
FlyString StringBuilder::to_fly_string_without_validation() const
|
||||
|
@ -144,22 +166,22 @@ ErrorOr<FlyString> StringBuilder::to_fly_string() const
|
|||
|
||||
u8* StringBuilder::data()
|
||||
{
|
||||
return m_buffer.data();
|
||||
return m_buffer.data() + STRING_BASE_PREFIX_SIZE;
|
||||
}
|
||||
|
||||
u8 const* StringBuilder::data() const
|
||||
{
|
||||
return m_buffer.data();
|
||||
return m_buffer.data() + STRING_BASE_PREFIX_SIZE;
|
||||
}
|
||||
|
||||
StringView StringBuilder::string_view() const
|
||||
{
|
||||
return StringView { data(), m_buffer.size() };
|
||||
return m_buffer.span().slice(STRING_BASE_PREFIX_SIZE);
|
||||
}
|
||||
|
||||
void StringBuilder::clear()
|
||||
{
|
||||
m_buffer.clear();
|
||||
m_buffer.resize(STRING_BASE_PREFIX_SIZE);
|
||||
}
|
||||
|
||||
ErrorOr<void> StringBuilder::try_append_code_point(u32 code_point)
|
||||
|
@ -272,4 +294,14 @@ ErrorOr<void> StringBuilder::try_append_escaped_for_json(StringView string)
|
|||
return {};
|
||||
}
|
||||
|
||||
// Releases the builder's outline buffer, if it has one, and clears the builder afterwards.
// Returns an empty Optional when no outline buffer was released.
auto StringBuilder::leak_buffer_for_string_construction(Badge<Detail::StringData>) -> Optional<Buffer::OutlineBuffer>
{
    auto leaked_buffer = m_buffer.leak_outline_buffer({});

    if (!leaked_buffer.has_value())
        return {};

    clear();
    return leaked_buffer;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ class StringBuilder {
|
|||
public:
|
||||
static constexpr size_t inline_capacity = 256;
|
||||
|
||||
using Buffer = Detail::ByteBuffer<inline_capacity>;
|
||||
using OutputType = ByteString;
|
||||
|
||||
static ErrorOr<StringBuilder> create(size_t initial_capacity = inline_capacity);
|
||||
|
@ -61,8 +62,8 @@ public:
|
|||
|
||||
[[nodiscard]] ByteString to_byte_string() const;
|
||||
|
||||
[[nodiscard]] String to_string_without_validation() const;
|
||||
ErrorOr<String> to_string() const;
|
||||
[[nodiscard]] String to_string_without_validation();
|
||||
ErrorOr<String> to_string();
|
||||
|
||||
[[nodiscard]] FlyString to_fly_string_without_validation() const;
|
||||
ErrorOr<FlyString> to_fly_string() const;
|
||||
|
@ -95,12 +96,16 @@ public:
|
|||
return {};
|
||||
}
|
||||
|
||||
Optional<Buffer::OutlineBuffer> leak_buffer_for_string_construction(Badge<Detail::StringData>);
|
||||
|
||||
private:
|
||||
explicit StringBuilder(Buffer);
|
||||
|
||||
ErrorOr<void> will_append(size_t);
|
||||
u8* data();
|
||||
u8 const* data() const;
|
||||
|
||||
Detail::ByteBuffer<inline_capacity> m_buffer;
|
||||
Buffer m_buffer;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <AK/NonnullRefPtr.h>
|
||||
#include <AK/RefCounted.h>
|
||||
#include <AK/StringBase.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/kmalloc.h>
|
||||
|
||||
namespace AK::Detail {
|
||||
|
@ -20,25 +21,39 @@ public:
|
|||
static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t byte_count, u8*& buffer)
|
||||
{
|
||||
VERIFY(byte_count);
|
||||
void* slot = malloc(allocation_size_for_string_data(byte_count));
|
||||
if (!slot) {
|
||||
|
||||
auto capacity = allocation_size_for_string_data(byte_count);
|
||||
void* slot = malloc(capacity);
|
||||
if (!slot)
|
||||
return Error::from_errno(ENOMEM);
|
||||
}
|
||||
auto new_string_data = adopt_ref(*new (slot) StringData(byte_count));
|
||||
|
||||
auto new_string_data = adopt_ref(*new (slot) StringData(byte_count, capacity));
|
||||
buffer = const_cast<u8*>(new_string_data->bytes().data());
|
||||
return new_string_data;
|
||||
}
|
||||
|
||||
// Constructs a StringData in place at the start of the buffer leaked from the given StringBuilder.
// The builder must hold more than MAX_SHORT_STRING_BYTE_COUNT bytes.
static NonnullRefPtr<StringData> create_from_string_builder(StringBuilder& builder)
{
    // The length is read before the buffer is taken from the builder.
    auto const string_length = builder.length();
    VERIFY(string_length > MAX_SHORT_STRING_BYTE_COUNT);

    auto leaked_buffer = builder.leak_buffer_for_string_construction({});
    VERIFY(leaked_buffer.has_value()); // We should only arrive here if the buffer is outlined.

    auto* slot = leaked_buffer->buffer.data();
    return adopt_ref(*new (slot) StringData(string_length, leaked_buffer->capacity));
}
|
||||
|
||||
static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count)
|
||||
{
|
||||
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
|
||||
VERIFY(byte_count > MAX_SHORT_STRING_BYTE_COUNT);
|
||||
|
||||
void* slot = malloc(sizeof(StringData) + sizeof(StringData::SubstringData));
|
||||
if (!slot) {
|
||||
auto capacity = sizeof(StringData) + sizeof(StringData::SubstringData);
|
||||
void* slot = malloc(capacity);
|
||||
if (!slot)
|
||||
return Error::from_errno(ENOMEM);
|
||||
}
|
||||
return adopt_ref(*new (slot) StringData(superstring, start, byte_count));
|
||||
|
||||
return adopt_ref(*new (slot) StringData(superstring, start, byte_count, capacity));
|
||||
}
|
||||
|
||||
struct SubstringData {
|
||||
|
@ -48,7 +63,7 @@ public:
|
|||
|
||||
void operator delete(void* ptr)
|
||||
{
|
||||
kfree_sized(ptr, allocation_size_for_string_data(static_cast<StringData const*>(ptr)->m_byte_count));
|
||||
kfree_sized(ptr, static_cast<StringData const*>(ptr)->m_capacity);
|
||||
}
|
||||
|
||||
~StringData()
|
||||
|
@ -99,13 +114,15 @@ private:
|
|||
return sizeof(StringData) + (sizeof(char) * length);
|
||||
}
|
||||
|
||||
explicit StringData(size_t byte_count)
|
||||
StringData(size_t byte_count, size_t capacity)
|
||||
: m_byte_count(byte_count)
|
||||
, m_capacity(capacity)
|
||||
{
|
||||
}
|
||||
|
||||
StringData(StringData const& superstring, size_t start, size_t byte_count)
|
||||
StringData(StringData const& superstring, size_t start, size_t byte_count, size_t capacity)
|
||||
: m_byte_count(byte_count)
|
||||
, m_capacity(capacity)
|
||||
, m_substring(true)
|
||||
{
|
||||
auto& data = const_cast<SubstringData&>(substring_data());
|
||||
|
@ -125,6 +142,8 @@ private:
|
|||
}
|
||||
|
||||
u32 m_byte_count { 0 };
|
||||
u32 m_capacity { 0 };
|
||||
|
||||
mutable unsigned m_hash { 0 };
|
||||
mutable bool m_has_hash { false };
|
||||
bool m_substring { false };
|
||||
|
|
Loading…
Reference in a new issue