ladybird/AK/PrintfImplementation.h
asynts 71b7ef0992 AK+Format: Support all format specifiers for strings.
The following is now possible:

    outf("{:.4}", "abcdef"); // abcd
    outf("{:*<8}", "abcdef"); // abcdef**
2020-09-29 16:14:58 +02:00

703 lines
22 KiB
C++

/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Assertions.h>
#include <AK/LogStream.h>
#include <AK/StdLibExtras.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>
#include <stdarg.h>
namespace PrintfImplementation {
// Nibble-to-character lookup tables used by print_hex(): index with a value in
// the range 0-15 to obtain the matching lower- or upper-case hexadecimal digit.
static constexpr const char* printf_hex_digits_lower { "0123456789abcdef" };
static constexpr const char* printf_hex_digits_upper { "0123456789ABCDEF" };
// Where a formatted value is placed inside a field that is wider than the value.
enum class Align {
    Left = 0,   // value first, padding after it
    Center = 1, // padding split on both sides of the value
    Right = 2,  // padding first, value after it
};
// Selects what convert_unsigned_to_string() writes in the sign position.
// Negative values always get '-'; the mode decides what non-negative values get.
enum class SignMode {
    OnlyIfNeeded = 0, // nothing
    Always = 1,       // '+'
    Reserved = 2,     // ' '
};
// Writes the digits of `value` in the given `base` (2 through 16) to the start of
// `buffer` and returns how many bytes were written. No terminator is appended.
//
// The longest possible output is a 64-bit value rendered in binary; the buffer is
// deliberately sized to a larger power of two as a small safety margin against
// out-of-bounds writes.
inline size_t convert_unsigned_to_string(u64 value, Array<u8, 128>& buffer, u8 base, bool upper_case)
{
    ASSERT(base >= 2 && base <= 16);

    // Zero has no significant digits, so the loop below would produce nothing.
    if (value == 0) {
        buffer[0] = '0';
        return 1;
    }

    const char* const digit_table = upper_case ? "0123456789ABCDEF" : "0123456789abcdef";

    // Peel off digits starting with the least significant one.
    size_t length = 0;
    for (u64 remaining = value; remaining > 0; remaining /= base)
        buffer[length++] = digit_table[remaining % base];

    // The digits were produced back to front; flip them in place.
    // `length` is at least 1 here, so `length - 1` cannot wrap around.
    for (size_t front = 0, back = length - 1; front < back; ++front, --back)
        swap(buffer[front], buffer[back]);

    return length;
}
// Appends `value` to `builder` as a formatted number and returns the number of
// characters appended.
//
//   base        numeric base handed to the digit conversion
//   prefix      when set, emit "0b"/"0B" (base 2), "0" (base 8) or "0x"/"0X" (base 16)
//   upper_case  selects upper-case digits and prefix letters
//   zero_pad    with Align::Right only: pad with '0' between the prefix and the digits
//   align       placement of the number inside a field of `width` characters
//   width       minimum field width; shorter output is padded, longer output is kept
//   fill        padding character (unless zero padding applies)
//   sign_mode   what to print in the sign position for non-negative values
//   is_negative when set, a '-' is printed in the sign position
inline size_t convert_unsigned_to_string(
    u64 value,
    StringBuilder& builder,
    u8 base = 10,
    bool prefix = false,
    bool upper_case = false,
    bool zero_pad = false,
    Align align = Align::Right,
    size_t width = 0,
    char fill = ' ',
    SignMode sign_mode = SignMode::OnlyIfNeeded,
    bool is_negative = false)
{
    Array<u8, 128> digits;
    const size_t digit_count = convert_unsigned_to_string(value, digits, base, upper_case);

    // Work out how many characters the sign and the base prefix will occupy.
    size_t prefix_length = 1;
    if (sign_mode == SignMode::OnlyIfNeeded)
        prefix_length = is_negative ? 1 : 0;
    if (prefix) {
        switch (base) {
        case 8:
            prefix_length += 1;
            break;
        case 2:
        case 16:
            prefix_length += 2;
            break;
        default:
            break;
        }
    }

    const auto emit_prefix = [&]() {
        if (is_negative)
            builder.append('-');
        else if (sign_mode == SignMode::Always)
            builder.append('+');
        else if (sign_mode == SignMode::Reserved)
            builder.append(' ');

        if (!prefix)
            return;

        switch (base) {
        case 2:
            if (upper_case)
                builder.append("0B");
            else
                builder.append("0b");
            break;
        case 8:
            builder.append("0");
            break;
        case 16:
            if (upper_case)
                builder.append("0X");
            else
                builder.append("0x");
            break;
        default:
            break;
        }
    };

    const auto emit_padding = [&](size_t count, char character) {
        for (size_t n = 0; n < count; ++n)
            builder.append(character);
    };

    const auto emit_digits = [&]() {
        builder.append(StringView { digits.span().trim(digit_count) });
    };

    const size_t field_length = digit_count + prefix_length;
    const size_t padding_length = width > field_length ? width - field_length : 0;

    switch (align) {
    case Align::Left:
        emit_prefix();
        emit_digits();
        emit_padding(padding_length, fill);
        return field_length + padding_length;
    case Align::Center: {
        // An odd amount of padding puts the extra character on the right.
        const size_t padding_before = padding_length / 2;
        const size_t padding_after = ceil_div<size_t, size_t>(padding_length, 2);
        emit_padding(padding_before, fill);
        emit_prefix();
        emit_digits();
        emit_padding(padding_after, fill);
        return padding_before + field_length + padding_after;
    }
    case Align::Right:
        if (zero_pad) {
            // Zeros belong between the sign/prefix and the digits.
            emit_prefix();
            emit_padding(padding_length, '0');
            emit_digits();
        } else {
            emit_padding(padding_length, fill);
            emit_prefix();
            emit_digits();
        }
        return field_length + padding_length;
    }

    ASSERT_NOT_REACHED();
}
// Appends the signed integer `value` to `builder` as a formatted number and
// returns the number of characters appended.
//
// The magnitude of `value` is forwarded to convert_unsigned_to_string() together
// with a flag telling it whether to print '-'; every other parameter is passed
// through unchanged (`common_prefix` becomes its `prefix` argument).
inline size_t convert_signed_to_string(
    i64 value,
    StringBuilder& builder,
    u8 base = 10,
    bool common_prefix = false,
    bool upper_case = false,
    bool zero_pad = false,
    Align align = Align::Right,
    size_t width = 0,
    char fill = ' ',
    SignMode sign_mode = SignMode::OnlyIfNeeded)
{
    const bool is_negative = value < 0;

    // Compute the magnitude in unsigned 64-bit arithmetic:
    //  - negating the most negative i64 as a signed value would overflow, and
    //  - going through size_t would drop the upper bits wherever size_t is
    //    narrower than 64 bits.
    const u64 magnitude = is_negative ? 0 - static_cast<u64>(value) : static_cast<u64>(value);

    return convert_unsigned_to_string(magnitude, builder, base, common_prefix, upper_case, zero_pad, align, width, fill, sign_mode, is_negative);
}
// strlen() is used by PrintfImpl::format_s() to measure the string argument.
// When __serenity__ is defined it is declared by hand; otherwise <string.h> is included.
// NOTE(review): this sits inside namespace PrintfImplementation, so the #include
// is expanded within the namespace — confirm that is intended.
#ifdef __serenity__
extern "C" size_t strlen(const char*);
#else
# include <string.h>
#endif
// Emits `number` in hexadecimal through `putch` and returns the number of
// characters written.
//
//   upper_case      use 'A'-'F' instead of 'a'-'f' for the digits
//   alternate_form  write "0x" in front of the digits
//   left_pad        write spaces in front of the number until the field is filled
//   zero_pad        write '0' characters in front of the digits until the field is filled
//   field_width     target width of the digits (the "0x" prefix is counted on top of it)
template<typename PutChFunc, typename T>
ALWAYS_INLINE int print_hex(PutChFunc putch, char*& bufptr, T number, bool upper_case, bool alternate_form, bool left_pad, bool zero_pad, u8 field_width)
{
    int written = 0;

    // Count the hex digits needed; zero still takes one digit.
    int digit_count = 0;
    T remaining = number;
    while (remaining > 0) {
        ++digit_count;
        remaining >>= 4;
    }
    if (digit_count == 0)
        digit_count = 1;

    if (left_pad) {
        // Leave room for the digits and, if requested, the two prefix characters.
        int space_limit = field_width - digit_count;
        if (alternate_form)
            space_limit -= 2;
        for (; written < space_limit; ++written)
            putch(bufptr, ' ');
    }

    if (alternate_form) {
        putch(bufptr, '0');
        putch(bufptr, 'x');
        written += 2;
        // The prefix already counts towards `written`, so widen the field to match.
        field_width += 2;
    }

    if (zero_pad) {
        for (; written < field_width - digit_count; ++written)
            putch(bufptr, '0');
    }

    if (number == 0) {
        putch(bufptr, '0');
        return written + 1;
    }

    // Emit the nibbles from most to least significant.
    const char* const digit_table = upper_case ? printf_hex_digits_upper : printf_hex_digits_lower;
    for (u8 shift = digit_count * 4; shift != 0;) {
        shift -= 4;
        putch(bufptr, digit_table[(number >> shift) & 0x0f]);
        ++written;
    }
    return written;
}
// Emits the 32-bit unsigned `number` in decimal through `putch`.
//
// The output is padded to `field_width` characters: on the left (with '0' when
// `zero_pad` is set, otherwise ' ') unless `left_pad` is set, in which case the
// spaces follow the digits. Returns the number of characters written.
template<typename PutChFunc>
ALWAYS_INLINE int print_number(PutChFunc putch, char*& bufptr, u32 number, bool left_pad, bool zero_pad, u32 field_width)
{
    char digits[16];
    size_t digit_count = 0;

    // Walk the decimal places from the highest one a u32 can have down to the
    // ones, skipping leading zeros but always keeping the final digit.
    bool seen_nonzero = false;
    for (u32 divisor = 1000000000; divisor != 0; divisor /= 10) {
        const char digit = '0' + (number / divisor);
        number %= divisor;
        seen_nonzero = seen_nonzero || digit != '0';
        if (seen_nonzero || divisor == 1)
            digits[digit_count++] = digit;
    }

    // The field is never narrower than the number itself.
    if (field_width < digit_count)
        field_width = digit_count;
    const size_t pad_count = field_width - digit_count;

    if (!left_pad) {
        const char pad_char = zero_pad ? '0' : ' ';
        for (size_t n = 0; n < pad_count; ++n)
            putch(bufptr, pad_char);
    }

    for (size_t n = 0; n < digit_count; ++n)
        putch(bufptr, digits[n]);

    if (left_pad) {
        for (size_t n = 0; n < pad_count; ++n)
            putch(bufptr, ' ');
    }

    return field_width;
}
// Emits the 64-bit unsigned `number` in decimal through `putch`.
//
// The output is padded to `field_width` characters: on the left (with '0' when
// `zero_pad` is set, otherwise ' ') unless `left_pad` is set, in which case the
// spaces follow the digits. Returns the number of characters written.
template<typename PutChFunc>
ALWAYS_INLINE int print_u64(PutChFunc putch, char*& bufptr, u64 number, bool left_pad, bool zero_pad, u32 field_width)
{
    // The largest u64 (18446744073709551615) has 20 decimal digits, so the
    // scratch buffer must hold 20 characters. No terminator is stored.
    char buf[20];
    char* p = buf;

    // Walk the decimal places from 10^19 down to the ones, skipping leading
    // zeros but always keeping the final digit.
    bool seen_nonzero = false;
    for (u64 divisor = 10000000000000000000LLU; divisor != 0; divisor /= 10) {
        const char ch = '0' + (number / divisor);
        number %= divisor;
        if (ch != '0')
            seen_nonzero = true;
        if (seen_nonzero || divisor == 1)
            *(p++) = ch;
    }

    const size_t numlen = p - buf;

    // The field is never narrower than the number itself.
    if (!field_width || field_width < numlen)
        field_width = numlen;
    const size_t pad_count = field_width - numlen;

    if (!left_pad) {
        for (size_t i = 0; i < pad_count; ++i)
            putch(bufptr, zero_pad ? '0' : ' ');
    }

    for (size_t i = 0; i < numlen; ++i)
        putch(bufptr, buf[i]);

    if (left_pad) {
        for (size_t i = 0; i < pad_count; ++i)
            putch(bufptr, ' ');
    }

    return field_width;
}
// Emits `number` as "<integer part>.<fraction>" through `putch` and returns the
// number of characters written.
//
// A leading '-' is written for negative values. The integer part is printed
// with `left_pad`, `zero_pad` and `field_width`; the fraction is printed as
// exactly `fraction_length` zero-padded digits, truncated rather than rounded.
// NOTE(review): the value is converted through i64, so NaN, infinities and
// magnitudes outside the i64 range are not handled here.
template<typename PutChFunc>
ALWAYS_INLINE int print_double(PutChFunc putch, char*& bufptr, double number, bool left_pad, bool zero_pad, u32 field_width, u32 fraction_length)
{
    int length = 0;

    if (number < 0) {
        putch(bufptr, '-');
        length++;
        number = 0 - number;
    }

    // Accumulate rather than assign, so the sign written above stays counted.
    length += print_u64(putch, bufptr, static_cast<i64>(number), left_pad, zero_pad, field_width);

    putch(bufptr, '.');
    length++;

    // Scale the fractional part up so its leading digits become an integer.
    double fraction = number - static_cast<i64>(number);
    for (u32 i = 0; i < fraction_length; ++i)
        fraction = fraction * 10;

    return length + print_u64(putch, bufptr, static_cast<i64>(fraction), false, true, fraction_length);
}
// Emits the 64-bit signed `number` in decimal through `putch`: a '-' for
// negative values followed by the magnitude printed via print_u64().
// Returns the number of characters written.
template<typename PutChFunc>
ALWAYS_INLINE int print_i64(PutChFunc putch, char*& bufptr, i64 number, bool left_pad, bool zero_pad, u32 field_width)
{
    // FIXME: This won't work if there is padding. ' -17' becomes '- 17'.
    if (number < 0) {
        putch(bufptr, '-');
        // Negate in unsigned arithmetic: `0 - number` as a signed expression
        // overflows for the most negative i64.
        const u64 magnitude = 0 - static_cast<u64>(number);
        return print_u64(putch, bufptr, magnitude, left_pad, zero_pad, field_width) + 1;
    }
    return print_u64(putch, bufptr, number, left_pad, zero_pad, field_width);
}
// Emits the 32-bit unsigned `number` in octal through `putch`.
//
// The output is padded to `field_width` characters: on the left (with '0' when
// `zero_pad` is set, otherwise ' ') unless `left_pad` is set, in which case the
// spaces follow the digits. Returns the number of characters written.
template<typename PutChFunc>
ALWAYS_INLINE int print_octal_number(PutChFunc putch, char*& bufptr, u32 number, bool left_pad, bool zero_pad, u32 field_width)
{
    char buf[32];
    char* p = buf;

    // A u32 has up to 11 octal digits (037777777777), so start at 8^10.
    // Starting any lower makes the first quotient exceed 7 for large values,
    // which would yield a character that is not an octal digit.
    bool seen_nonzero = false;
    for (u32 divisor = 1073741824; divisor != 0; divisor /= 8) {
        const char ch = '0' + (number / divisor);
        number %= divisor;
        if (ch != '0')
            seen_nonzero = true;
        // Skip leading zeros but always keep the final digit.
        if (seen_nonzero || divisor == 1)
            *(p++) = ch;
    }

    const size_t numlen = p - buf;

    // The field is never narrower than the number itself.
    if (!field_width || field_width < numlen)
        field_width = numlen;
    const size_t pad_count = field_width - numlen;

    if (!left_pad) {
        for (size_t i = 0; i < pad_count; ++i)
            putch(bufptr, zero_pad ? '0' : ' ');
    }

    for (size_t i = 0; i < numlen; ++i)
        putch(bufptr, buf[i]);

    if (left_pad) {
        for (size_t i = 0; i < pad_count; ++i)
            putch(bufptr, ' ');
    }

    return field_width;
}
// Emits up to `len` characters of `str` through `putch`, padded with spaces to
// `field_width`, and returns the resulting field width.
//
// Without `dot`, the field grows to fit the whole string. With `dot`, the field
// width is kept as given and the string is cut off at that width.
// Padding goes after the text when `left_pad` is set, otherwise before it.
template<typename PutChFunc>
ALWAYS_INLINE int print_string(PutChFunc putch, char*& bufptr, const char* str, size_t len, bool left_pad, size_t field_width, bool dot)
{
    if (!dot && field_width < len)
        field_width = len;

    const size_t visible_length = min(len, field_width);
    const size_t padding_length = field_width - visible_length;

    if (!left_pad) {
        for (size_t n = 0; n < padding_length; ++n)
            putch(bufptr, ' ');
    }

    for (size_t n = 0; n < visible_length; ++n)
        putch(bufptr, str[n]);

    if (left_pad) {
        for (size_t n = 0; n < padding_length; ++n)
            putch(bufptr, ' ');
    }

    return field_width;
}
// Emits the signed `number` in decimal through `putch`: a sign character (when
// one applies) followed by the magnitude printed via print_number().
//
// Negative values get '-'; non-negative values get '+' when `always_sign` is
// set. Returns the number of characters written, including the sign.
template<typename PutChFunc>
ALWAYS_INLINE int print_signed_number(PutChFunc putch, char*& bufptr, int number, bool left_pad, bool zero_pad, u32 field_width, bool always_sign)
{
    if (number < 0) {
        putch(bufptr, '-');
        // Negate in unsigned arithmetic: `0 - number` as a signed expression
        // overflows for the most negative int.
        const u32 magnitude = 0u - static_cast<u32>(number);
        return print_number(putch, bufptr, magnitude, left_pad, zero_pad, field_width) + 1;
    }

    if (always_sign) {
        putch(bufptr, '+');
        // The '+' is a written character too, so it is part of the count.
        return print_number(putch, bufptr, number, left_pad, zero_pad, field_width) + 1;
    }

    return print_number(putch, bufptr, number, left_pad, zero_pad, field_width);
}
// Flags and widths collected by printf_internal() while parsing one conversion
// specification; a fresh instance is used for every character of the format string.
struct ModifierState {
    bool left_pad = false;            // '-' seen
    bool zero_pad = false;            // leading '0' seen
    bool dot = false;                 // '.' seen
    unsigned field_width = 0;         // digits before the '.', or the '*' argument
    bool has_fraction_length = false; // digits after the '.' seen
    unsigned fraction_length = 6;     // digits after the '.'; stays 6 if none are given
    unsigned long_qualifiers = 0;     // number of 'l' characters seen
    bool size_qualifier = false;      // 'z' seen
    bool alternate_form = false;      // '#' seen
    bool always_sign = false;         // '+' seen
};
// Default set of conversion handlers used by printf_internal().
//
// Each format_<c>() handles the conversion character <c>: it pulls its argument
// from `ap` through NextArgument, writes the result through the stored `putch`
// callback and returns the number of characters it reports as written.
template<typename PutChFunc, typename ArgumentListRefT, template<typename T, typename U = ArgumentListRefT> typename NextArgument>
struct PrintfImpl {
    // Stores references only; `nwritten` is read by format_n().
    ALWAYS_INLINE PrintfImpl(PutChFunc& putch, char*& bufptr, const int& nwritten)
        : m_bufptr(bufptr)
        , m_nwritten(nwritten)
        , m_putch(putch)
    {
    }

    // %s: C string; a null pointer is printed as "(null)".
    ALWAYS_INLINE int format_s(const ModifierState& state, ArgumentListRefT ap) const
    {
        const char* text = NextArgument<const char*>()(ap);
        if (text == nullptr)
            text = "(null)";
        const size_t length = strlen(text);
        return print_string(m_putch, m_bufptr, text, length, state.left_pad, state.field_width, state.dot);
    }

    // %d: signed decimal; two or more 'l' qualifiers select a 64-bit argument.
    ALWAYS_INLINE int format_d(const ModifierState& state, ArgumentListRefT ap) const
    {
        if (state.long_qualifiers < 2) {
            const int value = NextArgument<int>()(ap);
            return print_signed_number(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width, state.always_sign);
        }
        const i64 value = NextArgument<i64>()(ap);
        return print_i64(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width);
    }

    // %i: same as %d.
    ALWAYS_INLINE int format_i(const ModifierState& state, ArgumentListRefT ap) const
    {
        return format_d(state, ap);
    }

    // %u: unsigned decimal; two or more 'l' qualifiers select a 64-bit argument.
    ALWAYS_INLINE int format_u(const ModifierState& state, ArgumentListRefT ap) const
    {
        if (state.long_qualifiers < 2) {
            const u32 value = NextArgument<u32>()(ap);
            return print_number(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width);
        }
        const u64 value = NextArgument<u64>()(ap);
        return print_u64(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width);
    }

    // %Q: 64-bit unsigned decimal.
    ALWAYS_INLINE int format_Q(const ModifierState& state, ArgumentListRefT ap) const
    {
        const u64 value = NextArgument<u64>()(ap);
        return print_u64(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width);
    }

    // %q: 64-bit lower-case hex with a fixed field width of 16 and no prefix.
    ALWAYS_INLINE int format_q(const ModifierState& state, ArgumentListRefT ap) const
    {
        const u64 value = NextArgument<u64>()(ap);
        return print_hex(m_putch, m_bufptr, value, false, false, state.left_pad, state.zero_pad, 16);
    }

    // %g: handled exactly like %f.
    ALWAYS_INLINE int format_g(const ModifierState& state, ArgumentListRefT ap) const
    {
        return format_f(state, ap);
    }

    // %f: double with `fraction_length` digits after the decimal point.
    ALWAYS_INLINE int format_f(const ModifierState& state, ArgumentListRefT ap) const
    {
        const double value = NextArgument<double>()(ap);
        return print_double(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width, state.fraction_length);
    }

    // %o: 32-bit octal; the alternate form writes a leading '0' first.
    ALWAYS_INLINE int format_o(const ModifierState& state, ArgumentListRefT ap) const
    {
        int prefix_length = 0;
        if (state.alternate_form) {
            m_putch(m_bufptr, '0');
            prefix_length = 1;
        }
        const u32 value = NextArgument<u32>()(ap);
        return prefix_length + print_octal_number(m_putch, m_bufptr, value, state.left_pad, state.zero_pad, state.field_width);
    }

    // %x: lower-case hex; two or more 'l' qualifiers select a 64-bit argument.
    ALWAYS_INLINE int format_x(const ModifierState& state, ArgumentListRefT ap) const
    {
        if (state.long_qualifiers < 2) {
            const u32 value = NextArgument<u32>()(ap);
            return print_hex(m_putch, m_bufptr, value, false, state.alternate_form, state.left_pad, state.zero_pad, state.field_width);
        }
        const u64 value = NextArgument<u64>()(ap);
        return print_hex(m_putch, m_bufptr, value, false, state.alternate_form, state.left_pad, state.zero_pad, state.field_width);
    }

    // %X: upper-case hex; two or more 'l' qualifiers select a 64-bit argument.
    ALWAYS_INLINE int format_X(const ModifierState& state, ArgumentListRefT ap) const
    {
        if (state.long_qualifiers < 2) {
            const u32 value = NextArgument<u32>()(ap);
            return print_hex(m_putch, m_bufptr, value, true, state.alternate_form, state.left_pad, state.zero_pad, state.field_width);
        }
        const u64 value = NextArgument<u64>()(ap);
        return print_hex(m_putch, m_bufptr, value, true, state.alternate_form, state.left_pad, state.zero_pad, state.field_width);
    }

    // %n: stores the current written-character count through an int* argument.
    ALWAYS_INLINE int format_n(const ModifierState&, ArgumentListRefT ap) const
    {
        int* destination = NextArgument<int*>()(ap);
        *destination = m_nwritten;
        return 0;
    }

    // %p: argument read as u32, printed as "0x" plus zero-padded lower-case hex, width 8.
    ALWAYS_INLINE int format_p(const ModifierState&, ArgumentListRefT ap) const
    {
        const u32 value = NextArgument<u32>()(ap);
        return print_hex(m_putch, m_bufptr, value, false, true, false, true, 8);
    }

    // %P: like %p but with upper-case hex digits.
    ALWAYS_INLINE int format_P(const ModifierState&, ArgumentListRefT ap) const
    {
        const u32 value = NextArgument<u32>()(ap);
        return print_hex(m_putch, m_bufptr, value, true, true, false, true, 8);
    }

    // %%: a literal percent sign; consumes no argument.
    ALWAYS_INLINE int format_percent(const ModifierState&, ArgumentListRefT) const
    {
        m_putch(m_bufptr, '%');
        return 1;
    }

    // %w: int argument as zero-padded lower-case hex, width 4.
    ALWAYS_INLINE int format_w(const ModifierState& state, ArgumentListRefT ap) const
    {
        const int value = NextArgument<int>()(ap);
        return print_hex(m_putch, m_bufptr, value, false, state.alternate_form, false, true, 4);
    }

    // %b: int argument as zero-padded lower-case hex, width 2.
    ALWAYS_INLINE int format_b(const ModifierState& state, ArgumentListRefT ap) const
    {
        const int value = NextArgument<int>()(ap);
        return print_hex(m_putch, m_bufptr, value, false, state.alternate_form, false, true, 2);
    }

    // %c: single character, fetched as int and printed as a one-character string.
    ALWAYS_INLINE int format_c(const ModifierState& state, ArgumentListRefT ap) const
    {
        char character = NextArgument<int>()(ap);
        return print_string(m_putch, m_bufptr, &character, 1, state.left_pad, state.field_width, state.dot);
    }

    // Any other conversion character: log it, write nothing, consume no argument.
    ALWAYS_INLINE int format_unrecognized(char format_op, const char* fmt, const ModifierState&, ArgumentListRefT) const
    {
        dbg() << "printf_internal: Unimplemented format specifier " << format_op << " (fmt: " << fmt << ")";
        return 0;
    }

protected:
    char*& m_bufptr;
    const int& m_nwritten;
    PutChFunc& m_putch;
};
// Default argument fetcher for printf_internal(): pulls the next argument of
// type T out of the argument list `ap` using va_arg.
template<typename T, typename V>
struct VaArgNextArgument {
    ALWAYS_INLINE T operator()(V ap) const
    {
        T next_value = va_arg(ap, T);
        return next_value;
    }
};
// Expands to one `case` of the conversion switch in printf_internal().
// `#c` turns the macro argument into a string literal and `*` takes its first
// character, so PRINTF_IMPL_DELEGATE_TO_IMPL(d) handles 'd' by calling
// impl.format_d(state, ap) and adding the result to `ret`.
#define PRINTF_IMPL_DELEGATE_TO_IMPL(c) \
case* #c: \
ret += impl.format_##c(state, ap); \
break;
// Core printf driver: walks `fmt`, copies ordinary characters through `putch`
// and hands each conversion specification to the matching format_<c>() handler
// of `Impl`.
//
//   putch   callback invoked as putch(bufptr, ch) for every output character
//   buffer  initial value of the `bufptr` that is passed (by reference) to putch
//   fmt     format string; only read here
//   ap      argument list, consumed through NextArgument
//
// Returns the sum of one per literal character plus whatever the handlers return.
template<typename PutChFunc, template<typename T, typename U, template<typename X, typename Y> typename V> typename Impl = PrintfImpl, typename ArgumentListT = va_list, template<typename T, typename V = decltype(declval<ArgumentListT&>())> typename NextArgument = VaArgNextArgument>
ALWAYS_INLINE int printf_internal(PutChFunc putch, char* buffer, const char*& fmt, ArgumentListT ap)
{
int ret = 0;
char* bufptr = buffer;
// `ret` is bound by reference so the implementation can read the running count (used by format_n).
Impl<PutChFunc, ArgumentListT&, NextArgument> impl { putch, bufptr, ret };
for (const char* p = fmt; *p; ++p) {
// Modifiers start from their defaults for every character of the format string.
ModifierState state;
// A '%' that is the last character of the string is emitted literally by the else branch below.
if (*p == '%' && *(p + 1)) {
// Each recognised flag/width/qualifier character jumps back here to look at the
// next character, unless that next character is the terminating NUL.
one_more:
++p;
if (*p == '.') {
state.dot = true;
if (*(p + 1))
goto one_more;
}
if (*p == '-') {
state.left_pad = true;
if (*(p + 1))
goto one_more;
}
if (*p == '+') {
state.always_sign = true;
if (*(p + 1))
goto one_more;
}
// A '0' counts as the zero-pad flag only before any width digit has been accumulated.
if (!state.zero_pad && !state.field_width && *p == '0') {
state.zero_pad = true;
if (*(p + 1))
goto one_more;
}
// Decimal digits build up the field width, or the fraction length once a '.' was seen.
if (*p >= '0' && *p <= '9') {
if (!state.dot) {
state.field_width *= 10;
state.field_width += *p - '0';
if (*(p + 1))
goto one_more;
} else {
// The first digit after the '.' replaces the default fraction length.
if (!state.has_fraction_length) {
state.has_fraction_length = true;
state.fraction_length = 0;
}
state.fraction_length *= 10;
state.fraction_length += *p - '0';
if (*(p + 1))
goto one_more;
}
}
// '*' takes the field width from the next int argument (regardless of whether a '.' was seen).
if (*p == '*') {
state.field_width = NextArgument<int>()(ap);
if (*(p + 1))
goto one_more;
}
if (*p == 'l') {
++state.long_qualifiers;
if (*(p + 1))
goto one_more;
}
if (*p == 'z') {
state.size_qualifier = true;
if (*(p + 1))
goto one_more;
}
if (*p == '#') {
state.alternate_form = true;
if (*(p + 1))
goto one_more;
}
// *p is now treated as the conversion character.
switch (*p) {
case '%':
ret += impl.format_percent(state, ap);
break;
PRINTF_IMPL_DELEGATE_TO_IMPL(P);
PRINTF_IMPL_DELEGATE_TO_IMPL(Q);
PRINTF_IMPL_DELEGATE_TO_IMPL(X);
PRINTF_IMPL_DELEGATE_TO_IMPL(b);
PRINTF_IMPL_DELEGATE_TO_IMPL(c);
PRINTF_IMPL_DELEGATE_TO_IMPL(d);
// The floating-point conversions are compiled out when KERNEL is defined.
#ifndef KERNEL
PRINTF_IMPL_DELEGATE_TO_IMPL(f);
PRINTF_IMPL_DELEGATE_TO_IMPL(g);
#endif
PRINTF_IMPL_DELEGATE_TO_IMPL(i);
PRINTF_IMPL_DELEGATE_TO_IMPL(n);
PRINTF_IMPL_DELEGATE_TO_IMPL(o);
PRINTF_IMPL_DELEGATE_TO_IMPL(p);
PRINTF_IMPL_DELEGATE_TO_IMPL(q);
PRINTF_IMPL_DELEGATE_TO_IMPL(s);
PRINTF_IMPL_DELEGATE_TO_IMPL(u);
PRINTF_IMPL_DELEGATE_TO_IMPL(w);
PRINTF_IMPL_DELEGATE_TO_IMPL(x);
default:
ret += impl.format_unrecognized(*p, fmt, state, ap);
break;
}
} else {
// Ordinary character: copy it to the output unchanged.
putch(bufptr, *p);
++ret;
}
}
return ret;
}
// The helper macro is only meant for the switch above.
#undef PRINTF_IMPL_DELEGATE_TO_IMPL
}
// Make printf_internal usable without the PrintfImplementation:: qualifier.
using PrintfImplementation::printf_internal;