mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-23 18:02:05 -05:00
AK: Don't destructively re-encode query strings in the URL parser
We were decoding and then re-encoding the query string in URLs. This round-trip caused us to lose information about plus ('+') ASCII characters encoded as "%2B".
This commit is contained in:
parent
3724ce765e
commit
79c77debb0
3 changed files with 40 additions and 3 deletions
|
@ -367,7 +367,7 @@ void URL::append_percent_encoded(StringBuilder& builder, u32 code_point)
|
|||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
|
||||
constexpr bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
|
||||
bool URL::code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set)
|
||||
{
|
||||
switch (set) {
|
||||
case URL::PercentEncodeSet::C0Control:
|
||||
|
|
2
AK/URL.h
2
AK/URL.h
|
@ -113,6 +113,8 @@ public:
|
|||
|
||||
bool operator==(URL const& other) const { return equals(other, ExcludeFragment::No); }
|
||||
|
||||
static bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet);
|
||||
|
||||
private:
|
||||
URL(String&& data_mime_type, String&& data_payload, bool payload_is_base64)
|
||||
: m_valid(true)
|
||||
|
|
|
@ -115,6 +115,41 @@ constexpr bool is_double_dot_path_segment(StringView input)
|
|||
return input == ".."sv || input.equals_ignoring_case(".%2e"sv) || input.equals_ignoring_case("%2e."sv) || input.equals_ignoring_case("%2e%2e"sv);
|
||||
}
|
||||
|
||||
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
|
||||
static String percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus = false)
|
||||
{
|
||||
// NOTE: This is written somewhat ad-hoc since we don't yet implement the Encoding spec.
|
||||
|
||||
StringBuilder output;
|
||||
|
||||
// 3. For each byte of encodeOutput converted to a byte sequence:
|
||||
for (auto byte : input) {
|
||||
// 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
|
||||
if (space_as_plus && byte == ' ') {
|
||||
output.append('+');
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2. Let isomorph be a code point whose value is byte’s value.
|
||||
u32 isomorph = byte;
|
||||
|
||||
// 3. Assert: percentEncodeSet includes all non-ASCII code points.
|
||||
|
||||
// 4. If isomorphic is not in percentEncodeSet, then append isomorph to output.
|
||||
if (!URL::code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
|
||||
output.append_code_point(isomorph);
|
||||
}
|
||||
|
||||
// 5. Otherwise, percent-encode byte and append the result to output.
|
||||
else {
|
||||
output.appendff("%{:02X}", byte);
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Return output.
|
||||
return output.to_string();
|
||||
}
|
||||
|
||||
// https://fetch.spec.whatwg.org/#data-urls
|
||||
// FIXME: This only loosely follows the spec, as we use the same class for "regular" and data URLs, unlike the spec.
|
||||
Optional<URL> URLParser::parse_data_url(StringView raw_input)
|
||||
|
@ -635,11 +670,11 @@ URL URLParser::parse(StringView raw_input, URL const* base_url, Optional<URL> ur
|
|||
}
|
||||
break;
|
||||
case State::Query:
|
||||
// https://url.spec.whatwg.org/#query-state
|
||||
if (code_point == end_of_file || code_point == '#') {
|
||||
VERIFY(url->m_query == "");
|
||||
auto query_percent_encode_set = url->is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
url->m_query = URL::percent_decode(URL::percent_encode(buffer.string_view(), query_percent_encode_set));
|
||||
url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set);
|
||||
buffer.clear();
|
||||
if (code_point == '#') {
|
||||
url->m_fragment = "";
|
||||
|
|
Loading…
Add table
Reference in a new issue