2020-01-18 03:38:21 -05:00
/*
* Copyright ( c ) 2018 - 2020 , Andreas Kling < kling @ serenityos . org >
2021-05-23 17:31:16 -04:00
* Copyright ( c ) 2021 , Max Wipfli < mail @ maxwipfli . ch >
2020-01-18 03:38:21 -05:00
*
2021-04-22 04:24:48 -04:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-01-18 03:38:21 -05:00
*/
2021-06-01 15:18:08 -04:00
# include <AK/CharacterTypes.h>
2021-05-27 15:05:07 -04:00
# include <AK/Debug.h>
2020-05-26 07:52:44 -04:00
# include <AK/LexicalPath.h>
2019-08-10 11:27:56 -04:00
# include <AK/StringBuilder.h>
# include <AK/URL.h>
2021-05-27 15:05:07 -04:00
# include <AK/URLParser.h>
2021-05-25 07:50:03 -04:00
# include <AK/Utf8View.h>
2019-08-10 11:27:56 -04:00
namespace AK {
2021-05-27 15:05:07 -04:00
// FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
2021-11-10 18:55:02 -05:00
// Constructs a URL by delegating to URLParser::parse() on the given string.
// When URL_PARSER_DEBUG is enabled, the outcome of the parse is logged.
URL::URL(StringView string)
    : URL(URLParser::parse(string))
{
    if constexpr (URL_PARSER_DEBUG) {
        if (!m_valid) {
            dbgln("URL constructor: Parsed URL to be invalid.");
            return;
        }
        dbgln("URL constructor: Parsed URL to be '{}'.", serialize());
    }
}
2022-12-04 13:02:33 -05:00
// Returns the path as a single string.
// For a cannot-be-a-base URL this is the first path entry verbatim; otherwise
// every segment is emitted with a leading '/'.
DeprecatedString URL::path() const
{
    if (cannot_be_a_base_url())
        return paths()[0];

    StringBuilder joined;
    for (auto& segment : m_paths) {
        joined.append('/');
        joined.append(segment);
    }
    return joined.to_deprecated_string();
}
2022-12-04 13:02:33 -05:00
// Parses `string` with this URL passed to the parser as the base.
// An invalid base yields a default-constructed URL without invoking the parser.
URL URL::complete_url(DeprecatedString const& string) const
{
    if (is_valid())
        return URLParser::parse(string, this);
    return {};
}
2022-12-04 13:02:33 -05:00
void URL : : set_scheme ( DeprecatedString scheme )
2020-04-11 17:07:23 -04:00
{
2021-06-01 04:58:27 -04:00
m_scheme = move ( scheme ) ;
2020-04-11 17:07:23 -04:00
m_valid = compute_validity ( ) ;
}
2022-12-04 13:02:33 -05:00
void URL : : set_username ( DeprecatedString username )
2021-05-25 15:32:20 -04:00
{
2021-06-01 04:58:27 -04:00
m_username = move ( username ) ;
2021-05-25 15:32:20 -04:00
m_valid = compute_validity ( ) ;
}
2022-12-04 13:02:33 -05:00
void URL : : set_password ( DeprecatedString password )
2021-05-25 15:32:20 -04:00
{
2021-06-01 04:58:27 -04:00
m_password = move ( password ) ;
2021-05-25 15:32:20 -04:00
m_valid = compute_validity ( ) ;
}
2022-12-04 13:02:33 -05:00
void URL : : set_host ( DeprecatedString host )
2020-04-11 17:07:23 -04:00
{
2021-06-01 04:58:27 -04:00
m_host = move ( host ) ;
2020-04-11 17:07:23 -04:00
m_valid = compute_validity ( ) ;
}
2021-09-13 16:12:16 -04:00
void URL : : set_port ( Optional < u16 > port )
2020-11-04 01:20:20 -05:00
{
2021-05-25 15:32:20 -04:00
if ( port = = default_port_for_scheme ( m_scheme ) ) {
2021-09-13 16:12:16 -04:00
m_port = { } ;
2021-05-25 15:32:20 -04:00
return ;
}
2021-09-13 16:12:16 -04:00
m_port = move ( port ) ;
2020-11-04 01:20:20 -05:00
m_valid = compute_validity ( ) ;
}
2022-12-04 13:02:33 -05:00
void URL : : set_paths ( Vector < DeprecatedString > paths )
2021-05-25 15:32:20 -04:00
{
2021-06-01 04:58:27 -04:00
m_paths = move ( paths ) ;
2021-05-25 15:32:20 -04:00
m_valid = compute_validity ( ) ;
}
2022-12-04 13:02:33 -05:00
void URL : : set_query ( DeprecatedString query )
2020-04-11 17:07:23 -04:00
{
2021-06-01 04:58:27 -04:00
m_query = move ( query ) ;
2020-04-11 17:07:23 -04:00
}
2022-12-04 13:02:33 -05:00
void URL : : set_fragment ( DeprecatedString fragment )
2020-04-11 18:38:13 -04:00
{
2021-06-01 04:58:27 -04:00
m_fragment = move ( fragment ) ;
2020-04-11 18:38:13 -04:00
}
2021-05-29 14:46:49 -04:00
// FIXME: This is by no means complete.
// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
2020-04-11 17:07:23 -04:00
bool URL : : compute_validity ( ) const
{
2021-05-23 17:31:16 -04:00
if ( m_scheme . is_empty ( ) )
2020-04-11 17:07:23 -04:00
return false ;
2020-11-04 01:20:20 -05:00
2021-05-23 17:31:16 -04:00
if ( m_scheme = = " data " ) {
2020-11-04 01:20:20 -05:00
if ( m_data_mime_type . is_empty ( ) )
2020-04-19 04:36:56 -04:00
return false ;
2021-05-29 14:46:49 -04:00
if ( m_data_payload_is_base64 ) {
if ( m_data_payload . length ( ) % 4 ! = 0 )
return false ;
for ( auto character : m_data_payload ) {
if ( ! is_ascii_alphanumeric ( character ) | | character = = ' + ' | | character = = ' / ' | | character = = ' = ' )
return false ;
}
}
} else if ( m_cannot_be_a_base_url ) {
if ( m_paths . size ( ) ! = 1 )
return false ;
if ( m_paths [ 0 ] . is_empty ( ) )
return false ;
} else {
if ( m_scheme . is_one_of ( " about " , " mailto " ) )
return false ;
// NOTE: Maybe it is allowed to have a zero-segment path.
if ( m_paths . size ( ) = = 0 )
return false ;
2020-04-11 17:07:23 -04:00
}
2020-11-04 01:20:20 -05:00
2021-05-29 14:46:49 -04:00
// NOTE: A file URL's host should be the empty string for localhost, not null.
if ( m_scheme = = " file " & & m_host . is_null ( ) )
2020-11-04 01:20:20 -05:00
return false ;
2020-04-11 17:07:23 -04:00
return true ;
}
2021-11-10 18:55:02 -05:00
bool URL : : scheme_requires_port ( StringView scheme )
2020-11-04 01:20:20 -05:00
{
2021-05-23 17:31:16 -04:00
return ( default_port_for_scheme ( scheme ) ! = 0 ) ;
2020-11-04 01:20:20 -05:00
}
2021-11-10 18:55:02 -05:00
// Maps a scheme name to its default port, or 0 when the scheme is not in the list below.
u16 URL::default_port_for_scheme(StringView scheme)
{
    // Schemes sharing a default port are grouped together.
    if (scheme.is_one_of("http", "ws"))
        return 80;
    if (scheme.is_one_of("https", "wss"))
        return 443;
    if (scheme == "gemini")
        return 1965;
    if (scheme == "irc")
        return 6667;
    if (scheme == "ircs")
        return 6697;

    // No default port is listed for this scheme.
    return 0;
}
2022-12-04 13:02:33 -05:00
// Builds a "file" URL from a filesystem path, an optional fragment and an optional hostname.
// Returns a default-constructed URL when `path` is not absolute.
URL URL::create_with_file_scheme(DeprecatedString const& path, DeprecatedString const& fragment, DeprecatedString const& hostname)
{
    LexicalPath lexical_path(path);
    if (!lexical_path.is_absolute())
        return {};

    // NOTE: A null hostname implies localhost. Either way the host is stored as the empty string,
    //       because a file URL always needs a non-null hostname.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";

    URL url;
    url.set_scheme("file");
    url.set_host(refers_to_localhost ? DeprecatedString::empty() : hostname);
    url.set_paths(lexical_path.parts());

    // NOTE: A trailing slash is represented by an additional empty path segment.
    if (path.ends_with('/'))
        url.append_path("");

    url.set_fragment(fragment);
    return url;
}
2022-12-04 13:02:33 -05:00
// Builds a "help" URL from a path, an optional fragment and an optional hostname.
// Unlike create_with_file_scheme(), the path is not required to be absolute.
URL URL::create_with_help_scheme(DeprecatedString const& path, DeprecatedString const& fragment, DeprecatedString const& hostname)
{
    LexicalPath lexical_path(path);

    // NOTE: A null hostname implies localhost. Either way the host is stored as the empty string
    //       rather than null.
    bool const refers_to_localhost = hostname.is_null() || hostname == "localhost";

    URL url;
    url.set_scheme("help");
    url.set_host(refers_to_localhost ? DeprecatedString::empty() : hostname);
    url.set_paths(lexical_path.parts());

    // NOTE: A trailing slash is represented by an additional empty path segment.
    if (path.ends_with('/'))
        url.append_path("");

    url.set_fragment(fragment);
    return url;
}
2022-12-04 13:02:33 -05:00
// Interprets the argument as a URL first; if that does not yield a valid URL, treats it as a
// filesystem path, canonicalizes it and wraps it in a "file" URL.
URL URL::create_with_url_or_path(DeprecatedString const& url_or_path)
{
    URL parsed = url_or_path;
    if (!parsed.is_valid()) {
        DeprecatedString canonical_path = LexicalPath::canonicalized_path(url_or_path);
        return URL::create_with_file_scheme(canonical_path);
    }
    return parsed;
}
2021-05-25 16:05:01 -04:00
// https://url.spec.whatwg.org/#special-scheme
2021-11-10 18:55:02 -05:00
bool URL : : is_special_scheme ( StringView scheme )
2021-05-25 16:05:01 -04:00
{
return scheme . is_one_of ( " ftp " , " file " , " http " , " https " , " ws " , " wss " ) ;
}
2022-12-04 13:02:33 -05:00
// Serializes a data URL as "<scheme>:<mime type>[;base64],<payload>".
// Must only be called on a URL whose scheme is "data" and whose MIME type and payload are non-null.
DeprecatedString URL::serialize_data_url() const
{
    VERIFY(m_scheme == "data");
    VERIFY(!m_data_mime_type.is_null());
    VERIFY(!m_data_payload.is_null());

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');
    output.append(m_data_mime_type);
    if (m_data_payload_is_base64)
        output.append(";base64"sv);
    output.append(',');

    // NOTE: The specification does not say anything about encoding this, but we should encode at least control and non-ASCII
    //       characters (since this is also a valid representation of the same data URL).
    auto encoded_payload = URL::percent_encode(m_data_payload, PercentEncodeSet::C0Control);
    output.append(encoded_payload);

    return output.to_deprecated_string();
}
// https://url.spec.whatwg.org/#concept-url-serializer
2022-12-04 13:02:33 -05:00
// Serializes the URL to a string; data URLs are delegated to serialize_data_url().
// The fragment is only emitted when `exclude_fragment` is ExcludeFragment::No and a fragment is present.
DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
{
    if (m_scheme == "data")
        return serialize_data_url();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // The "//[user[:password]@]host[:port]" part is only written when a host is present.
    bool const has_host = !m_host.is_null();
    if (has_host) {
        output.append("//"sv);
        if (includes_credentials()) {
            output.append(percent_encode(m_username, PercentEncodeSet::Userinfo));
            if (!m_password.is_empty()) {
                output.append(':');
                output.append(percent_encode(m_password, PercentEncodeSet::Userinfo));
            }
            output.append('@');
        }
        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    if (cannot_be_a_base_url()) {
        output.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
    } else {
        // Without a host, a path whose first segment is empty gets a "/." prefix.
        if (!has_host && m_paths.size() > 1 && m_paths[0].is_empty())
            output.append("/."sv);
        for (auto& segment : m_paths) {
            output.append('/');
            output.append(percent_encode(segment, PercentEncodeSet::Path));
        }
    }

    if (!m_query.is_null()) {
        auto const query_encode_set = is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
        output.append('?');
        output.append(percent_encode(m_query, query_encode_set));
    }

    bool const fragment_requested = exclude_fragment == ExcludeFragment::No;
    if (fragment_requested && !m_fragment.is_null()) {
        output.append('#');
        output.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
    }

    return output.to_deprecated_string();
}
// https://url.spec.whatwg.org/#url-rendering
// NOTE: This does e.g. not display credentials.
// FIXME: Parts of the URL other than the host should have their sequences of percent-encoded bytes replaced with code points
// resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
2022-12-04 13:02:33 -05:00
// Serializes the URL for showing to the user: same layout as serialize(), but credentials are
// never written and the fragment is always included when present.
// Must only be called on a valid URL.
DeprecatedString URL::serialize_for_display() const
{
    VERIFY(m_valid);

    if (m_scheme == "data")
        return serialize_data_url();

    StringBuilder output;
    output.append(m_scheme);
    output.append(':');

    // The "//host[:port]" part is only written when a host is present.
    bool const has_host = !m_host.is_null();
    if (has_host) {
        output.append("//"sv);
        output.append(m_host);
        if (m_port.has_value())
            output.appendff(":{}", *m_port);
    }

    if (cannot_be_a_base_url()) {
        output.append(percent_encode(m_paths[0], PercentEncodeSet::Path));
    } else {
        // Without a host, a path whose first segment is empty gets a "/." prefix.
        if (!has_host && m_paths.size() > 1 && m_paths[0].is_empty())
            output.append("/."sv);
        for (auto& segment : m_paths) {
            output.append('/');
            output.append(percent_encode(segment, PercentEncodeSet::Path));
        }
    }

    if (!m_query.is_null()) {
        auto const query_encode_set = is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query;
        output.append('?');
        output.append(percent_encode(m_query, query_encode_set));
    }

    if (!m_fragment.is_null()) {
        output.append('#');
        output.append(percent_encode(m_fragment, PercentEncodeSet::Fragment));
    }

    return output.to_deprecated_string();
}
2021-09-13 15:18:14 -04:00
// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
// https://url.spec.whatwg.org/#concept-url-origin
2022-12-04 13:02:33 -05:00
// Produces the serialized origin of this URL: "scheme://host[:port]" for ftp/http/https/ws/wss,
// the origin of the embedded URL for blob URLs, and "null" for everything else.
// Must only be called on a valid URL.
DeprecatedString URL::serialize_origin() const
{
    VERIFY(m_valid);

    if (m_scheme == "blob"sv) {
        // TODO: 1. If URL's blob URL entry is non-null, then return URL's blob URL entry's environment's origin.

        // 2. Let url be the result of parsing URL's path[0].
        VERIFY(!m_paths.is_empty());
        URL url = m_paths[0];

        // 3. Return a new opaque origin, if url is failure, and url's origin otherwise.
        if (url.is_valid())
            return url.serialize_origin();
        return "null";
    }

    // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
    bool const has_tuple_origin = m_scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv);
    if (!has_tuple_origin)
        return "null";

    StringBuilder output;
    output.append(m_scheme);
    output.append("://"sv);
    output.append(m_host);
    if (m_port.has_value())
        output.appendff(":{}", *m_port);
    return output.build();
}
2021-06-01 04:58:27 -04:00
bool URL : : equals ( URL const & other , ExcludeFragment exclude_fragments ) const
2021-05-27 15:38:16 -04:00
{
2021-06-01 05:14:30 -04:00
if ( this = = & other )
return true ;
2021-05-27 15:38:16 -04:00
if ( ! m_valid | | ! other . m_valid )
return false ;
return serialize ( exclude_fragments ) = = other . serialize ( exclude_fragments ) ;
}
2022-12-04 13:02:33 -05:00
// Returns the last path segment, or a default-constructed string when the URL is invalid
// or has no path segments.
DeprecatedString URL::basename() const
{
    if (!m_valid || m_paths.is_empty())
        return {};
    return m_paths.last();
}
2021-05-25 07:50:03 -04:00
void URL : : append_percent_encoded ( StringBuilder & builder , u32 code_point )
{
if ( code_point < = 0x7f )
builder . appendff ( " %{:02X} " , code_point ) ;
else if ( code_point < = 0x07ff )
builder . appendff ( " %{:02X}%{:02X} " , ( ( code_point > > 6 ) & 0x1f ) | 0xc0 , ( code_point & 0x3f ) | 0x80 ) ;
else if ( code_point < = 0xffff )
builder . appendff ( " %{:02X}%{:02X}%{:02X} " , ( ( code_point > > 12 ) & 0x0f ) | 0xe0 , ( ( code_point > > 6 ) & 0x3f ) | 0x80 , ( code_point & 0x3f ) | 0x80 ) ;
else if ( code_point < = 0x10ffff )
builder . appendff ( " %{:02X}%{:02X}%{:02X}%{:02X} " , ( ( code_point > > 18 ) & 0x07 ) | 0xf0 , ( ( code_point > > 12 ) & 0x3f ) | 0x80 , ( ( code_point > > 6 ) & 0x3f ) | 0x80 , ( code_point & 0x3f ) | 0x80 ) ;
else
VERIFY_NOT_REACHED ( ) ;
}
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
2022-04-09 18:48:15 -04:00
// Reports whether `code_point` is a member of the percent-encode set selected by `set`.
// Most sets are defined incrementally: each case first asks whether the code point is in a
// smaller set (via a recursive call) and then checks a short list of additional characters.
// An unknown `set` value hits VERIFY_NOT_REACHED().
// NOTE(review): the character-list literals below appear whitespace-padded (and two lines are
//               visibly broken) in this copy of the file; confirm the exact set members against
//               the upstream source before relying on them.
bool URL : : code_point_is_in_percent_encode_set ( u32 code_point , URL : : PercentEncodeSet set )
2021-05-25 07:50:03 -04:00
{
switch ( set ) {
case URL : : PercentEncodeSet : : C0Control :
// Everything below 0x20 and everything above 0x7E.
return code_point < 0x20 | | code_point > 0x7E ;
case URL : : PercentEncodeSet : : Fragment :
// C0Control plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : C0Control ) | | " \" <>` " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : Query :
// C0Control plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : C0Control ) | | " \" #<> " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : SpecialQuery :
// Query plus the single quote.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Query ) | | code_point = = ' \' ' ;
case URL : : PercentEncodeSet : : Path :
// Query plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Query ) | | " ?` { } " sv.contains(code_point);
case URL : : PercentEncodeSet : : Userinfo :
// Path plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Path ) | | " /: ; = @ [ \ \ ] ^ | " sv.contains(code_point);
case URL : : PercentEncodeSet : : Component :
// Userinfo plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Userinfo ) | | " $%&+, " sv . contains ( code_point ) ;
case URL : : PercentEncodeSet : : ApplicationXWWWFormUrlencoded :
2022-06-10 14:39:08 -04:00
// Component plus the characters listed in the literal.
return code_point_is_in_percent_encode_set ( code_point , URL : : PercentEncodeSet : : Component ) | | " !'()~ " sv . contains ( code_point ) ;
2021-05-25 07:50:03 -04:00
case URL : : PercentEncodeSet : : EncodeURI :
// NOTE: This is the same percent encode set that JS encodeURI() uses.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
2022-12-25 14:25:34 -05:00
// Unlike the other cases this one is not built on a smaller set: anything above 0x7E is in the
// set, and below that only ASCII alphanumerics and the characters in the literal are exempt.
return code_point > 0x7E | | ( ! is_ascii_alphanumeric ( code_point ) & & ! " ;,/?:@&=+$-_.!~*'()# " sv . contains ( static_cast < char > ( code_point ) ) ) ;
2021-05-25 07:50:03 -04:00
default :
VERIFY_NOT_REACHED ( ) ;
}
}
2022-04-08 09:20:30 -04:00
void URL : : append_percent_encoded_if_necessary ( StringBuilder & builder , u32 code_point , URL : : PercentEncodeSet set )
2021-05-25 07:50:03 -04:00
{
2022-04-08 09:20:30 -04:00
if ( code_point_is_in_percent_encode_set ( code_point , set ) )
2021-05-25 07:50:03 -04:00
append_percent_encoded ( builder , code_point ) ;
else
builder . append_code_point ( code_point ) ;
}
2022-12-04 13:02:33 -05:00
// Percent-encodes `input` code point by code point using the given encode set.
// With SpaceAsPlus::Yes, a space is written as '+' instead of being checked against the set.
DeprecatedString URL::percent_encode(StringView input, URL::PercentEncodeSet set, SpaceAsPlus space_as_plus)
{
    bool const plus_for_space = space_as_plus == SpaceAsPlus::Yes;

    StringBuilder encoded;
    for (auto code_point : Utf8View(input)) {
        if (plus_for_space && code_point == ' ') {
            encoded.append('+');
            continue;
        }
        append_percent_encoded_if_necessary(encoded, code_point, set);
    }
    return encoded.to_deprecated_string();
}
2022-12-04 13:02:33 -05:00
// Decodes "%XX" escapes in `input`. A '%' that is not followed by two ASCII hex digits is copied
// through unchanged, as is every other code point. Input without any '%' is returned as-is.
DeprecatedString URL::percent_decode(StringView input)
{
    if (!input.contains('%'))
        return input;

    StringBuilder decoded;
    Utf8View code_points(input);
    for (auto it = code_points.begin(); !it.done(); ++it) {
        // Only look ahead when the current code point actually is a '%'.
        bool const starts_escape = *it == '%'
            && is_ascii_hex_digit(it.peek(1).value_or(0))
            && is_ascii_hex_digit(it.peek(2).value_or(0));
        if (!starts_escape) {
            decoded.append_code_point(*it);
            continue;
        }

        // Consume both hex digits: high nibble first, then low nibble.
        ++it;
        u8 byte = parse_ascii_hex_digit(*it) << 4;
        ++it;
        byte += parse_ascii_hex_digit(*it);
        decoded.append(byte);
    }
    return decoded.to_deprecated_string();
}
2019-08-10 11:27:56 -04:00
}