/*
 * Copyright (c) 2020, Stephan Unverwerth <s.unverwerth@serenityos.org>
 * Copyright (c) 2021-2022, David Tuin <davidot@serenityos.org>
 * Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
# pragma once
2022-02-15 22:34:59 -08:00
# include <AK/Assertions.h>
2020-10-08 10:49:08 -07:00
# include <AK/HashTable.h>
2020-03-18 11:23:53 +01:00
# include <AK/NonnullRefPtr.h>
2020-05-26 12:57:50 +01:00
# include <AK/StringBuilder.h>
2020-05-27 22:22:08 -07:00
# include <LibJS/AST.h>
# include <LibJS/Lexer.h>
2022-11-23 12:39:23 +01:00
# include <LibJS/ParserError.h>
2022-01-15 17:26:06 +01:00
# include <LibJS/Runtime/FunctionConstructor.h>
2020-12-28 20:45:22 +03:30
# include <LibJS/SourceRange.h>
2022-02-15 22:34:59 -08:00
# include <LibJS/Token.h>
# include <initializer_list>
2020-05-14 16:26:01 +01:00
# include <stdio.h>
2020-03-11 19:27:43 +01:00
namespace JS {
2020-03-12 23:02:41 +01:00
// Grouping direction of an operator. Returned by Parser::operator_associativity()
// and accepted by the expression-parsing entry points of Parser.
enum class Associativity {
    Left,
    Right,
};
2020-10-20 17:56:49 +01:00
struct FunctionNodeParseOptions {
2022-09-02 00:46:37 +02:00
enum : u16 {
2020-10-20 17:56:49 +01:00
CheckForFunctionAndName = 1 < < 0 ,
AllowSuperPropertyLookup = 1 < < 1 ,
AllowSuperConstructorCall = 1 < < 2 ,
2020-10-20 18:32:51 +01:00
IsGetterFunction = 1 < < 3 ,
IsSetterFunction = 1 < < 4 ,
2020-10-25 11:14:04 +00:00
IsArrowFunction = 1 < < 5 ,
2021-06-14 14:52:59 +04:30
IsGeneratorFunction = 1 < < 6 ,
2021-11-09 20:39:22 +02:00
IsAsyncFunction = 1 < < 7 ,
2022-09-02 00:46:37 +02:00
HasDefaultExportName = 1 < < 8 ,
2020-10-20 17:56:49 +01:00
} ;
} ;
2021-09-22 12:44:56 +02:00
class ScopePusher ;
2020-03-11 19:27:43 +01:00
class Parser {
public :
2022-04-10 00:55:45 +01:00
// Optional seed state handed to the Parser constructor (`initial_state_for_eval`).
// The field names mirror the equally named flags in ParserState; all default to false.
// NOTE(review): presumably copied into ParserState by the constructor - confirm in Parser.cpp.
struct EvalInitialState {
    bool in_eval_function_context { false };
    bool allow_super_property_lookup { false };
    bool allow_super_constructor_call { false };
    bool in_class_field_initializer { false };
};
explicit Parser ( Lexer lexer , Program : : Type program_type = Program : : Type : : Script , Optional < EvalInitialState > initial_state_for_eval = { } ) ;
2020-03-11 19:27:43 +01:00
2021-06-19 20:13:53 -07:00
NonnullRefPtr < Program > parse_program ( bool starts_in_strict_mode = false ) ;
2020-03-11 19:27:43 +01:00
2020-03-19 11:52:56 +01:00
template < typename FunctionNodeType >
2022-09-02 00:46:37 +02:00
NonnullRefPtr < FunctionNodeType > parse_function_node ( u16 parse_options = FunctionNodeParseOptions : : CheckForFunctionAndName , Optional < Position > const & function_start = { } ) ;
2022-11-23 13:12:36 +01:00
Vector < FunctionParameter > parse_formal_parameters ( int & function_length , u16 parse_options = 0 ) ;
2021-09-18 01:11:32 +02:00
// Type of the `is_var_declaration` argument of parse_binding_pattern(); defaults to No there.
// Note that Yes is the first enumerator here, unlike the No/Yes enums further down.
enum class AllowDuplicates {
    Yes,
    No,
};
// Type of the `allow_member_expressions` argument of parse_binding_pattern(); defaults to No there.
enum class AllowMemberExpressions {
    Yes,
    No,
};
2023-02-19 22:07:52 +01:00
RefPtr < BindingPattern const > parse_binding_pattern ( AllowDuplicates is_var_declaration = AllowDuplicates : : No , AllowMemberExpressions allow_member_expressions = AllowMemberExpressions : : No ) ;
2020-03-19 11:52:56 +01:00
2021-06-14 15:46:41 +04:30
struct PrimaryExpressionParseResult {
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Expression const > result ;
2021-06-14 15:46:41 +04:30
bool should_continue_parsing_as_expression { true } ;
} ;
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Declaration const > parse_declaration ( ) ;
2021-07-25 01:01:22 +02:00
// Argument type of parse_statement(), try_parse_labelled_statement() and
// parse_statement_list(); those that declare a default use No.
enum class AllowLabelledFunction {
    No,
    Yes,
};
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Statement const > parse_statement ( AllowLabelledFunction allow_labelled_function = AllowLabelledFunction : : No ) ;
NonnullRefPtr < BlockStatement const > parse_block_statement ( ) ;
NonnullRefPtr < FunctionBody const > parse_function_body ( Vector < FunctionParameter > const & parameters , FunctionKind function_kind , bool & contains_direct_call_to_eval ) ;
NonnullRefPtr < ReturnStatement const > parse_return_statement ( ) ;
2022-12-20 22:09:57 +01:00
// Argument type of parse_variable_declaration() and parse_using_declaration();
// both default to No.
enum class IsForLoopVariableDeclaration {
    No,
    Yes,
};
2023-02-19 22:07:52 +01:00
NonnullRefPtr < VariableDeclaration const > parse_variable_declaration ( IsForLoopVariableDeclaration is_for_loop_variable_declaration = IsForLoopVariableDeclaration : : No ) ;
RefPtr < Identifier const > parse_lexical_binding ( ) ;
NonnullRefPtr < UsingDeclaration const > parse_using_declaration ( IsForLoopVariableDeclaration is_for_loop_variable_declaration = IsForLoopVariableDeclaration : : No ) ;
NonnullRefPtr < Statement const > parse_for_statement ( ) ;
2021-11-23 16:09:28 +01:00
// Type of the `is_await` argument of parse_for_in_of_statement().
enum class IsForAwaitLoop {
    No,
    Yes,
};
2022-02-15 22:34:59 -08:00
struct ForbiddenTokens {
ForbiddenTokens ( std : : initializer_list < TokenType > const & forbidden ) ;
ForbiddenTokens merge ( ForbiddenTokens other ) const ;
bool allows ( TokenType token ) const ;
ForbiddenTokens forbid ( std : : initializer_list < TokenType > const & forbidden ) const ;
private :
void forbid_tokens ( std : : initializer_list < TokenType > const & forbidden ) ;
bool m_forbid_in_token : 1 { false } ;
bool m_forbid_logical_tokens : 1 { false } ;
bool m_forbid_coalesce_token : 1 { false } ;
bool m_forbid_paren_open : 1 { false } ;
bool m_forbid_question_mark_period : 1 { false } ;
bool m_forbid_equals : 1 { false } ;
} ;
struct ExpressionResult {
2023-02-19 22:07:52 +01:00
template < typename T >
ExpressionResult ( NonnullRefPtr < T const > expression , ForbiddenTokens forbidden = { } )
: expression ( move ( expression ) )
, forbidden ( forbidden )
{
}
2022-02-15 22:34:59 -08:00
template < typename T >
ExpressionResult ( NonnullRefPtr < T > expression , ForbiddenTokens forbidden = { } )
2023-02-19 22:07:52 +01:00
: expression ( move ( expression ) )
2022-02-15 22:34:59 -08:00
, forbidden ( forbidden )
{
}
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Expression const > expression ;
2022-02-15 22:34:59 -08:00
ForbiddenTokens forbidden ;
} ;
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Statement const > parse_for_in_of_statement ( NonnullRefPtr < ASTNode const > lhs , IsForAwaitLoop is_await ) ;
NonnullRefPtr < IfStatement const > parse_if_statement ( ) ;
NonnullRefPtr < ThrowStatement const > parse_throw_statement ( ) ;
NonnullRefPtr < TryStatement const > parse_try_statement ( ) ;
NonnullRefPtr < CatchClause const > parse_catch_clause ( ) ;
NonnullRefPtr < SwitchStatement const > parse_switch_statement ( ) ;
NonnullRefPtr < SwitchCase const > parse_switch_case ( ) ;
NonnullRefPtr < BreakStatement const > parse_break_statement ( ) ;
NonnullRefPtr < ContinueStatement const > parse_continue_statement ( ) ;
NonnullRefPtr < DoWhileStatement const > parse_do_while_statement ( ) ;
NonnullRefPtr < WhileStatement const > parse_while_statement ( ) ;
NonnullRefPtr < WithStatement const > parse_with_statement ( ) ;
NonnullRefPtr < DebuggerStatement const > parse_debugger_statement ( ) ;
NonnullRefPtr < ConditionalExpression const > parse_conditional_expression ( NonnullRefPtr < Expression const > test , ForbiddenTokens ) ;
NonnullRefPtr < OptionalChain const > parse_optional_chain ( NonnullRefPtr < Expression const > base ) ;
NonnullRefPtr < Expression const > parse_expression ( int min_precedence , Associativity associate = Associativity : : Right , ForbiddenTokens forbidden = { } ) ;
2021-06-14 15:46:41 +04:30
PrimaryExpressionParseResult parse_primary_expression ( ) ;
2023-02-19 22:07:52 +01:00
NonnullRefPtr < Expression const > parse_unary_prefixed_expression ( ) ;
NonnullRefPtr < RegExpLiteral const > parse_regexp_literal ( ) ;
NonnullRefPtr < ObjectExpression const > parse_object_expression ( ) ;
NonnullRefPtr < ArrayExpression const > parse_array_expression ( ) ;
2022-08-17 02:04:27 +02:00
// Type of the `string_literal_type` argument of parse_string_literal(); defaults to Normal there.
enum class StringLiteralType {
    Normal,
    NonTaggedTemplate,
    TaggedTemplate,
};
2023-02-19 22:07:52 +01:00
NonnullRefPtr < StringLiteral const > parse_string_literal ( Token const & token , StringLiteralType string_literal_type = StringLiteralType : : Normal , bool * contains_invalid_escape = nullptr ) ;
NonnullRefPtr < TemplateLiteral const > parse_template_literal ( bool is_tagged ) ;
ExpressionResult parse_secondary_expression ( NonnullRefPtr < Expression const > , int min_precedence , Associativity associate = Associativity : : Right , ForbiddenTokens forbidden = { } ) ;
NonnullRefPtr < Expression const > parse_call_expression ( NonnullRefPtr < Expression const > ) ;
NonnullRefPtr < NewExpression const > parse_new_expression ( ) ;
NonnullRefPtr < ClassDeclaration const > parse_class_declaration ( ) ;
NonnullRefPtr < ClassExpression const > parse_class_expression ( bool expect_class_name ) ;
NonnullRefPtr < YieldExpression const > parse_yield_expression ( ) ;
NonnullRefPtr < AwaitExpression const > parse_await_expression ( ) ;
NonnullRefPtr < Expression const > parse_property_key ( ) ;
NonnullRefPtr < AssignmentExpression const > parse_assignment_expression ( AssignmentOp , NonnullRefPtr < Expression const > lhs , int min_precedence , Associativity , ForbiddenTokens forbidden = { } ) ;
NonnullRefPtr < Identifier const > parse_identifier ( ) ;
NonnullRefPtr < ImportStatement const > parse_import_statement ( Program & program ) ;
NonnullRefPtr < ExportStatement const > parse_export_statement ( Program & program ) ;
RefPtr < FunctionExpression const > try_parse_arrow_function_expression ( bool expect_parens , bool is_async = false ) ;
RefPtr < LabelledStatement const > try_parse_labelled_statement ( AllowLabelledFunction allow_function ) ;
RefPtr < MetaProperty const > try_parse_new_target_expression ( ) ;
RefPtr < MetaProperty const > try_parse_import_meta_expression ( ) ;
NonnullRefPtr < ImportCall const > parse_import_call ( ) ;
2020-11-02 21:27:42 +00:00
2021-09-14 06:56:31 +04:30
Vector < CallExpression : : Argument > parse_arguments ( ) ;
2021-06-19 14:43:09 +02:00
// True once at least one error has been recorded in the parser state.
bool has_errors() const { return m_state.errors.size() != 0; }
2022-11-23 12:39:23 +01:00
// Read-only view of the errors stored in the current parser state.
Vector < ParserError > const & errors ( ) const { return m_state . errors ; }
2021-07-19 17:56:21 +02:00
void print_errors ( bool print_hint = true ) const
2020-05-14 16:26:01 +01:00
{
2021-06-19 14:43:09 +02:00
for ( auto & error : m_state . errors ) {
2021-07-19 17:56:21 +02:00
if ( print_hint ) {
auto hint = error . source_location_hint ( m_state . lexer . source ( ) ) ;
if ( ! hint . is_empty ( ) )
warnln ( " {} " , hint ) ;
}
2022-12-06 01:12:49 +00:00
warnln ( " SyntaxError: {} " , error . to_deprecated_string ( ) ) ;
2020-12-06 14:50:39 +00:00
}
2020-05-14 16:26:01 +01:00
}
2020-03-11 19:27:43 +01:00
2021-04-11 22:41:51 +02:00
// Per-position memo stored in Parser::m_token_memoizations (keyed by Position).
// The flag gets a default initializer, like every other flag struct in this header,
// so a default-constructed entry never carries an indeterminate value.
struct TokenMemoization {
    bool try_parse_arrow_function_expression_failed { false };
};
2022-01-15 17:26:06 +01:00
// Needs to mess with m_state, and we're not going to expose a non-const getter for that :^)
2022-08-21 20:38:35 +01:00
friend ThrowCompletionOr < ECMAScriptFunctionObject * > FunctionConstructor : : create_dynamic_function ( VM & , FunctionObject & , FunctionObject * , FunctionKind , MarkedVector < Value > const & ) ;
2022-01-15 17:26:06 +01:00
2023-07-07 23:14:03 +02:00
static Parser parse_function_body_from_string ( DeprecatedString const & body_string , u16 parse_options , Vector < FunctionParameter > const & parameters , FunctionKind kind , bool & contains_direct_call_to_eval ) ;
2020-03-11 19:27:43 +01:00
private :
2020-04-13 16:42:54 +02:00
friend class ScopePusher ;
2021-11-27 00:01:23 +01:00
void parse_script ( Program & program , bool starts_in_strict_mode ) ;
void parse_module ( Program & program ) ;
2020-04-13 16:42:54 +02:00
2020-03-12 23:02:41 +01:00
Associativity operator_associativity ( TokenType ) const ;
2020-03-11 19:27:43 +01:00
bool match_expression ( ) const ;
2020-03-14 20:45:51 +02:00
bool match_unary_prefixed_expression ( ) const ;
2022-02-15 22:34:59 -08:00
bool match_secondary_expression ( ForbiddenTokens forbidden = { } ) const ;
2020-03-11 19:27:43 +01:00
bool match_statement ( ) const ;
2021-08-14 17:42:30 +02:00
bool match_export_or_import ( ) const ;
2021-12-20 15:29:25 +01:00
bool match_assert_clause ( ) const ;
2022-12-20 22:09:57 +01:00
// Type of the `allow_using` argument of match_declaration(); defaults to No there.
enum class AllowUsingDeclaration {
    No,
    Yes,
};
bool match_declaration ( AllowUsingDeclaration allow_using = AllowUsingDeclaration : : No ) const ;
2021-10-08 00:38:24 +02:00
bool try_match_let_declaration ( ) const ;
2022-12-20 22:09:57 +01:00
bool try_match_using_declaration ( ) const ;
2021-10-08 00:38:24 +02:00
bool match_variable_declaration ( ) const ;
2021-07-11 15:34:55 +04:30
bool match_identifier ( ) const ;
2022-12-23 01:45:29 +01:00
bool token_is_identifier ( Token const & ) const ;
2020-04-18 20:31:27 +02:00
bool match_identifier_name ( ) const ;
2020-06-08 13:31:21 -05:00
bool match_property_key ( ) const ;
2021-10-12 22:45:52 +02:00
bool is_private_identifier_valid ( ) const ;
2020-03-11 19:27:43 +01:00
bool match ( TokenType type ) const ;
bool done ( ) const ;
2022-04-01 20:58:27 +03:00
void expected ( char const * what ) ;
2022-12-04 18:02:33 +00:00
void syntax_error ( DeprecatedString const & message , Optional < Position > = { } ) ;
2020-03-11 19:27:43 +01:00
Token consume ( ) ;
2023-05-28 00:08:52 +02:00
Token consume_and_allow_division ( ) ;
2021-07-11 15:34:55 +04:30
Token consume_identifier ( ) ;
Token consume_identifier_reference ( ) ;
2020-03-11 19:27:43 +01:00
Token consume ( TokenType type ) ;
2020-10-19 18:01:28 +01:00
Token consume_and_validate_numeric_literal ( ) ;
2020-04-17 15:05:58 +02:00
void consume_or_insert_semicolon ( ) ;
2020-03-30 08:24:43 -05:00
void save_state ( ) ;
void load_state ( ) ;
2020-12-29 16:47:39 +03:30
void discard_saved_state ( ) ;
2020-11-02 21:03:19 +00:00
Position position ( ) const ;
2020-03-11 19:27:43 +01:00
2023-02-19 22:07:52 +01:00
RefPtr < BindingPattern const > synthesize_binding_pattern ( Expression const & expression ) ;
2021-09-18 01:11:32 +02:00
2022-01-16 23:51:28 +01:00
Token next_token ( size_t steps = 1 ) const ;
2021-08-28 17:04:37 +02:00
2023-01-08 19:23:00 -05:00
void check_identifier_name_for_assignment_validity ( DeprecatedFlyString const & , bool force_strict = false ) ;
2021-07-11 15:34:55 +04:30
2022-04-01 20:58:27 +03:00
bool try_parse_arrow_function_expression_failed_at_position ( Position const & ) const ;
void set_try_parse_arrow_function_expression_failed_at_position ( Position const & , bool ) ;
2021-04-11 22:41:51 +02:00
2021-09-18 23:02:50 +02:00
bool match_invalid_escaped_keyword ( ) const ;
2021-09-22 12:44:56 +02:00
bool parse_directive ( ScopeNode & body ) ;
void parse_statement_list ( ScopeNode & output_node , AllowLabelledFunction allow_labelled_functions = AllowLabelledFunction : : No ) ;
2022-01-16 23:51:28 +01:00
2023-01-08 19:23:00 -05:00
DeprecatedFlyString consume_string_value ( ) ;
2022-01-16 23:51:28 +01:00
ModuleRequest parse_module_request ( ) ;
2021-09-22 12:44:56 +02:00
2020-12-28 20:45:22 +03:30
struct RulePosition {
2020-12-29 08:42:02 +03:30
AK_MAKE_NONCOPYABLE ( RulePosition ) ;
AK_MAKE_NONMOVABLE ( RulePosition ) ;
public :
2020-12-28 20:45:22 +03:30
RulePosition ( Parser & parser , Position position )
: m_parser ( parser )
, m_position ( position )
{
2020-12-29 16:47:39 +03:30
m_parser . m_rule_starts . append ( position ) ;
2020-12-28 20:45:22 +03:30
}
~ RulePosition ( )
{
2020-12-29 16:47:39 +03:30
auto last = m_parser . m_rule_starts . take_last ( ) ;
2021-02-23 20:42:32 +01:00
VERIFY ( last . line = = m_position . line ) ;
VERIFY ( last . column = = m_position . column ) ;
2020-12-28 20:45:22 +03:30
}
2022-04-01 20:58:27 +03:00
Position const & position ( ) const { return m_position ; }
2020-12-28 20:45:22 +03:30
private :
Parser & m_parser ;
Position m_position ;
} ;
// Creates a RulePosition guard for the parser's current position(). The result must be
// kept alive by the caller ([[nodiscard]]): dropping it would pop the entry immediately.
[[nodiscard]] RulePosition push_start()
{
    return RulePosition { *this, position() };
}
2020-03-30 08:24:43 -05:00
struct ParserState {
2021-06-19 14:43:09 +02:00
Lexer lexer ;
Token current_token ;
2022-11-23 12:39:23 +01:00
Vector < ParserError > errors ;
2021-10-08 01:55:24 +02:00
ScopePusher * current_scope_pusher { nullptr } ;
2021-06-14 09:30:43 +02:00
2021-09-18 23:01:54 +02:00
HashMap < StringView , Optional < Position > > labels_in_scope ;
2022-11-27 02:24:38 +01:00
HashMap < size_t , Position > invalid_property_range_in_object_expression ;
2021-10-12 22:45:52 +02:00
HashTable < StringView > * referenced_private_names { nullptr } ;
2021-06-19 14:43:09 +02:00
bool strict_mode { false } ;
bool allow_super_property_lookup { false } ;
bool allow_super_constructor_call { false } ;
bool in_function_context { false } ;
2023-07-12 04:02:27 +02:00
bool initiated_by_eval { false } ;
2022-04-10 00:55:45 +01:00
bool in_eval_function_context { false } ; // This controls if we allow new.target or not. Note that eval("return") is not allowed, so we have to have a separate state variable for eval.
2021-08-21 11:31:36 +02:00
bool in_formal_parameter_context { false } ;
2023-07-12 04:02:27 +02:00
bool in_catch_parameter_context { false } ;
2021-06-19 14:43:09 +02:00
bool in_generator_function_context { false } ;
2021-11-26 23:50:32 +01:00
bool await_expression_is_valid { false } ;
2021-06-19 14:43:09 +02:00
bool in_arrow_function_context { false } ;
bool in_break_context { false } ;
bool in_continue_context { false } ;
bool string_legacy_octal_escape_sequence_in_scope { false } ;
2021-08-28 17:11:05 +02:00
bool in_class_field_initializer { false } ;
2021-11-09 22:52:21 +02:00
bool in_class_static_init_block { false } ;
LibJS: Add an optimization to avoid needless arguments object creation
This gives FunctionNode a "might need arguments object" boolean flag and
sets it based on the simplest possible heuristic for this: if we
encounter an identifier called "arguments" or "eval" up to the next
(nested) function declaration or expression, we won't need an arguments
object. Otherwise, we *might* need one - the final decision is made in
the FunctionDeclarationInstantiation AO.
Now, this is obviously not perfect. Even if you avoid eval, something
like `foo.arguments` will still trigger a false positive - but it's a
start and already massively cuts down on needlessly allocated objects,
especially in real-world code that is often minified, and so a full
"arguments" identifier will be an actual arguments object more often
than not.
To illustrate the actual impact of this change, here's the number of
allocated arguments objects during a full test-js run:
Before:
- Unmapped arguments objects: 78765
- Mapped arguments objects: 2455
After:
- Unmapped arguments objects: 18
- Mapped arguments objects: 37
This results in a ~5% speedup of test-js on my Linux host machine, and
about 3.5% on i686 Serenity in QEMU (warm runs, average of 5).
The following microbenchmark (calling an empty function 1M times) runs
25% faster on Linux and 45% on Serenity:
function foo() {}
for (var i = 0; i < 1_000_000; ++i)
foo();
test262 reports no changes in either direction, apart from a speedup :^)
2021-10-05 08:44:58 +01:00
bool function_might_need_arguments_object { false } ;
2020-03-30 08:24:43 -05:00
2021-08-14 17:30:37 +02:00
ParserState ( Lexer , Program : : Type ) ;
2020-03-30 08:24:43 -05:00
} ;
2021-04-11 22:41:51 +02:00
class PositionKeyTraits {
public :
2022-04-01 20:58:27 +03:00
static int hash ( Position const & position )
2021-04-11 22:41:51 +02:00
{
return int_hash ( position . line ) ^ int_hash ( position . column ) ;
}
2022-04-01 20:58:27 +03:00
static bool equals ( Position const & a , Position const & b )
2021-04-11 22:41:51 +02:00
{
return a . column = = b . column & & a . line = = b . line ;
}
} ;
2023-07-05 00:14:41 +02:00
NonnullRefPtr < Identifier const > create_identifier_and_register_in_current_scope ( SourceRange range , DeprecatedFlyString string ) ;
2023-02-19 22:07:52 +01:00
NonnullRefPtr < SourceCode const > m_source_code ;
2020-12-29 16:47:39 +03:30
Vector < Position > m_rule_starts ;
2021-06-19 14:43:09 +02:00
ParserState m_state ;
2023-01-08 19:23:00 -05:00
DeprecatedFlyString m_filename ;
2020-05-02 11:46:39 -07:00
Vector < ParserState > m_saved_state ;
2021-04-11 22:41:51 +02:00
HashMap < Position , TokenMemoization , PositionKeyTraits > m_token_memoizations ;
2021-08-14 17:30:37 +02:00
Program : : Type m_program_type ;
2020-03-11 19:27:43 +01:00
} ;
}