2020-05-24 00:14:23 +02:00
/*
* Copyright ( c ) 2020 , Andreas Kling < kling @ serenityos . org >
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are met :
*
* 1. Redistributions of source code must retain the above copyright notice , this
* list of conditions and the following disclaimer .
*
* 2. Redistributions in binary form must reproduce the above copyright notice ,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR CONSEQUENTIAL
* DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY ,
* OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
*/
2020-06-01 19:07:38 +02:00
//#define PARSER_DEBUG
2020-05-27 23:32:50 +02:00
2020-05-24 19:51:50 +02:00
# include <AK/Utf32View.h>
2020-05-24 20:29:01 +02:00
# include <LibWeb/DOM/Comment.h>
2020-05-24 00:14:23 +02:00
# include <LibWeb/DOM/Document.h>
# include <LibWeb/DOM/DocumentType.h>
# include <LibWeb/DOM/ElementFactory.h>
2020-05-27 23:32:50 +02:00
# include <LibWeb/DOM/Event.h>
2020-07-28 18:20:36 +02:00
# include <LibWeb/DOM/Text.h>
2020-10-18 13:45:28 +02:00
# include <LibWeb/DOM/Window.h>
2020-07-26 15:08:16 +02:00
# include <LibWeb/HTML/HTMLFormElement.h>
# include <LibWeb/HTML/HTMLHeadElement.h>
# include <LibWeb/HTML/HTMLScriptElement.h>
2020-08-19 22:30:33 +01:00
# include <LibWeb/HTML/HTMLTemplateElement.h>
2020-07-28 19:18:23 +02:00
# include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
# include <LibWeb/HTML/Parser/HTMLToken.h>
2020-10-10 02:48:05 +01:00
# include <LibWeb/Namespace.h>
2020-10-12 01:51:28 +01:00
# include <LibWeb/SVG/TagNames.h>
2020-05-24 00:14:23 +02:00
2020-07-28 18:20:36 +02:00
namespace Web : : HTML {
2020-05-29 21:44:36 +02:00
// Reports a parse error on the debug stream, tagged with the function and
// line of the call site. The do/while(0) wrapper makes the expansion behave
// as a single statement.
#define PARSE_ERROR()                                                           \
    do {                                                                        \
        dbg() << " Parse error! " << __PRETTY_FUNCTION__ << " @ " << __LINE__; \
    } while (0)
2020-07-18 21:17:17 +01:00
// Public identifier prefixes that put a document into quirks mode.
// which_quirks_mode() tests a DOCTYPE's public identifier against each entry
// with a case-insensitive starts_with().
static Vector < FlyString > s_quirks_public_ids = {
" +//Silmaril//dtd html Pro v0r11 19970101// " ,
" -//AS//DTD HTML 3.0 asWedit + extensions// " ,
" -//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions// " ,
" -//IETF//DTD HTML 2.0 Level 1// " ,
" -//IETF//DTD HTML 2.0 Level 2// " ,
" -//IETF//DTD HTML 2.0 Strict Level 1// " ,
" -//IETF//DTD HTML 2.0 Strict Level 2// " ,
" -//IETF//DTD HTML 2.0 Strict// " ,
" -//IETF//DTD HTML 2.0// " ,
" -//IETF//DTD HTML 2.1E// " ,
" -//IETF//DTD HTML 3.0// " ,
" -//IETF//DTD HTML 3.2 Final// " ,
" -//IETF//DTD HTML 3.2// " ,
" -//IETF//DTD HTML 3// " ,
" -//IETF//DTD HTML Level 0// " ,
" -//IETF//DTD HTML Level 1// " ,
" -//IETF//DTD HTML Level 2// " ,
" -//IETF//DTD HTML Level 3// " ,
" -//IETF//DTD HTML Strict Level 0// " ,
" -//IETF//DTD HTML Strict Level 1// " ,
" -//IETF//DTD HTML Strict Level 2// " ,
" -//IETF//DTD HTML Strict Level 3// " ,
" -//IETF//DTD HTML Strict// " ,
" -//IETF//DTD HTML// " ,
" -//Metrius//DTD Metrius Presentational// " ,
" -//Microsoft//DTD Internet Explorer 2.0 HTML Strict// " ,
" -//Microsoft//DTD Internet Explorer 2.0 HTML// " ,
" -//Microsoft//DTD Internet Explorer 2.0 Tables// " ,
" -//Microsoft//DTD Internet Explorer 3.0 HTML Strict// " ,
" -//Microsoft//DTD Internet Explorer 3.0 HTML// " ,
" -//Microsoft//DTD Internet Explorer 3.0 Tables// " ,
" -//Netscape Comm. Corp.//DTD HTML// " ,
" -//Netscape Comm. Corp.//DTD Strict HTML// " ,
" -//O'Reilly and Associates//DTD HTML 2.0// " ,
" -//O'Reilly and Associates//DTD HTML Extended 1.0// " ,
" -//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0// " ,
" -//SQ//DTD HTML 2.0 HoTMetaL + extensions// " ,
" -//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0// " ,
" -//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0// " ,
" -//Spyglass//DTD HTML 2.0 Extended// " ,
" -//Sun Microsystems Corp.//DTD HotJava HTML// " ,
" -//Sun Microsystems Corp.//DTD HotJava Strict HTML// " ,
" -//W3C//DTD HTML 3 1995-03-24// " ,
" -//W3C//DTD HTML 3.2 Draft// " ,
" -//W3C//DTD HTML 3.2 Final// " ,
" -//W3C//DTD HTML 3.2// " ,
" -//W3C//DTD HTML 3.2S Draft// " ,
" -//W3C//DTD HTML 4.0 Frameset// " ,
" -//W3C//DTD HTML 4.0 Transitional// " ,
" -//W3C//DTD HTML Experimental 19960712// " ,
" -//W3C//DTD HTML Experimental 970421// " ,
" -//W3C//DTD W3 HTML// " ,
" -//W3O//DTD W3 HTML 3.0// " ,
" -//WebTechs//DTD Mozilla HTML 2.0// " ,
" -//WebTechs//DTD Mozilla HTML// "
} ;
2020-07-26 19:37:56 +02:00
// Convenience entry point: builds a parser over `data` (passing `encoding`
// on to the tokenizer), runs it with `url`, and hands back the parser's document.
RefPtr<DOM::Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
{
    HTMLDocumentParser html_parser(data, encoding);
    html_parser.run(url);
    return html_parser.document();
}
2020-05-28 12:35:19 +02:00
// Constructs a parser that tokenizes `input` and builds into a newly created document.
HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
    : m_tokenizer(input, encoding)
    , m_document(DOM::Document::create())
{
}
2020-07-26 19:37:56 +02:00
HTMLDocumentParser : : HTMLDocumentParser ( const StringView & input , const String & encoding , DOM : : Document & existing_document )
2020-07-24 21:24:11 +01:00
: m_tokenizer ( input , encoding )
, m_document ( existing_document )
{
}
2020-05-24 00:14:23 +02:00
// Nothing to release by hand; members clean up after themselves.
HTMLDocumentParser::~HTMLDocumentParser() = default;
2020-05-24 22:00:46 +02:00
void HTMLDocumentParser : : run ( const URL & url )
2020-05-24 00:14:23 +02:00
{
2020-05-24 22:00:46 +02:00
m_document - > set_url ( url ) ;
2020-05-28 12:35:19 +02:00
m_document - > set_source ( m_tokenizer . source ( ) ) ;
2020-05-24 00:14:23 +02:00
for ( ; ; ) {
auto optional_token = m_tokenizer . next_token ( ) ;
if ( ! optional_token . has_value ( ) )
2020-05-27 23:32:50 +02:00
break ;
2020-05-24 00:14:23 +02:00
auto & token = optional_token . value ( ) ;
2020-05-27 23:32:50 +02:00
# ifdef PARSER_DEBUG
2020-05-24 00:14:23 +02:00
dbg ( ) < < " [ " < < insertion_mode_name ( ) < < " ] " < < token . to_string ( ) ;
2020-05-27 23:32:50 +02:00
# endif
2020-10-12 01:51:28 +01:00
// FIXME: If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
// FIXME: If the adjusted current node is a MathML text integration point and the token is a character token
// FIXME: If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
// FIXME: If the adjusted current node is an HTML integration point and the token is a start tag
// FIXME: If the adjusted current node is an HTML integration point and the token is a character token
if ( m_stack_of_open_elements . is_empty ( )
| | adjusted_current_node ( ) . namespace_ ( ) = = Namespace : : HTML
| | token . is_end_of_file ( ) ) {
process_using_the_rules_for ( m_insertion_mode , token ) ;
} else {
process_using_the_rules_for_foreign_content ( token ) ;
}
2020-05-28 18:55:18 +02:00
if ( m_stop_parsing ) {
2020-07-06 10:57:16 -04:00
# ifdef PARSER_DEBUG
2020-07-21 19:03:05 +01:00
dbg ( ) < < " Stop parsing " < < ( m_parsing_fragment ? " fragment " : " " ) < < " ! :^) " ;
2020-07-06 10:57:16 -04:00
# endif
2020-05-28 18:55:18 +02:00
break ;
}
2020-05-24 19:51:50 +02:00
}
2020-05-27 23:32:50 +02:00
2020-06-03 21:53:08 +02:00
flush_character_insertions ( ) ;
2020-05-27 23:32:50 +02:00
// "The end"
2020-08-31 13:56:16 +01:00
m_document - > set_ready_state ( " interactive " ) ;
2020-05-30 12:26:15 +02:00
auto scripts_to_execute_when_parsing_has_finished = m_document - > take_scripts_to_execute_when_parsing_has_finished ( { } ) ;
for ( auto & script : scripts_to_execute_when_parsing_has_finished ) {
script . execute_script ( ) ;
}
2020-07-26 19:37:56 +02:00
m_document - > dispatch_event ( DOM : : Event : : create ( " DOMContentLoaded " ) ) ;
2020-05-30 12:26:15 +02:00
2020-10-18 13:45:28 +02:00
// FIXME: These are not in the right place, they should only fire once subresources are ready.
m_document - > dispatch_event ( DOM : : Event : : create ( " load " ) ) ;
m_document - > window ( ) . dispatch_event ( DOM : : Event : : create ( " load " ) ) ;
2020-05-30 12:26:15 +02:00
auto scripts_to_execute_as_soon_as_possible = m_document - > take_scripts_to_execute_as_soon_as_possible ( { } ) ;
for ( auto & script : scripts_to_execute_as_soon_as_possible ) {
script . execute_script ( ) ;
}
2020-08-31 13:56:16 +01:00
m_document - > set_ready_state ( " complete " ) ;
2020-05-24 19:51:50 +02:00
}
2020-05-24 00:14:23 +02:00
2020-05-24 19:51:50 +02:00
void HTMLDocumentParser : : process_using_the_rules_for ( InsertionMode mode , HTMLToken & token )
{
switch ( mode ) {
case InsertionMode : : Initial :
handle_initial ( token ) ;
break ;
case InsertionMode : : BeforeHTML :
handle_before_html ( token ) ;
break ;
case InsertionMode : : BeforeHead :
handle_before_head ( token ) ;
break ;
case InsertionMode : : InHead :
handle_in_head ( token ) ;
break ;
case InsertionMode : : InHeadNoscript :
handle_in_head_noscript ( token ) ;
break ;
case InsertionMode : : AfterHead :
handle_after_head ( token ) ;
break ;
case InsertionMode : : InBody :
handle_in_body ( token ) ;
break ;
case InsertionMode : : AfterBody :
handle_after_body ( token ) ;
break ;
case InsertionMode : : AfterAfterBody :
handle_after_after_body ( token ) ;
break ;
case InsertionMode : : Text :
handle_text ( token ) ;
break ;
2020-05-25 20:30:34 +02:00
case InsertionMode : : InTable :
handle_in_table ( token ) ;
break ;
2020-05-28 00:27:46 +02:00
case InsertionMode : : InTableBody :
handle_in_table_body ( token ) ;
break ;
case InsertionMode : : InRow :
handle_in_row ( token ) ;
break ;
case InsertionMode : : InCell :
handle_in_cell ( token ) ;
break ;
2020-05-30 17:57:41 +02:00
case InsertionMode : : InTableText :
handle_in_table_text ( token ) ;
break ;
2020-05-30 19:58:52 +02:00
case InsertionMode : : InSelectInTable :
handle_in_select_in_table ( token ) ;
break ;
case InsertionMode : : InSelect :
handle_in_select ( token ) ;
break ;
2020-06-13 05:09:54 +01:00
case InsertionMode : : InCaption :
handle_in_caption ( token ) ;
break ;
2020-06-13 06:22:18 +01:00
case InsertionMode : : InColumnGroup :
handle_in_column_group ( token ) ;
break ;
2020-06-21 06:58:03 +02:00
case InsertionMode : : InTemplate :
handle_in_template ( token ) ;
break ;
case InsertionMode : : InFrameset :
handle_in_frameset ( token ) ;
break ;
case InsertionMode : : AfterFrameset :
handle_after_frameset ( token ) ;
break ;
case InsertionMode : : AfterAfterFrameset :
handle_after_after_frameset ( token ) ;
break ;
2020-05-24 19:51:50 +02:00
default :
ASSERT_NOT_REACHED ( ) ;
2020-05-24 00:14:23 +02:00
}
}
2020-07-26 19:37:56 +02:00
// Works out the quirks mode implied by a DOCTYPE token from its force-quirks
// flag, its name, and its public/system identifiers.
// Returns QuirksMode::Yes, QuirksMode::Limited or QuirksMode::No.
DOM::QuirksMode HTMLDocumentParser::which_quirks_mode(const HTMLToken& doctype_token) const
{
    const auto& doctype = doctype_token.m_doctype;

    if (doctype.force_quirks)
        return DOM::QuirksMode::Yes;

    // NOTE: The tokenizer puts the name into lower case for us.
    // NOTE(review): the string literals in this function are kept exactly as
    // found, including the padding inside the quotes — confirm they match
    // what the tokenizer actually produces.
    if (doctype.name.to_string() != " html ")
        return DOM::QuirksMode::Yes;

    const auto public_identifier = doctype.public_identifier.to_string();
    const auto system_identifier = doctype.system_identifier.to_string();

    // Case-insensitive prefix test against the public identifier.
    auto public_id_starts_with = [&public_identifier](const StringView& prefix) {
        return public_identifier.starts_with(prefix, CaseSensitivity::CaseInsensitive);
    };

    // Identifiers that select quirks mode on an exact (case-insensitive) match.
    const bool exact_quirks_match = public_identifier.equals_ignoring_case(" -//W3O//DTD W3 HTML Strict 3.0//EN// ")
        || public_identifier.equals_ignoring_case(" -/W3C/DTD HTML 4.0 Transitional/EN ")
        || public_identifier.equals_ignoring_case(" HTML ")
        || system_identifier.equals_ignoring_case(" http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd ");
    if (exact_quirks_match)
        return DOM::QuirksMode::Yes;

    for (auto& quirks_prefix : s_quirks_public_ids) {
        if (public_id_starts_with(quirks_prefix))
            return DOM::QuirksMode::Yes;
    }

    // HTML 4.01 Frameset/Transitional: full quirks when the system identifier
    // is missing, limited quirks when it is present.
    const bool is_html401_frameset_or_transitional = public_id_starts_with(" -//W3C//DTD HTML 4.01 Frameset// ")
        || public_id_starts_with(" -//W3C//DTD HTML 4.01 Transitional// ");
    if (is_html401_frameset_or_transitional)
        return doctype.missing_system_identifier ? DOM::QuirksMode::Yes : DOM::QuirksMode::Limited;

    const bool is_xhtml10_frameset_or_transitional = public_id_starts_with(" -//W3C//DTD XHTML 1.0 Frameset// ")
        || public_id_starts_with(" -//W3C//DTD XHTML 1.0 Transitional// ");
    if (is_xhtml10_frameset_or_transitional)
        return DOM::QuirksMode::Limited;

    return DOM::QuirksMode::No;
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_initial ( HTMLToken & token )
{
2020-05-27 01:49:40 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
if ( token . is_comment ( ) ) {
2020-07-26 19:37:56 +02:00
auto comment = adopt ( * new DOM : : Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
2020-05-27 01:49:40 +02:00
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
if ( token . is_doctype ( ) ) {
2020-07-26 19:37:56 +02:00
auto doctype = adopt ( * new DOM : : DocumentType ( document ( ) ) ) ;
2020-05-24 00:14:23 +02:00
doctype - > set_name ( token . m_doctype . name . to_string ( ) ) ;
2020-07-18 21:17:17 +01:00
doctype - > set_public_id ( token . m_doctype . public_identifier . to_string ( ) ) ;
doctype - > set_system_id ( token . m_doctype . system_identifier . to_string ( ) ) ;
2020-05-24 00:14:23 +02:00
document ( ) . append_child ( move ( doctype ) ) ;
2020-07-18 21:17:17 +01:00
document ( ) . set_quirks_mode ( which_quirks_mode ( token ) ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : BeforeHTML ;
return ;
}
2020-05-28 00:21:31 +02:00
PARSE_ERROR ( ) ;
2020-07-26 19:37:56 +02:00
document ( ) . set_quirks_mode ( DOM : : QuirksMode : : Yes ) ;
2020-05-28 00:21:31 +02:00
m_insertion_mode = InsertionMode : : BeforeHTML ;
process_using_the_rules_for ( InsertionMode : : BeforeHTML , token ) ;
2020-05-24 00:14:23 +02:00
}
void HTMLDocumentParser : : handle_before_html ( HTMLToken & token )
{
2020-05-27 01:49:40 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_comment ( ) ) {
2020-07-26 19:37:56 +02:00
auto comment = adopt ( * new DOM : : Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
2020-05-27 01:49:40 +02:00
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-10-10 02:48:05 +01:00
auto element = create_element_for ( token , Namespace : : HTML ) ;
2020-05-24 00:14:23 +02:00
document ( ) . append_child ( element ) ;
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . push ( move ( element ) ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : BeforeHead ;
return ;
}
2020-05-27 01:49:40 +02:00
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : head , HTML : : TagNames : : body , HTML : : TagNames : : html , HTML : : TagNames : : br ) ) {
2020-05-27 01:49:40 +02:00
goto AnythingElse ;
}
if ( token . is_end_tag ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
AnythingElse :
2020-10-10 02:48:05 +01:00
auto element = create_element ( document ( ) , HTML : : TagNames : : html , Namespace : : HTML ) ;
2020-06-02 08:47:20 +02:00
document ( ) . append_child ( element ) ;
2020-05-27 01:49:40 +02:00
m_stack_of_open_elements . push ( element ) ;
// FIXME: If the Document is being loaded as part of navigation of a browsing context, then: run the application cache selection algorithm with no manifest, passing it the Document object.
m_insertion_mode = InsertionMode : : BeforeHead ;
process_using_the_rules_for ( InsertionMode : : BeforeHead , token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-07-26 19:37:56 +02:00
// Returns the stack of open elements' current node.
DOM::Element& HTMLDocumentParser::current_node()
{
    auto& open_elements = m_stack_of_open_elements;
    return open_elements.current_node();
}
2020-10-12 01:51:28 +01:00
// Returns the context element when parsing a fragment with exactly one open
// element on the stack; otherwise returns the current node.
DOM::Element& HTMLDocumentParser::adjusted_current_node()
{
    const bool only_one_open_element = m_stack_of_open_elements.elements().size() == 1;
    if (!m_parsing_fragment || !only_one_open_element)
        return current_node();
    return *m_context_element;
}
2020-07-26 19:37:56 +02:00
// Returns the element sitting immediately below the last entry on the stack
// of open elements (index size() - 2).
DOM::Element& HTMLDocumentParser::node_before_current_node()
{
    auto& open_elements = m_stack_of_open_elements.elements();
    const auto second_to_last_index = open_elements.size() - 2;
    return open_elements.at(second_to_last_index);
}
2020-06-21 17:00:55 +02:00
// Computes where the next node should be inserted: a parent plus an optional
// sibling to insert before. Normally that is the end of the current node;
// with foster parenting active and a table-related current node, the location
// is derived from the last <template>/<table> on the stack instead. A
// <template> parent is always replaced by its content.
HTMLDocumentParser::AdjustedInsertionLocation HTMLDocumentParser::find_appropriate_place_for_inserting_node()
{
    auto& target = current_node();
    const bool foster_parent = m_foster_parenting
        && target.local_name().is_one_of(HTML::TagNames::table, HTML::TagNames::tbody, HTML::TagNames::tfoot, HTML::TagNames::thead, HTML::TagNames::tr);

    HTMLDocumentParser::AdjustedInsertionLocation location;

    if (!foster_parent) {
        location = { target, nullptr };
    } else {
        auto last_template = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::template_);
        auto last_table = m_stack_of_open_elements.last_element_with_tag_name(HTML::TagNames::table);

        const bool template_is_innermost = last_template.element
            && (!last_table.element || last_template.index > last_table.index);
        if (template_is_innermost) {
            // This returns the template content, so no need to check the parent is a template.
            return { downcast<HTMLTemplateElement>(last_template.element)->content(), nullptr };
        }

        if (!last_table.element) {
            ASSERT(m_parsing_fragment);
            // Guaranteed not to be a template element (it will be the html element),
            // so no need to check the parent is a template.
            return { m_stack_of_open_elements.elements().first(), nullptr };
        }

        // Insert right before the table if it has a parent, otherwise at the
        // end of the element preceding the table on the stack.
        if (last_table.element->parent_node())
            location = { last_table.element->parent_node(), last_table.element };
        else
            location = { m_stack_of_open_elements.element_before(*last_table.element), nullptr };
    }

    if (is<HTMLTemplateElement>(*location.parent))
        return { downcast<HTMLTemplateElement>(*location.parent).content(), nullptr };
    return location;
}
2020-10-10 02:48:05 +01:00
// Creates an element in `namespace_` named after the tag token and copies
// every attribute of the token onto it. The element is not inserted anywhere.
NonnullRefPtr<DOM::Element> HTMLDocumentParser::create_element_for(const HTMLToken& token, const FlyString& namespace_)
{
    auto new_element = create_element(document(), token.tag_name(), namespace_);
    for (auto& token_attribute : token.m_tag.attributes)
        new_element->set_attribute(token_attribute.local_name_builder.to_string(), token_attribute.value_builder.to_string());
    return new_element;
}
2020-10-10 02:48:05 +01:00
// Creates an element for `token` in `namespace_`, inserts it at the
// appropriate insertion location, pushes it onto the stack of open elements
// and returns it.
RefPtr<DOM::Element> HTMLDocumentParser::insert_foreign_element(const HTMLToken& token, const FlyString& namespace_)
{
    auto location = find_appropriate_place_for_inserting_node();
    auto new_element = create_element_for(token, namespace_);
    // FIXME: Check if it's possible to insert `element` at `adjusted_insertion_location`
    location.parent->insert_before(new_element, location.insert_before_sibling);
    m_stack_of_open_elements.push(new_element);
    return new_element;
}
2020-10-10 02:48:05 +01:00
// Shorthand for insert_foreign_element() with the HTML namespace.
RefPtr<DOM::Element> HTMLDocumentParser::insert_html_element(const HTMLToken& token)
{
    const auto& html_namespace = Namespace::HTML;
    return insert_foreign_element(token, html_namespace);
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_before_head ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
2020-05-27 01:49:40 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-27 01:49:40 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : head ) {
2020-05-24 00:14:23 +02:00
auto element = insert_html_element ( token ) ;
2020-07-26 17:16:18 +02:00
m_head_element = downcast < HTMLHeadElement > ( * element ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : InHead ;
return ;
}
2020-05-27 01:49:40 +02:00
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : head , HTML : : TagNames : : body , HTML : : TagNames : : html , HTML : : TagNames : : br ) ) {
2020-05-27 01:49:40 +02:00
goto AnythingElse ;
}
if ( token . is_end_tag ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
AnythingElse :
2020-07-26 17:16:18 +02:00
m_head_element = downcast < HTMLHeadElement > ( * insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : head ) ) ) ;
2020-05-27 01:49:40 +02:00
m_insertion_mode = InsertionMode : : InHead ;
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 20:29:01 +02:00
void HTMLDocumentParser : : insert_comment ( HTMLToken & token )
{
auto data = token . m_comment_or_character . data . to_string ( ) ;
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node ( ) ;
2020-07-26 19:37:56 +02:00
adjusted_insertion_location . parent - > insert_before ( adopt ( * new DOM : : Comment ( document ( ) , data ) ) , adjusted_insertion_location . insert_before_sibling ) ;
2020-05-24 20:29:01 +02:00
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_in_head ( HTMLToken & token )
{
2020-05-24 20:24:43 +02:00
if ( token . is_parser_whitespace ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-24 20:24:43 +02:00
return ;
}
2020-05-24 20:29:01 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
2020-05-25 20:16:48 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-25 20:16:48 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : base , HTML : : TagNames : : basefont , HTML : : TagNames : : bgsound , HTML : : TagNames : : link ) ) {
2020-05-25 20:16:48 +02:00
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : meta ) {
2020-05-30 12:28:12 +02:00
auto element = insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : title ) {
2020-05-24 20:24:43 +02:00
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : RCDATA ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & ( ( token . tag_name ( ) = = HTML : : TagNames : : noscript & & m_scripting_enabled ) | | token . tag_name ( ) = = HTML : : TagNames : : noframes | | token . tag_name ( ) = = HTML : : TagNames : : style ) ) {
2020-05-24 20:36:43 +02:00
parse_generic_raw_text_element ( token ) ;
return ;
}
2020-06-21 06:58:03 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : noscript & & ! m_scripting_enabled ) {
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InHeadNoscript ;
2020-06-21 17:00:55 +02:00
return ;
2020-06-21 06:58:03 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : script ) {
2020-05-24 22:00:46 +02:00
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node ( ) ;
2020-10-10 02:48:05 +01:00
auto element = create_element_for ( token , Namespace : : HTML ) ;
2020-07-26 17:16:18 +02:00
auto & script_element = downcast < HTMLScriptElement > ( * element ) ;
2020-05-24 22:00:46 +02:00
script_element . set_parser_document ( { } , document ( ) ) ;
script_element . set_non_blocking ( { } , false ) ;
if ( m_parsing_fragment ) {
TODO ( ) ;
}
if ( m_invoked_via_document_write ) {
TODO ( ) ;
}
2020-06-21 17:00:55 +02:00
adjusted_insertion_location . parent - > insert_before ( element , adjusted_insertion_location . insert_before_sibling , false ) ;
2020-05-24 22:00:46 +02:00
m_stack_of_open_elements . push ( element ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : ScriptData ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : head ) {
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : AfterHead ;
return ;
}
2020-05-30 12:28:12 +02:00
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : html , HTML : : TagNames : : br ) ) {
2020-06-21 06:58:03 +02:00
goto AnythingElse ;
2020-05-30 12:28:12 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
2020-05-30 19:23:04 +02:00
insert_html_element ( token ) ;
2020-08-19 22:30:33 +01:00
m_list_of_active_formatting_elements . add_marker ( ) ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : InTemplate ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InTemplate ) ;
2020-05-30 19:23:04 +02:00
return ;
2020-05-30 12:28:12 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
2020-08-19 22:30:33 +01:00
if ( ! m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_all_implied_end_tags_thoroughly ( ) ;
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : template_ )
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : template_ ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_stack_of_template_insertion_modes . take_last ( ) ;
reset_the_insertion_mode_appropriately ( ) ;
2020-05-30 19:23:04 +02:00
return ;
2020-05-30 12:28:12 +02:00
}
2020-06-07 23:53:16 +02:00
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : head ) | | token . is_end_tag ( ) ) {
2020-05-30 12:28:12 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-06-21 06:58:03 +02:00
AnythingElse :
2020-05-30 12:28:12 +02:00
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : AfterHead ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-24 00:14:23 +02:00
}
2020-06-21 06:58:03 +02:00
void HTMLDocumentParser : : handle_in_head_noscript ( HTMLToken & token )
2020-05-24 00:14:23 +02:00
{
2020-06-21 06:58:03 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : noscript ) {
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InHead ;
return ;
}
if ( token . is_parser_whitespace ( ) | | token . is_comment ( ) | | ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : basefont , HTML : : TagNames : : bgsound , HTML : : TagNames : : link , HTML : : TagNames : : meta , HTML : : TagNames : : noframes , HTML : : TagNames : : style ) ) ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : br ) {
goto AnythingElse ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : head , HTML : : TagNames : : noscript ) ) {
PARSE_ERROR ( ) ;
return ;
}
AnythingElse :
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InHead ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 20:36:43 +02:00
void HTMLDocumentParser : : parse_generic_raw_text_element ( HTMLToken & token )
{
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : RAWTEXT ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
}
2020-07-26 19:37:56 +02:00
// Finds the text node that character data should be appended to.
// Returns nullptr when the insertion location's parent is a document.
// Otherwise reuses the parent's last child if it is a text node, or appends
// and returns a new, empty text node.
DOM::Text* HTMLDocumentParser::find_character_insertion_node()
{
    auto location = find_appropriate_place_for_inserting_node();

    // Inserting text before a specific sibling is not supported yet.
    if (location.insert_before_sibling) {
        TODO();
    }

    if (location.parent->is_document())
        return nullptr;

    const bool last_child_is_text = location.parent->last_child() && location.parent->last_child()->is_text();
    if (last_child_is_text)
        return downcast<DOM::Text>(location.parent->last_child());

    auto fresh_text_node = adopt(*new DOM::Text(document(), " "));
    location.parent->append_child(fresh_text_node);
    return fresh_text_node;
}
void HTMLDocumentParser : : flush_character_insertions ( )
{
if ( m_character_insertion_builder . is_empty ( ) )
2020-05-24 19:51:50 +02:00
return ;
2020-06-03 21:53:08 +02:00
m_character_insertion_node - > set_data ( m_character_insertion_builder . to_string ( ) ) ;
2020-06-03 22:11:54 +02:00
m_character_insertion_node - > parent ( ) - > children_changed ( ) ;
2020-06-03 21:53:08 +02:00
m_character_insertion_builder . clear ( ) ;
}
void HTMLDocumentParser : : insert_character ( u32 data )
{
auto node = find_character_insertion_node ( ) ;
if ( node = = m_character_insertion_node ) {
m_character_insertion_builder . append ( Utf32View { & data , 1 } ) ;
2020-05-24 19:51:50 +02:00
return ;
}
2020-06-03 21:53:08 +02:00
if ( ! m_character_insertion_node ) {
m_character_insertion_node = node ;
m_character_insertion_builder . append ( Utf32View { & data , 1 } ) ;
return ;
}
flush_character_insertions ( ) ;
m_character_insertion_node = node ;
m_character_insertion_builder . append ( Utf32View { & data , 1 } ) ;
2020-05-24 19:51:50 +02:00
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_after_head ( HTMLToken & token )
{
2020-05-31 19:27:51 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-31 19:27:51 +02:00
return ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_comment ( ) ) {
2020-05-31 19:27:51 +02:00
insert_comment ( token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_doctype ( ) ) {
2020-05-31 19:27:51 +02:00
PARSE_ERROR ( ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-31 19:27:51 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : body ) {
2020-05-24 00:49:22 +02:00
insert_html_element ( token ) ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : InBody ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : frameset ) {
2020-05-31 19:27:51 +02:00
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InFrameset ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : base , HTML : : TagNames : : basefont , HTML : : TagNames : : bgsound , HTML : : TagNames : : link , HTML : : TagNames : : meta , HTML : : TagNames : : noframes , HTML : : TagNames : : script , HTML : : TagNames : : style , HTML : : TagNames : : template_ , HTML : : TagNames : : title ) ) {
2020-05-30 19:23:04 +02:00
PARSE_ERROR ( ) ;
m_stack_of_open_elements . push ( * m_head_element ) ;
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
m_stack_of_open_elements . elements ( ) . remove_first_matching ( [ & ] ( auto & entry ) {
return entry . ptr ( ) = = m_head_element ;
} ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
2020-06-21 06:58:03 +02:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
2020-06-21 17:00:55 +02:00
return ;
2020-05-24 00:14:23 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : html , HTML : : TagNames : : br ) ) {
2020-05-24 00:14:23 +02:00
goto AnythingElse ;
}
2020-06-07 23:53:16 +02:00
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : head ) | | token . is_end_tag ( ) ) {
2020-05-31 19:27:51 +02:00
PARSE_ERROR ( ) ;
return ;
2020-05-24 00:14:23 +02:00
}
AnythingElse :
2020-07-23 17:30:03 +02:00
insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : body ) ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : InBody ;
2020-05-31 19:27:51 +02:00
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 22:21:25 +02:00
void HTMLDocumentParser : : generate_implied_end_tags ( const FlyString & exception )
2020-05-24 00:14:23 +02:00
{
2020-07-23 18:18:13 +02:00
while ( current_node ( ) . local_name ( ) ! = exception & & current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt , HTML : : TagNames : : li , HTML : : TagNames : : optgroup , HTML : : TagNames : : option , HTML : : TagNames : : p , HTML : : TagNames : : rb , HTML : : TagNames : : rp , HTML : : TagNames : : rt , HTML : : TagNames : : rtc ) )
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-05-24 00:49:22 +02:00
}
2020-08-19 22:30:33 +01:00
void HTMLDocumentParser : : generate_all_implied_end_tags_thoroughly ( )
{
while ( current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : colgroup , HTML : : TagNames : : dd , HTML : : TagNames : : dt , HTML : : TagNames : : li , HTML : : TagNames : : optgroup , HTML : : TagNames : : option , HTML : : TagNames : : p , HTML : : TagNames : : rb , HTML : : TagNames : : rp , HTML : : TagNames : : rt , HTML : : TagNames : : rtc , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) )
m_stack_of_open_elements . pop ( ) ;
}
2020-05-24 22:21:25 +02:00
void HTMLDocumentParser : : close_a_p_element ( )
{
2020-06-07 23:53:16 +02:00
generate_implied_end_tags ( HTML : : TagNames : : p ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : p ) {
2020-05-25 20:02:27 +02:00
PARSE_ERROR ( ) ;
2020-05-24 22:21:25 +02:00
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : p ) ;
2020-05-24 22:21:25 +02:00
}
2020-05-24 00:49:22 +02:00
void HTMLDocumentParser : : handle_after_body ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-05-28 18:52:32 +02:00
if ( token . is_comment ( ) ) {
2020-06-21 06:58:03 +02:00
auto data = token . m_comment_or_character . data . to_string ( ) ;
auto & insertion_location = m_stack_of_open_elements . first ( ) ;
2020-07-26 19:37:56 +02:00
insertion_location . append_child ( adopt ( * new DOM : : Comment ( document ( ) , data ) ) ) ;
2020-06-21 06:58:03 +02:00
return ;
2020-05-28 18:52:32 +02:00
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-28 18:52:32 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-24 00:49:22 +02:00
if ( m_parsing_fragment ) {
2020-07-21 19:03:05 +01:00
PARSE_ERROR ( ) ;
return ;
2020-05-24 00:49:22 +02:00
}
m_insertion_mode = InsertionMode : : AfterAfterBody ;
return ;
}
2020-05-28 18:52:32 +02:00
2020-06-21 06:58:03 +02:00
if ( token . is_end_of_file ( ) ) {
stop_parsing ( ) ;
return ;
}
2020-05-28 18:52:32 +02:00
PARSE_ERROR ( ) ;
m_insertion_mode = InsertionMode : : InBody ;
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
2020-05-24 00:49:22 +02:00
}
void HTMLDocumentParser : : handle_after_after_body ( HTMLToken & token )
{
2020-06-01 12:38:17 +02:00
if ( token . is_comment ( ) ) {
2020-07-26 19:37:56 +02:00
auto comment = adopt ( * new DOM : : Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
2020-06-01 12:38:17 +02:00
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_doctype ( ) | | token . is_parser_whitespace ( ) | | ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) ) {
2020-05-24 19:51:50 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-05-24 00:49:22 +02:00
if ( token . is_end_of_file ( ) ) {
2020-05-28 18:55:18 +02:00
stop_parsing ( ) ;
2020-05-24 00:49:22 +02:00
return ;
}
2020-06-01 12:38:17 +02:00
PARSE_ERROR ( ) ;
m_insertion_mode = InsertionMode : : InBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-24 00:49:22 +02:00
}
2020-05-24 19:51:50 +02:00
void HTMLDocumentParser : : reconstruct_the_active_formatting_elements ( )
{
2020-05-24 22:39:59 +02:00
// FIXME: This needs to care about "markers"
2020-05-24 19:51:50 +02:00
if ( m_list_of_active_formatting_elements . is_empty ( ) )
return ;
2020-05-28 00:27:46 +02:00
if ( m_list_of_active_formatting_elements . entries ( ) . last ( ) . is_marker ( ) )
return ;
2020-05-27 23:22:42 +02:00
if ( m_stack_of_open_elements . contains ( * m_list_of_active_formatting_elements . entries ( ) . last ( ) . element ) )
2020-05-24 22:39:59 +02:00
return ;
2020-05-27 23:22:42 +02:00
ssize_t index = m_list_of_active_formatting_elements . entries ( ) . size ( ) - 1 ;
2020-07-26 19:37:56 +02:00
RefPtr < DOM : : Element > entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
2020-05-27 23:22:42 +02:00
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
Rewind :
2020-05-26 21:39:28 +02:00
if ( index = = 0 ) {
2020-05-24 22:39:59 +02:00
goto Create ;
}
- - index ;
2020-05-27 23:22:42 +02:00
entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
if ( ! m_stack_of_open_elements . contains ( * entry ) )
goto Rewind ;
Advance :
+ + index ;
2020-05-27 23:22:42 +02:00
entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
Create :
// FIXME: Hold on to the real token!
2020-07-23 18:18:13 +02:00
auto new_element = insert_html_element ( HTMLToken : : make_start_tag ( entry - > local_name ( ) ) ) ;
2020-05-24 22:39:59 +02:00
2020-05-27 23:22:42 +02:00
m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element = * new_element ;
2020-05-24 22:39:59 +02:00
2020-05-27 23:22:42 +02:00
if ( index ! = ( ssize_t ) m_list_of_active_formatting_elements . entries ( ) . size ( ) - 1 )
2020-05-24 22:39:59 +02:00
goto Advance ;
2020-05-24 19:51:50 +02:00
}
2020-05-30 16:22:25 +02:00
// Runs the (partially implemented) adoption agency algorithm for the tag named by `token`.
//
// Returns RunAnyOtherEndTagSteps when no matching formatting element is found before a
// marker, and DoNothing in every other implemented case. Cases that need the remainder
// of the algorithm (a furthest block exists) are not implemented yet and end in TODO().
HTMLDocumentParser::AdoptionAgencyAlgorithmOutcome HTMLDocumentParser::run_the_adoption_agency_algorithm(HTMLToken& token)
{
    auto subject = token.tag_name();

    // If the current node is an HTML element whose tag name is subject,
    // and the current node is not in the list of active formatting elements,
    // then pop the current node off the stack of open elements, and return.
    if (current_node().local_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) {
        m_stack_of_open_elements.pop();
        return AdoptionAgencyAlgorithmOutcome::DoNothing;
    }

    size_t outer_loop_counter = 0;

    // NOTE: Only a single pass of the outer loop is implemented so far; the label is kept
    //       (commented out) to mark where the loop will start.
    //OuterLoop:
    if (outer_loop_counter >= 8)
        return AdoptionAgencyAlgorithmOutcome::DoNothing;

    ++outer_loop_counter;

    auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject);
    if (!formatting_element)
        return AdoptionAgencyAlgorithmOutcome::RunAnyOtherEndTagSteps;

    // If formatting element is not in the stack of open elements,
    // then this is a parse error; remove the element from the list, and return.
    if (!m_stack_of_open_elements.contains(*formatting_element)) {
        PARSE_ERROR();
        m_list_of_active_formatting_elements.remove(*formatting_element);
        return AdoptionAgencyAlgorithmOutcome::DoNothing;
    }

    // The formatting element is open but not in scope: parse error, nothing else to do.
    if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) {
        PARSE_ERROR();
        return AdoptionAgencyAlgorithmOutcome::DoNothing;
    }

    // Not being the current node is a parse error, but processing continues.
    if (formatting_element != &current_node()) {
        PARSE_ERROR();
    }

    RefPtr<DOM::Element> furthest_block = m_stack_of_open_elements.topmost_special_node_below(*formatting_element);

    // Without a furthest block: pop everything up to and including the formatting element,
    // and drop it from the list of active formatting elements.
    if (!furthest_block) {
        while (&current_node() != formatting_element)
            m_stack_of_open_elements.pop();
        m_stack_of_open_elements.pop();

        m_list_of_active_formatting_elements.remove(*formatting_element);
        return AdoptionAgencyAlgorithmOutcome::DoNothing;
    }

    // FIXME: Implement the rest of the AAA :^)

    TODO();
}
2020-10-12 01:51:28 +01:00
bool HTMLDocumentParser : : is_special_tag ( const FlyString & tag_name , const FlyString & namespace_ )
2020-05-29 21:44:36 +02:00
{
2020-10-12 01:51:28 +01:00
if ( namespace_ = = Namespace : : HTML ) {
return tag_name . is_one_of (
HTML : : TagNames : : address ,
HTML : : TagNames : : applet ,
HTML : : TagNames : : area ,
HTML : : TagNames : : article ,
HTML : : TagNames : : aside ,
HTML : : TagNames : : base ,
HTML : : TagNames : : basefont ,
HTML : : TagNames : : bgsound ,
HTML : : TagNames : : blockquote ,
HTML : : TagNames : : body ,
HTML : : TagNames : : br ,
HTML : : TagNames : : button ,
HTML : : TagNames : : caption ,
HTML : : TagNames : : center ,
HTML : : TagNames : : col ,
HTML : : TagNames : : colgroup ,
HTML : : TagNames : : dd ,
HTML : : TagNames : : details ,
HTML : : TagNames : : dir ,
HTML : : TagNames : : div ,
HTML : : TagNames : : dl ,
HTML : : TagNames : : dt ,
HTML : : TagNames : : embed ,
HTML : : TagNames : : fieldset ,
HTML : : TagNames : : figcaption ,
HTML : : TagNames : : figure ,
HTML : : TagNames : : footer ,
HTML : : TagNames : : form ,
HTML : : TagNames : : frame ,
HTML : : TagNames : : frameset ,
HTML : : TagNames : : h1 ,
HTML : : TagNames : : h2 ,
HTML : : TagNames : : h3 ,
HTML : : TagNames : : h4 ,
HTML : : TagNames : : h5 ,
HTML : : TagNames : : h6 ,
HTML : : TagNames : : head ,
HTML : : TagNames : : header ,
HTML : : TagNames : : hgroup ,
HTML : : TagNames : : hr ,
HTML : : TagNames : : html ,
HTML : : TagNames : : iframe ,
HTML : : TagNames : : img ,
HTML : : TagNames : : input ,
HTML : : TagNames : : keygen ,
HTML : : TagNames : : li ,
HTML : : TagNames : : link ,
HTML : : TagNames : : listing ,
HTML : : TagNames : : main ,
HTML : : TagNames : : marquee ,
HTML : : TagNames : : menu ,
HTML : : TagNames : : meta ,
HTML : : TagNames : : nav ,
HTML : : TagNames : : noembed ,
HTML : : TagNames : : noframes ,
HTML : : TagNames : : noscript ,
HTML : : TagNames : : object ,
HTML : : TagNames : : ol ,
HTML : : TagNames : : p ,
HTML : : TagNames : : param ,
HTML : : TagNames : : plaintext ,
HTML : : TagNames : : pre ,
HTML : : TagNames : : script ,
HTML : : TagNames : : section ,
HTML : : TagNames : : select ,
HTML : : TagNames : : source ,
HTML : : TagNames : : style ,
HTML : : TagNames : : summary ,
HTML : : TagNames : : table ,
HTML : : TagNames : : tbody ,
HTML : : TagNames : : td ,
HTML : : TagNames : : template_ ,
HTML : : TagNames : : textarea ,
HTML : : TagNames : : tfoot ,
HTML : : TagNames : : th ,
HTML : : TagNames : : thead ,
HTML : : TagNames : : title ,
HTML : : TagNames : : tr ,
HTML : : TagNames : : track ,
HTML : : TagNames : : ul ,
HTML : : TagNames : : wbr ,
HTML : : TagNames : : xmp ) ;
} else if ( namespace_ = = Namespace : : SVG ) {
return tag_name . is_one_of (
SVG : : TagNames : : desc ,
SVG : : TagNames : : foreignObject ,
SVG : : TagNames : : title ) ;
} else if ( namespace_ = = Namespace : : MathML ) {
TODO ( ) ;
}
return false ;
2020-05-29 21:44:36 +02:00
}
2020-05-24 00:49:22 +02:00
void HTMLDocumentParser : : handle_in_body ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) ) {
2020-08-05 16:31:20 -04:00
if ( token . code_point ( ) = = 0 ) {
2020-06-03 22:37:45 -06:00
PARSE_ERROR ( ) ;
return ;
2020-05-24 19:51:50 +02:00
}
if ( token . is_parser_whitespace ( ) ) {
reconstruct_the_active_formatting_elements ( ) ;
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-24 19:51:50 +02:00
return ;
}
2020-05-24 22:21:25 +02:00
reconstruct_the_active_formatting_elements ( ) ;
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-24 22:21:25 +02:00
m_frameset_ok = false ;
return ;
2020-05-24 19:51:50 +02:00
}
2020-05-28 18:46:39 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-06-01 12:38:17 +02:00
PARSE_ERROR ( ) ;
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) )
2020-06-01 12:38:17 +02:00
return ;
for ( auto & attribute : token . m_tag . attributes ) {
2020-06-21 06:58:03 +02:00
if ( current_node ( ) . has_attribute ( attribute . local_name_builder . string_view ( ) ) )
2020-06-01 12:38:17 +02:00
continue ;
2020-06-21 06:58:03 +02:00
current_node ( ) . set_attribute ( attribute . local_name_builder . to_string ( ) , attribute . value_builder . to_string ( ) ) ;
2020-06-01 12:38:17 +02:00
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : base , HTML : : TagNames : : basefont , HTML : : TagNames : : bgsound , HTML : : TagNames : : link , HTML : : TagNames : : meta , HTML : : TagNames : : noframes , HTML : : TagNames : : script , HTML : : TagNames : : style , HTML : : TagNames : : template_ , HTML : : TagNames : : title ) ) {
2020-05-29 21:20:49 +02:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
2020-05-29 21:20:49 +02:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : body ) {
2020-06-01 12:38:17 +02:00
PARSE_ERROR ( ) ;
if ( m_stack_of_open_elements . elements ( ) . size ( ) = = 1
2020-07-23 18:18:13 +02:00
| | m_stack_of_open_elements . elements ( ) . at ( 1 ) . local_name ( ) ! = HTML : : TagNames : : body
2020-06-07 23:53:16 +02:00
| | m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-06-01 12:38:17 +02:00
return ;
}
m_frameset_ok = false ;
2020-07-21 19:03:05 +01:00
auto & body_element = m_stack_of_open_elements . elements ( ) . at ( 1 ) ;
2020-06-01 12:38:17 +02:00
for ( auto & attribute : token . m_tag . attributes ) {
2020-07-21 19:03:05 +01:00
if ( body_element . has_attribute ( attribute . local_name_builder . string_view ( ) ) )
2020-06-01 12:38:17 +02:00
continue ;
2020-07-21 19:03:05 +01:00
body_element . set_attribute ( attribute . local_name_builder . to_string ( ) , attribute . value_builder . to_string ( ) ) ;
2020-06-01 12:38:17 +02:00
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : frameset ) {
2020-07-21 19:03:05 +01:00
PARSE_ERROR ( ) ;
if ( m_stack_of_open_elements . elements ( ) . size ( ) = = 1
2020-07-23 18:18:13 +02:00
| | m_stack_of_open_elements . elements ( ) . at ( 1 ) . local_name ( ) ! = HTML : : TagNames : : body ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
return ;
}
if ( ! m_frameset_ok )
return ;
2020-05-29 21:20:49 +02:00
TODO ( ) ;
}
if ( token . is_end_of_file ( ) ) {
2020-08-19 22:30:33 +01:00
if ( ! m_stack_of_template_insertion_modes . is_empty ( ) ) {
process_using_the_rules_for ( InsertionMode : : InTemplate , token ) ;
return ;
}
2020-05-30 12:40:12 +02:00
2020-08-19 22:30:33 +01:00
for ( auto & node : m_stack_of_open_elements . elements ( ) ) {
if ( ! node . local_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt , HTML : : TagNames : : li , HTML : : TagNames : : optgroup , HTML : : TagNames : : option , HTML : : TagNames : : p , HTML : : TagNames : : rb , HTML : : TagNames : : rp , HTML : : TagNames : : rt , HTML : : TagNames : : rtc , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr , HTML : : TagNames : : body , HTML : : TagNames : : html ) ) {
PARSE_ERROR ( ) ;
break ;
}
}
2020-05-30 12:40:12 +02:00
stop_parsing ( ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : body ) {
if ( ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : body ) ) {
2020-06-03 22:36:08 -06:00
PARSE_ERROR ( ) ;
return ;
2020-05-24 00:49:22 +02:00
}
2020-06-03 22:36:08 -06:00
for ( auto & node : m_stack_of_open_elements . elements ( ) ) {
2020-07-23 18:18:13 +02:00
if ( ! node . local_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt , HTML : : TagNames : : li , HTML : : TagNames : : optgroup , HTML : : TagNames : : option , HTML : : TagNames : : p , HTML : : TagNames : : rb , HTML : : TagNames : : rp , HTML : : TagNames : : rt , HTML : : TagNames : : rtc , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr , HTML : : TagNames : : body , HTML : : TagNames : : html ) ) {
2020-06-03 22:36:08 -06:00
PARSE_ERROR ( ) ;
break ;
}
}
2020-05-24 00:49:22 +02:00
m_insertion_mode = InsertionMode : : AfterBody ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
if ( ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : body ) ) {
2020-06-03 23:28:57 -06:00
PARSE_ERROR ( ) ;
return ;
}
for ( auto & node : m_stack_of_open_elements . elements ( ) ) {
2020-07-23 18:18:13 +02:00
if ( ! node . local_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt , HTML : : TagNames : : li , HTML : : TagNames : : optgroup , HTML : : TagNames : : option , HTML : : TagNames : : p , HTML : : TagNames : : rb , HTML : : TagNames : : rp , HTML : : TagNames : : rt , HTML : : TagNames : : rtc , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr , HTML : : TagNames : : body , HTML : : TagNames : : html ) ) {
2020-06-03 23:28:57 -06:00
PARSE_ERROR ( ) ;
break ;
}
}
m_insertion_mode = InsertionMode : : AfterBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : address , HTML : : TagNames : : article , HTML : : TagNames : : aside , HTML : : TagNames : : blockquote , HTML : : TagNames : : center , HTML : : TagNames : : details , HTML : : TagNames : : dialog , HTML : : TagNames : : dir , HTML : : TagNames : : div , HTML : : TagNames : : dl , HTML : : TagNames : : fieldset , HTML : : TagNames : : figcaption , HTML : : TagNames : : figure , HTML : : TagNames : : footer , HTML : : TagNames : : header , HTML : : TagNames : : hgroup , HTML : : TagNames : : main , HTML : : TagNames : : menu , HTML : : TagNames : : nav , HTML : : TagNames : : ol , HTML : : TagNames : : p , HTML : : TagNames : : section , HTML : : TagNames : : summary , HTML : : TagNames : : ul ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-29 21:20:49 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : h1 , HTML : : TagNames : : h2 , HTML : : TagNames : : h3 , HTML : : TagNames : : h4 , HTML : : TagNames : : h5 , HTML : : TagNames : : h6 ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-25 12:57:20 +02:00
close_a_p_element ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : h1 , HTML : : TagNames : : h2 , HTML : : TagNames : : h3 , HTML : : TagNames : : h4 , HTML : : TagNames : : h5 , HTML : : TagNames : : h6 ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop ( ) ;
2020-05-24 22:21:25 +02:00
}
2020-05-25 12:57:20 +02:00
insert_html_element ( token ) ;
return ;
2020-05-24 22:21:25 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : pre , HTML : : TagNames : : listing ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-30 13:07:47 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
m_frameset_ok = false ;
// If the next token is a U+000A LINE FEED (LF) character token,
// then ignore that token and move on to the next one.
// (Newlines at the start of pre blocks are ignored as an authoring convenience.)
auto next_token = m_tokenizer . next_token ( ) ;
2020-08-05 16:31:20 -04:00
if ( next_token . has_value ( ) & & next_token . value ( ) . is_character ( ) & & next_token . value ( ) . code_point ( ) = = ' \n ' ) {
2020-05-30 13:07:47 +02:00
// Ignore it.
} else {
process_using_the_rules_for ( m_insertion_mode , next_token . value ( ) ) ;
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : form ) {
2020-06-13 07:36:09 +01:00
if ( m_form_element & & ! m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
2020-05-30 11:13:57 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-30 11:13:57 +02:00
close_a_p_element ( ) ;
auto element = insert_html_element ( token ) ;
2020-06-07 23:53:16 +02:00
if ( ! m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) )
2020-07-26 17:16:18 +02:00
m_form_element = downcast < HTMLFormElement > ( * element ) ;
2020-05-30 11:13:57 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : li ) {
2020-05-29 21:44:36 +02:00
m_frameset_ok = false ;
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
2020-07-26 19:37:56 +02:00
RefPtr < DOM : : Element > node = m_stack_of_open_elements . elements ( ) [ i ] ;
2020-05-29 21:44:36 +02:00
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : li ) {
2020-06-07 23:53:16 +02:00
generate_implied_end_tags ( HTML : : TagNames : : li ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : li ) {
2020-05-29 21:44:36 +02:00
PARSE_ERROR ( ) ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : li ) ;
2020-05-29 21:44:36 +02:00
break ;
}
2020-10-12 01:51:28 +01:00
if ( is_special_tag ( node - > local_name ( ) , node - > namespace_ ( ) ) & & ! node - > local_name ( ) . is_one_of ( HTML : : TagNames : : address , HTML : : TagNames : : div , HTML : : TagNames : : p ) )
2020-05-29 21:44:36 +02:00
break ;
}
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-29 21:44:36 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt ) ) {
2020-05-30 13:07:47 +02:00
m_frameset_ok = false ;
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
2020-07-26 19:37:56 +02:00
RefPtr < DOM : : Element > node = m_stack_of_open_elements . elements ( ) [ i ] ;
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : dd ) {
2020-06-07 23:53:16 +02:00
generate_implied_end_tags ( HTML : : TagNames : : dd ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : dd ) {
2020-05-30 13:07:47 +02:00
PARSE_ERROR ( ) ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : dd ) ;
2020-05-30 13:07:47 +02:00
break ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : dt ) {
2020-06-07 23:53:16 +02:00
generate_implied_end_tags ( HTML : : TagNames : : dt ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : dt ) {
2020-05-30 13:07:47 +02:00
PARSE_ERROR ( ) ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : dt ) ;
2020-05-30 13:07:47 +02:00
break ;
}
2020-10-12 01:51:28 +01:00
if ( is_special_tag ( node - > local_name ( ) , node - > namespace_ ( ) ) & & ! node - > local_name ( ) . is_one_of ( HTML : : TagNames : : address , HTML : : TagNames : : div , HTML : : TagNames : : p ) )
2020-05-30 13:07:47 +02:00
break ;
}
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-30 13:07:47 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : plaintext ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-29 21:20:49 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : PLAINTEXT ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : button ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : button ) ) {
2020-05-29 21:20:49 +02:00
PARSE_ERROR ( ) ;
generate_implied_end_tags ( ) ;
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : button ) ;
2020-05-29 21:20:49 +02:00
}
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_frameset_ok = false ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : address , HTML : : TagNames : : article , HTML : : TagNames : : aside , HTML : : TagNames : : blockquote , HTML : : TagNames : : button , HTML : : TagNames : : center , HTML : : TagNames : : details , HTML : : TagNames : : dialog , HTML : : TagNames : : dir , HTML : : TagNames : : div , HTML : : TagNames : : dl , HTML : : TagNames : : fieldset , HTML : : TagNames : : figcaption , HTML : : TagNames : : figure , HTML : : TagNames : : footer , HTML : : TagNames : : header , HTML : : TagNames : : hgroup , HTML : : TagNames : : listing , HTML : : TagNames : : main , HTML : : TagNames : : menu , HTML : : TagNames : : nav , HTML : : TagNames : : ol , HTML : : TagNames : : pre , HTML : : TagNames : : section , HTML : : TagNames : : summary , HTML : : TagNames : : ul ) ) {
2020-05-29 21:20:49 +02:00
if ( ! m_stack_of_open_elements . has_in_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = token . tag_name ( ) ) {
2020-05-29 21:20:49 +02:00
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : form ) {
if ( ! m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
2020-05-30 11:13:57 +02:00
auto node = m_form_element ;
m_form_element = nullptr ;
2020-06-15 20:29:04 +02:00
if ( ! node | | ! m_stack_of_open_elements . has_in_scope ( * node ) ) {
2020-05-30 11:13:57 +02:00
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
if ( & current_node ( ) ! = node ) {
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . elements ( ) . remove_first_matching ( [ & ] ( auto & entry ) { return entry . ptr ( ) = = node . ptr ( ) ; } ) ;
} else {
2020-06-07 23:53:16 +02:00
if ( ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : form ) ) {
2020-06-03 23:03:19 -06:00
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : form ) {
2020-06-03 23:03:19 -06:00
PARSE_ERROR ( ) ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : form ) ;
2020-05-30 11:13:57 +02:00
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : p ) {
if ( ! m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) ) {
2020-05-29 21:20:49 +02:00
PARSE_ERROR ( ) ;
2020-07-23 17:30:03 +02:00
insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : p ) ) ;
2020-05-29 21:20:49 +02:00
}
close_a_p_element ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : li ) {
if ( ! m_stack_of_open_elements . has_in_list_item_scope ( HTML : : TagNames : : li ) ) {
2020-05-29 22:06:05 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
generate_implied_end_tags ( HTML : : TagNames : : li ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : li ) {
2020-05-29 22:06:05 +02:00
PARSE_ERROR ( ) ;
2020-07-23 18:18:13 +02:00
dbg ( ) < < " Expected <li> current node, but had < " < < current_node ( ) . local_name ( ) < < " > " ;
2020-05-29 22:06:05 +02:00
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : li ) ;
2020-05-29 22:06:05 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : dd , HTML : : TagNames : : dt ) ) {
2020-05-30 22:59:15 +02:00
if ( ! m_stack_of_open_elements . has_in_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( token . tag_name ( ) ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = token . tag_name ( ) ) {
2020-05-30 22:59:15 +02:00
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : h1 , HTML : : TagNames : : h2 , HTML : : TagNames : : h3 , HTML : : TagNames : : h4 , HTML : : TagNames : : h5 , HTML : : TagNames : : h6 ) ) {
if ( ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h1 )
& & ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h2 )
& & ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h3 )
& & ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h4 )
& & ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h5 )
& & ! m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : h6 ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
return ;
2020-05-25 12:57:20 +02:00
}
2020-05-24 22:21:25 +02:00
2020-05-25 12:57:20 +02:00
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = token . tag_name ( ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
2020-05-25 12:57:20 +02:00
}
2020-05-24 22:21:25 +02:00
2020-05-25 12:57:20 +02:00
for ( ; ; ) {
auto popped_element = m_stack_of_open_elements . pop ( ) ;
2020-07-23 18:18:13 +02:00
if ( popped_element - > local_name ( ) . is_one_of ( HTML : : TagNames : : h1 , HTML : : TagNames : : h2 , HTML : : TagNames : : h3 , HTML : : TagNames : : h4 , HTML : : TagNames : : h5 , HTML : : TagNames : : h6 ) )
2020-05-25 12:57:20 +02:00
break ;
2020-05-24 22:21:25 +02:00
}
2020-05-25 12:57:20 +02:00
return ;
2020-05-24 22:21:25 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : a ) {
if ( auto * element = m_list_of_active_formatting_elements . last_element_with_tag_name_before_marker ( HTML : : TagNames : : a ) ) {
2020-05-30 11:03:05 +02:00
PARSE_ERROR ( ) ;
2020-05-30 16:22:25 +02:00
if ( run_the_adoption_agency_algorithm ( token ) = = AdoptionAgencyAlgorithmOutcome : : RunAnyOtherEndTagSteps )
goto AnyOtherEndTag ;
2020-05-30 11:03:05 +02:00
m_list_of_active_formatting_elements . remove ( * element ) ;
m_stack_of_open_elements . elements ( ) . remove_first_matching ( [ & ] ( auto & entry ) {
return entry . ptr ( ) = = element ;
} ) ;
2020-05-29 22:06:05 +02:00
}
reconstruct_the_active_formatting_elements ( ) ;
auto element = insert_html_element ( token ) ;
m_list_of_active_formatting_elements . add ( * element ) ;
return ;
2020-05-24 22:21:25 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : b , HTML : : TagNames : : big , HTML : : TagNames : : code , HTML : : TagNames : : em , HTML : : TagNames : : font , HTML : : TagNames : : i , HTML : : TagNames : : s , HTML : : TagNames : : small , HTML : : TagNames : : strike , HTML : : TagNames : : strong , HTML : : TagNames : : tt , HTML : : TagNames : : u ) ) {
2020-05-27 23:22:42 +02:00
reconstruct_the_active_formatting_elements ( ) ;
auto element = insert_html_element ( token ) ;
m_list_of_active_formatting_elements . add ( * element ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : nobr ) {
2020-06-03 23:03:44 -06:00
reconstruct_the_active_formatting_elements ( ) ;
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : nobr ) ) {
2020-06-03 23:03:44 -06:00
PARSE_ERROR ( ) ;
run_the_adoption_agency_algorithm ( token ) ;
reconstruct_the_active_formatting_elements ( ) ;
}
auto element = insert_html_element ( token ) ;
m_list_of_active_formatting_elements . add ( * element ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : a , HTML : : TagNames : : b , HTML : : TagNames : : big , HTML : : TagNames : : code , HTML : : TagNames : : em , HTML : : TagNames : : font , HTML : : TagNames : : i , HTML : : TagNames : : nobr , HTML : : TagNames : : s , HTML : : TagNames : : small , HTML : : TagNames : : strike , HTML : : TagNames : : strong , HTML : : TagNames : : tt , HTML : : TagNames : : u ) ) {
2020-05-30 16:22:25 +02:00
if ( run_the_adoption_agency_algorithm ( token ) = = AdoptionAgencyAlgorithmOutcome : : RunAnyOtherEndTagSteps )
goto AnyOtherEndTag ;
2020-05-27 23:22:42 +02:00
return ;
2020-05-24 22:21:25 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : applet , HTML : : TagNames : : marquee , HTML : : TagNames : : object ) ) {
2020-05-29 21:20:49 +02:00
reconstruct_the_active_formatting_elements ( ) ;
2020-05-25 12:57:20 +02:00
insert_html_element ( token ) ;
2020-05-29 21:20:49 +02:00
m_list_of_active_formatting_elements . add_marker ( ) ;
m_frameset_ok = false ;
2020-05-25 12:57:20 +02:00
return ;
}
2020-05-24 00:49:22 +02:00
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : applet , HTML : : TagNames : : marquee , HTML : : TagNames : : object ) ) {
2020-06-03 22:33:10 -06:00
if ( ! m_stack_of_open_elements . has_in_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = token . tag_name ( ) ) {
2020-06-03 22:33:10 -06:00
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
return ;
2020-05-24 00:49:22 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) {
2020-05-29 21:20:49 +02:00
if ( ! document ( ) . in_quirks_mode ( ) ) {
2020-06-07 23:53:16 +02:00
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-29 21:20:49 +02:00
close_a_p_element ( ) ;
}
2020-05-25 20:30:34 +02:00
insert_html_element ( token ) ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : br ) {
2020-06-03 22:34:50 -06:00
token . drop_attributes ( ) ;
goto BRStartTag ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : area , HTML : : TagNames : : br , HTML : : TagNames : : embed , HTML : : TagNames : : img , HTML : : TagNames : : keygen , HTML : : TagNames : : wbr ) ) {
2020-06-03 22:34:50 -06:00
BRStartTag :
2020-05-28 00:25:30 +02:00
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
m_frameset_ok = false ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : input ) {
2020-05-28 12:18:46 +02:00
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
auto type_attribute = token . attribute ( HTML : : AttributeNames : : type ) ;
2020-06-13 07:36:09 +01:00
if ( type_attribute . is_null ( ) | | ! type_attribute . equals_ignoring_case ( " hidden " ) ) {
2020-05-28 12:18:46 +02:00
m_frameset_ok = false ;
}
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : param , HTML : : TagNames : : source , HTML : : TagNames : : track ) ) {
2020-06-05 21:59:46 +02:00
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : hr ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) )
2020-05-29 21:20:49 +02:00
close_a_p_element ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
m_frameset_ok = false ;
return ;
}
2020-06-21 06:58:03 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : image ) {
2020-06-07 23:53:16 +02:00
// Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.)
2020-05-29 21:20:49 +02:00
PARSE_ERROR ( ) ;
token . m_tag . tag_name . clear ( ) ;
2020-06-07 23:53:16 +02:00
token . m_tag . tag_name . append ( HTML : : TagNames : : img ) ;
2020-05-29 21:20:49 +02:00
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : textarea ) {
2020-05-30 18:40:23 +02:00
insert_html_element ( token ) ;
2020-06-05 12:05:42 +02:00
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : RCDATA ) ;
2020-05-30 18:40:23 +02:00
// If the next token is a U+000A LINE FEED (LF) character token,
// then ignore that token and move on to the next one.
// (Newlines at the start of pre blocks are ignored as an authoring convenience.)
auto next_token = m_tokenizer . next_token ( ) ;
m_original_insertion_mode = m_insertion_mode ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : Text ;
2020-08-05 16:31:20 -04:00
if ( next_token . has_value ( ) & & next_token . value ( ) . is_character ( ) & & next_token . value ( ) . code_point ( ) = = ' \n ' ) {
2020-05-30 18:40:23 +02:00
// Ignore it.
} else {
process_using_the_rules_for ( m_insertion_mode , next_token . value ( ) ) ;
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : xmp ) {
if ( m_stack_of_open_elements . has_in_button_scope ( HTML : : TagNames : : p ) ) {
2020-06-03 23:04:03 -06:00
close_a_p_element ( ) ;
}
reconstruct_the_active_formatting_elements ( ) ;
m_frameset_ok = false ;
parse_generic_raw_text_element ( token ) ;
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : iframe ) {
2020-05-29 21:20:49 +02:00
m_frameset_ok = false ;
parse_generic_raw_text_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & ( ( token . tag_name ( ) = = HTML : : TagNames : : noembed ) | | ( token . tag_name ( ) = = HTML : : TagNames : : noscript & & m_scripting_enabled ) ) ) {
2020-05-29 21:20:49 +02:00
parse_generic_raw_text_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : select ) {
2020-05-30 19:58:52 +02:00
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_frameset_ok = false ;
switch ( m_insertion_mode ) {
case InsertionMode : : InTable :
case InsertionMode : : InCaption :
case InsertionMode : : InTableBody :
case InsertionMode : : InRow :
case InsertionMode : : InCell :
m_insertion_mode = InsertionMode : : InSelectInTable ;
break ;
default :
m_insertion_mode = InsertionMode : : InSelect ;
break ;
}
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : optgroup , HTML : : TagNames : : option ) ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : option )
2020-05-29 21:20:49 +02:00
m_stack_of_open_elements . pop ( ) ;
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : rb , HTML : : TagNames : : rtc ) ) {
2020-06-21 06:58:03 +02:00
if ( m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : ruby ) )
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : ruby )
2020-06-21 06:58:03 +02:00
PARSE_ERROR ( ) ;
insert_html_element ( token ) ;
2020-06-21 17:00:55 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : rp , HTML : : TagNames : : rt ) ) {
2020-06-21 06:58:03 +02:00
if ( m_stack_of_open_elements . has_in_scope ( HTML : : TagNames : : ruby ) )
generate_implied_end_tags ( HTML : : TagNames : : rtc ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : rtc | | current_node ( ) . local_name ( ) ! = HTML : : TagNames : : ruby )
2020-06-21 06:58:03 +02:00
PARSE_ERROR ( ) ;
insert_html_element ( token ) ;
2020-06-21 17:00:55 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-21 06:58:03 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : math ) {
2020-05-30 18:46:39 +02:00
dbg ( ) < < " <math> element encountered. " ;
reconstruct_the_active_formatting_elements ( ) ;
2020-06-21 06:58:03 +02:00
adjust_mathml_attributes ( token ) ;
adjust_foreign_attributes ( token ) ;
2020-10-10 02:48:05 +01:00
insert_foreign_element ( token , Namespace : : MathML ) ;
2020-06-21 06:58:03 +02:00
if ( token . is_self_closing ( ) ) {
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
}
2020-05-30 18:46:39 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-21 06:58:03 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : svg ) {
2020-05-30 18:46:39 +02:00
dbg ( ) < < " <svg> element encountered. " ;
reconstruct_the_active_formatting_elements ( ) ;
2020-06-21 06:58:03 +02:00
adjust_svg_attributes ( token ) ;
adjust_foreign_attributes ( token ) ;
2020-10-10 02:48:05 +01:00
insert_foreign_element ( token , Namespace : : SVG ) ;
2020-06-21 06:58:03 +02:00
if ( token . is_self_closing ( ) ) {
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
}
2020-05-30 18:46:39 +02:00
return ;
2020-05-29 21:20:49 +02:00
}
2020-06-07 23:53:16 +02:00
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : frame , HTML : : TagNames : : head , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) ) {
2020-05-29 21:20:49 +02:00
PARSE_ERROR ( ) ;
return ;
}
// Any other start tag
2020-05-24 22:21:25 +02:00
if ( token . is_start_tag ( ) ) {
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
return ;
}
2020-05-24 22:39:59 +02:00
if ( token . is_end_tag ( ) ) {
2020-05-30 16:22:25 +02:00
AnyOtherEndTag :
2020-07-26 19:37:56 +02:00
RefPtr < DOM : : Element > node ;
2020-05-24 22:39:59 +02:00
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
node = m_stack_of_open_elements . elements ( ) [ i ] ;
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = token . tag_name ( ) ) {
2020-05-24 22:39:59 +02:00
generate_implied_end_tags ( token . tag_name ( ) ) ;
if ( node ! = current_node ( ) ) {
2020-05-27 01:49:40 +02:00
PARSE_ERROR ( ) ;
2020-05-24 22:39:59 +02:00
}
while ( & current_node ( ) ! = node ) {
m_stack_of_open_elements . pop ( ) ;
}
m_stack_of_open_elements . pop ( ) ;
break ;
}
2020-10-12 01:51:28 +01:00
if ( is_special_tag ( node - > local_name ( ) , node - > namespace_ ( ) ) ) {
2020-05-30 18:19:15 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-05-24 22:39:59 +02:00
}
return ;
}
2020-06-21 06:58:03 +02:00
}
// Adjusts MathML attribute names on the given token: asks the token to rename its
// all-lowercase "definitionurl" attribute to the mixed-case "definitionURL".
// Called by the <math> start-tag handling before the foreign element is inserted.
// (Exact matching/renaming semantics live in HTMLToken::adjust_attribute_name.)
void HTMLDocumentParser : : adjust_mathml_attributes ( HTMLToken & token )
{
token . adjust_attribute_name ( " definitionurl " , " definitionURL " ) ;
}
2020-10-12 01:51:28 +01:00
void HTMLDocumentParser : : adjust_svg_tag_names ( HTMLToken & token )
{
token . adjust_tag_name ( " altglyph " , " altGlyph " ) ;
token . adjust_tag_name ( " altglyphdef " , " altGlyphDef " ) ;
token . adjust_tag_name ( " altglyphitem " , " altGlyphItem " ) ;
token . adjust_tag_name ( " animatecolor " , " animateColor " ) ;
token . adjust_tag_name ( " animatemotion " , " animateMotion " ) ;
token . adjust_tag_name ( " animatetransform " , " animateTransform " ) ;
token . adjust_tag_name ( " clippath " , " clipPath " ) ;
token . adjust_tag_name ( " feblend " , " feBlend " ) ;
token . adjust_tag_name ( " fecolormatrix " , " feColorMatrix " ) ;
token . adjust_tag_name ( " fecomponenttransfer " , " feComponentTransfer " ) ;
token . adjust_tag_name ( " fecomposite " , " feComposite " ) ;
token . adjust_tag_name ( " feconvolvematrix " , " feConvolveMatrix " ) ;
token . adjust_tag_name ( " fediffuselighting " , " feDiffuseLighting " ) ;
token . adjust_tag_name ( " fedisplacementmap " , " feDisplacementMap " ) ;
token . adjust_tag_name ( " fedistantlight " , " feDistantLight " ) ;
token . adjust_tag_name ( " fedropshadow " , " feDropShadow " ) ;
token . adjust_tag_name ( " feflood " , " feFlood " ) ;
token . adjust_tag_name ( " fefunca " , " feFuncA " ) ;
token . adjust_tag_name ( " fefuncb " , " feFuncB " ) ;
token . adjust_tag_name ( " fefuncg " , " feFuncG " ) ;
token . adjust_tag_name ( " fefuncr " , " feFuncR " ) ;
token . adjust_tag_name ( " fegaussianblur " , " feGaussianBlur " ) ;
token . adjust_tag_name ( " feimage " , " feImage " ) ;
token . adjust_tag_name ( " femerge " , " feMerge " ) ;
token . adjust_tag_name ( " femergenode " , " feMergeNode " ) ;
token . adjust_tag_name ( " femorphology " , " feMorphology " ) ;
token . adjust_tag_name ( " feoffset " , " feOffset " ) ;
token . adjust_tag_name ( " fepointlight " , " fePointLight " ) ;
token . adjust_tag_name ( " fespecularlighting " , " feSpecularLighting " ) ;
token . adjust_tag_name ( " fespotlight " , " feSpotlight " ) ;
token . adjust_tag_name ( " glyphref " , " glyphRef " ) ;
token . adjust_tag_name ( " lineargradient " , " linearGradient " ) ;
token . adjust_tag_name ( " radialgradient " , " radialGradient " ) ;
token . adjust_tag_name ( " textpath " , " textPath " ) ;
}
2020-06-21 06:58:03 +02:00
// Adjusts SVG attribute names on the given token.
//
// Each call pairs an all-lowercase attribute name with the mixed-case spelling it
// should be adjusted to; the lookup and rename themselves are performed by
// HTMLToken::adjust_attribute_name. Called by the <svg> start-tag handling before the
// foreign element is inserted. Entries are kept in alphabetical order.
void HTMLDocumentParser : : adjust_svg_attributes ( HTMLToken & token )
{
token . adjust_attribute_name ( " attributename " , " attributeName " ) ;
token . adjust_attribute_name ( " attributetype " , " attributeType " ) ;
token . adjust_attribute_name ( " basefrequency " , " baseFrequency " ) ;
token . adjust_attribute_name ( " baseprofile " , " baseProfile " ) ;
token . adjust_attribute_name ( " calcmode " , " calcMode " ) ;
token . adjust_attribute_name ( " clippathunits " , " clipPathUnits " ) ;
token . adjust_attribute_name ( " diffuseconstant " , " diffuseConstant " ) ;
token . adjust_attribute_name ( " edgemode " , " edgeMode " ) ;
token . adjust_attribute_name ( " filterunits " , " filterUnits " ) ;
token . adjust_attribute_name ( " glyphref " , " glyphRef " ) ;
token . adjust_attribute_name ( " gradienttransform " , " gradientTransform " ) ;
token . adjust_attribute_name ( " gradientunits " , " gradientUnits " ) ;
token . adjust_attribute_name ( " kernelmatrix " , " kernelMatrix " ) ;
token . adjust_attribute_name ( " kernelunitlength " , " kernelUnitLength " ) ;
token . adjust_attribute_name ( " keypoints " , " keyPoints " ) ;
token . adjust_attribute_name ( " keysplines " , " keySplines " ) ;
token . adjust_attribute_name ( " keytimes " , " keyTimes " ) ;
token . adjust_attribute_name ( " lengthadjust " , " lengthAdjust " ) ;
token . adjust_attribute_name ( " limitingconeangle " , " limitingConeAngle " ) ;
token . adjust_attribute_name ( " markerheight " , " markerHeight " ) ;
token . adjust_attribute_name ( " markerunits " , " markerUnits " ) ;
token . adjust_attribute_name ( " markerwidth " , " markerWidth " ) ;
token . adjust_attribute_name ( " maskcontentunits " , " maskContentUnits " ) ;
token . adjust_attribute_name ( " maskunits " , " maskUnits " ) ;
token . adjust_attribute_name ( " numoctaves " , " numOctaves " ) ;
token . adjust_attribute_name ( " pathlength " , " pathLength " ) ;
token . adjust_attribute_name ( " patterncontentunits " , " patternContentUnits " ) ;
token . adjust_attribute_name ( " patterntransform " , " patternTransform " ) ;
token . adjust_attribute_name ( " patternunits " , " patternUnits " ) ;
token . adjust_attribute_name ( " pointsatx " , " pointsAtX " ) ;
token . adjust_attribute_name ( " pointsaty " , " pointsAtY " ) ;
token . adjust_attribute_name ( " pointsatz " , " pointsAtZ " ) ;
token . adjust_attribute_name ( " preservealpha " , " preserveAlpha " ) ;
token . adjust_attribute_name ( " preserveaspectratio " , " preserveAspectRatio " ) ;
token . adjust_attribute_name ( " primitiveunits " , " primitiveUnits " ) ;
token . adjust_attribute_name ( " refx " , " refX " ) ;
token . adjust_attribute_name ( " refy " , " refY " ) ;
token . adjust_attribute_name ( " repeatcount " , " repeatCount " ) ;
token . adjust_attribute_name ( " repeatdur " , " repeatDur " ) ;
token . adjust_attribute_name ( " requiredextensions " , " requiredExtensions " ) ;
token . adjust_attribute_name ( " requiredfeatures " , " requiredFeatures " ) ;
token . adjust_attribute_name ( " specularconstant " , " specularConstant " ) ;
token . adjust_attribute_name ( " specularexponent " , " specularExponent " ) ;
token . adjust_attribute_name ( " spreadmethod " , " spreadMethod " ) ;
token . adjust_attribute_name ( " startoffset " , " startOffset " ) ;
token . adjust_attribute_name ( " stddeviation " , " stdDeviation " ) ;
token . adjust_attribute_name ( " stitchtiles " , " stitchTiles " ) ;
token . adjust_attribute_name ( " surfacescale " , " surfaceScale " ) ;
token . adjust_attribute_name ( " systemlanguage " , " systemLanguage " ) ;
token . adjust_attribute_name ( " tablevalues " , " tableValues " ) ;
token . adjust_attribute_name ( " targetx " , " targetX " ) ;
token . adjust_attribute_name ( " targety " , " targetY " ) ;
token . adjust_attribute_name ( " textlength " , " textLength " ) ;
token . adjust_attribute_name ( " viewbox " , " viewBox " ) ;
token . adjust_attribute_name ( " viewtarget " , " viewTarget " ) ;
token . adjust_attribute_name ( " xchannelselector " , " xChannelSelector " ) ;
token . adjust_attribute_name ( " ychannelselector " , " yChannelSelector " ) ;
token . adjust_attribute_name ( " zoomandpan " , " zoomAndPan " ) ;
}
2020-10-12 01:51:28 +01:00
2020-06-21 06:58:03 +02:00
// Adjusts namespaced ("foreign") attributes on the given token. Called by the <math>
// and <svg> start-tag handling after the language-specific attribute adjustments.
//
// Every call hands HTMLToken::adjust_foreign_attribute four values. Judging by the
// data they look like (attribute name as written, prefix, local name, namespace) --
// NOTE(review): confirm the parameter meaning against HTMLToken.
// Three groups are covered: xlink:* attributes in the XLink namespace, xml:* attributes
// in the XML namespace, and the xmlns / xmlns:xlink declarations in the XMLNS namespace.
void HTMLDocumentParser : : adjust_foreign_attributes ( HTMLToken & token )
{
2020-10-12 01:51:28 +01:00
token . adjust_foreign_attribute ( " xlink:actuate " , " xlink " , " actuate " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:arcrole " , " xlink " , " arcrole " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:href " , " xlink " , " href " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:role " , " xlink " , " role " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:show " , " xlink " , " show " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:title " , " xlink " , " title " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xlink:type " , " xlink " , " type " , Namespace : : XLink ) ;
token . adjust_foreign_attribute ( " xml:lang " , " xml " , " lang " , Namespace : : XML ) ;
token . adjust_foreign_attribute ( " xml:space " , " xml " , " space " , Namespace : : XML ) ;
// Plain "xmlns" is the one entry whose second argument carries no prefix text.
token . adjust_foreign_attribute ( " xmlns " , " " , " xmlns " , Namespace : : XMLNS ) ;
token . adjust_foreign_attribute ( " xmlns:xlink " , " xmlns " , " xlink " , Namespace : : XMLNS ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 22:00:46 +02:00
void HTMLDocumentParser : : increment_script_nesting_level ( )
{
+ + m_script_nesting_level ;
}
void HTMLDocumentParser : : decrement_script_nesting_level ( )
{
ASSERT ( m_script_nesting_level ) ;
- - m_script_nesting_level ;
}
2020-05-24 20:24:43 +02:00
// Token handler for text content. Judging by its name it serves the Text insertion
// mode (the mode the <textarea> start-tag handling switches to) -- presumably invoked
// by the mode dispatcher while that mode is active; confirm against the dispatcher.
//
// Handles, in order:
//   - character tokens: appended via insert_character();
//   - end of file: parse error, current node popped, original insertion mode restored
//     and the token reprocessed there;
//   - </script>: the script element is popped and prepared, then any pending
//     parsing-blocking scripts are executed;
//   - any other end tag: current node popped, original insertion mode restored.
// Any other token kind reaches the trailing TODO().
void HTMLDocumentParser : : handle_text ( HTMLToken & token )
2020-05-24 00:14:23 +02:00
{
2020-05-24 20:24:43 +02:00
if ( token . is_character ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-24 20:24:43 +02:00
return ;
}
2020-06-06 16:32:27 +02:00
if ( token . is_end_of_file ( ) ) {
PARSE_ERROR ( ) ;
// A <script> cut off by end-of-file is flagged as already started before being popped.
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : script )
2020-07-26 17:16:18 +02:00
downcast < HTMLScriptElement > ( current_node ( ) ) . set_already_started ( { } , true ) ;
2020-06-06 16:32:27 +02:00
m_stack_of_open_elements . pop ( ) ;
// Go back to the mode that was active before text handling began and let it see the EOF token.
m_insertion_mode = m_original_insertion_mode ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : script ) {
2020-06-23 16:38:44 +02:00
// Make sure the <script> element has up-to-date text content before preparing the script.
flush_character_insertions ( ) ;
// Take a reference to the script element first; it is popped off the stack right below.
2020-07-26 17:16:18 +02:00
NonnullRefPtr < HTMLScriptElement > script = downcast < HTMLScriptElement > ( current_node ( ) ) ;
2020-05-24 22:00:46 +02:00
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = m_original_insertion_mode ;
// FIXME: Handle tokenizer insertion point stuff here.
// The nesting level is raised only for the duration of prepare_script().
increment_script_nesting_level ( ) ;
script - > prepare_script ( { } ) ;
decrement_script_nesting_level ( ) ;
// Back at the outermost level: the parser no longer needs to stay paused.
if ( script_nesting_level ( ) = = 0 )
m_parser_pause_flag = false ;
// FIXME: Handle tokenizer insertion point stuff here too.
// Run every script the document reports as pending and parsing-blocking.
2020-05-27 23:01:04 +02:00
while ( document ( ) . pending_parsing_blocking_script ( ) ) {
if ( script_nesting_level ( ) ! = 0 ) {
// Nested case: the pause flag is set, but the rest is unimplemented (TODO() below).
m_parser_pause_flag = true ;
// FIXME: Abort the processing of any nested invocations of the tokenizer,
// yielding control back to the caller. (Tokenization will resume when
// the caller returns to the "outer" tree construction stage.)
TODO ( ) ;
} else {
auto the_script = document ( ) . take_pending_parsing_blocking_script ( { } ) ;
// The tokenizer stays blocked until the script is about to execute.
m_tokenizer . set_blocked ( true ) ;
// FIXME: If the parser's Document has a style sheet that is blocking scripts
// or the script's "ready to be parser-executed" flag is not set:
// spin the event loop until the parser's Document has no style sheet
// that is blocking scripts and the script's "ready to be parser-executed"
// flag is set.
// NOTE(review): this early return leaves the tokenizer marked as blocked
// (set_blocked(true) above is never undone on this path) -- confirm that is intended.
2020-06-15 18:37:48 +02:00
if ( the_script - > failed_to_load ( ) )
return ;
2020-05-27 23:01:04 +02:00
ASSERT ( the_script - > is_ready_to_be_parser_executed ( ) ) ;
if ( m_aborted )
return ;
m_tokenizer . set_blocked ( false ) ;
// FIXME: Handle tokenizer insertion point stuff here too.
// Execution happens at nesting level exactly 1; both asserts pin the level to 0 around it.
ASSERT ( script_nesting_level ( ) = = 0 ) ;
increment_script_nesting_level ( ) ;
the_script - > execute_script ( ) ;
decrement_script_nesting_level ( ) ;
ASSERT ( script_nesting_level ( ) = = 0 ) ;
m_parser_pause_flag = false ;
// FIXME: Handle tokenizer insertion point stuff here too.
}
}
2020-05-24 22:00:46 +02:00
return ;
2020-05-24 20:24:43 +02:00
}
2020-05-27 23:01:04 +02:00
// Any end tag other than </script>: close the current element and resume the previous mode.
2020-05-24 20:24:43 +02:00
if ( token . is_end_tag ( ) ) {
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = m_original_insertion_mode ;
return ;
}
// No handling exists for any remaining token kind.
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-28 00:27:46 +02:00
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_context ( )
{
2020-07-23 18:18:13 +02:00
while ( ! current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : table , HTML : : TagNames : : template_ , HTML : : TagNames : : html ) )
2020-05-28 00:27:46 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-07-21 19:03:05 +01:00
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : html )
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-28 00:27:46 +02:00
}
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_row_context ( )
{
2020-07-23 18:18:13 +02:00
while ( ! current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : tr , HTML : : TagNames : : template_ , HTML : : TagNames : : html ) )
2020-05-28 00:27:46 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-07-21 19:03:05 +01:00
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : html )
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-28 00:27:46 +02:00
}
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_body_context ( )
{
2020-07-23 18:18:13 +02:00
while ( ! current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : template_ , HTML : : TagNames : : html ) )
2020-05-28 00:27:46 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-07-21 19:03:05 +01:00
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : html )
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-28 00:27:46 +02:00
}
void HTMLDocumentParser : : handle_in_row ( HTMLToken & token )
{
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : th , HTML : : TagNames : : td ) ) {
2020-05-28 00:27:46 +02:00
clear_the_stack_back_to_a_table_row_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InCell ;
m_list_of_active_formatting_elements . add_marker ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : tr ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : tr ) ) {
2020-05-28 00:27:46 +02:00
PARSE_ERROR ( ) ;
return ;
}
clear_the_stack_back_to_a_table_row_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
return ;
}
2020-06-21 17:49:02 +02:00
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) )
| | ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : tr ) ) {
2020-05-30 13:07:47 +02:00
PARSE_ERROR ( ) ;
return ;
}
clear_the_stack_back_to_a_table_row_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead ) ) {
2020-05-30 13:07:47 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : tr ) ) {
2020-05-30 13:07:47 +02:00
return ;
}
clear_the_stack_back_to_a_table_row_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : html , HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
2020-05-30 13:07:47 +02:00
PARSE_ERROR ( ) ;
return ;
}
process_using_the_rules_for ( InsertionMode : : InTable , token ) ;
2020-05-28 00:27:46 +02:00
}
2020-05-28 11:45:40 +02:00
void HTMLDocumentParser : : close_the_cell ( )
{
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( ! current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
2020-05-28 11:45:40 +02:00
PARSE_ERROR ( ) ;
}
2020-07-23 18:18:13 +02:00
while ( ! current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th ) )
2020-05-28 11:45:40 +02:00
m_stack_of_open_elements . pop ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InRow ;
}
2020-05-28 00:27:46 +02:00
void HTMLDocumentParser : : handle_in_cell ( HTMLToken & token )
{
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
2020-05-28 00:27:46 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = token . tag_name ( ) ) {
2020-05-28 00:27:46 +02:00
PARSE_ERROR ( ) ;
}
2020-05-28 18:20:55 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
2020-05-28 00:27:46 +02:00
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) {
2020-07-21 19:03:05 +01:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : td ) & & ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : th ) ) {
ASSERT ( m_parsing_fragment ) ;
2020-05-28 11:45:40 +02:00
PARSE_ERROR ( ) ;
return ;
}
close_the_cell ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-28 00:27:46 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : html ) ) {
2020-05-28 00:27:46 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : table , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) {
2020-06-21 17:49:02 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) ) {
2020-05-30 13:07:47 +02:00
PARSE_ERROR ( ) ;
return ;
}
close_the_cell ( ) ;
// Reprocess the token.
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-28 00:27:46 +02:00
}
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
}
2020-05-30 17:57:41 +02:00
void HTMLDocumentParser : : handle_in_table_text ( HTMLToken & token )
{
if ( token . is_character ( ) ) {
2020-08-05 16:31:20 -04:00
if ( token . code_point ( ) = = 0 ) {
2020-05-30 17:57:41 +02:00
PARSE_ERROR ( ) ;
return ;
}
m_pending_table_character_tokens . append ( token ) ;
return ;
}
for ( auto & pending_token : m_pending_table_character_tokens ) {
ASSERT ( pending_token . is_character ( ) ) ;
if ( ! pending_token . is_parser_whitespace ( ) ) {
2020-08-30 20:10:19 +02:00
// If any of the tokens in the pending table character tokens list
2020-05-30 17:57:41 +02:00
// are character tokens that are not ASCII whitespace, then this is a parse error:
// reprocess the character tokens in the pending table character tokens list using
// the rules given in the "anything else" entry in the "in table" insertion mode.
2020-08-30 20:10:19 +02:00
PARSE_ERROR ( ) ;
m_foster_parenting = true ;
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
m_foster_parenting = false ;
return ;
2020-05-30 17:57:41 +02:00
}
}
for ( auto & pending_token : m_pending_table_character_tokens ) {
2020-08-05 16:31:20 -04:00
insert_character ( pending_token . code_point ( ) ) ;
2020-05-30 17:57:41 +02:00
}
m_insertion_mode = m_original_insertion_mode ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
}
2020-05-28 00:27:46 +02:00
void HTMLDocumentParser : : handle_in_table_body ( HTMLToken & token )
{
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : tr ) {
2020-05-28 00:27:46 +02:00
clear_the_stack_back_to_a_table_body_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : th , HTML : : TagNames : : td ) ) {
2020-06-01 22:08:48 +02:00
PARSE_ERROR ( ) ;
clear_the_stack_back_to_a_table_body_context ( ) ;
2020-07-23 17:30:03 +02:00
insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : tr ) ) ;
2020-06-01 22:08:48 +02:00
m_insertion_mode = InsertionMode : : InRow ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-30 17:57:41 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead ) ) {
2020-05-30 19:58:52 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
clear_the_stack_back_to_a_table_body_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
return ;
2020-05-30 17:57:41 +02:00
}
2020-06-07 23:53:16 +02:00
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead ) )
| | ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) ) {
2020-06-21 06:58:03 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : tbody )
2020-06-21 17:30:18 +02:00
& & ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : thead )
& & ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : tfoot ) ) {
2020-06-21 06:58:03 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-05-28 00:27:46 +02:00
clear_the_stack_back_to_a_table_body_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
process_using_the_rules_for ( InsertionMode : : InTable , token ) ;
return ;
}
2020-05-30 17:57:41 +02:00
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : html , HTML : : TagNames : : td , HTML : : TagNames : : th , HTML : : TagNames : : tr ) ) {
2020-05-30 17:57:41 +02:00
PARSE_ERROR ( ) ;
return ;
}
process_using_the_rules_for ( InsertionMode : : InTable , token ) ;
2020-05-28 00:27:46 +02:00
}
2020-05-25 20:30:34 +02:00
void HTMLDocumentParser : : handle_in_table ( HTMLToken & token )
{
2020-07-23 18:18:13 +02:00
if ( token . is_character ( ) & & current_node ( ) . local_name ( ) . is_one_of ( HTML : : TagNames : : table , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) {
2020-05-30 17:57:41 +02:00
m_pending_table_character_tokens . clear ( ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : InTableText ;
process_using_the_rules_for ( InsertionMode : : InTableText , token ) ;
return ;
2020-05-25 20:30:34 +02:00
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : caption ) {
2020-06-13 05:09:54 +01:00
clear_the_stack_back_to_a_table_context ( ) ;
m_list_of_active_formatting_elements . add_marker ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InCaption ;
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : colgroup ) {
2020-06-13 06:22:18 +01:00
clear_the_stack_back_to_a_table_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InColumnGroup ;
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : col ) {
2020-06-13 06:22:18 +01:00
clear_the_stack_back_to_a_table_context ( ) ;
2020-07-23 17:30:03 +02:00
insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : colgroup ) ) ;
2020-06-13 06:22:18 +01:00
m_insertion_mode = InsertionMode : : InColumnGroup ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead ) ) {
2020-05-30 13:07:47 +02:00
clear_the_stack_back_to_a_table_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th , HTML : : TagNames : : tr ) ) {
2020-05-28 00:27:46 +02:00
clear_the_stack_back_to_a_table_context ( ) ;
2020-07-23 17:30:03 +02:00
insert_html_element ( HTMLToken : : make_start_tag ( HTML : : TagNames : : tbody ) ) ;
2020-05-28 00:27:46 +02:00
m_insertion_mode = InsertionMode : : InTableBody ;
2020-06-13 07:36:09 +01:00
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-28 00:27:46 +02:00
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) {
2020-05-25 20:30:34 +02:00
PARSE_ERROR ( ) ;
2020-06-13 07:36:09 +01:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : table ) )
return ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : table ) ;
reset_the_insertion_mode_appropriately ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-25 20:30:34 +02:00
}
2020-06-13 07:36:09 +01:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) {
2020-06-07 23:53:16 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : table ) ) {
2020-05-25 20:30:34 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-05-28 18:09:31 +02:00
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : table ) ;
2020-05-25 20:30:34 +02:00
reset_the_insertion_mode_appropriately ( ) ;
return ;
}
2020-06-13 07:36:09 +01:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : html , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : style , HTML : : TagNames : : script , HTML : : TagNames : : template_ ) )
2020-06-21 06:58:03 +02:00
| | ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) ) {
2020-06-13 07:36:09 +01:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : input ) {
auto type_attribute = token . attribute ( HTML : : AttributeNames : : type ) ;
if ( type_attribute . is_null ( ) | | ! type_attribute . equals_ignoring_case ( " hidden " ) ) {
goto AnythingElse ;
}
PARSE_ERROR ( ) ;
insert_html_element ( token ) ;
// FIXME: Is this the correct interpretation of "Pop that input element off the stack of open elements."?
// Because this wording is the first time it's seen in the spec.
// Other times it's worded as: "Immediately pop the current node off the stack of open elements."
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : form ) {
PARSE_ERROR ( ) ;
if ( m_form_element | | m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
return ;
}
2020-07-26 17:16:18 +02:00
m_form_element = downcast < HTMLFormElement > ( * insert_html_element ( token ) ) ;
2020-06-13 07:36:09 +01:00
// FIXME: See previous FIXME, as this is the same situation but for form.
m_stack_of_open_elements . pop ( ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
AnythingElse :
PARSE_ERROR ( ) ;
m_foster_parenting = true ;
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
m_foster_parenting = false ;
2020-05-25 20:30:34 +02:00
}
2020-05-30 19:58:52 +02:00
void HTMLDocumentParser : : handle_in_select_in_table ( HTMLToken & token )
{
2020-06-21 06:58:03 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : table , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : tr , HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : select ) ;
reset_the_insertion_mode_appropriately ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : table , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead , HTML : : TagNames : : tr , HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
PARSE_ERROR ( ) ;
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) )
return ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : select ) ;
reset_the_insertion_mode_appropriately ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
process_using_the_rules_for ( InsertionMode : : InSelect , token ) ;
2020-05-30 19:58:52 +02:00
}
void HTMLDocumentParser : : handle_in_select ( HTMLToken & token )
{
if ( token . is_character ( ) ) {
2020-08-05 16:31:20 -04:00
if ( token . code_point ( ) = = 0 ) {
2020-05-30 19:58:52 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-05-30 19:58:52 +02:00
return ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
2020-05-30 19:58:52 +02:00
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : option ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : option ) {
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
}
insert_html_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : optgroup ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : option ) {
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
}
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : optgroup ) {
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
}
insert_html_element ( token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : optgroup ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : option & & node_before_current_node ( ) . local_name ( ) = = HTML : : TagNames : : optgroup )
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : optgroup ) {
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
} else {
PARSE_ERROR ( ) ;
return ;
}
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : option ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) = = HTML : : TagNames : : option ) {
2020-05-30 19:58:52 +02:00
m_stack_of_open_elements . pop ( ) ;
} else {
PARSE_ERROR ( ) ;
return ;
}
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : select ) {
2020-06-15 19:57:20 +02:00
if ( ! m_stack_of_open_elements . has_in_select_scope ( HTML : : TagNames : : select ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-30 19:58:52 +02:00
PARSE_ERROR ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : select ) ;
2020-05-30 19:58:52 +02:00
reset_the_insertion_mode_appropriately ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : select ) {
2020-05-30 19:58:52 +02:00
PARSE_ERROR ( ) ;
2020-07-21 19:03:05 +01:00
if ( ! m_stack_of_open_elements . has_in_select_scope ( HTML : : TagNames : : select ) ) {
ASSERT ( m_parsing_fragment ) ;
2020-05-30 19:58:52 +02:00
return ;
2020-07-21 19:03:05 +01:00
}
2020-05-30 19:58:52 +02:00
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : select ) ;
2020-05-30 19:58:52 +02:00
reset_the_insertion_mode_appropriately ( ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : input , HTML : : TagNames : : keygen , HTML : : TagNames : : textarea ) ) {
2020-05-30 19:58:52 +02:00
PARSE_ERROR ( ) ;
2020-06-07 23:53:16 +02:00
if ( ! m_stack_of_open_elements . has_in_select_scope ( HTML : : TagNames : : select ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-30 19:58:52 +02:00
return ;
}
2020-06-07 23:53:16 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : select ) ;
2020-05-30 19:58:52 +02:00
reset_the_insertion_mode_appropriately ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : script , HTML : : TagNames : : template_ ) ) {
2020-05-30 19:58:52 +02:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
2020-06-07 23:53:16 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
2020-05-30 19:58:52 +02:00
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
PARSE_ERROR ( ) ;
}
2020-06-13 05:09:54 +01:00
void HTMLDocumentParser : : handle_in_caption ( HTMLToken & token )
{
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : caption ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : caption ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-06-13 05:09:54 +01:00
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : caption )
2020-06-13 05:09:54 +01:00
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : caption ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) )
2020-06-21 06:58:03 +02:00
| | ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : table ) ) {
2020-06-13 05:09:54 +01:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( HTML : : TagNames : : caption ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-06-13 05:09:54 +01:00
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : caption )
2020-06-13 05:09:54 +01:00
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : caption ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : body , HTML : : TagNames : : col , HTML : : TagNames : : colgroup , HTML : : TagNames : : html , HTML : : TagNames : : tbody , HTML : : TagNames : : td , HTML : : TagNames : : tfoot , HTML : : TagNames : : th , HTML : : TagNames : : thead , HTML : : TagNames : : tr ) ) {
PARSE_ERROR ( ) ;
return ;
}
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
}
2020-06-13 06:22:18 +01:00
void HTMLDocumentParser : : handle_in_column_group ( HTMLToken & token )
{
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-06-13 06:22:18 +01:00
return ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : col ) {
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : colgroup ) {
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : colgroup ) {
2020-06-13 06:22:18 +01:00
PARSE_ERROR ( ) ;
return ;
}
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : col ) {
PARSE_ERROR ( ) ;
return ;
}
if ( ( token . is_start_tag ( ) | | token . is_end_tag ( ) ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( current_node ( ) . local_name ( ) ! = HTML : : TagNames : : colgroup ) {
2020-06-13 06:22:18 +01:00
PARSE_ERROR ( ) ;
return ;
}
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
}
2020-06-21 06:58:03 +02:00
void HTMLDocumentParser : : handle_in_template ( HTMLToken & token )
{
if ( token . is_character ( ) | | token . is_comment ( ) | | token . is_doctype ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : base , HTML : : TagNames : : basefont , HTML : : TagNames : : bgsound , HTML : : TagNames : : link , HTML : : TagNames : : meta , HTML : : TagNames : : noframes , HTML : : TagNames : : script , HTML : : TagNames : : style , HTML : : TagNames : : template_ , HTML : : TagNames : : title ) ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : template_ ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : caption , HTML : : TagNames : : colgroup , HTML : : TagNames : : tbody , HTML : : TagNames : : tfoot , HTML : : TagNames : : thead ) ) {
m_stack_of_template_insertion_modes . take_last ( ) ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InTable ) ;
m_insertion_mode = InsertionMode : : InTable ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : col ) {
m_stack_of_template_insertion_modes . take_last ( ) ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InColumnGroup ) ;
m_insertion_mode = InsertionMode : : InColumnGroup ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : tr ) {
m_stack_of_template_insertion_modes . take_last ( ) ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InTableBody ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
m_stack_of_template_insertion_modes . take_last ( ) ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InRow ) ;
m_insertion_mode = InsertionMode : : InRow ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_start_tag ( ) ) {
m_stack_of_template_insertion_modes . take_last ( ) ;
m_stack_of_template_insertion_modes . append ( InsertionMode : : InBody ) ;
m_insertion_mode = InsertionMode : : InBody ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_end_tag ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
if ( ! m_stack_of_open_elements . contains ( HTML : : TagNames : : template_ ) ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-06-21 06:58:03 +02:00
stop_parsing ( ) ;
2020-07-21 19:03:05 +01:00
return ;
2020-06-21 06:58:03 +02:00
}
2020-07-21 19:03:05 +01:00
PARSE_ERROR ( ) ;
2020-06-21 06:58:03 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( HTML : : TagNames : : template_ ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_stack_of_template_insertion_modes . take_last ( ) ;
reset_the_insertion_mode_appropriately ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
}
}
void HTMLDocumentParser : : handle_in_frameset ( HTMLToken & token )
{
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-06-21 06:58:03 +02:00
return ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : frameset ) {
insert_html_element ( token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : frameset ) {
// FIXME: If the current node is the root html element, then this is a parse error; ignore the token. (fragment case)
m_stack_of_open_elements . pop ( ) ;
2020-07-23 18:18:13 +02:00
if ( ! m_parsing_fragment & & current_node ( ) . local_name ( ) ! = HTML : : TagNames : : frameset ) {
2020-06-21 06:58:03 +02:00
m_insertion_mode = InsertionMode : : AfterFrameset ;
}
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : frame ) {
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : noframes ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
//FIXME: If the current node is not the root html element, then this is a parse error.
stop_parsing ( ) ;
return ;
}
PARSE_ERROR ( ) ;
}
void HTMLDocumentParser : : handle_after_frameset ( HTMLToken & token )
{
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
2020-08-05 16:31:20 -04:00
insert_character ( token . code_point ( ) ) ;
2020-06-21 06:58:03 +02:00
return ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) {
m_insertion_mode = InsertionMode : : AfterAfterFrameset ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : noframes ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
stop_parsing ( ) ;
return ;
}
PARSE_ERROR ( ) ;
}
void HTMLDocumentParser : : handle_after_after_frameset ( HTMLToken & token )
{
if ( token . is_comment ( ) ) {
2020-07-26 19:37:56 +02:00
auto comment = adopt ( * new DOM : : Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
2020-06-21 06:58:03 +02:00
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
if ( token . is_doctype ( ) | | token . is_parser_whitespace ( ) | | ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : html ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
stop_parsing ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : noframes ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
PARSE_ERROR ( ) ;
}
2020-10-12 01:51:28 +01:00
// Processes one token using the rules for foreign content; the code below
// distinguishes the MathML and SVG namespaces of the adjusted current node.
//
// Several paths are still unimplemented and end in TODO(). Note that two labels
// (AnyOtherStartTag, ScriptEndTag) sit inside later `if` bodies and are entered
// via `goto` from earlier branches, so statement order matters here.
void HTMLDocumentParser : : process_using_the_rules_for_foreign_content ( HTMLToken & token )
{
if ( token . is_character ( ) ) {
// A NUL code point is a parse error and is replaced by U+FFFD.
if ( token . code_point ( ) = = 0 ) {
PARSE_ERROR ( ) ;
insert_character ( 0xFFFD ) ;
return ;
}
// Whitespace is inserted without touching m_frameset_ok.
if ( token . is_parser_whitespace ( ) ) {
insert_character ( token . code_point ( ) ) ;
return ;
}
// Any other character is inserted and clears m_frameset_ok.
insert_character ( token . code_point ( ) ) ;
m_frameset_ok = false ;
return ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
// Start tags from the list below, and <font> carrying a color, face or size
// attribute, are a parse error. When parsing a fragment they are then handled
// like any other start tag; the non-fragment path is not implemented yet.
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( HTML : : TagNames : : b , HTML : : TagNames : : big , HTML : : TagNames : : blockquote , HTML : : TagNames : : body , HTML : : TagNames : : br , HTML : : TagNames : : center , HTML : : TagNames : : code , HTML : : TagNames : : dd , HTML : : TagNames : : div , HTML : : TagNames : : dl , HTML : : TagNames : : dt , HTML : : TagNames : : em , HTML : : TagNames : : embed , HTML : : TagNames : : h1 , HTML : : TagNames : : h2 , HTML : : TagNames : : h3 , HTML : : TagNames : : h4 , HTML : : TagNames : : h5 , HTML : : TagNames : : h6 , HTML : : TagNames : : head , HTML : : TagNames : : hr , HTML : : TagNames : : i , HTML : : TagNames : : img , HTML : : TagNames : : li , HTML : : TagNames : : listing , HTML : : TagNames : : menu , HTML : : TagNames : : meta , HTML : : TagNames : : nobr , HTML : : TagNames : : ol , HTML : : TagNames : : p , HTML : : TagNames : : pre , HTML : : TagNames : : ruby , HTML : : TagNames : : s , HTML : : TagNames : : small , HTML : : TagNames : : span , HTML : : TagNames : : strong , HTML : : TagNames : : strike , HTML : : TagNames : : sub , HTML : : TagNames : : sup , HTML : : TagNames : : table , HTML : : TagNames : : tt , HTML : : TagNames : : u , HTML : : TagNames : : ul , HTML : : TagNames : : var ) )
| | ( token . is_start_tag ( ) & & token . tag_name ( ) = = HTML : : TagNames : : font & & ( token . has_attribute ( HTML : : AttributeNames : : color ) | | token . has_attribute ( HTML : : AttributeNames : : face ) | | token . has_attribute ( HTML : : AttributeNames : : size ) ) ) ) {
PARSE_ERROR ( ) ;
if ( m_parsing_fragment ) {
goto AnyOtherStartTag ;
}
// NOTE(review): TODO() presumably aborts; if it returned, control would fall
// through into the generic start tag handling below - confirm.
TODO ( ) ;
}
if ( token . is_start_tag ( ) ) {
// Also entered via goto from the branch above (fragment case).
AnyOtherStartTag :
// Namespace-specific adjustments first, then the adjustments applied to all
// foreign elements.
if ( adjusted_current_node ( ) . namespace_ ( ) = = Namespace : : MathML ) {
adjust_mathml_attributes ( token ) ;
} else if ( adjusted_current_node ( ) . namespace_ ( ) = = Namespace : : SVG ) {
adjust_svg_tag_names ( token ) ;
adjust_svg_attributes ( token ) ;
}
adjust_foreign_attributes ( token ) ;
// The new element is inserted in the adjusted current node's namespace.
insert_foreign_element ( token , adjusted_current_node ( ) . namespace_ ( ) ) ;
if ( token . is_self_closing ( ) ) {
// A self-closing SVG script is handed to the script end tag handling below.
if ( token . tag_name ( ) = = SVG : : TagNames : : script & & current_node ( ) . namespace_ ( ) = = Namespace : : SVG ) {
token . acknowledge_self_closing_flag_if_set ( ) ;
goto ScriptEndTag ;
}
// Any other self-closing element is popped again right away.
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
}
return ;
}
// NOTE(review): this condition checks only that the token is an end tag and that
// the current node is an SVG script element; the token's own tag name is not
// compared here - confirm that is intended.
if ( token . is_end_tag ( ) & & current_node ( ) . namespace_ ( ) = = Namespace : : SVG & & current_node ( ) . tag_name ( ) = = SVG : : TagNames : : script ) {
// Also entered via goto for a self-closing SVG script start tag.
ScriptEndTag :
m_stack_of_open_elements . pop ( ) ;
// The remaining script end tag processing is not implemented yet.
TODO ( ) ;
}
if ( token . is_end_tag ( ) ) {
auto & node = current_node ( ) ;
// FIXME: Not sure if this is the correct to_lowercase, as the specification says "to ASCII lowercase"
if ( node . tag_name ( ) . to_lowercase ( ) ! = token . tag_name ( ) )
PARSE_ERROR ( ) ;
// NOTE(review): `node` is a reference and is never re-pointed at an earlier stack
// entry, so each path through the first iteration returns or reaches TODO();
// walking further down the stack is not implemented yet.
while ( true ) {
// Reaching the bottom of the stack is only expected when parsing a fragment.
if ( & node = = & m_stack_of_open_elements . first ( ) ) {
ASSERT ( m_parsing_fragment ) ;
return ;
}
// FIXME: See the above FIXME
if ( node . tag_name ( ) . to_lowercase ( ) = = token . tag_name ( ) ) {
// Pop everything above `node`, then `node` itself.
while ( & current_node ( ) ! = & node )
m_stack_of_open_elements . pop ( ) ;
m_stack_of_open_elements . pop ( ) ;
return ;
}
TODO ( ) ;
}
}
// Reached only by a token that matched none of the branches above.
ASSERT_NOT_REACHED ( ) ;
}
2020-07-21 19:03:05 +01:00
void HTMLDocumentParser : : reset_the_insertion_mode_appropriately ( )
2020-05-25 20:30:34 +02:00
{
2020-05-28 00:26:33 +02:00
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
2020-07-21 19:03:05 +01:00
bool last = i = = 0 ;
2020-06-25 23:42:08 +02:00
// NOTE: When parsing fragments, we substitute the context element for the root of the stack of open elements.
2020-07-26 19:37:56 +02:00
RefPtr < DOM : : Element > node ;
2020-07-21 19:03:05 +01:00
if ( last & & m_parsing_fragment ) {
node = m_context_element ;
2020-06-25 23:42:08 +02:00
} else {
node = m_stack_of_open_elements . elements ( ) . at ( i ) ;
}
2020-05-28 00:26:33 +02:00
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : select ) {
2020-05-28 00:26:33 +02:00
TODO ( ) ;
}
2020-07-23 18:18:13 +02:00
if ( ! last & & node - > local_name ( ) . is_one_of ( HTML : : TagNames : : td , HTML : : TagNames : : th ) ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InCell ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : tr ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) . is_one_of ( HTML : : TagNames : : tbody , HTML : : TagNames : : thead , HTML : : TagNames : : tfoot ) ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InTableBody ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : caption ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InCaption ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : colgroup ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InColumnGroup ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : table ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : template_ ) {
2020-08-19 22:30:33 +01:00
m_insertion_mode = m_stack_of_template_insertion_modes . last ( ) ;
return ;
2020-05-28 00:26:33 +02:00
}
2020-07-23 18:18:13 +02:00
if ( ! last & & node - > local_name ( ) = = HTML : : TagNames : : head ) {
2020-07-21 19:03:05 +01:00
m_insertion_mode = InsertionMode : : InHead ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : body ) {
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InBody ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : frameset ) {
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InFrameset ;
return ;
}
2020-07-23 18:18:13 +02:00
if ( node - > local_name ( ) = = HTML : : TagNames : : html ) {
2020-07-21 19:03:05 +01:00
if ( ! m_head_element ) {
ASSERT ( m_parsing_fragment ) ;
m_insertion_mode = InsertionMode : : BeforeHead ;
return ;
}
m_insertion_mode = InsertionMode : : AfterHead ;
return ;
2020-05-28 00:26:33 +02:00
}
}
2020-07-21 19:03:05 +01:00
ASSERT ( m_parsing_fragment ) ;
2020-05-28 00:26:33 +02:00
m_insertion_mode = InsertionMode : : InBody ;
2020-05-25 20:30:34 +02:00
}
2020-05-24 00:14:23 +02:00
// Returns the name of the current insertion mode (m_insertion_mode) as a C string.
//
// The switch cases are not written out by hand: __ENUMERATE_INSERTION_MODE is
// defined to emit one `case` per mode that returns the stringified enumerator
// name (#mode), and ENUMERATE_INSERTION_MODES (defined outside this function,
// presumably a list macro invoking it once per mode) expands to those cases.
const char * HTMLDocumentParser : : insertion_mode_name ( ) const
{
switch ( m_insertion_mode ) {
# define __ENUMERATE_INSERTION_MODE(mode) \
case InsertionMode : : mode : \
return # mode ;
ENUMERATE_INSERTION_MODES
# undef __ENUMERATE_INSERTION_MODE
}
// Reached only if m_insertion_mode holds a value none of the generated cases covers.
ASSERT_NOT_REACHED ( ) ;
}
2020-07-26 19:37:56 +02:00
// Returns a reference to the document held in m_document.
// m_document is dereferenced unconditionally, so it must be set when this is called.
DOM::Document& HTMLDocumentParser::document()
{
    return *m_document;
}
2020-06-25 23:42:08 +02:00
2020-07-26 19:37:56 +02:00
// Parses `markup` as an HTML fragment in the context of `context_element`.
//
// A separate parser is created for the markup, flagged as parsing a fragment and
// given `context_element` as its context element. A fresh <html> root element is
// pushed as the only entry on its stack of open elements, the parser is run, and
// the root's children are then detached, adopted into the context element's
// document and returned in order.
NonnullRefPtrVector<DOM::Node> HTMLDocumentParser::parse_html_fragment(DOM::Element& context_element, const StringView& markup)
{
    HTMLDocumentParser parser(markup, "utf-8");
    parser.m_context_element = context_element;
    parser.m_parsing_fragment = true;
    parser.document().set_quirks_mode(context_element.document().mode());

    // The tokenizer's starting state depends on the kind of context element; for every
    // element not listed here it is left in its initial state.
    const auto& context_name = context_element.local_name();
    if (context_name.is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
    } else if (context_name.is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
    } else if (context_name == HTML::TagNames::script) {
        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
    } else if (context_name == HTML::TagNames::noscript) {
        // noscript content is only treated as raw text when scripting is enabled.
        if (context_element.document().is_scripting_enabled())
            parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
    } else if (context_name == HTML::TagNames::plaintext) {
        parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
    }

    // The root <html> element is the sole initial entry on the stack of open elements.
    auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML);
    parser.document().append_child(root);
    parser.m_stack_of_open_elements.push(root);

    if (context_name == HTML::TagNames::template_)
        parser.m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);

    // FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.

    parser.reset_the_insertion_mode_appropriately();

    // The form element pointer is the nearest form element, starting at the context
    // element itself and walking up through its parent elements.
    for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) {
        if (!is<HTMLFormElement>(*form_candidate))
            continue;
        parser.m_form_element = downcast<HTMLFormElement>(*form_candidate);
        break;
    }

    parser.run(context_element.document().url());

    // Move the parsed nodes out of the temporary root and into the context element's document.
    NonnullRefPtrVector<DOM::Node> children;
    for (RefPtr<DOM::Node> child = root->first_child(); child; child = root->first_child()) {
        root->remove_child(*child);
        context_element.document().adopt_node(*child);
        children.append(*child);
    }
    return children;
}
2020-05-24 00:14:23 +02:00
}