mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-22 09:12:13 -05:00
LibWeb: Add more HTML tokenization states to Swift implementation
This patch adds support for start and end tags, as well as script tag rules.
This commit is contained in:
parent
91de0438fe
commit
d96c7edfb6
Notes:
github-actions[bot]
2024-10-02 07:45:32 +00:00
Author: https://github.com/ADKaster Commit: https://github.com/LadybirdBrowser/ladybird/commit/d96c7edfb66 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1589
3 changed files with 942 additions and 22 deletions
|
@ -76,7 +76,7 @@ struct TestHTMLTokenizerSwift {
|
|||
#expect(token2 == nil)
|
||||
}
|
||||
|
||||
@Test func dataStateTagOpen() {
|
||||
@Test func tagOpenOnly() {
|
||||
guard let tokenizer = HTMLTokenizer(input: "<") else {
|
||||
Issue.record("Failed to create tokenizer for '<'")
|
||||
return
|
||||
|
@ -84,11 +84,14 @@ struct TestHTMLTokenizerSwift {
|
|||
#expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
|
||||
|
||||
let token = tokenizer.nextToken()
|
||||
#expect(token?.type == .EndOfFile)
|
||||
#expect(tokenizer.state == HTMLTokenizer.State.TagOpen)
|
||||
#expect(token?.type == .Character(codePoint: "<"))
|
||||
|
||||
let token2 = tokenizer.nextToken()
|
||||
#expect(token2 == nil)
|
||||
#expect(token2?.type == .EndOfFile)
|
||||
#expect(tokenizer.state == HTMLTokenizer.State.TagOpen)
|
||||
|
||||
let token3 = tokenizer.nextToken()
|
||||
#expect(token3 == nil)
|
||||
}
|
||||
|
||||
@Test func dataStateNulChar() {
|
||||
|
@ -112,4 +115,141 @@ struct TestHTMLTokenizerSwift {
|
|||
|
||||
#expect(tokenizer.state == HTMLTokenizer.State.Data)
|
||||
}
|
||||
|
||||
/// Tokenizes a lone `<script>` start tag that carries one double-quoted attribute.
/// Expects a single StartTag token, then EndOfFile, with the tokenizer in the Data
/// state both before and after.
@Test func scriptTagWithAttributes() {
    let candidate = HTMLTokenizer(input: "<script type=\"text/javascript\">")
    guard let tokenizer = candidate else {
        Issue.record("Failed to create tokenizer for '<script type=\"text/javascript\">'")
        return
    }
    #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

    let typeAttribute = HTMLToken.Attribute(localName: "type", value: "text/javascript")
    let startTag = tokenizer.nextToken()
    #expect(startTag?.type == .StartTag(tagName: "script", attributes: [typeAttribute]))

    let endOfFile = tokenizer.nextToken()
    #expect(endOfFile?.type == .EndOfFile)

    #expect(tokenizer.state == HTMLTokenizer.State.Data)
}
|
||||
|
||||
/// Tokenizes a `<script>` element with inline text.
/// Expects the start tag, one Character token per character of the script body,
/// the matching end tag, and finally EndOfFile.
@Test func scriptWithContent() {
    let candidate = HTMLTokenizer(input: "<script>var x = 1;</script>")
    guard let tokenizer = candidate else {
        Issue.record("Failed to create tokenizer for '<script>var x = 1;</script>'")
        return
    }

    let openingTag = tokenizer.nextToken()
    #expect(openingTag?.type == .StartTag(tagName: "script", attributes: []))

    // The script body is expected to come back one character at a time.
    let scriptBody = "var x = 1;"
    for expectedCharacter in scriptBody {
        let characterToken = tokenizer.nextToken()
        #expect(characterToken?.type == .Character(codePoint: expectedCharacter))
    }

    let closingTag = tokenizer.nextToken()
    #expect(closingTag?.type == .EndTag(tagName: "script"))

    let endOfFile = tokenizer.nextToken()
    #expect(endOfFile?.type == .EndOfFile)
}
|
||||
|
||||
/// Tokenizes `<div>hi</div>`.
/// Expects a start tag without attributes, the characters "h" and "i", the end tag,
/// and finally EndOfFile.
@Test func simpleDivWithContent() {
    let candidate = HTMLTokenizer(input: "<div>hi</div>")
    guard let tokenizer = candidate else {
        Issue.record("Failed to create tokenizer for '<div>hi</div>'")
        return
    }
    #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

    let openingTag = tokenizer.nextToken()
    #expect(openingTag?.type == .StartTag(tagName: "div", attributes: []))

    // Text content: "h" followed by "i", each as its own Character token.
    for expectedCharacter in "hi" {
        let characterToken = tokenizer.nextToken()
        #expect(characterToken?.type == .Character(codePoint: expectedCharacter))
    }

    let closingTag = tokenizer.nextToken()
    #expect(closingTag?.type == .EndTag(tagName: "div"))

    let endOfFile = tokenizer.nextToken()
    #expect(endOfFile?.type == .EndOfFile)
}
|
||||
|
||||
/// Tokenizes `<div class="foo">hi</div>`.
/// Expects a start tag carrying the `class` attribute, the characters "h" and "i",
/// the end tag, and finally EndOfFile.
@Test func simpleDivWithContentAndAttributes() {
    let candidate = HTMLTokenizer(input: "<div class=\"foo\">hi</div>")
    guard let tokenizer = candidate else {
        Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div>'")
        return
    }
    #expect(tokenizer.state == HTMLTokenizer.State.Data)  // initial state

    let classAttribute = HTMLToken.Attribute(localName: "class", value: "foo")
    let openingTag = tokenizer.nextToken()
    #expect(openingTag?.type == .StartTag(tagName: "div", attributes: [classAttribute]))

    // Text content: "h" followed by "i", each as its own Character token.
    for expectedCharacter in "hi" {
        let characterToken = tokenizer.nextToken()
        #expect(characterToken?.type == .Character(codePoint: expectedCharacter))
    }

    let closingTag = tokenizer.nextToken()
    #expect(closingTag?.type == .EndTag(tagName: "div"))

    let endOfFile = tokenizer.nextToken()
    #expect(endOfFile?.type == .EndOfFile)
}
|
||||
|
||||
/// Tokenizes two consecutive `<div>` elements whose `class` attributes use the
/// unquoted and the single-quoted value syntax respectively.
/// Expects, for each element: a start tag with the attribute, one Character token per
/// character of text, and an end tag; then EndOfFile.
@Test func severalDivsWithAttributesAndContent() {
    // Explicitly use unquoted and single quotes for attribute values
    guard let tokenizer = HTMLTokenizer(input: "<div class=foo>hi</div><div class='bar'>bye</div>") else {
        // The failure message quotes the input exactly as it is passed above.
        Issue.record("Failed to create tokenizer for '<div class=foo>hi</div><div class='bar'>bye</div>'")
        return
    }

    let token = tokenizer.nextToken()
    #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))

    for codePoint in "hi" {
        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: codePoint))
    }

    let token2 = tokenizer.nextToken()
    #expect(token2?.type == .EndTag(tagName: "div"))

    let token3 = tokenizer.nextToken()
    #expect(token3?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "bar")]))

    for codePoint in "bye" {
        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: codePoint))
    }

    let token4 = tokenizer.nextToken()
    #expect(token4?.type == .EndTag(tagName: "div"))

    let token5 = tokenizer.nextToken()
    #expect(token5?.type == .EndOfFile)
}
|
||||
|
||||
/// Tokenizes a `<div>` with two double-quoted attributes, text content, and an end tag
/// that itself carries an (unquoted) attribute.
/// Expects the start tag with both attributes in source order, one Character token per
/// character of text, an end tag that reports its attribute, and finally EndOfFile.
@Test func startTagWithMultipleAttributes() {
    guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>") else {
        // The failure message quotes the full input, including the end tag's attribute.
        Issue.record("Failed to create tokenizer for '<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>'")
        return
    }

    let token = tokenizer.nextToken()
    #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo"), HTMLToken.Attribute(localName: "id", value: "bar")]))

    for codePoint in "hi" {
        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: codePoint))
    }

    let token2 = tokenizer.nextToken()
    #expect(token2?.type == .EndTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "attr", value: "endTagAttributeWhee")]))

    let token3 = tokenizer.nextToken()
    #expect(token3?.type == .EndOfFile)
}
|
||||
}
|
||||
|
|
|
@ -14,14 +14,19 @@ public class HTMLToken {
|
|||
}
|
||||
|
||||
public struct Attribute: Equatable {
|
||||
var prefix: Swift.String?
|
||||
var localName: Swift.String
|
||||
var namespace_: Swift.String?
|
||||
var value: Swift.String
|
||||
var nameStartPosition: Position
|
||||
var nameEndPosition: Position
|
||||
var valueStartPosition: Position
|
||||
var valueEndPosition: Position
|
||||
public var prefix: Swift.String? = nil
|
||||
public var localName: Swift.String
|
||||
public var namespace_: Swift.String? = nil
|
||||
public var value: Swift.String
|
||||
public var nameStartPosition = Position()
|
||||
public var nameEndPosition = Position()
|
||||
public var valueStartPosition = Position()
|
||||
public var valueEndPosition = Position()
|
||||
|
||||
public init(localName: Swift.String, value: Swift.String) {
|
||||
self.localName = localName
|
||||
self.value = value
|
||||
}
|
||||
}
|
||||
|
||||
public enum TokenType: Equatable {
|
||||
|
@ -33,14 +38,14 @@ public class HTMLToken {
|
|||
forceQuirksMode: Bool)
|
||||
case StartTag(
|
||||
tagName: Swift.String,
|
||||
selfClosing: Bool,
|
||||
selfClosingAcknowledged: Bool,
|
||||
attributes: [Attribute])
|
||||
selfClosing: Bool = false,
|
||||
selfClosingAcknowledged: Bool = false,
|
||||
attributes: [Attribute] = [])
|
||||
case EndTag(
|
||||
tagName: Swift.String,
|
||||
selfClosing: Bool,
|
||||
selfClosingAcknowledged: Bool,
|
||||
attributes: [Attribute])
|
||||
selfClosing: Bool = false,
|
||||
selfClosingAcknowledged: Bool = false,
|
||||
attributes: [Attribute] = [])
|
||||
case Comment(data: Swift.String)
|
||||
case Character(codePoint: Character)
|
||||
case EndOfFile
|
||||
|
@ -53,6 +58,24 @@ public class HTMLToken {
|
|||
return false
|
||||
}
|
||||
|
||||
/// Returns `true` when this token's type is `.EndTag`, regardless of its payload.
public func isEndTag() -> Bool {
    guard case .EndTag = self.type else {
        return false
    }
    return true
}
|
||||
|
||||
/// Returns `true` when this token's type is `.StartTag`, regardless of its payload.
public func isStartTag() -> Bool {
    switch self.type {
    case .StartTag:
        return true
    default:
        return false
    }
}
|
||||
|
||||
/// Returns `true` when this token is either a start tag or an end tag.
public func isTag() -> Bool {
    if isStartTag() {
        return true
    }
    return isEndTag()
}
|
||||
|
||||
public func isParserWhitespace() -> Bool {
|
||||
precondition(isCharacter(), "isParserWhitespace() called on non-character token")
|
||||
|
||||
|
@ -73,6 +96,52 @@ public class HTMLToken {
|
|||
public var startPosition = Position()
|
||||
public var endPosition = Position()
|
||||
|
||||
// Associated values of an enum case cannot be mutated in place (see
// https://forums.swift.org/t/in-place-mutation-of-an-enum-associated-value/11747),
// so the setter rebuilds the whole case with the new attribute list.

/// The attribute list of a start or end tag token.
///
/// Both the getter and the setter trap via `preconditionFailure` when the token's
/// type is neither `.StartTag` nor `.EndTag`.
public var attributes: [Attribute] {
    get {
        switch self.type {
        case .StartTag(_, _, _, let current), .EndTag(_, _, _, let current):
            return current
        default:
            preconditionFailure("attributes called on non-tag token")
        }
    }
    set(replacement) {
        switch self.type {
        case .StartTag(let name, let closing, let acknowledged, _):
            self.type = .StartTag(
                tagName: name,
                selfClosing: closing,
                selfClosingAcknowledged: acknowledged,
                attributes: replacement)
        case .EndTag(let name, let closing, let acknowledged, _):
            self.type = .EndTag(
                tagName: name,
                selfClosing: closing,
                selfClosingAcknowledged: acknowledged,
                attributes: replacement)
        default:
            preconditionFailure("attributes= called on non-tag token")
        }
    }
}
|
||||
/// The tag name of a start or end tag token.
///
/// The setter rebuilds the enum case with the new name while carrying over the other
/// associated values. Both accessors trap via `preconditionFailure` when the token's
/// type is neither `.StartTag` nor `.EndTag`.
public var tagName: Swift.String {
    get {
        switch self.type {
        case .StartTag(let current, _, _, _), .EndTag(let current, _, _, _):
            return current
        default:
            preconditionFailure("tagName called on non-tag token")
        }
    }
    set(replacement) {
        switch self.type {
        case .StartTag(_, let closing, let acknowledged, let existingAttributes):
            self.type = .StartTag(
                tagName: replacement,
                selfClosing: closing,
                selfClosingAcknowledged: acknowledged,
                attributes: existingAttributes)
        case .EndTag(_, let closing, let acknowledged, let existingAttributes):
            self.type = .EndTag(
                tagName: replacement,
                selfClosing: closing,
                selfClosingAcknowledged: acknowledged,
                attributes: existingAttributes)
        default:
            preconditionFailure("tagName= called on non-tag token")
        }
    }
}
|
||||
|
||||
public init() {}
|
||||
public init(type: TokenType) {
|
||||
self.type = type
|
||||
|
|
|
@ -18,6 +18,12 @@ extension Swift.String {
|
|||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
/// Moves the contents out of this string.
///
/// - Returns: The value the string held before the call.
///
/// After the call the receiver is the empty string. The result may be ignored when the
/// caller only wants to clear the string.
@discardableResult
public mutating func takeString() -> Swift.String {
    let taken = self
    self = ""
    return taken
}
|
||||
}
|
||||
|
||||
public class HTMLTokenizer {
|
||||
|
@ -115,9 +121,24 @@ public class HTMLTokenizer {
|
|||
private var currentToken = HTMLToken()
|
||||
private var queuedTokens = Deque<HTMLToken>()
|
||||
|
||||
private var currentBuilder = Swift.String()
|
||||
private var temporaryBuffer = Swift.String()
|
||||
private var lastStartTagName: Swift.String? = nil
|
||||
private var currentTokensAttributes: [HTMLToken.Attribute]? = nil
|
||||
private var currentAttribute: HTMLToken.Attribute? = nil
|
||||
|
||||
private var aborted = false
|
||||
private var hasEmittedEOF = false
|
||||
|
||||
// These sets are fixed lookup tables, so they are declared as constants.

// https://infra.spec.whatwg.org/#ascii-upper-alpha
static private let asciiUpperAlpha = CharacterSet(charactersIn: "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

// https://infra.spec.whatwg.org/#ascii-lower-alpha
static private let asciiLowerAlpha = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyz")

// https://infra.spec.whatwg.org/#ascii-alpha
static private let asciiAlpha = asciiUpperAlpha.union(asciiLowerAlpha)
|
||||
|
||||
public init() {
|
||||
self.cursor = self.input.startIndex
|
||||
self.previousCursor = self.input.startIndex
|
||||
|
@ -181,14 +202,37 @@ public class HTMLTokenizer {
|
|||
|
||||
/// Makes `token` the token currently being built.
///
/// When the new token is a start or end tag, the pending attribute list is reset to
/// an empty array so attributes can be collected for it.
func createNewToken(_ token: HTMLToken) {
    // FIXME: Assign Position
    self.currentToken = token
    guard self.currentToken.isTag() else {
        return
    }
    self.currentTokensAttributes = []
}
|
||||
|
||||
/// Tells `finalizeCurrentAttribute(_:)` what to do with the text in `currentBuilder`.
enum AttributeStringBehavior {
    /// Store the builder's text as the attribute's local name.
    case SetName

    /// Store the builder's text as the attribute's value.
    case SetValue

    /// Discard the builder's text.
    case IgnoreString
}
|
||||
/// Completes the attribute under construction and appends it to the pending
/// attribute list of the current tag token.
///
/// - Parameter behavior: Whether the text accumulated in `currentBuilder` becomes the
///   attribute's name, becomes its value, or is thrown away.
///
/// The builder is emptied in every case and `currentAttribute` is reset to `nil`.
/// Traps if there is no attribute under construction or no pending attribute list.
func finalizeCurrentAttribute(_ behavior: AttributeStringBehavior) {
    precondition(self.currentAttribute != nil && self.currentTokensAttributes != nil)

    // The builder is always drained; only the destination of its text varies.
    let pendingText = self.currentBuilder.takeString()
    var finished = self.currentAttribute!
    switch behavior {
    case .SetName:
        finished.localName = pendingText
    case .SetValue:
        finished.value = pendingText
    case .IgnoreString:
        break
    }

    self.currentTokensAttributes!.append(finished)
    self.currentAttribute = nil
}
|
||||
|
||||
/// Outcome of a single tokenizer step, interpreted by the `nextToken` driver loop.
enum NextTokenState {
    /// Hand `token` back to the caller of `nextToken`.
    case Emit(token: HTMLToken?)

    /// The tokenizer state was changed; carry on with a fresh input character.
    case SwitchTo

    /// Run the next step with `inputCharacter` instead of consuming new input.
    case Reconsume(inputCharacter: Character?)

    /// NOTE(review): presumably asks the driver to drain `queuedTokens` first; the
    /// handling is not visible here, confirm against `nextToken`.
    case ReprocessQueue

    /// Stay in the current state; carry on with a fresh input character.
    case Continue
}
|
||||
|
||||
public func nextToken(stopAtInsertionPoint: Bool = false) -> HTMLToken? {
|
||||
|
@ -210,7 +254,7 @@ public class HTMLTokenizer {
|
|||
switch nextTokenImpl(nextInputCharacter) {
|
||||
case .Emit(let token):
|
||||
return token
|
||||
case .SwitchTo:
|
||||
case .SwitchTo, .Continue:
|
||||
nextInputCharacter = nil
|
||||
break
|
||||
case .Reconsume(let character):
|
||||
|
@ -226,12 +270,16 @@ public class HTMLTokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
/// Signals that the tokenizer should keep going without changing state.
func continueInCurrentState() -> NextTokenState {
    let outcome = NextTokenState.Continue
    return outcome
}
|
||||
|
||||
/// Moves the tokenizer into `state`.
///
/// - Parameter state: The tokenizer state to enter.
/// - Returns: `.SwitchTo`, so the driver loop proceeds in the new state.
func switchTo(_ state: State) -> NextTokenState {
    let outcome = NextTokenState.SwitchTo
    self.state = state
    return outcome
}
|
||||
|
||||
func reconsume(_ character: Character, `in` state: State) -> NextTokenState {
|
||||
func reconsume(_ character: Character?, `in` state: State) -> NextTokenState {
|
||||
self.state = state
|
||||
return .Reconsume(inputCharacter: character)
|
||||
}
|
||||
|
@ -251,6 +299,10 @@ public class HTMLTokenizer {
|
|||
|
||||
func switchToAndEmitCurrentToken(_ state: State) -> NextTokenState {
|
||||
self.state = state
|
||||
if self.currentToken.isTag() {
|
||||
self.currentToken.attributes = self.currentTokensAttributes ?? []
|
||||
self.currentTokensAttributes = nil
|
||||
}
|
||||
self.queuedTokens.append(self.currentToken)
|
||||
self.currentToken = HTMLToken()
|
||||
return .Emit(token: self.queuedTokens.popFirst()!)
|
||||
|
@ -280,6 +332,10 @@ public class HTMLTokenizer {
|
|||
|
||||
func emitCurrentTokenFollowedByEOF() -> NextTokenState {
|
||||
precondition(!self.hasEmittedEOF)
|
||||
if self.currentToken.isTag() {
|
||||
self.currentToken.attributes = self.currentTokensAttributes ?? []
|
||||
self.currentTokensAttributes = nil
|
||||
}
|
||||
self.queuedTokens.append(self.currentToken)
|
||||
self.currentToken = HTMLToken()
|
||||
return emitEOF()
|
||||
|
@ -292,20 +348,44 @@ public class HTMLTokenizer {
|
|||
return .Emit(token: self.queuedTokens.popFirst()!)
|
||||
}
|
||||
|
||||
/// Empties `temporaryBuffer` and forwards its contents.
///
/// When the character reference was consumed as part of an attribute value, the text is
/// appended to `currentBuilder`; otherwise each character is queued as its own
/// Character token.
func flushCodepointsConsumedAsACharacterReference() {
    let insideAttribute = consumedAsPartOfAnAttribute()
    let consumed = self.temporaryBuffer.takeString()

    guard !insideAttribute else {
        self.currentBuilder += consumed
        return
    }

    for character in consumed {
        let characterToken = HTMLToken(type: .Character(codePoint: character))
        self.queuedTokens.append(characterToken)
    }
}
|
||||
|
||||
/// Returns `true` when the return state is one of the three attribute-value states
/// (double-quoted, single-quoted, or unquoted).
func consumedAsPartOfAnAttribute() -> Bool {
    let target = self.returnState
    if target == .AttributeValueDoubleQuoted {
        return true
    }
    if target == .AttributeValueSingleQuoted {
        return true
    }
    return target == .AttributeValueUnquoted
}
|
||||
|
||||
/// Checks whether an end tag token matches the most recently recorded start tag name.
///
/// - Parameter token: Must be an end tag token; any other token type traps.
/// - Returns: `true` when `lastStartTagName` is set and equals the token's tag name,
///   `false` otherwise (including when no start tag name has been recorded).
func isAppropriateEndTagToken(_ token: HTMLToken) -> Bool {
    guard case .EndTag(let candidateName, _, _, _) = token.type else {
        preconditionFailure("isAppropriateEndTagToken called with non-end-tag token")
    }
    guard let expectedName = self.lastStartTagName else {
        return false
    }
    return expectedName == candidateName
}
|
||||
|
||||
func nextTokenImpl(_ nextInputCharacter: Character? = nil) -> NextTokenState {
|
||||
let dontConsumeNextInputCharacter = {
|
||||
self.restoreCursorToPrevious()
|
||||
}
|
||||
let _ = dontConsumeNextInputCharacter
|
||||
|
||||
// FIXME: flushCodepointsConsumedAsACharacterReference needs currentBuilder
|
||||
|
||||
// Handle reconsume by passing the character around in the state enum
|
||||
let currentInputCharacter = nextInputCharacter ?? nextCodePoint()
|
||||
|
||||
switch self.state {
|
||||
// 13.2.5.1 Data state, https://html.spec.whatwg.org/multipage/parsing.html#data-state
|
||||
case .Data:
|
||||
precondition(currentTokensAttributes == nil)
|
||||
switch currentInputCharacter {
|
||||
case "&":
|
||||
self.returnState = .Data
|
||||
|
@ -320,6 +400,637 @@ public class HTMLTokenizer {
|
|||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
|
||||
// 13.2.5.2 RCDATA state, https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
|
||||
case .RCDATA:
|
||||
switch currentInputCharacter {
|
||||
case "&":
|
||||
self.returnState = .RCDATA
|
||||
return switchTo(.CharacterReference)
|
||||
case "<":
|
||||
return switchTo(.RCDATALessThanSign)
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return emitCharacter("\u{FFFD}")
|
||||
case nil:
|
||||
return emitEOF()
|
||||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
|
||||
// 13.2.5.3. RAWTEXT state, https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
|
||||
case .RAWTEXT:
|
||||
switch currentInputCharacter {
|
||||
case "<":
|
||||
return switchTo(.RAWTEXTLessThanSign)
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return emitCharacter("\u{FFFD}")
|
||||
case nil:
|
||||
return emitEOF()
|
||||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.4 Script data state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
|
||||
case .ScriptData:
|
||||
switch currentInputCharacter {
|
||||
case "<":
|
||||
return switchTo(.ScriptDataLessThanSign)
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return emitCharacter("\u{FFFD}")
|
||||
case nil:
|
||||
return emitEOF()
|
||||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.5 PLAINTEXT state, https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
|
||||
case .PLAINTEXT:
|
||||
switch currentInputCharacter {
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return emitCharacter("\u{FFFD}")
|
||||
case nil:
|
||||
return emitEOF()
|
||||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.6 Tag open state https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
|
||||
case .TagOpen:
|
||||
switch currentInputCharacter {
|
||||
case "!":
|
||||
return switchTo(.MarkupDeclarationOpen)
|
||||
case "/":
|
||||
return switchTo(.EndTagOpen)
|
||||
case let c? where HTMLTokenizer.asciiAlpha.contains(c.unicodeScalars.first!):
|
||||
createNewToken(HTMLToken(type: .StartTag(tagName: "")))
|
||||
return reconsume(currentInputCharacter!, in: .TagName)
|
||||
case "?":
|
||||
// FIXME: log_parse_error()
|
||||
createNewToken(HTMLToken(type: .Comment(data: "")))
|
||||
return reconsume(currentInputCharacter!, in: .BogusComment)
|
||||
case nil:
|
||||
// FIXME: log_parse_error()
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
return emitEOF()
|
||||
default:
|
||||
// FIXME: log_parse_error()
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
return reconsume(currentInputCharacter!, in: .Data)
|
||||
}
|
||||
// 13.2.5.7 End tag open state, https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
|
||||
case .EndTagOpen:
|
||||
switch currentInputCharacter {
|
||||
case let c? where HTMLTokenizer.asciiAlpha.contains(c.unicodeScalars.first!):
|
||||
createNewToken(HTMLToken(type: .EndTag(tagName: "")))
|
||||
return reconsume(currentInputCharacter!, in: .TagName)
|
||||
default:
|
||||
return emitEOF()
|
||||
}
|
||||
// 13.2.5.8 Tag name state, https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
||||
case .TagName:
|
||||
switch currentInputCharacter {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
self.currentToken.tagName = self.currentBuilder.takeString()
|
||||
return switchTo(.BeforeAttributeName)
|
||||
case "/":
|
||||
self.currentToken.tagName = self.currentBuilder.takeString()
|
||||
return switchTo(.SelfClosingStartTag)
|
||||
case ">":
|
||||
self.currentToken.tagName = self.currentBuilder.takeString()
|
||||
return switchToAndEmitCurrentToken(.Data)
|
||||
case let c? where HTMLTokenizer.asciiUpperAlpha.contains(c.unicodeScalars.first!):
|
||||
currentBuilder.append(Character(Unicode.Scalar(c.asciiValue! + 0x20)))
|
||||
return continueInCurrentState()
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
currentBuilder += "\u{FFFD}"
|
||||
return continueInCurrentState()
|
||||
case nil:
|
||||
// FIXME: log_parse_error()
|
||||
return emitEOF()
|
||||
default:
|
||||
currentBuilder.append(currentInputCharacter!)
|
||||
return continueInCurrentState()
|
||||
}
|
||||
// 13.2.5.9 RCDATA less-than sign state, https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
|
||||
case .RCDATALessThanSign:
|
||||
switch currentInputCharacter {
|
||||
case "/":
|
||||
self.temporaryBuffer = ""
|
||||
return switchTo(.RCDATAEndTagOpen)
|
||||
default:
|
||||
return emitCharacterAndReconsume("<", in: .RCDATA, currentInputCharacter: currentInputCharacter)
|
||||
}
|
||||
// 13.2.5.10 RCDATA end tag open state, https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
|
||||
case .RCDATAEndTagOpen:
|
||||
switch currentInputCharacter {
|
||||
case let c? where HTMLTokenizer.asciiAlpha.contains(c.unicodeScalars.first!):
|
||||
createNewToken(HTMLToken(type: .EndTag(tagName: "")))
|
||||
return reconsume(currentInputCharacter!, in: .RCDATAEndTagName)
|
||||
default:
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
|
||||
return reconsume(currentInputCharacter, in: .RCDATA)
|
||||
}
|
||||
// 13.2.5.11 RCDATA end tag name state, https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
|
||||
case .RCDATAEndTagName:
|
||||
switch currentInputCharacter {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchTo(.BeforeAttributeName)
|
||||
}
|
||||
break
|
||||
case "/":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchTo(.SelfClosingStartTag)
|
||||
}
|
||||
break
|
||||
case ">":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchToAndEmitCurrentToken(.Data)
|
||||
}
|
||||
break
|
||||
case let c? where HTMLTokenizer.asciiUpperAlpha.contains(c.unicodeScalars.first!):
|
||||
self.currentBuilder.append(Character(Unicode.Scalar(c.asciiValue! + 0x20)))
|
||||
self.temporaryBuffer.append(c)
|
||||
return continueInCurrentState()
|
||||
case let c? where HTMLTokenizer.asciiLowerAlpha.contains(c.unicodeScalars.first!):
|
||||
self.currentBuilder.append(c)
|
||||
self.temporaryBuffer.append(c)
|
||||
return continueInCurrentState()
|
||||
default:
|
||||
break
|
||||
}
|
||||
|
||||
// First three steps fall through to the "anything else" block
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
|
||||
// NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
|
||||
self.currentBuilder = ""
|
||||
for codePoint in self.temporaryBuffer {
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: codePoint)))
|
||||
}
|
||||
return reconsume(currentInputCharacter, in: .RCDATA)
|
||||
// 13.2.5.15 Script data less-than sign state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
|
||||
case .ScriptDataLessThanSign:
|
||||
switch currentInputCharacter {
|
||||
case "/":
|
||||
self.temporaryBuffer = ""
|
||||
return switchTo(.ScriptDataEndTagOpen)
|
||||
case "!":
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "!")))
|
||||
return switchTo(.ScriptDataEscapeStart)
|
||||
default:
|
||||
return emitCharacterAndReconsume("<", in: .ScriptData, currentInputCharacter: currentInputCharacter)
|
||||
}
|
||||
// 13.2.5.16 Script data end tag open state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
|
||||
case .ScriptDataEndTagOpen:
|
||||
switch currentInputCharacter {
|
||||
case let c? where HTMLTokenizer.asciiAlpha.contains(c.unicodeScalars.first!):
|
||||
createNewToken(HTMLToken(type: .EndTag(tagName: "")))
|
||||
return reconsume(currentInputCharacter!, in: .ScriptDataEndTagName)
|
||||
default:
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
|
||||
return reconsume(currentInputCharacter, in: .ScriptData)
|
||||
}
|
||||
// 13.2.5.17 Script data end tag name state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
|
||||
case .ScriptDataEndTagName:
|
||||
switch currentInputCharacter {
|
||||
case "\t", "\n", "\u{000C}", " ":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchTo(.BeforeAttributeName)
|
||||
}
|
||||
break
|
||||
case "/":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchTo(.SelfClosingStartTag)
|
||||
}
|
||||
break
|
||||
case ">":
|
||||
if self.isAppropriateEndTagToken(currentToken) {
|
||||
return switchToAndEmitCurrentToken(.Data)
|
||||
}
|
||||
break
|
||||
case let c? where HTMLTokenizer.asciiUpperAlpha.contains(c.unicodeScalars.first!):
|
||||
self.currentBuilder.append(Character(Unicode.Scalar(c.asciiValue! + 0x20)))
|
||||
self.temporaryBuffer.append(c)
|
||||
return continueInCurrentState()
|
||||
case let c? where HTMLTokenizer.asciiLowerAlpha.contains(c.unicodeScalars.first!):
|
||||
self.currentBuilder.append(c)
|
||||
self.temporaryBuffer.append(c)
|
||||
return continueInCurrentState()
|
||||
default:
|
||||
break
|
||||
}
|
||||
|
||||
// First three steps fall through to the "anything else" block
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
|
||||
// NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
|
||||
self.currentBuilder = ""
|
||||
for codePoint in self.temporaryBuffer {
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: codePoint)))
|
||||
}
|
||||
return reconsume(currentInputCharacter, in: .ScriptData)
|
||||
// 13.2.5.18 Script data escape start state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
|
||||
case .ScriptDataEscapeStart:
|
||||
switch currentInputCharacter {
|
||||
case "-":
|
||||
return switchToAndEmitCharacter(.ScriptDataEscapeStartDash, character: "-")
|
||||
default:
|
||||
return reconsume(currentInputCharacter, in: .ScriptData)
|
||||
}
|
||||
// 13.2.5.19 Script data escape start dash state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
|
||||
case .ScriptDataEscapeStartDash:
|
||||
switch currentInputCharacter {
|
||||
case "-":
|
||||
return switchToAndEmitCharacter(.ScriptDataEscapedDashDash, character: "-")
|
||||
default:
|
||||
return reconsume(currentInputCharacter, in: .ScriptData)
|
||||
}
|
||||
// 13.2.5.20 Script data escaped state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
|
||||
case .ScriptDataEscaped:
|
||||
switch currentInputCharacter {
|
||||
case "-":
|
||||
return switchToAndEmitCharacter(.ScriptDataEscapedDash, character: "-")
|
||||
case "<":
|
||||
return switchTo(.ScriptDataEscapedLessThanSign)
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return emitCharacter("\u{FFFD}")
|
||||
case nil:
|
||||
// FIXME: log_parse_error()
|
||||
return emitEOF()
|
||||
default:
|
||||
return emitCharacter(currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.21 Script data escaped dash state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
|
||||
case .ScriptDataEscapedDash:
|
||||
switch currentInputCharacter {
|
||||
case "-":
|
||||
return switchToAndEmitCharacter(.ScriptDataEscapedDashDash, character: "-")
|
||||
case "<":
|
||||
return switchTo(.ScriptDataEscapedLessThanSign)
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return switchToAndEmitCharacter(.ScriptDataEscaped, character: "\u{FFFD}")
|
||||
case nil:
|
||||
// FIXME: log_parse_error()
|
||||
return emitEOF()
|
||||
default:
|
||||
return switchToAndEmitCharacter(.ScriptDataEscaped, character: currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.22 Script data escaped dash dash state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
|
||||
case .ScriptDataEscapedDashDash:
|
||||
switch currentInputCharacter {
|
||||
case "-":
|
||||
return emitCharacter("-")
|
||||
case "<":
|
||||
return switchTo(.ScriptDataEscapedLessThanSign)
|
||||
case ">":
|
||||
return switchToAndEmitCharacter(.ScriptData, character: ">")
|
||||
case "\0":
|
||||
// FIXME: log_parse_error()
|
||||
return switchToAndEmitCharacter(.ScriptDataEscaped, character: "\u{FFFD}")
|
||||
case nil:
|
||||
// FIXME: log_parse_error()
|
||||
return emitEOF()
|
||||
default:
|
||||
return switchToAndEmitCharacter(.ScriptDataEscaped, character: currentInputCharacter!)
|
||||
}
|
||||
// 13.2.5.23 Script data escaped less-than sign state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
|
||||
case .ScriptDataEscapedLessThanSign:
|
||||
switch currentInputCharacter {
|
||||
case "/":
|
||||
self.temporaryBuffer = ""
|
||||
return switchTo(.ScriptDataEscapedEndTagOpen)
|
||||
case let c? where HTMLTokenizer.asciiAlpha.contains(c.unicodeScalars.first!):
|
||||
self.temporaryBuffer = ""
|
||||
self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
|
||||
return reconsume(currentInputCharacter!, in: .ScriptDataDoubleEscapeStart)
|
||||
default:
|
||||
return emitCharacterAndReconsume("<", in: .ScriptDataEscaped, currentInputCharacter: currentInputCharacter)
|
||||
}
|
||||
// 13.2.5.24 Script data escaped end tag open state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
case .ScriptDataEscapedEndTagOpen:
    guard let letter = currentInputCharacter, HTMLTokenizer.asciiAlpha.contains(letter.unicodeScalars.first!) else {
        // Not an end tag after all: the already-consumed "</" is emitted as text.
        queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
        queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
        return reconsume(currentInputCharacter, in: .ScriptDataEscaped)
    }
    // An ASCII letter starts an end tag whose name is collected in the next state.
    createNewToken(HTMLToken(type: .EndTag(tagName: "")))
    return reconsume(letter, in: .ScriptDataEscapedEndTagName)
|
||||
// 13.2.5.25 Script data escaped end tag name state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
case .ScriptDataEscapedEndTagName:
    if let input = currentInputCharacter {
        switch input {
        case "\t", "\n", "\u{000C}", " ":
            // Only an appropriate end tag may proceed; otherwise use the "anything else" path below.
            if self.isAppropriateEndTagToken(currentToken) {
                return switchTo(.BeforeAttributeName)
            }
        case "/":
            if self.isAppropriateEndTagToken(currentToken) {
                return switchTo(.SelfClosingStartTag)
            }
        case ">":
            if self.isAppropriateEndTagToken(currentToken) {
                return switchToAndEmitCurrentToken(.Data)
            }
        case let upper where HTMLTokenizer.asciiUpperAlpha.contains(upper.unicodeScalars.first!):
            // The tag name gets the lowercased letter; the temporary buffer keeps the original.
            self.currentBuilder.append(Character(Unicode.Scalar(upper.asciiValue! + 0x20)))
            self.temporaryBuffer.append(upper)
            return continueInCurrentState()
        case let lower where HTMLTokenizer.asciiLowerAlpha.contains(lower.unicodeScalars.first!):
            self.currentBuilder.append(lower)
            self.temporaryBuffer.append(lower)
            return continueInCurrentState()
        default:
            break
        }
    }

    // "Anything else": also reached from the first three arms when the end tag is not appropriate,
    // and on end-of-input. The consumed "</" and the buffered name are emitted as text.
    self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "<")))
    self.queuedTokens.append(HTMLToken(type: .Character(codePoint: "/")))
    // NOTE: The spec doesn't mention this, but it seems that m_current_token (an end tag) is just dropped in this case.
    self.currentBuilder = ""
    for buffered in self.temporaryBuffer {
        self.queuedTokens.append(HTMLToken(type: .Character(codePoint: buffered)))
    }
    return reconsume(currentInputCharacter, in: .ScriptDataEscaped)
|
||||
// 13.2.5.26 Script data double escape start state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
case .ScriptDataDoubleEscapeStart:
    guard let input = currentInputCharacter else {
        // End-of-input takes the "anything else" path.
        return reconsume(currentInputCharacter, in: .ScriptDataEscaped)
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ", "/", ">":
        // A buffered name of "script" enters the double-escaped state; any other name stays escaped.
        let nextState: HTMLTokenizer.State = self.temporaryBuffer == "script" ? .ScriptDataDoubleEscaped : .ScriptDataEscaped
        return switchToAndEmitCharacter(nextState, character: input)
    case let upper where HTMLTokenizer.asciiUpperAlpha.contains(upper.unicodeScalars.first!):
        // Buffer the lowercased letter but emit the character as it was written.
        self.temporaryBuffer.append(Character(Unicode.Scalar(upper.asciiValue! + 0x20)))
        return emitCharacter(input)
    case let lower where HTMLTokenizer.asciiLowerAlpha.contains(lower.unicodeScalars.first!):
        self.temporaryBuffer.append(lower)
        return emitCharacter(input)
    default:
        return reconsume(input, in: .ScriptDataEscaped)
    }
|
||||
// 13.2.5.27 Script data double escaped state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
case .ScriptDataDoubleEscaped:
    switch currentInputCharacter {
    case "-":
        return switchToAndEmitCharacter(.ScriptDataDoubleEscapedDash, character: "-")
    case "<":
        // The "<" must be emitted as a character token while switching state. The
        // double escaped less-than sign state never emits it itself, so a bare
        // switchTo() would silently drop the "<" from the script text.
        return switchToAndEmitCharacter(.ScriptDataDoubleEscapedLessThanSign, character: "<")
    case "\0":
        // FIXME: log_parse_error()
        // NUL is replaced by U+FFFD REPLACEMENT CHARACTER; the state is unchanged.
        return emitCharacter("\u{FFFD}")
    case nil:
        // FIXME: log_parse_error()
        // End of input inside script data.
        return emitEOF()
    case let other?:
        // Any other character is emitted as-is.
        return emitCharacter(other)
    }
|
||||
// 13.2.5.28 Script data double escaped dash state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
case .ScriptDataDoubleEscapedDash:
    switch currentInputCharacter {
    case "-":
        return switchToAndEmitCharacter(.ScriptDataDoubleEscapedDashDash, character: "-")
    case "<":
        // The "<" must be emitted as a character token while switching state. The
        // double escaped less-than sign state never emits it itself, so a bare
        // switchTo() would silently drop the "<" from the script text.
        return switchToAndEmitCharacter(.ScriptDataDoubleEscapedLessThanSign, character: "<")
    case "\0":
        // FIXME: log_parse_error()
        // NUL is replaced by U+FFFD REPLACEMENT CHARACTER.
        return switchToAndEmitCharacter(.ScriptDataDoubleEscaped, character: "\u{FFFD}")
    case nil:
        // FIXME: log_parse_error()
        // End of input inside script data.
        return emitEOF()
    case let other?:
        // Any other character ends the dash run and is emitted as-is.
        return switchToAndEmitCharacter(.ScriptDataDoubleEscaped, character: other)
    }
|
||||
// 13.2.5.29 Script data double escaped dash dash state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
case .ScriptDataDoubleEscapedDashDash:
    guard let input = currentInputCharacter else {
        // FIXME: log_parse_error()
        return emitEOF()
    }
    switch input {
    case "-":
        // Additional dashes are emitted without changing state.
        return emitCharacter("-")
    case "<":
        return switchToAndEmitCharacter(.ScriptDataDoubleEscapedLessThanSign, character: "<")
    case ">":
        // "-->" returns to plain script data.
        return switchToAndEmitCharacter(.ScriptData, character: ">")
    case "\0":
        // FIXME: log_parse_error()
        return switchToAndEmitCharacter(.ScriptDataDoubleEscaped, character: "\u{FFFD}")
    default:
        return switchToAndEmitCharacter(.ScriptDataDoubleEscaped, character: input)
    }
|
||||
// 13.2.5.30 Script data double escaped less-than sign state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
case .ScriptDataDoubleEscapedLessThanSign:
    // Only "/" is special here; everything else (including end-of-input) is reprocessed
    // by the double escaped state.
    guard currentInputCharacter == "/" else {
        return reconsume(currentInputCharacter, in: .ScriptDataDoubleEscaped)
    }
    self.temporaryBuffer = ""
    return switchToAndEmitCharacter(.ScriptDataDoubleEscapeEnd, character: "/")
|
||||
// 13.2.5.31 Script data double escape end state, https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
case .ScriptDataDoubleEscapeEnd:
    guard let input = currentInputCharacter else {
        // End-of-input takes the "anything else" path.
        return reconsume(currentInputCharacter, in: .ScriptDataDoubleEscaped)
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ", "/", ">":
        // A buffered name of "script" drops back to the (single) escaped state; any other name stays double escaped.
        let nextState: HTMLTokenizer.State = self.temporaryBuffer == "script" ? .ScriptDataEscaped : .ScriptDataDoubleEscaped
        return switchToAndEmitCharacter(nextState, character: input)
    case let upper where HTMLTokenizer.asciiUpperAlpha.contains(upper.unicodeScalars.first!):
        // Buffer the lowercased letter but emit the character as it was written.
        self.temporaryBuffer.append(Character(Unicode.Scalar(upper.asciiValue! + 0x20)))
        return emitCharacter(input)
    case let lower where HTMLTokenizer.asciiLowerAlpha.contains(lower.unicodeScalars.first!):
        self.temporaryBuffer.append(lower)
        return emitCharacter(input)
    default:
        return reconsume(input, in: .ScriptDataDoubleEscaped)
    }
|
||||
// 13.2.5.32 Before attribute name state, https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
case .BeforeAttributeName:
    guard let input = currentInputCharacter else {
        // End-of-input is handled by the after attribute name state.
        return reconsume(currentInputCharacter, in: .AfterAttributeName)
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ":
        // Whitespace before an attribute name is skipped.
        return continueInCurrentState()
    case "/", ">":
        return reconsume(input, in: .AfterAttributeName)
    case "=":
        // FIXME: log_parse_error()
        // A leading "=" becomes the first character of the new attribute's name.
        self.currentBuilder = Swift.String(input)
        self.currentAttribute = HTMLToken.Attribute(localName: "", value: "")
        return switchTo(.AttributeName)
    default:
        // Start a fresh attribute and let the attribute name state consume this character.
        self.currentAttribute = HTMLToken.Attribute(localName: "", value: "")
        return reconsume(input, in: .AttributeName)
    }
|
||||
// 13.2.5.33 Attribute name state, https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
case .AttributeName:
    // FIXME: On leaving the attribute name state (and before emitting the tag token, if appropriate),
    //        the complete attribute name must be compared with the other attributes on the same token;
    //        if an attribute with exactly the same name already exists, this is a duplicate-attribute
    //        parse error and the new attribute must be removed from the token.
    // NOTE: An attribute removed this way, together with any value later associated with it, is never
    //       used by the parser again and is effectively discarded. It nevertheless remains the
    //       "current attribute" as far as the tokenizer is concerned.
    switch currentInputCharacter {
    case "\t", "\n", "\u{000C}", " ", "/", ">", nil:
        // FIXME: set name position
        // The name is complete: move the accumulated characters into the attribute.
        currentAttribute!.localName = currentBuilder.takeString()
        return reconsume(currentInputCharacter, in: .AfterAttributeName)
    case "=":
        // FIXME: set name position
        currentAttribute!.localName = currentBuilder.takeString()
        return switchTo(.BeforeAttributeValue)
    case let upper? where HTMLTokenizer.asciiUpperAlpha.contains(upper.unicodeScalars.first!):
        // Attribute names are accumulated in lowercase.
        currentBuilder.append(Character(Unicode.Scalar(upper.asciiValue! + 0x20)))
        return continueInCurrentState()
    case "\0":
        // FIXME: log_parse_error()
        currentBuilder.append("\u{FFFD}")
        return continueInCurrentState()
    case let other?:
        currentBuilder.append(other)
        return continueInCurrentState()
    }
|
||||
// 13.2.5.34 After attribute name state, https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
case .AfterAttributeName:
    guard let input = currentInputCharacter else {
        // FIXME: log_parse_error()
        // End of input inside a tag.
        self.finalizeCurrentAttribute(.IgnoreString)
        return emitEOF()
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ":
        // Whitespace after the name is skipped.
        return continueInCurrentState()
    case "/":
        self.finalizeCurrentAttribute(.SetName)
        return switchTo(.SelfClosingStartTag)
    case "=":
        self.finalizeCurrentAttribute(.SetName)
        return switchTo(.BeforeAttributeValue)
    case ">":
        self.finalizeCurrentAttribute(.SetName)
        return switchToAndEmitCurrentToken(.Data)
    default:
        // Another attribute begins: finish the current one and start a new one.
        self.finalizeCurrentAttribute(.SetName)
        self.currentAttribute = HTMLToken.Attribute(localName: "", value: "")
        return reconsume(input, in: .AttributeName)
    }
|
||||
// 13.2.5.35 Before attribute value state, https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
case .BeforeAttributeValue:
    guard let input = currentInputCharacter else {
        // End-of-input is handled by the unquoted value state.
        return reconsume(currentInputCharacter, in: .AttributeValueUnquoted)
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ":
        // Whitespace before the value is skipped.
        return continueInCurrentState()
    case "\"":
        return switchTo(.AttributeValueDoubleQuoted)
    case "'":
        return switchTo(.AttributeValueSingleQuoted)
    case ">":
        // FIXME: log_parse_error()
        // The tag closed before any value was given.
        self.finalizeCurrentAttribute(.IgnoreString)
        return switchToAndEmitCurrentToken(.Data)
    default:
        return reconsume(input, in: .AttributeValueUnquoted)
    }
|
||||
// 13.2.5.36 Attribute value (double-quoted) state, https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-double-quoted-state
case .AttributeValueDoubleQuoted:
    guard let input = currentInputCharacter else {
        // FIXME: log_parse_error()
        // End of input inside a quoted value.
        self.finalizeCurrentAttribute(.IgnoreString)
        return emitEOF()
    }
    switch input {
    case "\"":
        // Closing quote.
        return switchTo(.AfterAttributeValueQuoted)
    case "&":
        // Record where to resume once the character reference has been handled.
        self.returnState = .AttributeValueDoubleQuoted
        return switchTo(.CharacterReference)
    case "\0":
        // FIXME: log_parse_error()
        self.currentBuilder.append("\u{FFFD}")
        return continueInCurrentState()
    default:
        self.currentBuilder.append(input)
        return continueInCurrentState()
    }
|
||||
// 13.2.5.37 Attribute value (single-quoted) state, https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-single-quoted-state
case .AttributeValueSingleQuoted:
    switch currentInputCharacter {
    case "'":
        // Closing quote.
        return switchTo(.AfterAttributeValueQuoted)
    case "&":
        // Record where to resume once the character reference has been handled.
        self.returnState = .AttributeValueSingleQuoted
        return switchTo(.CharacterReference)
    case "\0":
        // FIXME: log_parse_error()
        // NUL is replaced by U+FFFD REPLACEMENT CHARACTER in the value.
        self.currentBuilder.append("\u{FFFD}")
        return continueInCurrentState()
    case nil:
        // FIXME: log_parse_error()
        // Finalize the in-progress attribute before emitting EOF, as the double-quoted,
        // unquoted and after-value states do on end-of-input.
        self.finalizeCurrentAttribute(.IgnoreString)
        return emitEOF()
    case let other?:
        // Any other character is part of the value.
        self.currentBuilder.append(other)
        return continueInCurrentState()
    }
|
||||
// 13.2.5.38 Attribute value (unquoted) state, https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-unquoted-state
case .AttributeValueUnquoted:
    guard let input = currentInputCharacter else {
        // FIXME: log_parse_error()
        // End of input inside a tag.
        self.finalizeCurrentAttribute(.IgnoreString)
        return emitEOF()
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ":
        // Whitespace terminates an unquoted value.
        self.finalizeCurrentAttribute(.SetValue)
        return switchTo(.BeforeAttributeName)
    case "&":
        // Record where to resume once the character reference has been handled.
        self.returnState = .AttributeValueUnquoted
        return switchTo(.CharacterReference)
    case ">":
        self.finalizeCurrentAttribute(.SetValue)
        return switchToAndEmitCurrentToken(.Data)
    case "\0":
        // FIXME: log_parse_error()
        self.currentBuilder.append("\u{FFFD}")
        return continueInCurrentState()
    case "\"", "'", "<", "=", "`":
        // FIXME: log_parse_error()
        // Still appended to the value; kept as a separate arm for the pending parse-error hook.
        self.currentBuilder.append(input)
        return continueInCurrentState()
    default:
        self.currentBuilder.append(input)
        return continueInCurrentState()
    }
|
||||
// 13.2.5.39 After attribute value (quoted) state, https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-quoted-state
case .AfterAttributeValueQuoted:
    guard let input = currentInputCharacter else {
        // FIXME: log_parse_error()
        // End of input inside a tag.
        self.finalizeCurrentAttribute(.IgnoreString)
        return emitEOF()
    }
    switch input {
    case "\t", "\n", "\u{000C}", " ":
        self.finalizeCurrentAttribute(.SetValue)
        return switchTo(.BeforeAttributeName)
    case "/":
        self.finalizeCurrentAttribute(.SetValue)
        return switchTo(.SelfClosingStartTag)
    case ">":
        self.finalizeCurrentAttribute(.SetValue)
        return switchToAndEmitCurrentToken(.Data)
    default:
        // FIXME: log_parse_error()
        // No whitespace between attributes: finish this one and reprocess the character as a new attribute.
        self.finalizeCurrentAttribute(.SetValue)
        return reconsume(input, in: .BeforeAttributeName)
    }
|
||||
default:
    // Fallback for states that have no handler yet: report the state and input, then end tokenization.
    let inputDescription = Swift.String(describing: currentInputCharacter)
    print("TODO: In state \(self.state) with input \(inputDescription)")
    return emitEOF()
|
||||
|
|
Loading…
Reference in a new issue