// HtmlTokenizer
// Performs the tokenization of HTML source code. Follows the tokenization algorithm described at:
// http://www.w3.org/html/wg/drafts/html/master/syntax.html
using AngleSharp.Common;
using AngleSharp.Dom;
using AngleSharp.Html.Dom.Events;
using AngleSharp.Html.Parser.Tokens;
using AngleSharp.Html.Parser.Tokens.Struct;
using AngleSharp.Text;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace AngleSharp.Html.Parser
{
[System.Runtime.CompilerServices.NullableContext(1)]
[System.Runtime.CompilerServices.Nullable(0)]
public sealed class HtmlTokenizer : BaseTokenizer
{
/// <summary>
/// States of the attribute-parsing loop in ParseAttributes.
/// </summary>
[System.Runtime.CompilerServices.NullableContext(0)]
private enum AttributeState : byte
{
// Whitespace is skipped; the next character starts a name, closes the tag, or begins a self-close.
BeforeName,
// Characters are accumulated into the attribute name.
Name,
// Whitespace followed the name; '=' leads to a value, anything else starts a new name.
AfterName,
// An '=' was seen; the next non-space character decides quoted vs. unquoted value.
BeforeValue,
// Inside a quoted value, reading until the matching quote character.
QuotedValue,
// Directly after the closing quote of a quoted value.
AfterValue,
// Inside an unquoted value, reading until whitespace or '>'.
UnquotedValue,
// A '/' was seen; handling is delegated to TagSelfClosingInner.
SelfClose
}
/// <summary>
/// States of the script-data state machine driven by ScriptData.
/// </summary>
// NOTE(review): only part of ScriptData is visible from here; the member names
// appear to mirror the "script data ..." states of the HTML tokenization
// algorithm - confirm against the full method before relying on that.
[System.Runtime.CompilerServices.NullableContext(0)]
private enum ScriptState : byte
{
// Plain script text; '<' switches to OpenTag.
Normal,
// A '<' was appended; '/' leads to EndTag, '!' to StartEscape.
OpenTag,
// "</" was seen; a possible closing tag name is being matched.
EndTag,
// "<!" was seen; a following '-' leads to StartEscapeDash.
StartEscape,
Escaped,
// "<!-" was seen; a second '-' leads to EscapedDashDash.
StartEscapeDash,
EscapedDash,
EscapedDashDash,
EscapedOpenTag,
EscapedEndTag,
EscapedNameEndTag,
StartDoubleEscape,
EscapedDouble,
EscapedDoubleDash,
EscapedDoubleDashDash,
EscapedDoubleOpenTag,
EndDoubleEscape
}
/// <summary>
/// Holds exactly one of the two supported entity resolvers and forwards
/// symbol lookups to whichever one was supplied.
/// </summary>
[System.Runtime.CompilerServices.NullableContext(2)]
[System.Runtime.CompilerServices.Nullable(0)]
private readonly struct EntityProvider
{
    private readonly IEntityProviderExtended _fast;
    private readonly IEntityProvider _slow;

    /// <summary>Wraps a string-based resolver.</summary>
    [System.Runtime.CompilerServices.NullableContext(1)]
    public EntityProvider(IEntityProvider slow)
    {
        _slow = slow;
        _fast = null;
    }

    /// <summary>Wraps a resolver that accepts StringOrMemory names directly.</summary>
    [System.Runtime.CompilerServices.NullableContext(1)]
    public EntityProvider(IEntityProviderExtended fast)
    {
        _fast = fast;
        _slow = null;
    }

    /// <summary>
    /// Resolves an entity name. The extended resolver is preferred; the plain
    /// resolver receives the name converted to a string.
    /// </summary>
    /// <exception cref="InvalidOperationException">Neither resolver is set.</exception>
    public string GetSymbol(StringOrMemory name)
    {
        IEntityProviderExtended fast = _fast;
        if (fast != null) {
            return fast.GetSymbol(name);
        }
        IEntityProvider slow = _slow;
        if (slow == null) {
            throw new InvalidOperationException("Should not get there, please file a bug report.");
        }
        return slow.GetSymbol(name.ToString());
    }
}
// Resolver used to turn named character references into their symbols.
private readonly EntityProvider _resolver;
// Initialised to StringOrMemory.Empty; ScriptData compares closing tag names against it.
private StringOrMemory _lastStartTag;
// Start position passed to the TextRange reported through OnToken.
private TextPosition _position;
// Token storage that several states fill and return by reference.
private StructHtmlToken _token;
// Attribute filter; the default accepts every attribute.
private ShouldEmitAttribute _shouldEmitAttribute = delegate {
return true;
};
// Scratch buffer for named character references, allocated on first use.
[System.Runtime.CompilerServices.Nullable(2)]
private char[] _characterReferenceBuffer;
// When true, DataText returns NewSkippedContent instead of NewCharacter.
public bool SkipDataText { get; set; }
// When true, ScriptData returns NewSkippedContent instead of NewCharacter.
public bool SkipScriptText { get; set; }
// When true, RawtextText returns NewSkippedContent instead of NewCharacter.
public bool SkipRawText { get; set; }
// NOTE(review): not read in the visible part of the file; presumably the comment counterpart of the flags above - confirm.
public bool SkipComments { get; set; }
// When true, Plaintext returns NewSkippedContent instead of NewCharacter.
public bool SkipPlaintext { get; set; }
// When true, RCDataText returns NewSkippedContent instead of NewCharacter.
public bool SkipRCDataText { get; set; }
// When true, CharacterData returns NewSkippedContent instead of NewCharacter.
public bool SkipCDATA { get; set; }
// NOTE(review): not read in the visible part of the file; presumably applies to processing instructions - confirm.
public bool SkipProcessingInstructions { get; set; }
/// <summary>
/// Gets or sets the filter consulted for every parsed attribute name.
/// Assigning null is ignored, so the filter is never null.
/// </summary>
public ShouldEmitAttribute ShouldEmitAttribute
{
    get => _shouldEmitAttribute;
    set
    {
        if (value == null) {
            return;
        }
        _shouldEmitAttribute = value;
    }
}
// When true, MarkupDeclaration recognises the CDATA keyword and tokenizes a CDATA section.
public bool IsAcceptingCharacterData { get; set; }
// When true, ParseAttributes keeps uppercase ASCII letters in attribute names instead of lowercasing them.
public bool IsPreservingAttributeNames { get; set; }
// When true, AppendCharacterReference emits a literal '&' instead of resolving the reference.
public bool IsNotConsumingCharacterReferences { get; set; }
// Current parse mode; the constructors start in PCData and many states reset it to PCData.
public HtmlParseMode State { get; set; }
// When true, RaiseErrorOccurred throws an HtmlParseException instead of raising the Error event.
public bool IsStrictMode { get; set; }
// When true, TagOpen routes "<?" to ProcessingInstruction instead of a bogus comment.
public bool IsSupportingProcessingInstructions { get; set; }
// Optional callback invoked by GetStructToken with each token and its text range.
[System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
[field: System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
public Action<HtmlToken, TextRange> OnToken {
[return: System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
get;
[param: System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
set;
}
// Raised by RaiseErrorOccurred for each parse error when IsStrictMode is false.
[System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
[method: System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
[field: System.Runtime.CompilerServices.Nullable(new byte[] {
2,
1
})]
public event EventHandler<HtmlErrorEvent> Error;
/// <summary>
/// Creates a tokenizer over <paramref name="source"/> that resolves named
/// entities through a string-based provider. Starts in PCData mode with an
/// empty last start tag.
/// </summary>
public HtmlTokenizer(TextSource source, IEntityProvider resolver)
    : base(source)
{
    _resolver = new EntityProvider(resolver);
    _lastStartTag = StringOrMemory.Empty;
    State = HtmlParseMode.PCData;
}
/// <summary>
/// Creates a tokenizer over <paramref name="source"/> that resolves named
/// entities through an extended provider. Starts in PCData mode with an
/// empty last start tag.
/// </summary>
public HtmlTokenizer(TextSource source, IEntityProviderExtended resolver)
    : base(source)
{
    _resolver = new EntityProvider(resolver);
    _lastStartTag = StringOrMemory.Empty;
    State = HtmlParseMode.PCData;
}
/// <summary>Reads the next token and converts it to an HtmlToken.</summary>
public HtmlToken Get()
{
    ref StructHtmlToken token = ref GetStructToken();
    return token.ToHtmlToken();
}
/// <summary>
/// Reads the next token by reference. If an OnToken callback is set, it is
/// invoked with the token and the text range it covers before returning.
/// </summary>
public ref StructHtmlToken GetStructToken()
{
    ref StructHtmlToken token = ref GetNextStructToken();
    Action<HtmlToken, TextRange> listener = OnToken;
    if (listener != null) {
        listener(token.ToHtmlToken(), new TextRange(_position, GetCurrentPosition().After(base.Current)));
    }
    return ref token;
}
/// <summary>
/// Reports a parse error. In strict mode an HtmlParseException is thrown;
/// otherwise subscribers of the Error event (if any) are notified.
/// </summary>
/// <param name="code">The error that occurred.</param>
/// <param name="position">The position the error refers to.</param>
internal void RaiseErrorOccurred(HtmlParseError code, TextPosition position)
{
    if (IsStrictMode) {
        throw new HtmlParseException(code.GetCode(), "Error while parsing the provided HTML document.", position);
    }
    this.Error?.Invoke(this, new HtmlErrorEvent(code, position));
}
/// <summary>Data state entry: '&lt;' opens a tag, anything else is text.</summary>
private ref StructHtmlToken Data(char c)
{
    if (c == '<') {
        return ref TagOpen(GetNext());
    }
    return ref DataText(c);
}
/// <summary>
/// Collects text until '&lt;' or end of input, resolving character
/// references. NUL characters raise a Null error and are dropped.
/// </summary>
private ref StructHtmlToken DataText(char c)
{
    for (; ; c = GetNext()) {
        if (c == '<' || c == '\uffff') {
            // Leave the terminator for the next state to consume.
            Back();
            if (SkipDataText) {
                return ref NewSkippedContent(HtmlTokenType.Character);
            }
            return ref NewCharacter();
        }
        if (c == '&') {
            AppendCharacterReference(GetNext(), '\0', false);
        } else if (c == '\0') {
            RaiseErrorOccurred(HtmlParseError.Null);
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Collects everything up to end of input as text; NUL is replaced via
/// AppendReplacement.
/// </summary>
private ref StructHtmlToken Plaintext(char c)
{
    for (; c != '\uffff'; c = GetNext()) {
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
    Back();
    if (SkipPlaintext) {
        return ref NewSkippedContent(HtmlTokenType.Character);
    }
    return ref NewCharacter();
}
/// <summary>RCDATA state entry: '&lt;' may start an end tag, anything else is text.</summary>
private ref StructHtmlToken RCData(char c)
{
    if (c == '<') {
        return ref RCDataLt(GetNext());
    }
    return ref RCDataText(c);
}
/// <summary>
/// Collects RCDATA text until '&lt;' or end of input, resolving character
/// references and replacing NUL via AppendReplacement.
/// </summary>
private ref StructHtmlToken RCDataText(char c)
{
    for (; ; c = GetNext()) {
        if (c == '<' || c == '\uffff') {
            Back();
            if (SkipRCDataText) {
                return ref NewSkippedContent(HtmlTokenType.Character);
            }
            return ref NewCharacter();
        }
        if (c == '&') {
            AppendCharacterReference(GetNext(), '\0', false);
        } else if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles the character after '&lt;' in RCDATA. "&lt;/" followed by an ASCII
/// letter starts an end-tag name; otherwise the consumed characters are
/// put back into the text.
/// </summary>
private ref StructHtmlToken RCDataLt(char c)
{
    if (c != '/') {
        Append('<');
        return ref RCDataText(c);
    }
    c = GetNext();
    bool isUpper = c.IsUppercaseAscii();
    if (isUpper || c.IsLowercaseAscii()) {
        Append(isUpper ? char.ToLowerInvariant(c) : c);
        return ref RCDataNameEndTag(GetNext());
    }
    Append('<', '/');
    return ref RCDataText(c);
}
/// <summary>
/// Accumulates a lowercased end-tag name in RCDATA. When
/// CreateIfAppropriate accepts the name, the token it filled is returned;
/// on a non-letter the buffered name is turned back into text by
/// prefixing "&lt;/".
/// </summary>
private ref StructHtmlToken RCDataNameEndTag(char c)
{
    while (!CreateIfAppropriate(c, ref _token)) {
        bool isUpper = c.IsUppercaseAscii();
        if (!isUpper && !c.IsLowercaseAscii()) {
            base.CharBuffer.Insert(0, '<').Insert(1, '/');
            return ref RCDataText(c);
        }
        Append(isUpper ? char.ToLowerInvariant(c) : c);
        c = GetNext();
    }
    return ref _token;
}
/// <summary>RAWTEXT state entry: '&lt;' may start an end tag, anything else is text.</summary>
private ref StructHtmlToken Rawtext(char c)
{
    if (c == '<') {
        return ref RawtextLT(GetNext());
    }
    return ref RawtextText(c);
}
/// <summary>
/// Collects raw text until '&lt;' or end of input; NUL is replaced via
/// AppendReplacement. Character references are not resolved here.
/// </summary>
private ref StructHtmlToken RawtextText(char c)
{
    for (; c != '<' && c != '\uffff'; c = GetNext()) {
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
    Back();
    if (SkipRawText) {
        return ref NewSkippedContent(HtmlTokenType.Character);
    }
    return ref NewCharacter();
}
/// <summary>
/// Handles the character after '&lt;' in RAWTEXT. "&lt;/" followed by an ASCII
/// letter starts an end-tag name; otherwise the consumed characters are
/// put back into the text.
/// </summary>
private ref StructHtmlToken RawtextLT(char c)
{
    if (c != '/') {
        Append('<');
        return ref RawtextText(c);
    }
    c = GetNext();
    bool isUpper = c.IsUppercaseAscii();
    if (isUpper || c.IsLowercaseAscii()) {
        Append(isUpper ? char.ToLowerInvariant(c) : c);
        return ref RawtextNameEndTag(GetNext());
    }
    Append('<', '/');
    return ref RawtextText(c);
}
/// <summary>
/// Accumulates a lowercased end-tag name in RAWTEXT. When
/// CreateIfAppropriate accepts the name, the token it filled is returned;
/// on a non-letter the buffered name is turned back into text by
/// prefixing "&lt;/".
/// </summary>
private ref StructHtmlToken RawtextNameEndTag(char c)
{
    while (!CreateIfAppropriate(c, ref _token)) {
        bool isUpper = c.IsUppercaseAscii();
        if (!isUpper && !c.IsLowercaseAscii()) {
            base.CharBuffer.Insert(0, '<').Insert(1, '/');
            return ref RawtextText(c);
        }
        Append(isUpper ? char.ToLowerInvariant(c) : c);
        c = GetNext();
    }
    return ref _token;
}
/// <summary>
/// Collects the content of a CDATA section until "]]&gt;" or end of input.
/// </summary>
private ref StructHtmlToken CharacterData(char c)
{
    while (true) {
        if (c == '\uffff') {
            Back();
            break;
        }
        if (c == ']' && ContinuesWithSensitive("]]>")) {
            // Skip the remaining two characters of the terminator.
            Advance(2);
            break;
        }
        Append(c);
        c = GetNext();
    }
    if (SkipCDATA) {
        return ref NewSkippedContent(HtmlTokenType.Character);
    }
    return ref NewCharacter();
}
/// <summary>
/// Appends the text a character reference stands for, or a literal '&amp;'
/// when the characters after it do not form a reference.
/// </summary>
/// <param name="c">The character following the '&amp;'.</param>
/// <param name="allowedCharacter">An extra character that ends the reference attempt (NUL for none).</param>
/// <param name="isAttribute">True when the reference is inside an attribute value.</param>
private void AppendCharacterReference(char c, char allowedCharacter = '\0', bool isAttribute = false)
{
    bool isLiteralAmpersand = IsNotConsumingCharacterReferences
        || c.IsSpaceCharacter()
        || c == '<'
        || c == '\uffff'
        || c == '&'
        || c == allowedCharacter;
    if (isLiteralAmpersand) {
        Back();
        Append('&');
        return;
    }
    string resolved = (c == '#')
        ? GetNumericCharacterReference(GetNext(), isAttribute)
        : GetLookupCharacterReference(allowedCharacter, isAttribute);
    if (resolved != null) {
        base.CharBuffer.Append(MemoryExtensions.AsSpan(resolved));
    } else {
        Append('&');
    }
}
/// <summary>
/// Parses a numeric character reference (decimal, or hexadecimal after
/// 'x'/'X') and returns the text it denotes.
/// </summary>
/// <param name="c">The character following "&amp;#".</param>
/// <param name="isAttribute">True when parsing inside an attribute value; suppresses the "wrong number" error.</param>
/// <returns>The replacement text, or null if no digits were found (the input is rewound in that case).</returns>
[System.Runtime.CompilerServices.NullableContext(2)]
private string GetNumericCharacterReference(char c, bool isAttribute)
{
    // Anything above the last Unicode code point is invalid, so the running
    // value is clamped here. This keeps arbitrarily long digit runs from
    // overflowing int (which used to wrap to a valid or negative code).
    const int OutOfRange = 0x110000;
    bool isHex = c == 'x' || c == 'X';
    int radix = isHex ? 16 : 10;
    int digitCount = 0;
    int code = 0;
    if (isHex) {
        // Step over the 'x' marker to the first candidate digit.
        c = GetNext();
    }
    while (isHex ? c.IsHex() : c.IsDigit()) {
        digitCount++;
        if (code < OutOfRange) {
            code = Math.Min(code * radix + c.FromHex(), OutOfRange);
        }
        c = GetNext();
    }
    if (digitCount == 0) {
        // Rewind past '#' and the inspected character (plus 'x' for hex).
        Back(2);
        if (isHex) {
            Back();
        }
        if (!isAttribute) {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
        }
        return null;
    }
    if (c != ';') {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
        Back();
    }
    if (HtmlEntityProvider.IsInCharacterTable(code)) {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
        return HtmlEntityProvider.GetSymbolFromTable(code);
    }
    // The explicit upper bound guarantees ConvertFromUtf32 is never called
    // with a value beyond U+10FFFF, independent of the helper's own checks.
    if (code > 0x10FFFF || HtmlEntityProvider.IsInvalidNumber(code)) {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
        return "\ufffd";
    }
    if (HtmlEntityProvider.IsInInvalidRange(code)) {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
    }
    return char.ConvertFromUtf32(code);
}
/// <summary>
/// Resolves a named character reference starting at the current character.
/// The longest candidate (with ';' if present) is tried first, then
/// progressively shorter prefixes, rewinding one character per failed try.
/// </summary>
/// <param name="allowedCharacter">NUL outside attribute values; otherwise enables the attribute-specific bail-out.</param>
/// <param name="isAttribute">True inside an attribute value; suppresses the "not terminated" error.</param>
/// <returns>The resolved symbol, or null when nothing matched or the attribute bail-out applied.</returns>
[System.Runtime.CompilerServices.NullableContext(2)]
private string GetLookupCharacterReference(char allowedCharacter, bool isAttribute)
{
    int rewindPoint = base.InsertionPoint - 1;
    char[] buffer = _characterReferenceBuffer ?? (_characterReferenceBuffer = new char[32]);
    int length = 0;
    char current = base.Current;
    while (current != ';' && current.IsName()) {
        buffer[length] = current;
        length++;
        current = GetNext();
        // Stop at end of input or when only the slot for ';' is left.
        if (current == '\uffff' || length >= 31) {
            break;
        }
    }
    string symbol = null;
    if (current == ';') {
        buffer[length] = ';';
        symbol = _resolver.GetSymbol(new StringOrMemory(MemoryExtensions.AsMemory(buffer, 0, length + 1)));
    }
    while (symbol == null && length > 0) {
        symbol = _resolver.GetSymbol(new StringOrMemory(MemoryExtensions.AsMemory(buffer, 0, length)));
        length--;
        if (symbol == null) {
            Back();
        }
    }
    current = base.Current;
    if (current == ';') {
        return symbol;
    }
    bool continuesInAttribute = allowedCharacter != '\0' && (current == '=' || current.IsAlphanumericAscii());
    if (continuesInAttribute) {
        if (current == '=') {
            RaiseErrorOccurred(HtmlParseError.CharacterReferenceAttributeEqualsFound);
        }
        // Treat the whole sequence as plain text: restart just after the '&'.
        base.InsertionPoint = rewindPoint;
        return null;
    }
    Back();
    if (!isAttribute) {
        RaiseErrorOccurred(HtmlParseError.CharacterReferenceNotTerminated);
    }
    return symbol;
}
/// <summary>
/// Dispatches on the character following '&lt;': end tag, start tag name,
/// markup declaration, processing instruction / bogus comment, or - for
/// anything else - plain text beginning with '&lt;'.
/// </summary>
private ref StructHtmlToken TagOpen(char c)
{
    if (c == '/') {
        return ref TagEnd(GetNext());
    }
    if (c.IsLowercaseAscii() || c.IsUppercaseAscii()) {
        Append(c.IsUppercaseAscii() ? char.ToLowerInvariant(c) : c);
        return ref TagName(ref NewTagOpen());
    }
    if (c == '!') {
        return ref MarkupDeclaration(GetNext());
    }
    if (c != '?') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.AmbiguousOpenTag);
        Append('<');
        return ref DataText(c);
    }
    if (IsSupportingProcessingInstructions) {
        return ref ProcessingInstruction(c);
    }
    RaiseErrorOccurred(HtmlParseError.BogusComment);
    return ref BogusComment(c);
}
/// <summary>
/// Handles the character following "&lt;/": a letter starts a closing tag
/// name, '&gt;' is an error that resumes data, end of input emits "&lt;/" as
/// text, and anything else becomes a bogus comment.
/// </summary>
private ref StructHtmlToken TagEnd(char c)
{
    if (c.IsLowercaseAscii() || c.IsUppercaseAscii()) {
        Append(c.IsUppercaseAscii() ? char.ToLowerInvariant(c) : c);
        return ref TagName(ref NewTagClose());
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        return ref Data(GetNext());
    }
    if (c == '\uffff') {
        Back();
        RaiseErrorOccurred(HtmlParseError.EOF);
        Append('<', '/');
        return ref NewCharacter();
    }
    RaiseErrorOccurred(HtmlParseError.BogusComment);
    return ref BogusComment(c);
}
/// <summary>
/// Reads the rest of a tag name (lowercasing ASCII letters) and continues
/// with attribute parsing, self-closing handling, or tag emission.
/// </summary>
private ref StructHtmlToken TagName(ref StructHtmlToken tag)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '>') {
            tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName);
            return ref EmitTag(ref tag);
        }
        if (c.IsSpaceCharacter()) {
            tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName);
            return ref ParseAttributes(ref tag);
        }
        if (c == '/') {
            tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName);
            return ref TagSelfClosing(ref tag);
        }
        if (c.IsUppercaseAscii()) {
            Append(char.ToLowerInvariant(c));
        } else if (c == '\0') {
            AppendReplacement();
        } else if (c == '\uffff') {
            return ref NewEof(false);
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles a '/' inside a tag; if it did not finish the tag, parsing
/// continues with the attributes.
/// </summary>
private ref StructHtmlToken TagSelfClosing(ref StructHtmlToken tag)
{
    if (!TagSelfClosingInner(ref tag)) {
        return ref ParseAttributes(ref tag);
    }
    return ref tag;
}
/// <summary>
/// Consumes characters after a '/' inside a tag.
/// </summary>
/// <returns>
/// True when <paramref name="tag"/> now holds a finished token (the
/// self-closed tag or EOF); false when a misplaced slash was reported and
/// the offending character was pushed back.
/// </returns>
private bool TagSelfClosingInner(ref StructHtmlToken tag)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '>') {
            tag.IsSelfClosing = true;
            tag = EmitTag(ref tag);
            return true;
        }
        if (c == '\uffff') {
            tag = NewEof(false);
            return true;
        }
        RaiseErrorOccurred(HtmlParseError.ClosingSlashMisplaced);
        if (c != '/') {
            Back();
            return false;
        }
        // Another '/': keep looking for the closing '>'.
    }
}
/// <summary>
/// Handles "&lt;!": a comment ("--"), a doctype, a CDATA section (only when
/// IsAcceptingCharacterData is set), or otherwise a bogus comment.
/// </summary>
private ref StructHtmlToken MarkupDeclaration(char c)
{
    if (ContinuesWithSensitive("--")) {
        Advance();
        return ref CommentStart(GetNext());
    } else if (ContinuesWithInsensitive(TagNames.Doctype)) {
        Advance(6);
        return ref Doctype(GetNext());
    } else if (IsAcceptingCharacterData && ContinuesWithSensitive(Keywords.CData)) {
        Advance(6);
        return ref CharacterData(GetNext());
    } else {
        RaiseErrorOccurred(HtmlParseError.UndefinedMarkupDeclaration);
        return ref BogusComment(c);
    }
}
/// <summary>
/// Collects a processing instruction up to '&gt;' or end of input, replacing
/// NUL with U+FFFD, and resets the state to PCData.
/// </summary>
private ref StructHtmlToken ProcessingInstruction(char c)
{
    base.CharBuffer.Discard();
    while (c != '>' && c != '\uffff') {
        Append(c == '\0' ? '\ufffd' : c);
        c = GetNext();
    }
    if (c == '\uffff') {
        Back();
    }
    State = HtmlParseMode.PCData;
    return ref NewProcessingInstruction();
}
/// <summary>
/// Collects a bogus comment up to '&gt;' or end of input, replacing NUL
/// with U+FFFD, and resets the state to PCData.
/// </summary>
private ref StructHtmlToken BogusComment(char c)
{
    base.CharBuffer.Discard();
    while (c != '>' && c != '\uffff') {
        Append(c == '\0' ? '\ufffd' : c);
        c = GetNext();
    }
    if (c == '\uffff') {
        Back();
    }
    State = HtmlParseMode.PCData;
    return ref NewComment();
}
/// <summary>
/// Handles the first character after "&lt;!--".
/// </summary>
private ref StructHtmlToken CommentStart(char c)
{
    base.CharBuffer.Discard();
    if (c == '-') {
        if (CommentDashStart(GetNext(), ref _token)) {
            return ref _token;
        }
        return ref Comment(GetNext());
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        return ref NewComment();
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        return ref NewComment();
    }
    if (c == '\0') {
        AppendReplacement();
    } else {
        Append(c);
    }
    return ref Comment(GetNext());
}
/// <summary>
/// Handles the character after "&lt;!---" (a dash directly after the comment
/// opener).
/// </summary>
/// <param name="c">The character following the dash.</param>
/// <param name="token">Receives the finished comment token when the method returns true.</param>
/// <returns>
/// True when <paramref name="token"/> holds a finished token; false when
/// the caller must continue reading the comment body (only possible via
/// CommentEnd).
/// </returns>
private bool CommentDashStart(char c, ref StructHtmlToken token)
{
    switch (c) {
        case '-':
            return CommentEnd(GetNext(), ref token);
        case '\0':
            RaiseErrorOccurred(HtmlParseError.Null);
            Append('-', '\ufffd');
            // Copy the result into the caller's token. Rebinding the ref
            // parameter would only change the local alias and leave the
            // caller's storage untouched.
            token = Comment(GetNext());
            return true;
        case '>':
            State = HtmlParseMode.PCData;
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            break;
        case '\uffff':
            RaiseErrorOccurred(HtmlParseError.EOF);
            Back();
            break;
        default:
            Append('-', c);
            token = Comment(GetNext());
            return true;
    }
    token = NewComment();
    return true;
}
/// <summary>
/// Reads the comment body until a dash sequence ends it or the input ends.
/// </summary>
private ref StructHtmlToken Comment(char c)
{
    for (; ; c = GetNext()) {
        if (c == '-') {
            if (CommentDashEnd(GetNext(), ref _token)) {
                return ref _token;
            }
            continue;
        }
        if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            Back();
            return ref NewComment();
        }
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles the character after a single '-' inside a comment.
/// </summary>
/// <returns>True when <paramref name="token"/> holds the finished comment; false to keep reading the body.</returns>
private bool CommentDashEnd(char c, ref StructHtmlToken token)
{
    if (c == '-') {
        return CommentEnd(GetNext(), ref token);
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        token = NewComment();
        return true;
    }
    if (c == '\0') {
        RaiseErrorOccurred(HtmlParseError.Null);
        c = '\ufffd';
    }
    Append('-', c);
    return false;
}
/// <summary>
/// Handles the characters after "--" inside a comment.
/// </summary>
/// <returns>True when <paramref name="token"/> holds the finished comment; false to keep reading the body.</returns>
private bool CommentEnd(char c, ref StructHtmlToken token)
{
    // Every additional dash is reported and kept as comment text.
    while (c == '-') {
        RaiseErrorOccurred(HtmlParseError.CommentEndedWithDash);
        Append('-');
        c = GetNext();
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        token = NewComment();
        return true;
    }
    if (c == '\0') {
        RaiseErrorOccurred(HtmlParseError.Null);
        Append('-', '\ufffd');
        return false;
    }
    if (c == '!') {
        RaiseErrorOccurred(HtmlParseError.CommentEndedWithEM);
        return CommentBangEnd(GetNext(), ref token);
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        token = NewComment();
        return true;
    }
    RaiseErrorOccurred(HtmlParseError.CommentEndedUnexpected);
    Append('-', '-', c);
    return false;
}
/// <summary>
/// Handles the character after "--!" inside a comment.
/// </summary>
/// <returns>True when <paramref name="token"/> holds the finished comment; false to keep reading the body.</returns>
private bool CommentBangEnd(char c, ref StructHtmlToken token)
{
    if (c == '-') {
        Append('-', '-', '!');
        return CommentDashEnd(GetNext(), ref token);
    }
    if (c == '\0') {
        RaiseErrorOccurred(HtmlParseError.Null);
        Append('-', '-', '!', '\ufffd');
        return false;
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
    } else if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
    } else {
        Append('-', '-', '!', c);
        return false;
    }
    token = NewComment();
    return true;
}
/// <summary>
/// Handles the character after the DOCTYPE keyword.
/// </summary>
private ref StructHtmlToken Doctype(char c)
{
    if (c.IsSpaceCharacter()) {
        return ref DoctypeNameBefore(GetNext());
    }
    if (c != '\uffff') {
        RaiseErrorOccurred(HtmlParseError.DoctypeUnexpected);
        return ref DoctypeNameBefore(c);
    }
    RaiseErrorOccurred(HtmlParseError.EOF);
    Back();
    return ref NewDoctype(true);
}
/// <summary>
/// Skips whitespace before the doctype name and creates the doctype token.
/// '&gt;' and end of input yield a doctype created with NewDoctype(true);
/// otherwise the first name character is buffered and DoctypeName continues.
/// </summary>
private ref StructHtmlToken DoctypeNameBefore(char c)
{
    while (c.IsSpaceCharacter()) {
        c = GetNext();
    }
    if (c == '>') {
        ref StructHtmlToken closed = ref NewDoctype(true);
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        return ref closed;
    }
    if (c == '\uffff') {
        ref StructHtmlToken truncated = ref NewDoctype(true);
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        return ref truncated;
    }
    ref StructHtmlToken doctype = ref NewDoctype(false);
    if (c.IsUppercaseAscii()) {
        Append(char.ToLowerInvariant(c));
    } else if (c == '\0') {
        AppendReplacement();
    } else {
        Append(c);
    }
    return ref DoctypeName(ref doctype);
}
/// <summary>
/// Reads the remainder of the doctype name (lowercasing ASCII letters).
/// </summary>
private ref StructHtmlToken DoctypeName(ref StructHtmlToken doctype)
{
    for (; ; ) {
        char c = GetNext();
        if (c.IsSpaceCharacter()) {
            doctype.Name = FlushBufferFast();
            return ref DoctypeNameAfter(ref doctype);
        }
        if (c == '>') {
            State = HtmlParseMode.PCData;
            doctype.Name = FlushBufferFast();
            return ref doctype;
        }
        if (c.IsUppercaseAscii()) {
            Append(char.ToLowerInvariant(c));
        } else if (c == '\0') {
            AppendReplacement();
        } else if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            Back();
            doctype.IsQuirksForced = true;
            doctype.Name = FlushBufferFast();
            return ref doctype;
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles what follows the doctype name: end of the doctype, a PUBLIC or
/// SYSTEM keyword, or garbage (bogus doctype with quirks forced).
/// </summary>
private ref StructHtmlToken DoctypeNameAfter(ref StructHtmlToken doctype)
{
    char c = SkipSpaces();
    if (c == '>') {
        State = HtmlParseMode.PCData;
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        Back();
        doctype.IsQuirksForced = true;
        return ref doctype;
    }
    if (ContinuesWithInsensitive(Keywords.Public)) {
        Advance(5);
        return ref DoctypePublic(ref doctype);
    }
    if (ContinuesWithInsensitive(Keywords.System)) {
        Advance(5);
        return ref DoctypeSystem(ref doctype);
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeUnexpectedAfterName);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Handles the character directly after the PUBLIC keyword. A quote
/// without preceding whitespace is reported but still starts the identifier.
/// </summary>
private ref StructHtmlToken DoctypePublic(ref StructHtmlToken doctype)
{
    char c = GetNext();
    if (c.IsSpaceCharacter()) {
        return ref DoctypePublicIdentifierBefore(ref doctype);
    }
    if (c == '"') {
        RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected);
        doctype.PublicIdentifier = StringOrMemory.Empty;
        return ref DoctypePublicIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected);
        doctype.PublicIdentifier = StringOrMemory.Empty;
        return ref DoctypePublicIdentifierSingleQuoted(ref doctype);
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        doctype.IsQuirksForced = true;
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypePublicInvalid);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Skips whitespace after PUBLIC and expects the opening quote of the
/// public identifier.
/// </summary>
private ref StructHtmlToken DoctypePublicIdentifierBefore(ref StructHtmlToken doctype)
{
    char c = SkipSpaces();
    if (c == '"') {
        doctype.PublicIdentifier = StringOrMemory.Empty;
        return ref DoctypePublicIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        doctype.PublicIdentifier = StringOrMemory.Empty;
        return ref DoctypePublicIdentifierSingleQuoted(ref doctype);
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        doctype.IsQuirksForced = true;
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypePublicInvalid);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Reads a double-quoted public identifier. '&gt;' or end of input ends the
/// doctype early with quirks forced.
/// </summary>
private ref StructHtmlToken DoctypePublicIdentifierDoubleQuoted(ref StructHtmlToken doctype)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '"') {
            doctype.PublicIdentifier = FlushBufferFast();
            return ref DoctypePublicIdentifierAfter(ref doctype);
        }
        if (c == '>') {
            State = HtmlParseMode.PCData;
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = FlushBufferFast();
            return ref doctype;
        }
        if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            Back();
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = FlushBufferFast();
            return ref doctype;
        }
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Reads a single-quoted public identifier. '&gt;' or end of input ends the
/// doctype early with quirks forced.
/// </summary>
private ref StructHtmlToken DoctypePublicIdentifierSingleQuoted(ref StructHtmlToken doctype)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '\'') {
            doctype.PublicIdentifier = FlushBufferFast();
            return ref DoctypePublicIdentifierAfter(ref doctype);
        }
        if (c == '>') {
            State = HtmlParseMode.PCData;
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = FlushBufferFast();
            return ref doctype;
        }
        if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = FlushBufferFast();
            Back();
            return ref doctype;
        }
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles the character directly after the closing quote of the public
/// identifier. A quote without preceding whitespace is reported but still
/// starts the system identifier.
/// </summary>
private ref StructHtmlToken DoctypePublicIdentifierAfter(ref StructHtmlToken doctype)
{
    char c = GetNext();
    if (c.IsSpaceCharacter()) {
        return ref DoctypeBetween(ref doctype);
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        return ref doctype;
    }
    if (c == '"') {
        RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierSingleQuoted(ref doctype);
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Skips whitespace between the public and system identifiers and expects
/// either the end of the doctype or the system identifier's opening quote.
/// </summary>
private ref StructHtmlToken DoctypeBetween(ref StructHtmlToken doctype)
{
    char c = SkipSpaces();
    if (c == '>') {
        State = HtmlParseMode.PCData;
        return ref doctype;
    }
    if (c == '"') {
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierSingleQuoted(ref doctype);
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Handles the character directly after the SYSTEM keyword. A quote
/// without preceding whitespace is reported but still starts the identifier.
/// </summary>
private ref StructHtmlToken DoctypeSystem(ref StructHtmlToken doctype)
{
    char c = GetNext();
    if (c.IsSpaceCharacter()) {
        State = HtmlParseMode.PCData;
        return ref DoctypeSystemIdentifierBefore(ref doctype);
    }
    if (c == '"') {
        RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierSingleQuoted(ref doctype);
    }
    if (c == '>') {
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        doctype.SystemIdentifier = FlushBufferFast();
        doctype.IsQuirksForced = true;
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeSystemInvalid);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Skips whitespace after SYSTEM and expects the opening quote of the
/// system identifier.
/// </summary>
private ref StructHtmlToken DoctypeSystemIdentifierBefore(ref StructHtmlToken doctype)
{
    char c = SkipSpaces();
    if (c == '"') {
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype);
    }
    if (c == '\'') {
        doctype.SystemIdentifier = StringOrMemory.Empty;
        return ref DoctypeSystemIdentifierSingleQuoted(ref doctype);
    }
    if (c == '>') {
        State = HtmlParseMode.PCData;
        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = FlushBufferFast();
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = FlushBufferFast();
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Reads a double-quoted system identifier. '&gt;' or end of input ends the
/// doctype early with quirks forced.
/// </summary>
private ref StructHtmlToken DoctypeSystemIdentifierDoubleQuoted(ref StructHtmlToken doctype)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '"') {
            doctype.SystemIdentifier = FlushBufferFast();
            return ref DoctypeSystemIdentifierAfter(ref doctype);
        }
        if (c == '>') {
            State = HtmlParseMode.PCData;
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = FlushBufferFast();
            return ref doctype;
        }
        if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = FlushBufferFast();
            Back();
            return ref doctype;
        }
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Reads a single-quoted system identifier. '&gt;' or end of input ends the
/// doctype early with quirks forced.
/// </summary>
private ref StructHtmlToken DoctypeSystemIdentifierSingleQuoted(ref StructHtmlToken doctype)
{
    for (; ; ) {
        char c = GetNext();
        if (c == '\'') {
            doctype.SystemIdentifier = FlushBufferFast();
            return ref DoctypeSystemIdentifierAfter(ref doctype);
        }
        if (c == '>') {
            State = HtmlParseMode.PCData;
            RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = FlushBufferFast();
            return ref doctype;
        }
        if (c == '\uffff') {
            RaiseErrorOccurred(HtmlParseError.EOF);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = FlushBufferFast();
            Back();
            return ref doctype;
        }
        if (c == '\0') {
            AppendReplacement();
        } else {
            Append(c);
        }
    }
}
/// <summary>
/// Handles what follows the system identifier. Trailing garbage is
/// reported and skipped as a bogus doctype without forcing quirks.
/// </summary>
private ref StructHtmlToken DoctypeSystemIdentifierAfter(ref StructHtmlToken doctype)
{
    char c = SkipSpaces();
    if (c == '>') {
        State = HtmlParseMode.PCData;
        return ref doctype;
    }
    if (c == '\uffff') {
        RaiseErrorOccurred(HtmlParseError.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return ref doctype;
    }
    RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter);
    return ref BogusDoctype(ref doctype);
}
/// <summary>
/// Skips the remainder of a malformed doctype up to '&gt;' or end of input
/// and returns the doctype as collected so far.
/// </summary>
private ref StructHtmlToken BogusDoctype(ref StructHtmlToken doctype)
{
    char c;
    do {
        c = GetNext();
    } while (c != '>' && c != '\uffff');
    if (c == '>') {
        State = HtmlParseMode.PCData;
    } else {
        Back();
    }
    return ref doctype;
}
/// <summary>
/// Parses the attribute list of <paramref name="tag"/> with a small state
/// machine (see AttributeState) and returns the emitted tag, or an EOF
/// token when the input ends inside the tag.
/// </summary>
private ref StructHtmlToken ParseAttributes(ref StructHtmlToken tag)
{
    AttributeState state = AttributeState.BeforeName;
    char quote = '"';
    char c = '\0';
    TextPosition nameStart = GetCurrentPosition();
    // Whether the attribute currently being parsed passed the emit filter.
    bool emit = false;
    for (; ; ) {
        switch (state) {
            case AttributeState.BeforeName:
            case AttributeState.AfterName:
                c = SkipSpaces();
                if (c == '/') {
                    state = AttributeState.SelfClose;
                } else if (c == '>') {
                    return ref EmitTag(ref tag);
                } else if (c == '=' && state == AttributeState.AfterName) {
                    state = AttributeState.BeforeValue;
                } else if (c == '\uffff') {
                    return ref NewEof(false);
                } else {
                    // First character of a new attribute name. '=' can only get
                    // here from BeforeName, where it is an invalid name start.
                    if (c.IsUppercaseAscii() && !IsPreservingAttributeNames) {
                        Append(char.ToLowerInvariant(c));
                    } else if (c == '\0') {
                        AppendReplacement();
                    } else {
                        if (c == '"' || c == '\'' || c == '<' || c == '=') {
                            RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
                        }
                        Append(c);
                    }
                    nameStart = GetCurrentPosition();
                    state = AttributeState.Name;
                }
                break;

            case AttributeState.Name:
                c = GetNext();
                if (c == '>') {
                    CommitAttributeName(ref tag, nameStart);
                    return ref EmitTag(ref tag);
                }
                if (c == '=') {
                    emit = CommitAttributeName(ref tag, nameStart);
                    state = AttributeState.BeforeValue;
                } else if (c.IsSpaceCharacter()) {
                    emit = CommitAttributeName(ref tag, nameStart);
                    state = AttributeState.AfterName;
                } else if (c == '/') {
                    emit = CommitAttributeName(ref tag, nameStart);
                    state = AttributeState.SelfClose;
                } else if (c == '\uffff') {
                    return ref NewEof(false);
                } else if (c.IsUppercaseAscii() && !IsPreservingAttributeNames) {
                    Append(char.ToLowerInvariant(c));
                } else if (c == '\0') {
                    AppendReplacement();
                } else {
                    if (c == '"' || c == '\'' || c == '<') {
                        RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid);
                    }
                    Append(c);
                }
                break;

            case AttributeState.BeforeValue:
                c = SkipSpaces();
                if (c == '"' || c == '\'') {
                    state = AttributeState.QuotedValue;
                    quote = c;
                    break;
                }
                if (c == '&') {
                    // Not consumed here: the unquoted state resolves the reference.
                    state = AttributeState.UnquotedValue;
                    break;
                }
                if (c == '>') {
                    RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
                    return ref EmitTag(ref tag);
                }
                if (c == '\uffff') {
                    return ref NewEof(false);
                }
                if (c == '\0') {
                    AppendReplacement();
                } else {
                    if (c == '<' || c == '=' || c == '`') {
                        RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
                    }
                    Append(c);
                }
                state = AttributeState.UnquotedValue;
                c = GetNext();
                break;

            case AttributeState.QuotedValue:
                c = GetNext();
                if (c == quote) {
                    CommitAttributeValue(ref tag, emit);
                    state = AttributeState.AfterValue;
                } else if (c == '&') {
                    AppendCharacterReference(GetNext(), quote, true);
                } else if (c == '\uffff') {
                    return ref NewEof(false);
                } else if (c == '\0') {
                    AppendReplacement();
                } else {
                    Append(c);
                }
                break;

            case AttributeState.UnquotedValue:
                // The character to examine was already read by the previous step.
                if (c == '>') {
                    CommitAttributeValue(ref tag, emit);
                    return ref EmitTag(ref tag);
                }
                if (c.IsSpaceCharacter()) {
                    CommitAttributeValue(ref tag, emit);
                    state = AttributeState.BeforeName;
                    break;
                }
                if (c == '\uffff') {
                    return ref NewEof(false);
                }
                if (c == '&') {
                    AppendCharacterReference(GetNext(), '>', true);
                } else if (c == '\0') {
                    AppendReplacement();
                } else {
                    if (c == '"' || c == '\'' || c == '<' || c == '=' || c == '`') {
                        RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid);
                    }
                    Append(c);
                }
                c = GetNext();
                break;

            case AttributeState.AfterValue:
                c = GetNext();
                if (c == '>') {
                    return ref EmitTag(ref tag);
                }
                if (c.IsSpaceCharacter()) {
                    state = AttributeState.BeforeName;
                } else if (c == '/') {
                    state = AttributeState.SelfClose;
                } else if (c == '\uffff') {
                    return ref NewEof(false);
                } else {
                    RaiseErrorOccurred(HtmlParseError.AttributeNameExpected);
                    Back();
                    state = AttributeState.BeforeName;
                }
                break;

            case AttributeState.SelfClose:
                if (TagSelfClosingInner(ref tag)) {
                    return ref tag;
                }
                state = AttributeState.BeforeName;
                break;
        }
    }
}

/// <summary>
/// Flushes the buffered attribute name and adds the attribute to the tag
/// if the emit filter accepts it. Returns the filter's verdict.
/// </summary>
private bool CommitAttributeName(ref StructHtmlToken tag, TextPosition position)
{
    StringOrMemory name = FlushBufferFast(HtmlAttributesLookup.TryGetWellKnownAttributeName);
    bool accepted = _shouldEmitAttribute(ref tag, name.Memory);
    if (accepted) {
        tag.AddAttribute(name, position);
    }
    return accepted;
}

/// <summary>
/// Stores the buffered text as the value of the current attribute when it
/// is being emitted; otherwise discards the buffer.
/// </summary>
private void CommitAttributeValue(ref StructHtmlToken tag, bool emit)
{
    if (emit) {
        StringOrMemory value = FlushBufferFast();
        tag.SetAttributeValue(value);
    } else {
        base.CharBuffer.Discard();
    }
}
/// <summary>
/// Tokenizes script content with a local state machine over <see cref="ScriptState"/>.
/// Characters are accumulated in the character buffer until either the end of the
/// input is reached or an end tag matching the last start tag is recognized.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <returns>
/// A character token (or a skipped-content token when <c>SkipScriptText</c> is set),
/// or the end tag token when the end tag is the very first thing in the buffer.
/// </returns>
private ref StructHtmlToken ScriptData(char c)
{
    int lastTagLength = _lastStartTag.Length;
    int scriptLength = TagNames.Script.Length;
    ScriptState scriptState = ScriptState.Normal;
    // Buffer index at which the currently examined tag name starts.
    int offset = 0;
    while (true) {
        switch (scriptState) {
            case ScriptState.Normal:
                switch (c) {
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        break;
                    case '<':
                        Append('<');
                        scriptState = ScriptState.OpenTag;
                        break;
                    case '\uffff':
                        // End of input: step back and flush what was collected.
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        Append(c);
                        c = GetNext();
                        break;
                }
                break;
            case ScriptState.OpenTag:
                c = GetNext();
                switch (c) {
                    case '/':
                        scriptState = ScriptState.EndTag;
                        break;
                    case '!':
                        scriptState = ScriptState.StartEscape;
                        break;
                    default:
                        scriptState = ScriptState.Normal;
                        break;
                }
                break;
            case ScriptState.StartEscape:
                Append('!');
                c = GetNext();
                scriptState = ((c == '-') ? ScriptState.StartEscapeDash : ScriptState.Normal);
                break;
            case ScriptState.StartEscapeDash:
                c = GetNext();
                Append('-');
                if (c == '-') {
                    Append('-');
                    scriptState = ScriptState.EscapedDashDash;
                } else
                    scriptState = ScriptState.Normal;
                break;
            case ScriptState.EndTag: {
                c = GetNext();
                Append('/');
                offset = base.CharBuffer.Length;
                ref StructHtmlToken endTag = ref NewTagClose();
                while (c.IsLetter()) {
                    Append(c);
                    c = GetNext();
                    bool isSpace = c.IsSpaceCharacter();
                    bool isClosed = c == '>';
                    bool isSlash = c == '/';
                    if (base.CharBuffer.Length - offset == lastTagLength && (isSpace || isClosed || isSlash) && base.CharBuffer.HasTextAt(_lastStartTag.Memory.Span, offset, lastTagLength, StringComparison.OrdinalIgnoreCase)) {
                        if (offset > 2) {
                            // The buffer holds text in front of "</": rewind the input over
                            // '<', '/', the name and the delimiter, drop "</name" from the
                            // buffer and emit only the preceding text.
                            Back(3 + lastTagLength);
                            base.CharBuffer.Remove(offset - 2, lastTagLength + 2);
                            if (SkipScriptText)
                                return ref NewSkippedContent(HtmlTokenType.Character);
                            return ref NewCharacter();
                        }
                        // Nothing precedes the end tag: emit the tag itself.
                        base.CharBuffer.Discard();
                        if (isSpace) {
                            endTag.Name = _lastStartTag;
                            return ref ParseAttributes(ref endTag);
                        }
                        if (isSlash) {
                            endTag.Name = _lastStartTag;
                            return ref TagSelfClosing(ref endTag);
                        }
                        if (isClosed) {
                            endTag.Name = _lastStartTag;
                            return ref EmitTag(ref endTag);
                        }
                    }
                }
                scriptState = ScriptState.Normal;
                break;
            }
            case ScriptState.Escaped:
                switch (c) {
                    case '-':
                        Append('-');
                        c = GetNext();
                        scriptState = ScriptState.EscapedDash;
                        break;
                    case '<':
                        c = GetNext();
                        scriptState = ScriptState.EscapedOpenTag;
                        break;
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        break;
                    case '\uffff':
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        // The character is not consumed here; Normal handles it.
                        scriptState = ScriptState.Normal;
                        break;
                }
                break;
            case ScriptState.EscapedDash:
                switch (c) {
                    case '-':
                        Append('-');
                        scriptState = ScriptState.EscapedDashDash;
                        break;
                    case '<':
                        c = GetNext();
                        scriptState = ScriptState.EscapedOpenTag;
                        break;
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        scriptState = ScriptState.Escaped;
                        break;
                    case '\uffff':
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        Append(c);
                        c = GetNext();
                        scriptState = ScriptState.Escaped;
                        break;
                }
                break;
            case ScriptState.EscapedDashDash:
                c = GetNext();
                switch (c) {
                    case '-':
                        Append('-');
                        break;
                    case '<':
                        c = GetNext();
                        scriptState = ScriptState.EscapedOpenTag;
                        break;
                    case '>':
                        Append('>');
                        c = GetNext();
                        scriptState = ScriptState.Normal;
                        break;
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        scriptState = ScriptState.Escaped;
                        break;
                    case '\uffff':
                        // Unlike the other end-of-input branches, this one does not call Back().
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        Append(c);
                        c = GetNext();
                        scriptState = ScriptState.Escaped;
                        break;
                }
                break;
            case ScriptState.EscapedOpenTag:
                if (c == '/') {
                    c = GetNext();
                    scriptState = ScriptState.EscapedEndTag;
                } else if (c.IsLetter()) {
                    Append('<');
                    offset = base.CharBuffer.Length;
                    Append(c);
                    scriptState = ScriptState.StartDoubleEscape;
                } else {
                    Append('<');
                    scriptState = ScriptState.Escaped;
                }
                break;
            case ScriptState.EscapedEndTag:
                Append('<', '/');
                offset = base.CharBuffer.Length;
                if (c.IsLetter()) {
                    Append(c);
                    scriptState = ScriptState.EscapedNameEndTag;
                } else
                    scriptState = ScriptState.Escaped;
                break;
            case ScriptState.EscapedNameEndTag:
                c = GetNext();
                if (base.CharBuffer.Length - offset == scriptLength && (c == '/' || c == '>' || c.IsSpaceCharacter()) && base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script))) {
                    // "</script" found inside escaped content: rewind the input over it
                    // (plus the delimiter), drop it from the buffer and emit the text.
                    Back(scriptLength + 3);
                    base.CharBuffer.Remove(offset - 2, scriptLength + 2);
                    if (SkipScriptText)
                        return ref NewSkippedContent(HtmlTokenType.Character);
                    return ref NewCharacter();
                } else if (!c.IsLetter()) {
                    // Also reached for a same-length name that is not "script" followed by a
                    // delimiter: the delimiter must be handed to Escaped instead of being
                    // overwritten by the next GetNext() call.
                    scriptState = ScriptState.Escaped;
                } else {
                    Append(c);
                }
                break;
            case ScriptState.StartDoubleEscape:
                c = GetNext();
                if (base.CharBuffer.Length - offset == scriptLength && (c == '/' || c == '>' || c.IsSpaceCharacter())) {
                    bool isScript = base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script));
                    Append(c);
                    c = GetNext();
                    scriptState = (isScript ? ScriptState.EscapedDouble : ScriptState.Escaped);
                } else if (c.IsLetter()) {
                    Append(c);
                } else {
                    scriptState = ScriptState.Escaped;
                }
                break;
            case ScriptState.EscapedDouble:
                switch (c) {
                    case '-':
                        Append('-');
                        c = GetNext();
                        scriptState = ScriptState.EscapedDoubleDash;
                        break;
                    case '<':
                        Append('<');
                        c = GetNext();
                        scriptState = ScriptState.EscapedDoubleOpenTag;
                        break;
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        break;
                    case '\uffff':
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        Append(c);
                        c = GetNext();
                        break;
                }
                break;
            case ScriptState.EscapedDoubleDash:
                switch (c) {
                    case '-':
                        Append('-');
                        scriptState = ScriptState.EscapedDoubleDashDash;
                        break;
                    case '<':
                        Append('<');
                        c = GetNext();
                        scriptState = ScriptState.EscapedDoubleOpenTag;
                        break;
                    case '\0':
                        // Swap in the replacement character; EscapedDouble appends it.
                        RaiseErrorOccurred(HtmlParseError.Null);
                        c = '\ufffd';
                        scriptState = ScriptState.EscapedDouble;
                        break;
                    case '\uffff':
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        scriptState = ScriptState.EscapedDouble;
                        break;
                }
                break;
            case ScriptState.EscapedDoubleDashDash:
                c = GetNext();
                switch (c) {
                    case '-':
                        Append('-');
                        break;
                    case '<':
                        Append('<');
                        c = GetNext();
                        scriptState = ScriptState.EscapedDoubleOpenTag;
                        break;
                    case '>':
                        Append('>');
                        c = GetNext();
                        scriptState = ScriptState.Normal;
                        break;
                    case '\0':
                        AppendReplacement();
                        c = GetNext();
                        scriptState = ScriptState.EscapedDouble;
                        break;
                    case '\uffff':
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        if (SkipScriptText)
                            return ref NewSkippedContent(HtmlTokenType.Character);
                        return ref NewCharacter();
                    default:
                        Append(c);
                        c = GetNext();
                        scriptState = ScriptState.EscapedDouble;
                        break;
                }
                break;
            case ScriptState.EscapedDoubleOpenTag:
                if (c == '/') {
                    Append('/');
                    offset = base.CharBuffer.Length;
                    scriptState = ScriptState.EndDoubleEscape;
                } else
                    scriptState = ScriptState.EscapedDouble;
                break;
            case ScriptState.EndDoubleEscape:
                c = GetNext();
                if (base.CharBuffer.Length - offset == scriptLength && (c.IsSpaceCharacter() || c == '/' || c == '>')) {
                    bool isScript = base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script));
                    Append(c);
                    c = GetNext();
                    scriptState = (isScript ? ScriptState.Escaped : ScriptState.EscapedDouble);
                } else if (c.IsLetter()) {
                    Append(c);
                } else {
                    scriptState = ScriptState.EscapedDouble;
                }
                break;
        }
    }
}
/// <summary>
/// Reads the next character, records the current position as the position of
/// the upcoming token and dispatches to the handler for the current parse mode.
/// </summary>
/// <returns>
/// The token produced by the mode-specific handler, or an end-of-file token
/// (without raising an error) when the input is exhausted or the mode has no handler here.
/// </returns>
private ref StructHtmlToken GetNextStructToken()
{
    char current = GetNext();
    _position = GetCurrentPosition();
    if (current == '\uffff') {
        return ref NewEof(true);
    }
    HtmlParseMode mode = State;
    if (mode == HtmlParseMode.PCData) {
        return ref Data(current);
    }
    if (mode == HtmlParseMode.RCData) {
        return ref RCData(current);
    }
    if (mode == HtmlParseMode.Plaintext) {
        return ref Plaintext(current);
    }
    if (mode == HtmlParseMode.Rawtext) {
        return ref Rawtext(current);
    }
    if (mode == HtmlParseMode.Script) {
        return ref ScriptData(current);
    }
    return ref NewEof(true);
}
/// <summary>
/// Discards the buffered characters and stores a skipped token of the given type
/// at the recorded position.
/// </summary>
/// <param name="htmlTokenType">The token type the skipped content stands for.</param>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewSkippedContent(HtmlTokenType htmlTokenType = HtmlTokenType.Character)
{
    base.CharBuffer.Discard();
    ref StructHtmlToken slot = ref _token;
    slot = StructHtmlToken.Skipped(htmlTokenType, _position);
    return ref slot;
}
/// <summary>
/// Flushes the character buffer into a character token at the recorded position.
/// </summary>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewCharacter()
{
    _token = StructHtmlToken.Character(FlushBufferFast(), _position);
    return ref _token;
}
/// <summary>
/// Creates a processing-instruction token from the buffered text, or a skipped
/// comment-type token when <c>SkipProcessingInstructions</c> is set.
/// </summary>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewProcessingInstruction()
{
    if (!SkipProcessingInstructions) {
        _token = StructHtmlToken.ProcessingInstruction(FlushBufferFast(), _position);
        return ref _token;
    }
    return ref NewSkippedContent(HtmlTokenType.Comment);
}
/// <summary>
/// Creates a comment token from the buffered text, or a skipped comment-type
/// token when <c>SkipComments</c> is set.
/// </summary>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewComment()
{
    if (!SkipComments) {
        _token = StructHtmlToken.Comment(FlushBufferFast(), _position);
        return ref _token;
    }
    return ref NewSkippedContent(HtmlTokenType.Comment);
}
/// <summary>
/// Stores an end-of-file token at the recorded position.
/// </summary>
/// <param name="acceptable">
/// When <c>false</c>, an <c>HtmlParseError.EOF</c> error is raised before the token is created.
/// </param>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewEof(bool acceptable = false)
{
    if (acceptable == false) {
        RaiseErrorOccurred(HtmlParseError.EOF);
    }
    ref StructHtmlToken slot = ref _token;
    slot = StructHtmlToken.EndOfFile(_position);
    return ref slot;
}
/// <summary>
/// Stores a doctype token at the recorded position.
/// </summary>
/// <param name="quirksForced">Value passed through to <c>StructHtmlToken.Doctype</c>.</param>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewDoctype(bool quirksForced)
{
    ref StructHtmlToken slot = ref _token;
    slot = StructHtmlToken.Doctype(quirksForced, _position);
    return ref slot;
}
/// <summary>
/// Stores a fresh open-tag token at the recorded position.
/// </summary>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewTagOpen()
{
    ref StructHtmlToken slot = ref _token;
    slot = StructHtmlToken.TagOpen(_position);
    return ref slot;
}
/// <summary>
/// Stores a fresh close-tag token at the recorded position.
/// </summary>
/// <returns>A reference to the tokenizer's token slot.</returns>
private ref StructHtmlToken NewTagClose()
{
    ref StructHtmlToken slot = ref _token;
    slot = StructHtmlToken.TagClose(_position);
    return ref slot;
}
/// <summary>
/// Raises the given parse error at the tokenizer's current position.
/// </summary>
/// <param name="code">The parse error to report.</param>
private void RaiseErrorOccurred(HtmlParseError code)
{
    TextPosition here = GetCurrentPosition();
    RaiseErrorOccurred(code, here);
}
/// <summary>
/// Raises an <c>HtmlParseError.Null</c> error and appends the replacement
/// character (U+FFFD) to the buffer.
/// </summary>
private void AppendReplacement()
{
    RaiseErrorOccurred(HtmlParseError.Null);
    const char replacement = '\ufffd';
    Append(replacement);
}
/// <summary>
/// Checks whether the buffered text is the name of the last start tag and is
/// followed by a delimiter; if so, builds the matching end tag token.
/// </summary>
/// <param name="c">The character following the buffered name.</param>
/// <param name="token">Receives the resulting token when the method returns <c>true</c>.</param>
/// <returns><c>true</c> if an end tag token was written to <paramref name="token"/>; otherwise <c>false</c>.</returns>
private bool CreateIfAppropriate(char c, ref StructHtmlToken token)
{
    bool followedBySpace = c.IsSpaceCharacter();
    bool followedByClose = c == '>';
    bool followedBySlash = c == '/';
    if (base.CharBuffer.Length != _lastStartTag.Length) {
        return false;
    }
    if (!followedBySpace && !followedByClose && !followedBySlash) {
        return false;
    }
    if (!base.CharBuffer.Is(_lastStartTag)) {
        return false;
    }
    ref StructHtmlToken endTag = ref NewTagClose();
    base.CharBuffer.Discard();
    endTag.Name = _lastStartTag;
    if (followedBySpace) {
        token = ParseAttributes(ref endTag);
    } else if (followedBySlash) {
        token = TagSelfClosing(ref endTag);
    } else {
        // Only '>' remains, since one of the three delimiters is known to match.
        token = EmitTag(ref endTag);
    }
    return true;
}
/// <summary>
/// Finalizes a tag token: resets the parse mode to PCData, removes duplicate
/// attributes from start tags (reporting each removal) and reports self-closing
/// flags or attributes on end tags.
/// </summary>
/// <param name="tag">The tag token to finalize.</param>
/// <returns>The same token reference that was passed in.</returns>
private ref StructHtmlToken EmitTag(ref StructHtmlToken tag)
{
    StructAttributes attrs = tag.Attributes;
    State = HtmlParseMode.PCData;
    HtmlTokenType kind = tag.Type;
    if (kind == HtmlTokenType.StartTag) {
        // Walk from the last attribute backwards; an attribute is dropped when
        // any attribute in front of it carries the same name.
        int later = attrs.Count - 1;
        while (later > 0) {
            int earlier = later - 1;
            while (earlier >= 0) {
                if (attrs[earlier].Name.Is(attrs[later].Name)) {
                    tag.RemoveAttributeAt(later);
                    RaiseErrorOccurred(HtmlParseError.AttributeDuplicateOmitted, tag.Position);
                    break;
                }
                earlier--;
            }
            later--;
        }
        // Remembered so later end tags can be matched against it.
        _lastStartTag = tag.Name;
    } else if (kind == HtmlTokenType.EndTag) {
        if (tag.IsSelfClosing) {
            RaiseErrorOccurred(HtmlParseError.EndTagCannotBeSelfClosed, tag.Position);
        }
        if (attrs.Count != 0) {
            RaiseErrorOccurred(HtmlParseError.EndTagCannotHaveAttributes, tag.Position);
        }
    }
    return ref tag;
}
}
}