AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="1.3.1" />

 HtmlTokenizer

public sealed class HtmlTokenizer : BaseTokenizer
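Performs the tokenization of the HTML source code. Follows the tokenization algorithm of the HTML specification, originally drafted at http://www.w3.org/html/wg/drafts/html/master/syntax.html and now maintained at https://html.spec.whatwg.org/multipage/parsing.html#tokenization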
using AngleSharp.Common; using AngleSharp.Dom; using AngleSharp.Html.Dom.Events; using AngleSharp.Html.Parser.Tokens; using AngleSharp.Html.Parser.Tokens.Struct; using AngleSharp.Text; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; namespace AngleSharp.Html.Parser { [System.Runtime.CompilerServices.NullableContext(1)] [System.Runtime.CompilerServices.Nullable(0)] public sealed class HtmlTokenizer : BaseTokenizer { [System.Runtime.CompilerServices.NullableContext(0)] private enum AttributeState : byte { BeforeName, Name, AfterName, BeforeValue, QuotedValue, AfterValue, UnquotedValue, SelfClose } [System.Runtime.CompilerServices.NullableContext(0)] private enum ScriptState : byte { Normal, OpenTag, EndTag, StartEscape, Escaped, StartEscapeDash, EscapedDash, EscapedDashDash, EscapedOpenTag, EscapedEndTag, EscapedNameEndTag, StartDoubleEscape, EscapedDouble, EscapedDoubleDash, EscapedDoubleDashDash, EscapedDoubleOpenTag, EndDoubleEscape } [System.Runtime.CompilerServices.NullableContext(2)] [System.Runtime.CompilerServices.Nullable(0)] private readonly struct EntityProvider { private readonly IEntityProviderExtended _fast; private readonly IEntityProvider _slow; [System.Runtime.CompilerServices.NullableContext(1)] public EntityProvider(IEntityProvider slow) { _fast = null; _slow = slow; } [System.Runtime.CompilerServices.NullableContext(1)] public EntityProvider(IEntityProviderExtended fast) { _slow = null; _fast = fast; } public string GetSymbol(StringOrMemory name) { if (_fast != null) return _fast.GetSymbol(name); if (_slow != null) return _slow.GetSymbol(name.ToString()); throw new InvalidOperationException("Should not get there, please file a bug report."); } } private readonly EntityProvider _resolver; private StringOrMemory _lastStartTag; private TextPosition _position; private StructHtmlToken _token; private ShouldEmitAttribute _shouldEmitAttribute = delegate { return true; }; [System.Runtime.CompilerServices.Nullable(2)] 
private char[] _characterReferenceBuffer; public bool SkipDataText { get; set; } public bool SkipScriptText { get; set; } public bool SkipRawText { get; set; } public bool SkipComments { get; set; } public bool SkipPlaintext { get; set; } public bool SkipRCDataText { get; set; } public bool SkipCDATA { get; set; } public bool SkipProcessingInstructions { get; set; } public ShouldEmitAttribute ShouldEmitAttribute { get { return _shouldEmitAttribute; } set { if (value != null) _shouldEmitAttribute = value; } } public bool IsAcceptingCharacterData { get; set; } public bool IsPreservingAttributeNames { get; set; } public bool IsNotConsumingCharacterReferences { get; set; } public HtmlParseMode State { get; set; } public bool IsStrictMode { get; set; } public bool IsSupportingProcessingInstructions { get; set; } [System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] [field: System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] public Action<HtmlToken, TextRange> OnToken { [return: System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] get; [param: System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] set; } [System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] [method: System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] [field: System.Runtime.CompilerServices.Nullable(new byte[] { 2, 1 })] public event EventHandler<HtmlErrorEvent> Error; public HtmlTokenizer(TextSource source, IEntityProvider resolver) : base(source) { State = HtmlParseMode.PCData; _lastStartTag = StringOrMemory.Empty; _resolver = new EntityProvider(resolver); } public HtmlTokenizer(TextSource source, IEntityProviderExtended resolver) : base(source) { State = HtmlParseMode.PCData; _lastStartTag = StringOrMemory.Empty; _resolver = new EntityProvider(resolver); } public HtmlToken Get() { return GetStructToken().ToHtmlToken(); } public ref StructHtmlToken GetStructToken() { ref StructHtmlToken nextStructToken = ref GetNextStructToken(); 
OnToken?.Invoke(nextStructToken.ToHtmlToken(), new TextRange(_position, GetCurrentPosition().After(base.Current))); return ref nextStructToken; } internal void RaiseErrorOccurred(HtmlParseError code, TextPosition position) { EventHandler<HtmlErrorEvent> error = this.Error; if (IsStrictMode) { string message = "Error while parsing the provided HTML document."; throw new HtmlParseException(code.GetCode(), message, position); } if (error != null) { HtmlErrorEvent e = new HtmlErrorEvent(code, position); error(this, e); } } private ref StructHtmlToken Data(char c) { if (c != '<') return ref DataText(c); return ref TagOpen(GetNext()); } private ref StructHtmlToken DataText(char c) { while (true) { switch (c) { case '<': case '￿': Back(); if (SkipDataText) return ref NewSkippedContent(HtmlTokenType.Character); return ref NewCharacter(); case '&': AppendCharacterReference(GetNext(), '', false); break; case '': RaiseErrorOccurred(HtmlParseError.Null); break; default: Append(c); break; } c = GetNext(); } } private ref StructHtmlToken Plaintext(char c) { while (true) { switch (c) { case '': AppendReplacement(); break; case '￿': Back(); if (SkipPlaintext) return ref NewSkippedContent(HtmlTokenType.Character); return ref NewCharacter(); default: Append(c); break; } c = GetNext(); } } private ref StructHtmlToken RCData(char c) { if (c != '<') return ref RCDataText(c); return ref RCDataLt(GetNext()); } private ref StructHtmlToken RCDataText(char c) { while (true) { switch (c) { case '&': AppendCharacterReference(GetNext(), '', false); break; case '<': case '￿': Back(); if (SkipRCDataText) return ref NewSkippedContent(HtmlTokenType.Character); return ref NewCharacter(); case '': AppendReplacement(); break; default: Append(c); break; } c = GetNext(); } } private ref StructHtmlToken RCDataLt(char c) { if (c == '/') { c = GetNext(); if (c.IsUppercaseAscii()) { Append(char.ToLowerInvariant(c)); return ref RCDataNameEndTag(GetNext()); } if (c.IsLowercaseAscii()) { Append(c); return ref 
RCDataNameEndTag(GetNext()); } Append('<', '/'); return ref RCDataText(c); } Append('<'); return ref RCDataText(c); } private ref StructHtmlToken RCDataNameEndTag(char c) { while (true) { if (CreateIfAppropriate(c, ref _token)) return ref _token; if (c.IsUppercaseAscii()) Append(char.ToLowerInvariant(c)); else { if (!c.IsLowercaseAscii()) break; Append(c); } c = GetNext(); } base.CharBuffer.Insert(0, '<').Insert(1, '/'); return ref RCDataText(c); } private ref StructHtmlToken Rawtext(char c) { if (c != '<') return ref RawtextText(c); return ref RawtextLT(GetNext()); } private ref StructHtmlToken RawtextText(char c) { while (true) { switch (c) { case '<': case '￿': Back(); if (SkipRawText) return ref NewSkippedContent(HtmlTokenType.Character); return ref NewCharacter(); case '': AppendReplacement(); break; default: Append(c); break; } c = GetNext(); } } private ref StructHtmlToken RawtextLT(char c) { if (c == '/') { c = GetNext(); if (c.IsUppercaseAscii()) { Append(char.ToLowerInvariant(c)); return ref RawtextNameEndTag(GetNext()); } if (c.IsLowercaseAscii()) { Append(c); return ref RawtextNameEndTag(GetNext()); } Append('<', '/'); return ref RawtextText(c); } Append('<'); return ref RawtextText(c); } private ref StructHtmlToken RawtextNameEndTag(char c) { while (true) { if (CreateIfAppropriate(c, ref _token)) return ref _token; if (c.IsUppercaseAscii()) Append(char.ToLowerInvariant(c)); else { if (!c.IsLowercaseAscii()) break; Append(c); } c = GetNext(); } base.CharBuffer.Insert(0, '<').Insert(1, '/'); return ref RawtextText(c); } private ref StructHtmlToken CharacterData(char c) { while (true) { switch (c) { case '￿': Back(); goto IL_003c; case ']': { if (!ContinuesWithSensitive("]]>")) break; Advance(2); goto IL_003c; } IL_003c: if (SkipCDATA) return ref NewSkippedContent(HtmlTokenType.Character); return ref NewCharacter(); } Append(c); c = GetNext(); } } private void AppendCharacterReference(char c, char allowedCharacter = '', bool isAttribute = false) { if 
(IsNotConsumingCharacterReferences || c.IsSpaceCharacter() || c == '<' || c == '￿' || c == '&' || c == allowedCharacter)
            {
                // Not a character reference: un-read the character and keep the ampersand as literal text.
                Back();
                Append('&');
            }
            else
            {
                // '#' introduces a numeric reference; anything else is looked up as a named reference.
                string text = (c == '#')
                    ? GetNumericCharacterReference(GetNext(), isAttribute)
                    : GetLookupCharacterReference(allowedCharacter, isAttribute);

                if (text == null)
                {
                    // The reference could not be resolved, so the '&' stays in the output verbatim.
                    Append('&');
                }
                else
                {
                    base.CharBuffer.Append(MemoryExtensions.AsSpan(text));
                }
            }
        }

        /// <summary>
        /// Consumes the digits of a numeric character reference (decimal, or hexadecimal when
        /// introduced by 'x' / 'X') and converts them to the referenced text.
        /// </summary>
        /// <param name="c">The first character after the '#'.</param>
        /// <param name="isAttribute">
        /// True when called for an attribute value; suppresses the "wrong number" error that is
        /// otherwise raised when no digit follows.
        /// </param>
        /// <returns>
        /// The replacement text, or null when no digit was found (the source is rewound in that case).
        /// </returns>
        [System.Runtime.CompilerServices.NullableContext(2)]
        private string GetNumericCharacterReference(char c, bool isAttribute)
        {
            // First value beyond the last Unicode code point (U+10FFFF). The accumulator saturates
            // here instead of wrapping around: the previous int arithmetic overflowed on long digit
            // runs, so e.g. "&#4294967361;" decoded as "A" and other inputs produced negative codes.
            // NOTE(review): assumes HtmlEntityProvider.IsInvalidNumber rejects values above 0x10FFFF,
            // as it must already do for large references that did not overflow - confirm.
            const int saturatedCode = 0x110000;

            bool isHex = c == 'x' || c == 'X';
            int radix = isHex ? 16 : 10;
            int digitCount = 0;
            int code = 0;

            if (isHex)
            {
                // Step over the 'x' / 'X' marker to the first candidate digit.
                c = GetNext();
            }

            while (isHex ? c.IsHex() : c.IsDigit())
            {
                digitCount++;

                if (code < saturatedCode)
                {
                    // code < 0x110000 here, so code * 16 + 15 always fits into an int.
                    code = code * radix + c.FromHex();

                    if (code > saturatedCode)
                    {
                        code = saturatedCode;
                    }
                }

                c = GetNext();
            }

            if (digitCount == 0)
            {
                // No digits at all: rewind by two characters (three for the hex form) and let the
                // caller emit the '&' literally.
                Back(2);

                if (isHex)
                {
                    Back();
                }

                if (!isAttribute)
                {
                    RaiseErrorOccurred(HtmlParseError.CharacterReferenceWrongNumber);
                }

                return null;
            }

            if (c != ';')
            {
                // The terminating semicolon is missing; report it and un-read the character.
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceSemicolonMissing);
                Back();
            }

            if (HtmlEntityProvider.IsInCharacterTable(code))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidCode);
                return HtmlEntityProvider.GetSymbolFromTable(code);
            }

            if (HtmlEntityProvider.IsInvalidNumber(code))
            {
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidNumber);
                return '�'.ToString();
            }

            if (HtmlEntityProvider.IsInInvalidRange(code))
            {
                // Reported as an error, but the character is still produced.
                RaiseErrorOccurred(HtmlParseError.CharacterReferenceInvalidRange);
            }

            return char.ConvertFromUtf32(code);
        }

        // Resolves a named character reference through _resolver. Candidate name characters are
        // collected in a reusable 32-character buffer; the method continues on the following source line.
        [System.Runtime.CompilerServices.NullableContext(2)]
        private string GetLookupCharacterReference(char allowedCharacter, bool isAttribute)
        {
            string text = null;
            int insertionPoint = base.InsertionPoint - 1;

            if (_characterReferenceBuffer == null)
                _characterReferenceBuffer = new char[32];

            int num = 0;
            char c = base.Current;

            while (c != ';' && c.IsName())
            {
                _characterReferenceBuffer[num++] = c;
                c =
GetNext(); if (c == '￿' || num >= 31) break; } if (c == ';') { _characterReferenceBuffer[num] = ';'; text = _resolver.GetSymbol(new StringOrMemory(MemoryExtensions.AsMemory(_characterReferenceBuffer, 0, num + 1))); } while (text == null && num > 0) { text = _resolver.GetSymbol(new StringOrMemory(MemoryExtensions.AsMemory(_characterReferenceBuffer, 0, num--))); if (text == null) Back(); } c = base.Current; if (c != ';') { if (allowedCharacter != 0 && (c == '=' || c.IsAlphanumericAscii())) { if (c == '=') RaiseErrorOccurred(HtmlParseError.CharacterReferenceAttributeEqualsFound); base.InsertionPoint = insertionPoint; return null; } Back(); if (!isAttribute) RaiseErrorOccurred(HtmlParseError.CharacterReferenceNotTerminated); } return text; } private ref StructHtmlToken TagOpen(char c) { if (c == '/') return ref TagEnd(GetNext()); if (c.IsLowercaseAscii()) { Append(c); return ref TagName(ref NewTagOpen()); } if (c.IsUppercaseAscii()) { Append(char.ToLowerInvariant(c)); return ref TagName(ref NewTagOpen()); } switch (c) { case '!': return ref MarkupDeclaration(GetNext()); case '?': if (IsSupportingProcessingInstructions) return ref ProcessingInstruction(c); break; } if (c != '?') { State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.AmbiguousOpenTag); Append('<'); return ref DataText(c); } RaiseErrorOccurred(HtmlParseError.BogusComment); return ref BogusComment(c); } private ref StructHtmlToken TagEnd(char c) { if (c.IsLowercaseAscii()) { Append(c); return ref TagName(ref NewTagClose()); } if (!c.IsUppercaseAscii()) { switch (c) { case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); return ref Data(GetNext()); case '￿': Back(); RaiseErrorOccurred(HtmlParseError.EOF); Append('<', '/'); return ref NewCharacter(); default: RaiseErrorOccurred(HtmlParseError.BogusComment); return ref BogusComment(c); } } Append(char.ToLowerInvariant(c)); return ref TagName(ref NewTagClose()); } private ref StructHtmlToken TagName(ref 
StructHtmlToken tag) { while (true) { char next = GetNext(); if (next == '>') { tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName); return ref EmitTag(ref tag); } if (next.IsSpaceCharacter()) { tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName); return ref ParseAttributes(ref tag); } if (next == '/') break; if (next.IsUppercaseAscii()) Append(char.ToLowerInvariant(next)); else { switch (next) { case '': AppendReplacement(); break; default: Append(next); break; case '￿': return ref NewEof(false); } } } tag.Name = FlushBufferFast(HtmlTagNameLookup.TryGetWellKnownTagName); return ref TagSelfClosing(ref tag); } private ref StructHtmlToken TagSelfClosing(ref StructHtmlToken tag) { if (TagSelfClosingInner(ref tag)) return ref tag; return ref ParseAttributes(ref tag); } private bool TagSelfClosingInner(ref StructHtmlToken tag) { while (true) { switch (GetNext()) { case '>': tag.IsSelfClosing = true; tag = EmitTag(ref tag); return true; case '￿': tag = NewEof(false); return true; case '/': break; default: RaiseErrorOccurred(HtmlParseError.ClosingSlashMisplaced); Back(); return false; } RaiseErrorOccurred(HtmlParseError.ClosingSlashMisplaced); } } private ref StructHtmlToken MarkupDeclaration(char c) { if (ContinuesWithSensitive("--")) { Advance(); return ref CommentStart(GetNext()); } if (ContinuesWithInsensitive(TagNames.Doctype)) { Advance(6); return ref Doctype(GetNext()); } if (IsAcceptingCharacterData && ContinuesWithSensitive(Keywords.CData)) { Advance(6); return ref CharacterData(GetNext()); } RaiseErrorOccurred(HtmlParseError.UndefinedMarkupDeclaration); return ref BogusComment(c); } private ref StructHtmlToken ProcessingInstruction(char c) { base.CharBuffer.Discard(); while (true) { switch (c) { case '￿': Back(); goto case '>'; case '': c = '�'; break; case '>': State = HtmlParseMode.PCData; return ref NewProcessingInstruction(); } Append(c); c = GetNext(); } } private ref StructHtmlToken BogusComment(char c) { 
base.CharBuffer.Discard();

            // Body of BogusComment (signature on the preceding source line): everything up to the
            // next '>' or the end of input is collected and returned as a comment token.
            while (true)
            {
                switch (c)
                {
                    case '￿':
                        // End of input: un-read the sentinel and finish like a closing '>'.
                        Back();
                        goto case '>';
                    case '\0':
                        // NUL is stored as U+FFFD.
                        c = '�';
                        break;
                    case '>':
                        State = HtmlParseMode.PCData;
                        return ref NewComment();
                }

                Append(c);
                c = GetNext();
            }
        }

        /// <summary>
        /// Handles the first character after the opening "&lt;!--" of a comment.
        /// </summary>
        /// <param name="c">The character following the two opening dashes.</param>
        private ref StructHtmlToken CommentStart(char c)
        {
            base.CharBuffer.Discard();

            switch (c)
            {
                case '-':
                    if (CommentDashStart(GetNext(), ref _token))
                        return ref _token;

                    return ref Comment(GetNext());
                case '\0':
                    AppendReplacement();
                    return ref Comment(GetNext());
                case '>':
                    // "<!-->" : an empty comment that is closed too early.
                    State = HtmlParseMode.PCData;
                    RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
                    break;
                case '￿':
                    RaiseErrorOccurred(HtmlParseError.EOF);
                    Back();
                    break;
                default:
                    Append(c);
                    return ref Comment(GetNext());
            }

            return ref NewComment();
        }

        /// <summary>
        /// Handles the character after a '-' that directly follows the comment opening.
        /// </summary>
        /// <returns>True when a comment token has been produced.</returns>
        private bool CommentDashStart(char c, ref StructHtmlToken token)
        {
            switch (c)
            {
                case '-':
                    return CommentEnd(GetNext(), ref token);
                case '\0':
                    RaiseErrorOccurred(HtmlParseError.Null);
                    // One consumed dash plus the replacement for the NUL character.
                    Append('-', '�');
                    ref token = ref Comment(GetNext());
                    return true;
                case '>':
                    State = HtmlParseMode.PCData;
                    RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
                    break;
                case '￿':
                    RaiseErrorOccurred(HtmlParseError.EOF);
                    Back();
                    break;
                default:
                    Append('-', c);
                    ref token = ref Comment(GetNext());
                    return true;
            }

            token = NewComment();
            return true;
        }

        /// <summary>
        /// Collects comment data until the comment is closed or the input ends.
        /// </summary>
        private ref StructHtmlToken Comment(char c)
        {
            while (true)
            {
                switch (c)
                {
                    case '-':
                        if (CommentDashEnd(GetNext(), ref _token))
                            return ref _token;

                        break;
                    case '￿':
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        return ref NewComment();
                    case '\0':
                        AppendReplacement();
                        break;
                    default:
                        Append(c);
                        break;
                }

                c = GetNext();
            }
        }

        /// <summary>
        /// Handles the character after a single '-' inside comment data.
        /// </summary>
        /// <returns>True when a comment token has been produced; false to continue in the comment.</returns>
        private bool CommentDashEnd(char c, ref StructHtmlToken token)
        {
            switch (c)
            {
                case '-':
                    return CommentEnd(GetNext(), ref token);
                case '￿':
                    RaiseErrorOccurred(HtmlParseError.EOF);
                    Back();
                    token = NewComment();
                    return true;
                case '\0':
                    RaiseErrorOccurred(HtmlParseError.Null);
                    c = '�';
                    break;
            }

            // The dash was ordinary data: store it together with the character that followed it.
            Append('-', c);
            return false;
        }

        /// <summary>
        /// Handles the character after "--" inside a comment.
        /// </summary>
        /// <returns>True when a comment token has been produced; false to continue in the comment.</returns>
        private bool CommentEnd(char c, ref StructHtmlToken token)
        {
            while (true)
            {
                switch (c)
                {
                    case '>':
                        State = HtmlParseMode.PCData;
                        token = NewComment();
                        return true;
                    case '\0':
                        RaiseErrorOccurred(HtmlParseError.Null);
                        // Two dashes were consumed before the NUL, so both belong to the comment
                        // data (previously only one was appended and a dash was lost). This now
                        // matches the default branch below and CommentBangEnd.
                        Append('-', '-', '�');
                        return false;
                    case '!':
                        RaiseErrorOccurred(HtmlParseError.CommentEndedWithEM);
                        return CommentBangEnd(GetNext(), ref token);
                    case '-':
                        // A further dash: handled after the switch.
                        break;
                    case '￿':
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        token = NewComment();
                        return true;
                    default:
                        RaiseErrorOccurred(HtmlParseError.CommentEndedUnexpected);
                        Append('-', '-', c);
                        return false;
                }

                // "---": one dash becomes data while the last two keep acting as a potential end marker.
                RaiseErrorOccurred(HtmlParseError.CommentEndedWithDash);
                Append('-');
                c = GetNext();
            }
        }

        /// <summary>
        /// Handles the character after "--!" inside a comment.
        /// </summary>
        /// <returns>True when a comment token has been produced; false to continue in the comment.</returns>
        private bool CommentBangEnd(char c, ref StructHtmlToken token)
        {
            switch (c)
            {
                case '-':
                    Append('-', '-', '!');
                    return CommentDashEnd(GetNext(), ref token);
                case '>':
                    State = HtmlParseMode.PCData;
                    break;
                case '\0':
                    RaiseErrorOccurred(HtmlParseError.Null);
                    Append('-', '-', '!', '�');
                    return false;
                case '￿':
                    RaiseErrorOccurred(HtmlParseError.EOF);
                    Back();
                    break;
                default:
                    Append('-', '-', '!', c);
                    return false;
            }

            token = NewComment();
            return true;
        }

        /// <summary>
        /// Handles the first character after the DOCTYPE keyword.
        /// </summary>
        private ref StructHtmlToken Doctype(char c)
        {
            if (c.IsSpaceCharacter())
                return ref DoctypeNameBefore(GetNext());

            if (c == '￿')
            {
                RaiseErrorOccurred(HtmlParseError.EOF);
                Back();
                return ref NewDoctype(true);
            }

            // No whitespace after the keyword: report it and treat the character as part of the name.
            RaiseErrorOccurred(HtmlParseError.DoctypeUnexpected);
            return ref DoctypeNameBefore(c);
        }

        /// <summary>
        /// Skips whitespace before the doctype name and starts the name with its first character.
        /// </summary>
        private ref StructHtmlToken DoctypeNameBefore(char c)
        {
            while (c.IsSpaceCharacter())
            {
                c = GetNext();
            }

            if (!c.IsUppercaseAscii())
            {
                switch (c)
                {
                    case '\0':
                    {
                        ref StructHtmlToken doctype2 = ref NewDoctype(false);
                        AppendReplacement();
                        return ref DoctypeName(ref doctype2);
                    }
                    case '>':
                    {
                        ref StructHtmlToken result2 = ref NewDoctype(true);
                        State = HtmlParseMode.PCData;
                        RaiseErrorOccurred(HtmlParseError.TagClosedWrong);
                        return ref result2;
                    }
                    case '￿':
                    {
                        ref StructHtmlToken result = ref NewDoctype(true);
                        RaiseErrorOccurred(HtmlParseError.EOF);
                        Back();
                        return ref result;
                    }
                    default:
                    {
                        ref StructHtmlToken doctype = ref NewDoctype(false);
                        Append(c);
                        return ref DoctypeName(ref doctype);
                    }
                }
            }

            // Uppercase ASCII first character: the declaration started here continues on the next source line.
            ref
StructHtmlToken doctype3 = ref NewDoctype(false); Append(char.ToLowerInvariant(c)); return ref DoctypeName(ref doctype3); } private ref StructHtmlToken DoctypeName(ref StructHtmlToken doctype) { while (true) { char next = GetNext(); if (next.IsSpaceCharacter()) { doctype.Name = FlushBufferFast(); return ref DoctypeNameAfter(ref doctype); } if (next == '>') { State = HtmlParseMode.PCData; doctype.Name = FlushBufferFast(); break; } if (next.IsUppercaseAscii()) Append(char.ToLowerInvariant(next)); else { switch (next) { case '': break; case '￿': goto IL_0066; default: goto IL_0088; } AppendReplacement(); } continue; IL_0088: Append(next); continue; IL_0066: RaiseErrorOccurred(HtmlParseError.EOF); Back(); doctype.IsQuirksForced = true; doctype.Name = FlushBufferFast(); break; } return ref doctype; } private ref StructHtmlToken DoctypeNameAfter(ref StructHtmlToken doctype) { switch (SkipSpaces()) { case '>': State = HtmlParseMode.PCData; break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); Back(); doctype.IsQuirksForced = true; break; default: if (ContinuesWithInsensitive(Keywords.Public)) { Advance(5); return ref DoctypePublic(ref doctype); } if (ContinuesWithInsensitive(Keywords.System)) { Advance(5); return ref DoctypeSystem(ref doctype); } RaiseErrorOccurred(HtmlParseError.DoctypeUnexpectedAfterName); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypePublic(ref StructHtmlToken doctype) { char next = GetNext(); if (next.IsSpaceCharacter()) return ref DoctypePublicIdentifierBefore(ref doctype); switch (next) { case '"': RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected); doctype.PublicIdentifier = StringOrMemory.Empty; return ref DoctypePublicIdentifierDoubleQuoted(ref doctype); case '\'': RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected); doctype.PublicIdentifier = StringOrMemory.Empty; return ref DoctypePublicIdentifierSingleQuoted(ref doctype); case '>': 
State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypePublicInvalid); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypePublicIdentifierBefore(ref StructHtmlToken doctype) { switch (SkipSpaces()) { case '"': doctype.PublicIdentifier = StringOrMemory.Empty; return ref DoctypePublicIdentifierDoubleQuoted(ref doctype); case '\'': doctype.PublicIdentifier = StringOrMemory.Empty; return ref DoctypePublicIdentifierSingleQuoted(ref doctype); case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypePublicInvalid); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypePublicIdentifierDoubleQuoted(ref StructHtmlToken doctype) { while (true) { char next = GetNext(); switch (next) { case '"': doctype.PublicIdentifier = FlushBufferFast(); return ref DoctypePublicIdentifierAfter(ref doctype); case '': AppendReplacement(); break; case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; doctype.PublicIdentifier = FlushBufferFast(); goto IL_008a; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); Back(); doctype.IsQuirksForced = true; doctype.PublicIdentifier = FlushBufferFast(); goto IL_008a; default: { Append(next); break; } IL_008a: return ref doctype; } } } private ref StructHtmlToken DoctypePublicIdentifierSingleQuoted(ref StructHtmlToken doctype) { while (true) { char next = GetNext(); switch (next) { case '\'': doctype.PublicIdentifier = 
FlushBufferFast(); return ref DoctypePublicIdentifierAfter(ref doctype); case '': AppendReplacement(); break; case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; doctype.PublicIdentifier = FlushBufferFast(); goto IL_008a; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; doctype.PublicIdentifier = FlushBufferFast(); Back(); goto IL_008a; default: { Append(next); break; } IL_008a: return ref doctype; } } } private ref StructHtmlToken DoctypePublicIdentifierAfter(ref StructHtmlToken doctype) { char next = GetNext(); if (next.IsSpaceCharacter()) return ref DoctypeBetween(ref doctype); switch (next) { case '>': State = HtmlParseMode.PCData; break; case '"': RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected); doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype); case '\'': RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected); doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierSingleQuoted(ref doctype); case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypeBetween(ref StructHtmlToken doctype) { switch (SkipSpaces()) { case '>': State = HtmlParseMode.PCData; break; case '"': doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype); case '\'': doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierSingleQuoted(ref doctype); case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter); doctype.IsQuirksForced = true; return ref BogusDoctype(ref 
doctype); } return ref doctype; } private ref StructHtmlToken DoctypeSystem(ref StructHtmlToken doctype) { char next = GetNext(); if (next.IsSpaceCharacter()) { State = HtmlParseMode.PCData; return ref DoctypeSystemIdentifierBefore(ref doctype); } switch (next) { case '"': RaiseErrorOccurred(HtmlParseError.DoubleQuotationMarkUnexpected); doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype); case '\'': RaiseErrorOccurred(HtmlParseError.SingleQuotationMarkUnexpected); doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierSingleQuoted(ref doctype); case '>': RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.SystemIdentifier = FlushBufferFast(); doctype.IsQuirksForced = true; break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypeSystemInvalid); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypeSystemIdentifierBefore(ref StructHtmlToken doctype) { switch (SkipSpaces()) { case '"': doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierDoubleQuoted(ref doctype); case '\'': doctype.SystemIdentifier = StringOrMemory.Empty; return ref DoctypeSystemIdentifierSingleQuoted(ref doctype); case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter); doctype.IsQuirksForced = true; return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken DoctypeSystemIdentifierDoubleQuoted(ref StructHtmlToken doctype) { while (true) { char next = 
GetNext(); switch (next) { case '"': doctype.SystemIdentifier = FlushBufferFast(); return ref DoctypeSystemIdentifierAfter(ref doctype); case '': AppendReplacement(); break; case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); goto IL_008a; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); Back(); goto IL_008a; default: { Append(next); break; } IL_008a: return ref doctype; } } } private ref StructHtmlToken DoctypeSystemIdentifierSingleQuoted(ref StructHtmlToken doctype) { while (true) { char next = GetNext(); switch (next) { case '\'': doctype.SystemIdentifier = FlushBufferFast(); return ref DoctypeSystemIdentifierAfter(ref doctype); case '': AppendReplacement(); break; case '>': State = HtmlParseMode.PCData; RaiseErrorOccurred(HtmlParseError.TagClosedWrong); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); goto IL_0093; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; doctype.SystemIdentifier = FlushBufferFast(); Back(); goto IL_0093; default: { Append(next); break; } IL_0093: return ref doctype; } } } private ref StructHtmlToken DoctypeSystemIdentifierAfter(ref StructHtmlToken doctype) { switch (SkipSpaces()) { case '>': State = HtmlParseMode.PCData; break; case '￿': RaiseErrorOccurred(HtmlParseError.EOF); doctype.IsQuirksForced = true; Back(); break; default: RaiseErrorOccurred(HtmlParseError.DoctypeInvalidCharacter); return ref BogusDoctype(ref doctype); } return ref doctype; } private ref StructHtmlToken BogusDoctype(ref StructHtmlToken doctype) { while (true) { switch (GetNext()) { case '>': State = HtmlParseMode.PCData; goto IL_0025; case '￿': { Back(); goto IL_0025; } IL_0025: return ref doctype; } } } private ref StructHtmlToken ParseAttributes(ref StructHtmlToken tag) { AttributeState attributeState = 
AttributeState.BeforeName; char c = '"'; char c2 = ''; TextPosition currentPosition = GetCurrentPosition(); bool flag = false; while (true) { switch (attributeState) { case AttributeState.BeforeName: c2 = SkipSpaces(); switch (c2) { case '/': attributeState = AttributeState.SelfClose; break; case '>': return ref EmitTag(ref tag); default: if (!c2.IsUppercaseAscii() || IsPreservingAttributeNames) { switch (c2) { case '': AppendReplacement(); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; case '"': case '\'': case '<': case '=': RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid); Append(c2); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; default: Append(c2); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; case '￿': return ref NewEof(false); } } else { Append(char.ToLowerInvariant(c2)); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; } break; } break; case AttributeState.Name: c2 = GetNext(); switch (c2) { case '=': { StringOrMemory name4 = FlushBufferFast(HtmlAttributesLookup.TryGetWellKnownAttributeName); flag = _shouldEmitAttribute(ref tag, name4.Memory); if (flag) tag.AddAttribute(name4, currentPosition); attributeState = AttributeState.BeforeValue; break; } case '>': { StringOrMemory name3 = FlushBufferFast(HtmlAttributesLookup.TryGetWellKnownAttributeName); if (_shouldEmitAttribute(ref tag, name3.Memory)) tag.AddAttribute(name3, currentPosition); return ref EmitTag(ref tag); } default: if (c2.IsSpaceCharacter()) { StringOrMemory name = FlushBufferFast(HtmlAttributesLookup.TryGetWellKnownAttributeName); flag = _shouldEmitAttribute(ref tag, name.Memory); if (flag) tag.AddAttribute(name, currentPosition); attributeState = AttributeState.AfterName; } else if (c2 == '/') { StringOrMemory name2 = FlushBufferFast(HtmlAttributesLookup.TryGetWellKnownAttributeName); flag = _shouldEmitAttribute(ref tag, name2.Memory); if 
(flag) tag.AddAttribute(name2, currentPosition); attributeState = AttributeState.SelfClose; } else if (!c2.IsUppercaseAscii() || IsPreservingAttributeNames) { switch (c2) { case '"': case '\'': case '<': RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid); Append(c2); break; case '': AppendReplacement(); break; default: Append(c2); break; case '￿': return ref NewEof(false); } } else { Append(char.ToLowerInvariant(c2)); } break; } break; case AttributeState.AfterName: c2 = SkipSpaces(); switch (c2) { case '>': return ref EmitTag(ref tag); case '=': attributeState = AttributeState.BeforeValue; break; case '/': attributeState = AttributeState.SelfClose; break; default: if (!c2.IsUppercaseAscii() || IsPreservingAttributeNames) { switch (c2) { case '"': case '\'': case '<': RaiseErrorOccurred(HtmlParseError.AttributeNameInvalid); Append(c2); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; case '': AppendReplacement(); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; default: Append(c2); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; break; case '￿': return ref NewEof(false); } } else { Append(char.ToLowerInvariant(c2)); currentPosition = GetCurrentPosition(); attributeState = AttributeState.Name; } break; } break; case AttributeState.BeforeValue: c2 = SkipSpaces(); switch (c2) { case '"': case '\'': attributeState = AttributeState.QuotedValue; c = c2; break; case '&': attributeState = AttributeState.UnquotedValue; break; case '>': RaiseErrorOccurred(HtmlParseError.TagClosedWrong); return ref EmitTag(ref tag); case '<': case '=': case '`': RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid); Append(c2); attributeState = AttributeState.UnquotedValue; c2 = GetNext(); break; case '': AppendReplacement(); attributeState = AttributeState.UnquotedValue; c2 = GetNext(); break; default: Append(c2); attributeState = AttributeState.UnquotedValue; c2 = GetNext(); break; 
case '￿': return ref NewEof(false); } break; case AttributeState.QuotedValue: c2 = GetNext(); if (c2 != c) { switch (c2) { case '&': AppendCharacterReference(GetNext(), c, true); break; case '': AppendReplacement(); break; default: Append(c2); break; case '￿': return ref NewEof(false); } } else { if (flag) { StringOrMemory attributeValue3 = FlushBufferFast(); tag.SetAttributeValue(attributeValue3); } else base.CharBuffer.Discard(); attributeState = AttributeState.AfterValue; } break; case AttributeState.UnquotedValue: if (c2 == '>') { if (flag) { StringOrMemory attributeValue = FlushBufferFast(); tag.SetAttributeValue(attributeValue); } else base.CharBuffer.Discard(); return ref EmitTag(ref tag); } if (!c2.IsSpaceCharacter()) { switch (c2) { case '&': AppendCharacterReference(GetNext(), '>', true); c2 = GetNext(); break; case '': AppendReplacement(); c2 = GetNext(); break; case '"': case '\'': case '<': case '=': case '`': RaiseErrorOccurred(HtmlParseError.AttributeValueInvalid); Append(c2); c2 = GetNext(); break; default: Append(c2); c2 = GetNext(); break; case '￿': return ref NewEof(false); } } else { if (flag) { StringOrMemory attributeValue2 = FlushBufferFast(); tag.SetAttributeValue(attributeValue2); } else base.CharBuffer.Discard(); attributeState = AttributeState.BeforeName; } break; case AttributeState.AfterValue: c2 = GetNext(); if (c2 == '>') return ref EmitTag(ref tag); if (!c2.IsSpaceCharacter()) { switch (c2) { case '/': attributeState = AttributeState.SelfClose; break; case '￿': return ref NewEof(false); default: RaiseErrorOccurred(HtmlParseError.AttributeNameExpected); Back(); attributeState = AttributeState.BeforeName; break; } } else attributeState = AttributeState.BeforeName; break; case AttributeState.SelfClose: if (TagSelfClosingInner(ref tag)) return ref tag; attributeState = AttributeState.BeforeName; break; } } }
        // The line above is the unchanged remainder of the attribute parsing routine,
        // whose beginning lies on earlier source lines.

        /// <summary>
        /// Tokenizes script content, starting with the given character, by running a
        /// local <see cref="ScriptState"/> state machine until a token can be returned.
        /// </summary>
        /// <param name="c">The first character of the script content to process.</param>
        /// <returns>
        /// A character token with the buffered text (or a skipped-content token when
        /// <c>SkipScriptText</c> is set), or the closing tag token when an end tag whose
        /// name equals <c>_lastStartTag</c> is found with no text buffered before it.
        /// </returns>
        private ref StructHtmlToken ScriptData(char c)
        {
            int lastTagLength = _lastStartTag.Length;
            int scriptLength = TagNames.Script.Length;
            ScriptState state = ScriptState.Normal;

            // Buffer length recorded where a candidate tag name starts, so that
            // (CharBuffer.Length - offset) is the number of name characters buffered.
            int offset = 0;

            while (true)
            {
                switch (state)
                {
                    case ScriptState.Normal:
                        switch (c)
                        {
                            case '\0':
                                // AppendReplacement reports the NUL and buffers the replacement character.
                                AppendReplacement();
                                c = GetNext();
                                break;
                            case '<':
                                Append('<');
                                state = ScriptState.OpenTag;
                                break;
                            case '￿':
                                // End-of-input marker: step back and emit whatever text was buffered.
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                Append(c);
                                c = GetNext();
                                break;
                        }
                        break;

                    case ScriptState.OpenTag:
                        // A '<' was just buffered; the next character decides what it starts.
                        c = GetNext();
                        switch (c)
                        {
                            case '/':
                                state = ScriptState.EndTag;
                                break;
                            case '!':
                                state = ScriptState.StartEscape;
                                break;
                            default:
                                state = ScriptState.Normal;
                                break;
                        }
                        break;

                    case ScriptState.StartEscape:
                        Append('!');
                        c = GetNext();
                        state = (c == '-') ? ScriptState.StartEscapeDash : ScriptState.Normal;
                        break;

                    case ScriptState.StartEscapeDash:
                        c = GetNext();
                        // Buffers the dash that led into this state.
                        Append('-');
                        if (c == '-')
                        {
                            Append('-');
                            state = ScriptState.EscapedDashDash;
                        }
                        else
                        {
                            state = ScriptState.Normal;
                        }
                        break;

                    case ScriptState.EndTag:
                    {
                        c = GetNext();
                        Append('/');
                        offset = base.CharBuffer.Length;
                        ref StructHtmlToken endTag = ref NewTagClose();

                        while (c.IsLetter())
                        {
                            Append(c);
                            c = GetNext();
                            bool isSpace = c.IsSpaceCharacter();
                            bool isClose = c == '>';
                            bool isSlash = c == '/';

                            if (base.CharBuffer.Length - offset == lastTagLength && (isSpace | isClose | isSlash) && base.CharBuffer.HasTextAt(_lastStartTag.Memory.Span, offset, lastTagLength, StringComparison.OrdinalIgnoreCase))
                            {
                                if (offset > 2)
                                {
                                    // More than the two characters "</" precede the name, i.e. text is
                                    // buffered in front of the end tag: take the tag back out of the
                                    // buffer and emit the text first. (Back presumably rewinds the
                                    // input so the end tag is read again afterwards - confirm.)
                                    Back(3 + lastTagLength);
                                    base.CharBuffer.Remove(offset - 2, lastTagLength + 2);
                                    if (SkipScriptText)
                                    {
                                        return ref NewSkippedContent(HtmlTokenType.Character);
                                    }

                                    return ref NewCharacter();
                                }

                                base.CharBuffer.Discard();

                                if (isSpace)
                                {
                                    endTag.Name = _lastStartTag;
                                    return ref ParseAttributes(ref endTag);
                                }

                                if (isSlash)
                                {
                                    endTag.Name = _lastStartTag;
                                    return ref TagSelfClosing(ref endTag);
                                }

                                if (isClose)
                                {
                                    endTag.Name = _lastStartTag;
                                    return ref EmitTag(ref endTag);
                                }
                            }
                        }

                        // Not the expected end tag: the pending character is handled as normal text.
                        state = ScriptState.Normal;
                        break;
                    }

                    case ScriptState.Escaped:
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                c = GetNext();
                                state = ScriptState.EscapedDash;
                                break;
                            case '<':
                                c = GetNext();
                                state = ScriptState.EscapedOpenTag;
                                break;
                            case '\0':
                                AppendReplacement();
                                c = GetNext();
                                break;
                            case '￿':
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                // Any other character is re-dispatched in the Normal state.
                                state = ScriptState.Normal;
                                break;
                        }
                        break;

                    case ScriptState.EscapedDash:
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                state = ScriptState.EscapedDashDash;
                                break;
                            case '<':
                                c = GetNext();
                                state = ScriptState.EscapedOpenTag;
                                break;
                            case '\0':
                                AppendReplacement();
                                c = GetNext();
                                state = ScriptState.Escaped;
                                break;
                            case '￿':
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                Append(c);
                                c = GetNext();
                                state = ScriptState.Escaped;
                                break;
                        }
                        break;

                    case ScriptState.EscapedDashDash:
                        c = GetNext();
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                break;
                            case '<':
                                c = GetNext();
                                state = ScriptState.EscapedOpenTag;
                                break;
                            case '>':
                                Append('>');
                                c = GetNext();
                                state = ScriptState.Normal;
                                break;
                            case '\0':
                                AppendReplacement();
                                c = GetNext();
                                state = ScriptState.Escaped;
                                break;
                            case '￿':
                                // Unlike the other end-of-input branches, this one does not call Back().
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                Append(c);
                                c = GetNext();
                                state = ScriptState.Escaped;
                                break;
                        }
                        break;

                    case ScriptState.EscapedOpenTag:
                        // The '<' that led here has not been buffered yet.
                        if (c == '/')
                        {
                            c = GetNext();
                            state = ScriptState.EscapedEndTag;
                        }
                        else if (c.IsLetter())
                        {
                            Append('<');
                            offset = base.CharBuffer.Length;
                            Append(c);
                            state = ScriptState.StartDoubleEscape;
                        }
                        else
                        {
                            Append('<');
                            state = ScriptState.Escaped;
                        }
                        break;

                    case ScriptState.EscapedEndTag:
                        Append('<', '/');
                        offset = base.CharBuffer.Length;
                        if (c.IsLetter())
                        {
                            Append(c);
                            state = ScriptState.EscapedNameEndTag;
                        }
                        else
                        {
                            state = ScriptState.Escaped;
                        }
                        break;

                    case ScriptState.EscapedNameEndTag:
                    {
                        c = GetNext();
                        bool nameTerminated = base.CharBuffer.Length - offset == scriptLength && (c == '/' || c == '>' || c.IsSpaceCharacter());

                        if (nameTerminated && base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script)))
                        {
                            // The buffered name is "script": take the end tag back out of the
                            // buffer and emit the text collected before it.
                            Back(scriptLength + 3);
                            base.CharBuffer.Remove(offset - 2, scriptLength + 2);
                            if (SkipScriptText)
                            {
                                return ref NewSkippedContent(HtmlTokenType.Character);
                            }

                            return ref NewCharacter();
                        }
                        else if (!c.IsLetter())
                        {
                            // Also reached for a terminated name of the right length that is not
                            // "script". Handing the pending character back to the Escaped state
                            // keeps it in the script text; leaving the state unchanged here would
                            // let the next GetNext() overwrite it, silently dropping the character.
                            state = ScriptState.Escaped;
                        }
                        else
                        {
                            Append(c);
                        }
                        break;
                    }

                    case ScriptState.StartDoubleEscape:
                        c = GetNext();
                        if (base.CharBuffer.Length - offset == scriptLength && (c == '/' || c == '>' || c.IsSpaceCharacter()))
                        {
                            bool opensScript = base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script));
                            Append(c);
                            c = GetNext();
                            state = opensScript ? ScriptState.EscapedDouble : ScriptState.Escaped;
                        }
                        else if (c.IsLetter())
                        {
                            Append(c);
                        }
                        else
                        {
                            state = ScriptState.Escaped;
                        }
                        break;

                    case ScriptState.EscapedDouble:
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                c = GetNext();
                                state = ScriptState.EscapedDoubleDash;
                                break;
                            case '<':
                                Append('<');
                                c = GetNext();
                                state = ScriptState.EscapedDoubleOpenTag;
                                break;
                            case '\0':
                                AppendReplacement();
                                c = GetNext();
                                break;
                            case '￿':
                                RaiseErrorOccurred(HtmlParseError.EOF);
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                Append(c);
                                c = GetNext();
                                break;
                        }
                        break;

                    case ScriptState.EscapedDoubleDash:
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                state = ScriptState.EscapedDoubleDashDash;
                                break;
                            case '<':
                                Append('<');
                                c = GetNext();
                                state = ScriptState.EscapedDoubleOpenTag;
                                break;
                            case '\0':
                                // Report the NUL and continue with the replacement character, which
                                // the EscapedDouble state then buffers like any other character.
                                RaiseErrorOccurred(HtmlParseError.Null);
                                c = '�';
                                goto default;
                            case '￿':
                                RaiseErrorOccurred(HtmlParseError.EOF);
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                state = ScriptState.EscapedDouble;
                                break;
                        }
                        break;

                    case ScriptState.EscapedDoubleDashDash:
                        c = GetNext();
                        switch (c)
                        {
                            case '-':
                                Append('-');
                                break;
                            case '<':
                                Append('<');
                                c = GetNext();
                                state = ScriptState.EscapedDoubleOpenTag;
                                break;
                            case '>':
                                Append('>');
                                c = GetNext();
                                state = ScriptState.Normal;
                                break;
                            case '\0':
                                AppendReplacement();
                                c = GetNext();
                                state = ScriptState.EscapedDouble;
                                break;
                            case '￿':
                                RaiseErrorOccurred(HtmlParseError.EOF);
                                Back();
                                if (SkipScriptText)
                                {
                                    return ref NewSkippedContent(HtmlTokenType.Character);
                                }

                                return ref NewCharacter();
                            default:
                                Append(c);
                                c = GetNext();
                                state = ScriptState.EscapedDouble;
                                break;
                        }
                        break;

                    case ScriptState.EscapedDoubleOpenTag:
                        if (c == '/')
                        {
                            Append('/');
                            offset = base.CharBuffer.Length;
                            state = ScriptState.EndDoubleEscape;
                        }
                        else
                        {
                            state = ScriptState.EscapedDouble;
                        }
                        break;

                    case ScriptState.EndDoubleEscape:
                        c = GetNext();
                        if (base.CharBuffer.Length - offset == scriptLength && (c.IsSpaceCharacter() || c == '/' || c == '>'))
                        {
                            bool closesScript = base.CharBuffer.Isi(offset, scriptLength, MemoryExtensions.AsSpan(TagNames.Script));
                            Append(c);
                            c = GetNext();
                            state = closesScript ? ScriptState.Escaped : ScriptState.EscapedDouble;
                        }
                        else if (c.IsLetter())
                        {
                            Append(c);
                        }
                        else
                        {
                            state = ScriptState.EscapedDouble;
                        }
                        break;
                }
            }
        }

        /// <summary>
        /// Reads the next character, stores the current position in <c>_position</c> and
        /// hands the character to the routine that matches the current <c>State</c>.
        /// </summary>
        /// <returns>
        /// The token produced by the selected routine, or an end-of-file token (created
        /// without raising an error) when the end-of-input marker was read or
        /// <c>State</c> is none of the modes handled here.
        /// </returns>
        private ref StructHtmlToken GetNextStructToken()
        {
            char next = GetNext();
            _position = GetCurrentPosition();

            if (next != '￿')
            {
                switch (State)
                {
                    case HtmlParseMode.PCData:
                        return ref Data(next);
                    case HtmlParseMode.RCData:
                        return ref RCData(next);
                    case HtmlParseMode.Plaintext:
                        return ref Plaintext(next);
                    case HtmlParseMode.Rawtext:
                        return ref Rawtext(next);
                    case HtmlParseMode.Script:
                        return ref ScriptData(next);
                }
            }

            return ref NewEof(true);
        }

        /// <summary>
        /// Discards the buffered characters and stores a skipped-content token of the
        /// given type, positioned at <c>_position</c>, in <c>_token</c>.
        /// </summary>
        /// <param name="htmlTokenType">The token type the skipped content stands for.</param>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewSkippedContent(HtmlTokenType htmlTokenType = HtmlTokenType.Character)
        {
            base.CharBuffer.Discard();
            _token = StructHtmlToken.Skipped(htmlTokenType, _position);
            return ref _token;
        }

        /// <summary>
        /// Flushes the buffered characters into a character token positioned at
        /// <c>_position</c> and stores it in <c>_token</c>.
        /// </summary>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewCharacter()
        {
            StringOrMemory name = FlushBufferFast();
            _token = StructHtmlToken.Character(name, _position);
            return ref _token;
        }

        /// <summary>
        /// Creates a processing-instruction token from the buffered text, or a
        /// skipped-content token (typed as a comment) when
        /// <c>SkipProcessingInstructions</c> is set.
        /// </summary>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewProcessingInstruction() { if (SkipProcessingInstructions) return ref
NewSkippedContent(HtmlTokenType.Comment); StringOrMemory name = FlushBufferFast(); _token = StructHtmlToken.ProcessingInstruction(name, _position); return ref _token; }
        // The line above is the unchanged remainder of NewProcessingInstruction, whose
        // header sits on the preceding source line.

        /// <summary>
        /// Creates a comment token from the buffered text, or a skipped-content token
        /// when <c>SkipComments</c> is set.
        /// </summary>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewComment()
        {
            if (!SkipComments)
            {
                StringOrMemory text = FlushBufferFast();
                _token = StructHtmlToken.Comment(text, _position);
                return ref _token;
            }

            return ref NewSkippedContent(HtmlTokenType.Comment);
        }

        /// <summary>
        /// Creates an end-of-file token at <c>_position</c>.
        /// </summary>
        /// <param name="acceptable">
        /// When false, an <c>HtmlParseError.EOF</c> error is raised before the token is created.
        /// </param>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewEof(bool acceptable = false)
        {
            if (!acceptable)
            {
                RaiseErrorOccurred(HtmlParseError.EOF);
            }

            _token = StructHtmlToken.EndOfFile(_position);
            return ref _token;
        }

        /// <summary>
        /// Creates a doctype token at <c>_position</c>.
        /// </summary>
        /// <param name="quirksForced">Value passed through to the doctype token.</param>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewDoctype(bool quirksForced)
        {
            _token = StructHtmlToken.Doctype(quirksForced, _position);
            return ref _token;
        }

        /// <summary>
        /// Creates an opening tag token at <c>_position</c>.
        /// </summary>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewTagOpen()
        {
            _token = StructHtmlToken.TagOpen(_position);
            return ref _token;
        }

        /// <summary>
        /// Creates a closing tag token at <c>_position</c>.
        /// </summary>
        /// <returns>A reference to <c>_token</c>.</returns>
        private ref StructHtmlToken NewTagClose()
        {
            _token = StructHtmlToken.TagClose(_position);
            return ref _token;
        }

        /// <summary>
        /// Raises the given parse error at the tokenizer's current position.
        /// </summary>
        /// <param name="code">The error to report.</param>
        private void RaiseErrorOccurred(HtmlParseError code) =>
            RaiseErrorOccurred(code, GetCurrentPosition());

        /// <summary>
        /// Reports a NUL-character error and buffers the replacement character instead.
        /// </summary>
        private void AppendReplacement()
        {
            RaiseErrorOccurred(HtmlParseError.Null);
            Append('�');
        }

        /// <summary>
        /// Checks whether the buffer holds exactly the name of the last start tag and
        /// <paramref name="c"/> ends a tag name (whitespace, '/' or the closing angle
        /// bracket). If so, a closing tag with that name is created and completed.
        /// </summary>
        /// <param name="c">The character following the buffered name.</param>
        /// <param name="token">Receives the completed closing tag when one was created.</param>
        /// <returns>True if a closing tag was produced; otherwise false.</returns>
        private bool CreateIfAppropriate(char c, ref StructHtmlToken token)
        {
            bool isWhitespace = c.IsSpaceCharacter();
            bool isGreaterThan = c == '>';
            bool isSolidus = c == '/';
            bool endsName = isWhitespace || isGreaterThan || isSolidus;

            // Same checks, in the same order, as a single conjunction - expressed as a guard.
            if (base.CharBuffer.Length != _lastStartTag.Length || !endsName || !base.CharBuffer.Is(_lastStartTag))
            {
                return false;
            }

            ref StructHtmlToken closeTag = ref NewTagClose();
            base.CharBuffer.Discard();
            closeTag.Name = _lastStartTag;

            if (isWhitespace)
            {
                token = ParseAttributes(ref closeTag);
            }
            else if (isSolidus)
            {
                token = TagSelfClosing(ref closeTag);
            }
            else
            {
                // Only the closing angle bracket is left, since endsName is true here.
                token = EmitTag(ref closeTag);
            }

            return true;
        }

        /// <summary>
        /// Finishes a tag token and switches <c>State</c> back to <c>HtmlParseMode.PCData</c>.
        /// For start tags, every attribute whose name already occurs at a lower index is
        /// removed (an <c>AttributeDuplicateOmitted</c> error is raised for each) and the
        /// tag name is remembered in <c>_lastStartTag</c>. For end tags, errors are raised
        /// if the tag is self-closing or carries attributes.
        /// </summary>
        /// <param name="tag">The tag token to finish.</param>
        /// <returns>The same token reference that was passed in.</returns>
        private ref StructHtmlToken EmitTag(ref StructHtmlToken tag)
        {
            StructAttributes attributes = tag.Attributes;
            State = HtmlParseMode.PCData;
            HtmlTokenType type = tag.Type;

            if (type == HtmlTokenType.StartTag)
            {
                // Walk from the last attribute towards the second one and look for an
                // earlier attribute with the same name.
                for (int later = attributes.Count - 1; later > 0; later--)
                {
                    for (int earlier = later - 1; earlier >= 0; earlier--)
                    {
                        StringOrMemory earlierName = attributes[earlier].Name;

                        if (earlierName.Is(attributes[later].Name))
                        {
                            tag.RemoveAttributeAt(later);
                            RaiseErrorOccurred(HtmlParseError.AttributeDuplicateOmitted, tag.Position);
                            break;
                        }
                    }
                }

                _lastStartTag = tag.Name;
            }
            else if (type == HtmlTokenType.EndTag)
            {
                if (tag.IsSelfClosing)
                {
                    RaiseErrorOccurred(HtmlParseError.EndTagCannotBeSelfClosed, tag.Position);
                }

                if (attributes.Count != 0)
                {
                    RaiseErrorOccurred(HtmlParseError.EndTagCannotHaveAttributes, tag.Position);
                }
            }

            return ref tag;
        }
    }
}