HtmlTokenizer
Performs the tokenization of the source code. Follows the tokenization algorithm at:
http://www.w3.org/html/wg/drafts/html/master/syntax.html
using AngleSharp.Extensions;
using AngleSharp.Html;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
namespace AngleSharp.Parser.Html
{
[DebuggerStepThrough]
internal sealed class HtmlTokenizer : BaseTokenizer
{
private readonly StringBuilder _buffer;
private bool _acceptsCharacterData;
private string _lastStartTag;
private HtmlParseMode _state;
private HtmlToken _buffered;
/// <summary>
/// Gets or sets the flag that lets a markup declaration starting with
/// "[CDATA[" be tokenized as a CDATA section (checked in MarkupDeclaration).
/// </summary>
public bool IsAcceptingCharacterData
{
    get { return _acceptsCharacterData; }
    set { _acceptsCharacterData = value; }
}
/// <summary>
/// Gets or sets the current parse mode; Get() uses it to pick the
/// tokenization routine (PCData, RCData, Plaintext, Rawtext or Script).
/// </summary>
public HtmlParseMode State
{
    get { return _state; }
    set { _state = value; }
}
/// <summary>
/// Creates a tokenizer over the given text source, starting in PCData mode
/// with CDATA sections disabled and an empty character buffer.
/// </summary>
/// <param name="source">The text source handed to the base tokenizer.</param>
public HtmlTokenizer(ITextSource source)
    : base(source)
{
    _buffer = new StringBuilder();
    _acceptsCharacterData = false;
    _state = HtmlParseMode.PCData;
}
/// <summary>
/// Produces the next token. A token parked by the previous call is returned
/// first; otherwise the routine matching the current parse mode is run.
/// If character data accumulated in <c>_buffer</c> while that routine ran,
/// the data is returned as a character token and the produced token is
/// parked for the following call.
/// </summary>
/// <returns>The next token, or HtmlToken.EOF once the source has ended.</returns>
public HtmlToken Get()
{
    if (_buffered != null)
    {
        HtmlToken pending = _buffered;
        _buffered = null;
        return pending;
    }

    char current = base.Next;

    if (base.IsEnded)
    {
        return HtmlToken.EOF;
    }

    // Stays null if the parse mode matches none of the known modes.
    HtmlToken token = null;

    if (_state == HtmlParseMode.PCData)
    {
        token = Data(current);
    }
    else if (_state == HtmlParseMode.RCData)
    {
        token = RCData(current);
    }
    else if (_state == HtmlParseMode.Plaintext)
    {
        token = Plaintext(current);
    }
    else if (_state == HtmlParseMode.Rawtext)
    {
        token = Rawtext(current);
    }
    else if (_state == HtmlParseMode.Script)
    {
        token = ScriptData(current);
    }

    if (_buffer.Length == 0)
    {
        return token;
    }

    // Text precedes the token just produced: emit the text now, the token next time.
    _buffered = token;
    HtmlToken characters = HtmlToken.Character(_buffer.ToString());
    _buffer.Clear();
    return characters;
}
/// <summary>
/// Disposes the base tokenizer and then hands the character buffer to the
/// ToPool extension (presumably returning it to a shared pool - confirm).
/// </summary>
public override void Dispose()
{
    base.Dispose();

    _buffer.ToPool();
}
/// <summary>
/// Plaintext mode: copies every character into the character buffer until
/// the end-of-input marker is read. The null-character case reports
/// ErrorCode.Null and stores the replacement character instead.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <returns>Always HtmlToken.EOF.</returns>
private HtmlToken Plaintext(char c)
{
    for (; ; c = base.Next)
    {
        if (c == '')
        {
            return HtmlToken.EOF;
        }

        if (c == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        }
        else
        {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// PCData mode: accumulates text in the character buffer until a tag opens
/// or the input ends. Character references are resolved through
/// CharacterReference; an unresolved reference keeps its literal ampersand.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <returns>The token produced by TagOpen, or HtmlToken.EOF.</returns>
private HtmlToken Data(char c)
{
    while (true)
    {
        switch (c)
        {
            case '&':
            {
                string reference = CharacterReference(base.Next, ' ');

                if (reference == null)
                {
                    _buffer.Append('&');
                }
                else
                {
                    _buffer.Append(reference);
                }

                break;
            }
            case '<':
                return TagOpen();
            case ' ':
                // Report and drop the character. Continuing the loop replaces the
                // former self-recursive call, whose stack depth grew with every
                // consecutive occurrence of this character in the input.
                RaiseErrorOccurred(ErrorCode.Null);
                break;
            case '':
                return HtmlToken.EOF;
            default:
                _buffer.Append(c);
                break;
        }

        c = base.Next;
    }
}
/// <summary>
/// RCData mode: accumulates text (resolving character references) until a
/// less-than sign hands over to RCDataLT or the input ends. The
/// null-character case reports ErrorCode.Null and stores the replacement
/// character.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <returns>The token produced by RCDataLT, or HtmlToken.EOF.</returns>
private HtmlToken RCData(char c)
{
    for (; ; c = base.Next)
    {
        if (c == '<')
        {
            return RCDataLT();
        }

        if (c == '')
        {
            return HtmlToken.EOF;
        }

        if (c == '&')
        {
            string reference = CharacterReference(base.Next, ' ');

            if (reference == null)
            {
                _buffer.Append('&');
            }
            else
            {
                _buffer.Append(reference);
            }
        }
        else if (c == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        }
        else
        {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Handles a less-than sign seen in RCData mode: a following solidus starts
/// an end-tag candidate, anything else makes the sign literal text.
/// </summary>
private HtmlToken RCDataLT()
{
    TextPosition start = GetCurrentPosition();
    char following = base.Next;

    if (following != '/')
    {
        _buffer.Append('<');
        return RCData(following);
    }

    _stringBuffer.Clear();
    return RCDataEndTag(start);
}
/// <summary>
/// Reads the first character of an end-tag candidate in RCData mode. An
/// ASCII letter starts the (lower-cased) tag name; any other character
/// turns the already consumed "&lt;/" into literal text.
/// </summary>
/// <param name="position">Start position recorded for the close tag.</param>
private HtmlToken RCDataEndTag(TextPosition position)
{
    char next = base.Next;

    if (next.IsUppercaseAscii())
    {
        // Invariant lower-casing: char.ToLower honours the current culture and
        // would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Clear().Append(char.ToLowerInvariant(next));
    }
    else if (next.IsLowercaseAscii())
    {
        _stringBuffer.Clear().Append(next);
    }
    else
    {
        _buffer.Append('<').Append('/');
        return RCData(next);
    }

    HtmlTagToken tag = HtmlToken.CloseTag();
    tag.Start = position;
    return RCDataNameEndTag(tag);
}
/// <summary>
/// Reads the rest of an end-tag name in RCData mode. The tag is accepted
/// only when the collected name equals <c>_lastStartTag</c> and is followed
/// by whitespace, a solidus or a greater-than sign; otherwise the consumed
/// "&lt;/name" is emitted as text and RCData processing resumes.
/// </summary>
/// <param name="tag">The close tag being built.</param>
private HtmlToken RCDataNameEndTag(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;
        string name = _stringBuffer.ToString();

        if (name == _lastStartTag)
        {
            if (next.IsSpaceCharacter())
            {
                tag.Name = name;
                return AttributeBeforeName(tag);
            }

            if (next == '/')
            {
                tag.Name = name;
                return TagSelfClosing(tag);
            }

            if (next == '>')
            {
                tag.Name = name;
                return EmitTag(tag);
            }
        }

        if (next.IsUppercaseAscii())
        {
            // Culture-invariant so that e.g. 'I' always becomes 'i'.
            _stringBuffer.Append(char.ToLowerInvariant(next));
        }
        else if (next.IsLowercaseAscii())
        {
            _stringBuffer.Append(next);
        }
        else
        {
            // Not an appropriate end tag: everything read so far is plain text.
            _buffer.Append('<').Append('/').Append(name);
            return RCData(next);
        }
    }
}
/// <summary>
/// Rawtext mode: accumulates text until a less-than sign hands over to
/// RawtextLT or the input ends. The null-character case reports
/// ErrorCode.Null and stores the replacement character.
/// </summary>
/// <param name="c">The first character to process.</param>
private HtmlToken Rawtext(char c)
{
    for (; ; c = base.Next)
    {
        if (c == '<')
        {
            return RawtextLT();
        }

        if (c == '')
        {
            return HtmlToken.EOF;
        }

        if (c == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        }
        else
        {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Handles a less-than sign seen in Rawtext mode: a following solidus starts
/// an end-tag candidate, anything else makes the sign literal text.
/// </summary>
private HtmlToken RawtextLT()
{
    TextPosition start = GetCurrentPosition();
    char following = base.Next;

    if (following != '/')
    {
        _buffer.Append('<');
        return Rawtext(following);
    }

    _stringBuffer.Clear();
    return RawtextEndTag(start);
}
/// <summary>
/// Reads the first character of an end-tag candidate in Rawtext mode. An
/// ASCII letter starts the (lower-cased) tag name; any other character
/// turns the already consumed "&lt;/" into literal text.
/// </summary>
/// <param name="position">Start position recorded for the close tag.</param>
private HtmlToken RawtextEndTag(TextPosition position)
{
    char next = base.Next;

    if (next.IsUppercaseAscii())
    {
        // Invariant lower-casing: char.ToLower honours the current culture and
        // would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Clear().Append(char.ToLowerInvariant(next));
    }
    else if (next.IsLowercaseAscii())
    {
        _stringBuffer.Clear().Append(next);
    }
    else
    {
        _buffer.Append('<').Append('/');
        return Rawtext(next);
    }

    HtmlTagToken tag = HtmlToken.CloseTag();
    tag.Start = position;
    return RawtextNameEndTag(tag);
}
/// <summary>
/// Reads the rest of an end-tag name in Rawtext mode. The tag is accepted
/// only when the collected name equals <c>_lastStartTag</c> and is followed
/// by whitespace, a solidus or a greater-than sign; otherwise the consumed
/// "&lt;/name" is emitted as text and Rawtext processing resumes.
/// </summary>
/// <param name="tag">The close tag being built.</param>
private HtmlToken RawtextNameEndTag(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;
        string name = _stringBuffer.ToString();

        if (name == _lastStartTag)
        {
            if (next.IsSpaceCharacter())
            {
                tag.Name = name;
                return AttributeBeforeName(tag);
            }

            if (next == '/')
            {
                tag.Name = name;
                return TagSelfClosing(tag);
            }

            if (next == '>')
            {
                tag.Name = name;
                return EmitTag(tag);
            }
        }

        if (next.IsUppercaseAscii())
        {
            // Culture-invariant so that e.g. 'I' always becomes 'i'.
            _stringBuffer.Append(char.ToLowerInvariant(next));
        }
        else if (next.IsLowercaseAscii())
        {
            _stringBuffer.Append(next);
        }
        else
        {
            // Not an appropriate end tag: everything read so far is plain text.
            _buffer.Append('<').Append('/').Append(name);
            return Rawtext(next);
        }
    }
}
/// <summary>
/// Collects the content of a CDATA section into a character token. Reading
/// stops at the end-of-input marker (which is pushed back) or at the
/// "]]&gt;" terminator (whose remaining two characters are skipped).
/// </summary>
private HtmlToken CData()
{
    char current = base.Next;
    _stringBuffer.Clear();

    while (true)
    {
        if (current == '')
        {
            Back();
            break;
        }

        if (current == ']' && ContinuesWith("]]>", true))
        {
            Advance(2);
            break;
        }

        _stringBuffer.Append(current);
        current = base.Next;
    }

    return HtmlToken.Character(_stringBuffer.ToString());
}
/// <summary>
/// Tries to consume a character reference; the ampersand has already been
/// read and <paramref name="c"/> is the character following it.
/// </summary>
/// <param name="c">The character directly after the ampersand.</param>
/// <param name="allowedCharacter">
/// An additional character that ends the attempt immediately. A non-zero
/// value also enables the attribute-specific handling of named references
/// that are not terminated by a semicolon.
/// </param>
/// <returns>
/// The replacement text, or null when no reference was recognised (the
/// caller then keeps the literal ampersand).
/// </returns>
private string CharacterReference(char c, char allowedCharacter = ' ')
{
    if (c.IsSpaceCharacter() || c == '<' || c == '' || c == '&' || c == allowedCharacter)
    {
        Back();
        return null;
    }

    if (c == '#')
    {
        // Largest Unicode code point. Once the running value exceeds it the
        // accumulation stops, so an arbitrarily long digit run can no longer
        // overflow the int and wrap around to a seemingly valid code point.
        const int maxCodePoint = 0x10FFFF;

        int code = 0;
        int digitCount = 0;

        c = base.Next;
        bool isHex = c == 'x' || c == 'X';

        if (isHex)
        {
            while ((c = base.Next).IsHex())
            {
                if (code <= maxCodePoint)
                {
                    code = code * 16 + c.FromHex();
                }

                digitCount++;
            }
        }
        else
        {
            while (c.IsDigit())
            {
                if (code <= maxCodePoint)
                {
                    code = code * 10 + c.FromHex();
                }

                digitCount++;
                c = base.Next;
            }
        }

        if (digitCount == 0)
        {
            // Nothing numeric followed "&#" / "&#x": rewind over what was read.
            Back(2);

            if (isHex)
            {
                Back();
            }

            RaiseErrorOccurred(ErrorCode.CharacterReferenceWrongNumber);
            return null;
        }

        if (c != ';')
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceSemicolonMissing);
            Back();
        }

        if (Entities.IsInCharacterTable(code))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidCode);
            return Entities.GetSymbolFromTable(code);
        }

        // NOTE(review): assumes IsInvalidNumber rejects values above 0x10FFFF - confirm.
        if (Entities.IsInvalidNumber(code))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidNumber);
            return '�'.ToString();
        }

        if (Entities.IsInInvalidRange(code))
        {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidRange);
        }

        return Entities.Convert(code);
    }

    // Named reference: grow the candidate name one character at a time and
    // remember the most recent candidate that resolved to a symbol.
    const int maxNameLength = 31;

    string result = null;
    int unmatched = 0;
    int insertionPoint = base.InsertionPoint - 1;
    char[] nameChars = new char[maxNameLength];
    int length = 0;
    char current = base.Current;

    while (current != ';' && current.IsName())
    {
        nameChars[length++] = current;
        string name = new string(nameChars, 0, length);
        current = base.Next;
        unmatched++;

        string symbol = (current == ';')
            ? Entities.GetSymbol(name)
            : Entities.GetSymbolWithoutSemicolon(name);

        if (symbol != null)
        {
            unmatched = 0;
            result = symbol;
        }

        if (base.IsEnded || length >= maxNameLength)
        {
            break;
        }
    }

    // Give back the characters read after the last successful match.
    Back(unmatched);
    current = base.Current;

    if (current != ';')
    {
        if (allowedCharacter != 0 && (current == '=' || current.IsAlphanumericAscii()))
        {
            if (current == '=')
            {
                RaiseErrorOccurred(ErrorCode.CharacterReferenceAttributeEqualsFound);
            }

            base.InsertionPoint = insertionPoint;
            return null;
        }

        Back();
        RaiseErrorOccurred(ErrorCode.CharacterReferenceNotTerminated);
    }

    return result;
}
/// <summary>
/// Handles the character after a less-than sign in PCData mode: a solidus
/// starts an end tag, an ASCII letter starts a (lower-cased) start tag,
/// '!' a markup declaration and '?' a bogus comment. Anything else makes
/// the less-than sign literal text.
/// </summary>
private HtmlToken TagOpen()
{
    TextPosition start = GetCurrentPosition();
    char next = base.Next;

    if (next == '/')
    {
        return TagEnd(base.Next, start);
    }

    if (next.IsLowercaseAscii() || next.IsUppercaseAscii())
    {
        HtmlTagToken tag = HtmlToken.OpenTag();
        tag.Start = start;

        // Invariant lower-casing: char.ToLower honours the current culture and
        // would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Clear().Append(next.IsUppercaseAscii() ? char.ToLowerInvariant(next) : next);
        return TagName(tag);
    }

    switch (next)
    {
        case '!':
            return MarkupDeclaration(start);
        case '?':
            RaiseErrorOccurred(ErrorCode.BogusComment);
            return BogusComment(next, start);
        default:
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.AmbiguousOpenTag);
            _buffer.Append('<');
            return Data(next);
    }
}
/// <summary>
/// Handles the character after "&lt;/" in PCData mode. An ASCII letter
/// starts a (lower-cased) end tag; '&gt;' is reported and skipped; the
/// end-of-input marker emits the consumed "&lt;/" as text; anything else
/// becomes a bogus comment.
/// </summary>
/// <param name="c">The character following the solidus.</param>
/// <param name="position">Start position recorded for the token.</param>
private HtmlToken TagEnd(char c, TextPosition position)
{
    if (c.IsLowercaseAscii() || c.IsUppercaseAscii())
    {
        HtmlTagToken tag = HtmlToken.CloseTag();
        tag.Start = position;

        // Invariant lower-casing: char.ToLower honours the current culture and
        // would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Clear().Append(c.IsUppercaseAscii() ? char.ToLowerInvariant(c) : c);
        return TagName(tag);
    }

    switch (c)
    {
        case '>':
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            return Data(base.Next);
        case '':
            Back();
            RaiseErrorOccurred(ErrorCode.EOF);
            _buffer.Append('<').Append('/');
            return HtmlToken.EOF;
        default:
            RaiseErrorOccurred(ErrorCode.BogusComment);
            return BogusComment(c, position);
    }
}
/// <summary>
/// Reads the remainder of a tag name into <c>_stringBuffer</c>, lower-casing
/// ASCII upper-case letters, until whitespace, a solidus, a greater-than
/// sign or the end of input is reached.
/// </summary>
/// <param name="tag">The tag token whose name is being read.</param>
private HtmlToken TagName(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;

        if (next.IsSpaceCharacter())
        {
            tag.Name = _stringBuffer.ToString();
            return AttributeBeforeName(tag);
        }

        switch (next)
        {
            case '/':
                tag.Name = _stringBuffer.ToString();
                return TagSelfClosing(tag);
            case '>':
                tag.Name = _stringBuffer.ToString();
                return EmitTag(tag);
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _stringBuffer.Append('�');
                break;
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                return HtmlToken.EOF;
            default:
                // Invariant lower-casing: char.ToLower honours the current culture
                // and would map 'I' to a dotless i under Turkish cultures.
                _stringBuffer.Append(next.IsUppercaseAscii() ? char.ToLowerInvariant(next) : next);
                break;
        }
    }
}
/// <summary>
/// Handles the character after a solidus inside a tag: '&gt;' marks the tag
/// as self-closing and emits it; any other character (except end of input)
/// is reported, pushed back and treated as the start of an attribute.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken TagSelfClosing(HtmlTagToken tag)
{
    char next = base.Next;

    if (next == '>')
    {
        tag.IsSelfClosing = true;
        return EmitTag(tag);
    }

    if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        return HtmlToken.EOF;
    }

    RaiseErrorOccurred(ErrorCode.ClosingSlashMisplaced);
    Back();
    return AttributeBeforeName(tag);
}
/// <summary>
/// Handles the input after "&lt;!": dispatches to a comment ("--"), a
/// doctype, or - when CDATA sections are accepted - a CDATA section
/// ("[CDATA["). Everything else is reported and read as a bogus comment.
/// </summary>
/// <param name="position">Start position recorded for the token.</param>
private HtmlToken MarkupDeclaration(TextPosition position)
{
    char first = base.Next;

    if (ContinuesWith("--", true))
    {
        Advance();
        return CommentStart(position);
    }
    else if (ContinuesWith(Tags.Doctype, true))
    {
        Advance(6);
        return Doctype(position);
    }
    else if (_acceptsCharacterData && ContinuesWith("[CDATA[", false))
    {
        Advance(6);
        return CData();
    }

    RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
    return BogusComment(first, position);
}
/// <summary>
/// Reads a bogus comment: every character up to the next '&gt;' (or the end
/// of input, which is pushed back) becomes comment data, with the
/// null-character case replaced by the replacement character.
/// </summary>
/// <param name="c">The first character of the comment data.</param>
/// <param name="position">Start position recorded for the comment.</param>
private HtmlToken BogusComment(char c, TextPosition position)
{
    _stringBuffer.Clear();

    while (c != '>' && c != '')
    {
        _stringBuffer.Append(c == ' ' ? '�' : c);
        c = base.Next;
    }

    if (c == '')
    {
        Back();
    }

    _state = HtmlParseMode.PCData;
    return EmitComment(position);
}
/// <summary>
/// Handles the first character after "&lt;!--". A dash may begin an
/// immediate end sequence; '&gt;' or the end of input close the (empty)
/// comment with an error; anything else starts the comment data.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
private HtmlCommentToken CommentStart(TextPosition position)
{
    char first = base.Next;
    _stringBuffer.Clear();

    if (first == '-')
    {
        return CommentDashStart(position);
    }

    if (first == '>')
    {
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        return EmitComment(position);
    }

    if (first == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        return EmitComment(position);
    }

    if (first == ' ')
    {
        RaiseErrorOccurred(ErrorCode.Null);
        _stringBuffer.Append('�');
    }
    else
    {
        _stringBuffer.Append(first);
    }

    return Comment(position);
}
/// <summary>
/// Handles the character after "&lt;!---" (a comment whose data starts with
/// a dash). A second dash may end the comment; '&gt;' or the end of input
/// close it with an error; anything else continues as comment data.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
/// <returns>The finished comment token (never null).</returns>
private HtmlCommentToken CommentDashStart(TextPosition position)
{
    char next = base.Next;

    switch (next)
    {
        case '-':
            // CommentEnd returns null when the dashes turn out to be ordinary
            // comment data. Resume scanning in that case instead of handing a
            // null token back up the call chain to Get().
            return CommentEnd(position) ?? Comment(position);
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('-').Append('�');
            return Comment(position);
        case '>':
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            break;
        case '':
            RaiseErrorOccurred(ErrorCode.EOF);
            Back();
            break;
        default:
            _stringBuffer.Append('-').Append(next);
            return Comment(position);
    }

    return EmitComment(position);
}
/// <summary>
/// Reads comment data into <c>_stringBuffer</c>. A dash is delegated to
/// CommentDashEnd, which either finishes the comment or returns null to
/// signal that scanning should continue here.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
private HtmlCommentToken Comment(TextPosition position)
{
    while (true)
    {
        char next = base.Next;

        if (next == '-')
        {
            HtmlCommentToken finished = CommentDashEnd(position);

            if (finished != null)
            {
                return finished;
            }

            continue;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            Back();
            return EmitComment(position);
        }

        if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }
}
/// <summary>
/// Handles the character after a single dash inside comment data.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
/// <returns>
/// The finished comment token, or null when the dash was ordinary data
/// (it is then appended together with the following character).
/// </returns>
private HtmlCommentToken CommentDashEnd(TextPosition position)
{
    char next = base.Next;

    if (next == '-')
    {
        return CommentEnd(position);
    }

    if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        return EmitComment(position);
    }

    if (next == ' ')
    {
        RaiseErrorOccurred(ErrorCode.Null);
        next = '�';
    }

    _stringBuffer.Append('-').Append(next);
    return null;
}
/// <summary>
/// Handles the input after two consecutive dashes inside a comment.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
/// <returns>
/// The finished comment token, or null when the dashes turned out to be
/// ordinary comment data (they are then appended to the buffer and the
/// caller is expected to resume reading comment data).
/// </returns>
private HtmlCommentToken CommentEnd(TextPosition position)
{
    while (true)
    {
        char next = base.Next;

        switch (next)
        {
            case '>':
                _state = HtmlParseMode.PCData;
                return EmitComment(position);
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                // Both pending dashes belong to the comment data, exactly as in
                // the default branch below; previously one of them was dropped.
                _stringBuffer.Append('-').Append('-').Append('�');
                return null;
            case '!':
                RaiseErrorOccurred(ErrorCode.CommentEndedWithEM);
                return CommentBangEnd(position);
            case '-':
                // A further dash: keep one as data and stay in this state.
                RaiseErrorOccurred(ErrorCode.CommentEndedWithDash);
                _stringBuffer.Append('-');
                break;
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                Back();
                return EmitComment(position);
            default:
                RaiseErrorOccurred(ErrorCode.CommentEndedUnexpected);
                _stringBuffer.Append('-').Append('-').Append(next);
                return null;
        }
    }
}
/// <summary>
/// Handles the character after "--!" inside a comment.
/// </summary>
/// <param name="position">Start position recorded for the comment.</param>
/// <returns>
/// The finished comment token, or null when "--!" was ordinary comment
/// data (it is then appended to the buffer).
/// </returns>
private HtmlCommentToken CommentBangEnd(TextPosition position)
{
    char next = base.Next;

    if (next == '-')
    {
        _stringBuffer.Append('-').Append('-').Append('!');
        return CommentDashEnd(position);
    }

    if (next == '>')
    {
        _state = HtmlParseMode.PCData;
        return EmitComment(position);
    }

    if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        return EmitComment(position);
    }

    if (next == ' ')
    {
        RaiseErrorOccurred(ErrorCode.Null);
        _stringBuffer.Append('-').Append('-').Append('!').Append('�');
    }
    else
    {
        _stringBuffer.Append('-').Append('-').Append('!').Append(next);
    }

    return null;
}
/// <summary>
/// Handles the character after the doctype keyword. Whitespace is the
/// regular case; end of input yields a doctype token created with
/// Doctype(true); any other character is reported and treated as the
/// start of the name.
/// </summary>
/// <param name="position">Start position recorded for the doctype.</param>
private HtmlToken Doctype(TextPosition position)
{
    char next = base.Next;

    if (next.IsSpaceCharacter())
    {
        return DoctypeNameBefore(base.Next, position);
    }

    if (next != '')
    {
        RaiseErrorOccurred(ErrorCode.DoctypeUnexpected);
        return DoctypeNameBefore(next, position);
    }

    RaiseErrorOccurred(ErrorCode.EOF);
    Back();

    HtmlDoctypeToken doctype = HtmlToken.Doctype(true);
    doctype.Start = position;
    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Skips whitespace before the doctype name and starts the name with the
/// first other character (lower-cased if it is an ASCII upper-case letter).
/// '&gt;' and end of input finish the doctype early, using a token created
/// with Doctype(true).
/// </summary>
/// <param name="c">The first character to inspect.</param>
/// <param name="position">Start position recorded for the doctype.</param>
private HtmlToken DoctypeNameBefore(char c, TextPosition position)
{
    while (c.IsSpaceCharacter())
    {
        c = base.Next;
    }

    if (c == '>')
    {
        HtmlDoctypeToken closed = HtmlToken.Doctype(true);
        closed.Start = position;
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        closed.End = GetCurrentPosition();
        return closed;
    }

    if (c == '')
    {
        HtmlDoctypeToken ended = HtmlToken.Doctype(true);
        ended.Start = position;
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        ended.End = GetCurrentPosition();
        return ended;
    }

    HtmlDoctypeToken doctype = HtmlToken.Doctype(false);
    doctype.Start = position;

    if (c.IsUppercaseAscii())
    {
        // Invariant lower-casing: char.ToLower honours the current culture and
        // would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Clear().Append(char.ToLowerInvariant(c));
    }
    else if (c == ' ')
    {
        RaiseErrorOccurred(ErrorCode.Null);
        _stringBuffer.Clear().Append('�');
    }
    else
    {
        _stringBuffer.Clear().Append(c);
    }

    return DoctypeName(doctype);
}
/// <summary>
/// Reads the doctype name (lower-casing ASCII upper-case letters) until
/// whitespace, '&gt;' or the end of input. End of input forces quirks mode
/// on the token.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeName(HtmlDoctypeToken doctype)
{
    while (true)
    {
        char next = base.Next;

        if (next.IsSpaceCharacter())
        {
            doctype.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypeNameAfter(doctype);
        }

        if (next == '>')
        {
            _state = HtmlParseMode.PCData;
            doctype.Name = _stringBuffer.ToString();
            break;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            Back();
            doctype.IsQuirksForced = true;
            doctype.Name = _stringBuffer.ToString();
            break;
        }

        if (next.IsUppercaseAscii())
        {
            // Invariant lower-casing: char.ToLower honours the current culture
            // and would map 'I' to a dotless i under Turkish cultures.
            _stringBuffer.Append(char.ToLowerInvariant(next));
        }
        else if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the input after the doctype name: '&gt;' finishes the token,
/// the keywords "public" / "system" continue with the respective
/// identifier, and anything else forces quirks mode and is skipped as a
/// bogus doctype.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeNameAfter(HtmlDoctypeToken doctype)
{
    char c = SkipSpaces();

    if (c == '>')
    {
        _state = HtmlParseMode.PCData;
    }
    else if (c == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        doctype.IsQuirksForced = true;
    }
    else if (ContinuesWith("public", true))
    {
        Advance(5);
        return DoctypePublic(doctype);
    }
    else if (ContinuesWith("system", true))
    {
        Advance(5);
        return DoctypeSystem(doctype);
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeUnexpectedAfterName);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the character directly after the "public" keyword. Whitespace is
/// the regular case; a quote immediately after the keyword is reported but
/// still starts the public identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypePublic(HtmlDoctypeToken doctype)
{
    char next = base.Next;

    if (next.IsSpaceCharacter())
    {
        return DoctypePublicIdentifierBefore(doctype);
    }

    if (next == '"')
    {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierDoubleQuoted(doctype);
    }

    if (next == '\'')
    {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierSingleQuoted(doctype);
    }

    if (next == '>')
    {
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
    }
    else if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypePublicInvalid);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Skips whitespace after the "public" keyword and expects the opening
/// quote of the public identifier. Anything else forces quirks mode.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypePublicIdentifierBefore(HtmlDoctypeToken doctype)
{
    char c = SkipSpaces();

    if (c == '"')
    {
        _stringBuffer.Clear();
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierDoubleQuoted(doctype);
    }

    if (c == '\'')
    {
        _stringBuffer.Clear();
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierSingleQuoted(doctype);
    }

    if (c == '>')
    {
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
    }
    else if (c == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypePublicInvalid);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Reads a double-quoted public identifier. '&gt;' or the end of input
/// before the closing quote finish the token with quirks mode forced and
/// the text read so far as identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypePublicIdentifierDoubleQuoted(HtmlDoctypeToken doctype)
{
    while (true)
    {
        char next = base.Next;

        if (next == '"')
        {
            doctype.PublicIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypePublicIdentifierAfter(doctype);
        }

        if (next == '>')
        {
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = _stringBuffer.ToString();
            break;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            Back();
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = _stringBuffer.ToString();
            break;
        }

        if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Reads a single-quoted public identifier. '&gt;' or the end of input
/// before the closing quote finish the token with quirks mode forced and
/// the text read so far as identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypePublicIdentifierSingleQuoted(HtmlDoctypeToken doctype)
{
    while (true)
    {
        char next = base.Next;

        if (next == '\'')
        {
            doctype.PublicIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypePublicIdentifierAfter(doctype);
        }

        if (next == '>')
        {
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = _stringBuffer.ToString();
            break;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            doctype.IsQuirksForced = true;
            doctype.PublicIdentifier = _stringBuffer.ToString();
            Back();
            break;
        }

        if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the character directly after the closing quote of the public
/// identifier. A quote without separating whitespace is reported but still
/// starts the system identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypePublicIdentifierAfter(HtmlDoctypeToken doctype)
{
    char next = base.Next;

    if (next.IsSpaceCharacter())
    {
        _stringBuffer.Clear();
        return DoctypeBetween(doctype);
    }

    if (next == '"')
    {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(doctype);
    }

    if (next == '\'')
    {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(doctype);
    }

    if (next == '>')
    {
        _state = HtmlParseMode.PCData;
    }
    else if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the input between the public and the system identifier (after
/// whitespace has been skipped): '&gt;' finishes the token and a quote
/// starts the system identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeBetween(HtmlDoctypeToken doctype)
{
    char c = SkipSpaces();

    if (c == '"')
    {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(doctype);
    }

    if (c == '\'')
    {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(doctype);
    }

    if (c == '>')
    {
        _state = HtmlParseMode.PCData;
    }
    else if (c == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the character directly after the "system" keyword. Whitespace is
/// the regular case; a quote immediately after the keyword is reported but
/// still starts the system identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeSystem(HtmlDoctypeToken doctype)
{
    char next = base.Next;

    if (next.IsSpaceCharacter())
    {
        _state = HtmlParseMode.PCData;
        return DoctypeSystemIdentifierBefore(doctype);
    }

    if (next == '"')
    {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(doctype);
    }

    if (next == '\'')
    {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(doctype);
    }

    if (next == '>')
    {
        // Unlike the sibling routines this branch does not assign _state.
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.SystemIdentifier = _stringBuffer.ToString();
        doctype.IsQuirksForced = true;
    }
    else if (next == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeSystemInvalid);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Skips whitespace after the "system" keyword and expects the opening
/// quote of the system identifier. Anything else forces quirks mode.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeSystemIdentifierBefore(HtmlDoctypeToken doctype)
{
    char c = SkipSpaces();

    if (c == '"')
    {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(doctype);
    }

    if (c == '\'')
    {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(doctype);
    }

    if (c == '>')
    {
        _state = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = _stringBuffer.ToString();
    }
    else if (c == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = _stringBuffer.ToString();
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
        doctype.IsQuirksForced = true;
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Reads a double-quoted system identifier. '&gt;' or the end of input
/// before the closing quote finish the token with quirks mode forced and
/// the text read so far as identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeSystemIdentifierDoubleQuoted(HtmlDoctypeToken doctype)
{
    while (true)
    {
        char next = base.Next;

        if (next == '"')
        {
            doctype.SystemIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypeSystemIdentifierAfter(doctype);
        }

        if (next == '>')
        {
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = _stringBuffer.ToString();
            break;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = _stringBuffer.ToString();
            Back();
            break;
        }

        if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Reads a single-quoted system identifier. '&gt;' or the end of input
/// before the closing quote finish the token with quirks mode forced and
/// the text read so far as identifier.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeSystemIdentifierSingleQuoted(HtmlDoctypeToken doctype)
{
    while (true)
    {
        char next = base.Next;

        if (next == '\'')
        {
            doctype.SystemIdentifier = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypeSystemIdentifierAfter(doctype);
        }

        if (next == '>')
        {
            _state = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = _stringBuffer.ToString();
            break;
        }

        if (next == '')
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            doctype.IsQuirksForced = true;
            doctype.SystemIdentifier = _stringBuffer.ToString();
            Back();
            break;
        }

        if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Handles the input after the system identifier (whitespace skipped):
/// '&gt;' finishes the token; any other character is reported and the rest
/// is skipped as a bogus doctype, without forcing quirks mode.
/// </summary>
/// <param name="doctype">The doctype token being built.</param>
private HtmlToken DoctypeSystemIdentifierAfter(HtmlDoctypeToken doctype)
{
    char c = SkipSpaces();

    if (c == '>')
    {
        _state = HtmlParseMode.PCData;
    }
    else if (c == '')
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
    }
    else
    {
        RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
        return BogusDoctype(doctype);
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Skips the remainder of a malformed doctype up to and including the next
/// '&gt;' (or up to the end of input, which is pushed back) and returns the
/// token.
/// </summary>
/// <param name="doctype">The doctype token being finished.</param>
private HtmlToken BogusDoctype(HtmlDoctypeToken doctype)
{
    char next;

    do
    {
        next = base.Next;
    }
    while (next != '>' && next != '');

    if (next == '>')
    {
        _state = HtmlParseMode.PCData;
    }
    else
    {
        Back();
    }

    doctype.End = GetCurrentPosition();
    return doctype;
}
/// <summary>
/// Skips whitespace inside a tag and decides what follows: the end of the
/// tag, a self-closing solidus, or the first character of an attribute
/// name (lower-cased if it is an ASCII upper-case letter).
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeBeforeName(HtmlTagToken tag)
{
    char c = SkipSpaces();

    switch (c)
    {
        case '/':
            return TagSelfClosing(tag);
        case '>':
            return EmitTag(tag);
        case '':
            return HtmlToken.EOF;
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Clear().Append('�');
            return AttributeName(tag);
        case '"':
        case '\'':
        case '<':
        case '=':
            // Reported, but the character still becomes part of the name.
            RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
            _stringBuffer.Clear().Append(c);
            return AttributeName(tag);
    }

    // Invariant lower-casing: char.ToLower honours the current culture and
    // would map 'I' to a dotless i under Turkish cultures.
    _stringBuffer.Clear().Append(c.IsUppercaseAscii() ? char.ToLowerInvariant(c) : c);
    return AttributeName(tag);
}
/// <summary>
/// Reads the remainder of an attribute name into <c>_stringBuffer</c>
/// (lower-casing ASCII upper-case letters) and adds the attribute to the
/// tag once the name ends.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeName(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;

        if (next.IsSpaceCharacter())
        {
            tag.AddAttribute(_stringBuffer.ToString());
            return AttributeAfterName(tag);
        }

        switch (next)
        {
            case '/':
                tag.AddAttribute(_stringBuffer.ToString());
                return TagSelfClosing(tag);
            case '=':
                tag.AddAttribute(_stringBuffer.ToString());
                return AttributeBeforeValue(tag);
            case '>':
                tag.AddAttribute(_stringBuffer.ToString());
                return EmitTag(tag);
            case '':
                return HtmlToken.EOF;
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _stringBuffer.Append('�');
                break;
            case '"':
            case '\'':
            case '<':
                // Reported, but the character still becomes part of the name.
                RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
                _stringBuffer.Append(next);
                break;
            default:
                // Invariant lower-casing: char.ToLower honours the current culture
                // and would map 'I' to a dotless i under Turkish cultures.
                _stringBuffer.Append(next.IsUppercaseAscii() ? char.ToLowerInvariant(next) : next);
                break;
        }
    }
}
/// <summary>
/// Handles the input after an attribute name and whitespace: an equals
/// sign starts the value, '/' and '&gt;' end the tag, and any other
/// character begins the next attribute name (lower-cased if it is an ASCII
/// upper-case letter).
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeAfterName(HtmlTagToken tag)
{
    char c = SkipSpaces();

    switch (c)
    {
        case '/':
            return TagSelfClosing(tag);
        case '=':
            return AttributeBeforeValue(tag);
        case '>':
            return EmitTag(tag);
        case '':
            return HtmlToken.EOF;
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Clear().Append('�');
            return AttributeName(tag);
        case '"':
        case '\'':
        case '<':
            // Reported, but the character still becomes part of the name.
            RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
            _stringBuffer.Clear().Append(c);
            return AttributeName(tag);
    }

    // Invariant lower-casing: char.ToLower honours the current culture and
    // would map 'I' to a dotless i under Turkish cultures.
    _stringBuffer.Clear().Append(c.IsUppercaseAscii() ? char.ToLowerInvariant(c) : c);
    return AttributeName(tag);
}
/// <summary>
/// Handles the first non-whitespace character after an attribute's equals
/// sign and selects the quoted or unquoted value routine. The string
/// buffer is cleared in every branch that starts a value, because it still
/// holds the attribute name at this point.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeBeforeValue(HtmlTagToken tag)
{
    char c = SkipSpaces();

    switch (c)
    {
        case '"':
            _stringBuffer.Clear();
            return AttributeDoubleQuotedValue(tag);
        case '&':
            // Hand the ampersand itself to the unquoted routine so that it can
            // resolve the character reference.
            _stringBuffer.Clear();
            return AttributeUnquotedValue(c, tag);
        case '\'':
            _stringBuffer.Clear();
            return AttributeSingleQuotedValue(tag);
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            // Clear first: without it the value was prefixed with the attribute
            // name left in the buffer by AttributeName.
            _stringBuffer.Clear().Append('�');
            return AttributeUnquotedValue(base.Next, tag);
        case '>':
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            return EmitTag(tag);
        case '<':
        case '=':
        case '`':
            RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
            _stringBuffer.Clear().Append(c);
            return AttributeUnquotedValue(base.Next, tag);
        case '':
            return HtmlToken.EOF;
        default:
            _stringBuffer.Clear().Append(c);
            return AttributeUnquotedValue(base.Next, tag);
    }
}
/// <summary>
/// Reads a double-quoted attribute value, resolving character references,
/// until the closing quote.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeDoubleQuotedValue(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;

        if (next == '"')
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return AttributeAfterValue(tag);
        }

        if (next == '')
        {
            return HtmlToken.EOF;
        }

        if (next == '&')
        {
            string reference = CharacterReference(base.Next, '"');

            if (reference != null)
            {
                _stringBuffer.Append(reference);
            }
            else
            {
                _stringBuffer.Append('&');
            }
        }
        else if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }
}
/// <summary>
/// Reads a single-quoted attribute value, resolving character references,
/// until the closing quote.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeSingleQuotedValue(HtmlTagToken tag)
{
    while (true)
    {
        char next = base.Next;

        if (next == '\'')
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return AttributeAfterValue(tag);
        }

        if (next == '')
        {
            return HtmlToken.EOF;
        }

        if (next == '&')
        {
            string reference = CharacterReference(base.Next, '\'');

            if (reference != null)
            {
                _stringBuffer.Append(reference);
            }
            else
            {
                _stringBuffer.Append('&');
            }
        }
        else if (next == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            _stringBuffer.Append(next);
        }
    }
}
/// <summary>
/// Reads an unquoted attribute value until whitespace or '&gt;'. Quotes,
/// '&lt;', '=' and '`' are reported but kept as part of the value.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeUnquotedValue(char c, HtmlTagToken tag)
{
    for (; !c.IsSpaceCharacter(); c = base.Next)
    {
        if (c == '>')
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return EmitTag(tag);
        }

        if (c == '')
        {
            return HtmlToken.EOF;
        }

        if (c == '&')
        {
            string reference = CharacterReference(base.Next, '>');

            if (reference != null)
            {
                _stringBuffer.Append(reference);
            }
            else
            {
                _stringBuffer.Append('&');
            }
        }
        else if (c == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('�');
        }
        else
        {
            if (c == '"' || c == '\'' || c == '<' || c == '=' || c == '`')
            {
                RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
            }

            _stringBuffer.Append(c);
        }
    }

    tag.SetAttributeValue(_stringBuffer.ToString());
    return AttributeBeforeName(tag);
}
/// <summary>
/// Handles the character after a quoted attribute value. Anything other
/// than whitespace, '/', '&gt;' or end of input is reported, pushed back
/// and treated as the start of the next attribute.
/// </summary>
/// <param name="tag">The tag token being built.</param>
private HtmlToken AttributeAfterValue(HtmlTagToken tag)
{
    char next = base.Next;

    if (next.IsSpaceCharacter())
    {
        return AttributeBeforeName(tag);
    }

    if (next == '/')
    {
        return TagSelfClosing(tag);
    }

    if (next == '>')
    {
        return EmitTag(tag);
    }

    if (next == '')
    {
        return HtmlToken.EOF;
    }

    RaiseErrorOccurred(ErrorCode.AttributeNameExpected);
    Back();
    return AttributeBeforeName(tag);
}
/// <summary>
/// Script mode: accumulates text until a less-than sign hands over to
/// ScriptDataLT or the input ends. The null-character case reports
/// ErrorCode.Null and stores the replacement character.
/// </summary>
/// <param name="c">The first character to process.</param>
private HtmlToken ScriptData(char c)
{
    for (; ; c = base.Next)
    {
        if (c == '<')
        {
            return ScriptDataLT();
        }

        if (c == '')
        {
            return HtmlToken.EOF;
        }

        if (c == ' ')
        {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        }
        else
        {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Handles a less-than sign seen in Script mode: a solidus starts an
/// end-tag candidate, '!' may start an escaped section, and anything else
/// makes the sign literal text.
/// </summary>
private HtmlToken ScriptDataLT()
{
    TextPosition start = GetCurrentPosition();
    char following = base.Next;

    if (following == '/')
    {
        return ScriptDataEndTag(start);
    }

    if (following == '!')
    {
        _buffer.Append('<').Append('!');
        return ScriptDataStartEscape(base.Next);
    }

    _buffer.Append('<');
    return ScriptData(following);
}
/// <summary>
/// Reads the first character of an end-tag candidate in Script mode. A
/// letter starts the tag name; anything else turns the consumed "&lt;/"
/// into literal text.
/// </summary>
/// <param name="position">Start position recorded for the close tag.</param>
private HtmlToken ScriptDataEndTag(TextPosition position)
{
    char first = base.Next;

    if (!first.IsLetter())
    {
        _buffer.Append('<').Append('/');
        return ScriptData(first);
    }

    HtmlTagToken tag = HtmlToken.CloseTag();
    tag.Start = position;
    _stringBuffer.Clear().Append(first);
    return ScriptDataNameEndTag(tag);
}
/// <summary>
/// Reads the remainder of a candidate end-tag name inside script data. The tag is only
/// accepted when the lower-cased name equals _lastStartTag and is followed by a space
/// character, '/' or '>'; otherwise "&lt;/" plus the collected name is emitted as text.
/// </summary>
/// <param name="tag">The end tag token to complete when the name matches.</param>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataNameEndTag(HtmlTagToken tag)
{
    char next = base.Next;

    // A letter can never terminate the name, so letters are simply accumulated. The name is
    // materialized and lower-cased once, after the run ends, instead of once per character.
    while (next.IsLetter()) {
        _stringBuffer.Append(next);
        next = base.Next;
    }

    string collected = _stringBuffer.ToString();

    if (next == '/' || next == '>' || next.IsSpaceCharacter()) {
        string name = collected.ToLowerInvariant();

        // Only an end tag matching the most recently emitted start tag is recognised.
        if (name == _lastStartTag) {
            tag.Name = name;

            if (next == '/')
                return TagSelfClosing(tag);

            if (next == '>')
                return EmitTag(tag);

            return AttributeBeforeName(tag);
        }
    }

    // Not an appropriate end tag: everything consumed so far is ordinary script text.
    _buffer.Append('<').Append('/').Append(collected);
    return ScriptData(next);
}
/// <summary>
/// Handles the character after "&lt;!" inside script data: a '-' is buffered and continues
/// with ScriptDataStartEscapeDash, anything else is handed back to ScriptData.
/// </summary>
/// <param name="c">The character following "&lt;!".</param>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataStartEscape(char c)
{
    if (c != '-')
        return ScriptData(c);

    _buffer.Append('-');
    return ScriptDataStartEscapeDash(base.Next);
}
/// <summary>
/// Handles one character of escaped script data: '-' continues with ScriptDataEscapedDash,
/// '&lt;' with ScriptDataEscapedLT, and every other ordinary character is passed on to ScriptData.
/// </summary>
/// <param name="c">The character to process.</param>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - confirm against the
// original source.
private HtmlToken ScriptDataEscaped(char c)
{
switch (c) {
case '-':
_buffer.Append('-');
return ScriptDataEscapedDash();
case '<':
return ScriptDataEscapedLT();
case ' ':
// Reported as ErrorCode.Null; the replacement character is buffered instead.
RaiseErrorOccurred(ErrorCode.Null);
_buffer.Append('�');
return ScriptDataEscaped(base.Next);
case '':
return HtmlToken.EOF;
default:
// NOTE(review): ordinary characters leave this escaped handler and continue in the plain
// ScriptData loop - confirm this is intended rather than staying in the escaped handling.
return ScriptData(c);
}
}
/// <summary>
/// Handles the character after "&lt;!-" inside script data: a second '-' is buffered and
/// continues with ScriptDataEscapedDashDash, anything else is handed back to ScriptData.
/// </summary>
/// <param name="c">The character following "&lt;!-".</param>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataStartEscapeDash(char c)
{
    if (c != '-')
        return ScriptData(c);

    _buffer.Append('-');
    return ScriptDataEscapedDashDash();
}
/// <summary>
/// Handles the character following a single '-' in escaped script data. A second '-'
/// continues with ScriptDataEscapedDashDash, '&lt;' with ScriptDataEscapedLT; any other
/// ordinary character is buffered and processing returns to ScriptDataEscaped.
/// </summary>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - confirm against the
// original source.
private HtmlToken ScriptDataEscapedDash()
{
char next = base.Next;
switch (next) {
case '-':
_buffer.Append('-');
return ScriptDataEscapedDashDash();
case '<':
return ScriptDataEscapedLT();
case ' ':
// Reported as ErrorCode.Null; the replacement character is buffered instead.
RaiseErrorOccurred(ErrorCode.Null);
_buffer.Append('�');
return ScriptDataEscaped(base.Next);
case '':
return HtmlToken.EOF;
default:
_buffer.Append(next);
return ScriptDataEscaped(base.Next);
}
}
/// <summary>
/// Handles the characters following "--" in escaped script data. Further dashes are
/// buffered and keep the loop running, '&lt;' continues with ScriptDataEscapedLT, and '>'
/// is buffered before processing returns to the plain ScriptData handler.
/// </summary>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - confirm against the
// original source.
private HtmlToken ScriptDataEscapedDashDash()
{
while (true) {
char next = base.Next;
switch (next) {
case '-':
// Falls out of the switch to the Append('-') at the bottom of the loop.
break;
case '<':
return ScriptDataEscapedLT();
case '>':
_buffer.Append('>');
return ScriptData(base.Next);
case ' ':
// Reported as ErrorCode.Null; the replacement character is buffered instead.
RaiseErrorOccurred(ErrorCode.Null);
_buffer.Append('�');
return ScriptDataEscaped(base.Next);
case '':
return HtmlToken.EOF;
default:
_buffer.Append(next);
return ScriptDataEscaped(base.Next);
}
// Only reached for '-': buffer the dash and read the next character.
_buffer.Append('-');
}
}
/// <summary>
/// Handles the character that follows a '&lt;' in escaped script data: '/' may open an end
/// tag, a letter may open a nested (double-escaped) script, anything else is plain text.
/// </summary>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataEscapedLT()
{
    // Captured before reading further so a possible end tag can record its start position.
    TextPosition start = GetCurrentPosition();
    char following = base.Next;

    // NOTE(review): this routes to ScriptDataEndTag although a ScriptDataEscapedEndTag
    // handler exists in this class - confirm the non-escaped handler is intended here.
    if (following == '/')
        return ScriptDataEndTag(start);

    // In every remaining case the '<' itself is plain text.
    _buffer.Append('<');

    if (!following.IsLetter())
        return ScriptDataEscaped(following);

    // The letter starts a candidate tag name (kept in _stringBuffer) and is also emitted as text.
    _stringBuffer.Clear().Append(following);
    _buffer.Append(following);
    return ScriptDataStartDoubleEscape();
}
/// <summary>
/// Handles the character after "&lt;/" in escaped script data. A letter starts a candidate
/// end-tag name; anything else turns "&lt;/" into text and returns to ScriptDataEscaped.
/// </summary>
/// <param name="tag">The end tag token passed on to ScriptDataEscapedNameTag.</param>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataEscapedEndTag(HtmlTagToken tag)
{
    char first = base.Next;

    if (!first.IsLetter()) {
        // Not a tag after all: emit the consumed "</" as text.
        _buffer.Append('<');
        _buffer.Append('/');
        return ScriptDataEscaped(first);
    }

    // The candidate tag name is collected in _stringBuffer, starting with this letter.
    _stringBuffer.Clear().Append(first);
    return ScriptDataEscapedNameTag(tag);
}
/// <summary>
/// Reads the remainder of a candidate end-tag name in escaped script data. The tag is only
/// accepted when the lower-cased name equals _lastStartTag and is followed by a space
/// character, '/' or '>'; otherwise "&lt;/" plus the collected name is emitted as text and
/// processing returns to ScriptDataEscaped.
/// </summary>
/// <param name="tag">The end tag token to complete when the name matches.</param>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataEscapedNameTag(HtmlTagToken tag)
{
    char next = base.Next;

    // A letter can never terminate the name, so letters are simply accumulated. The name is
    // materialized and lower-cased once, after the run ends, instead of once per character.
    while (next.IsLetter()) {
        _stringBuffer.Append(next);
        next = base.Next;
    }

    string collected = _stringBuffer.ToString();

    if (next == '/' || next == '>' || next.IsSpaceCharacter()) {
        string name = collected.ToLowerInvariant();

        // Only an end tag matching the most recently emitted start tag is recognised.
        if (name == _lastStartTag) {
            tag.Name = name;

            if (next == '/')
                return TagSelfClosing(tag);

            if (next == '>')
                return EmitTag(tag);

            return AttributeBeforeName(tag);
        }
    }

    // Not an appropriate end tag: everything consumed so far is ordinary escaped script text.
    _buffer.Append('<').Append('/').Append(collected);
    return ScriptDataEscaped(next);
}
/// <summary>
/// Reads the rest of a tag-like name that appeared after '&lt;' in escaped script data.
/// Every consumed character is also emitted as text. When the name is terminated by a
/// space character, '/' or '>' and equals Tags.Script (ignoring case), processing enters
/// the double-escaped handler; otherwise it returns to ScriptDataEscaped.
/// </summary>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataStartDoubleEscape()
{
    char current = base.Next;

    while (current != '/' && current != '>' && !current.IsSpaceCharacter()) {
        // Any non-letter that is not a terminator aborts the name without being buffered here.
        if (!current.IsLetter())
            return ScriptDataEscaped(current);

        _stringBuffer.Append(current);
        _buffer.Append(current);
        current = base.Next;
    }

    // The terminator is text as well.
    _buffer.Append(current);

    bool opensScript = _stringBuffer.ToString().Equals(Tags.Script, StringComparison.OrdinalIgnoreCase);
    char following = base.Next;

    return opensScript
        ? ScriptDataEscapedDouble(following)
        : ScriptDataEscaped(following);
}
/// <summary>
/// Collects double-escaped script text into _buffer, starting with <paramref name="c"/>.
/// A '-' is buffered and continues with ScriptDataEscapedDoubleDash, a '&lt;' is buffered and
/// continues with ScriptDataEscapedDoubleLT; every other ordinary character is buffered
/// and the next character is read.
/// </summary>
/// <param name="c">The first character to process.</param>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - they are kept exactly as
// found; confirm against the original source.
private HtmlToken ScriptDataEscapedDouble(char c)
{
    // Iterates instead of calling itself once per character, so a long run of ordinary
    // text cannot grow the call stack.
    while (true) {
        switch (c) {
        case '-':
            _buffer.Append('-');
            return ScriptDataEscapedDoubleDash();
        case '<':
            _buffer.Append('<');
            return ScriptDataEscapedDoubleLT();
        case ' ':
            // Reported as ErrorCode.Null. Only the replacement character is buffered, never
            // the offending character itself (same as ScriptDataEscapedDoubleDash).
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
            break;
        case '':
            RaiseErrorOccurred(ErrorCode.EOF);
            return HtmlToken.EOF;
        default:
            _buffer.Append(c);
            break;
        }
        c = base.Next;
    }
}
/// <summary>
/// Handles the character following a single '-' in double-escaped script data. A second
/// '-' continues with ScriptDataEscapedDoubleDashDash, '&lt;' with ScriptDataEscapedDoubleLT;
/// any other ordinary character is buffered and processing returns to ScriptDataEscapedDouble.
/// </summary>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - confirm against the
// original source.
private HtmlToken ScriptDataEscapedDoubleDash()
{
char next = base.Next;
switch (next) {
case '-':
_buffer.Append('-');
return ScriptDataEscapedDoubleDashDash();
case '<':
_buffer.Append('<');
return ScriptDataEscapedDoubleLT();
case ' ':
// Reported as ErrorCode.Null; the replacement character is buffered instead.
RaiseErrorOccurred(ErrorCode.Null);
_buffer.Append('�');
return ScriptDataEscapedDouble(base.Next);
case '':
// Unlike the single-escaped handlers, this path reports ErrorCode.EOF before returning.
RaiseErrorOccurred(ErrorCode.EOF);
return HtmlToken.EOF;
default:
_buffer.Append(next);
return ScriptDataEscapedDouble(base.Next);
}
}
/// <summary>
/// Handles the characters following "--" in double-escaped script data. Further dashes
/// are buffered and keep the loop running, '&lt;' is buffered and continues with
/// ScriptDataEscapedDoubleLT, and '>' is buffered before processing returns to the plain
/// ScriptData handler.
/// </summary>
/// <returns>The token produced by the handler that takes over, or HtmlToken.EOF.</returns>
// NOTE(review): the case labels that render as ' ' and '' look garbled by text extraction
// (presumably the NUL character and the end-of-file sentinel) - confirm against the
// original source.
private HtmlToken ScriptDataEscapedDoubleDashDash()
{
while (true) {
char next = base.Next;
switch (next) {
case '-':
// Falls out of the switch to the Append('-') at the bottom of the loop.
break;
case '<':
_buffer.Append('<');
return ScriptDataEscapedDoubleLT();
case '>':
_buffer.Append('>');
return ScriptData(base.Next);
case ' ':
// Reported as ErrorCode.Null; the replacement character is buffered instead.
RaiseErrorOccurred(ErrorCode.Null);
_buffer.Append('�');
return ScriptDataEscapedDouble(base.Next);
case '':
RaiseErrorOccurred(ErrorCode.EOF);
return HtmlToken.EOF;
default:
_buffer.Append(next);
return ScriptDataEscapedDouble(base.Next);
}
// Only reached for '-': buffer the dash and read the next character.
_buffer.Append('-');
}
}
/// <summary>
/// Handles the character after a '&lt;' in double-escaped script data. A '/' is buffered
/// and starts collecting a possible closing name; anything else is handed back to
/// ScriptDataEscapedDouble.
/// </summary>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataEscapedDoubleLT()
{
    char following = base.Next;

    if (following != '/')
        return ScriptDataEscapedDouble(following);

    // Start with an empty name buffer; the '/' itself stays part of the text.
    _stringBuffer.Clear();
    _buffer.Append('/');
    return ScriptDataEndDoubleEscape();
}
/// <summary>
/// Reads the name that follows "&lt;/" in double-escaped script data. Every consumed
/// character is also emitted as text. When the name is terminated by a space character,
/// '/' or '>' and equals Tags.Script (ignoring case), processing drops back to
/// ScriptDataEscaped; otherwise it stays in the double-escaped handler.
/// </summary>
/// <returns>The token produced by the handler that takes over.</returns>
private HtmlToken ScriptDataEndDoubleEscape()
{
    char current = base.Next;

    while (!current.IsSpaceCharacter() && current != '/' && current != '>') {
        // Any non-letter that is not a terminator aborts the name without being buffered here.
        if (!current.IsLetter())
            return ScriptDataEscapedDouble(current);

        _stringBuffer.Append(current);
        _buffer.Append(current);
        current = base.Next;
    }

    // The terminator is text as well.
    _buffer.Append(current);

    bool closesScript = _stringBuffer.ToString().Equals(Tags.Script, StringComparison.OrdinalIgnoreCase);
    char following = base.Next;

    return closesScript
        ? ScriptDataEscaped(following)
        : ScriptDataEscapedDouble(following);
}
/// <summary>
/// Builds a comment token from the current content of _stringBuffer and stamps it with
/// its start and end positions.
/// </summary>
/// <param name="position">The position stored as the start of the comment.</param>
/// <returns>The comment token, ending at the current position.</returns>
private HtmlCommentToken EmitComment(TextPosition position)
{
    string content = _stringBuffer.ToString();
    HtmlCommentToken comment = HtmlToken.Comment(content);
    comment.Start = position;
    comment.End = GetCurrentPosition();
    return comment;
}
/// <summary>
/// Finalizes a tag token: resets the tokenizer state to PCData, validates the tag and
/// records its end position. For start tags, later attributes that repeat an earlier key
/// are removed (one AttributeDuplicateOmitted error each) and the tag name is remembered
/// in _lastStartTag. For other tags, self-closing syntax and attributes are reported as errors.
/// </summary>
/// <param name="tag">The tag token to finalize.</param>
/// <returns>The same token instance.</returns>
private HtmlTagToken EmitTag(HtmlTagToken tag)
{
    _state = HtmlParseMode.PCData;

    if (tag.Type != HtmlTokenType.StartTag) {
        if (tag.IsSelfClosing)
            RaiseErrorOccurred(ErrorCode.EndTagCannotBeSelfClosed);

        if (tag.Attributes.Count != 0)
            RaiseErrorOccurred(ErrorCode.EndTagCannotHaveAttributes);
    } else {
        // Walk from the last attribute towards the first so that a removal never shifts
        // an index that still has to be visited; the earliest occurrence of a key is kept.
        int candidate = tag.Attributes.Count;

        while (--candidate > 0) {
            int earlier = candidate;

            while (--earlier >= 0) {
                if (tag.Attributes[earlier].Key == tag.Attributes[candidate].Key) {
                    tag.Attributes.RemoveAt(candidate);
                    RaiseErrorOccurred(ErrorCode.AttributeDuplicateOmitted);
                    break;
                }
            }
        }

        _lastStartTag = tag.Name;
    }

    tag.End = GetCurrentPosition();
    return tag;
}
}
}