// HtmlTokenizer
// Performs the tokenization of the source code. Follows the tokenization algorithm at:
// http://www.w3.org/html/wg/drafts/html/master/syntax.html
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
namespace AngleSharp.Parser.Html
{
[DebuggerStepThrough]
internal sealed class HtmlTokenizer : BaseTokenizer
{
private bool _acceptsCharacterData;
private string _lastStartTag;
private HtmlParseMode _model;
private StringBuilder _buffer;
private HtmlToken _buffered;
/// <summary>
/// Gets or sets whether a "[CDATA[" markup declaration is tokenized as
/// character data (see MarkupDeclaration) instead of falling through to
/// the bogus-comment handling.
/// </summary>
public bool AcceptsCharacterData
{
    get { return _acceptsCharacterData; }
    set { _acceptsCharacterData = value; }
}
/// <summary>
/// Creates a tokenizer over the given source. The tokenizer starts in
/// PCData mode, with CDATA handling switched off and an empty buffer for
/// pending character data.
/// </summary>
/// <param name="source">The text source handed to the base tokenizer.</param>
public HtmlTokenizer(ITextSource source)
    : base(source)
{
    _buffer = new StringBuilder();
    _acceptsCharacterData = false;
    _model = HtmlParseMode.PCData;
}
/// <summary>
/// Produces the next token. A token that was parked behind flushed
/// character data is returned first; otherwise the state that matches the
/// current parse mode is run on the next input character.
/// </summary>
/// <returns>
/// The next token, HtmlToken.EOF once the source has ended, or null when
/// the current parse mode has no matching state.
/// </returns>
public HtmlToken Get()
{
    HtmlToken pending = _buffered;
    if (pending != null) {
        _buffered = null;
        return pending;
    }

    char first = base.Next;
    if (base.IsEnded) {
        return HtmlToken.EOF;
    }

    HtmlToken token = null;
    if (_model == HtmlParseMode.PCData) {
        token = Data(first);
    } else if (_model == HtmlParseMode.RCData) {
        token = RCData(first);
    } else if (_model == HtmlParseMode.Plaintext) {
        token = Plaintext(first);
    } else if (_model == HtmlParseMode.Rawtext) {
        token = Rawtext(first);
    } else if (_model == HtmlParseMode.Script) {
        token = ScriptData(first);
    }

    if (_buffer.Length == 0) {
        return token;
    }

    // Character data was collected on the way: emit it now and keep the
    // token that ended the run for the following call.
    _buffered = token;
    HtmlToken characters = HtmlToken.Character(_buffer.ToString());
    _buffer.Clear();
    return characters;
}
/// <summary>
/// Sets the parse mode that Get() uses to pick the state for the next token.
/// </summary>
/// <param name="state">The parse mode to use from now on.</param>
public void Switch(HtmlParseMode state)
{
    this._model = state;
}
/// <summary>
/// Plaintext state: copies every character into the character buffer
/// until the end of the input is reached.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken Plaintext(char c)
{
    for (; ; c = base.Next) {
        if (c == '') {
            return HtmlToken.EOF;
        }

        if (c == ' ') {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        } else {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Data state: buffers character data, resolves character references and
/// hands over to the tag-open state on '&lt;'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>The token that ends the character run, or HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken Data(char c)
{
    for (; ; c = base.Next) {
        switch (c) {
            case '<':
                return TagOpen(base.Next);
            case '':
                return HtmlToken.EOF;
            case ' ':
                // Reported and skipped; the loop simply moves on to the next character.
                RaiseErrorOccurred(ErrorCode.Null);
                break;
            case '&': {
                // An unresolved reference leaves a literal ampersand behind.
                string reference = CharacterReference(base.Next, ' ');
                _buffer.Append(reference ?? "&");
                break;
            }
            default:
                _buffer.Append(c);
                break;
        }
    }
}
/// <summary>
/// RCData state: buffers character data, resolves character references
/// and hands over to RCDataLT on '&lt;'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>The token that ends the character run, or HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken RCData(char c)
{
    for (; ; c = base.Next) {
        switch (c) {
            case '<':
                return RCDataLT(base.Next);
            case '':
                return HtmlToken.EOF;
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _buffer.Append('�');
                break;
            case '&': {
                // An unresolved reference leaves a literal ampersand behind.
                string reference = CharacterReference(base.Next, ' ');
                _buffer.Append(reference ?? "&");
                break;
            }
            default:
                _buffer.Append(c);
                break;
        }
    }
}
/// <summary>
/// RCData less-than state: a following '/' may start an end tag; anything
/// else makes the '&lt;' ordinary character data.
/// </summary>
/// <param name="c">The character after the '&lt;'.</param>
private HtmlToken RCDataLT(char c)
{
    if (c != '/') {
        _buffer.Append('<');
        return RCData(c);
    }

    _stringBuffer.Clear();
    return RCDataEndTag(base.Next);
}
/// <summary>
/// RCData end-tag-open state: an ASCII letter starts a candidate end tag
/// name; anything else turns the consumed "&lt;/" into character data.
/// </summary>
/// <param name="c">The character after the "&lt;/".</param>
private HtmlToken RCDataEndTag(char c)
{
    if (c.IsUppercaseAscii()) {
        _stringBuffer.Clear();
        // Invariant lower-casing: char.ToLower follows the current culture
        // and would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Append(char.ToLowerInvariant(c));
        return RCDataNameEndTag(base.Next, HtmlToken.CloseTag());
    }
    if (c.IsLowercaseAscii()) {
        _stringBuffer.Clear();
        _stringBuffer.Append(c);
        return RCDataNameEndTag(base.Next, HtmlToken.CloseTag());
    }
    _buffer.Append('<').Append('/');
    return RCData(c);
}
/// <summary>
/// RCData end-tag-name state: collects the lower-cased tag name and only
/// treats it as a real end tag when it equals the last start tag name.
/// Otherwise the consumed "&lt;/name" is flushed back as character data.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The close tag token under construction.</param>
private HtmlToken RCDataNameEndTag(char c, HtmlTagToken tag)
{
    string text = _stringBuffer.ToString();
    bool flag = text == _lastStartTag;
    if (flag && c.IsSpaceCharacter()) {
        tag.Name = text;
        return AttributeBeforeName(base.Next, tag);
    }
    if (flag && c == '/') {
        tag.Name = text;
        return TagSelfClosing(base.Next, tag);
    }
    if (flag && c == '>') {
        tag.Name = text;
        return EmitTag(tag);
    }
    if (c.IsUppercaseAscii()) {
        // Invariant lower-casing: char.ToLower follows the current culture
        // and would map 'I' to a dotless i under Turkish cultures, so the
        // comparison with _lastStartTag could never succeed.
        _stringBuffer.Append(char.ToLowerInvariant(c));
        return RCDataNameEndTag(base.Next, tag);
    }
    if (c.IsLowercaseAscii()) {
        _stringBuffer.Append(c);
        return RCDataNameEndTag(base.Next, tag);
    }
    _buffer.Append('<').Append('/');
    _buffer.Append(_stringBuffer.ToString());
    return RCData(c);
}
/// <summary>
/// Rawtext state: buffers character data and hands over to RawtextLT on '&lt;'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>The token that ends the character run, or HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken Rawtext(char c)
{
    for (; ; c = base.Next) {
        if (c == '<') {
            return RawtextLT(base.Next);
        }
        if (c == '') {
            return HtmlToken.EOF;
        }

        if (c == ' ') {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        } else {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Rawtext less-than state: a following '/' may start an end tag; anything
/// else makes the '&lt;' ordinary character data.
/// </summary>
/// <param name="c">The character after the '&lt;'.</param>
private HtmlToken RawtextLT(char c)
{
    if (c != '/') {
        _buffer.Append('<');
        return Rawtext(c);
    }

    _stringBuffer.Clear();
    return RawtextEndTag(base.Next);
}
/// <summary>
/// Rawtext end-tag-open state: an ASCII letter starts a candidate end tag
/// name; anything else turns the consumed "&lt;/" into character data.
/// </summary>
/// <param name="c">The character after the "&lt;/".</param>
private HtmlToken RawtextEndTag(char c)
{
    if (c.IsUppercaseAscii()) {
        _stringBuffer.Clear();
        // Invariant lower-casing: char.ToLower follows the current culture
        // and would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Append(char.ToLowerInvariant(c));
        return RawtextNameEndTag(base.Next, HtmlToken.CloseTag());
    }
    if (c.IsLowercaseAscii()) {
        _stringBuffer.Clear();
        _stringBuffer.Append(c);
        return RawtextNameEndTag(base.Next, HtmlToken.CloseTag());
    }
    _buffer.Append('<').Append('/');
    return Rawtext(c);
}
/// <summary>
/// Rawtext end-tag-name state: collects the lower-cased tag name and only
/// treats it as a real end tag when it equals the last start tag name.
/// Otherwise the consumed "&lt;/name" is flushed back as character data.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The close tag token under construction.</param>
private HtmlToken RawtextNameEndTag(char c, HtmlTagToken tag)
{
    string text = _stringBuffer.ToString();
    bool flag = text == _lastStartTag;
    if (flag && c.IsSpaceCharacter()) {
        tag.Name = text;
        return AttributeBeforeName(base.Next, tag);
    }
    if (flag && c == '/') {
        tag.Name = text;
        return TagSelfClosing(base.Next, tag);
    }
    if (flag && c == '>') {
        tag.Name = text;
        return EmitTag(tag);
    }
    if (c.IsUppercaseAscii()) {
        // Invariant lower-casing: char.ToLower follows the current culture
        // and would map 'I' to a dotless i under Turkish cultures, so the
        // comparison with _lastStartTag could never succeed.
        _stringBuffer.Append(char.ToLowerInvariant(c));
        return RawtextNameEndTag(base.Next, tag);
    }
    if (c.IsLowercaseAscii()) {
        _stringBuffer.Append(c);
        return RawtextNameEndTag(base.Next, tag);
    }
    _buffer.Append('<').Append('/');
    _buffer.Append(_stringBuffer.ToString());
    return Rawtext(c);
}
/// <summary>
/// CDATA section state: collects characters until "]]&gt;" or the end of
/// the input and returns them as one character token.
/// </summary>
/// <param name="c">The first character of the section's content.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; it presumably
/// stands for the end-of-input sentinel - confirm against the original file.
/// </remarks>
private HtmlToken CData(char c)
{
    _stringBuffer.Clear();
    for (; ; c = base.Next) {
        if (c == '') {
            Back();
            break;
        }
        if (c == ']' && ContinuesWith("]]>", true)) {
            // Skip the rest of the terminator.
            Advance(2);
            break;
        }
        _stringBuffer.Append(c);
    }
    return HtmlToken.Character(_stringBuffer.ToString());
}
/// <summary>
/// Tries to consume a character reference; the leading '&amp;' has already
/// been read by the caller.
/// </summary>
/// <param name="c">The character after the '&amp;'.</param>
/// <param name="allowedCharacter">
/// An extra character that ends the attempt immediately; the attribute
/// value states pass their delimiter here.
/// </param>
/// <returns>
/// The replacement text, or null when nothing was resolved (callers then
/// emit a literal '&amp;').
/// </returns>
/// <remarks>
/// NOTE(review): the '' literal and the ' ' default value look mangled by
/// extraction; they presumably stand for the end-of-input sentinel and NUL
/// - confirm against the original file.
/// </remarks>
private string CharacterReference(char c, char allowedCharacter = ' ')
{
    if (c.IsSpaceCharacter() || c == '<' || c == '' || c == '&' || c == allowedCharacter) {
        Back();
        return null;
    }
    if (c == '#') {
        int radix = 10;
        int code = 0;
        int digitCount = 0;
        c = base.Next;
        bool isHex = c == 'x' || c == 'X';
        if (!isHex) {
            while (c.IsDigit()) {
                // Accumulate in 64 bits and saturate: a very long digit run
                // must stay "too large" instead of wrapping around to an
                // unrelated code point.
                long widened = (long)code * radix + c.FromHex();
                code = widened > int.MaxValue ? int.MaxValue : (int)widened;
                digitCount++;
                c = base.Next;
            }
        } else {
            radix = 16;
            while ((c = base.Next).IsHex()) {
                long widened = (long)code * radix + c.FromHex();
                code = widened > int.MaxValue ? int.MaxValue : (int)widened;
                digitCount++;
            }
        }
        if (digitCount == 0) {
            // Nothing numeric followed: rewind over what was read so far
            // (one extra step for the 'x'/'X' marker).
            Back(2);
            if (isHex)
                Back();
            RaiseErrorOccurred(ErrorCode.CharacterReferenceWrongNumber);
            return null;
        }
        if (c != ';') {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceSemicolonMissing);
            Back();
        }
        if (Entities.IsInCharacterTable(code)) {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidCode);
            return Entities.GetSymbolFromTable(code);
        }
        if (Entities.IsInvalidNumber(code)) {
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidNumber);
            return '�'.ToString();
        }
        if (Entities.IsInInvalidRange(code))
            RaiseErrorOccurred(ErrorCode.CharacterReferenceInvalidRange);
        return Entities.Convert(code);
    }
    // Named reference: grow the candidate name one character at a time and
    // remember the last prefix that resolved to a symbol.
    string result = null;
    int unmatched = 0;
    int insertionPoint = base.InsertionPoint - 1;
    char[] nameChars = new char[31];
    int length = 0;
    char current = base.Current;
    while (current != ';' && current.IsName()) {
        nameChars[length++] = current;
        string name = new string(nameChars, 0, length);
        current = base.Next;
        unmatched++;
        name = ((current == ';') ? Entities.GetSymbol(name) : Entities.GetSymbolWithoutSemicolon(name));
        if (name != null) {
            unmatched = 0;
            result = name;
        }
        if (base.IsEnded || length >= 31)
            break;
    }
    // Give back the characters read since the last successful lookup.
    Back(unmatched);
    current = base.Current;
    if (current != ';') {
        if (allowedCharacter != 0 && (current == '=' || current.IsAlphanumericAscii())) {
            if (current == '=')
                RaiseErrorOccurred(ErrorCode.CharacterReferenceAttributeEqualsFound);
            base.InsertionPoint = insertionPoint;
            return null;
        }
        Back();
        RaiseErrorOccurred(ErrorCode.CharacterReferenceNotTerminated);
    }
    return result;
}
/// <summary>
/// Tag-open state: dispatches on the character after '&lt;' to a markup
/// declaration, an end tag, a start tag name or a bogus comment. Anything
/// else makes the '&lt;' ordinary character data.
/// </summary>
/// <param name="c">The character after the '&lt;'.</param>
private HtmlToken TagOpen(char c)
{
    switch (c) {
        case '!':
            return MarkupDeclaration(base.Next);
        case '/':
            return TagEnd(base.Next);
        default:
            if (c.IsUppercaseAscii()) {
                _stringBuffer.Clear();
                // Invariant lower-casing: char.ToLower follows the current
                // culture and would map 'I' to a dotless i under Turkish
                // cultures, corrupting tag names.
                _stringBuffer.Append(char.ToLowerInvariant(c));
                return TagName(base.Next, HtmlToken.OpenTag());
            }
            if (c.IsLowercaseAscii()) {
                _stringBuffer.Clear();
                _stringBuffer.Append(c);
                return TagName(base.Next, HtmlToken.OpenTag());
            }
            if (c == '?') {
                RaiseErrorOccurred(ErrorCode.BogusComment);
                return BogusComment(c);
            }
            _model = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.AmbiguousOpenTag);
            _buffer.Append('<');
            return Data(c);
    }
}
/// <summary>
/// End-tag-open state: an ASCII letter starts the name of a close tag;
/// "&lt;/&gt;" is reported and dropped; anything else becomes a bogus comment.
/// </summary>
/// <param name="c">The character after the "&lt;/".</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken TagEnd(char c)
{
    if (c.IsUppercaseAscii()) {
        _stringBuffer.Clear();
        // Invariant lower-casing: char.ToLower follows the current culture
        // and would map 'I' to a dotless i under Turkish cultures.
        _stringBuffer.Append(char.ToLowerInvariant(c));
        return TagName(base.Next, HtmlToken.CloseTag());
    }
    if (c.IsLowercaseAscii()) {
        _stringBuffer.Clear();
        _stringBuffer.Append(c);
        return TagName(base.Next, HtmlToken.CloseTag());
    }
    switch (c) {
        case '>':
            _model = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            return Data(base.Next);
        case '':
            Back();
            RaiseErrorOccurred(ErrorCode.EOF);
            _buffer.Append('<').Append('/');
            return HtmlToken.EOF;
        default:
            RaiseErrorOccurred(ErrorCode.BogusComment);
            return BogusComment(c);
    }
}
/// <summary>
/// Tag-name state: collects the lower-cased tag name until whitespace,
/// '/', '&gt;' or the end of the input.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken TagName(char c, HtmlTagToken tag)
{
    while (!c.IsSpaceCharacter()) {
        switch (c) {
            case '/':
                tag.Name = _stringBuffer.ToString();
                return TagSelfClosing(base.Next, tag);
            case '>':
                tag.Name = _stringBuffer.ToString();
                return EmitTag(tag);
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _stringBuffer.Append('�');
                break;
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                return HtmlToken.EOF;
            default:
                if (c.IsUppercaseAscii())
                    // Invariant lower-casing: char.ToLower follows the
                    // current culture (Turkish 'I' becomes a dotless i).
                    _stringBuffer.Append(char.ToLowerInvariant(c));
                else
                    _stringBuffer.Append(c);
                break;
        }
        c = base.Next;
    }
    tag.Name = _stringBuffer.ToString();
    return AttributeBeforeName(base.Next, tag);
}
/// <summary>
/// Self-closing start tag state: '&gt;' marks the tag as self-closing and
/// emits it; any other character is reported and treated as the start of
/// an attribute.
/// </summary>
/// <param name="c">The character after the '/'.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken TagSelfClosing(char c, HtmlTagToken tag)
{
    if (c == '>') {
        tag.IsSelfClosing = true;
        return EmitTag(tag);
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        return HtmlToken.EOF;
    }

    RaiseErrorOccurred(ErrorCode.ClosingSlashMisplaced);
    return AttributeBeforeName(c, tag);
}
/// <summary>
/// Markup declaration open state: recognises a comment start ("--"), a
/// doctype and, when AcceptsCharacterData is set, a CDATA section.
/// Everything else is reported and read as a bogus comment.
/// </summary>
/// <param name="c">The character after the "&lt;!".</param>
private HtmlToken MarkupDeclaration(char c)
{
    if (ContinuesWith("--", true)) {
        Advance();
        return CommentStart(base.Next);
    } else if (ContinuesWith(Tags.Doctype, true)) {
        Advance(6);
        return Doctype(base.Next);
    } else if (_acceptsCharacterData && ContinuesWith("[CDATA[", false)) {
        Advance(6);
        return CData(base.Next);
    } else {
        RaiseErrorOccurred(ErrorCode.UndefinedMarkupDeclaration);
        return BogusComment(c);
    }
}
/// <summary>
/// Bogus comment state: collects everything up to the next '&gt;' (or the
/// end of the input) and returns it as a comment token.
/// </summary>
/// <param name="c">The first character of the comment's content.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; they
/// presumably stand for the end-of-input sentinel and NUL - confirm
/// against the original file.
/// </remarks>
private HtmlToken BogusComment(char c)
{
_stringBuffer.Clear();
while (true) {
switch (c) {
case '':
// Step back, then finish exactly like a closing '>'.
Back();
goto case '>';
case ' ':
_stringBuffer.Append('�');
break;
default:
_stringBuffer.Append(c);
break;
case '>':
_model = HtmlParseMode.PCData;
return HtmlToken.Comment(_stringBuffer.ToString());
}
c = base.Next;
}
}
/// <summary>
/// Comment start state: entered right after "&lt;!--". Clears the string
/// buffer that collects the comment text and dispatches on the first
/// character.
/// </summary>
/// <param name="c">The first character after "&lt;!--".</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken CommentStart(char c)
{
_stringBuffer.Clear();
switch (c) {
case '-':
return CommentDashStart(base.Next);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
return Comment(base.Next);
case '>':
// "<!-->": reported, emitted as an empty comment.
_model = HtmlParseMode.PCData;
RaiseErrorOccurred(ErrorCode.TagClosedWrong);
return HtmlToken.Comment(_stringBuffer.ToString());
case '':
RaiseErrorOccurred(ErrorCode.EOF);
Back();
return HtmlToken.Comment(_stringBuffer.ToString());
default:
_stringBuffer.Append(c);
return Comment(base.Next);
}
}
/// <summary>
/// Comment start dash state: entered after "&lt;!--" followed by one '-'.
/// </summary>
/// <param name="c">The character after that dash.</param>
/// <returns>The finished comment token; never null.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken CommentDashStart(char c)
{
    switch (c) {
        case '-':
            // CommentEnd returns null when the comment has not ended and
            // scanning must go on in the comment state. Comment() handles
            // that itself, but on this path nobody did, so the null leaked
            // out as the token. Resume the comment state here instead.
            return CommentEnd(base.Next) ?? Comment(base.Next);
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            _stringBuffer.Append('-');
            _stringBuffer.Append('�');
            return Comment(base.Next);
        case '>':
            _model = HtmlParseMode.PCData;
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            return HtmlToken.Comment(_stringBuffer.ToString());
        case '':
            RaiseErrorOccurred(ErrorCode.EOF);
            Back();
            return HtmlToken.Comment(_stringBuffer.ToString());
        default:
            _stringBuffer.Append('-');
            _stringBuffer.Append(c);
            return Comment(base.Next);
    }
}
/// <summary>
/// Comment state: appends characters to the string buffer until the
/// comment ends or the input runs out.
/// </summary>
/// <param name="c">The current input character.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken Comment(char c)
{
while (true) {
switch (c) {
case '-': {
// A null result means the dash did not end the comment; the dash
// states have already appended what they consumed, so just go on.
HtmlCommentToken htmlCommentToken = CommentDashEnd(base.Next);
if (htmlCommentToken != null)
return htmlCommentToken;
break;
}
case '':
RaiseErrorOccurred(ErrorCode.EOF);
Back();
return HtmlToken.Comment(_stringBuffer.ToString());
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
c = '�';
_stringBuffer.Append(c);
break;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// Comment end dash state: entered after a single '-' inside a comment.
/// </summary>
/// <param name="c">The character after the dash.</param>
/// <returns>
/// The finished comment token, or null after appending the dash and the
/// current character when the comment goes on.
/// </returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken CommentDashEnd(char c)
{
switch (c) {
case '-':
return CommentEnd(base.Next);
case '':
RaiseErrorOccurred(ErrorCode.EOF);
Back();
return HtmlToken.Comment(_stringBuffer.ToString());
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
// Swap in the replacement character, then fall out to the shared append.
c = '�';
break;
}
_stringBuffer.Append('-');
_stringBuffer.Append(c);
return null;
}
/// <summary>
/// Comment end state: entered after "--" inside a comment.
/// </summary>
/// <param name="c">The character after the two dashes.</param>
/// <returns>
/// The finished comment token, or null when the comment goes on; in that
/// case everything consumed here has already been appended to the string
/// buffer.
/// </returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken CommentEnd(char c)
{
    while (true) {
        switch (c) {
            case '>':
                _model = HtmlParseMode.PCData;
                return HtmlToken.Comment(_stringBuffer.ToString());
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                // Both pending dashes belong to the comment text, exactly
                // as in the default branch; only one used to be written.
                _stringBuffer.Append('-');
                _stringBuffer.Append('-');
                _stringBuffer.Append('�');
                return null;
            case '!':
                RaiseErrorOccurred(ErrorCode.CommentEndedWithEM);
                return CommentBangEnd(base.Next);
            case '-':
                break;
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                Back();
                return HtmlToken.Comment(_stringBuffer.ToString());
            default:
                RaiseErrorOccurred(ErrorCode.CommentEndedUnexpected);
                _stringBuffer.Append('-');
                _stringBuffer.Append('-');
                _stringBuffer.Append(c);
                return null;
        }
        // A further dash: keep one of them and stay in this state.
        RaiseErrorOccurred(ErrorCode.CommentEndedWithDash);
        _stringBuffer.Append('-');
        c = base.Next;
    }
}
/// <summary>
/// Comment end bang state: entered after "--!" inside a comment.
/// </summary>
/// <param name="c">The character after the "--!".</param>
/// <returns>
/// The finished comment token, or null after appending the consumed text
/// when the comment goes on.
/// </returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlCommentToken CommentBangEnd(char c)
{
switch (c) {
case '-':
_stringBuffer.Append('-');
_stringBuffer.Append('-');
_stringBuffer.Append('!');
return CommentDashEnd(base.Next);
case '>':
_model = HtmlParseMode.PCData;
return HtmlToken.Comment(_stringBuffer.ToString());
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('-');
_stringBuffer.Append('-');
_stringBuffer.Append('!');
_stringBuffer.Append('�');
return null;
case '':
RaiseErrorOccurred(ErrorCode.EOF);
Back();
return HtmlToken.Comment(_stringBuffer.ToString());
default:
_stringBuffer.Append('-');
_stringBuffer.Append('-');
_stringBuffer.Append('!');
_stringBuffer.Append(c);
return null;
}
}
/// <summary>
/// Doctype state: entered right after the doctype keyword. Whitespace is
/// expected; any other character is reported and handed on as the start
/// of the name.
/// </summary>
/// <param name="c">The character after the doctype keyword.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken Doctype(char c)
{
    if (c.IsSpaceCharacter()) {
        return DoctypeNameBefore(base.Next);
    }

    if (c != '') {
        RaiseErrorOccurred(ErrorCode.DoctypeUnexpected);
        return DoctypeNameBefore(c);
    }

    RaiseErrorOccurred(ErrorCode.EOF);
    Back();
    return HtmlToken.Doctype(true);
}
/// <summary>
/// Before-doctype-name state: skips whitespace and starts collecting the
/// lower-cased doctype name.
/// </summary>
/// <param name="c">The current input character.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypeNameBefore(char c)
{
    while (c.IsSpaceCharacter()) {
        c = base.Next;
    }
    if (!c.IsUppercaseAscii()) {
        switch (c) {
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _stringBuffer.Clear();
                _stringBuffer.Append('�');
                return DoctypeName(base.Next, HtmlToken.Doctype(false));
            case '>':
                _model = HtmlParseMode.PCData;
                RaiseErrorOccurred(ErrorCode.TagClosedWrong);
                return HtmlToken.Doctype(true);
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                Back();
                return HtmlToken.Doctype(true);
            default:
                _stringBuffer.Clear();
                _stringBuffer.Append(c);
                return DoctypeName(base.Next, HtmlToken.Doctype(false));
        }
    }
    _stringBuffer.Clear();
    // Invariant lower-casing: char.ToLower follows the current culture and
    // would map 'I' to a dotless i under Turkish cultures.
    _stringBuffer.Append(char.ToLowerInvariant(c));
    return DoctypeName(base.Next, HtmlToken.Doctype(false));
}
/// <summary>
/// Doctype-name state: collects the lower-cased name until whitespace,
/// '&gt;' or the end of the input.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypeName(char c, HtmlDoctypeToken doctype)
{
    while (true) {
        if (c.IsSpaceCharacter()) {
            doctype.Name = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return DoctypeNameAfter(base.Next, doctype);
        }
        if (c == '>')
            break;
        if (c.IsUppercaseAscii())
            // Invariant lower-casing: char.ToLower follows the current
            // culture (Turkish 'I' becomes a dotless i).
            _stringBuffer.Append(char.ToLowerInvariant(c));
        else {
            switch (c) {
                case ' ':
                    RaiseErrorOccurred(ErrorCode.Null);
                    _stringBuffer.Append('�');
                    break;
                case '':
                    RaiseErrorOccurred(ErrorCode.EOF);
                    Back();
                    doctype.IsQuirksForced = true;
                    doctype.Name = _stringBuffer.ToString();
                    return doctype;
                default:
                    _stringBuffer.Append(c);
                    break;
            }
        }
        c = base.Next;
    }
    // Reached through the '>' break above.
    _model = HtmlParseMode.PCData;
    doctype.Name = _stringBuffer.ToString();
    return doctype;
}
/// <summary>
/// After-doctype-name state: skips whitespace, then expects '&gt;' or one
/// of the keywords "public" / "system". Anything else is reported and
/// read as a bogus doctype with quirks forced.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypeNameAfter(char c, HtmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = base.Next) {
    }

    if (c == '>') {
        _model = HtmlParseMode.PCData;
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        Back();
        doctype.IsQuirksForced = true;
        return doctype;
    }

    if (ContinuesWith("public", true)) {
        Advance(5);
        return DoctypePublic(base.Next, doctype);
    } else if (ContinuesWith("system", true)) {
        Advance(5);
        return DoctypeSystem(base.Next, doctype);
    }

    RaiseErrorOccurred(ErrorCode.DoctypeUnexpectedAfterName);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// After-doctype-public-keyword state: whitespace is expected before the
/// public identifier; a quote directly after the keyword is reported but
/// still opens the identifier.
/// </summary>
/// <param name="c">The character after the "public" keyword.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypePublic(char c, HtmlDoctypeToken doctype)
{
    if (c.IsSpaceCharacter()) {
        return DoctypePublicIdentifierBefore(base.Next, doctype);
    }

    if (c == '"') {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '>') {
        _model = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypePublicInvalid);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Before-doctype-public-identifier state: skips whitespace and expects
/// the opening quote of the public identifier.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypePublicIdentifierBefore(char c, HtmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = base.Next) {
    }

    if (c == '"') {
        _stringBuffer.Clear();
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        _stringBuffer.Clear();
        doctype.PublicIdentifier = string.Empty;
        return DoctypePublicIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '>') {
        _model = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypePublicInvalid);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Doctype public identifier (double-quoted) state: collects the
/// identifier text up to the closing '"'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypePublicIdentifierDoubleQuoted(char c, HtmlDoctypeToken doctype)
{
while (true) {
switch (c) {
case '"':
doctype.PublicIdentifier = _stringBuffer.ToString();
_stringBuffer.Clear();
return DoctypePublicIdentifierAfter(base.Next, doctype);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '>':
// Closed before the quote: keep what was read and force quirks.
_model = HtmlParseMode.PCData;
RaiseErrorOccurred(ErrorCode.TagClosedWrong);
doctype.IsQuirksForced = true;
doctype.PublicIdentifier = _stringBuffer.ToString();
return doctype;
case '':
RaiseErrorOccurred(ErrorCode.EOF);
Back();
doctype.IsQuirksForced = true;
doctype.PublicIdentifier = _stringBuffer.ToString();
return doctype;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// Doctype public identifier (single-quoted) state: collects the
/// identifier text up to the closing apostrophe.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypePublicIdentifierSingleQuoted(char c, HtmlDoctypeToken doctype)
{
while (true) {
switch (c) {
case '\'':
doctype.PublicIdentifier = _stringBuffer.ToString();
_stringBuffer.Clear();
return DoctypePublicIdentifierAfter(base.Next, doctype);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '>':
// Closed before the quote: keep what was read and force quirks.
_model = HtmlParseMode.PCData;
RaiseErrorOccurred(ErrorCode.TagClosedWrong);
doctype.IsQuirksForced = true;
doctype.PublicIdentifier = _stringBuffer.ToString();
return doctype;
case '':
RaiseErrorOccurred(ErrorCode.EOF);
doctype.IsQuirksForced = true;
doctype.PublicIdentifier = _stringBuffer.ToString();
Back();
return doctype;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// After-doctype-public-identifier state: the doctype may end here, or a
/// system identifier may follow. A quote without separating whitespace is
/// reported but still opens the system identifier.
/// </summary>
/// <param name="c">The character after the closing quote.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypePublicIdentifierAfter(char c, HtmlDoctypeToken doctype)
{
    if (c.IsSpaceCharacter()) {
        _stringBuffer.Clear();
        return DoctypeBetween(base.Next, doctype);
    }

    if (c == '>') {
        _model = HtmlParseMode.PCData;
        return doctype;
    }
    if (c == '"') {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Between-doctype-public-and-system-identifiers state: skips whitespace,
/// then expects '&gt;' or the opening quote of the system identifier.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypeBetween(char c, HtmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = base.Next) {
    }

    if (c == '>') {
        _model = HtmlParseMode.PCData;
        return doctype;
    }
    if (c == '"') {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// After-doctype-system-keyword state: whitespace is expected before the
/// system identifier; a quote directly after the keyword is reported but
/// still opens the identifier.
/// </summary>
/// <param name="c">The character after the "system" keyword.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// NOTE(review): unlike the other '&gt;' exits in the doctype states, this
/// one does not assign _model, while the whitespace path does - looks
/// unintentional; confirm.
/// </remarks>
private HtmlToken DoctypeSystem(char c, HtmlDoctypeToken doctype)
{
    if (c.IsSpaceCharacter()) {
        _model = HtmlParseMode.PCData;
        return DoctypeSystemIdentifierBefore(base.Next, doctype);
    }

    if (c == '"') {
        RaiseErrorOccurred(ErrorCode.DoubleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        RaiseErrorOccurred(ErrorCode.SingleQuotationMarkUnexpected);
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '>') {
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.SystemIdentifier = _stringBuffer.ToString();
        doctype.IsQuirksForced = true;
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypeSystemInvalid);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Before-doctype-system-identifier state: skips whitespace and expects
/// the opening quote of the system identifier.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypeSystemIdentifierBefore(char c, HtmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = base.Next) {
    }

    if (c == '"') {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierDoubleQuoted(base.Next, doctype);
    }
    if (c == '\'') {
        doctype.SystemIdentifier = string.Empty;
        return DoctypeSystemIdentifierSingleQuoted(base.Next, doctype);
    }
    if (c == '>') {
        _model = HtmlParseMode.PCData;
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = _stringBuffer.ToString();
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        doctype.SystemIdentifier = _stringBuffer.ToString();
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
    doctype.IsQuirksForced = true;
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Doctype system identifier (double-quoted) state: collects the
/// identifier text up to the closing '"'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypeSystemIdentifierDoubleQuoted(char c, HtmlDoctypeToken doctype)
{
while (true) {
switch (c) {
case '"':
doctype.SystemIdentifier = _stringBuffer.ToString();
_stringBuffer.Clear();
return DoctypeSystemIdentifierAfter(base.Next, doctype);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '>':
// Closed before the quote: keep what was read and force quirks.
_model = HtmlParseMode.PCData;
RaiseErrorOccurred(ErrorCode.TagClosedWrong);
doctype.IsQuirksForced = true;
doctype.SystemIdentifier = _stringBuffer.ToString();
return doctype;
case '':
RaiseErrorOccurred(ErrorCode.EOF);
doctype.IsQuirksForced = true;
doctype.SystemIdentifier = _stringBuffer.ToString();
Back();
return doctype;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// Doctype system identifier (single-quoted) state: collects the
/// identifier text up to the closing apostrophe.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by ErrorCode.EOF and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken DoctypeSystemIdentifierSingleQuoted(char c, HtmlDoctypeToken doctype)
{
while (true) {
switch (c) {
case '\'':
doctype.SystemIdentifier = _stringBuffer.ToString();
_stringBuffer.Clear();
return DoctypeSystemIdentifierAfter(base.Next, doctype);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '>':
// Closed before the quote: keep what was read and force quirks.
_model = HtmlParseMode.PCData;
RaiseErrorOccurred(ErrorCode.TagClosedWrong);
doctype.IsQuirksForced = true;
doctype.SystemIdentifier = _stringBuffer.ToString();
return doctype;
case '':
RaiseErrorOccurred(ErrorCode.EOF);
doctype.IsQuirksForced = true;
doctype.SystemIdentifier = _stringBuffer.ToString();
Back();
return doctype;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// After-doctype-system-identifier state: skips whitespace and expects
/// the closing '&gt;'. Any other character is reported and the rest is
/// read as a bogus doctype, without forcing quirks.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; judging by
/// ErrorCode.EOF it presumably stands for the end-of-input sentinel -
/// confirm against the original file.
/// </remarks>
private HtmlToken DoctypeSystemIdentifierAfter(char c, HtmlDoctypeToken doctype)
{
    for (; c.IsSpaceCharacter(); c = base.Next) {
    }

    if (c == '>') {
        _model = HtmlParseMode.PCData;
        return doctype;
    }
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        doctype.IsQuirksForced = true;
        Back();
        return doctype;
    }

    RaiseErrorOccurred(ErrorCode.DoctypeInvalidCharacter);
    return BogusDoctype(base.Next, doctype);
}
/// <summary>
/// Bogus doctype state: discards input up to the next '&gt;' (or the end
/// of the input) and returns the doctype token unchanged.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="doctype">The doctype token to return.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; it presumably
/// stands for the end-of-input sentinel - confirm against the original file.
/// </remarks>
private HtmlToken BogusDoctype(char c, HtmlDoctypeToken doctype)
{
    for (; ; c = base.Next) {
        if (c == '>') {
            _model = HtmlParseMode.PCData;
            return doctype;
        }
        if (c == '') {
            Back();
            return doctype;
        }
    }
}
/// <summary>
/// Before-attribute-name state: skips whitespace, then either finishes
/// the tag or starts collecting a lower-cased attribute name.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeBeforeName(char c, HtmlTagToken tag)
{
    while (c.IsSpaceCharacter()) {
        c = base.Next;
    }
    switch (c) {
        case '/':
            return TagSelfClosing(base.Next, tag);
        case '>':
            return EmitTag(tag);
        default:
            if (!c.IsUppercaseAscii()) {
                switch (c) {
                    case ' ':
                        RaiseErrorOccurred(ErrorCode.Null);
                        _stringBuffer.Clear();
                        _stringBuffer.Append('�');
                        return AttributeName(base.Next, tag);
                    case '"':
                    case '\'':
                    case '<':
                    case '=':
                        // Reported, but still taken as the first name character.
                        RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
                        _stringBuffer.Clear();
                        _stringBuffer.Append(c);
                        return AttributeName(base.Next, tag);
                    case '':
                        return HtmlToken.EOF;
                    default:
                        _stringBuffer.Clear();
                        _stringBuffer.Append(c);
                        return AttributeName(base.Next, tag);
                }
            }
            _stringBuffer.Clear();
            // Invariant lower-casing: char.ToLower follows the current
            // culture and would map 'I' to a dotless i under Turkish cultures.
            _stringBuffer.Append(char.ToLowerInvariant(c));
            return AttributeName(base.Next, tag);
    }
}
/// <summary>
/// Attribute-name state: collects the lower-cased attribute name and adds
/// the attribute to the tag once the name ends.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeName(char c, HtmlTagToken tag)
{
    while (!c.IsSpaceCharacter()) {
        switch (c) {
            case '/':
                tag.AddAttribute(_stringBuffer.ToString());
                return TagSelfClosing(base.Next, tag);
            case '=':
                tag.AddAttribute(_stringBuffer.ToString());
                return AttributeBeforeValue(base.Next, tag);
            case '>':
                tag.AddAttribute(_stringBuffer.ToString());
                return EmitTag(tag);
            case '':
                return HtmlToken.EOF;
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _stringBuffer.Append('�');
                break;
            default:
                if (c.IsUppercaseAscii())
                    // Invariant lower-casing: char.ToLower follows the
                    // current culture (Turkish 'I' becomes a dotless i).
                    _stringBuffer.Append(char.ToLowerInvariant(c));
                else if (c == '"' || c == '\'' || c == '<') {
                    RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
                    _stringBuffer.Append(c);
                } else {
                    _stringBuffer.Append(c);
                }
                break;
        }
        c = base.Next;
    }
    tag.AddAttribute(_stringBuffer.ToString());
    return AttributeAfterName(base.Next, tag);
}
/// <summary>
/// After-attribute-name state: skips whitespace, then expects '=', the
/// end of the tag or the start of the next lower-cased attribute name.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeAfterName(char c, HtmlTagToken tag)
{
    while (c.IsSpaceCharacter()) {
        c = base.Next;
    }
    switch (c) {
        case '/':
            return TagSelfClosing(base.Next, tag);
        case '=':
            return AttributeBeforeValue(base.Next, tag);
        case '>':
            return EmitTag(tag);
        default:
            if (!c.IsUppercaseAscii()) {
                switch (c) {
                    case ' ':
                        RaiseErrorOccurred(ErrorCode.Null);
                        _stringBuffer.Clear();
                        _stringBuffer.Append('�');
                        return AttributeName(base.Next, tag);
                    case '"':
                    case '\'':
                    case '<':
                        // Reported, but still taken as the first name character.
                        RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
                        _stringBuffer.Clear();
                        _stringBuffer.Append(c);
                        return AttributeName(base.Next, tag);
                    case '':
                        return HtmlToken.EOF;
                    default:
                        _stringBuffer.Clear();
                        _stringBuffer.Append(c);
                        return AttributeName(base.Next, tag);
                }
            }
            _stringBuffer.Clear();
            // Invariant lower-casing: char.ToLower follows the current
            // culture and would map 'I' to a dotless i under Turkish cultures.
            _stringBuffer.Append(char.ToLowerInvariant(c));
            return AttributeName(base.Next, tag);
    }
}
/// <summary>
/// Before-attribute-value state: skips whitespace and picks the quoted or
/// unquoted value state. The string buffer is emptied before any value
/// text is collected.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeBeforeValue(char c, HtmlTagToken tag)
{
    while (c.IsSpaceCharacter()) {
        c = base.Next;
    }
    switch (c) {
        case '"':
            _stringBuffer.Clear();
            return AttributeDoubleQuotedValue(base.Next, tag);
        case '&':
            // Not consumed: the unquoted state resolves the reference itself.
            _stringBuffer.Clear();
            return AttributeUnquotedValue(c, tag);
        case '\'':
            _stringBuffer.Clear();
            return AttributeSingleQuotedValue(base.Next, tag);
        case ' ':
            RaiseErrorOccurred(ErrorCode.Null);
            // Clear first, like every other branch that starts a value:
            // the buffer still holds the attribute name, which used to
            // leak into the value on this path.
            _stringBuffer.Clear().Append('�');
            return AttributeUnquotedValue(base.Next, tag);
        case '>':
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            return EmitTag(tag);
        case '<':
        case '=':
        case '`':
            RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
            _stringBuffer.Clear().Append(c);
            return AttributeUnquotedValue(base.Next, tag);
        case '':
            return HtmlToken.EOF;
        default:
            _stringBuffer.Clear().Append(c);
            return AttributeUnquotedValue(base.Next, tag);
    }
}
/// <summary>
/// Attribute value (double-quoted) state: collects the value up to the
/// closing '"', resolving character references on the way.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeDoubleQuotedValue(char c, HtmlTagToken tag)
{
while (true) {
switch (c) {
case '"':
tag.SetAttributeValue(_stringBuffer.ToString());
return AttributeAfterValue(base.Next, tag);
case '&': {
// The quote is passed as the extra terminator for the reference.
string text = CharacterReference(base.Next, '"');
if (text == null)
_stringBuffer.Append('&');
else
_stringBuffer.Append(text);
break;
}
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '':
return HtmlToken.EOF;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// Attribute value (single-quoted) state: collects the value up to the
/// closing apostrophe, resolving character references on the way.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeSingleQuotedValue(char c, HtmlTagToken tag)
{
while (true) {
switch (c) {
case '\'':
tag.SetAttributeValue(_stringBuffer.ToString());
return AttributeAfterValue(base.Next, tag);
case '&': {
// The apostrophe is passed as the extra terminator for the reference.
string text = CharacterReference(base.Next, '\'');
if (text == null)
_stringBuffer.Append('&');
else
_stringBuffer.Append(text);
break;
}
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '':
return HtmlToken.EOF;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
}
/// <summary>
/// Attribute value (unquoted) state: collects the value up to whitespace
/// or '&gt;', resolving character references on the way.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken AttributeUnquotedValue(char c, HtmlTagToken tag)
{
while (!c.IsSpaceCharacter()) {
switch (c) {
case '&': {
// '>' is passed as the extra terminator for the reference.
string text = CharacterReference(base.Next, '>');
if (text == null)
_stringBuffer.Append('&');
else
_stringBuffer.Append(text);
break;
}
case '>':
tag.SetAttributeValue(_stringBuffer.ToString());
return EmitTag(tag);
case ' ':
RaiseErrorOccurred(ErrorCode.Null);
_stringBuffer.Append('�');
break;
case '"':
case '\'':
case '<':
case '=':
case '`':
// Reported, but kept as part of the value.
RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
_stringBuffer.Append(c);
break;
case '':
return HtmlToken.EOF;
default:
_stringBuffer.Append(c);
break;
}
c = base.Next;
}
tag.SetAttributeValue(_stringBuffer.ToString());
return AttributeBeforeName(base.Next, tag);
}
/// <summary>
/// After-attribute-value (quoted) state: expects whitespace, '/' or '&gt;'.
/// Any other character is reported and treated as the start of the next
/// attribute.
/// </summary>
/// <param name="c">The character after the closing quote.</param>
/// <param name="tag">The tag token under construction.</param>
/// <remarks>
/// NOTE(review): the '' literal looks mangled by extraction; it presumably
/// stands for the end-of-input sentinel - confirm against the original file.
/// </remarks>
private HtmlToken AttributeAfterValue(char c, HtmlTagToken tag)
{
    if (c.IsSpaceCharacter()) {
        return AttributeBeforeName(base.Next, tag);
    }

    if (c == '/') {
        return TagSelfClosing(base.Next, tag);
    }
    if (c == '>') {
        return EmitTag(tag);
    }
    if (c == '') {
        return HtmlToken.EOF;
    }

    RaiseErrorOccurred(ErrorCode.AttributeNameExpected);
    return AttributeBeforeName(c, tag);
}
/// <summary>
/// Script data state: buffers character data and hands over to
/// ScriptDataLT on '&lt;'.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>The token that ends the character run, or HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken ScriptData(char c)
{
    for (; ; c = base.Next) {
        if (c == '<') {
            return ScriptDataLT(base.Next);
        }
        if (c == '') {
            return HtmlToken.EOF;
        }

        if (c == ' ') {
            RaiseErrorOccurred(ErrorCode.Null);
            _buffer.Append('�');
        } else {
            _buffer.Append(c);
        }
    }
}
/// <summary>
/// Script data less-than state: '/' may start an end tag, '!' may start
/// an escaped section; anything else makes the '&lt;' ordinary script text.
/// </summary>
/// <param name="c">The character after the '&lt;'.</param>
private HtmlToken ScriptDataLT(char c)
{
    if (c == '/') {
        return ScriptDataEndTag(base.Next);
    }

    if (c == '!') {
        _buffer.Append('<').Append('!');
        return ScriptDataStartEscape(base.Next);
    }

    _buffer.Append('<');
    return ScriptData(c);
}
/// <summary>
/// Script data end-tag-open state: a letter starts a candidate end tag
/// name; anything else turns the consumed "&lt;/" into script text.
/// </summary>
/// <param name="c">The character after the "&lt;/".</param>
private HtmlToken ScriptDataEndTag(char c)
{
    if (!c.IsLetter()) {
        _buffer.Append('<').Append('/');
        return ScriptData(c);
    }

    _stringBuffer.Clear();
    _stringBuffer.Append(c);
    return ScriptDataNameEndTag(base.Next, HtmlToken.CloseTag());
}
/// <summary>
/// Script data end-tag-name state: collects the tag name and only treats
/// it as a real end tag when its lower-cased form equals the last start
/// tag name. Otherwise the consumed "&lt;/name" is flushed back as script
/// text.
/// </summary>
/// <param name="c">The current input character.</param>
/// <param name="tag">The close tag token under construction.</param>
private HtmlToken ScriptDataNameEndTag(char c, HtmlTagToken tag)
{
    // Invariant lower-casing: string.ToLower() follows the current culture,
    // so under Turkish cultures "SCRIPT" would become "scrıpt" (dotless i)
    // and never match the start tag name.
    string text = _stringBuffer.ToString().ToLowerInvariant();
    bool flag = text == _lastStartTag;
    if (flag && c.IsSpaceCharacter()) {
        tag.Name = text;
        return AttributeBeforeName(base.Next, tag);
    }
    if (flag && c == '/') {
        tag.Name = text;
        return TagSelfClosing(base.Next, tag);
    }
    if (flag && c == '>') {
        tag.Name = text;
        return EmitTag(tag);
    }
    if (c.IsLetter()) {
        _stringBuffer.Append(c);
        return ScriptDataNameEndTag(base.Next, tag);
    }
    // Flushed with the original casing, exactly as it was read.
    _buffer.Append('<').Append('/');
    _buffer.Append(_stringBuffer.ToString());
    return ScriptData(c);
}
/// <summary>
/// Script data escape start state: entered after "&lt;!". A '-' continues
/// towards an escaped section; anything else goes back to plain script data.
/// </summary>
/// <param name="c">The character after the "&lt;!".</param>
private HtmlToken ScriptDataStartEscape(char c)
{
    if (c != '-') {
        return ScriptData(c);
    }

    _buffer.Append('-');
    return ScriptDataStartEscapeDash(base.Next);
}
/// <summary>
/// Script data escaped state: buffers script text inside an escaped
/// ("&lt;!--") section until a '-' or '&lt;' needs a closer look.
/// </summary>
/// <param name="c">The current input character.</param>
/// <returns>The token that ends the character run, or HtmlToken.EOF.</returns>
/// <remarks>
/// NOTE(review): the '' and ' ' literals look mangled by extraction; judging
/// by the EOF token and ErrorCode.Null they presumably stand for the
/// end-of-input sentinel and NUL - confirm against the original file.
/// </remarks>
private HtmlToken ScriptDataEscaped(char c)
{
    // A loop rather than self-recursion, so a long escaped section cannot
    // exhaust the stack.
    for (; ; c = base.Next) {
        switch (c) {
            case '-':
                _buffer.Append('-');
                return ScriptDataEscapedDash(base.Next);
            case '<':
                return ScriptDataEscapedLT(base.Next);
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _buffer.Append('�');
                break;
            case '':
                return HtmlToken.EOF;
            default:
                // An ordinary character is emitted and the tokenizer stays
                // in the escaped state. This used to fall back to the plain
                // script data state, which silently ended the escaping
                // after the first ordinary character.
                _buffer.Append(c);
                break;
        }
    }
}
/// <summary>
/// Handles the character after "&lt;!-" inside script content. A second
/// dash completes the escape opener; anything else is processed as regular
/// script text.
/// </summary>
/// <param name="c">The character right after the first dash.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataStartEscapeDash(char c)
{
    if (c != '-')
        return ScriptData(c);

    _buffer.Append('-');
    return ScriptDataEscapedDashDash(base.Next);
}
/// <summary>
/// Handles the character that follows a single dash inside escaped script
/// content.
/// </summary>
/// <param name="c">The character right after the dash.</param>
/// <returns>The token produced by the follow-up state, or the EOF token.</returns>
private HtmlToken ScriptDataEscapedDash(char c)
{
    if (c == '<')
        return ScriptDataEscapedLT(base.Next);

    // NOTE(review): presumably the end-of-input marker character — confirm file encoding.
    if (c == '')
        return HtmlToken.EOF;

    if (c == '-') {
        _buffer.Append('-');
        return ScriptDataEscapedDashDash(base.Next);
    }

    // NOTE(review): presumably the NUL character (ErrorCode.Null is reported) — confirm.
    if (c == ' ') {
        RaiseErrorOccurred(ErrorCode.Null);
        _buffer.Append('�');
    } else {
        _buffer.Append(c);
    }

    // Both remaining cases continue in the plain escaped state.
    return ScriptDataEscaped(base.Next);
}
/// <summary>
/// Handles the character that follows two consecutive dashes inside escaped
/// script content. A greater-than sign here closes the escaped section.
/// </summary>
/// <param name="c">The character right after the second dash.</param>
/// <returns>The token produced by the follow-up state, or the EOF token.</returns>
private HtmlToken ScriptDataEscapedDashDash(char c)
{
    if (c == '<')
        return ScriptDataEscapedLT(base.Next);

    // NOTE(review): presumably the end-of-input marker character — confirm file encoding.
    if (c == '')
        return HtmlToken.EOF;

    // NOTE(review): presumably the NUL character (ErrorCode.Null is reported) — confirm.
    if (c == ' ') {
        RaiseErrorOccurred(ErrorCode.Null);
        _buffer.Append('�');
        return ScriptDataEscaped(base.Next);
    }

    // Every remaining character is buffered as-is; only the target state differs.
    _buffer.Append(c);

    if (c == '-')
        return ScriptDataEscapedDashDash(base.Next);

    if (c == '>')
        return ScriptData(base.Next);

    return ScriptDataEscaped(base.Next);
}
/// <summary>
/// Handles the character that follows a less-than sign inside escaped script
/// content. A solidus starts an escaped end tag, a letter may start a nested
/// "script" opener, anything else is ordinary escaped text.
/// </summary>
/// <remarks>
/// The solidus used to be routed to the non-escaped <c>ScriptDataEndTag</c>,
/// so a failed end tag dropped the tokenizer out of the escaped section. It
/// now goes through <c>ScriptDataEscapedEndTag</c>, whose fallbacks return to
/// <c>ScriptDataEscaped</c> as the HTML tokenization algorithm describes.
/// </remarks>
/// <param name="c">The character right after the less-than sign.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataEscapedLT(char c)
{
    if (c == '/')
        return ScriptDataEscapedEndTag(base.Next, HtmlToken.CloseTag());

    if (c.IsLetter()) {
        // Remember the letter for the later "script" comparison and emit
        // both consumed characters as text.
        _stringBuffer.Clear();
        _stringBuffer.Append(c);
        _buffer.Append('<');
        _buffer.Append(c);
        return ScriptDataStartDoubleEscape(base.Next);
    }

    _buffer.Append('<');
    return ScriptDataEscaped(c);
}
/// <summary>
/// Handles the character after "&lt;/" inside escaped script content. A
/// letter begins the end tag name; every other character turns the two
/// consumed characters into ordinary escaped text.
/// </summary>
/// <remarks>
/// On a letter this method used to call itself, clearing the name buffer for
/// every letter and never reaching the name state, which lost the tag name.
/// It now continues with <c>ScriptDataEscapedNameTag</c>.
/// </remarks>
/// <param name="c">The character right after the solidus.</param>
/// <param name="tag">The end tag token under construction.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataEscapedEndTag(char c, HtmlTagToken tag)
{
    if (c.IsLetter()) {
        _stringBuffer.Clear();
        _stringBuffer.Append(c);
        return ScriptDataEscapedNameTag(base.Next, tag);
    }

    // Not a tag after all: hand "</" back as text and re-process c.
    _buffer.Append('<').Append('/');
    return ScriptDataEscaped(c);
}
/// <summary>
/// Reads the name of a potential end tag inside escaped script content. The
/// tag is only accepted when the collected name equals the name of the last
/// emitted start tag; otherwise the consumed characters are returned to the
/// character buffer and escaped processing continues.
/// </summary>
/// <param name="c">The current character.</param>
/// <param name="tag">The end tag token under construction.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataEscapedNameTag(char c, HtmlTagToken tag)
{
    // Lower-case with the invariant culture: the culture-sensitive ToLower()
    // maps 'I' to a dotless 'ı' under e.g. Turkish cultures, so "SCRIPT"
    // would never compare equal to the remembered start tag name.
    string name = _stringBuffer.ToString().ToLowerInvariant();

    if (name == _lastStartTag) {
        if (c.IsSpaceCharacter()) {
            tag.Name = name;
            return AttributeBeforeName(base.Next, tag);
        }
        if (c == '/') {
            tag.Name = name;
            return TagSelfClosing(base.Next, tag);
        }
        if (c == '>') {
            tag.Name = name;
            return EmitTag(tag);
        }
    }

    if (c.IsLetter()) {
        _stringBuffer.Append(c);
        return ScriptDataEscapedNameTag(base.Next, tag);
    }

    // Not a matching end tag: emit "</" plus the name exactly as written
    // (original casing) and re-process c as escaped text.
    _buffer.Append('<').Append('/');
    _buffer.Append(_stringBuffer.ToString());
    return ScriptDataEscaped(c);
}
/// <summary>
/// Collects the letters that follow a less-than sign inside escaped script
/// content. When the name ends and spells "script" (ignoring case) the
/// double escaped state is entered; otherwise escaped processing resumes.
/// </summary>
/// <param name="c">The current character.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataStartDoubleEscape(char c)
{
    bool terminatesName = c.IsSpaceCharacter() || c == '/' || c == '>';

    if (!terminatesName) {
        if (!c.IsLetter())
            return ScriptDataEscaped(c);

        // Another letter of the name: remember it and emit it as text.
        _stringBuffer.Append(c);
        _buffer.Append(c);
        return ScriptDataStartDoubleEscape(base.Next);
    }

    _buffer.Append(c);
    bool opensScript = string.Compare(_stringBuffer.ToString(), "script", StringComparison.OrdinalIgnoreCase) == 0;
    return opensScript
        ? ScriptDataEscapedDouble(base.Next)
        : ScriptDataEscaped(base.Next);
}
/// <summary>
/// Consumes double escaped script content, buffering every character until
/// a dash, a less-than sign or the end marker is met.
/// </summary>
/// <remarks>
/// The null branch used to leave the switch with <c>break</c> and then hit
/// the trailing append, so the raw character was buffered in addition to the
/// replacement character. Now only the replacement character is buffered,
/// matching the sibling dash states.
/// </remarks>
/// <param name="c">The character to start with.</param>
/// <returns>The token produced by the follow-up state, or the EOF token.</returns>
private HtmlToken ScriptDataEscapedDouble(char c)
{
    // Iterating (instead of recursing per character) keeps the stack flat
    // for long double escaped sections.
    while (true) {
        switch (c) {
            case '-':
                _buffer.Append('-');
                return ScriptDataEscapedDoubleDash(base.Next);
            case '<':
                _buffer.Append('<');
                return ScriptDataEscapedDoubleLT(base.Next);
            // NOTE(review): presumably the NUL character (ErrorCode.Null is reported) — confirm.
            case ' ':
                RaiseErrorOccurred(ErrorCode.Null);
                _buffer.Append('�');
                break;
            // NOTE(review): presumably the end-of-input marker character — confirm file encoding.
            case '':
                RaiseErrorOccurred(ErrorCode.EOF);
                return HtmlToken.EOF;
            default:
                _buffer.Append(c);
                break;
        }
        c = base.Next;
    }
}
/// <summary>
/// Handles the character that follows a single dash inside double escaped
/// script content.
/// </summary>
/// <param name="c">The character right after the dash.</param>
/// <returns>The token produced by the follow-up state, or the EOF token.</returns>
private HtmlToken ScriptDataEscapedDoubleDash(char c)
{
    // NOTE(review): presumably the end-of-input marker character — confirm file encoding.
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        return HtmlToken.EOF;
    }

    // NOTE(review): presumably the NUL character (ErrorCode.Null is reported) — confirm.
    if (c == ' ') {
        RaiseErrorOccurred(ErrorCode.Null);
        _buffer.Append('�');
        return ScriptDataEscapedDouble(base.Next);
    }

    // Every remaining character is buffered as-is; only the target state differs.
    _buffer.Append(c);

    if (c == '-')
        return ScriptDataEscapedDoubleDashDash(base.Next);

    if (c == '<')
        return ScriptDataEscapedDoubleLT(base.Next);

    return ScriptDataEscapedDouble(base.Next);
}
/// <summary>
/// Handles the character that follows two consecutive dashes inside double
/// escaped script content. A greater-than sign here returns to plain script
/// processing.
/// </summary>
/// <param name="c">The character right after the second dash.</param>
/// <returns>The token produced by the follow-up state, or the EOF token.</returns>
private HtmlToken ScriptDataEscapedDoubleDashDash(char c)
{
    // NOTE(review): presumably the end-of-input marker character — confirm file encoding.
    if (c == '') {
        RaiseErrorOccurred(ErrorCode.EOF);
        return HtmlToken.EOF;
    }

    // NOTE(review): presumably the NUL character (ErrorCode.Null is reported) — confirm.
    if (c == ' ') {
        RaiseErrorOccurred(ErrorCode.Null);
        _buffer.Append('�');
        return ScriptDataEscapedDouble(base.Next);
    }

    // Every remaining character is buffered as-is; only the target state differs.
    _buffer.Append(c);

    if (c == '-')
        return ScriptDataEscapedDoubleDashDash(base.Next);

    if (c == '<')
        return ScriptDataEscapedDoubleLT(base.Next);

    if (c == '>')
        return ScriptData(base.Next);

    return ScriptDataEscapedDouble(base.Next);
}
/// <summary>
/// Handles the character that follows a less-than sign inside double escaped
/// script content. Only a solidus starts the check for a closing "script"
/// name; anything else is processed as double escaped text.
/// </summary>
/// <param name="c">The character right after the less-than sign.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataEscapedDoubleLT(char c)
{
    if (c != '/')
        return ScriptDataEscapedDouble(c);

    // Reset the name buffer before the closing name is collected.
    _stringBuffer.Clear();
    _buffer.Append('/');
    return ScriptDataEndDoubleEscape(base.Next);
}
/// <summary>
/// Collects the letters that follow "&lt;/" inside double escaped script
/// content. When the name ends and spells "script" (ignoring case) the
/// tokenizer drops back to the single escaped state; otherwise it stays
/// double escaped.
/// </summary>
/// <param name="c">The current character.</param>
/// <returns>The token produced by the state that is entered next.</returns>
private HtmlToken ScriptDataEndDoubleEscape(char c)
{
    bool terminatesName = c.IsSpaceCharacter() || c == '/' || c == '>';

    if (!terminatesName) {
        if (!c.IsLetter())
            return ScriptDataEscapedDouble(c);

        // Another letter of the name: remember it and emit it as text.
        _stringBuffer.Append(c);
        _buffer.Append(c);
        return ScriptDataEndDoubleEscape(base.Next);
    }

    _buffer.Append(c);
    bool closesScript = string.Compare(_stringBuffer.ToString(), "script", StringComparison.OrdinalIgnoreCase) == 0;
    return closesScript
        ? ScriptDataEscaped(base.Next)
        : ScriptDataEscapedDouble(base.Next);
}
/// <summary>
/// Finalizes a tag token before it is handed out. The tokenizer is switched
/// back to PCData mode. For a start tag, attributes whose key already occurs
/// earlier in the list are removed (one error per removal) and the tag name
/// is remembered as the last start tag. For any other tag, errors are raised
/// if it is self-closing or carries attributes.
/// </summary>
/// <param name="tag">The tag token to finalize.</param>
/// <returns>The same tag token.</returns>
private HtmlTagToken EmitTag(HtmlTagToken tag)
{
    _model = HtmlParseMode.PCData;

    if (tag.Type != HtmlTokenType.StartTag) {
        if (tag.IsSelfClosing)
            RaiseErrorOccurred(ErrorCode.EndTagCannotBeSelfClosed);

        if (tag.Attributes.Count != 0)
            RaiseErrorOccurred(ErrorCode.EndTagCannotHaveAttributes);

        return tag;
    }

    // Walk from the last attribute towards the front so that removing the
    // current entry never shifts the entries that are still to be checked.
    int current = tag.Attributes.Count - 1;
    while (current > 0) {
        bool seenEarlier = false;
        for (int earlier = current - 1; earlier >= 0 && !seenEarlier; earlier--)
            seenEarlier = tag.Attributes[earlier].Key == tag.Attributes[current].Key;

        if (seenEarlier) {
            tag.Attributes.RemoveAt(current);
            RaiseErrorOccurred(ErrorCode.AttributeDuplicateOmitted);
        }

        current--;
    }

    _lastStartTag = tag.Name;
    return tag;
}
}
}