CssTokenizer
The CSS tokenizer.
See http://dev.w3.org/csswg/css-syntax/#tokenization for more details.
using AngleSharp.Css;
using AngleSharp.Dom.Events;
using AngleSharp.Extensions;
using System;
using System.Diagnostics;
using System.Globalization;
namespace AngleSharp.Parser.Css
{
[DebuggerStepThrough]
internal sealed class CssTokenizer : BaseTokenizer
{
// Backing flag for IsInValue; while set, '#' is tokenized as a color literal
// instead of a hash token (see Data).
private bool _valueMode;

// Position recorded when the current token started; handed to every token created.
private TextPosition _position;

/// <summary>
/// Gets or sets whether the tokenizer is currently in value mode.
/// </summary>
public bool IsInValue
{
    get { return _valueMode; }
    set { _valueMode = value; }
}
/// <summary>
/// Raised by RaiseErrorOccurred with the error code and the position
/// at which the error was reported.
/// </summary>
public event EventHandler<CssErrorEvent> Error;
/// <summary>
/// Creates a tokenizer over the given text source. Value mode starts disabled.
/// </summary>
/// <param name="source">The source that is forwarded to the base tokenizer.</param>
public CssTokenizer(TextSource source)
    : base(source)
{
    IsInValue = false;
}
/// <summary>
/// Reads the next character from the source and produces the token that starts there.
/// </summary>
/// <returns>The next token.</returns>
public CssToken Get()
{
    char first = GetNext();
    _position = GetCurrentPosition();
    CssToken token = Data(first);
    return token;
}
/// <summary>
/// Notifies subscribers of the Error event, if there are any.
/// </summary>
/// <param name="error">The error code to report.</param>
/// <param name="position">The position associated with the error.</param>
internal void RaiseErrorOccurred(CssParseError error, TextPosition position)
{
    // Copy the delegate first so the null check and the call see the same value.
    EventHandler<CssErrorEvent> handler = Error;
    if (handler == null) {
        return;
    }
    handler(this, new CssErrorEvent(error, position));
}
/// <summary>
/// Main dispatch of the tokenizer: looks at the given character and either creates
/// the matching single-character token directly or hands over to the specialised
/// routine (string, number, identifier, url, comment, ...).
/// </summary>
/// <param name="current">The character that starts the token.</param>
/// <returns>The token that was produced.</returns>
// NOTE(review): several char literals in this method are empty ('') in this copy of
// the file, which is not valid C#. Their contents were presumably control characters
// lost in extraction (form feed / the end-of-file marker) - confirm against the
// original source before compiling.
private CssToken Data(char current)
{
// Remember where this token starts; every New* factory attaches this position.
_position = GetCurrentPosition();
switch (current) {
case '\t':
case '\n':
// NOTE(review): empty literal; sits between '\n' and '\r', presumably form feed - confirm.
case '':
case '\r':
case ' ':
return NewWhitespace(current);
case '"':
return StringDQ();
case '#':
// In value mode '#' introduces a color literal, otherwise a hash token.
if (!_valueMode)
return HashStart();
return ColorLiteral();
case '$':
// "$=" is a match token; otherwise go back one character and emit a delimiter.
current = GetNext();
if (current == '=')
return NewMatch(CombinatorSymbols.Ends);
return NewDelimiter(GetPrevious());
case '\'':
return StringSQ();
case '(':
return NewOpenRound();
case ')':
return NewCloseRound();
case '*':
current = GetNext();
if (current == '=')
return NewMatch(CombinatorSymbols.InText);
return NewDelimiter(GetPrevious());
case '+': {
// Look ahead up to two characters to decide whether '+' starts a number.
char next3 = GetNext();
// NOTE(review): empty literal; presumably the end-of-file marker - confirm.
if (next3 != '') {
char next4 = GetNext();
// Undo both look-ahead reads before deciding.
Back(2);
if (next3.IsDigit() || (next3 == '.' && next4.IsDigit()))
return NumberStart(current);
} else
Back();
return NewDelimiter(current);
}
case ',':
return NewComma();
case '.':
// ".5" style numbers; in both branches GetPrevious() moves back onto the '.'.
if (GetNext().IsDigit())
return NumberStart(GetPrevious());
return NewDelimiter(GetPrevious());
case '-': {
// '-' may start a number, an identifier, or the "-->" token.
char next = GetNext();
// NOTE(review): empty literal; presumably the end-of-file marker - confirm.
if (next != '') {
char next2 = GetNext();
Back(2);
if (next.IsDigit() || (next == '.' && next2.IsDigit()))
return NumberStart(current);
if (next.IsNameStart())
return IdentStart(current);
// NOTE(review): empty literal on the next line; presumably the end-of-file marker - confirm.
if (next == '\\' && !next2.IsLineBreak() && next2 != '')
return IdentStart(current);
if (next == '-' && next2 == '>') {
// Consume the "->" that follows the '-'.
Advance(2);
return NewCloseComment();
}
} else
Back();
return NewDelimiter(current);
}
case '/':
// "/*" opens a comment; a lone '/' is a delimiter.
current = GetNext();
if (current == '*')
return Comment();
return NewDelimiter(GetPrevious());
case '\\':
// A backslash followed by a line break or the end of input is reported as an
// error and emitted as a delimiter; otherwise it starts an escaped identifier.
current = GetNext();
if (current.IsLineBreak()) {
RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
return NewDelimiter(GetPrevious());
}
// NOTE(review): empty literal; presumably the end-of-file marker - confirm.
if (current == '') {
RaiseErrorOccurred(CssParseError.EOF);
return NewDelimiter(GetPrevious());
}
return IdentStart(GetPrevious());
case ':':
return NewColon();
case ';':
return NewSemicolon();
case '<':
// Recognise "<!--"; on a partial match each GetPrevious() call undoes one read
// so that only the '<' itself ends up consumed.
current = GetNext();
if (current == '!') {
current = GetNext();
if (current == '-') {
current = GetNext();
if (current == '-')
return NewOpenComment();
current = GetPrevious();
}
current = GetPrevious();
}
return NewDelimiter(GetPrevious());
case '@':
return AtKeywordStart();
case '[':
return NewOpenSquare();
case ']':
return NewCloseSquare();
case '^':
current = GetNext();
if (current == '=')
return NewMatch(CombinatorSymbols.Begins);
return NewDelimiter(GetPrevious());
case '{':
return NewOpenCurly();
case '}':
return NewCloseCurly();
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return NumberStart(current);
case 'U':
case 'u':
// "U+" followed by a hex digit or '?' is a unicode range; anything else is an
// ordinary identifier starting with 'U'/'u'.
current = GetNext();
if (current == '+') {
current = GetNext();
if (current.IsHex() || current == '?')
return UnicodeRange(current);
current = GetPrevious();
}
return IdentStart(GetPrevious());
case '|':
// "|=" is a match token, "||" the column token, a lone '|' a delimiter.
current = GetNext();
switch (current) {
case '=':
return NewMatch(CombinatorSymbols.InToken);
case '|':
return NewColumn();
default:
return NewDelimiter(GetPrevious());
}
case '~':
current = GetNext();
if (current == '=')
return NewMatch(CombinatorSymbols.InList);
return NewDelimiter(GetPrevious());
// NOTE(review): empty literal; since it yields the EOF token it is presumably the
// end-of-file marker - confirm.
case '':
return NewEof();
case '!':
current = GetNext();
if (current == '=')
return NewMatch(CombinatorSymbols.Unlike);
return NewDelimiter(GetPrevious());
default:
// Any other name-start character begins an identifier; the rest are delimiters.
if (current.IsNameStart())
return IdentStart(current);
return NewDelimiter(current);
}
}
/// <summary>
/// Reads a double-quoted string; the opening quote has already been consumed.
/// </summary>
/// <returns>A string token, flagged as bad when it was cut short by a line break
/// or by an escape at the end of the input.</returns>
// NOTE(review): two case labels below are empty char literals ('') in this copy of
// the file; see the notes next to them.
private CssToken StringDQ()
{
while (true) {
char next = GetNext();
switch (next) {
case '"':
// NOTE(review): empty literal; grouped with the closing quote and producing a
// non-bad string, so presumably the end-of-file marker - confirm.
case '':
return NewString(FlushBuffer(), '"', false);
case '\n':
// NOTE(review): empty literal; grouped with '\n', presumably form feed - confirm.
case '':
// An unescaped line break ends the string as a bad string; the line break
// itself is left in the input for the next token.
RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
Back();
return NewString(FlushBuffer(), '"', true);
case '\\':
next = GetNext();
// An escaped line break is kept in the string as a line terminator.
if (next.IsLineBreak())
base.StringBuffer.AppendLine();
else {
// NOTE(review): empty literal; reported as CssParseError.EOF, so presumably the
// end-of-file marker - confirm.
if (next == '') {
RaiseErrorOccurred(CssParseError.EOF);
Back();
return NewString(FlushBuffer(), '"', true);
}
base.StringBuffer.Append(ConsumeEscape(next));
}
break;
default:
base.StringBuffer.Append(next);
break;
}
}
}
/// <summary>
/// Reads a single-quoted string; the opening quote has already been consumed.
/// Mirrors StringDQ with '\'' as the terminator.
/// </summary>
/// <returns>A string token, flagged as bad when it was cut short by a line break
/// or by an escape at the end of the input.</returns>
// NOTE(review): two case labels below are empty char literals ('') in this copy of
// the file; see the notes next to them.
private CssToken StringSQ()
{
while (true) {
char next = GetNext();
switch (next) {
case '\'':
// NOTE(review): empty literal; grouped with the closing quote and producing a
// non-bad string, so presumably the end-of-file marker - confirm.
case '':
return NewString(FlushBuffer(), '\'', false);
case '\n':
// NOTE(review): empty literal; grouped with '\n', presumably form feed - confirm.
case '':
// An unescaped line break ends the string as a bad string; the line break
// itself is left in the input for the next token.
RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
Back();
return NewString(FlushBuffer(), '\'', true);
case '\\':
next = GetNext();
// An escaped line break is kept in the string as a line terminator.
if (next.IsLineBreak())
base.StringBuffer.AppendLine();
else {
// NOTE(review): empty literal; reported as CssParseError.EOF, so presumably the
// end-of-file marker - confirm.
if (next == '') {
RaiseErrorOccurred(CssParseError.EOF);
Back();
return NewString(FlushBuffer(), '\'', true);
}
base.StringBuffer.Append(ConsumeEscape(next));
}
break;
default:
base.StringBuffer.Append(next);
break;
}
}
}
/// <summary>
/// Reads the run of hex digits that follows '#' in value mode and turns it
/// into a color token.
/// </summary>
private CssToken ColorLiteral()
{
    for (char c = GetNext(); c.IsHex(); c = GetNext()) {
        base.StringBuffer.Append(c);
    }

    // The first non-hex character belongs to the next token.
    Back();
    return NewColor(FlushBuffer());
}
/// <summary>
/// Handles the character right after '#': a name-start character or a valid
/// escape begins a hash token, anything else makes '#' a plain delimiter.
/// </summary>
private CssToken HashStart()
{
    char first = GetNext();

    if (first.IsNameStart()) {
        base.StringBuffer.Append(first);
        return HashRest();
    }

    if (IsValidEscape(first)) {
        first = GetNext();
        base.StringBuffer.Append(ConsumeEscape(first));
        return HashRest();
    }

    // A backslash that does not form a valid escape is reported; either way the
    // character is handed back and '#' becomes a delimiter.
    if (first == '\\') {
        RaiseErrorOccurred(CssParseError.InvalidCharacter);
    }

    Back();
    return NewDelimiter('#');
}
/// <summary>
/// Collects the remaining name characters and escapes of a hash token.
/// </summary>
private CssToken HashRest()
{
    char c;

    for (c = GetNext(); ; c = GetNext()) {
        if (c.IsName()) {
            base.StringBuffer.Append(c);
            continue;
        }

        if (!IsValidEscape(c)) {
            break;
        }

        c = GetNext();
        base.StringBuffer.Append(ConsumeEscape(c));
    }

    // A dangling backslash ends the hash and is reported as an invalid character.
    if (c == '\\') {
        RaiseErrorOccurred(CssParseError.InvalidCharacter);
    }

    Back();
    return NewHash(FlushBuffer());
}
/// <summary>
/// Reads a comment body; the opening "/*" has already been consumed.
/// </summary>
/// <returns>A comment token, flagged as bad when the input ended before "*/".</returns>
private CssToken Comment()
{
char next = GetNext();
while (true) {
switch (next) {
case '*':
// A '*' closes the comment only when directly followed by '/'; otherwise the
// '*' is kept and the following character is examined on the next pass.
next = GetNext();
if (next == '/')
return NewComment(FlushBuffer(), false);
base.StringBuffer.Append('*');
break;
default:
base.StringBuffer.Append(next);
next = GetNext();
break;
// NOTE(review): empty char literal in this copy of the file; reported as
// CssParseError.EOF, so presumably the end-of-file marker - confirm.
case '':
RaiseErrorOccurred(CssParseError.EOF);
return NewComment(FlushBuffer(), true);
}
}
}
/// <summary>
/// Handles the characters right after '@'. An optional '-' followed by a
/// name-start character or valid escape begins an at-keyword; otherwise every
/// look-ahead read is undone and '@' becomes a delimiter.
/// </summary>
private CssToken AtKeywordStart()
{
    char c = GetNext();

    if (c == '-') {
        c = GetNext();

        if (!c.IsNameStart() && !IsValidEscape(c)) {
            // Undo both reads so only '@' is consumed.
            Back(2);
            return NewDelimiter('@');
        }

        base.StringBuffer.Append('-');
        return AtKeywordRest(c);
    }

    if (c.IsNameStart()) {
        base.StringBuffer.Append(c);
    } else if (IsValidEscape(c)) {
        c = GetNext();
        base.StringBuffer.Append(ConsumeEscape(c));
    } else {
        Back();
        return NewDelimiter('@');
    }

    return AtKeywordRest(GetNext());
}
/// <summary>
/// Collects the remaining name characters and escapes of an at-keyword.
/// </summary>
/// <param name="current">The first character to examine.</param>
private CssToken AtKeywordRest(char current)
{
    for (; ; current = GetNext()) {
        if (current.IsName()) {
            base.StringBuffer.Append(current);
            continue;
        }

        if (!IsValidEscape(current)) {
            break;
        }

        current = GetNext();
        base.StringBuffer.Append(ConsumeEscape(current));
    }

    // Hand the terminating character back to the input.
    Back();
    return NewAtKeyword(FlushBuffer());
}
/// <summary>
/// Starts an identifier at the given character, which may be a leading '-',
/// a name-start character or the backslash of an escape. Anything else is
/// handed back to the general dispatch in Data.
/// </summary>
/// <param name="current">The character the identifier starts with.</param>
private CssToken IdentStart(char current)
{
    if (current == '-') {
        current = GetNext();

        if (!current.IsNameStart() && !IsValidEscape(current)) {
            Back();
            return NewDelimiter('-');
        }

        base.StringBuffer.Append('-');
        return IdentRest(current);
    }

    if (current.IsNameStart()) {
        base.StringBuffer.Append(current);
        return IdentRest(GetNext());
    }

    if (current == '\\' && IsValidEscape(current)) {
        current = GetNext();
        base.StringBuffer.Append(ConsumeEscape(current));
        return IdentRest(GetNext());
    }

    return Data(current);
}
/// <summary>
/// Collects the remaining characters of an identifier. When the identifier is
/// directly followed by '(' it becomes a function token or, if its name does not
/// map to CssTokenType.Function, a url token.
/// </summary>
/// <param name="current">The first character to examine.</param>
private CssToken IdentRest(char current)
{
    for (; ; current = GetNext()) {
        if (current.IsName()) {
            base.StringBuffer.Append(current);
            continue;
        }

        if (!IsValidEscape(current)) {
            break;
        }

        current = GetNext();
        base.StringBuffer.Append(ConsumeEscape(current));
    }

    if (current != '(') {
        // Plain identifier: the terminating character belongs to the next token.
        Back();
        return NewIdent(FlushBuffer());
    }

    string name = FlushBuffer();
    return name.GetTypeFromName() == CssTokenType.Function
        ? NewFunction(name)
        : UrlStart(name);
}
/// <summary>
/// Skips space characters after a buffered name. If a '(' follows, the name
/// becomes a function token; otherwise it becomes an identifier.
/// </summary>
/// <param name="current">Overwritten immediately by the next character read.</param>
private CssToken TransformFunctionWhitespace(char current)
{
    while (true) {
        current = GetNext();

        if (current == '(') {
            Back();
            return NewFunction(FlushBuffer());
        }

        if (!current.IsSpaceCharacter()) {
            break;
        }
    }

    Back(2);
    return NewIdent(FlushBuffer());
}
/// <summary>
/// Starts a number at the given character: an optional sign, then either a '.'
/// (continues with the fraction) or a digit (continues with the integer part).
/// Characters that are none of these are skipped until one is found.
/// </summary>
/// <param name="current">The character the number starts with.</param>
private CssToken NumberStart(char current)
{
    for (; ; current = GetNext()) {
        bool hasSign = current.IsOneOf('+', '-');

        if (hasSign) {
            base.StringBuffer.Append(current);
            current = GetNext();
        }

        if (current == '.') {
            // Keep the dot and the character after it, then read the fraction.
            base.StringBuffer.Append(current);
            base.StringBuffer.Append(GetNext());
            return NumberFraction();
        }

        // After a sign the following character is taken unconditionally.
        if (hasSign || current.IsDigit()) {
            base.StringBuffer.Append(current);
            return NumberRest();
        }
    }
}
/// <summary>
/// Reads the remaining integer digits of a number and decides how it continues:
/// exponent, unit (dimension), fraction, percentage or plain number.
/// </summary>
/// <returns>The number, percentage or dimension token.</returns>
private CssToken NumberRest()
{
    char next = GetNext();
    while (next.IsDigit()) {
        base.StringBuffer.Append(next);
        next = GetNext();
    }

    // The exponent marker has to be tested before the generic name-start check.
    // NOTE(review): IsNameStart is defined outside this file; letters presumably
    // satisfy it, which made the former 'e'/'E' switch case unreachable and turned
    // "1e5" into a dimension with unit "e". NumberExponential itself falls back to
    // a dimension when no exponent digits follow, so units such as "em" still work.
    if (next == 'e' || next == 'E')
        return NumberExponential(next);

    if (next.IsNameStart()) {
        // A unit follows: the digits become the value, the rest the unit.
        string number = FlushBuffer();
        base.StringBuffer.Append(next);
        return Dimension(number);
    }

    if (IsValidEscape(next)) {
        // The unit starts with an escape sequence.
        next = GetNext();
        string number2 = FlushBuffer();
        base.StringBuffer.Append(ConsumeEscape(next));
        return Dimension(number2);
    }

    switch (next) {
    case '.':
        // Only a dot followed by a digit continues the number.
        next = GetNext();
        if (next.IsDigit()) {
            base.StringBuffer.Append('.').Append(next);
            return NumberFraction();
        }
        Back();
        return NewNumber(FlushBuffer());
    case '%':
        return NewPercentage(FlushBuffer());
    case '-':
        return NumberDash();
    default:
        Back();
        return NewNumber(FlushBuffer());
    }
}
/// <summary>
/// Reads the remaining fraction digits of a number and decides how it continues:
/// exponent, unit (dimension), percentage or plain number.
/// </summary>
/// <returns>The number, percentage or dimension token.</returns>
private CssToken NumberFraction()
{
    char next = GetNext();
    while (next.IsDigit()) {
        base.StringBuffer.Append(next);
        next = GetNext();
    }

    // The exponent marker has to be tested before the generic name-start check.
    // NOTE(review): IsNameStart is defined outside this file; letters presumably
    // satisfy it, which made the former 'e'/'E' switch case unreachable and turned
    // "1.5e3" into a dimension with unit "e". NumberExponential itself falls back to
    // a dimension when no exponent digits follow, so units such as "em" still work.
    if (next == 'e' || next == 'E')
        return NumberExponential(next);

    if (next.IsNameStart()) {
        // A unit follows: the digits become the value, the rest the unit.
        string number = FlushBuffer();
        base.StringBuffer.Append(next);
        return Dimension(number);
    }

    if (IsValidEscape(next)) {
        // The unit starts with an escape sequence.
        next = GetNext();
        string number2 = FlushBuffer();
        base.StringBuffer.Append(ConsumeEscape(next));
        return Dimension(number2);
    }

    switch (next) {
    case '%':
        return NewPercentage(FlushBuffer());
    case '-':
        return NumberDash();
    default:
        Back();
        return NewNumber(FlushBuffer());
    }
}
/// <summary>
/// Collects the remaining letters and escapes of a unit and builds the
/// dimension token.
/// </summary>
/// <param name="number">The textual value that precedes the unit.</param>
private CssToken Dimension(string number)
{
    for (char c = GetNext(); ; c = GetNext()) {
        if (c.IsLetter()) {
            base.StringBuffer.Append(c);
            continue;
        }

        if (!IsValidEscape(c)) {
            break;
        }

        c = GetNext();
        base.StringBuffer.Append(ConsumeEscape(c));
    }

    // Hand the terminating character back to the input.
    Back();
    return NewDimension(number, FlushBuffer());
}
/// <summary>
/// Reads the remaining exponent digits and builds the number token from
/// everything buffered so far.
/// </summary>
private CssToken SciNotation()
{
    for (char c = GetNext(); c.IsDigit(); c = GetNext()) {
        base.StringBuffer.Append(c);
    }

    Back();
    return NewNumber(FlushBuffer());
}
/// <summary>
/// Begins a url-like function after its '(': skips spaces and picks the reader
/// for a double-quoted, single-quoted or unquoted argument.
/// </summary>
/// <param name="functionName">The name of the function being read.</param>
/// <returns>The url token.</returns>
private CssToken UrlStart(string functionName)
{
char c = SkipSpaces();
switch (c) {
// NOTE(review): empty char literal in this copy of the file; reported as
// CssParseError.EOF, so presumably the end-of-file marker - confirm.
case '':
RaiseErrorOccurred(CssParseError.EOF);
return NewUrl(functionName, string.Empty, true);
case '"':
return UrlDQ(functionName);
case '\'':
return UrlSQ(functionName);
case ')':
// Empty argument list.
return NewUrl(functionName, string.Empty, false);
default:
return UrlUQ(c, functionName);
}
}
/// <summary>
/// Reads a double-quoted url argument; the opening quote has already been consumed.
/// </summary>
/// <param name="functionName">The name of the function being read.</param>
/// <returns>The url token; bad when a raw line break or an escape at the end of
/// the input was hit.</returns>
private CssToken UrlDQ(string functionName)
{
while (true) {
char next = GetNext();
if (next.IsLineBreak()) {
RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
return UrlBad(functionName);
}
// NOTE(review): empty char literal in this copy of the file; presumably the
// end-of-file marker - confirm. Leaving the loop here yields a non-bad url
// without raising an error.
if ('' == next)
break;
switch (next) {
case '"':
return UrlEnd(functionName);
default:
base.StringBuffer.Append(next);
break;
case '\\':
next = GetNext();
// NOTE(review): empty literal; reported as CssParseError.EOF, presumably the
// end-of-file marker - confirm.
if (next == '') {
Back(2);
RaiseErrorOccurred(CssParseError.EOF);
return NewUrl(functionName, FlushBuffer(), true);
}
// An escaped line break is kept as a line terminator.
if (next.IsLineBreak())
base.StringBuffer.AppendLine();
else
base.StringBuffer.Append(ConsumeEscape(next));
break;
}
}
return NewUrl(functionName, FlushBuffer(), false);
}
/// <summary>
/// Reads a single-quoted url argument; the opening quote has already been consumed.
/// </summary>
/// <param name="functionName">The name of the function being read.</param>
/// <returns>The url token; bad when a raw line break or an escape at the end of
/// the input was hit.</returns>
private CssToken UrlSQ(string functionName)
{
while (true) {
char next = GetNext();
// A raw line break leaves the loop and is handled as a bad url below.
if (next.IsLineBreak())
break;
switch (next) {
// NOTE(review): empty char literal in this copy of the file; presumably the
// end-of-file marker - confirm. It yields a non-bad url without raising an error.
case '':
return NewUrl(functionName, FlushBuffer(), false);
case '\'':
return UrlEnd(functionName);
default:
base.StringBuffer.Append(next);
break;
case '\\':
next = GetNext();
// NOTE(review): empty literal; reported as CssParseError.EOF, presumably the
// end-of-file marker - confirm.
if (next == '') {
Back(2);
RaiseErrorOccurred(CssParseError.EOF);
return NewUrl(functionName, FlushBuffer(), true);
}
// An escaped line break is kept as a line terminator.
if (next.IsLineBreak())
base.StringBuffer.AppendLine();
else
base.StringBuffer.Append(ConsumeEscape(next));
break;
}
}
RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
return UrlBad(functionName);
}
/// <summary>
/// Reads an unquoted url argument starting at the given character.
/// </summary>
/// <param name="current">The first character of the argument.</param>
/// <param name="functionName">The name of the function being read.</param>
/// <returns>The url token; bad when an invalid character or a broken escape was found.</returns>
private CssToken UrlUQ(char current, string functionName)
{
while (true) {
// A space ends the value; only spaces and ')' may follow (checked by UrlEnd).
if (current.IsSpaceCharacter())
return UrlEnd(functionName);
// NOTE(review): the second argument is an empty char literal in this copy of
// the file; presumably the end-of-file marker - confirm.
if (current.IsOneOf(')', ''))
return NewUrl(functionName, FlushBuffer(), false);
// Quotes, '(' and non-printable characters are not allowed in an unquoted url.
if (current.IsOneOf('"', '\'', '(') || current.IsNonPrintable()) {
RaiseErrorOccurred(CssParseError.InvalidCharacter);
return UrlBad(functionName);
}
if (current != '\\')
base.StringBuffer.Append(current);
else {
// A backslash that does not form a valid escape makes the url bad.
if (!IsValidEscape(current))
break;
current = GetNext();
base.StringBuffer.Append(ConsumeEscape(current));
}
current = GetNext();
}
RaiseErrorOccurred(CssParseError.InvalidCharacter);
return UrlBad(functionName);
}
/// <summary>
/// Finishes a url after its value: skips space characters up to the closing ')'.
/// Any other character is reported and the rest is consumed as a bad url.
/// </summary>
/// <param name="functionName">The name of the function being read.</param>
private CssToken UrlEnd(string functionName)
{
    char c = GetNext();

    while (c != ')') {
        if (!c.IsSpaceCharacter()) {
            RaiseErrorOccurred(CssParseError.InvalidCharacter);
            Back();
            return UrlBad(functionName);
        }

        c = GetNext();
    }

    return NewUrl(functionName, FlushBuffer(), false);
}
/// <summary>
/// Consumes the remainder of a malformed url and returns it as a bad url token.
/// Reading stops at a ';', at a '}' that is not matched by an earlier '{', at the
/// ')' that closes the url, or at the end of the input.
/// </summary>
/// <param name="functionName">The name of the function being read.</param>
/// <returns>A url token flagged as bad.</returns>
private CssToken UrlBad(string functionName)
{
    char c = base.Current;
    int curly = 0;  // '{' seen minus '}' seen inside the bad url
    int round = 1;  // starts at one; reaching zero on ')' ends the url

    while (true) {
        switch (c) {
        case ';':
            // Leave the ';' in the input for the next token.
            Back();
            return NewUrl(functionName, FlushBuffer(), true);
        case '}':
            // Only an unmatched '}' terminates; it is left in the input as well.
            if (--curly == -1) {
                Back();
                return NewUrl(functionName, FlushBuffer(), true);
            }
            break;
        // NOTE(review): empty char literal in this copy of the file; reported as
        // CssParseError.EOF, so presumably the end-of-file marker - confirm.
        case '':
            RaiseErrorOccurred(CssParseError.EOF);
            return NewUrl(functionName, FlushBuffer(), true);
        }

        if (c == ')' && --round == 0)
            break;

        if (IsValidEscape(c)) {
            c = GetNext();
            base.StringBuffer.Append(ConsumeEscape(c));
        } else {
            if (c == '(')
                round++;
            else if (c == '{') {
                // Fix: the original compared the counter itself with 123 (the code
                // of '{'), so opening braces were never counted and the first '}'
                // always ended the url.
                curly++;
            }
            base.StringBuffer.Append(c);
        }

        c = GetNext();
    }

    return NewUrl(functionName, FlushBuffer(), true);
}
/// <summary>
/// Reads a unicode range after "U+": up to six hex digits, optionally padded with
/// '?' wildcards to at most six characters in total, or - when no wildcard was
/// used - followed by '-' and up to six hex digits for the end of the range.
/// </summary>
/// <param name="current">The first character after "U+" (a hex digit or '?').</param>
/// <returns>The range token.</returns>
private CssToken UnicodeRange(char current)
{
    for (int i = 0; i < 6 && current.IsHex(); i++) {
        base.StringBuffer.Append(current);
        current = GetNext();
    }

    // Fix: compute the wildcard budget once. The original re-evaluated
    // "6 - StringBuffer.Length" on every pass while the buffer was growing, so only
    // about half of the allowed '?' characters were consumed.
    int wildcardBudget = 6 - base.StringBuffer.Length;
    int wildcards = 0;
    while (wildcards < wildcardBudget && current == '?') {
        base.StringBuffer.Append(current);
        wildcards++;
        current = GetNext();
    }

    if (wildcards > 0) {
        // Fix: always hand the look-ahead character back; the original skipped
        // this when the loop ran to its limit and thereby lost a character.
        Back();
        return NewRange(FlushBuffer());
    }

    // Fix: an explicit end may follow any number of start digits, not only
    // exactly six.
    if (current == '-') {
        current = GetNext();
        if (current.IsHex()) {
            string start = FlushBuffer();
            for (int k = 0; k < 6 && current.IsHex(); k++) {
                base.StringBuffer.Append(current);
                current = GetNext();
            }
            // Same look-ahead fix as above for a full six-digit end value.
            Back();
            string end = FlushBuffer();
            return NewRange(start, end);
        }
        // '-' is not part of the range: undo the '-' and the character after it.
        Back(2);
        return NewRange(FlushBuffer());
    }

    Back();
    return NewRange(FlushBuffer());
}
// Factories for the fixed-text tokens. Each one attaches the position that was
// recorded when the token started.

/// <summary>Creates a match token carrying the given combinator symbol.</summary>
private CssToken NewMatch(string match)
{
    CssToken token = new CssToken(CssTokenType.Match, match, _position);
    return token;
}

/// <summary>Creates the column token.</summary>
private CssToken NewColumn()
{
    CssToken token = new CssToken(CssTokenType.Column, CombinatorSymbols.Column, _position);
    return token;
}

/// <summary>Creates the token for "}".</summary>
private CssToken NewCloseCurly()
{
    CssToken token = new CssToken(CssTokenType.CurlyBracketClose, "}", _position);
    return token;
}

/// <summary>Creates the token for "{".</summary>
private CssToken NewOpenCurly()
{
    CssToken token = new CssToken(CssTokenType.CurlyBracketOpen, "{", _position);
    return token;
}

/// <summary>Creates the token for "]".</summary>
private CssToken NewCloseSquare()
{
    CssToken token = new CssToken(CssTokenType.SquareBracketClose, "]", _position);
    return token;
}

/// <summary>Creates the token for "[".</summary>
private CssToken NewOpenSquare()
{
    CssToken token = new CssToken(CssTokenType.SquareBracketOpen, "[", _position);
    return token;
}

/// <summary>Creates the CDO token.</summary>
private CssToken NewOpenComment()
{
    CssToken token = new CssToken(CssTokenType.Cdo, "<!--", _position);
    return token;
}

/// <summary>Creates the token for ";".</summary>
private CssToken NewSemicolon()
{
    CssToken token = new CssToken(CssTokenType.Semicolon, ";", _position);
    return token;
}

/// <summary>Creates the token for ":".</summary>
private CssToken NewColon()
{
    CssToken token = new CssToken(CssTokenType.Colon, ":", _position);
    return token;
}

/// <summary>Creates the CDC token.</summary>
private CssToken NewCloseComment()
{
    CssToken token = new CssToken(CssTokenType.Cdc, "-->", _position);
    return token;
}

/// <summary>Creates the token for ",".</summary>
private CssToken NewComma()
{
    CssToken token = new CssToken(CssTokenType.Comma, ",", _position);
    return token;
}

/// <summary>Creates the token for ")".</summary>
private CssToken NewCloseRound()
{
    CssToken token = new CssToken(CssTokenType.RoundBracketClose, ")", _position);
    return token;
}

/// <summary>Creates the token for "(".</summary>
private CssToken NewOpenRound()
{
    CssToken token = new CssToken(CssTokenType.RoundBracketOpen, "(", _position);
    return token;
}
/// <summary>Creates a string token for the given content and quote character.</summary>
private CssToken NewString(string value, char quote, bool bad = false)
{
    CssStringToken token = new CssStringToken(value, bad, quote, _position);
    return token;
}

/// <summary>Creates a hash token with the given name.</summary>
private CssToken NewHash(string value)
{
    CssKeywordToken token = new CssKeywordToken(CssTokenType.Hash, value, _position);
    return token;
}

/// <summary>Creates a comment token with the given content.</summary>
private CssToken NewComment(string value, bool bad = false)
{
    CssCommentToken token = new CssCommentToken(value, bad, _position);
    return token;
}

/// <summary>Creates an at-keyword token with the given name.</summary>
private CssToken NewAtKeyword(string value)
{
    CssKeywordToken token = new CssKeywordToken(CssTokenType.AtKeyword, value, _position);
    return token;
}

/// <summary>Creates an identifier token with the given name.</summary>
private CssToken NewIdent(string value)
{
    CssKeywordToken token = new CssKeywordToken(CssTokenType.Ident, value, _position);
    return token;
}
/// <summary>
/// Creates a function token and fills it with the following tokens, up to and
/// including the closing round bracket, or until the end of the input.
/// </summary>
/// <param name="value">The function name.</param>
private CssToken NewFunction(string value)
{
    CssFunctionToken function = new CssFunctionToken(value, _position);

    for (CssToken argument = Get(); argument.Type != CssTokenType.EndOfFile; argument = Get()) {
        function.AddArgumentToken(argument);

        // The closing bracket is stored as the last argument token.
        if (argument.Type == CssTokenType.RoundBracketClose) {
            break;
        }
    }

    return function;
}
/// <summary>Creates a percentage token for the given value.</summary>
private CssToken NewPercentage(string value)
{
    CssUnitToken token = new CssUnitToken(CssTokenType.Percentage, value, "%", _position);
    return token;
}

/// <summary>Creates a dimension token from a value and its unit.</summary>
private CssToken NewDimension(string value, string unit)
{
    CssUnitToken token = new CssUnitToken(CssTokenType.Dimension, value, unit, _position);
    return token;
}

/// <summary>Creates a url token for the given function name and data.</summary>
private CssToken NewUrl(string functionName, string data, bool bad = false)
{
    CssUrlToken token = new CssUrlToken(functionName, data, bad, _position);
    return token;
}

/// <summary>Creates a range token from a single range text.</summary>
private CssToken NewRange(string range)
{
    CssRangeToken token = new CssRangeToken(range, _position);
    return token;
}

/// <summary>Creates a range token from explicit start and end values.</summary>
private CssToken NewRange(string start, string end)
{
    CssRangeToken token = new CssRangeToken(start, end, _position);
    return token;
}

/// <summary>Creates a whitespace token holding the given character.</summary>
private CssToken NewWhitespace(char c)
{
    string text = c.ToString();
    return new CssToken(CssTokenType.Whitespace, text, _position);
}

/// <summary>Creates a number token from its textual form.</summary>
private CssToken NewNumber(string number)
{
    CssNumberToken token = new CssNumberToken(number, _position);
    return token;
}

/// <summary>Creates a delimiter token holding the given character.</summary>
private CssToken NewDelimiter(char c)
{
    string text = c.ToString();
    return new CssToken(CssTokenType.Delim, text, _position);
}

/// <summary>Creates a color token from the text that followed '#'.</summary>
private CssToken NewColor(string text)
{
    CssColorToken token = new CssColorToken(text, _position);
    return token;
}

/// <summary>Creates the end-of-file token.</summary>
private CssToken NewEof()
{
    CssToken token = new CssToken(CssTokenType.EndOfFile, string.Empty, _position);
    return token;
}
/// <summary>
/// Handles an 'e'/'E' after a number. If digits follow (optionally after a sign)
/// the number continues in scientific notation; otherwise the letter starts a unit.
/// </summary>
/// <param name="letter">The exponent letter that was read.</param>
private CssToken NumberExponential(char letter)
{
    char c = GetNext();

    if (c.IsDigit()) {
        base.StringBuffer.Append(letter).Append(c);
        return SciNotation();
    }

    if (c == '+' || c == '-') {
        char sign = c;
        c = GetNext();

        if (!c.IsDigit()) {
            // Undo the read of the character after the sign.
            Back();
        } else {
            base.StringBuffer.Append(letter).Append(sign).Append(c);
            return SciNotation();
        }
    }

    // No exponent: the letter is the first character of a unit.
    string value = FlushBuffer();
    base.StringBuffer.Append(letter);
    Back();
    return Dimension(value);
}
/// <summary>
/// Handles a '-' directly after a number. Followed by a name-start character or
/// a valid escape it begins a unit; otherwise both reads are undone and the plain
/// number is returned.
/// </summary>
private CssToken NumberDash()
{
    char c = GetNext();

    if (c.IsNameStart()) {
        string plainValue = FlushBuffer();
        base.StringBuffer.Append('-').Append(c);
        return Dimension(plainValue);
    }

    if (!IsValidEscape(c)) {
        Back(2);
        return NewNumber(FlushBuffer());
    }

    c = GetNext();
    string escapedValue = FlushBuffer();
    base.StringBuffer.Append('-').Append(ConsumeEscape(c));
    return Dimension(escapedValue);
}
/// <summary>
/// Resolves an escape sequence; the backslash has already been consumed.
/// A run of up to six hex digits is read as a code point, any other character
/// stands for itself.
/// </summary>
/// <param name="current">The character right after the backslash.</param>
/// <returns>The text the escape stands for.</returns>
private string ConsumeEscape(char current)
{
    if (!current.IsHex()) {
        return current.ToString();
    }

    char[] digits = new char[6];
    int count = 0;

    do {
        digits[count++] = current;
        current = GetNext();
    } while (current.IsHex() && count < digits.Length);

    // A single space character after the digits is swallowed as part of the
    // escape; anything else is handed back to the input.
    if (!current.IsSpaceCharacter()) {
        Back();
    }

    int codePoint = int.Parse(new string(digits, 0, count), NumberStyles.HexNumber);

    if (codePoint.IsInvalid()) {
        current = '�';
        return current.ToString();
    }

    return codePoint.ConvertFromUtf32();
}
/// <summary>
/// Checks whether the given character is a backslash that starts a valid escape,
/// by peeking at the following character without consuming it.
/// </summary>
/// <param name="current">The character to test.</param>
/// <returns>True when the character is a backslash whose successor passes both
/// checks below; false otherwise.</returns>
private bool IsValidEscape(char current)
{
if (current == '\\') {
// Peek at the next character and immediately step back again.
current = GetNext();
Back();
// NOTE(review): empty char literal in this copy of the file; presumably the
// end-of-file marker - confirm.
if (current != '')
return !current.IsLineBreak();
return false;
}
return false;
}
/// <summary>
/// Reports the given error at the tokenizer's current position.
/// </summary>
/// <param name="code">The error code to report.</param>
private void RaiseErrorOccurred(CssParseError code)
{
    TextPosition here = GetCurrentPosition();
    RaiseErrorOccurred(code, here);
}
}
}