AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.9.1" />

 CssTokenizer

sealed class CssTokenizer : BaseTokenizer
The CSS tokenizer. See http://dev.w3.org/csswg/css-syntax/#tokenization for more details.
using AngleSharp.Events;
using AngleSharp.Extensions;
using System.Diagnostics;
using System.Globalization;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// The CSS tokenizer. Turns the characters delivered by the base tokenizer
    /// into <see cref="CssToken"/> instances, one per call to <see cref="Get"/>.
    /// </summary>
    /// <remarks>
    /// Convention used throughout this class, as visible from the code below:
    /// when a token method returns, the reader is positioned on the last
    /// character that belongs to the token; look-ahead characters are un-read
    /// via <c>Back()</c> / <c>GetPrevious()</c>.
    /// </remarks>
    [DebuggerStepThrough]
    internal sealed class CssTokenizer : BaseTokenizer
    {
        // End-of-input sentinel. NOTE(review): the literal was stripped from the
        // text this file was recovered from; U+FFFF (char.MaxValue) is assumed to
        // be what the base tokenizer yields at the end of the source - confirm.
        private const char EndOfInput = char.MaxValue;

        // Form feed. Restored for the same reason; the position between '\n' and
        // '\r' in the (ascending) case labels identifies it as U+000C.
        private const char FormFeed = '\f';

        // U+FFFD, substituted for escapes that denote an invalid code point.
        private const char ReplacementCharacter = '\ufffd';

        // Maximum number of hex digits / wildcards in an escape or unicode-range part.
        private const int MaxHexDigits = 6;

        private bool _valueMode;
        private TextPosition _position;

        /// <summary>
        /// Gets or sets whether the tokenizer is in value mode. In value mode a
        /// '#' starts a color literal instead of a hash token.
        /// </summary>
        public bool IsInValue
        {
            get { return _valueMode; }
            set { _valueMode = value; }
        }

        /// <summary>
        /// Creates a new tokenizer over the given source.
        /// </summary>
        /// <param name="source">The text source, handed to the base tokenizer.</param>
        /// <param name="events">The event aggregator used for error reporting; may be null.</param>
        public CssTokenizer(TextSource source, IEventAggregator events)
            : base(source, events)
        {
            _valueMode = false;
        }

        /// <summary>
        /// Reads the next token from the source.
        /// </summary>
        /// <returns>The next token; an Eof token once the input is exhausted.</returns>
        public CssToken Get()
        {
            char current = GetNext();
            _position = GetCurrentPosition();
            return Data(current);
        }

        /// <summary>
        /// Publishes a parse error event for the given position, if an event
        /// aggregator is available.
        /// </summary>
        /// <param name="error">The error to report.</param>
        /// <param name="position">The position to attach to the error.</param>
        public void RaiseErrorOccurred(CssParseError error, TextPosition position)
        {
            if (_events == null)
            {
                return;
            }

            CssParseErrorEvent data = new CssParseErrorEvent(error.GetCode(), error.GetMessage(), position);
            _events.Publish(data);
        }

        /// <summary>
        /// Publishes a parse error event for the current position.
        /// </summary>
        /// <param name="code">The error to report.</param>
        public void RaiseErrorOccurred(CssParseError code)
        {
            RaiseErrorOccurred(code, GetCurrentPosition());
        }

        /// <summary>
        /// Dispatches on the first character of a token.
        /// </summary>
        /// <param name="current">The character the reader is currently positioned on.</param>
        /// <returns>The token starting at <paramref name="current"/>.</returns>
        private CssToken Data(char current)
        {
            _position = GetCurrentPosition();

            switch (current)
            {
                case '\t':
                case '\n':
                case FormFeed:
                case '\r':
                case ' ':
                    return NewWhitespace(current);

                case '"':
                    return StringDQ();

                case '#':
                    if (!_valueMode)
                    {
                        return HashStart();
                    }

                    return ColorLiteral();

                case '$':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewSuffix();
                    }

                    return NewDelimiter(GetPrevious());

                case '\'':
                    return StringSQ();

                case '(':
                    return NewOpenRound();

                case ')':
                    return NewCloseRound();

                case '*':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewSubstring();
                    }

                    return NewDelimiter(GetPrevious());

                case '+':
                {
                    char first = GetNext();

                    if (first != EndOfInput)
                    {
                        // Peek two characters, then rewind onto the sign.
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case ',':
                    return NewComma();

                case '.':
                {
                    char following = GetNext();

                    if (following.IsDigit())
                    {
                        return NumberStart(GetPrevious());
                    }

                    return NewDelimiter(GetPrevious());
                }

                case '-':
                {
                    char first = GetNext();

                    if (first != EndOfInput)
                    {
                        // Peek two characters, then rewind onto the dash.
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }

                        if (first.IsNameStart())
                        {
                            return IdentStart(current);
                        }

                        if (first == '\\' && !second.IsLineBreak() && second != EndOfInput)
                        {
                            return IdentStart(current);
                        }

                        if (first == '-' && second == '>')
                        {
                            // "-->" closes an HTML-style comment.
                            Advance(2);
                            return NewCloseComment();
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case '/':
                    current = GetNext();

                    if (current == '*')
                    {
                        return Comment();
                    }

                    return NewDelimiter(GetPrevious());

                case '\\':
                    current = GetNext();

                    if (current.IsLineBreak())
                    {
                        RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                        return NewDelimiter(GetPrevious());
                    }

                    if (current == EndOfInput)
                    {
                        RaiseErrorOccurred(CssParseError.EOF);
                        return NewDelimiter(GetPrevious());
                    }

                    return IdentStart(GetPrevious());

                case ':':
                    return NewColon();

                case ';':
                    return NewSemicolon();

                case '<':
                    current = GetNext();

                    if (current == '!')
                    {
                        current = GetNext();

                        if (current == '-')
                        {
                            current = GetNext();

                            if (current == '-')
                            {
                                return NewOpenComment();
                            }

                            // Not "<!--": step back once per character peeked.
                            current = GetPrevious();
                        }

                        current = GetPrevious();
                    }

                    return NewDelimiter(GetPrevious());

                case '@':
                    return AtKeywordStart();

                case '[':
                    return NewOpenSquare();

                case ']':
                    return NewCloseSquare();

                case '^':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewPrefix();
                    }

                    return NewDelimiter(GetPrevious());

                case '{':
                    return NewOpenCurly();

                case '}':
                    return NewCloseCurly();

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    return NumberStart(current);

                case 'U':
                case 'u':
                    current = GetNext();

                    if (current == '+')
                    {
                        current = GetNext();

                        if (current.IsHex() || current == '?')
                        {
                            return UnicodeRange(current);
                        }

                        current = GetPrevious();
                    }

                    return IdentStart(GetPrevious());

                case '|':
                    current = GetNext();

                    switch (current)
                    {
                        case '=':
                            return NewDash();
                        case '|':
                            return NewColumn();
                        default:
                            return NewDelimiter(GetPrevious());
                    }

                case '~':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewInclude();
                    }

                    return NewDelimiter(GetPrevious());

                case EndOfInput:
                    return NewEof();

                case '!':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewNot();
                    }

                    return NewDelimiter(GetPrevious());

                default:
                    if (current.IsNameStart())
                    {
                        return IdentStart(current);
                    }

                    return NewDelimiter(current);
            }
        }

        /// <summary>
        /// Reads a string delimited by double quotes; the opening quote has
        /// already been consumed.
        /// </summary>
        private CssToken StringDQ()
        {
            return QuotedString('"');
        }

        /// <summary>
        /// Reads a string delimited by single quotes; the opening quote has
        /// already been consumed.
        /// </summary>
        private CssToken StringSQ()
        {
            return QuotedString('\'');
        }

        /// <summary>
        /// Shared implementation of <see cref="StringDQ"/> and <see cref="StringSQ"/>.
        /// </summary>
        /// <param name="quote">The quote character that terminates the string.</param>
        /// <returns>
        /// A string token; flagged as bad when an unescaped line feed / form feed
        /// or an escape directly before the end of input is encountered.
        /// </returns>
        private CssToken QuotedString(char quote)
        {
            while (true)
            {
                char current = GetNext();

                if (current == EndOfInput || current == quote)
                {
                    return NewString(FlushBuffer(), false);
                }

                if (current == '\n' || current == FormFeed)
                {
                    RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                    Back();
                    return NewString(FlushBuffer(), true);
                }

                if (current != '\\')
                {
                    _stringBuffer.Append(current);
                    continue;
                }

                current = GetNext();

                if (current.IsLineBreak())
                {
                    // Escaped line break: recorded as a new line in the value.
                    _stringBuffer.AppendLine();
                }
                else if (current == EndOfInput)
                {
                    RaiseErrorOccurred(CssParseError.EOF);
                    Back();
                    return NewString(FlushBuffer(), true);
                }
                else
                {
                    _stringBuffer.Append(ConsumeEscape(current));
                }
            }
        }

        /// <summary>
        /// Reads the hex digits following a '#' in value mode.
        /// </summary>
        private CssToken ColorLiteral()
        {
            char current = GetNext();

            while (current.IsHex())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            Back();
            return NewColor(FlushBuffer());
        }

        /// <summary>
        /// Handles the character after a '#' outside of value mode: either the
        /// beginning of a hash token or a lone '#' delimiter.
        /// </summary>
        private CssToken HashStart()
        {
            char current = GetNext();

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return HashRest();
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return HashRest();
            }

            if (current == '\\')
            {
                // A backslash that does not start a valid escape.
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewDelimiter('#');
        }

        /// <summary>
        /// Reads the remaining name characters of a hash token.
        /// </summary>
        private CssToken HashRest()
        {
            char terminator = ConsumeName(GetNext());

            if (terminator == '\\')
            {
                // A backslash that does not start a valid escape.
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewHash(FlushBuffer());
        }

        /// <summary>
        /// Reads a comment; the opening "/*" has already been consumed.
        /// </summary>
        /// <returns>A comment token, flagged as bad if the input ends before "*/".</returns>
        private CssToken Comment()
        {
            char current = GetNext();

            while (current != EndOfInput)
            {
                if (current == '*')
                {
                    current = GetNext();

                    if (current == '/')
                    {
                        return NewComment(FlushBuffer(), false);
                    }

                    // Lone asterisk; the character just read is examined next.
                    _stringBuffer.Append('*');
                }
                else
                {
                    _stringBuffer.Append(current);
                    current = GetNext();
                }
            }

            RaiseErrorOccurred(CssParseError.EOF);
            return NewComment(FlushBuffer(), true);
        }

        /// <summary>
        /// Handles the character after an '@': either the beginning of an
        /// at-keyword or a lone '@' delimiter.
        /// </summary>
        private CssToken AtKeywordStart()
        {
            char current = GetNext();

            if (current == '-')
            {
                current = GetNext();

                if (current.IsNameStart() || IsValidEscape(current))
                {
                    _stringBuffer.Append('-');
                    return AtKeywordRest(current);
                }

                Back(2);
                return NewDelimiter('@');
            }

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return AtKeywordRest(GetNext());
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return AtKeywordRest(GetNext());
            }

            Back();
            return NewDelimiter('@');
        }

        /// <summary>
        /// Reads the remaining name characters of an at-keyword.
        /// </summary>
        /// <param name="current">The first character to examine.</param>
        private CssToken AtKeywordRest(char current)
        {
            ConsumeName(current);
            Back();
            return NewAtKeyword(FlushBuffer());
        }

        /// <summary>
        /// Starts an identifier, or falls back to <see cref="Data"/> when the
        /// character cannot start one.
        /// </summary>
        /// <param name="current">The character the reader is positioned on.</param>
        private CssToken IdentStart(char current)
        {
            if (current == '-')
            {
                current = GetNext();

                if (current.IsNameStart() || IsValidEscape(current))
                {
                    _stringBuffer.Append('-');
                    return IdentRest(current);
                }

                Back();
                return NewDelimiter('-');
            }

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return IdentRest(GetNext());
            }

            // IsValidEscape already rejects anything but a backslash.
            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return IdentRest(GetNext());
            }

            return Data(current);
        }

        /// <summary>
        /// Reads the remaining name characters of an identifier. A directly
        /// following '(' turns the name into a function or URL-like token,
        /// depending on what <c>GetTypeFromName</c> reports for the name.
        /// </summary>
        /// <param name="current">The first character to examine.</param>
        private CssToken IdentRest(char current)
        {
            current = ConsumeName(current);

            if (current == '(')
            {
                string name = FlushBuffer();
                CssTokenType type = name.GetTypeFromName();

                if (type == CssTokenType.Function)
                {
                    return NewFunction(name);
                }

                return UrlStart(type);
            }

            Back();
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Appends name characters and escapes to the buffer, starting with
        /// <paramref name="current"/>, until a character that is neither is found.
        /// </summary>
        /// <param name="current">The first character to examine.</param>
        /// <returns>The terminating character; the reader is positioned on it.</returns>
        private char ConsumeName(char current)
        {
            while (true)
            {
                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    return current;
                }

                current = GetNext();
            }
        }

        /// <summary>
        /// Skips space characters and produces a function token if a '(' follows,
        /// otherwise an identifier. Not called from within this class.
        /// </summary>
        /// <param name="current">Overwritten immediately; the incoming value is not used.</param>
        private CssToken TransformFunctionWhitespace(char current)
        {
            do
            {
                current = GetNext();

                if (current == '(')
                {
                    Back();
                    return NewFunction(FlushBuffer());
                }
            }
            while (current.IsSpaceCharacter());

            Back(2);
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Starts a numeric token at a sign, a dot or a digit.
        /// </summary>
        /// <param name="current">The character the reader is positioned on.</param>
        private CssToken NumberStart(char current)
        {
            while (true)
            {
                switch (current)
                {
                    case '+':
                    case '-':
                        _stringBuffer.Append(current);
                        current = GetNext();

                        if (current == '.')
                        {
                            _stringBuffer.Append(current);
                            _stringBuffer.Append(GetNext());
                            return NumberFraction();
                        }

                        _stringBuffer.Append(current);
                        return NumberRest();

                    case '.':
                        _stringBuffer.Append(current);
                        _stringBuffer.Append(GetNext());
                        return NumberFraction();
                }

                if (current.IsDigit())
                {
                    break;
                }

                // Anything else is skipped until a sign, dot or digit shows up.
                current = GetNext();
            }

            _stringBuffer.Append(current);
            return NumberRest();
        }

        /// <summary>
        /// Reads the integer part of a number and decides how the token continues.
        /// </summary>
        private CssToken NumberRest()
        {
            char current = GetNext();

            while (current.IsDigit())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            if (current.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append(current);
                return Dimension(number);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(current));
                return Dimension(number);
            }

            switch (current)
            {
                case '.':
                    current = GetNext();

                    if (current.IsDigit())
                    {
                        _stringBuffer.Append('.').Append(current);
                        return NumberFraction();
                    }

                    // NOTE(review): two characters were read but only one is
                    // un-read, so the '.' appears to be dropped for input such
                    // as "1.a" (compare Back(2) in NumberDash). Left unchanged -
                    // confirm the intended behavior before altering.
                    Back();
                    return NewNumber(FlushBuffer());

                case '%':
                    return NewPercentage(FlushBuffer());

                case 'E':
                case 'e':
                    return NumberExponential(current);

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the fractional part of a number and decides how the token continues.
        /// </summary>
        private CssToken NumberFraction()
        {
            char current = GetNext();

            while (current.IsDigit())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            if (current.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append(current);
                return Dimension(number);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(current));
                return Dimension(number);
            }

            switch (current)
            {
                case 'E':
                case 'e':
                    return NumberExponential(current);

                case '%':
                    return NewPercentage(FlushBuffer());

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the unit of a dimension; the buffer already holds the beginning
        /// of the unit.
        /// </summary>
        /// <param name="number">The numeric part read so far.</param>
        private CssToken Dimension(string number)
        {
            while (true)
            {
                char current = GetNext();

                if (current.IsLetter())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }
            }

            Back();
            return NewDimension(number, FlushBuffer());
        }

        /// <summary>
        /// Reads the remaining digits of an exponent and emits the number.
        /// </summary>
        private CssToken SciNotation()
        {
            char current = GetNext();

            while (current.IsDigit())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            Back();
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Starts a URL-like token after the opening parenthesis.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlStart(CssTokenType type)
        {
            char current = SkipSpaces();

            switch (current)
            {
                case EndOfInput:
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(type, string.Empty, true);

                case '"':
                    return UrlDQ(type);

                case '\'':
                    return UrlSQ(type);

                case ')':
                    return NewUrl(type, string.Empty, false);

                default:
                    return UrlUQ(current, type);
            }
        }

        /// <summary>
        /// Reads a URL enclosed in double quotes.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlDQ(CssTokenType type)
        {
            return UrlQuoted('"', type);
        }

        /// <summary>
        /// Reads a URL enclosed in single quotes.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlSQ(CssTokenType type)
        {
            return UrlQuoted('\'', type);
        }

        /// <summary>
        /// Shared implementation of <see cref="UrlDQ"/> and <see cref="UrlSQ"/>.
        /// </summary>
        /// <param name="quote">The quote character that terminates the URL.</param>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlQuoted(char quote, CssTokenType type)
        {
            while (true)
            {
                char current = GetNext();

                if (current.IsLineBreak())
                {
                    RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                    return UrlBad(type);
                }

                if (current == EndOfInput)
                {
                    return NewUrl(type, FlushBuffer(), false);
                }

                if (current == quote)
                {
                    return UrlEnd(type);
                }

                if (current != '\\')
                {
                    _stringBuffer.Append(current);
                    continue;
                }

                current = GetNext();

                if (current == EndOfInput)
                {
                    Back(2);
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (current.IsLineBreak())
                {
                    _stringBuffer.AppendLine();
                }
                else
                {
                    _stringBuffer.Append(ConsumeEscape(current));
                }
            }
        }

        /// <summary>
        /// Reads an unquoted URL.
        /// </summary>
        /// <param name="current">The first character of the URL.</param>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlUQ(char current, CssTokenType type)
        {
            while (true)
            {
                if (current.IsSpaceCharacter())
                {
                    return UrlEnd(type);
                }

                if (current == EndOfInput || current == ')')
                {
                    return NewUrl(type, FlushBuffer(), false);
                }

                if (current == '"' || current == '\'' || current == '(' || current.IsNonPrintable())
                {
                    RaiseErrorOccurred(CssParseError.InvalidCharacter);
                    return UrlBad(type);
                }

                if (current != '\\')
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    // A backslash that does not start a valid escape.
                    RaiseErrorOccurred(CssParseError.InvalidCharacter);
                    return UrlBad(type);
                }

                current = GetNext();
            }
        }

        /// <summary>
        /// Skips space characters after the URL content and expects the closing
        /// parenthesis.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlEnd(CssTokenType type)
        {
            char current;

            do
            {
                current = GetNext();

                if (current == ')')
                {
                    return NewUrl(type, FlushBuffer(), false);
                }
            }
            while (current.IsSpaceCharacter());

            RaiseErrorOccurred(CssParseError.InvalidCharacter);
            Back();
            return UrlBad(type);
        }

        /// <summary>
        /// Recovers from a malformed URL by consuming characters until the
        /// matching ')', a ';', an unbalanced '}' or the end of input.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        /// <returns>A token of the given type that is flagged as bad.</returns>
        private CssToken UrlBad(CssTokenType type)
        {
            char current = base.Current;
            int curly = 0;
            int round = 1;

            while (current != EndOfInput)
            {
                if (current == ';')
                {
                    Back();
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (current == '}' && --curly == -1)
                {
                    Back();
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (current == ')' && --round == 0)
                {
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    if (current == '(')
                    {
                        round++;
                    }
                    else if (current == '{')
                    {
                        // Fix: the counter itself used to be compared with the
                        // code of '{', so nested braces were never tracked.
                        curly++;
                    }

                    _stringBuffer.Append(current);
                }

                current = GetNext();
            }

            RaiseErrorOccurred(CssParseError.EOF);
            return NewUrl(type, FlushBuffer(), true);
        }

        /// <summary>
        /// Reads a unicode range; the leading "u+" has already been consumed.
        /// </summary>
        /// <param name="current">The first hex digit or '?' of the range.</param>
        private CssToken UnicodeRange(char current)
        {
            for (int i = 0; i < MaxHexDigits; i++)
            {
                if (!current.IsHex())
                {
                    break;
                }

                _stringBuffer.Append(current);
                current = GetNext();
            }

            if (_stringBuffer.Length != MaxHexDigits)
            {
                // Fix: the budget is computed once. It used to be re-evaluated
                // against the growing buffer, which cut wildcards short.
                // NOTE(review): a "-end" part is not looked for on this path,
                // so ranges with fewer than six leading hex digits never get
                // an end - confirm whether that is intended.
                int budget = MaxHexDigits - _stringBuffer.Length;

                while (budget > 0 && current == '?')
                {
                    _stringBuffer.Append(current);
                    current = GetNext();
                    budget--;
                }

                // Fix: always un-read the look-ahead; it used to be lost when
                // the loop ended because the budget was used up.
                Back();
                return NewRange(FlushBuffer());
            }

            if (current == '-')
            {
                current = GetNext();

                if (current.IsHex())
                {
                    string start = FlushBuffer();
                    int budget = MaxHexDigits;

                    while (budget > 0 && current.IsHex())
                    {
                        _stringBuffer.Append(current);
                        current = GetNext();
                        budget--;
                    }

                    // Fix: un-read the look-ahead in every case (see above).
                    Back();
                    string end = FlushBuffer();
                    return NewRange(start, end);
                }

                Back(2);
                return NewRange(FlushBuffer());
            }

            Back();
            return NewRange(FlushBuffer());
        }

        /// <summary>Creates a "!=" token.</summary>
        private CssToken NewNot()
        {
            return new CssToken(CssTokenType.NotMatch, "!=", _position);
        }

        /// <summary>Creates a "~=" token.</summary>
        private CssToken NewInclude()
        {
            return new CssToken(CssTokenType.IncludeMatch, "~=", _position);
        }

        /// <summary>Creates a "||" token.</summary>
        private CssToken NewColumn()
        {
            return new CssToken(CssTokenType.Column, "||", _position);
        }

        /// <summary>Creates a "|=" token.</summary>
        private CssToken NewDash()
        {
            return new CssToken(CssTokenType.DashMatch, "|=", _position);
        }

        /// <summary>Creates a "}" token.</summary>
        private CssToken NewCloseCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketClose, "}", _position);
        }

        /// <summary>Creates a "{" token.</summary>
        private CssToken NewOpenCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketOpen, "{", _position);
        }

        /// <summary>Creates a "^=" token.</summary>
        private CssToken NewPrefix()
        {
            return new CssToken(CssTokenType.PrefixMatch, "^=", _position);
        }

        /// <summary>Creates a "]" token.</summary>
        private CssToken NewCloseSquare()
        {
            return new CssToken(CssTokenType.SquareBracketClose, "]", _position);
        }

        /// <summary>Creates a "[" token.</summary>
        private CssToken NewOpenSquare()
        {
            return new CssToken(CssTokenType.SquareBracketOpen, "[", _position);
        }

        /// <summary>Creates a CDO token.</summary>
        private CssToken NewOpenComment()
        {
            return new CssToken(CssTokenType.Cdo, "<!--", _position);
        }

        /// <summary>Creates a ";" token.</summary>
        private CssToken NewSemicolon()
        {
            return new CssToken(CssTokenType.Semicolon, ";", _position);
        }

        /// <summary>Creates a ":" token.</summary>
        private CssToken NewColon()
        {
            return new CssToken(CssTokenType.Colon, ":", _position);
        }

        /// <summary>Creates a CDC token.</summary>
        private CssToken NewCloseComment()
        {
            return new CssToken(CssTokenType.Cdc, "-->", _position);
        }

        /// <summary>Creates a "," token.</summary>
        private CssToken NewComma()
        {
            return new CssToken(CssTokenType.Comma, ",", _position);
        }

        /// <summary>Creates a "*=" token.</summary>
        private CssToken NewSubstring()
        {
            return new CssToken(CssTokenType.SubstringMatch, "*=", _position);
        }

        /// <summary>Creates a ")" token.</summary>
        private CssToken NewCloseRound()
        {
            return new CssToken(CssTokenType.RoundBracketClose, ")", _position);
        }

        /// <summary>Creates a "(" token.</summary>
        private CssToken NewOpenRound()
        {
            return new CssToken(CssTokenType.RoundBracketOpen, "(", _position);
        }

        /// <summary>Creates a "$=" token.</summary>
        private CssToken NewSuffix()
        {
            return new CssToken(CssTokenType.SuffixMatch, "$=", _position);
        }

        /// <summary>Creates a string token.</summary>
        /// <param name="value">The content of the string.</param>
        /// <param name="bad">Whether the string was malformed.</param>
        private CssToken NewString(string value, bool bad = false)
        {
            return new CssStringToken(CssTokenType.String, value, bad, _position);
        }

        /// <summary>Creates a hash token.</summary>
        /// <param name="value">The name following the '#'.</param>
        private CssToken NewHash(string value)
        {
            return new CssKeywordToken(CssTokenType.Hash, value, _position);
        }

        /// <summary>Creates a comment token.</summary>
        /// <param name="value">The content of the comment.</param>
        /// <param name="bad">Whether the comment was left unterminated.</param>
        private CssToken NewComment(string value, bool bad = false)
        {
            return new CssStringToken(CssTokenType.Comment, value, bad, _position);
        }

        /// <summary>Creates an at-keyword token.</summary>
        /// <param name="value">The name following the '@'.</param>
        private CssToken NewAtKeyword(string value)
        {
            return new CssKeywordToken(CssTokenType.AtKeyword, value, _position);
        }

        /// <summary>Creates an identifier token.</summary>
        /// <param name="value">The name of the identifier.</param>
        private CssToken NewIdent(string value)
        {
            return new CssKeywordToken(CssTokenType.Ident, value, _position);
        }

        /// <summary>
        /// Creates a function token and feeds it with the following tokens up to
        /// and including the closing parenthesis, or up to the end of input.
        /// </summary>
        /// <param name="value">The name of the function.</param>
        private CssToken NewFunction(string value)
        {
            CssFunctionToken function = new CssFunctionToken(value, _position);

            for (CssToken token = Get(); token.Type != CssTokenType.Eof; token = Get())
            {
                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    function.Close(token);
                    break;
                }

                function.With(token);
            }

            return function;
        }

        /// <summary>Creates a percentage token.</summary>
        /// <param name="value">The numeric part.</param>
        private CssToken NewPercentage(string value)
        {
            return new CssUnitToken(CssTokenType.Percentage, value, "%", _position);
        }

        /// <summary>Creates a dimension token.</summary>
        /// <param name="value">The numeric part.</param>
        /// <param name="unit">The unit part.</param>
        private CssToken NewDimension(string value, string unit)
        {
            return new CssUnitToken(CssTokenType.Dimension, value, unit, _position);
        }

        /// <summary>Creates a URL-like token of the given type.</summary>
        /// <param name="type">The token type to create.</param>
        /// <param name="data">The content read so far.</param>
        /// <param name="bad">Whether the content was malformed.</param>
        private CssToken NewUrl(CssTokenType type, string data, bool bad = false)
        {
            return new CssStringToken(type, data, bad, _position);
        }

        /// <summary>Creates a range token from a single range expression.</summary>
        /// <param name="range">The hex digits and wildcards read.</param>
        private CssToken NewRange(string range)
        {
            return new CssRangeToken(range, _position);
        }

        /// <summary>Creates a range token from explicit boundaries.</summary>
        /// <param name="start">The hex digits of the start.</param>
        /// <param name="end">The hex digits of the end.</param>
        private CssToken NewRange(string start, string end)
        {
            return new CssRangeToken(start, end, _position);
        }

        /// <summary>Creates a whitespace token.</summary>
        /// <param name="c">The whitespace character.</param>
        private CssToken NewWhitespace(char c)
        {
            return new CssToken(CssTokenType.Whitespace, c.ToString(), _position);
        }

        /// <summary>Creates a number token.</summary>
        /// <param name="number">The textual representation of the number.</param>
        private CssToken NewNumber(string number)
        {
            return new CssNumberToken(number, _position);
        }

        /// <summary>Creates a delimiter token.</summary>
        /// <param name="c">The delimiter character.</param>
        private CssToken NewDelimiter(char c)
        {
            return new CssToken(CssTokenType.Delim, c, _position);
        }

        /// <summary>
        /// Creates a color token; it is flagged as bad unless it consists of
        /// exactly 3 or 6 characters.
        /// </summary>
        /// <param name="text">The hex digits read.</param>
        private CssToken NewColor(string text)
        {
            bool bad = text.Length != 3 && text.Length != 6;
            return new CssStringToken(CssTokenType.Color, text, bad, _position);
        }

        /// <summary>Creates the end-of-file token.</summary>
        private CssToken NewEof()
        {
            return new CssToken(CssTokenType.Eof, string.Empty, _position);
        }

        /// <summary>
        /// Returns the content of the string buffer and empties it.
        /// </summary>
        private string FlushBuffer()
        {
            string content = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return content;
        }

        /// <summary>
        /// Handles an 'e' / 'E' after a number: either an exponent or the
        /// beginning of a unit.
        /// </summary>
        /// <param name="letter">The exponent letter as written in the source.</param>
        private CssToken NumberExponential(char letter)
        {
            char current = GetNext();

            if (current.IsDigit())
            {
                _stringBuffer.Append(letter).Append(current);
                return SciNotation();
            }

            if (current == '+' || current == '-')
            {
                char sign = current;
                current = GetNext();

                if (current.IsDigit())
                {
                    _stringBuffer.Append(letter).Append(sign).Append(current);
                    return SciNotation();
                }

                Back();
            }

            // Not an exponent: the letter starts a unit; rewind onto it.
            string number = FlushBuffer();
            _stringBuffer.Append(letter);
            Back();
            return Dimension(number);
        }

        /// <summary>
        /// Handles a '-' after a number: either the beginning of a unit or a
        /// character that does not belong to the number.
        /// </summary>
        private CssToken NumberDash()
        {
            char current = GetNext();

            if (current.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append('-').Append(current);
                return Dimension(number);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append('-').Append(ConsumeEscape(current));
                return Dimension(number);
            }

            // Un-read both the look-ahead and the dash.
            Back(2);
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Resolves an escape sequence; the backslash has already been consumed.
        /// </summary>
        /// <param name="current">The first character after the backslash.</param>
        /// <returns>
        /// The text denoted by up to six hex digits (the replacement character
        /// if the code point is invalid), or the character itself otherwise.
        /// </returns>
        private string ConsumeEscape(char current)
        {
            if (current.IsHex())
            {
                char[] digits = new char[MaxHexDigits];
                int length = 0;

                while (length < digits.Length)
                {
                    digits[length++] = current;
                    current = GetNext();

                    if (!current.IsHex())
                    {
                        break;
                    }
                }

                // A single space character after the digits belongs to the
                // escape; anything else is un-read.
                if (!current.IsSpaceCharacter())
                {
                    Back();
                }

                int codePoint = int.Parse(new string(digits, 0, length), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                if (!codePoint.IsInvalid())
                {
                    return codePoint.ConvertFromUtf32();
                }

                current = ReplacementCharacter;
            }

            return current.ToString();
        }

        /// <summary>
        /// Checks whether the given character starts a valid escape, i.e. it is
        /// a backslash that is followed by neither a line break nor the end of
        /// input. The reader position is left unchanged.
        /// </summary>
        /// <param name="current">The character the reader is positioned on.</param>
        private bool IsValidEscape(char current)
        {
            if (current != '\\')
            {
                return false;
            }

            // Peek at the following character.
            current = GetNext();
            Back();

            return current != EndOfInput && !current.IsLineBreak();
        }
    }
}