AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.8.5" />

 CssTokenizer

sealed class CssTokenizer : BaseTokenizer
The CSS tokenizer. See http://dev.w3.org/csswg/css-syntax/#tokenization for more details.
using AngleSharp.Css;
using AngleSharp.Events;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// The CSS tokenizer.
    /// See http://dev.w3.org/csswg/css-syntax/#tokenization for more details.
    /// </summary>
    /// <remarks>
    /// Position convention used throughout this class: every token method
    /// returns with the reader positioned on the LAST character that belongs
    /// to the token, because <see cref="Get"/> starts by calling GetNext().
    /// Any look-ahead character therefore has to be pushed back with Back()
    /// before a token is returned.
    /// </remarks>
    [DebuggerStepThrough]
    internal sealed class CssTokenizer : BaseTokenizer
    {
        // NOTE(review): in the decompiled text this sentinel was a raw,
        // unprintable character that did not survive (it showed up as an empty
        // char literal). U+001A is inferred from the sorted case order, where
        // the sentinel always precedes '"' (0x22). Confirm that it matches the
        // value the base tokenizer yields at the end of the input.
        private const char EndOfFileChar = '\u001a';

        // The form feed was likewise emitted as a raw character and degraded to
        // a plain space, which produced duplicate `case ' '` labels.
        private const char FormFeedChar = '\f';

        // Maximum number of hex digits in an escape or a unicode-range part.
        private const int MaxHexDigits = 6;

        private CssParseMode _state;
        private TextPosition _position;

        /// <summary>
        /// Gets or sets the mode that decides which sub-tokenizer
        /// <see cref="Get"/> dispatches to.
        /// </summary>
        public CssParseMode State
        {
            get { return _state; }
            set { _state = value; }
        }

        /// <summary>
        /// Creates a new tokenizer that starts in <c>CssParseMode.Data</c>.
        /// </summary>
        /// <param name="source">The text source, handed to the base class.</param>
        /// <param name="events">The event aggregator, handed to the base class.</param>
        public CssTokenizer(TextSource source, IEventAggregator events)
            : base(source, events)
        {
            _state = CssParseMode.Data;
        }

        /// <summary>
        /// Reads the next token according to the current <see cref="State"/>.
        /// </summary>
        /// <returns>
        /// The next token; an Eof token if the input is exhausted or the state
        /// is none of Data, Text, Selector or Value.
        /// </returns>
        public CssToken Get()
        {
            char current = GetNext();
            _position = GetCurrentPosition();

            if (current != EndOfFileChar)
            {
                switch (_state)
                {
                    case CssParseMode.Data:
                        return Data(current);
                    case CssParseMode.Text:
                        return Text(current);
                    case CssParseMode.Selector:
                        return Selector(current);
                    case CssParseMode.Value:
                        return Value(current);
                }
            }

            return NewEof();
        }

        /// <summary>
        /// Publishes a <c>CssParseErrorEvent</c> if an event aggregator is set.
        /// </summary>
        /// <param name="code">The error code.</param>
        /// <param name="position">The position to report.</param>
        public void RaiseErrorOccurred(CssParseError code, TextPosition position)
        {
            if (_events != null)
            {
                CssParseErrorEvent data = new CssParseErrorEvent(code, position);
                _events.Publish(data);
            }
        }

        /// <summary>
        /// Publishes a parse error located at the current position.
        /// </summary>
        /// <param name="code">The error code.</param>
        public void RaiseErrorOccurred(CssParseError code)
        {
            RaiseErrorOccurred(code, GetCurrentPosition());
        }

        /// <summary>
        /// Advances until a ';' or '}' outside of any nested curly block (or
        /// the end of the input) is reached, then steps back by one so that
        /// this terminator is what the next <see cref="Get"/> reads.
        /// </summary>
        public void JumpToEndOfDeclaration()
        {
            int depth = 0;
            char current = Current;

            while (current != EndOfFileChar)
            {
                if (current == '{')
                {
                    depth++;
                }
                else if (depth > 0)
                {
                    // Inside a nested block only the closing brace matters.
                    if (current == '}')
                    {
                        depth--;
                    }
                }
                else if (current == '}' || current == ';')
                {
                    break;
                }

                current = GetNext();
            }

            Back();
        }

        /// <summary>
        /// Advances until the next ';' or the end of the input was read.
        /// </summary>
        public void JumpToNextSemicolon()
        {
            char current = GetNext();

            while (current != EndOfFileChar && current != ';')
            {
                current = GetNext();
            }
        }

        /// <summary>
        /// Advances until a ')' outside of any nested parentheses (or the end
        /// of the input) is reached, then steps back by one so that this
        /// terminator is what the next <see cref="Get"/> reads.
        /// </summary>
        public void JumpToClosedArguments()
        {
            int depth = 0;
            char current = Current;

            while (current != EndOfFileChar)
            {
                if (current == '(')
                {
                    depth++;
                }
                else if (current == ')')
                {
                    if (depth == 0)
                    {
                        break;
                    }

                    depth--;
                }

                current = GetNext();
            }

            Back();
        }

        /// <summary>
        /// Advances past an unknown rule: stops on the ';' or '}' that is
        /// reached while no curly block is open, or at the end of the input.
        /// Unlike the Jump* methods the terminator is not pushed back.
        /// </summary>
        public void SkipUnknownRule()
        {
            int depth = 0;
            char current = Current;

            while (current != EndOfFileChar)
            {
                if (current == '{')
                {
                    depth++;
                }
                else if (current == '}')
                {
                    depth--;
                }

                if (depth <= 0 && (current == ';' || current == '}'))
                {
                    break;
                }

                current = GetNext();
            }
        }

        /// <summary>
        /// The general tokenizer state: dispatches on the first character of
        /// a token. Whitespace is skipped here (no token is produced).
        /// </summary>
        /// <param name="current">The character the reader is positioned on.</param>
        /// <returns>The token that starts at <paramref name="current"/>.</returns>
        private CssToken Data(char current)
        {
            _position = GetCurrentPosition();

            switch (current)
            {
                case '\t':
                case '\n':
                case FormFeedChar:
                case '\r':
                case ' ':
                    current = SkipSpaces();
                    return Data(current);

                case '"':
                    return StringDQ();

                case '#':
                    return HashStart();

                case '$':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewSuffix();
                    }

                    return NewDelimiter(GetPrevious());

                case '\'':
                    return StringSQ();

                case '(':
                    return NewOpenRound();

                case ')':
                    return NewCloseRound();

                case '*':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewSubstring();
                    }

                    return NewDelimiter(GetPrevious());

                case '+':
                {
                    char first = GetNext();

                    if (first != EndOfFileChar)
                    {
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case ',':
                    return NewComma();

                case '.':
                {
                    char following = GetNext();

                    if (following.IsDigit())
                    {
                        return NumberStart(GetPrevious());
                    }

                    return NewDelimiter(GetPrevious());
                }

                case '-':
                {
                    char first = GetNext();

                    if (first != EndOfFileChar)
                    {
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }

                        if (first.IsNameStart())
                        {
                            return IdentStart(current);
                        }

                        if (first == '\\' && !second.IsLineBreak() && second != EndOfFileChar)
                        {
                            return IdentStart(current);
                        }

                        if (first == '-' && second == '>')
                        {
                            Advance(2);
                            return NewCloseComment();
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case '/':
                    current = GetNext();

                    if (current == '*')
                    {
                        return Comment();
                    }

                    return NewDelimiter(GetPrevious());

                case '\\':
                    current = GetNext();

                    if (current.IsLineBreak())
                    {
                        RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                        return NewDelimiter(GetPrevious());
                    }

                    if (current == EndOfFileChar)
                    {
                        RaiseErrorOccurred(CssParseError.EOF);
                        return NewDelimiter(GetPrevious());
                    }

                    return IdentStart(GetPrevious());

                case ':':
                    return NewColon();

                case ';':
                    return NewSemicolon();

                case '<':
                    current = GetNext();

                    if (current == '!')
                    {
                        current = GetNext();

                        if (current == '-')
                        {
                            current = GetNext();

                            if (current == '-')
                            {
                                return NewOpenComment();
                            }

                            current = GetPrevious();
                        }

                        current = GetPrevious();
                    }

                    // Every failed look-ahead step above is undone one by one,
                    // so this final step lands on the '<' again.
                    return NewDelimiter(GetPrevious());

                case '@':
                    return AtKeywordStart();

                case '[':
                    return NewOpenSquare();

                case ']':
                    return NewCloseSquare();

                case '^':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewPrefix();
                    }

                    return NewDelimiter(GetPrevious());

                case '{':
                    return NewOpenCurly();

                case '}':
                    return NewCloseCurly();

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    return NumberStart(current);

                case 'U':
                case 'u':
                    current = GetNext();

                    if (current == '+')
                    {
                        current = GetNext();

                        if (current.IsHex() || current == '?')
                        {
                            return UnicodeRange(current);
                        }

                        current = GetPrevious();
                    }

                    return IdentStart(GetPrevious());

                case '|':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewDash();
                    }

                    if (current == '|')
                    {
                        return NewColumn();
                    }

                    return NewDelimiter(GetPrevious());

                case '~':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewInclude();
                    }

                    return NewDelimiter(GetPrevious());

                case EndOfFileChar:
                    return NewEof();

                case '!':
                    current = GetNext();

                    if (current == '=')
                    {
                        return NewNot();
                    }

                    return NewDelimiter(GetPrevious());

                default:
                    if (current.IsNameStart())
                    {
                        return IdentStart(current);
                    }

                    return NewDelimiter(current);
            }
        }

        /// <summary>
        /// The value state: whitespace yields a Whitespace token and '#'
        /// starts a color literal; everything else is handled by Data.
        /// </summary>
        private CssToken Value(char current)
        {
            switch (current)
            {
                case '\t':
                case '\n':
                case FormFeedChar:
                case '\r':
                case ' ':
                    SkipMostSpaces();
                    return NewWhitespace();

                case '#':
                    return ColorLiteral();

                default:
                    return Data(current);
            }
        }

        /// <summary>
        /// The selector state: whitespace yields a Whitespace token;
        /// everything else is handled by Data.
        /// </summary>
        private CssToken Selector(char current)
        {
            switch (current)
            {
                case '\t':
                case '\n':
                case FormFeedChar:
                case '\r':
                case ' ':
                    SkipMostSpaces();
                    return NewWhitespace();

                default:
                    return Data(current);
            }
        }

        /// <summary>
        /// The text state: collects raw characters into a single Ident token
        /// until a ';' or '{' outside of round and square brackets, or the end
        /// of the input, is found. The terminator is pushed back.
        /// </summary>
        private CssToken Text(char current)
        {
            int round = 0;
            int square = 0;

            while (current != EndOfFileChar)
            {
                bool nested = round > 0 || square > 0;

                if (!nested && (current == ';' || current == '{'))
                {
                    break;
                }

                switch (current)
                {
                    case '(':
                        round++;
                        break;
                    case ')':
                        round--;
                        break;
                    case '[':
                        square++;
                        break;
                    case ']':
                        square--;
                        break;
                }

                _stringBuffer.Append(current);
                current = GetNext();
            }

            Back();
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Reads a string that was opened with a double quotation mark.
        /// </summary>
        private CssToken StringDQ()
        {
            return QuotedString('"');
        }

        /// <summary>
        /// Reads a string that was opened with a single quotation mark.
        /// </summary>
        private CssToken StringSQ()
        {
            return QuotedString('\'');
        }

        /// <summary>
        /// Shared implementation of the two string states.
        /// </summary>
        /// <param name="quote">The quotation mark that terminates the string.</param>
        /// <returns>
        /// A String token; flagged as bad if a raw line feed / form feed or an
        /// escape directly followed by the end of the input was found.
        /// </returns>
        private CssToken QuotedString(char quote)
        {
            while (true)
            {
                char current = GetNext();

                // Reaching the end of the input closes the string silently.
                if (current == quote || current == EndOfFileChar)
                {
                    return NewString(FlushBuffer(), false);
                }

                switch (current)
                {
                    case '\n':
                    case FormFeedChar:
                        RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                        Back();
                        return NewString(FlushBuffer(), true);

                    case '\\':
                        current = GetNext();

                        if (current.IsLineBreak())
                        {
                            _stringBuffer.AppendLine();
                        }
                        else if (current == EndOfFileChar)
                        {
                            RaiseErrorOccurred(CssParseError.EOF);
                            Back();
                            return NewString(FlushBuffer(), true);
                        }
                        else
                        {
                            _stringBuffer.Append(ConsumeEscape(current));
                        }

                        break;

                    default:
                        _stringBuffer.Append(current);
                        break;
                }
            }
        }

        /// <summary>
        /// Reads the hex digits that follow a '#' in the value state and
        /// returns them as a Color token.
        /// </summary>
        private CssToken ColorLiteral()
        {
            char current = GetNext();

            while (current.IsHex())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            Back();
            return NewColor(FlushBuffer());
        }

        /// <summary>
        /// Entered after a '#': yields a Hash token if a name or a valid
        /// escape follows, otherwise a '#' delimiter.
        /// </summary>
        private CssToken HashStart()
        {
            char current = GetNext();

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return HashRest();
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return HashRest();
            }

            // A backslash that got here did not start a valid escape.
            if (current == '\\')
            {
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewDelimiter('#');
        }

        /// <summary>
        /// Reads the remaining name characters and escapes of a Hash token.
        /// </summary>
        private CssToken HashRest()
        {
            char current;

            while (true)
            {
                current = GetNext();

                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }
            }

            // A backslash that got here did not start a valid escape.
            if (current == '\\')
            {
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewHash(FlushBuffer());
        }

        /// <summary>
        /// Entered after "/*": skips everything up to and including "*/" and
        /// continues with the Data state. An unterminated comment raises an
        /// EOF error.
        /// </summary>
        private CssToken Comment()
        {
            char current = GetNext();

            while (true)
            {
                switch (current)
                {
                    case '*':
                        current = GetNext();

                        if (current == '/')
                        {
                            return Data(GetNext());
                        }

                        break;

                    case EndOfFileChar:
                        RaiseErrorOccurred(CssParseError.EOF);
                        return Data(current);

                    default:
                        current = GetNext();
                        break;
                }
            }
        }

        /// <summary>
        /// Entered after an '@': yields an AtKeyword token if an identifier
        /// follows, otherwise an '@' delimiter.
        /// </summary>
        private CssToken AtKeywordStart()
        {
            char current = GetNext();

            if (current == '-')
            {
                current = GetNext();

                if (current.IsNameStart() || IsValidEscape(current))
                {
                    _stringBuffer.Append('-');
                    return AtKeywordRest(current);
                }

                Back(2);
                return NewDelimiter('@');
            }

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return AtKeywordRest(GetNext());
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return AtKeywordRest(GetNext());
            }

            Back();
            return NewDelimiter('@');
        }

        /// <summary>
        /// Reads the remaining name characters and escapes of an AtKeyword.
        /// </summary>
        private CssToken AtKeywordRest(char current)
        {
            while (true)
            {
                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }

                current = GetNext();
            }

            Back();
            return NewAtKeyword(FlushBuffer());
        }

        /// <summary>
        /// Starts an identifier at <paramref name="current"/>; falls back to
        /// a '-' delimiter or to the Data state if no identifier starts here.
        /// </summary>
        private CssToken IdentStart(char current)
        {
            if (current == '-')
            {
                current = GetNext();

                if (current.IsNameStart() || IsValidEscape(current))
                {
                    _stringBuffer.Append('-');
                    return IdentRest(current);
                }

                Back();
                return NewDelimiter('-');
            }

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return IdentRest(GetNext());
            }

            // IsValidEscape already requires the character to be a backslash.
            if (IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return IdentRest(GetNext());
            }

            return Data(current);
        }

        /// <summary>
        /// Reads the rest of an identifier. If it is directly followed by
        /// '(' the result is a function token or, for the names recognized
        /// by <see cref="TypeFromName"/>, a url-like token.
        /// </summary>
        private CssToken IdentRest(char current)
        {
            while (true)
            {
                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }

                current = GetNext();
            }

            if (current == '(')
            {
                string name = FlushBuffer();
                CssTokenType type = TypeFromName(name);

                if (type == CssTokenType.Function)
                {
                    return NewFunction(name);
                }

                return UrlStart(type);
            }

            Back();
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Skips whitespace looking for a '('; yields a function token if one
        /// is found and an Ident token otherwise. Not called from within this
        /// class; kept unchanged.
        /// </summary>
        private CssToken TransformFunctionWhitespace(char current)
        {
            do
            {
                current = GetNext();

                if (current == '(')
                {
                    Back();
                    return NewFunction(FlushBuffer());
                }
            }
            while (current.IsSpaceCharacter());

            Back(2);
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Starts a number at a sign, a dot or a digit.
        /// </summary>
        private CssToken NumberStart(char current)
        {
            while (true)
            {
                switch (current)
                {
                    case '+':
                    case '-':
                        _stringBuffer.Append(current);
                        current = GetNext();

                        if (current == '.')
                        {
                            _stringBuffer.Append(current);
                            _stringBuffer.Append(GetNext());
                            return NumberFraction();
                        }

                        _stringBuffer.Append(current);
                        return NumberRest();

                    case '.':
                        _stringBuffer.Append(current);
                        _stringBuffer.Append(GetNext());
                        return NumberFraction();
                }

                if (current.IsDigit())
                {
                    break;
                }

                current = GetNext();
            }

            _stringBuffer.Append(current);
            return NumberRest();
        }

        /// <summary>
        /// Reads the integer part of a number and decides how it continues
        /// (fraction, exponent, percentage, dimension or plain number).
        /// </summary>
        private CssToken NumberRest()
        {
            char current = GetNext();

            while (current.IsDigit())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            // Tested before IsNameStart so the exponent branch cannot be
            // shadowed by the dimension branch. NumberExponential itself falls
            // back to a dimension if no exponent follows.
            if (current == 'e' || current == 'E')
            {
                return NumberExponential();
            }

            if (current.IsNameStart())
            {
                string value = FlushBuffer();
                _stringBuffer.Append(current);
                return Dimension(value);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string value = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(current));
                return Dimension(value);
            }

            switch (current)
            {
                case '.':
                    current = GetNext();

                    if (current.IsDigit())
                    {
                        _stringBuffer.Append('.').Append(current);
                        return NumberFraction();
                    }

                    // Two characters of look-ahead ('.' and its successor)
                    // were read; both are pushed back so the dot is tokenized
                    // on its own instead of being dropped.
                    Back(2);
                    return NewNumber(FlushBuffer());

                case '%':
                    return NewPercentage(FlushBuffer());

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the fractional digits of a number and decides how it
        /// continues (exponent, percentage, dimension or plain number).
        /// </summary>
        private CssToken NumberFraction()
        {
            char current = GetNext();

            while (current.IsDigit())
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            // See NumberRest: the exponent is checked before the dimension.
            if (current == 'e' || current == 'E')
            {
                return NumberExponential();
            }

            if (current.IsNameStart())
            {
                string value = FlushBuffer();
                _stringBuffer.Append(current);
                return Dimension(value);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string value = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(current));
                return Dimension(value);
            }

            switch (current)
            {
                case '%':
                    return NewPercentage(FlushBuffer());

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the unit of a dimension; its first character is expected to
        /// be in the buffer already.
        /// </summary>
        /// <param name="number">The numeric part read so far.</param>
        private CssToken Dimension(string number)
        {
            while (true)
            {
                char current = GetNext();

                if (current.IsLetter())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }
            }

            Back();
            return NewDimension(number, FlushBuffer());
        }

        /// <summary>
        /// Reads the remaining digits of an exponent and yields the number.
        /// </summary>
        private CssToken SciNotation()
        {
            while (true)
            {
                char current = GetNext();

                if (!current.IsDigit())
                {
                    break;
                }

                _stringBuffer.Append(current);
            }

            Back();
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Entered after the opening parenthesis of a url-like function.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlStart(CssTokenType type)
        {
            char current = SkipSpaces();

            switch (current)
            {
                case EndOfFileChar:
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(type, string.Empty, true);

                case '"':
                    return UrlDQ(type);

                case '\'':
                    return UrlSQ(type);

                case ')':
                    return NewUrl(type, string.Empty, false);

                default:
                    return UrlUQ(current, type);
            }
        }

        /// <summary>
        /// Reads a url argument that is enclosed in double quotation marks.
        /// </summary>
        private CssToken UrlDQ(CssTokenType type)
        {
            return UrlQuoted(type, '"');
        }

        /// <summary>
        /// Reads a url argument that is enclosed in single quotation marks.
        /// </summary>
        private CssToken UrlSQ(CssTokenType type)
        {
            return UrlQuoted(type, '\'');
        }

        /// <summary>
        /// Shared implementation of the two quoted url states.
        /// </summary>
        /// <param name="type">The token type to create.</param>
        /// <param name="quote">The quotation mark that ends the argument.</param>
        private CssToken UrlQuoted(CssTokenType type, char quote)
        {
            while (true)
            {
                char current = GetNext();

                if (current.IsLineBreak())
                {
                    RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                    return UrlBad(type);
                }

                if (current == EndOfFileChar)
                {
                    return NewUrl(type, FlushBuffer(), false);
                }

                if (current == quote)
                {
                    return UrlEnd(type);
                }

                if (current != '\\')
                {
                    _stringBuffer.Append(current);
                    continue;
                }

                current = GetNext();

                if (current == EndOfFileChar)
                {
                    Back(2);
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (current.IsLineBreak())
                {
                    _stringBuffer.AppendLine();
                }
                else
                {
                    _stringBuffer.Append(ConsumeEscape(current));
                }
            }
        }

        /// <summary>
        /// Reads an unquoted url argument.
        /// </summary>
        /// <param name="current">The first character of the argument.</param>
        /// <param name="type">The token type to create.</param>
        private CssToken UrlUQ(char current, CssTokenType type)
        {
            while (true)
            {
                if (current.IsSpaceCharacter())
                {
                    return UrlEnd(type);
                }

                if (current == EndOfFileChar || current == ')')
                {
                    return NewUrl(type, FlushBuffer(), false);
                }

                bool invalid = current == '"' || current == '\'' || current == '(' ||
                               current.IsNonPrintable() ||
                               (current == '\\' && !IsValidEscape(current));

                if (invalid)
                {
                    RaiseErrorOccurred(CssParseError.InvalidCharacter);
                    return UrlBad(type);
                }

                if (current == '\\')
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    _stringBuffer.Append(current);
                }

                current = GetNext();
            }
        }

        /// <summary>
        /// Expects optional whitespace followed by ')' after a url argument;
        /// anything else turns the token into a bad url.
        /// </summary>
        private CssToken UrlEnd(CssTokenType type)
        {
            char current;

            do
            {
                current = GetNext();

                if (current == ')')
                {
                    return NewUrl(type, FlushBuffer(), false);
                }
            }
            while (current.IsSpaceCharacter());

            RaiseErrorOccurred(CssParseError.InvalidCharacter);
            Back();
            return UrlBad(type);
        }

        /// <summary>
        /// Consumes the remnants of a malformed url up to the closing ')' or
        /// the end of the input and yields a token flagged as bad.
        /// </summary>
        private CssToken UrlBad(CssTokenType type)
        {
            while (true)
            {
                char current = GetNext();

                if (current == EndOfFileChar)
                {
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (current == ')')
                {
                    return NewUrl(type, FlushBuffer(), true);
                }

                if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
            }
        }

        /// <summary>
        /// Reads a unicode range after "U+": up to six hex digits, padded
        /// with '?' wildcards, or an explicit "start-end" pair.
        /// </summary>
        /// <param name="current">The first hex digit or '?'.</param>
        /// <returns>The Range token.</returns>
        private CssToken UnicodeRange(char current)
        {
            for (int i = 0; i < MaxHexDigits && current.IsHex(); i++)
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            // The number of allowed wildcards is fixed BEFORE the loop; using
            // the growing buffer length as loop bound would stop half way.
            int missing = MaxHexDigits - _stringBuffer.Length;
            bool hasWildcard = false;

            while (missing > 0 && current == '?')
            {
                _stringBuffer.Append(current);
                current = GetNext();
                missing--;
                hasWildcard = true;
            }

            if (hasWildcard)
            {
                // Push the look-ahead back, also if all six places were used.
                Back();
                string pattern = FlushBuffer();
                return NewRange(pattern.Replace('?', '0'), pattern.Replace('?', 'F'));
            }

            if (current == '-')
            {
                current = GetNext();

                if (current.IsHex())
                {
                    string start = FlushBuffer();

                    for (int i = 0; i < MaxHexDigits && current.IsHex(); i++)
                    {
                        _stringBuffer.Append(current);
                        current = GetNext();
                    }

                    // Push the look-ahead back, also after six digits.
                    Back();
                    return NewRange(start, FlushBuffer());
                }

                // Neither the '-' nor its successor belongs to the range.
                Back(2);
            }
            else
            {
                Back();
            }

            string single = FlushBuffer();

            // Kept for compatibility with the previous output: a value with
            // fewer than six digits was reported with end == start, a full
            // six digit value with a null end.
            return NewRange(single, single.Length == MaxHexDigits ? null : single);
        }

        /// <summary>Creates the NotMatch token.</summary>
        private CssToken NewNot()
        {
            return new CssToken(CssTokenType.NotMatch, "!=", _position);
        }

        /// <summary>Creates the IncludeMatch token.</summary>
        private CssToken NewInclude()
        {
            return new CssToken(CssTokenType.IncludeMatch, "~=", _position);
        }

        /// <summary>Creates the Column token.</summary>
        private CssToken NewColumn()
        {
            return new CssToken(CssTokenType.Column, "||", _position);
        }

        /// <summary>Creates the DashMatch token.</summary>
        private CssToken NewDash()
        {
            return new CssToken(CssTokenType.DashMatch, "|=", _position);
        }

        /// <summary>Creates the CurlyBracketClose token.</summary>
        private CssToken NewCloseCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketClose, "}", _position);
        }

        /// <summary>Creates the CurlyBracketOpen token.</summary>
        private CssToken NewOpenCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketOpen, "{", _position);
        }

        /// <summary>Creates the PrefixMatch token.</summary>
        private CssToken NewPrefix()
        {
            return new CssToken(CssTokenType.PrefixMatch, "^=", _position);
        }

        /// <summary>Creates the SquareBracketClose token.</summary>
        private CssToken NewCloseSquare()
        {
            return new CssToken(CssTokenType.SquareBracketClose, "]", _position);
        }

        /// <summary>Creates the SquareBracketOpen token.</summary>
        private CssToken NewOpenSquare()
        {
            return new CssToken(CssTokenType.SquareBracketOpen, "[", _position);
        }

        /// <summary>Creates the Cdo (comment open) token.</summary>
        private CssToken NewOpenComment()
        {
            return new CssToken(CssTokenType.Cdo, "<!--", _position);
        }

        /// <summary>Creates the Semicolon token.</summary>
        private CssToken NewSemicolon()
        {
            return new CssToken(CssTokenType.Semicolon, ";", _position);
        }

        /// <summary>Creates the Colon token.</summary>
        private CssToken NewColon()
        {
            return new CssToken(CssTokenType.Colon, ":", _position);
        }

        /// <summary>Creates the Cdc (comment close) token.</summary>
        private CssToken NewCloseComment()
        {
            return new CssToken(CssTokenType.Cdc, "-->", _position);
        }

        /// <summary>Creates the Comma token.</summary>
        private CssToken NewComma()
        {
            return new CssToken(CssTokenType.Comma, ",", _position);
        }

        /// <summary>Creates the SubstringMatch token.</summary>
        private CssToken NewSubstring()
        {
            return new CssToken(CssTokenType.SubstringMatch, "*=", _position);
        }

        /// <summary>Creates the RoundBracketClose token.</summary>
        private CssToken NewCloseRound()
        {
            return new CssToken(CssTokenType.RoundBracketClose, ")", _position);
        }

        /// <summary>Creates the RoundBracketOpen token.</summary>
        private CssToken NewOpenRound()
        {
            return new CssToken(CssTokenType.RoundBracketOpen, "(", _position);
        }

        /// <summary>Creates the SuffixMatch token.</summary>
        private CssToken NewSuffix()
        {
            return new CssToken(CssTokenType.SuffixMatch, "$=", _position);
        }

        /// <summary>Creates a String token, optionally flagged as bad.</summary>
        private CssToken NewString(string value, bool bad = false)
        {
            return new CssStringToken(CssTokenType.String, value, bad, _position);
        }

        /// <summary>Creates a Hash token.</summary>
        private CssToken NewHash(string value)
        {
            return new CssKeywordToken(CssTokenType.Hash, value, _position);
        }

        /// <summary>Creates an AtKeyword token.</summary>
        private CssToken NewAtKeyword(string value)
        {
            return new CssKeywordToken(CssTokenType.AtKeyword, value, _position);
        }

        /// <summary>Creates an Ident token.</summary>
        private CssToken NewIdent(string value)
        {
            return new CssKeywordToken(CssTokenType.Ident, value, _position);
        }

        /// <summary>
        /// Creates a function token and feeds it with all tokens read via
        /// <see cref="Get"/> up to the matching closing parenthesis or the
        /// end of the input.
        /// </summary>
        /// <param name="value">The name of the function.</param>
        private CssToken NewFunction(string value)
        {
            SkipMostSpaces();

            // Created before the arguments are read, since every nested
            // Get() call overwrites _position.
            CssFunctionToken function = new CssFunctionToken(value, _position);
            CssToken argument = Get();

            while (argument.Type != CssTokenType.Eof)
            {
                if (argument.Type == CssTokenType.RoundBracketClose)
                {
                    function.Close(argument);
                    break;
                }

                function.With(argument);
                argument = Get();
            }

            return function;
        }

        /// <summary>Creates a Percentage token.</summary>
        private CssToken NewPercentage(string value)
        {
            return new CssUnitToken(CssTokenType.Percentage, value, "%", _position);
        }

        /// <summary>Creates a Dimension token.</summary>
        private CssToken NewDimension(string value, string unit)
        {
            return new CssUnitToken(CssTokenType.Dimension, value, unit, _position);
        }

        /// <summary>Creates a url-like token of the given type.</summary>
        private CssToken NewUrl(CssTokenType type, string data, bool bad = false)
        {
            return new CssStringToken(type, data, bad, _position);
        }

        /// <summary>Creates a Range token.</summary>
        private CssToken NewRange(string start, string end)
        {
            return new CssRangeToken(start, end, _position);
        }

        /// <summary>Creates a Whitespace token holding a single space.</summary>
        private CssToken NewWhitespace()
        {
            return new CssToken(CssTokenType.Whitespace, " ", _position);
        }

        /// <summary>Creates a Number token.</summary>
        private CssToken NewNumber(string number)
        {
            return new CssNumberToken(number, _position);
        }

        /// <summary>Creates a Delim token for a single character.</summary>
        private CssToken NewDelimiter(char c)
        {
            return new CssToken(CssTokenType.Delim, c, _position);
        }

        /// <summary>
        /// Creates a Color token; it is flagged as bad unless the text has
        /// exactly 3 or 6 characters.
        /// </summary>
        private CssToken NewColor(string text)
        {
            bool bad = text.Length != 3 && text.Length != 6;
            return new CssStringToken(CssTokenType.Color, text, bad, _position);
        }

        /// <summary>Creates the Eof token.</summary>
        private CssToken NewEof()
        {
            return new CssToken(CssTokenType.Eof, string.Empty, _position);
        }

        /// <summary>
        /// Returns the content of the string buffer and clears the buffer.
        /// </summary>
        private string FlushBuffer()
        {
            string content = _stringBuffer.ToString();
            _stringBuffer.Clear();
            return content;
        }

        /// <summary>
        /// Entered after an 'e' / 'E' that follows a number: reads an
        /// optionally signed exponent or, if none follows, treats the letter
        /// as the first character of a dimension unit.
        /// </summary>
        private CssToken NumberExponential()
        {
            char current = GetNext();

            if (current.IsDigit())
            {
                _stringBuffer.Append('e').Append(current);
                return SciNotation();
            }

            if (current == '+' || current == '-')
            {
                char sign = current;
                current = GetNext();

                if (current.IsDigit())
                {
                    _stringBuffer.Append('e').Append(sign).Append(current);
                    return SciNotation();
                }

                Back();
            }

            // No exponent: step back onto the 'e' / 'E' and use it as unit.
            current = GetPrevious();
            string number = FlushBuffer();
            _stringBuffer.Append(current);
            return Dimension(number);
        }

        /// <summary>
        /// Entered after a '-' that follows a number: the dash either starts
        /// the unit of a dimension or does not belong to the number at all.
        /// </summary>
        private CssToken NumberDash()
        {
            char current = GetNext();

            if (current.IsNameStart())
            {
                string value = FlushBuffer();
                _stringBuffer.Append('-').Append(current);
                return Dimension(value);
            }

            if (IsValidEscape(current))
            {
                current = GetNext();
                string value = FlushBuffer();
                _stringBuffer.Append('-').Append(ConsumeEscape(current));
                return Dimension(value);
            }

            Back(2);
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Consumes an escape sequence whose backslash has been read already.
        /// </summary>
        /// <param name="current">The character right after the backslash.</param>
        /// <returns>
        /// For up to six hex digits the text produced by ConvertFromUtf32 for
        /// that code point (a single space after the digits is swallowed);
        /// otherwise the character itself.
        /// </returns>
        private string ConsumeEscape(char current)
        {
            if (!current.IsHex())
            {
                return current.ToString();
            }

            char[] digits = new char[MaxHexDigits];
            int count = 0;

            for (int i = 0; i < MaxHexDigits; i++)
            {
                digits[count++] = current;
                current = GetNext();

                if (!current.IsHex())
                {
                    break;
                }
            }

            if (current != ' ')
            {
                Back();
            }

            // Hex digits are culture independent; say so explicitly.
            int codePoint = int.Parse(new string(digits, 0, count), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            return codePoint.ConvertFromUtf32();
        }

        /// <summary>
        /// Checks whether <paramref name="current"/> is a backslash that is
        /// followed by neither a line break nor the end of the input. The
        /// reader position is left unchanged.
        /// </summary>
        private bool IsValidEscape(char current)
        {
            if (current != '\\')
            {
                return false;
            }

            current = GetNext();
            Back();

            return current != EndOfFileChar && !current.IsLineBreak();
        }

        /// <summary>
        /// Maps a function name (ignoring case) to Url, Domain or UrlPrefix;
        /// every other name maps to Function.
        /// </summary>
        private static CssTokenType TypeFromName(string function)
        {
            if (function.Equals(FunctionNames.Url, StringComparison.OrdinalIgnoreCase))
            {
                return CssTokenType.Url;
            }

            if (function.Equals(FunctionNames.Domain, StringComparison.OrdinalIgnoreCase))
            {
                return CssTokenType.Domain;
            }

            if (function.Equals(FunctionNames.Url_Prefix, StringComparison.OrdinalIgnoreCase))
            {
                return CssTokenType.UrlPrefix;
            }

            return CssTokenType.Function;
        }
    }
}