AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="0.9.5" />

 CssTokenizer

sealed class CssTokenizer : BaseTokenizer
The CSS tokenizer. See http://dev.w3.org/csswg/css-syntax/#tokenization for more details.
using AngleSharp.Css;
using AngleSharp.Events;
using AngleSharp.Extensions;
using System.Diagnostics;
using System.Globalization;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// The CSS tokenizer. See http://dev.w3.org/csswg/css-syntax/#tokenization
    /// for more details.
    /// </summary>
    [DebuggerStepThrough]
    internal sealed class CssTokenizer : BaseTokenizer
    {
        // The special characters are spelled as escapes on purpose: written as
        // raw characters they are easily damaged by editors and re-encoding.

        /// <summary>Character the tokenizer treats as the end of the input.</summary>
        private const char EofChar = '\uFFFF';

        /// <summary>Form feed; handled as whitespace and as a line break inside strings.</summary>
        private const char FormFeedChar = '\f';

        /// <summary>Substituted for escapes whose code point is rejected.</summary>
        private const char ReplacementChar = '\uFFFD';

        private bool _valueMode;

        // Position recorded when the current token was started; every token
        // created by the New* factories below carries this position.
        private TextPosition _position;

        /// <summary>
        /// Gets or sets whether the tokenizer is in value mode. In value mode
        /// a '#' starts a color literal instead of a hash token.
        /// </summary>
        public bool IsInValue
        {
            get { return _valueMode; }
            set { _valueMode = value; }
        }

        /// <summary>
        /// Creates a new tokenizer over the given source.
        /// </summary>
        /// <param name="source">The text source to read characters from.</param>
        /// <param name="events">The aggregator that receives parse errors; may be null.</param>
        public CssTokenizer(TextSource source, IEventAggregator events)
            : base(source, events)
        {
            _valueMode = false;
        }

        /// <summary>
        /// Reads the next character and produces the token that starts there.
        /// </summary>
        /// <returns>The next token.</returns>
        public CssToken Get()
        {
            char next = GetNext();
            _position = GetCurrentPosition();
            return Data(next);
        }

        /// <summary>
        /// Publishes a parse error event, if an event aggregator is available.
        /// </summary>
        /// <param name="error">The error that occurred.</param>
        /// <param name="position">The position to report for the error.</param>
        public void RaiseErrorOccurred(CssParseError error, TextPosition position)
        {
            if (_events != null)
            {
                CssParseErrorEvent data = new CssParseErrorEvent(error.GetCode(), error.GetMessage(), position);
                _events.Publish(data);
            }
        }

        /// <summary>
        /// Dispatches on the first character of a token.
        /// </summary>
        /// <param name="current">The character the token starts with.</param>
        /// <returns>The token that was recognized.</returns>
        private CssToken Data(char current)
        {
            _position = GetCurrentPosition();

            switch (current)
            {
                case '\t':
                case '\n':
                case FormFeedChar:
                case '\r':
                case ' ':
                    return NewWhitespace(current);

                case '"':
                    return StringDQ();

                case '#':
                    if (!_valueMode)
                    {
                        return HashStart();
                    }

                    return ColorLiteral();

                case '$':
                    return MatchOrDelimiter(CombinatorSymbols.Ends);

                case '\'':
                    return StringSQ();

                case '(':
                    return NewOpenRound();

                case ')':
                    return NewCloseRound();

                case '*':
                    return MatchOrDelimiter(CombinatorSymbols.InText);

                case '+':
                {
                    char first = GetNext();

                    if (first != EofChar)
                    {
                        // Peek two characters ahead, then rewind to the sign.
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case ',':
                    return NewComma();

                case '.':
                {
                    char next = GetNext();

                    if (next.IsDigit())
                    {
                        return NumberStart(GetPrevious());
                    }

                    return NewDelimiter(GetPrevious());
                }

                case '-':
                {
                    char first = GetNext();

                    if (first != EofChar)
                    {
                        // Peek two characters ahead, then rewind to the sign.
                        char second = GetNext();
                        Back(2);

                        if (first.IsDigit() || (first == '.' && second.IsDigit()))
                        {
                            return NumberStart(current);
                        }

                        if (first.IsNameStart())
                        {
                            return IdentStart(current);
                        }

                        if (first == '\\' && !second.IsLineBreak() && second != EofChar)
                        {
                            return IdentStart(current);
                        }

                        if (first == '-' && second == '>')
                        {
                            Advance(2);
                            return NewCloseComment();
                        }
                    }
                    else
                    {
                        Back();
                    }

                    return NewDelimiter(current);
                }

                case '/':
                    current = GetNext();

                    if (current == '*')
                    {
                        return Comment();
                    }

                    return NewDelimiter(GetPrevious());

                case '\\':
                    current = GetNext();

                    if (current.IsLineBreak())
                    {
                        RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                        return NewDelimiter(GetPrevious());
                    }

                    if (current == EofChar)
                    {
                        RaiseErrorOccurred(CssParseError.EOF);
                        return NewDelimiter(GetPrevious());
                    }

                    return IdentStart(GetPrevious());

                case ':':
                    return NewColon();

                case ';':
                    return NewSemicolon();

                case '<':
                    current = GetNext();

                    if (current == '!')
                    {
                        current = GetNext();

                        if (current == '-')
                        {
                            current = GetNext();

                            if (current == '-')
                            {
                                return NewOpenComment();
                            }

                            GetPrevious();
                        }

                        GetPrevious();
                    }

                    // Every look-ahead step is undone, so this yields '<'.
                    return NewDelimiter(GetPrevious());

                case '@':
                    return AtKeywordStart();

                case '[':
                    return NewOpenSquare();

                case ']':
                    return NewCloseSquare();

                case '^':
                    return MatchOrDelimiter(CombinatorSymbols.Begins);

                case '{':
                    return NewOpenCurly();

                case '}':
                    return NewCloseCurly();

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    return NumberStart(current);

                case 'U':
                case 'u':
                    current = GetNext();

                    if (current == '+')
                    {
                        current = GetNext();

                        if (current.IsHex() || current == '?')
                        {
                            return UnicodeRange(current);
                        }

                        GetPrevious();
                    }

                    return IdentStart(GetPrevious());

                case '|':
                    current = GetNext();

                    switch (current)
                    {
                        case '=':
                            return NewMatch(CombinatorSymbols.InToken);
                        case '|':
                            return NewColumn();
                        default:
                            return NewDelimiter(GetPrevious());
                    }

                case '~':
                    return MatchOrDelimiter(CombinatorSymbols.InList);

                case EofChar:
                    return NewEof();

                case '!':
                    return MatchOrDelimiter(CombinatorSymbols.Unlike);

                default:
                    if (current.IsNameStart())
                    {
                        return IdentStart(current);
                    }

                    return NewDelimiter(current);
            }
        }

        /// <summary>
        /// Handles the characters that form a match token when followed by '='
        /// and are a plain delimiter otherwise.
        /// </summary>
        /// <param name="match">The match symbol to use if '=' follows.</param>
        private CssToken MatchOrDelimiter(string match)
        {
            if (GetNext() == '=')
            {
                return NewMatch(match);
            }

            return NewDelimiter(GetPrevious());
        }

        /// <summary>Reads a string delimited by double quotes.</summary>
        private CssToken StringDQ()
        {
            return StringToken('"');
        }

        /// <summary>Reads a string delimited by single quotes.</summary>
        private CssToken StringSQ()
        {
            return StringToken('\'');
        }

        /// <summary>
        /// Reads a quoted string up to the closing quote or the end of input.
        /// An unescaped line feed or form feed ends the string as a bad string.
        /// </summary>
        /// <param name="quote">The quote character that opened the string.</param>
        private CssToken StringToken(char quote)
        {
            while (true)
            {
                char next = GetNext();

                if (next == quote || next == EofChar)
                {
                    return NewString(FlushBuffer(), quote, false);
                }

                if (next == '\n' || next == FormFeedChar)
                {
                    RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                    Back();
                    return NewString(FlushBuffer(), quote, true);
                }

                if (next != '\\')
                {
                    _stringBuffer.Append(next);
                    continue;
                }

                next = GetNext();

                if (next.IsLineBreak())
                {
                    // An escaped line break is appended as a line break.
                    _stringBuffer.AppendLine();
                }
                else if (next == EofChar)
                {
                    RaiseErrorOccurred(CssParseError.EOF);
                    Back();
                    return NewString(FlushBuffer(), quote, true);
                }
                else
                {
                    _stringBuffer.Append(ConsumeEscape(next));
                }
            }
        }

        /// <summary>Reads the hexadecimal digits following '#' as a color token.</summary>
        private CssToken ColorLiteral()
        {
            char next = GetNext();

            while (next.IsHex())
            {
                _stringBuffer.Append(next);
                next = GetNext();
            }

            Back();
            return NewColor(FlushBuffer());
        }

        /// <summary>
        /// Decides whether '#' starts a hash token or is a plain delimiter.
        /// </summary>
        private CssToken HashStart()
        {
            char next = GetNext();

            if (next.IsNameStart())
            {
                _stringBuffer.Append(next);
                return HashRest();
            }

            if (IsValidEscape(next))
            {
                next = GetNext();
                _stringBuffer.Append(ConsumeEscape(next));
                return HashRest();
            }

            if (next == '\\')
            {
                // A backslash that does not start a valid escape.
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewDelimiter('#');
        }

        /// <summary>Reads the remaining name characters and escapes of a hash token.</summary>
        private CssToken HashRest()
        {
            char next;

            while (true)
            {
                next = GetNext();

                if (next.IsName())
                {
                    _stringBuffer.Append(next);
                }
                else if (IsValidEscape(next))
                {
                    next = GetNext();
                    _stringBuffer.Append(ConsumeEscape(next));
                }
                else
                {
                    break;
                }
            }

            if (next == '\\')
            {
                // A backslash that does not start a valid escape.
                RaiseErrorOccurred(CssParseError.InvalidCharacter);
            }

            Back();
            return NewHash(FlushBuffer());
        }

        /// <summary>
        /// Reads a comment after its opening "/*". Reaching the end of input
        /// first raises an error and yields a bad comment token.
        /// </summary>
        private CssToken Comment()
        {
            char next = GetNext();

            while (next != EofChar)
            {
                if (next == '*')
                {
                    next = GetNext();

                    if (next == '/')
                    {
                        return NewComment(FlushBuffer(), false);
                    }

                    _stringBuffer.Append('*');
                }
                else
                {
                    _stringBuffer.Append(next);
                    next = GetNext();
                }
            }

            RaiseErrorOccurred(CssParseError.EOF);
            return NewComment(FlushBuffer(), true);
        }

        /// <summary>
        /// Decides whether '@' starts an at-keyword or is a plain delimiter.
        /// </summary>
        private CssToken AtKeywordStart()
        {
            char next = GetNext();

            if (next == '-')
            {
                next = GetNext();

                if (next.IsNameStart() || IsValidEscape(next))
                {
                    _stringBuffer.Append('-');
                    return AtKeywordRest(next);
                }

                Back(2);
                return NewDelimiter('@');
            }

            if (next.IsNameStart())
            {
                _stringBuffer.Append(next);
                return AtKeywordRest(GetNext());
            }

            if (IsValidEscape(next))
            {
                next = GetNext();
                _stringBuffer.Append(ConsumeEscape(next));
                return AtKeywordRest(GetNext());
            }

            Back();
            return NewDelimiter('@');
        }

        /// <summary>Reads the remaining name characters and escapes of an at-keyword.</summary>
        /// <param name="current">The first character to examine.</param>
        private CssToken AtKeywordRest(char current)
        {
            while (true)
            {
                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }

                current = GetNext();
            }

            Back();
            return NewAtKeyword(FlushBuffer());
        }

        /// <summary>
        /// Starts an identifier at the given character, falling back to a '-'
        /// delimiter or to the general dispatch if no identifier starts here.
        /// </summary>
        /// <param name="current">The character the identifier may start with.</param>
        private CssToken IdentStart(char current)
        {
            if (current == '-')
            {
                current = GetNext();

                if (current.IsNameStart() || IsValidEscape(current))
                {
                    _stringBuffer.Append('-');
                    return IdentRest(current);
                }

                Back();
                return NewDelimiter('-');
            }

            if (current.IsNameStart())
            {
                _stringBuffer.Append(current);
                return IdentRest(GetNext());
            }

            if (current == '\\' && IsValidEscape(current))
            {
                current = GetNext();
                _stringBuffer.Append(ConsumeEscape(current));
                return IdentRest(GetNext());
            }

            return Data(current);
        }

        /// <summary>
        /// Reads the rest of an identifier. If '(' follows, the name becomes a
        /// function token or a URL token, depending on the type derived from it.
        /// </summary>
        /// <param name="current">The first character to examine.</param>
        private CssToken IdentRest(char current)
        {
            while (true)
            {
                if (current.IsName())
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }

                current = GetNext();
            }

            if (current == '(')
            {
                string name = FlushBuffer();
                CssTokenType type = name.GetTypeFromName();

                if (type != CssTokenType.Function)
                {
                    return UrlStart(name);
                }

                return NewFunction(name);
            }

            Back();
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Skips space characters looking for '('; yields a function token if
        /// one is found and an identifier otherwise.
        /// NOTE(review): nothing in this class calls this method.
        /// </summary>
        private CssToken TransformFunctionWhitespace(char current)
        {
            do
            {
                current = GetNext();

                if (current == '(')
                {
                    Back();
                    return NewFunction(FlushBuffer());
                }
            }
            while (current.IsSpaceCharacter());

            Back(2);
            return NewIdent(FlushBuffer());
        }

        /// <summary>
        /// Starts a number at a sign, a dot or a digit.
        /// </summary>
        /// <param name="current">The character the number starts with.</param>
        private CssToken NumberStart(char current)
        {
            while (true)
            {
                if (current.IsOneOf('+', '-'))
                {
                    _stringBuffer.Append(current);
                    current = GetNext();

                    if (current == '.')
                    {
                        _stringBuffer.Append(current);
                        _stringBuffer.Append(GetNext());
                        return NumberFraction();
                    }

                    _stringBuffer.Append(current);
                    return NumberRest();
                }

                if (current == '.')
                {
                    _stringBuffer.Append(current);
                    _stringBuffer.Append(GetNext());
                    return NumberFraction();
                }

                if (current.IsDigit())
                {
                    break;
                }

                current = GetNext();
            }

            _stringBuffer.Append(current);
            return NumberRest();
        }

        /// <summary>
        /// Reads the integer digits of a number and decides how it continues:
        /// fraction, percentage, dimension or plain number.
        /// </summary>
        private CssToken NumberRest()
        {
            char next = GetNext();

            while (next.IsDigit())
            {
                _stringBuffer.Append(next);
                next = GetNext();
            }

            if (next.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append(next);
                return Dimension(number);
            }

            if (IsValidEscape(next))
            {
                next = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(next));
                return Dimension(number);
            }

            switch (next)
            {
                case '.':
                    next = GetNext();

                    if (next.IsDigit())
                    {
                        _stringBuffer.Append('.').Append(next);
                        return NumberFraction();
                    }

                    Back();
                    return NewNumber(FlushBuffer());

                case '%':
                    return NewPercentage(FlushBuffer());

                // NOTE(review): 'e'/'E' presumably satisfy IsNameStart and are
                // then taken by the dimension branch above - confirm whether
                // this exponent branch can be reached.
                case 'E':
                case 'e':
                    return NumberExponential(next);

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the fractional digits of a number and decides how it
        /// continues: percentage, dimension or plain number.
        /// </summary>
        private CssToken NumberFraction()
        {
            char next = GetNext();

            while (next.IsDigit())
            {
                _stringBuffer.Append(next);
                next = GetNext();
            }

            if (next.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append(next);
                return Dimension(number);
            }

            if (IsValidEscape(next))
            {
                next = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append(ConsumeEscape(next));
                return Dimension(number);
            }

            switch (next)
            {
                // NOTE(review): see NumberRest - this branch is presumably
                // shadowed by the IsNameStart check above; confirm.
                case 'E':
                case 'e':
                    return NumberExponential(next);

                case '%':
                    return NewPercentage(FlushBuffer());

                case '-':
                    return NumberDash();

                default:
                    Back();
                    return NewNumber(FlushBuffer());
            }
        }

        /// <summary>
        /// Reads the unit of a dimension; the unit's first character is
        /// expected to be in the buffer already.
        /// </summary>
        /// <param name="number">The numeric part read so far.</param>
        private CssToken Dimension(string number)
        {
            while (true)
            {
                char next = GetNext();

                if (next.IsLetter())
                {
                    _stringBuffer.Append(next);
                }
                else if (IsValidEscape(next))
                {
                    next = GetNext();
                    _stringBuffer.Append(ConsumeEscape(next));
                }
                else
                {
                    break;
                }
            }

            Back();
            return NewDimension(number, FlushBuffer());
        }

        /// <summary>Reads the remaining digits of an exponent.</summary>
        private CssToken SciNotation()
        {
            char next = GetNext();

            while (next.IsDigit())
            {
                _stringBuffer.Append(next);
                next = GetNext();
            }

            Back();
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Starts a URL token after "name(": skips spaces and dispatches on
        /// the first significant character.
        /// </summary>
        /// <param name="functionName">The name preceding the parenthesis.</param>
        private CssToken UrlStart(string functionName)
        {
            char current = SkipSpaces();

            switch (current)
            {
                case EofChar:
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(functionName, string.Empty, true);

                case '"':
                    return UrlDQ(functionName);

                case '\'':
                    return UrlSQ(functionName);

                case ')':
                    return NewUrl(functionName, string.Empty, false);

                default:
                    return UrlUQ(current, functionName);
            }
        }

        /// <summary>Reads a URL quoted with double quotes.</summary>
        private CssToken UrlDQ(string functionName)
        {
            return UrlQuoted(functionName, '"');
        }

        /// <summary>Reads a URL quoted with single quotes.</summary>
        private CssToken UrlSQ(string functionName)
        {
            return UrlQuoted(functionName, '\'');
        }

        /// <summary>
        /// Reads a quoted URL. An unescaped line break makes the URL bad; the
        /// end of input closes it.
        /// </summary>
        /// <param name="functionName">The name preceding the parenthesis.</param>
        /// <param name="quote">The quote character that opened the URL.</param>
        private CssToken UrlQuoted(string functionName, char quote)
        {
            while (true)
            {
                char next = GetNext();

                if (next.IsLineBreak())
                {
                    RaiseErrorOccurred(CssParseError.LineBreakUnexpected);
                    return UrlBad(functionName);
                }

                if (next == EofChar)
                {
                    return NewUrl(functionName, FlushBuffer(), false);
                }

                if (next == quote)
                {
                    return UrlEnd(functionName);
                }

                if (next != '\\')
                {
                    _stringBuffer.Append(next);
                    continue;
                }

                next = GetNext();

                if (next == EofChar)
                {
                    // Rewind before raising so the error is reported there.
                    Back(2);
                    RaiseErrorOccurred(CssParseError.EOF);
                    return NewUrl(functionName, FlushBuffer(), true);
                }

                if (next.IsLineBreak())
                {
                    _stringBuffer.AppendLine();
                }
                else
                {
                    _stringBuffer.Append(ConsumeEscape(next));
                }
            }
        }

        /// <summary>Reads an unquoted URL.</summary>
        /// <param name="current">The first character of the URL.</param>
        /// <param name="functionName">The name preceding the parenthesis.</param>
        private CssToken UrlUQ(char current, string functionName)
        {
            while (true)
            {
                if (current.IsSpaceCharacter())
                {
                    return UrlEnd(functionName);
                }

                if (current.IsOneOf(')', EofChar))
                {
                    return NewUrl(functionName, FlushBuffer(), false);
                }

                if (current.IsOneOf('"', '\'', '(') || current.IsNonPrintable())
                {
                    RaiseErrorOccurred(CssParseError.InvalidCharacter);
                    return UrlBad(functionName);
                }

                if (current != '\\')
                {
                    _stringBuffer.Append(current);
                }
                else if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    break;
                }

                current = GetNext();
            }

            RaiseErrorOccurred(CssParseError.InvalidCharacter);
            return UrlBad(functionName);
        }

        /// <summary>
        /// Expects only space characters up to the closing parenthesis;
        /// anything else makes the URL bad.
        /// </summary>
        private CssToken UrlEnd(string functionName)
        {
            char next;

            do
            {
                next = GetNext();

                if (next == ')')
                {
                    return NewUrl(functionName, FlushBuffer(), false);
                }
            }
            while (next.IsSpaceCharacter());

            RaiseErrorOccurred(CssParseError.InvalidCharacter);
            Back();
            return UrlBad(functionName);
        }

        /// <summary>
        /// Recovers from a malformed URL by consuming input up to the matching
        /// ')', a ';', an unbalanced '}' or the end of input. The result is
        /// always a bad URL token.
        /// </summary>
        private CssToken UrlBad(string functionName)
        {
            char current = base.Current;
            int curly = 0;
            int round = 1;

            while (true)
            {
                switch (current)
                {
                    case ';':
                        Back();
                        return NewUrl(functionName, FlushBuffer(), true);

                    case '}':
                        if (--curly == -1)
                        {
                            Back();
                            return NewUrl(functionName, FlushBuffer(), true);
                        }

                        break;

                    case EofChar:
                        RaiseErrorOccurred(CssParseError.EOF);
                        return NewUrl(functionName, FlushBuffer(), true);
                }

                if (current == ')' && --round == 0)
                {
                    break;
                }

                if (IsValidEscape(current))
                {
                    current = GetNext();
                    _stringBuffer.Append(ConsumeEscape(current));
                }
                else
                {
                    if (current == '(')
                    {
                        round++;
                    }
                    else if (current == '{')
                    {
                        // Fixed: the brace counter itself used to be compared
                        // with the code of '{', so nesting was never counted.
                        curly++;
                    }

                    _stringBuffer.Append(current);
                }

                current = GetNext();
            }

            return NewUrl(functionName, FlushBuffer(), true);
        }

        /// <summary>
        /// Reads a unicode range after "U+": up to six hexadecimal digits,
        /// padded with '?' wildcards up to six characters, or - for a start of
        /// exactly six digits - an optional "-end" part.
        /// </summary>
        /// <param name="current">The first character after "U+".</param>
        private CssToken UnicodeRange(char current)
        {
            for (int i = 0; i < 6 && current.IsHex(); i++)
            {
                _stringBuffer.Append(current);
                current = GetNext();
            }

            if (_stringBuffer.Length != 6)
            {
                // Fixed: the number of allowed wildcards is computed once.
                // Re-reading the growing buffer length in the loop condition
                // accepted only about half of them.
                int missing = 6 - _stringBuffer.Length;

                while (missing > 0 && current == '?')
                {
                    _stringBuffer.Append(current);
                    current = GetNext();
                    missing--;
                }

                // Fixed: the look-ahead character is pushed back on every
                // path; it used to be lost when all wildcards were consumed.
                // NOTE(review): a "-end" part after a start shorter than six
                // digits is not consumed here - confirm whether intended.
                Back();
                return NewRange(FlushBuffer());
            }

            if (current == '-')
            {
                current = GetNext();

                if (current.IsHex())
                {
                    string start = FlushBuffer();

                    for (int i = 0; i < 6 && current.IsHex(); i++)
                    {
                        _stringBuffer.Append(current);
                        current = GetNext();
                    }

                    // Fixed: also push the look-ahead character back when the
                    // end value used all six digits.
                    Back();
                    return NewRange(start, FlushBuffer());
                }

                Back(2);
                return NewRange(FlushBuffer());
            }

            Back();
            return NewRange(FlushBuffer());
        }

        private CssToken NewMatch(string match)
        {
            return new CssToken(CssTokenType.Match, match, _position);
        }

        private CssToken NewColumn()
        {
            return new CssToken(CssTokenType.Column, CombinatorSymbols.Column, _position);
        }

        private CssToken NewCloseCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketClose, "}", _position);
        }

        private CssToken NewOpenCurly()
        {
            return new CssToken(CssTokenType.CurlyBracketOpen, "{", _position);
        }

        private CssToken NewCloseSquare()
        {
            return new CssToken(CssTokenType.SquareBracketClose, "]", _position);
        }

        private CssToken NewOpenSquare()
        {
            return new CssToken(CssTokenType.SquareBracketOpen, "[", _position);
        }

        private CssToken NewOpenComment()
        {
            return new CssToken(CssTokenType.Cdo, "<!--", _position);
        }

        private CssToken NewSemicolon()
        {
            return new CssToken(CssTokenType.Semicolon, ";", _position);
        }

        private CssToken NewColon()
        {
            return new CssToken(CssTokenType.Colon, ":", _position);
        }

        private CssToken NewCloseComment()
        {
            return new CssToken(CssTokenType.Cdc, "-->", _position);
        }

        private CssToken NewComma()
        {
            return new CssToken(CssTokenType.Comma, ",", _position);
        }

        private CssToken NewCloseRound()
        {
            return new CssToken(CssTokenType.RoundBracketClose, ")", _position);
        }

        private CssToken NewOpenRound()
        {
            return new CssToken(CssTokenType.RoundBracketOpen, "(", _position);
        }

        private CssToken NewString(string value, char quote, bool bad = false)
        {
            return new CssStringToken(value, bad, quote, _position);
        }

        private CssToken NewHash(string value)
        {
            return new CssKeywordToken(CssTokenType.Hash, value, _position);
        }

        private CssToken NewComment(string value, bool bad = false)
        {
            return new CssCommentToken(value, bad, _position);
        }

        private CssToken NewAtKeyword(string value)
        {
            return new CssKeywordToken(CssTokenType.AtKeyword, value, _position);
        }

        private CssToken NewIdent(string value)
        {
            return new CssKeywordToken(CssTokenType.Ident, value, _position);
        }

        /// <summary>
        /// Creates a function token and collects its argument tokens up to and
        /// including the closing parenthesis, or up to the end of input.
        /// </summary>
        /// <param name="value">The function name.</param>
        private CssToken NewFunction(string value)
        {
            CssFunctionToken function = new CssFunctionToken(value, _position);
            CssToken token = Get();

            while (token.Type != CssTokenType.EndOfFile)
            {
                function.AddArgumentToken(token);

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    break;
                }

                token = Get();
            }

            return function;
        }

        private CssToken NewPercentage(string value)
        {
            return new CssUnitToken(CssTokenType.Percentage, value, "%", _position);
        }

        private CssToken NewDimension(string value, string unit)
        {
            return new CssUnitToken(CssTokenType.Dimension, value, unit, _position);
        }

        private CssToken NewUrl(string functionName, string data, bool bad = false)
        {
            return new CssUrlToken(functionName, data, bad, _position);
        }

        private CssToken NewRange(string range)
        {
            return new CssRangeToken(range, _position);
        }

        private CssToken NewRange(string start, string end)
        {
            return new CssRangeToken(start, end, _position);
        }

        private CssToken NewWhitespace(char c)
        {
            return new CssToken(CssTokenType.Whitespace, c.ToString(), _position);
        }

        private CssToken NewNumber(string number)
        {
            return new CssNumberToken(number, _position);
        }

        private CssToken NewDelimiter(char c)
        {
            return new CssToken(CssTokenType.Delim, c.ToString(), _position);
        }

        private CssToken NewColor(string text)
        {
            return new CssColorToken(text, _position);
        }

        private CssToken NewEof()
        {
            return new CssToken(CssTokenType.EndOfFile, string.Empty, _position);
        }

        /// <summary>
        /// Handles an 'e' / 'E' after a number: an exponent if digits
        /// (optionally signed) follow, otherwise the start of a unit.
        /// </summary>
        /// <param name="letter">The exponent letter that was read.</param>
        private CssToken NumberExponential(char letter)
        {
            char next = GetNext();

            if (next.IsDigit())
            {
                _stringBuffer.Append(letter).Append(next);
                return SciNotation();
            }

            if (next == '+' || next == '-')
            {
                char sign = next;
                next = GetNext();

                if (next.IsDigit())
                {
                    _stringBuffer.Append(letter).Append(sign).Append(next);
                    return SciNotation();
                }

                Back();
            }

            string number = FlushBuffer();
            _stringBuffer.Append(letter);
            Back();
            return Dimension(number);
        }

        /// <summary>
        /// Handles a '-' after a number: the start of a unit if a name or an
        /// escape follows, otherwise the number ends before the dash.
        /// </summary>
        private CssToken NumberDash()
        {
            char next = GetNext();

            if (next.IsNameStart())
            {
                string number = FlushBuffer();
                _stringBuffer.Append('-').Append(next);
                return Dimension(number);
            }

            if (IsValidEscape(next))
            {
                next = GetNext();
                string number = FlushBuffer();
                _stringBuffer.Append('-').Append(ConsumeEscape(next));
                return Dimension(number);
            }

            Back(2);
            return NewNumber(FlushBuffer());
        }

        /// <summary>
        /// Resolves an escape sequence whose backslash has been consumed.
        /// Up to six hexadecimal digits are read as a code point; a single
        /// space character directly after them is swallowed.
        /// </summary>
        /// <param name="current">The first character after the backslash.</param>
        /// <returns>The text the escape sequence stands for.</returns>
        private string ConsumeEscape(char current)
        {
            if (current.IsHex())
            {
                char[] digits = new char[6];
                int count = 0;
                bool isHex = true;

                while (isHex && count < digits.Length)
                {
                    digits[count++] = current;
                    current = GetNext();
                    isHex = current.IsHex();
                }

                if (!current.IsSpaceCharacter())
                {
                    Back();
                }

                // Machine-readable data: parse independently of the culture.
                int code = int.Parse(new string(digits, 0, count), NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                if (!code.IsInvalid())
                {
                    return code.ConvertFromUtf32();
                }

                current = ReplacementChar;
            }

            return current.ToString();
        }

        /// <summary>
        /// Checks whether a backslash starts a valid escape, that is, whether
        /// it is followed by neither a line break nor the end of input. The
        /// following character is only peeked at.
        /// </summary>
        /// <param name="current">The character to check.</param>
        private bool IsValidEscape(char current)
        {
            if (current != '\\')
            {
                return false;
            }

            char following = GetNext();
            Back();
            return following != EofChar && !following.IsLineBreak();
        }

        /// <summary>Publishes a parse error at the current position.</summary>
        private void RaiseErrorOccurred(CssParseError code)
        {
            RaiseErrorOccurred(code, GetCurrentPosition());
        }
    }
}