AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.8.4.1" />

 CssParser

public sealed class CssParser
The CSS parser. See http://dev.w3.org/csswg/css-syntax/#parsing for more details.
using AngleSharp.Css;
using AngleSharp.Css.Conditions;
using AngleSharp.Css.Values;
using AngleSharp.Dom;
using AngleSharp.Dom.Collections;
using AngleSharp.Dom.Css;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// The CSS parser.
    /// See http://dev.w3.org/csswg/css-syntax/#parsing for more details.
    /// </summary>
    [DebuggerStepThrough]
    public sealed class CssParser
    {
        private readonly CssSelectorConstructor _selector;
        private readonly CssValueBuilder _value;
        private readonly CssTokenizer _tokenizer;
        private readonly object _syncGuard;
        private readonly CssStyleSheet _sheet;
        private bool _started;
        private Task<ICssStyleSheet> _task;

        /// <summary>
        /// Gets whether parsing was started through <see cref="ParseAsync()"/>,
        /// i.e. whether a parsing task has been created.
        /// </summary>
        public bool IsAsync => _task != null;

        /// <summary>
        /// Gets the stylesheet being filled by this parser. Reading the
        /// property triggers <see cref="Parse"/> if parsing has not started.
        /// </summary>
        public ICssStyleSheet Result
        {
            get
            {
                Parse();
                return _sheet;
            }
        }

        /// <summary>
        /// Creates a parser for CSS source given as a string.
        /// </summary>
        /// <param name="source">The CSS source code.</param>
        /// <param name="configuration">Optional configuration, passed on to the stylesheet.</param>
        public CssParser(string source, IConfiguration configuration = null)
            : this(new CssStyleSheet(configuration, new TextSource(source)))
        {
        }

        /// <summary>
        /// Creates a parser for CSS source read from a stream. The stream is
        /// decoded with the encoding returned by <c>configuration.DefaultEncoding()</c>.
        /// </summary>
        /// <param name="stream">The stream providing the CSS source.</param>
        /// <param name="configuration">Optional configuration, passed on to the stylesheet.</param>
        public CssParser(Stream stream, IConfiguration configuration = null)
            : this(new CssStyleSheet(configuration, new TextSource(stream, configuration.DefaultEncoding())))
        {
        }

        /// <summary>
        /// Creates a parser that fills the given stylesheet from the
        /// stylesheet's own source.
        /// </summary>
        /// <param name="stylesheet">The stylesheet to populate.</param>
        internal CssParser(CssStyleSheet stylesheet)
        {
            _selector = new CssSelectorConstructor();
            _value = new CssValueBuilder();
            _syncGuard = new object();
            _tokenizer = new CssTokenizer(stylesheet.Source, stylesheet.Options.Events);
            _started = false;
            _sheet = stylesheet;
        }

        /// <summary>
        /// Parses the source asynchronously without the possibility to cancel.
        /// </summary>
        /// <returns>The task yielding the stylesheet.</returns>
        public Task<ICssStyleSheet> ParseAsync()
        {
            return ParseAsync(CancellationToken.None);
        }

        /// <summary>
        /// Parses the source asynchronously. Only the first call starts the
        /// work; later calls return the task created by the first one.
        /// </summary>
        /// <param name="cancelToken">Token used while prefetching the source.</param>
        /// <returns>The task yielding the stylesheet. Never null.</returns>
        public Task<ICssStyleSheet> ParseAsync(CancellationToken cancelToken)
        {
            Task<ICssStyleSheet> task;

            lock (_syncGuard)
            {
                if (!_started)
                {
                    _started = true;
                    _task = KernelAsync(cancelToken);
                }

                task = _task;
            }

            // If parsing was started through Parse() there is no task; hand out a
            // completed one instead of null so that awaiting the result cannot
            // fail with a NullReferenceException. _task stays untouched so that
            // IsAsync keeps reporting how parsing was started.
            return task ?? Task.FromResult<ICssStyleSheet>(_sheet);
        }

        /// <summary>
        /// Parses the source synchronously, unless parsing has already been
        /// started.
        /// </summary>
        /// <returns>The stylesheet being filled by this parser.</returns>
        public ICssStyleSheet Parse()
        {
            if (!_started)
            {
                _started = true;
                Kernel();
            }

            return _sheet;
        }

        /// <summary>
        /// Reports an unknown at-rule and skips it.
        /// </summary>
        /// <param name="token">The token that introduced the rule.</param>
        /// <returns>Always null.</returns>
        internal CssRule CreateUnkownRule(CssToken token)
        {
            RaiseErrorOccurred(CssParseError.UnknownAtRule, token);
            _tokenizer.SkipUnknownRule();
            return null;
        }

        /// <summary>
        /// Reads a media rule: a media list followed by a block of nested rules.
        /// </summary>
        /// <returns>The rule, or null if no block follows the media list.</returns>
        internal CssMediaRule CreateMediaRule()
        {
            CssToken token = _tokenizer.Get();
            MediaList media = ReadMediaList(ref token);
            CssMediaRule rule = new CssMediaRule(media);

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssMediaRule>(token);
            }

            FillRules(rule);
            return rule;
        }

        /// <summary>
        /// Reads a page rule: a selector followed by a declaration block.
        /// </summary>
        /// <returns>The rule, or null if no block follows the selector.</returns>
        internal CssPageRule CreatePageRule()
        {
            CssToken token = _tokenizer.Get();
            CssPageRule rule = new CssPageRule();
            rule.Selector = ReadSelector(ref token);

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssPageRule>(token);
            }

            FillDeclarations(rule.Style);
            return rule;
        }

        /// <summary>
        /// Reads a font-face rule, which must start directly with a
        /// declaration block.
        /// </summary>
        /// <returns>The rule, or null if the next token does not open a block.</returns>
        internal CssFontFaceRule CreateFontFaceRule()
        {
            CssToken token = _tokenizer.Get();
            CssFontFaceRule rule = new CssFontFaceRule();

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssFontFaceRule>(token);
            }

            FillDeclarations(rule.Style);
            return rule;
        }

        /// <summary>
        /// Reads a supports rule: a condition followed by a block of nested rules.
        /// </summary>
        /// <returns>The rule, or null if no block follows the condition.</returns>
        internal CssSupportsRule CreateSupportsRule()
        {
            CssToken token = _tokenizer.Get();
            CssSupportsRule rule = new CssSupportsRule();
            rule.Condition = ReadCondition(ref token);

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssSupportsRule>(token);
            }

            FillRules(rule);
            return rule;
        }

        /// <summary>
        /// Reads a document rule: a list of document functions followed by a
        /// block of nested rules.
        /// </summary>
        /// <returns>The rule, or null if no block follows the functions.</returns>
        internal CssDocumentRule CreateDocumentRule()
        {
            CssToken token = _tokenizer.Get();
            CssDocumentRule rule = new CssDocumentRule();
            rule.Conditions.AddRange(ReadDocumentFunctions(ref token));

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssDocumentRule>(token);
            }

            FillRules(rule);
            return rule;
        }

        /// <summary>
        /// Reads a keyframes rule: a name followed by a block of keyframe rules.
        /// </summary>
        /// <returns>The rule, or null if no block follows the name.</returns>
        internal CssKeyframesRule CreateKeyframesRule()
        {
            CssToken token = _tokenizer.Get();
            CssKeyframesRule rule = new CssKeyframesRule();
            rule.Name = ReadRuleName(ref token);

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                return SkipDeclarations<CssKeyframesRule>(token);
            }

            FillKeyframeRules(rule);
            return rule;
        }

        /// <summary>
        /// Reads a namespace rule: an optional prefix followed by a URL. The
        /// rest of the statement up to the next semicolon is skipped.
        /// </summary>
        /// <returns>The created rule.</returns>
        internal CssNamespaceRule CreateNamespaceRule()
        {
            CssToken token = _tokenizer.Get();
            CssNamespaceRule rule = new CssNamespaceRule();
            rule.Prefix = ReadRuleName(ref token);

            if (token.Type == CssTokenType.Url)
            {
                rule.NamespaceUri = token.Data;
            }

            _tokenizer.JumpToNextSemicolon();
            return rule;
        }

        /// <summary>
        /// Reads a charset rule whose character set is given as a string token.
        /// The rest of the statement up to the next semicolon is skipped.
        /// </summary>
        /// <returns>The created rule.</returns>
        internal CssCharsetRule CreateCharsetRule()
        {
            CssToken token = _tokenizer.Get();
            CssCharsetRule rule = new CssCharsetRule();

            if (token.Type == CssTokenType.String)
            {
                rule.CharacterSet = token.Data;
            }

            _tokenizer.JumpToNextSemicolon();
            return rule;
        }

        /// <summary>
        /// Reads an import rule: a string or URL, optionally followed by a
        /// media list. The rest of the statement up to the next semicolon is
        /// skipped.
        /// </summary>
        /// <returns>The created rule.</returns>
        internal CssImportRule CreateImportRule()
        {
            CssToken token = _tokenizer.Get();
            CssImportRule rule = new CssImportRule();

            if (token.Is(CssTokenType.String, CssTokenType.Url))
            {
                rule.Href = token.Data;
                token = _tokenizer.Get();
                rule.Media = ReadMediaList(ref token);
            }

            _tokenizer.JumpToNextSemicolon();
            return rule;
        }

        /// <summary>
        /// Creates a rule from the given starting token. At-keywords are
        /// dispatched to the at-rule factory, tokens that cannot start a rule
        /// are reported and skipped, everything else is read as a style rule.
        /// </summary>
        /// <param name="token">The first token of the rule.</param>
        /// <returns>The rule, or null if none could be created.</returns>
        private CssRule CreateRule(CssToken token)
        {
            switch (token.Type)
            {
                case CssTokenType.AtKeyword:
                    return this.CreateAtRule(token);

                case CssTokenType.CurlyBracketOpen:
                    RaiseErrorOccurred(CssParseError.InvalidBlockStart, token);
                    _tokenizer.SkipUnknownRule();
                    return null;

                case CssTokenType.String:
                case CssTokenType.Url:
                case CssTokenType.RoundBracketClose:
                case CssTokenType.CurlyBracketClose:
                case CssTokenType.SquareBracketClose:
                    RaiseErrorOccurred(CssParseError.InvalidToken, token);
                    _tokenizer.SkipUnknownRule();
                    return null;

                default:
                    return CreateStyleRule(token);
            }
        }

        /// <summary>
        /// Reads a style rule: a selector followed by a declaration block. The
        /// declarations are consumed even when the selector is unusable.
        /// </summary>
        /// <param name="token">The first token of the selector.</param>
        /// <returns>The rule, or null if its selector is null.</returns>
        private CssStyleRule CreateStyleRule(CssToken token)
        {
            CssStyleRule rule = new CssStyleRule();
            rule.Selector = ReadSelector(ref token);
            FillDeclarations(rule.Style);
            return rule.Selector != null ? rule : null;
        }

        /// <summary>
        /// Feeds tokens to the selector constructor until EOF or a curly
        /// bracket is reached. The tokenizer runs in selector mode meanwhile.
        /// </summary>
        /// <param name="token">On entry the first selector token; on exit the token that ended the selector.</param>
        /// <returns>The result of the selector constructor.</returns>
        private ISelector ReadSelector(ref CssToken token)
        {
            _tokenizer.State = CssParseMode.Selector;
            _selector.Reset();
            // Remember the starting token so an error points at the selector's beginning.
            CssToken start = token;

            while (token.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketOpen, CssTokenType.CurlyBracketClose))
            {
                _selector.Apply(token);
                token = _tokenizer.Get();
            }

            if (!_selector.IsValid)
            {
                RaiseErrorOccurred(CssParseError.InvalidSelector, start);
            }

            _tokenizer.State = CssParseMode.Data;
            return _selector.Result;
        }

        /// <summary>
        /// Reads a single declaration (name, colon, value) into the given style.
        /// </summary>
        /// <param name="style">The declaration block receiving the property.</param>
        /// <param name="token">On entry the first token of the declaration; on exit the token following it.</param>
        /// <returns>
        /// The property that was read (known or unknown), or null if the
        /// declaration did not start with an identifier or lacked the colon.
        /// </returns>
        private CssProperty ReadDeclaration(CssStyleDeclaration style, ref CssToken token)
        {
            if (token.Type != CssTokenType.Ident)
            {
                RaiseErrorOccurred(CssParseError.IdentExpected, token);

                // Recover by dropping the malformed declaration. Without moving on,
                // FillDeclarations would see the very same token again and never
                // terminate. A stray ';' is left alone: every caller already steps
                // over a semicolon itself.
                if (token.Type != CssTokenType.Semicolon)
                {
                    _tokenizer.JumpToEndOfDeclaration();
                    token = _tokenizer.Get();
                }

                return null;
            }

            CssProperty property = null;
            string name = token.Data;
            token = _tokenizer.Get();

            if (token.Type != CssTokenType.Colon)
            {
                RaiseErrorOccurred(CssParseError.ColonMissing, token);
            }
            else
            {
                property = style.CreateProperty(name);

                if (property == null)
                {
                    RaiseErrorOccurred(CssParseError.UnknownDeclarationName, token);
                    property = new CssUnknownProperty(name, style);
                }

                CssValue value = ReadValue(ref token);

                if (value == null)
                {
                    RaiseErrorOccurred(CssParseError.ValueMissing, token);
                }
                else if (property.TrySetValue(value))
                {
                    style.SetProperty(property);
                }

                property.IsImportant = _value.IsImportant;
            }

            _tokenizer.JumpToEndOfDeclaration();
            token = _tokenizer.Get();
            return property;
        }

        /// <summary>
        /// Reads a comma separated list of document functions.
        /// </summary>
        /// <param name="token">On entry the first token of the list; on exit the first token that is not part of it.</param>
        /// <returns>The functions that were read (possibly empty).</returns>
        private List<IDocumentFunction> ReadDocumentFunctions(ref CssToken token)
        {
            List<IDocumentFunction> functions = new List<IDocumentFunction>();

            while (true)
            {
                IDocumentFunction function = token.ToDocumentFunction();

                if (function == null)
                {
                    break;
                }

                functions.Add(function);
                token = _tokenizer.Get();

                if (token.Type != CssTokenType.Comma)
                {
                    break;
                }

                // Step over the separator. Re-entering the loop with the comma
                // itself would end the list after the first function.
                token = _tokenizer.Get();
            }

            return functions;
        }

        /// <summary>
        /// Reads a single keyframe rule: a keyframe selector followed by a
        /// declaration block.
        /// </summary>
        /// <param name="token">The first token of the keyframe selector.</param>
        /// <returns>The rule, or null if the selector is invalid.</returns>
        private CssKeyframeRule CreateKeyframeRule(CssToken token)
        {
            CssKeyframeRule rule = new CssKeyframeRule();
            rule.Key = ReadKeyframeSelector(ref token);

            if (rule.Key == null)
            {
                _tokenizer.JumpToEndOfDeclaration();
                return null;
            }

            FillDeclarations(rule.Style);
            return rule;
        }

        /// <summary>
        /// Reads a comma separated list of keyframe stops (percentages or the
        /// keywords from / to), ending at an opening curly bracket or EOF.
        /// </summary>
        /// <param name="token">On entry the first token of the selector; on exit the token that ended it.</param>
        /// <returns>The selector, or null if an unexpected token was found.</returns>
        private KeyframeSelector ReadKeyframeSelector(ref CssToken token)
        {
            List<Percent> stops = new List<Percent>();

            while (token.Type != CssTokenType.Eof)
            {
                if (stops.Count > 0)
                {
                    if (token.Type == CssTokenType.CurlyBracketOpen)
                    {
                        break;
                    }

                    if (token.Type != CssTokenType.Comma)
                    {
                        return null;
                    }

                    token = _tokenizer.Get();
                }

                if (token.Type == CssTokenType.Percentage)
                {
                    stops.Add(new Percent(((CssUnitToken)token).Value));
                }
                else if (token.Type == CssTokenType.Ident && token.Data.Equals(Keywords.From))
                {
                    stops.Add(Percent.Zero);
                }
                else if (token.Type == CssTokenType.Ident && token.Data.Equals(Keywords.To))
                {
                    stops.Add(Percent.Hundred);
                }
                else
                {
                    return null;
                }

                token = _tokenizer.Get();
            }

            return new KeyframeSelector(stops);
        }

        /// <summary>
        /// Reads a comma separated list of media queries.
        /// </summary>
        /// <param name="token">On entry the first token of the list; on exit the token following it.</param>
        /// <returns>The media list containing all queries that could be read.</returns>
        private MediaList ReadMediaList(ref CssToken token)
        {
            MediaList list = new MediaList();

            while (token.Type != CssTokenType.Eof)
            {
                CssMedium medium = ReadMediaValue(ref token);

                if (medium == null)
                {
                    break;
                }

                list.Add(medium);

                if (token.Type != CssTokenType.Comma)
                {
                    break;
                }

                token = _tokenizer.Get();
            }

            if (token.Type != CssTokenType.CurlyBracketOpen)
            {
                // The list did not end at a block start: step over a dangling ')'
                // and an immediately following '{', then skip ahead.
                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    token = _tokenizer.Get();
                }

                if (token.Type == CssTokenType.CurlyBracketOpen)
                {
                    token = _tokenizer.Get();
                }

                _tokenizer.JumpToEndOfDeclaration();
                token = _tokenizer.Get();
            }
            else if (list.Length == 0)
            {
                _tokenizer.JumpToEndOfDeclaration();
                token = _tokenizer.Get();
            }

            return list;
        }

        /// <summary>
        /// Reads a single media query: optional not / only, optional media
        /// type and any number of "and"-connected constraints in parentheses.
        /// </summary>
        /// <param name="token">On entry the first token of the query; on exit the token following it.</param>
        /// <returns>The medium, or null if the query is malformed.</returns>
        private CssMedium ReadMediaValue(ref CssToken token)
        {
            CssMedium medium = new CssMedium();

            if (token.Type == CssTokenType.Ident)
            {
                string modifier = token.Data;

                if (modifier.Equals(Keywords.Not, StringComparison.OrdinalIgnoreCase))
                {
                    medium.IsInverse = true;
                    token = _tokenizer.Get();
                }
                else if (modifier.Equals(Keywords.Only, StringComparison.OrdinalIgnoreCase))
                {
                    medium.IsExclusive = true;
                    token = _tokenizer.Get();
                }
            }

            if (token.Type == CssTokenType.Ident)
            {
                medium.Type = token.Data;
                token = _tokenizer.Get();

                if (!IsAndKeyword(token))
                {
                    return medium;
                }

                token = _tokenizer.Get();
            }

            do
            {
                if (token.Type != CssTokenType.RoundBracketOpen)
                {
                    return null;
                }

                token = _tokenizer.Get();

                if (!TrySetConstraint(medium, ref token) || token.Type != CssTokenType.RoundBracketClose)
                {
                    return null;
                }

                token = _tokenizer.Get();

                if (!IsAndKeyword(token))
                {
                    break;
                }

                token = _tokenizer.Get();
            }
            while (token.Type != CssTokenType.Eof);

            return medium;
        }

        /// <summary>
        /// Checks whether the token is the identifier "and" (ignoring case).
        /// </summary>
        private static bool IsAndKeyword(CssToken token)
        {
            return token.Type == CssTokenType.Ident &&
                   string.Compare(token.Data, Keywords.And, StringComparison.OrdinalIgnoreCase) == 0;
        }

        /// <summary>
        /// Reads one media constraint (feature name with optional value) and
        /// adds it to the medium when a value is given.
        /// </summary>
        /// <param name="medium">The medium receiving the constraint.</param>
        /// <param name="token">On entry the token after '('; on exit the token that ended the constraint.</param>
        /// <returns>False if the constraint has no feature name or EOF was hit inside the value; otherwise true.</returns>
        private bool TrySetConstraint(CssMedium medium, ref CssToken token)
        {
            if (token.Type != CssTokenType.Ident)
            {
                _tokenizer.JumpToClosedArguments();
                token = _tokenizer.Get();
                return false;
            }

            _value.Reset();
            string feature = token.Data;
            token = _tokenizer.Get();

            if (token.Type == CssTokenType.Colon)
            {
                _tokenizer.State = CssParseMode.Value;
                token = _tokenizer.Get();

                // A ')' only ends the value once the value builder reports ready.
                while (token.Type != CssTokenType.RoundBracketClose || !_value.IsReady)
                {
                    if (token.Type == CssTokenType.Eof)
                    {
                        return false;
                    }

                    _value.Apply(token);
                    token = _tokenizer.Get();
                }

                _tokenizer.State = CssParseMode.Data;
                medium.AddConstraint(feature, _value.Result);
            }

            return true;
        }

        /// <summary>
        /// Reads an optional rule name.
        /// </summary>
        /// <param name="token">The current token; advanced only if it is an identifier.</param>
        /// <returns>The identifier's data, or the empty string if there is none.</returns>
        private string ReadRuleName(ref CssToken token)
        {
            string name = string.Empty;

            if (token.Type == CssTokenType.Ident)
            {
                name = token.Data;
                token = _tokenizer.Get();
            }

            return name;
        }

        /// <summary>
        /// Reads a value by feeding tokens to the value builder until EOF, a
        /// semicolon, a closing curly bracket, or a closing round bracket that
        /// arrives while the builder is ready. The tokenizer runs in value
        /// mode meanwhile.
        /// </summary>
        /// <param name="token">Ignored on entry (a fresh token is fetched); on exit the token that ended the value.</param>
        /// <returns>The result of the value builder.</returns>
        private CssValue ReadValue(ref CssToken token)
        {
            _tokenizer.State = CssParseMode.Value;
            _value.Reset();
            token = _tokenizer.Get();

            while (token.Type != CssTokenType.Eof &&
                   !token.Is(CssTokenType.Semicolon, CssTokenType.CurlyBracketClose) &&
                   (token.Type != CssTokenType.RoundBracketClose || !_value.IsReady))
            {
                _value.Apply(token);
                token = _tokenizer.Get();
            }

            _tokenizer.State = CssParseMode.Data;
            return _value.Result;
        }

        /// <summary>
        /// Reads a supports condition, combining several conditions that are
        /// connected by "and" or by "or".
        /// </summary>
        /// <param name="token">On entry the first token of the condition; on exit the token following it.</param>
        /// <returns>The condition, or null if none could be read.</returns>
        private ICondition ReadCondition(ref CssToken token)
        {
            ICondition condition = ExtractCondition(ref token);

            if (condition != null)
            {
                if (token.Data.Equals(Keywords.And, StringComparison.OrdinalIgnoreCase))
                {
                    token = _tokenizer.Get();
                    List<ICondition> operands = MultipleConditions(condition, Keywords.And, ref token);
                    return new AndCondition(operands);
                }

                if (token.Data.Equals(Keywords.Or, StringComparison.OrdinalIgnoreCase))
                {
                    token = _tokenizer.Get();
                    List<ICondition> operands = MultipleConditions(condition, Keywords.Or, ref token);
                    return new OrCondition(operands);
                }
            }

            return condition;
        }

        /// <summary>
        /// Reads a single condition: either a parenthesized group or
        /// declaration, or a "not" followed by another condition.
        /// </summary>
        /// <param name="token">On entry the first token of the condition; on exit the token following it.</param>
        /// <returns>The condition, or null if none could be read.</returns>
        private ICondition ExtractCondition(ref CssToken token)
        {
            ICondition condition = null;

            if (token.Type == CssTokenType.RoundBracketOpen)
            {
                token = _tokenizer.Get();
                condition = ReadCondition(ref token);

                if (condition != null)
                {
                    condition = new GroupCondition(condition);
                }
                else if (token.Type == CssTokenType.Ident)
                {
                    condition = DeclarationCondition(ref token);
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    token = _tokenizer.Get();
                }
            }
            else if (token.Data.Equals(Keywords.Not, StringComparison.OrdinalIgnoreCase))
            {
                token = _tokenizer.Get();
                condition = ExtractCondition(ref token);

                if (condition != null)
                {
                    condition = new NotCondition(condition);
                }
            }

            return condition;
        }

        /// <summary>
        /// Reads a declaration used as a supports condition (name: value).
        /// </summary>
        /// <param name="token">On entry the identifier naming the property; on exit the token that ended the declaration.</param>
        /// <returns>The condition, or null if the colon or the value is missing.</returns>
        private ICondition DeclarationCondition(ref CssToken token)
        {
            string name = token.Data;
            CssStyleDeclaration style = new CssStyleDeclaration((string)null);
            CssProperty property = Factory.Properties.Create(name, style);

            if (property == null)
            {
                property = new CssUnknownProperty(name, style);
            }

            token = _tokenizer.Get();

            if (token.Type == CssTokenType.Colon)
            {
                CssValue value = ReadValue(ref token);
                property.IsImportant = _value.IsImportant;

                if (value != null)
                {
                    return new DeclarationCondition(property, value);
                }
            }

            return null;
        }

        /// <summary>
        /// Collects further conditions that are joined to the first one by the
        /// given connector keyword.
        /// </summary>
        /// <param name="condition">The condition that was already read.</param>
        /// <param name="connector">The keyword joining the conditions.</param>
        /// <param name="token">On entry the token after the first connector; on exit the token following the last condition.</param>
        /// <returns>All conditions, starting with the one passed in.</returns>
        private List<ICondition> MultipleConditions(ICondition condition, string connector, ref CssToken token)
        {
            List<ICondition> conditions = new List<ICondition>();
            conditions.Add(condition);

            while (token.Type != CssTokenType.Eof)
            {
                condition = ExtractCondition(ref token);

                if (condition == null)
                {
                    break;
                }

                conditions.Add(condition);

                if (!token.Data.Equals(connector, StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }

                token = _tokenizer.Get();
            }

            return conditions;
        }

        /// <summary>
        /// Reports a parse error at the position of the given token.
        /// </summary>
        private void RaiseErrorOccurred(CssParseError code, CssToken token)
        {
            _tokenizer.RaiseErrorOccurred(code, token.Position);
        }

        /// <summary>
        /// Reports an invalid token, skips the current rule and yields the
        /// default value of <typeparamref name="T"/>.
        /// </summary>
        private T SkipDeclarations<T>(CssToken token)
        {
            RaiseErrorOccurred(CssParseError.InvalidToken, token);
            _tokenizer.SkipUnknownRule();
            return default(T);
        }

        /// <summary>
        /// The synchronous parsing loop: turns tokens into top-level rules
        /// until EOF is reached.
        /// </summary>
        private void Kernel()
        {
            CssToken token = _tokenizer.Get();

            // Test for EOF before consuming: an empty source contains no rule, so
            // the EOF token must not be handed to the rule factory (which would
            // report an invalid selector for it).
            while (token.Type != CssTokenType.Eof)
            {
                Consume(token);
                token = _tokenizer.Get();
            }
        }

        /// <summary>
        /// Prefetches the whole source and then runs the synchronous loop.
        /// </summary>
        /// <param name="cancelToken">Token passed to the prefetch operation.</param>
        /// <returns>The task yielding the stylesheet.</returns>
        private async Task<ICssStyleSheet> KernelAsync(CancellationToken cancelToken)
        {
            await _sheet.Source.PrefetchAll(cancelToken).ConfigureAwait(false);
            Kernel();
            return _sheet;
        }

        /// <summary>
        /// Creates a rule from the token and adds it to the stylesheet.
        /// </summary>
        private void Consume(CssToken token)
        {
            CssRule rule = CreateRule(token);

            if (rule != null)
            {
                _sheet.Rules.Add(rule, _sheet, null);
            }
        }

        /// <summary>
        /// Creates a rule from the token and adds it to the given parent rule.
        /// </summary>
        private void Consume(CssToken token, CssGroupingRule parent)
        {
            CssRule rule = CreateRule(token);

            if (rule != null)
            {
                parent.Rules.Add(rule, _sheet, parent);
            }
        }

        /// <summary>
        /// Reads nested rules into the grouping rule until the closing curly
        /// bracket or EOF.
        /// </summary>
        private void FillRules(CssGroupingRule rule)
        {
            CssToken token = _tokenizer.Get();

            while (token.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose))
            {
                Consume(token, rule);
                token = _tokenizer.Get();
            }
        }

        /// <summary>
        /// Reads keyframe rules into the keyframes rule until the closing
        /// curly bracket or EOF.
        /// </summary>
        private void FillKeyframeRules(CssKeyframesRule parentRule)
        {
            CssToken token = _tokenizer.Get();

            while (token.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose))
            {
                CssKeyframeRule rule = CreateKeyframeRule(token);

                if (rule != null)
                {
                    parentRule.Rules.Add(rule, _sheet, parentRule);
                }

                token = _tokenizer.Get();
            }
        }

        /// <summary>
        /// Reads declarations into the style until the closing curly bracket
        /// or EOF.
        /// </summary>
        private void FillDeclarations(CssStyleDeclaration style)
        {
            CssToken token = _tokenizer.Get();

            while (token.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose))
            {
                // ReadDeclaration advances the token itself, except for a bare ';'.
                ReadDeclaration(style, ref token);

                if (token.Type == CssTokenType.Semicolon)
                {
                    token = _tokenizer.Get();
                }
            }
        }

        /// <summary>
        /// Parses the given text as a selector.
        /// </summary>
        /// <param name="selectorText">The selector source.</param>
        /// <returns>The selector produced by the pooled selector constructor.</returns>
        public static ISelector ParseSelector(string selectorText)
        {
            TextSource source = new TextSource(selectorText);
            CssTokenizer tokenizer = new CssTokenizer(source, null);
            tokenizer.State = CssParseMode.Selector;
            CssSelectorConstructor constructor = Pool.NewSelectorConstructor();

            for (CssToken token = tokenizer.Get(); token.Type != CssTokenType.Eof; token = tokenizer.Get())
            {
                constructor.Apply(token);
            }

            return constructor.ToPool();
        }

        /// <summary>
        /// Parses the given text as a keyframe selector.
        /// </summary>
        /// <param name="keyText">The keyframe selector source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The selector, or null if the text is invalid or not consumed completely.</returns>
        public static IKeyframeSelector ParseKeyframeSelector(string keyText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(keyText, configuration ?? Configuration.Default);
            CssToken token = parser._tokenizer.Get();
            KeyframeSelector selector = parser.ReadKeyframeSelector(ref token);
            return token.Type == CssTokenType.Eof ? selector : null;
        }

        /// <summary>
        /// Parses the given text as a value.
        /// </summary>
        /// <param name="valueText">The value source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The value, or null if the text is not consumed completely.</returns>
        internal static CssValue ParseValue(string valueText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(valueText, configuration ?? Configuration.Default);
            // ReadValue fetches the first token itself, so no token is needed up front.
            CssToken token = null;
            CssValue value = parser.ReadValue(ref token);
            return token.Type == CssTokenType.Eof ? value : null;
        }

        /// <summary>
        /// Parses the given text as a single rule.
        /// </summary>
        /// <param name="ruleText">The rule source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The rule, or null if no rule was created or text remains after it.</returns>
        internal static CssRule ParseRule(string ruleText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(ruleText, configuration ?? Configuration.Default);
            CssRule rule = parser.CreateRule(parser._tokenizer.Get());
            CssToken next = parser._tokenizer.Get();
            return next.Type == CssTokenType.Eof ? rule : null;
        }

        /// <summary>
        /// Parses the given text as a list of declarations.
        /// </summary>
        /// <param name="declarations">The declarations source.</param>
        /// <param name="configuration">Optional configuration.</param>
        /// <returns>A new style declaration filled from the text.</returns>
        internal static CssStyleDeclaration ParseDeclarations(string declarations, IConfiguration configuration = null)
        {
            CssStyleDeclaration style = new CssStyleDeclaration((string)null);
            AppendDeclarations(style, declarations, configuration);
            return style;
        }

        /// <summary>
        /// Parses the given text as a single declaration, optionally
        /// terminated by a semicolon.
        /// </summary>
        /// <param name="declarationText">The declaration source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The property, or null if the declaration is invalid or text remains after it.</returns>
        internal static CssProperty ParseDeclaration(string declarationText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(declarationText, configuration ?? Configuration.Default);
            CssStyleDeclaration style = new CssStyleDeclaration((string)null);
            CssToken token = parser._tokenizer.Get();
            CssProperty property = parser.ReadDeclaration(style, ref token);

            if (token.Type == CssTokenType.Semicolon)
            {
                token = parser._tokenizer.Get();
            }

            return token.Type == CssTokenType.Eof ? property : null;
        }

        /// <summary>
        /// Parses the given text as a comma separated list of media queries.
        /// </summary>
        /// <param name="mediaText">The media list source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The list of media.</returns>
        /// <exception cref="DomException">Thrown with a syntax error if a query is malformed.</exception>
        internal static List<CssMedium> ParseMediaList(string mediaText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(mediaText, configuration ?? Configuration.Default);
            List<CssMedium> media = new List<CssMedium>();
            CssToken token = parser._tokenizer.Get();

            while (token.Type != CssTokenType.Eof)
            {
                CssMedium medium = parser.ReadMediaValue(ref token);

                if (medium == null || token.IsNot(CssTokenType.Comma, CssTokenType.Eof))
                {
                    throw new DomException(DomError.Syntax);
                }

                media.Add(medium);
                token = parser._tokenizer.Get();
            }

            return media;
        }

        /// <summary>
        /// Parses the given text as a supports condition.
        /// </summary>
        /// <param name="conditionText">The condition source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The condition, or null if it is invalid or text remains after it.</returns>
        internal static ICondition ParseCondition(string conditionText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(conditionText, configuration ?? Configuration.Default);
            CssToken token = parser._tokenizer.Get();
            ICondition condition = parser.ReadCondition(ref token);
            return token.Type == CssTokenType.Eof ? condition : null;
        }

        /// <summary>
        /// Parses the given text as a comma separated list of document functions.
        /// </summary>
        /// <param name="source">The document functions source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The functions, or null if text remains after the list.</returns>
        internal static List<IDocumentFunction> ParseDocumentRules(string source, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(source, configuration ?? Configuration.Default);
            CssToken token = parser._tokenizer.Get();
            List<IDocumentFunction> functions = parser.ReadDocumentFunctions(ref token);
            return token.Type == CssTokenType.Eof ? functions : null;
        }

        /// <summary>
        /// Parses the given text as a single media query.
        /// </summary>
        /// <param name="source">The media query source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The medium (null if the query is malformed but fully consumed).</returns>
        /// <exception cref="DomException">Thrown with a syntax error if text remains after the query.</exception>
        internal static CssMedium ParseMedium(string source, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(source, configuration ?? Configuration.Default);
            CssToken token = parser._tokenizer.Get();
            CssMedium medium = parser.ReadMediaValue(ref token);

            if (token.Type != CssTokenType.Eof)
            {
                throw new DomException(DomError.Syntax);
            }

            return medium;
        }

        /// <summary>
        /// Parses the given text as a single keyframe rule.
        /// </summary>
        /// <param name="ruleText">The keyframe rule source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        /// <returns>The rule, or null if it is invalid or text remains after it.</returns>
        internal static CssKeyframeRule ParseKeyframeRule(string ruleText, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(ruleText, configuration ?? Configuration.Default);
            CssKeyframeRule rule = parser.CreateKeyframeRule(parser._tokenizer.Get());
            CssToken next = parser._tokenizer.Get();
            return next.Type == CssTokenType.Eof ? rule : null;
        }

        /// <summary>
        /// Parses the given text as declarations and adds them to an existing
        /// style declaration.
        /// </summary>
        /// <param name="list">The style declaration to extend.</param>
        /// <param name="declarations">The declarations source.</param>
        /// <param name="configuration">Optional configuration; the default configuration is used if null.</param>
        internal static void AppendDeclarations(CssStyleDeclaration list, string declarations, IConfiguration configuration = null)
        {
            CssParser parser = new CssParser(declarations, configuration ?? Configuration.Default);
            parser.FillDeclarations(list);
        }
    }
}