CssParser
The CSS parser.
See http://dev.w3.org/csswg/css-syntax/#parsing for more details.
using AngleSharp.Css;
using AngleSharp.Css.Conditions;
using AngleSharp.Css.Values;
using AngleSharp.Dom;
using AngleSharp.Dom.Collections;
using AngleSharp.Dom.Css;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace AngleSharp.Parser.Css
{
[DebuggerStepThrough]
public sealed class CssParser
{
/// <summary>Selector constructor that ReadSelector resets and feeds with tokens.</summary>
private readonly CssSelectorConstructor _selector;
/// <summary>Value builder that ReadValue and TrySetConstraint reset and feed with tokens.</summary>
private readonly CssValueBuilder _value;
/// <summary>Tokenizer created over the style sheet's source; all tokens are pulled from it.</summary>
private readonly CssTokenizer _tokenizer;
/// <summary>Lock object taken by ParseAsync while it decides whether to start parsing.</summary>
private readonly object _syncGuard;
/// <summary>The style sheet that receives the parsed rules.</summary>
private readonly CssStyleSheet _sheet;
/// <summary>Set to true by Parse or ParseAsync when the parsing run is started.</summary>
private bool _started;
/// <summary>Task assigned by ParseAsync when it starts parsing; null otherwise.</summary>
private Task<ICssStyleSheet> _task;
/// <summary>
/// Gets whether an asynchronous parsing task has been created, i.e. whether
/// ParseAsync was the call that started parsing.
/// </summary>
public bool IsAsync => _task != null;
/// <summary>
/// Gets the style sheet. Accessing the property goes through Parse(), so the
/// synchronous parsing run is started first if no run has been started yet.
/// </summary>
public ICssStyleSheet Result => Parse();
/// <summary>
/// Creates a parser for CSS given as a string. A new style sheet is created
/// over a text source wrapping the string.
/// </summary>
/// <param name="source">The CSS source code.</param>
/// <param name="configuration">Optional configuration handed to the new style sheet (may be null).</param>
public CssParser(string source, IConfiguration configuration = null)
: this(new CssStyleSheet(configuration, new TextSource(source)))
{
}
/// <summary>
/// Creates a parser for CSS read from a stream. A new style sheet is created
/// over a text source wrapping the stream, decoded with the configuration's
/// default encoding.
/// </summary>
/// <param name="stream">The stream providing the CSS source code.</param>
/// <param name="configuration">
/// Optional configuration handed to the new style sheet.
/// NOTE(review): DefaultEncoding() is invoked on this value even when it is
/// null - confirm the extension method tolerates a null configuration.
/// </param>
public CssParser(Stream stream, IConfiguration configuration = null)
: this(new CssStyleSheet(configuration, new TextSource(stream, configuration.DefaultEncoding())))
{
}
/// <summary>
/// Creates a parser that fills the given style sheet from the sheet's own source.
/// </summary>
/// <param name="stylesheet">
/// The style sheet to populate. Its Source is tokenized and its Options.Events
/// is handed to the tokenizer.
/// </param>
internal CssParser(CssStyleSheet stylesheet)
{
    _sheet = stylesheet;
    _selector = new CssSelectorConstructor();
    _value = new CssValueBuilder();
    _syncGuard = new object();
    _tokenizer = new CssTokenizer(stylesheet.Source, stylesheet.Options.Events);
    // _started and _task keep their defaults (false / null) until parsing begins.
}
/// <summary>
/// Parses the style sheet asynchronously without the possibility of cancellation.
/// </summary>
/// <returns>The task returned by the cancellable overload.</returns>
public Task<ICssStyleSheet> ParseAsync() => ParseAsync(CancellationToken.None);
/// <summary>
/// Parses the style sheet asynchronously. The first call that finds parsing
/// not yet started creates the parsing task; later calls return that same task.
/// </summary>
/// <param name="cancelToken">Token passed on to the asynchronous kernel.</param>
/// <returns>
/// The parsing task. If parsing was already started synchronously via Parse(),
/// no task exists, so a completed task wrapping the style sheet is returned
/// instead of null.
/// </returns>
public Task<ICssStyleSheet> ParseAsync(CancellationToken cancelToken)
{
    lock (_syncGuard)
    {
        if (!_started)
        {
            _started = true;
            _task = KernelAsync(cancelToken);
        }

        // Parse() sets _started without ever assigning _task; handing out null
        // here would make the caller fail on await. The fallback is not stored
        // in _task so that IsAsync keeps reporting false for a synchronous run.
        return _task ?? Task.FromResult<ICssStyleSheet>(_sheet);
    }
}
/// <summary>
/// Parses the style sheet synchronously. If a parsing run has already been
/// started, nothing is parsed again and the style sheet is returned as it is.
/// </summary>
/// <returns>The style sheet this parser fills.</returns>
public ICssStyleSheet Parse()
{
    if (_started)
    {
        return _sheet;
    }

    _started = true;
    Kernel();
    return _sheet;
}
/// <summary>
/// Handles an at-rule that is not known: reports an UnknownAtRule error at the
/// token's position and lets the tokenizer skip the rule.
/// </summary>
/// <param name="token">The token the error is reported for.</param>
/// <returns>Always null.</returns>
internal CssRule CreateUnkownRule(CssToken token)
{
    _tokenizer.RaiseErrorOccurred(CssParseError.UnknownAtRule, token.Position);
    _tokenizer.SkipUnknownRule();
    return null;
}
/// <summary>
/// Creates a media rule: reads the media list starting at the next token and,
/// if an opening curly bracket follows, fills the rule with nested rules.
/// </summary>
/// <returns>The media rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssMediaRule CreateMediaRule()
{
    var current = _tokenizer.Get();
    var list = ReadMediaList(ref current);
    var rule = new CssMediaRule(list);

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillRules(rule);
        return rule;
    }

    return SkipDeclarations<CssMediaRule>(current);
}
/// <summary>
/// Creates a page rule: reads its selector starting at the next token and,
/// if an opening curly bracket follows, fills the rule's style with declarations.
/// </summary>
/// <returns>The page rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssPageRule CreatePageRule()
{
    var current = _tokenizer.Get();
    var rule = new CssPageRule();
    rule.Selector = ReadSelector(ref current);

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillDeclarations(rule.Style);
        return rule;
    }

    return SkipDeclarations<CssPageRule>(current);
}
/// <summary>
/// Creates a font-face rule. The next token has to be an opening curly bracket;
/// the rule's style is then filled with the declarations of the block.
/// </summary>
/// <returns>The font-face rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssFontFaceRule CreateFontFaceRule()
{
    var current = _tokenizer.Get();
    var rule = new CssFontFaceRule();

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillDeclarations(rule.Style);
        return rule;
    }

    return SkipDeclarations<CssFontFaceRule>(current);
}
/// <summary>
/// Creates a supports rule: reads its condition starting at the next token and,
/// if an opening curly bracket follows, fills the rule with nested rules.
/// </summary>
/// <returns>The supports rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssSupportsRule CreateSupportsRule()
{
    var current = _tokenizer.Get();
    var rule = new CssSupportsRule();
    rule.Condition = ReadCondition(ref current);

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillRules(rule);
        return rule;
    }

    return SkipDeclarations<CssSupportsRule>(current);
}
/// <summary>
/// Creates a document rule: reads its document functions starting at the next
/// token and, if an opening curly bracket follows, fills the rule with nested rules.
/// </summary>
/// <returns>The document rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssDocumentRule CreateDocumentRule()
{
    var current = _tokenizer.Get();
    var rule = new CssDocumentRule();
    rule.Conditions.AddRange(ReadDocumentFunctions(ref current));

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillRules(rule);
        return rule;
    }

    return SkipDeclarations<CssDocumentRule>(current);
}
/// <summary>
/// Creates a keyframes rule: reads its name starting at the next token and,
/// if an opening curly bracket follows, fills the rule with keyframe rules.
/// </summary>
/// <returns>The keyframes rule, or null (after reporting an error and skipping) if no block follows.</returns>
internal CssKeyframesRule CreateKeyframesRule()
{
    var current = _tokenizer.Get();
    var rule = new CssKeyframesRule();
    rule.Name = ReadRuleName(ref current);

    if (current.Type == CssTokenType.CurlyBracketOpen)
    {
        FillKeyframeRules(rule);
        return rule;
    }

    return SkipDeclarations<CssKeyframesRule>(current);
}
/// <summary>
/// Creates a namespace rule: reads the optional prefix starting at the next
/// token, then takes the namespace URI from the following token and finally
/// jumps to the next semicolon.
/// </summary>
/// <returns>The namespace rule; its NamespaceUri is only set when a string or url token was found.</returns>
internal CssNamespaceRule CreateNamespaceRule()
{
    var current = _tokenizer.Get();
    var rule = new CssNamespaceRule();
    rule.Prefix = ReadRuleName(ref current);

    // The @namespace grammar allows the namespace name to be written either as
    // a plain string or as url(...); accept both, like CreateImportRule does
    // for the href of an import rule.
    if (current.Is(CssTokenType.String, CssTokenType.Url))
    {
        rule.NamespaceUri = current.Data;
    }

    _tokenizer.JumpToNextSemicolon();
    return rule;
}
/// <summary>
/// Creates a charset rule: if the next token is a string it becomes the rule's
/// character set; afterwards the tokenizer jumps to the next semicolon.
/// </summary>
/// <returns>The charset rule (its CharacterSet is left untouched when no string token follows).</returns>
internal CssCharsetRule CreateCharsetRule()
{
    var current = _tokenizer.Get();
    var rule = new CssCharsetRule();

    if (current.Type == CssTokenType.String)
    {
        rule.CharacterSet = current.Data;
    }

    _tokenizer.JumpToNextSemicolon();
    return rule;
}
/// <summary>
/// Creates an import rule: if the next token is a string or url it becomes the
/// rule's href and the media list following it is read; afterwards the
/// tokenizer jumps to the next semicolon.
/// </summary>
/// <returns>The import rule (href and media are left untouched when no string / url token follows).</returns>
internal CssImportRule CreateImportRule()
{
    var current = _tokenizer.Get();
    var rule = new CssImportRule();
    var hasLocation = current.Is(CssTokenType.String, CssTokenType.Url);

    if (hasLocation)
    {
        rule.Href = current.Data;
        current = _tokenizer.Get();
        rule.Media = ReadMediaList(ref current);
    }

    _tokenizer.JumpToNextSemicolon();
    return rule;
}
/// <summary>
/// Creates a rule from the given start token. At-keywords are dispatched to
/// CreateAtRule, tokens that cannot start a rule are reported and skipped, and
/// everything else is treated as the start of a style rule.
/// </summary>
/// <param name="token">The first token of the rule.</param>
/// <returns>The created rule, or null if the token was invalid or no rule could be created.</returns>
private CssRule CreateRule(CssToken token)
{
    CssParseError error;

    switch (token.Type)
    {
        case CssTokenType.AtKeyword:
            return this.CreateAtRule(token);

        case CssTokenType.CurlyBracketOpen:
            // A block without a preceding selector.
            error = CssParseError.InvalidBlockStart;
            break;

        case CssTokenType.String:
        case CssTokenType.Url:
        case CssTokenType.RoundBracketClose:
        case CssTokenType.CurlyBracketClose:
        case CssTokenType.SquareBracketClose:
            error = CssParseError.InvalidToken;
            break;

        default:
            return CreateStyleRule(token);
    }

    RaiseErrorOccurred(error, token);
    _tokenizer.SkipUnknownRule();
    return null;
}
/// <summary>
/// Creates a style rule: reads the selector beginning at the given token and
/// then fills the rule's style with declarations. The declarations are read
/// even when the selector turned out to be unusable.
/// </summary>
/// <param name="token">The first token of the selector.</param>
/// <returns>The style rule, or null if the rule ended up without a selector.</returns>
private CssStyleRule CreateStyleRule(CssToken token)
{
    var rule = new CssStyleRule();
    rule.Selector = ReadSelector(ref token);
    FillDeclarations(rule.Style);
    return rule.Selector == null ? null : rule;
}
/// <summary>
/// Reads a selector. The tokenizer is switched to selector mode, tokens are
/// applied to the selector constructor until end of file or a curly bracket
/// (opening or closing) is reached, and the tokenizer is switched back to data mode.
/// </summary>
/// <param name="token">The first selector token; on return the token that ended the selector.</param>
/// <returns>The result of the selector constructor. An InvalidSelector error is reported at the first token if the constructor is not valid.</returns>
private ISelector ReadSelector(ref CssToken token)
{
    var first = token;
    _tokenizer.State = CssParseMode.Selector;
    _selector.Reset();

    for (; token.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketOpen, CssTokenType.CurlyBracketClose); token = _tokenizer.Get())
    {
        _selector.Apply(token);
    }

    if (!_selector.IsValid)
    {
        RaiseErrorOccurred(CssParseError.InvalidSelector, first);
    }

    _tokenizer.State = CssParseMode.Data;
    return _selector.Result;
}
/// <summary>
/// Reads a single declaration (name, colon, value) and stores it in the given
/// style when the value could be set on the property.
/// </summary>
/// <param name="style">The declaration block that creates and receives the property.</param>
/// <param name="token">
/// The first token of the declaration; on return the token following the
/// declaration (a leading semicolon, closing curly bracket or end-of-file
/// token is left in place).
/// </param>
/// <returns>
/// The property that was created, or null if the declaration does not start
/// with an identifier or the colon is missing.
/// </returns>
private CssProperty ReadDeclaration(CssStyleDeclaration style, ref CssToken token)
{
    if (token.Type != CssTokenType.Ident)
    {
        RaiseErrorOccurred(CssParseError.IdentExpected, token);

        // Recover by skipping the malformed declaration. Without this the
        // token is never advanced and FillDeclarations would spin forever on
        // input such as "a { 1 }". Semicolon, closing bracket and end of file
        // are left alone because the callers already handle / consume them.
        if (token.IsNot(CssTokenType.Eof, CssTokenType.Semicolon, CssTokenType.CurlyBracketClose))
        {
            _tokenizer.JumpToEndOfDeclaration();
            token = _tokenizer.Get();
        }

        return null;
    }

    CssProperty property = null;
    var name = token.Data;
    token = _tokenizer.Get();

    if (token.Type != CssTokenType.Colon)
    {
        RaiseErrorOccurred(CssParseError.ColonMissing, token);
    }
    else
    {
        property = style.CreateProperty(name);

        if (property == null)
        {
            // Unknown names are reported but still kept as unknown properties.
            RaiseErrorOccurred(CssParseError.UnknownDeclarationName, token);
            property = new CssUnknownProperty(name, style);
        }

        var value = ReadValue(ref token);

        if (value == null)
        {
            RaiseErrorOccurred(CssParseError.ValueMissing, token);
        }
        else if (property.TrySetValue(value))
        {
            style.SetProperty(property);
        }

        property.IsImportant = _value.IsImportant;
    }

    _tokenizer.JumpToEndOfDeclaration();
    token = _tokenizer.Get();
    return property;
}
/// <summary>
/// Reads a comma separated list of document functions.
/// </summary>
/// <param name="token">The first token of the list; on return the first token that does not belong to the list.</param>
/// <returns>The document functions that were read; empty if the first token is no document function.</returns>
private List<IDocumentFunction> ReadDocumentFunctions(ref CssToken token)
{
    var functions = new List<IDocumentFunction>();

    while (true)
    {
        var function = token.ToDocumentFunction();

        if (function == null)
        {
            break;
        }

        functions.Add(function);
        token = _tokenizer.Get();

        if (token.Type != CssTokenType.Comma)
        {
            break;
        }

        // Step over the comma. Previously the comma itself was converted on
        // the next iteration, which ended the list after its first entry.
        token = _tokenizer.Get();
    }

    return functions;
}
/// <summary>
/// Creates a single keyframe rule: reads the keyframe selector beginning at
/// the given token and then fills the rule's style with declarations.
/// </summary>
/// <param name="token">The first token of the keyframe selector.</param>
/// <returns>The keyframe rule, or null (after jumping to the end of the declaration) if no selector could be read.</returns>
private CssKeyframeRule CreateKeyframeRule(CssToken token)
{
    var rule = new CssKeyframeRule();
    rule.Key = ReadKeyframeSelector(ref token);

    if (rule.Key != null)
    {
        FillDeclarations(rule.Style);
        return rule;
    }

    _tokenizer.JumpToEndOfDeclaration();
    return null;
}
/// <summary>
/// Reads a keyframe selector: a comma separated list of percentages and the
/// keywords "from" (0%) and "to" (100%). The keywords are matched
/// case-insensitively, like the other keywords handled by this parser.
/// </summary>
/// <param name="token">The first token of the selector; on return the token that ended it.</param>
/// <returns>
/// The keyframe selector built from the collected stops, or null if an
/// unexpected token was encountered. Reading stops at end of file or, once at
/// least one stop was read, at an opening curly bracket.
/// </returns>
private KeyframeSelector ReadKeyframeSelector(ref CssToken token)
{
    var stops = new List<Percent>();

    while (token.Type != CssTokenType.Eof)
    {
        if (stops.Count > 0)
        {
            if (token.Type == CssTokenType.CurlyBracketOpen)
            {
                break;
            }

            // Further stops must be separated by a comma.
            if (token.Type != CssTokenType.Comma)
            {
                return null;
            }

            token = _tokenizer.Get();
        }

        if (token.Type == CssTokenType.Percentage)
        {
            stops.Add(new Percent(((CssUnitToken)token).Value));
        }
        else if (token.Type == CssTokenType.Ident && token.Data.Equals(Keywords.From, StringComparison.OrdinalIgnoreCase))
        {
            stops.Add(Percent.Zero);
        }
        else if (token.Type == CssTokenType.Ident && token.Data.Equals(Keywords.To, StringComparison.OrdinalIgnoreCase))
        {
            stops.Add(Percent.Hundred);
        }
        else
        {
            return null;
        }

        token = _tokenizer.Get();
    }

    return new KeyframeSelector(stops);
}
/// <summary>
/// Reads a comma separated list of media into a new media list.
/// </summary>
/// <param name="token">The first token of the list; on return the token the parser stopped at.</param>
/// <returns>The media list containing every medium that could be read (possibly none).</returns>
private MediaList ReadMediaList(ref CssToken token)
{
MediaList mediaList = new MediaList();
// Collect media until one cannot be read, no comma follows, or the input ends.
while (token.Type != CssTokenType.Eof) {
CssMedium cssMedium = ReadMediaValue(ref token);
if (cssMedium == null)
break;
mediaList.Add(cssMedium);
if (token.Type != CssTokenType.Comma)
break;
token = _tokenizer.Get();
}
// NOTE(review): the remainder looks like error recovery for a list that did
// not end cleanly on an opening curly bracket - confirm the intended grammar.
if (token.Type != CssTokenType.CurlyBracketOpen) {
// Step over a stray closing round bracket and, if one follows, the opening
// curly bracket, then jump to the end of the declaration and read on.
if (token.Type == CssTokenType.RoundBracketClose)
token = _tokenizer.Get();
if (token.Type == CssTokenType.CurlyBracketOpen)
token = _tokenizer.Get();
_tokenizer.JumpToEndOfDeclaration();
token = _tokenizer.Get();
} else if (mediaList.Length == 0) {
// An opening curly bracket was reached without any medium having been read:
// jump to the end of the declaration and continue with the next token.
_tokenizer.JumpToEndOfDeclaration();
token = _tokenizer.Get();
}
return mediaList;
}
/// <summary>
/// Reads a single medium: an optional "not" / "only" modifier, an optional
/// media type and a chain of bracketed constraints joined by "and".
/// </summary>
/// <param name="token">The first token of the medium; on return the token that ended it.</param>
/// <returns>The medium, or null if a constraint was expected but could not be read.</returns>
private CssMedium ReadMediaValue(ref CssToken token)
{
    var medium = new CssMedium();

    if (token.Type == CssTokenType.Ident)
    {
        var modifier = token.Data;

        if (modifier.Equals(Keywords.Not, StringComparison.OrdinalIgnoreCase))
        {
            medium.IsInverse = true;
            token = _tokenizer.Get();
        }
        else if (modifier.Equals(Keywords.Only, StringComparison.OrdinalIgnoreCase))
        {
            medium.IsExclusive = true;
            token = _tokenizer.Get();
        }
    }

    if (token.Type == CssTokenType.Ident)
    {
        medium.Type = token.Data;
        token = _tokenizer.Get();

        // A media type without a trailing "and" is already complete.
        if (!IsAndKeyword(token))
        {
            return medium;
        }

        token = _tokenizer.Get();
    }

    do
    {
        if (token.Type != CssTokenType.RoundBracketOpen)
        {
            return null;
        }

        token = _tokenizer.Get();

        if (!TrySetConstraint(medium, ref token) || token.Type != CssTokenType.RoundBracketClose)
        {
            return null;
        }

        token = _tokenizer.Get();

        if (!IsAndKeyword(token))
        {
            break;
        }

        token = _tokenizer.Get();
    }
    while (token.Type != CssTokenType.Eof);

    return medium;
}

/// <summary>
/// Checks whether the token is an identifier spelling the "and" keyword (case-insensitive).
/// </summary>
private static bool IsAndKeyword(CssToken token)
{
    return token.Type == CssTokenType.Ident
        && string.Compare(token.Data, Keywords.And, StringComparison.OrdinalIgnoreCase) == 0;
}
/// <summary>
/// Reads the content of a bracketed media constraint (a feature name that is
/// optionally followed by a colon and a value) and adds it to the medium.
/// </summary>
/// <param name="medium">The medium that receives the constraint.</param>
/// <param name="token">The first token inside the brackets; on return the token the parser stopped at.</param>
/// <returns>
/// False if the first token is no identifier (the tokenizer then jumps to the
/// closed arguments) or the input ended while reading the value; otherwise
/// true. Without a colon nothing is added to the medium.
/// </returns>
private bool TrySetConstraint(CssMedium medium, ref CssToken token)
{
    if (token.Type != CssTokenType.Ident)
    {
        _tokenizer.JumpToClosedArguments();
        token = _tokenizer.Get();
        return false;
    }

    _value.Reset();
    var feature = token.Data;
    token = _tokenizer.Get();

    if (token.Type != CssTokenType.Colon)
    {
        return true;
    }

    _tokenizer.State = CssParseMode.Value;

    // Feed tokens to the value builder until a closing round bracket arrives
    // while the builder reports being ready.
    for (token = _tokenizer.Get(); token.Type != CssTokenType.RoundBracketClose || !_value.IsReady; token = _tokenizer.Get())
    {
        if (token.Type == CssTokenType.Eof)
        {
            return false;
        }

        _value.Apply(token);
    }

    _tokenizer.State = CssParseMode.Data;
    medium.AddConstraint(feature, _value.Result);
    return true;
}
/// <summary>
/// Reads a rule name: if the token is an identifier its data is taken as the
/// name and the next token is fetched.
/// </summary>
/// <param name="token">The current token; advanced only if it was an identifier.</param>
/// <returns>The name, or the empty string if the token is no identifier.</returns>
private string ReadRuleName(ref CssToken token)
{
    if (token.Type != CssTokenType.Ident)
    {
        return string.Empty;
    }

    var name = token.Data;
    token = _tokenizer.Get();
    return name;
}
/// <summary>
/// Reads a value. The tokenizer is switched to value mode and tokens are fed
/// to the value builder until end of file, a semicolon, a closing curly
/// bracket, or a closing round bracket that arrives while the builder is
/// ready. Afterwards the tokenizer is switched back to data mode.
/// </summary>
/// <param name="token">Overwritten with the tokens read; on return the token that ended the value.</param>
/// <returns>The result of the value builder.</returns>
private CssValue ReadValue(ref CssToken token)
{
    _tokenizer.State = CssParseMode.Value;
    _value.Reset();

    for (token = _tokenizer.Get(); ; token = _tokenizer.Get())
    {
        if (token.Type == CssTokenType.Eof || token.Is(CssTokenType.Semicolon, CssTokenType.CurlyBracketClose))
        {
            break;
        }

        if (token.Type == CssTokenType.RoundBracketClose && _value.IsReady)
        {
            break;
        }

        _value.Apply(token);
    }

    _tokenizer.State = CssParseMode.Data;
    return _value.Result;
}
/// <summary>
/// Reads a condition: a single condition that may be followed by further
/// conditions joined with "and" or with "or".
/// </summary>
/// <param name="token">The first token of the condition; on return the token the parser stopped at.</param>
/// <returns>
/// An AndCondition / OrCondition when the matching connector follows the first
/// condition, the single condition otherwise, or null if none could be read.
/// </returns>
private ICondition ReadCondition(ref CssToken token)
{
    var first = ExtractCondition(ref token);

    if (first == null)
    {
        return null;
    }

    if (token.Data.Equals(Keywords.And, StringComparison.OrdinalIgnoreCase))
    {
        token = _tokenizer.Get();
        return new AndCondition(MultipleConditions(first, Keywords.And, ref token));
    }

    if (token.Data.Equals(Keywords.Or, StringComparison.OrdinalIgnoreCase))
    {
        token = _tokenizer.Get();
        return new OrCondition(MultipleConditions(first, Keywords.Or, ref token));
    }

    return first;
}
/// <summary>
/// Extracts one condition: either a bracketed part (a nested condition that is
/// wrapped in a group, or a declaration) or a condition negated with "not".
/// </summary>
/// <param name="token">The first token of the condition; on return the token the parser stopped at.</param>
/// <returns>The condition, or null if none could be read.</returns>
private ICondition ExtractCondition(ref CssToken token)
{
    if (token.Type == CssTokenType.RoundBracketOpen)
    {
        token = _tokenizer.Get();
        ICondition bracketed = ReadCondition(ref token);

        if (bracketed != null)
        {
            bracketed = new GroupCondition(bracketed);
        }
        else if (token.Type == CssTokenType.Ident)
        {
            // No nested condition - try to read "name: value" instead.
            bracketed = DeclarationCondition(ref token);
        }

        if (token.Type == CssTokenType.RoundBracketClose)
        {
            token = _tokenizer.Get();
        }

        return bracketed;
    }

    if (token.Data.Equals(Keywords.Not, StringComparison.OrdinalIgnoreCase))
    {
        token = _tokenizer.Get();
        ICondition negated = ExtractCondition(ref token);

        if (negated != null)
        {
            negated = new NotCondition(negated);
        }

        return negated;
    }

    return null;
}
/// <summary>
/// Reads a declaration ("name: value") and wraps it in a declaration condition.
/// The property is created by the property factory; names the factory does
/// not know yield an unknown property.
/// </summary>
/// <param name="token">The identifier token carrying the property name; on return the token the parser stopped at.</param>
/// <returns>The declaration condition, or null if the colon is missing or no value could be read.</returns>
private ICondition DeclarationCondition(ref CssToken token)
{
    var name = token.Data;
    var style = new CssStyleDeclaration((string)null);
    CssProperty property = Factory.Properties.Create(name, style);

    if (property == null)
    {
        property = new CssUnknownProperty(name, style);
    }

    token = _tokenizer.Get();

    if (token.Type != CssTokenType.Colon)
    {
        return null;
    }

    var value = ReadValue(ref token);
    property.IsImportant = _value.IsImportant;

    if (value == null)
    {
        return null;
    }

    return new DeclarationCondition(property, value);
}
/// <summary>
/// Collects the conditions that follow an already read condition, as long as
/// they are joined by the given connector keyword (compared case-insensitively).
/// </summary>
/// <param name="condition">The condition that was read before the first connector.</param>
/// <param name="connector">The connector keyword that has to separate the conditions.</param>
/// <param name="token">The token after the first connector; on return the token the parser stopped at.</param>
/// <returns>A list starting with the given condition, followed by every condition that could be read.</returns>
private List<ICondition> MultipleConditions(ICondition condition, string connector, ref CssToken token)
{
    var conditions = new List<ICondition> { condition };

    while (token.Type != CssTokenType.Eof)
    {
        var next = ExtractCondition(ref token);

        if (next == null)
        {
            break;
        }

        conditions.Add(next);

        if (!token.Data.Equals(connector, StringComparison.OrdinalIgnoreCase))
        {
            break;
        }

        token = _tokenizer.Get();
    }

    return conditions;
}
/// <summary>
/// Reports a parse error through the tokenizer, using the position of the given token.
/// </summary>
/// <param name="code">The error to report.</param>
/// <param name="token">The token whose position is reported.</param>
private void RaiseErrorOccurred(CssParseError code, CssToken token) =>
    _tokenizer.RaiseErrorOccurred(code, token.Position);
/// <summary>
/// Reports an InvalidToken error at the given token's position and lets the
/// tokenizer skip the current rule.
/// </summary>
/// <typeparam name="T">The type of rule the caller wanted to create.</typeparam>
/// <param name="token">The offending token.</param>
/// <returns>Always the default value of <typeparamref name="T"/>.</returns>
private T SkipDeclarations<T>(CssToken token)
{
    _tokenizer.RaiseErrorOccurred(CssParseError.InvalidToken, token.Position);
    _tokenizer.SkipUnknownRule();
    return default(T);
}
/// <summary>
/// The synchronous parsing loop: consumes top-level rules until the tokenizer
/// reports the end of the input.
/// </summary>
private void Kernel()
{
    // The end-of-file check comes before the first Consume call, so that an
    // empty source does not feed the end-of-file token into rule creation
    // (which would try to read a selector and declarations from nothing).
    for (var token = _tokenizer.Get(); token.Type != CssTokenType.Eof; token = _tokenizer.Get())
    {
        Consume(token);
    }
}
/// <summary>
/// The asynchronous parsing kernel: awaits prefetching of the whole source
/// (without capturing the context) and then runs the synchronous kernel.
/// </summary>
/// <param name="cancelToken">Token passed to the prefetch operation.</param>
/// <returns>A task yielding the style sheet.</returns>
private async Task<ICssStyleSheet> KernelAsync(CancellationToken cancelToken)
{
    var prefetch = _sheet.Source.PrefetchAll(cancelToken);
    await prefetch.ConfigureAwait(false);
    Kernel();
    return _sheet;
}
/// <summary>
/// Creates a rule from the given start token and, if one was created, adds it
/// to the style sheet's rules with the sheet as owner and no parent rule.
/// </summary>
/// <param name="token">The first token of the rule.</param>
private void Consume(CssToken token)
{
    var rule = CreateRule(token);

    if (rule == null)
    {
        return;
    }

    _sheet.Rules.Add(rule, _sheet, null);
}
/// <summary>
/// Creates a rule from the given start token and, if one was created, adds it
/// to the rules of the given grouping rule.
/// </summary>
/// <param name="token">The first token of the rule.</param>
/// <param name="parent">The grouping rule that receives the new rule.</param>
private void Consume(CssToken token, CssGroupingRule parent)
{
    var rule = CreateRule(token);

    if (rule == null)
    {
        return;
    }

    parent.Rules.Add(rule, _sheet, parent);
}
/// <summary>
/// Fills a grouping rule with nested rules until a closing curly bracket or
/// the end of the input is reached.
/// </summary>
/// <param name="rule">The grouping rule that receives the nested rules.</param>
private void FillRules(CssGroupingRule rule)
{
    for (var current = _tokenizer.Get();
         current.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose);
         current = _tokenizer.Get())
    {
        Consume(current, rule);
    }
}
/// <summary>
/// Fills a keyframes rule with keyframe rules until a closing curly bracket or
/// the end of the input is reached. Keyframe rules that could not be created
/// are left out.
/// </summary>
/// <param name="parentRule">The keyframes rule that receives the keyframe rules.</param>
private void FillKeyframeRules(CssKeyframesRule parentRule)
{
    for (var current = _tokenizer.Get();
         current.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose);
         current = _tokenizer.Get())
    {
        var keyframe = CreateKeyframeRule(current);

        if (keyframe != null)
        {
            parentRule.Rules.Add(keyframe, _sheet, parentRule);
        }
    }
}
/// <summary>
/// Fills a declaration block: reads declarations, starting with the next
/// token, until a closing curly bracket or the end of the input is reached.
/// A semicolon left over after a declaration is stepped over.
/// </summary>
/// <param name="style">The declaration block that receives the declarations.</param>
private void FillDeclarations(CssStyleDeclaration style)
{
    var current = _tokenizer.Get();

    while (current.IsNot(CssTokenType.Eof, CssTokenType.CurlyBracketClose))
    {
        ReadDeclaration(style, ref current);

        if (current.Type != CssTokenType.Semicolon)
        {
            continue;
        }

        current = _tokenizer.Get();
    }
}
/// <summary>
/// Parses a selector from the given text. A tokenizer in selector mode feeds
/// every token up to the end of the text into a pooled selector constructor.
/// </summary>
/// <param name="selectorText">The text of the selector.</param>
/// <returns>The value obtained when the selector constructor is handed back to the pool.</returns>
public static ISelector ParseSelector(string selectorText)
{
    var tokenizer = new CssTokenizer(new TextSource(selectorText), null);
    tokenizer.State = CssParseMode.Selector;
    var constructor = Pool.NewSelectorConstructor();

    for (var current = tokenizer.Get(); current.Type != CssTokenType.Eof; current = tokenizer.Get())
    {
        constructor.Apply(current);
    }

    return constructor.ToPool();
}
/// <summary>
/// Parses a keyframe selector from the given text.
/// </summary>
/// <param name="keyText">The text of the keyframe selector.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The keyframe selector, or null if it could not be read or the text was not consumed completely.</returns>
public static IKeyframeSelector ParseKeyframeSelector(string keyText, IConfiguration configuration = null)
{
    var parser = new CssParser(keyText, configuration ?? Configuration.Default);
    var current = parser._tokenizer.Get();
    var selector = parser.ReadKeyframeSelector(ref current);

    if (current.Type == CssTokenType.Eof)
    {
        return selector;
    }

    return null;
}
/// <summary>
/// Parses a value from the given text.
/// </summary>
/// <param name="valueText">The text of the value.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The value, or null if the text was not consumed completely.</returns>
internal static CssValue ParseValue(string valueText, IConfiguration configuration = null)
{
    var parser = new CssParser(valueText, configuration ?? Configuration.Default);

    // ReadValue fetches the first token itself, so no start token is needed.
    CssToken last = null;
    var value = parser.ReadValue(ref last);

    if (last.Type == CssTokenType.Eof)
    {
        return value;
    }

    return null;
}
/// <summary>
/// Parses a single rule from the given text.
/// </summary>
/// <param name="ruleText">The text of the rule.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The rule, or null if the token following the rule is not the end of the text.</returns>
internal static CssRule ParseRule(string ruleText, IConfiguration configuration = null)
{
    var parser = new CssParser(ruleText, configuration ?? Configuration.Default);
    var tokenizer = parser._tokenizer;
    var rule = parser.CreateRule(tokenizer.Get());
    var trailing = tokenizer.Get();

    if (trailing.Type == CssTokenType.Eof)
    {
        return rule;
    }

    return null;
}
/// <summary>
/// Parses a list of declarations into a new declaration block.
/// </summary>
/// <param name="declarations">The text containing the declarations.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The new declaration block filled with the declarations that were read.</returns>
internal static CssStyleDeclaration ParseDeclarations(string declarations, IConfiguration configuration = null)
{
    var style = new CssStyleDeclaration((string)null);
    var parser = new CssParser(declarations, configuration ?? Configuration.Default);
    parser.FillDeclarations(style);
    return style;
}
/// <summary>
/// Parses a single declaration from the given text. One trailing semicolon is accepted.
/// </summary>
/// <param name="declarationText">The text of the declaration.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The property that was read (possibly null), or null if the text was not consumed completely.</returns>
internal static CssProperty ParseDeclaration(string declarationText, IConfiguration configuration = null)
{
    var parser = new CssParser(declarationText, configuration ?? Configuration.Default);
    var block = new CssStyleDeclaration((string)null);
    var current = parser._tokenizer.Get();
    var property = parser.ReadDeclaration(block, ref current);

    if (current.Type == CssTokenType.Semicolon)
    {
        current = parser._tokenizer.Get();
    }

    return current.Type == CssTokenType.Eof ? property : null;
}
/// <summary>
/// Parses a comma separated list of media from the given text.
/// </summary>
/// <param name="mediaText">The text of the media list.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null, as in the sibling Parse helpers.</param>
/// <returns>The media that were read.</returns>
/// <exception cref="DomException">
/// Thrown with a syntax error if a medium cannot be read or is followed by
/// anything other than a comma or the end of the text.
/// </exception>
internal static List<CssMedium> ParseMediaList(string mediaText, IConfiguration configuration = null)
{
    var parser = new CssParser(mediaText, configuration ?? Configuration.Default);
    var media = new List<CssMedium>();
    var token = parser._tokenizer.Get();

    while (token.Type != CssTokenType.Eof)
    {
        var medium = parser.ReadMediaValue(ref token);

        if (medium == null || token.IsNot(CssTokenType.Comma, CssTokenType.Eof))
        {
            throw new DomException(DomError.Syntax);
        }

        media.Add(medium);
        token = parser._tokenizer.Get();
    }

    return media;
}
/// <summary>
/// Parses a condition from the given text.
/// </summary>
/// <param name="conditionText">The text of the condition.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
/// <returns>The condition, or null if none could be read or the text was not consumed completely.</returns>
internal static ICondition ParseCondition(string conditionText, IConfiguration configuration = null)
{
    var parser = new CssParser(conditionText, configuration ?? Configuration.Default);
    var current = parser._tokenizer.Get();
    var condition = parser.ReadCondition(ref current);

    if (current.Type == CssTokenType.Eof)
    {
        return condition;
    }

    return null;
}
/// <summary>
/// Parses a list of document functions from the given text.
/// </summary>
/// <param name="source">The text of the document functions.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null, as in the sibling Parse helpers.</param>
/// <returns>The document functions, or null if the text was not consumed completely.</returns>
internal static List<IDocumentFunction> ParseDocumentRules(string source, IConfiguration configuration = null)
{
    var parser = new CssParser(source, configuration ?? Configuration.Default);
    var token = parser._tokenizer.Get();
    var functions = parser.ReadDocumentFunctions(ref token);

    if (token.Type != CssTokenType.Eof)
    {
        return null;
    }

    return functions;
}
/// <summary>
/// Parses a single medium from the given text.
/// </summary>
/// <param name="source">The text of the medium.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null, as in the sibling Parse helpers.</param>
/// <returns>The medium that was read (null if reading failed exactly at the end of the text).</returns>
/// <exception cref="DomException">Thrown with a syntax error if the text was not consumed completely.</exception>
internal static CssMedium ParseMedium(string source, IConfiguration configuration = null)
{
    var parser = new CssParser(source, configuration ?? Configuration.Default);
    var token = parser._tokenizer.Get();
    var medium = parser.ReadMediaValue(ref token);

    if (token.Type != CssTokenType.Eof)
    {
        throw new DomException(DomError.Syntax);
    }

    return medium;
}
/// <summary>
/// Parses a single keyframe rule from the given text.
/// </summary>
/// <param name="ruleText">The text of the keyframe rule.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null, as in the sibling Parse helpers.</param>
/// <returns>The keyframe rule, or null if the token following the rule is not the end of the text.</returns>
internal static CssKeyframeRule ParseKeyframeRule(string ruleText, IConfiguration configuration = null)
{
    var parser = new CssParser(ruleText, configuration ?? Configuration.Default);
    var rule = parser.CreateKeyframeRule(parser._tokenizer.Get());
    var trailing = parser._tokenizer.Get();

    if (trailing.Type != CssTokenType.Eof)
    {
        return null;
    }

    return rule;
}
/// <summary>
/// Parses the given declarations and appends them to an existing declaration block.
/// </summary>
/// <param name="list">The declaration block that receives the declarations.</param>
/// <param name="declarations">The text containing the declarations.</param>
/// <param name="configuration">Optional configuration; Configuration.Default is used when null.</param>
internal static void AppendDeclarations(CssStyleDeclaration list, string declarations, IConfiguration configuration = null)
{
    var effective = configuration ?? Configuration.Default;
    new CssParser(declarations, effective).FillDeclarations(list);
}
}
}