AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="0.9.9.1" />

 CssSelectorConstructor

sealed class CssSelectorConstructor
Class for constructing CSS selectors as specified in http://www.w3.org/html/wg/drafts/html/master/selectors.html.
using AngleSharp.Css;
using AngleSharp.Dom;
using AngleSharp.Dom.Css;
using AngleSharp.Dom.Html;
using AngleSharp.Extensions;
using AngleSharp.Services;
using System;
using System.Collections.Generic;
using System.Globalization;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// Token-driven state machine that builds an <see cref="ISelector"/> from a
    /// stream of CSS tokens. Tokens are fed through <see cref="Apply"/> and the
    /// finished selector is obtained from <see cref="GetResult"/>.
    /// </summary>
    internal sealed class CssSelectorConstructor
    {
        #region Nested types

        /// <summary>
        /// The states of the top-level selector parser.
        /// </summary>
        private enum State : byte
        {
            Data,
            Attribute,
            AttributeOperator,
            AttributeValue,
            AttributeEnd,
            Class,
            PseudoClass,
            PseudoElement
        }

        /// <summary>
        /// Base class for the argument parsers of functional pseudo-classes.
        /// </summary>
        private abstract class FunctionState : IDisposable
        {
            /// <summary>
            /// Feeds a single argument token to the function parser.
            /// </summary>
            /// <param name="token">The token to consume.</param>
            /// <returns>True once the function's arguments are complete.</returns>
            public bool Finished(CssToken token)
            {
                return OnToken(token);
            }

            /// <summary>
            /// Creates the selector for the consumed arguments.
            /// </summary>
            /// <returns>The selector, or null if the arguments were invalid.</returns>
            public abstract ISelector Produce();

            /// <summary>
            /// Consumes a single token.
            /// </summary>
            /// <param name="token">The token to consume.</param>
            /// <returns>True if the token ended the function.</returns>
            protected abstract bool OnToken(CssToken token);

            /// <summary>
            /// Releases resources held by the function parser. No-op by default.
            /// </summary>
            public virtual void Dispose()
            {
            }
        }

        /// <summary>
        /// Parses the argument of the :not() pseudo-class with a child constructor
        /// that is flagged as nested.
        /// </summary>
        private sealed class NotFunctionState : FunctionState
        {
            private readonly CssSelectorConstructor _selector;

            public NotFunctionState(CssSelectorConstructor parent)
            {
                _selector = parent.CreateChild();
                _selector._nested = true;
            }

            protected override bool OnToken(CssToken token)
            {
                // A closing bracket only terminates the function while the inner
                // parser is idle (i.e. not in the middle of an attribute, class, ...).
                if (token.Type == CssTokenType.RoundBracketClose && _selector._state == State.Data)
                {
                    return true;
                }

                _selector.Apply(token);
                return false;
            }

            public override ISelector Produce()
            {
                // Validity has to be read before GetResult, which mutates the constructor.
                var valid = _selector.IsValid;
                var sel = _selector.GetResult();

                if (!valid)
                {
                    return null;
                }

                var code = PseudoClassNames.Not.CssFunction(sel.Text);
                return SimpleSelector.PseudoClass(el => !sel.Match(el), code);
            }

            public override void Dispose()
            {
                base.Dispose();
                _selector.ToPool();
            }
        }

        /// <summary>
        /// Parses the argument of the :has() pseudo-class.
        /// </summary>
        private sealed class HasFunctionState : FunctionState
        {
            private readonly CssSelectorConstructor _nested;

            public HasFunctionState(CssSelectorConstructor parent)
            {
                _nested = parent.CreateChild();
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.RoundBracketClose && _nested._state == State.Data)
                {
                    return true;
                }

                _nested.Apply(token);
                return false;
            }

            public override ISelector Produce()
            {
                var valid = _nested.IsValid;
                var sel = _nested.GetResult();

                if (!valid)
                {
                    return null;
                }

                var code = PseudoClassNames.Has.CssFunction(sel.Text);
                return SimpleSelector.PseudoClass(el => el.ChildNodes.QuerySelector(sel) != null, code);
            }

            public override void Dispose()
            {
                base.Dispose();
                _nested.ToPool();
            }
        }

        /// <summary>
        /// Parses the argument of the :matches() pseudo-class.
        /// </summary>
        private sealed class MatchesFunctionState : FunctionState
        {
            private readonly CssSelectorConstructor _selector;

            public MatchesFunctionState(CssSelectorConstructor parent)
            {
                _selector = parent.CreateChild();
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.RoundBracketClose && _selector._state == State.Data)
                {
                    return true;
                }

                _selector.Apply(token);
                return false;
            }

            public override ISelector Produce()
            {
                var valid = _selector.IsValid;
                var sel = _selector.GetResult();

                if (!valid)
                {
                    return null;
                }

                var code = PseudoClassNames.Matches.CssFunction(sel.Text);
                return SimpleSelector.PseudoClass(el => sel.Match(el), code);
            }

            public override void Dispose()
            {
                base.Dispose();
                _selector.ToPool();
            }
        }

        /// <summary>
        /// Parses the argument of the :dir() pseudo-class (a single identifier).
        /// </summary>
        private sealed class DirFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public DirFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident)
                {
                    _value = token.Data;
                }
                else if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }
                else if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                var code = PseudoClassNames.Dir.CssFunction(_value);
                return SimpleSelector.PseudoClass(el =>
                {
                    var html = el as IHtmlElement;
                    return html != null && _value.Isi(html.Direction);
                }, code);
            }
        }

        /// <summary>
        /// Parses the argument of the :lang() pseudo-class (a single identifier).
        /// </summary>
        private sealed class LangFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public LangFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident)
                {
                    _value = token.Data;
                }
                else if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }
                else if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                var code = PseudoClassNames.Lang.CssFunction(_value);
                return SimpleSelector.PseudoClass(el =>
                {
                    var html = el as IHtmlElement;
                    var language = html != null ? html.Language : null;

                    // An element without a language simply does not match instead
                    // of failing with a NullReferenceException.
                    return language != null && language.StartsWith(_value, StringComparison.OrdinalIgnoreCase);
                }, code);
            }
        }

        /// <summary>
        /// Parses the argument of the :contains() pseudo-class (identifier or string).
        /// </summary>
        private sealed class ContainsFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public ContainsFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String)
                {
                    _value = token.Data;
                }
                else if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }
                else if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                var code = PseudoClassNames.Contains.CssFunction(_value);
                return SimpleSelector.PseudoClass(el => el.TextContent.Contains(_value), code);
            }
        }

        /// <summary>
        /// Parses the argument of the :host-context() pseudo-class.
        /// </summary>
        private sealed class HostContextFunctionState : FunctionState
        {
            private readonly CssSelectorConstructor _selector;

            public HostContextFunctionState(CssSelectorConstructor parent)
            {
                _selector = parent.CreateChild();
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.RoundBracketClose && _selector._state == State.Data)
                {
                    return true;
                }

                _selector.Apply(token);
                return false;
            }

            public override ISelector Produce()
            {
                var valid = _selector.IsValid;
                var sel = _selector.GetResult();

                if (!valid)
                {
                    return null;
                }

                var code = PseudoClassNames.HostContext.CssFunction(sel.Text);
                return SimpleSelector.PseudoClass(el =>
                {
                    // Start at the shadow host (if the parent is a shadow root)
                    // and walk up through the host's ancestors.
                    var shadowRoot = el.Parent as IShadowRoot;
                    IElement host = shadowRoot?.Host;

                    while (host != null)
                    {
                        if (sel.Match(host))
                        {
                            return true;
                        }

                        host = host.ParentElement;
                    }

                    return false;
                }, code);
            }

            public override void Dispose()
            {
                base.Dispose();
                _selector.ToPool();
            }
        }

        /// <summary>
        /// Parses the "step n + offset [of selector]" argument used by the nth-*
        /// pseudo-classes and produces a selector of type <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">The child selector type to create.</typeparam>
        private sealed class ChildFunctionState<T> : FunctionState
            where T : ChildSelector, ISelector, new()
        {
            private enum ParseState : byte
            {
                Initial,
                AfterInitialSign,
                Offset,
                BeforeOf,
                AfterOf
            }

            private readonly CssSelectorConstructor _parent;
            private bool _valid;
            private int _step;
            private int _offset;
            private int _sign;
            private ParseState _state;
            private CssSelectorConstructor _nested;
            private bool _allowOf;

            /// <summary>
            /// Creates a new parser for the arguments of an nth-* function.
            /// </summary>
            /// <param name="parent">The constructor used to create the nested "of" parser.</param>
            /// <param name="withOptionalSelector">Whether an "of selector" suffix is permitted.</param>
            public ChildFunctionState(CssSelectorConstructor parent, bool withOptionalSelector = true)
            {
                _parent = parent;
                _allowOf = withOptionalSelector;
                _valid = true;
                _sign = 1;
                _state = ParseState.Initial;
            }

            public override ISelector Produce()
            {
                var invalid = !_valid || (_nested != null && !_nested.IsValid);

                // The nested constructor is handed back to the pool even if the
                // overall result turns out to be invalid.
                ISelector kind = _nested?.ToPool() ?? SimpleSelector.All;

                if (invalid)
                {
                    return null;
                }

                return new T().With(_step, _offset, kind);
            }

            protected override bool OnToken(CssToken token)
            {
                switch (_state)
                {
                    case ParseState.Initial:
                        return OnInitial(token);
                    case ParseState.AfterInitialSign:
                        return OnAfterInitialSign(token);
                    case ParseState.Offset:
                        return OnOffset(token);
                    case ParseState.BeforeOf:
                        return OnBeforeOf(token);
                    default:
                        return OnAfter(token);
                }
            }

            /// <summary>
            /// Parses an integer independently of the current thread culture.
            /// </summary>
            private static bool TryParseInteger(string text, out int value)
            {
                return int.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out value);
            }

            private bool OnAfterInitialSign(CssToken token)
            {
                if (token.Type == CssTokenType.Number)
                {
                    return OnOffset(token);
                }

                if (token.Type == CssTokenType.Dimension)
                {
                    var unit = (CssUnitToken)token;
                    _valid = _valid && unit.Unit.Isi("n") && TryParseInteger(token.Data, out _step);
                    _step *= _sign;
                    _sign = 1;
                    _state = ParseState.Offset;
                    return false;
                }

                if (token.Type == CssTokenType.Ident && token.Data.Isi("n"))
                {
                    _step = _sign;
                    _sign = 1;
                    _state = ParseState.Offset;
                    return false;
                }

                if (_state == ParseState.Initial && token.Type == CssTokenType.Ident && token.Data.Isi("-n"))
                {
                    _step = -1;
                    _state = ParseState.Offset;
                    return false;
                }

                _valid = false;
                return token.Type == CssTokenType.RoundBracketClose;
            }

            private bool OnAfter(CssToken token)
            {
                if (token.Type == CssTokenType.RoundBracketClose && _nested._state == State.Data)
                {
                    return true;
                }

                _nested.Apply(token);
                return false;
            }

            private bool OnBeforeOf(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Data.Isi(Keywords.Of))
                {
                    _valid = _allowOf;
                    _state = ParseState.AfterOf;
                    _nested = _parent.CreateChild();
                    return false;
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }

                _valid = false;
                return false;
            }

            private bool OnOffset(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Type == CssTokenType.Number)
                {
                    _valid = _valid && ((CssNumberToken)token).IsInteger && TryParseInteger(token.Data, out _offset);
                    _offset *= _sign;
                    _state = ParseState.BeforeOf;
                    return false;
                }

                return OnBeforeOf(token);
            }

            private bool OnInitial(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Data.Isi(Keywords.Odd))
                {
                    _state = ParseState.BeforeOf;
                    _step = 2;
                    _offset = 1;
                    return false;
                }

                if (token.Data.Isi(Keywords.Even))
                {
                    _state = ParseState.BeforeOf;
                    _step = 2;
                    _offset = 0;
                    return false;
                }

                if (token.Type == CssTokenType.Delim && token.Data.IsOneOf("+", "-"))
                {
                    _sign = token.Data == "-" ? -1 : 1;
                    _state = ParseState.AfterInitialSign;
                    return false;
                }

                return OnAfterInitialSign(token);
            }
        }

        #endregion

        #region Fields

        /// <summary>
        /// Maps functional pseudo-class names (case-insensitive) to the factory
        /// creating the matching argument parser.
        /// </summary>
        private static readonly Dictionary<string, Func<CssSelectorConstructor, FunctionState>> pseudoClassFunctions =
            new Dictionary<string, Func<CssSelectorConstructor, FunctionState>>(StringComparer.OrdinalIgnoreCase)
            {
                { PseudoClassNames.NthChild, ctx => new ChildFunctionState<FirstChildSelector>(ctx, true) },
                { PseudoClassNames.NthLastChild, ctx => new ChildFunctionState<LastChildSelector>(ctx, true) },
                { PseudoClassNames.NthOfType, ctx => new ChildFunctionState<FirstTypeSelector>(ctx, false) },
                { PseudoClassNames.NthLastOfType, ctx => new ChildFunctionState<LastTypeSelector>(ctx, false) },
                { PseudoClassNames.NthColumn, ctx => new ChildFunctionState<FirstColumnSelector>(ctx, false) },
                { PseudoClassNames.NthLastColumn, ctx => new ChildFunctionState<LastColumnSelector>(ctx, false) },
                { PseudoClassNames.Not, ctx => new NotFunctionState(ctx) },
                { PseudoClassNames.Dir, ctx => new DirFunctionState() },
                { PseudoClassNames.Lang, ctx => new LangFunctionState() },
                { PseudoClassNames.Contains, ctx => new ContainsFunctionState() },
                { PseudoClassNames.Has, ctx => new HasFunctionState(ctx) },
                { PseudoClassNames.Matches, ctx => new MatchesFunctionState(ctx) },
                { PseudoClassNames.HostContext, ctx => new HostContextFunctionState(ctx) }
            };

        private readonly Stack<CssCombinator> _combinators;
        private State _state;
        private ISelector _temp;
        private ListSelector _group;
        private ComplexSelector _complex;
        private string _attrName;
        private string _attrValue;
        private string _attrOp;
        private string _attrNs;
        private bool _valid;
        private bool _invoked;
        private bool _nested;
        private bool _ready;
        private IAttributeSelectorFactory _attributeSelector;
        private IPseudoElementSelectorFactory _pseudoElementSelector;
        private IPseudoClassSelectorFactory _pseudoClassSelector;

        #endregion

        #region Properties

        /// <summary>
        /// Gets if the constructor has been invoked, no error has been seen and
        /// the selector is currently complete.
        /// </summary>
        public bool IsValid
        {
            get { return _invoked && _valid && _ready; }
        }

        /// <summary>
        /// Gets if the constructor has been flagged as nested.
        /// </summary>
        public bool IsNested => _nested;

        #endregion

        #region ctor

        /// <summary>
        /// Creates a new constructor using the given selector factories.
        /// </summary>
        /// <param name="attributeSelector">The factory for attribute selectors.</param>
        /// <param name="pseudoClassSelector">The factory for pseudo-class selectors.</param>
        /// <param name="pseudoElementSelector">The factory for pseudo-element selectors.</param>
        public CssSelectorConstructor(IAttributeSelectorFactory attributeSelector, IPseudoClassSelectorFactory pseudoClassSelector, IPseudoElementSelectorFactory pseudoElementSelector)
        {
            _combinators = new Stack<CssCombinator>();
            Reset(attributeSelector, pseudoClassSelector, pseudoElementSelector);
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Finalizes the pending parts and returns the constructed selector.
        /// </summary>
        /// <returns>
        /// The constructed selector, or an <see cref="UnknownSelector"/> if the
        /// constructor is not valid.
        /// </returns>
        public ISelector GetResult()
        {
            if (!IsValid)
            {
                return new UnknownSelector();
            }

            if (_complex != null)
            {
                _complex.ConcludeSelector(_temp);
                _temp = _complex;
                _complex = null;
            }

            if (_group == null || _group.Length == 0)
            {
                return _temp ?? SimpleSelector.All;
            }

            if (_temp == null && _group.Length == 1)
            {
                return _group[0];
            }

            if (_temp != null)
            {
                _group.Add(_temp);
                _temp = null;
            }

            return _group;
        }

        /// <summary>
        /// Feeds a single token to the constructor. Comment tokens are ignored.
        /// </summary>
        /// <param name="token">The token to consume.</param>
        public void Apply(CssToken token)
        {
            if (token.Type == CssTokenType.Comment)
            {
                return;
            }

            _invoked = true;

            switch (_state)
            {
                case State.Data:
                    OnData(token);
                    break;
                case State.Class:
                    OnClass(token);
                    break;
                case State.Attribute:
                    OnAttribute(token);
                    break;
                case State.AttributeOperator:
                    OnAttributeOperator(token);
                    break;
                case State.AttributeValue:
                    OnAttributeValue(token);
                    break;
                case State.AttributeEnd:
                    OnAttributeEnd(token);
                    break;
                case State.PseudoClass:
                    OnPseudoClass(token);
                    break;
                case State.PseudoElement:
                    OnPseudoElement(token);
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        /// <summary>
        /// Resets all parsing state and installs the given selector factories.
        /// </summary>
        /// <param name="attributeSelector">The factory for attribute selectors.</param>
        /// <param name="pseudoClassSelector">The factory for pseudo-class selectors.</param>
        /// <param name="pseudoElementSelector">The factory for pseudo-element selectors.</param>
        /// <returns>The current instance.</returns>
        public CssSelectorConstructor Reset(IAttributeSelectorFactory attributeSelector, IPseudoClassSelectorFactory pseudoClassSelector, IPseudoElementSelectorFactory pseudoElementSelector)
        {
            _attrName = null;
            _attrValue = null;
            _attrNs = null;
            _attrOp = string.Empty;
            _state = State.Data;
            _combinators.Clear();
            _temp = null;
            _group = null;
            _complex = null;
            _valid = true;
            _nested = false;
            _invoked = false;
            _ready = true;
            _attributeSelector = attributeSelector;
            _pseudoClassSelector = pseudoClassSelector;
            _pseudoElementSelector = pseudoElementSelector;
            return this;
        }

        #endregion

        #region States

        /// <summary>
        /// Handles a token in the general (data) state.
        /// </summary>
        private void OnData(CssToken token)
        {
            switch (token.Type)
            {
                case CssTokenType.SquareBracketOpen:
                    _attrName = null;
                    _attrValue = null;
                    _attrOp = string.Empty;
                    _attrNs = null;
                    _state = State.Attribute;
                    _ready = false;
                    break;
                case CssTokenType.Colon:
                    _state = State.PseudoClass;
                    _ready = false;
                    break;
                case CssTokenType.Hash:
                    Insert(SimpleSelector.Id(token.Data));
                    _ready = true;
                    break;
                case CssTokenType.Ident:
                    Insert(SimpleSelector.Type(token.Data));
                    _ready = true;
                    break;
                case CssTokenType.Whitespace:
                    Insert(CssCombinator.Descendent);
                    break;
                case CssTokenType.Delim:
                    OnDelim(token);
                    break;
                case CssTokenType.Comma:
                    InsertOr();
                    _ready = false;
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        /// <summary>
        /// Handles a token right after the opening square bracket (attribute name
        /// or namespace prefix).
        /// </summary>
        private void OnAttribute(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String)
            {
                _state = State.AttributeOperator;
                _attrName = token.Data;
            }
            else if (token.Type == CssTokenType.Delim && token.Data.Is(CombinatorSymbols.Pipe))
            {
                _state = State.Attribute;
                _attrNs = string.Empty;
            }
            else if (token.Type == CssTokenType.Delim && token.Data.Is(Keywords.Asterisk))
            {
                _state = State.AttributeOperator;
                _attrName = token.ToValue();
            }
            else
            {
                _state = State.Data;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles a token after the attribute name (operator, namespace
        /// separator or closing bracket).
        /// </summary>
        private void OnAttributeOperator(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.SquareBracketClose)
            {
                _state = State.AttributeValue;
                OnAttributeEnd(token);
            }
            else if (token.Type == CssTokenType.Match || token.Type == CssTokenType.Delim)
            {
                _state = State.AttributeValue;
                _attrOp = token.ToValue();

                // A pipe means the name seen so far was the namespace prefix.
                if (_attrOp == CombinatorSymbols.Pipe)
                {
                    _attrNs = _attrName;
                    _attrName = null;
                    _attrOp = string.Empty;
                    _state = State.Attribute;
                }
            }
            else
            {
                _state = State.AttributeEnd;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles a token after the attribute operator (the attribute value).
        /// </summary>
        private void OnAttributeValue(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String || token.Type == CssTokenType.Number)
            {
                _state = State.AttributeEnd;
                _attrValue = token.Data;
            }
            else
            {
                _state = State.Data;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles the token that should close the attribute selector.
        /// </summary>
        private void OnAttributeEnd(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.SquareBracketClose)
            {
                var selector = _attributeSelector.Create(_attrOp, _attrName, _attrValue, _attrNs);
                Insert(selector);
            }
            else
            {
                _valid = false;
            }
        }

        /// <summary>
        /// Handles a token after a single colon (pseudo-class, function or a
        /// second colon starting a pseudo-element).
        /// </summary>
        private void OnPseudoClass(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Colon)
            {
                _state = State.PseudoElement;
                return;
            }

            if (token.Type == CssTokenType.Function)
            {
                var sel = GetPseudoFunction(token as CssFunctionToken);

                if (sel != null)
                {
                    Insert(sel);
                    return;
                }
            }
            else if (token.Type == CssTokenType.Ident)
            {
                var sel = _pseudoClassSelector.Create(token.Data);

                if (sel != null)
                {
                    Insert(sel);
                    return;
                }
            }

            _valid = false;
        }

        /// <summary>
        /// Handles a token after a double colon (pseudo-element name).
        /// </summary>
        private void OnPseudoElement(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Ident)
            {
                var sel = _pseudoElementSelector.Create(token.Data);

                if (sel != null)
                {
                    // Pseudo-elements invalidate a constructor flagged as nested.
                    _valid = _valid && !_nested;
                    Insert(sel);
                    return;
                }
            }

            _valid = false;
        }

        /// <summary>
        /// Handles a token after a dot (class name).
        /// </summary>
        private void OnClass(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Ident)
            {
                Insert(SimpleSelector.Class(token.Data));
            }
            else
            {
                _valid = false;
            }
        }

        #endregion

        #region Insertion helpers

        /// <summary>
        /// Closes the current selector and appends it to the selector group.
        /// Does nothing if there is no current selector.
        /// </summary>
        private void InsertOr()
        {
            if (_temp == null)
            {
                return;
            }

            if (_group == null)
            {
                _group = new ListSelector();
            }

            if (_complex != null)
            {
                _complex.ConcludeSelector(_temp);
                _group.Add(_complex);
                _complex = null;
            }
            else
            {
                _group.Add(_temp);
            }

            _temp = null;
        }

        /// <summary>
        /// Adds a selector, either compounding it with the current one or
        /// chaining it through the pending combinator(s).
        /// </summary>
        /// <param name="selector">The selector to add.</param>
        private void Insert(ISelector selector)
        {
            if (_temp == null)
            {
                // Combinators without a left-hand side are dropped.
                _combinators.Clear();
                _temp = selector;
                return;
            }

            if (_combinators.Count == 0)
            {
                var compound = _temp as CompoundSelector;

                if (compound == null)
                {
                    compound = new CompoundSelector { _temp };
                }

                compound.Add(selector);
                _temp = compound;
            }
            else
            {
                if (_complex == null)
                {
                    _complex = new ComplexSelector();
                }

                var combinator = GetCombinator();
                _complex.AppendSelector(_temp, combinator);
                _temp = selector;
            }
        }

        /// <summary>
        /// Reduces the pending combinator stack to a single combinator. Must
        /// only be called while at least one combinator is pending.
        /// </summary>
        /// <returns>The effective combinator.</returns>
        private CssCombinator GetCombinator()
        {
            // Trailing whitespace around an explicit combinator is irrelevant.
            while (_combinators.Count > 1 && _combinators.Peek() == CssCombinator.Descendent)
            {
                _combinators.Pop();
            }

            if (_combinators.Count <= 1)
            {
                return _combinators.Pop();
            }

            var top = _combinators.Pop();
            var below = _combinators.Pop();

            if (top == CssCombinator.Child && below == CssCombinator.Child)
            {
                // Two child combinators form a descendent, three a deep combinator.
                if (_combinators.Count == 0 || _combinators.Peek() != CssCombinator.Child)
                {
                    top = CssCombinator.Descendent;
                }
                else if (_combinators.Pop() == CssCombinator.Child)
                {
                    top = CssCombinator.Deep;
                }
            }
            else if (top == CssCombinator.Namespace && below == CssCombinator.Namespace)
            {
                top = CssCombinator.Column;
            }
            else
            {
                _combinators.Push(below);
            }

            // Anything left over besides descendent combinators is an error.
            while (_combinators.Count > 0)
            {
                _valid = _combinators.Pop() == CssCombinator.Descendent && _valid;
            }

            return top;
        }

        /// <summary>
        /// Pushes a combinator onto the stack of pending combinators.
        /// </summary>
        /// <param name="cssCombinator">The combinator to remember.</param>
        private void Insert(CssCombinator cssCombinator)
        {
            _combinators.Push(cssCombinator);
        }

        /// <summary>
        /// Handles a delimiter token in the data state.
        /// </summary>
        private void OnDelim(CssToken token)
        {
            switch (token.Data[0])
            {
                case ',':
                    InsertOr();
                    _ready = false;
                    break;
                case '>':
                    Insert(CssCombinator.Child);
                    _ready = false;
                    break;
                case '+':
                    Insert(CssCombinator.AdjacentSibling);
                    _ready = false;
                    break;
                case '~':
                    Insert(CssCombinator.Sibling);
                    _ready = false;
                    break;
                case '*':
                    Insert(SimpleSelector.All);
                    _ready = true;
                    break;
                case '.':
                    _state = State.Class;
                    _ready = false;
                    break;
                case '|':
                    // A pipe following whitespace gets an empty type selector
                    // as its left-hand side.
                    if (_combinators.Count > 0 && _combinators.Peek() == CssCombinator.Descendent)
                    {
                        Insert(SimpleSelector.Type(string.Empty));
                    }

                    Insert(CssCombinator.Namespace);
                    _ready = false;
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        /// <summary>
        /// Creates the selector for a functional pseudo-class.
        /// </summary>
        /// <param name="arguments">The function token with its argument tokens.</param>
        /// <returns>
        /// The selector, or null if the function is unknown, its arguments are
        /// invalid or incomplete, or the token is not a function token.
        /// </returns>
        private ISelector GetPseudoFunction(CssFunctionToken arguments)
        {
            Func<CssSelectorConstructor, FunctionState> creator;

            // The caller passes the result of an "as" cast, hence the null check.
            if (arguments == null || !pseudoClassFunctions.TryGetValue(arguments.Data, out creator))
            {
                return null;
            }

            using (var function = creator(this))
            {
                _ready = false;

                foreach (var argument in arguments)
                {
                    if (function.Finished(argument))
                    {
                        var result = function.Produce();

                        // :not() is rejected inside a constructor flagged as nested.
                        if (_nested && function is NotFunctionState)
                        {
                            result = null;
                        }

                        _ready = true;
                        return result;
                    }
                }
            }

            return null;
        }

        /// <summary>
        /// Obtains a constructor from the pool that shares this instance's
        /// factories and is already marked as invoked.
        /// </summary>
        private CssSelectorConstructor CreateChild()
        {
            var child = Pool.NewSelectorConstructor(_attributeSelector, _pseudoClassSelector, _pseudoElementSelector);
            child._invoked = true;
            return child;
        }

        #endregion
    }
}