AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="0.9.4" />

 CssSelectorConstructor

sealed class CssSelectorConstructor
Class for constructing CSS selectors, as specified in http://www.w3.org/html/wg/drafts/html/master/selectors.html.
using AngleSharp.Css;
using AngleSharp.Dom;
using AngleSharp.Dom.Css;
using AngleSharp.Dom.Html;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;

namespace AngleSharp.Parser.Css
{
    /// <summary>
    /// Builds an <see cref="ISelector"/> from a sequence of CSS tokens.
    /// Tokens are fed one at a time through <see cref="Apply"/>; the
    /// finished selector is obtained with <see cref="GetResult"/>.
    /// </summary>
    [DebuggerStepThrough]
    internal sealed class CssSelectorConstructor
    {
        #region Nested types

        /// <summary>
        /// The parsing states of the constructor. <see cref="Data"/> is the
        /// neutral state in which a new simple selector or combinator may start.
        /// </summary>
        private enum State
        {
            Data,
            Attribute,
            AttributeOperator,
            AttributeValue,
            AttributeEnd,
            Class,
            PseudoClass,
            PseudoElement
        }

        /// <summary>
        /// Base class for the argument parsers of functional pseudo-classes.
        /// </summary>
        private abstract class FunctionState
        {
            /// <summary>
            /// Feeds the next argument token to the parser.
            /// </summary>
            /// <param name="token">The token to consume.</param>
            /// <returns>True once the argument list has been closed.</returns>
            public bool Finished(CssToken token)
            {
                return OnToken(token);
            }

            /// <summary>
            /// Creates the selector for the consumed arguments.
            /// </summary>
            /// <returns>The selector, or null if the arguments were invalid.</returns>
            public abstract ISelector Produce();

            /// <summary>
            /// Consumes a single argument token.
            /// </summary>
            /// <param name="token">The token to consume.</param>
            /// <returns>True once the argument list has been closed.</returns>
            protected abstract bool OnToken(CssToken token);
        }

        /// <summary>
        /// Shared implementation for pseudo-class functions whose argument is
        /// itself a selector, parsed by a nested constructor taken from the pool.
        /// </summary>
        private abstract class NestedFunctionState : FunctionState
        {
            private readonly CssSelectorConstructor _nested;

            /// <summary>
            /// Obtains a nested constructor from the pool.
            /// </summary>
            /// <param name="isNested">
            /// True to flag the nested constructor as nested (sets its
            /// <see cref="CssSelectorConstructor.IsNested"/> property).
            /// </param>
            protected NestedFunctionState(bool isNested)
            {
                _nested = Pool.NewSelectorConstructor();

                if (isNested)
                {
                    _nested.IsNested = true;
                }
            }

            protected override bool OnToken(CssToken token)
            {
                // A closing bracket only terminates the function if the nested
                // constructor is idle; otherwise it belongs to the nested selector.
                if (token.Type == CssTokenType.RoundBracketClose && _nested._state == State.Data)
                {
                    return true;
                }

                _nested.Apply(token);
                return false;
            }

            public override ISelector Produce()
            {
                // Validity is read BEFORE ToPool() is called; ToPool() presumably
                // resets the constructor when returning it to the pool
                // (NOTE(review): confirm against the Pool implementation).
                bool isValid = _nested.IsValid;
                ISelector selector = _nested.ToPool();

                if (!isValid)
                {
                    return null;
                }

                return Create(selector);
            }

            /// <summary>
            /// Wraps the (valid) nested selector in the concrete pseudo-class.
            /// </summary>
            /// <param name="selector">The selector produced by the nested constructor.</param>
            /// <returns>The pseudo-class selector.</returns>
            protected abstract ISelector Create(ISelector selector);
        }

        /// <summary>
        /// Argument parser for the negation pseudo-class.
        /// </summary>
        private sealed class NotFunctionState : NestedFunctionState
        {
            public NotFunctionState()
                : base(true)
            {
            }

            protected override ISelector Create(ISelector selector)
            {
                return SimpleSelector.PseudoClass((IElement el) => !selector.Match(el), PseudoClassNames.Not + "(" + selector.Text + ")");
            }
        }

        /// <summary>
        /// Argument parser for the has pseudo-class; matches when the nested
        /// selector finds something among the element's child nodes.
        /// </summary>
        private sealed class HasFunctionState : NestedFunctionState
        {
            public HasFunctionState()
                : base(false)
            {
            }

            protected override ISelector Create(ISelector selector)
            {
                return SimpleSelector.PseudoClass((IElement el) => el.ChildNodes.QuerySelector(selector) != null, PseudoClassNames.Has + "(" + selector.Text + ")");
            }
        }

        /// <summary>
        /// Argument parser for the matches pseudo-class.
        /// </summary>
        private sealed class MatchesFunctionState : NestedFunctionState
        {
            public MatchesFunctionState()
                : base(false)
            {
            }

            protected override ISelector Create(ISelector selector)
            {
                return SimpleSelector.PseudoClass((IElement el) => selector.Match(el), PseudoClassNames.Matches + "(" + selector.Text + ")");
            }
        }

        /// <summary>
        /// Argument parser for the host-context pseudo-class; matches when the
        /// shadow host of the element, or one of the host's ancestors, matches
        /// the nested selector.
        /// </summary>
        private sealed class HostContextFunctionState : NestedFunctionState
        {
            public HostContextFunctionState()
                : base(false)
            {
            }

            protected override ISelector Create(ISelector selector)
            {
                return SimpleSelector.PseudoClass(delegate(IElement el)
                {
                    IShadowRoot shadowRoot = el.Parent as IShadowRoot;
                    IElement current = (shadowRoot != null) ? shadowRoot.Host : null;

                    while (current != null)
                    {
                        if (selector.Match(current))
                        {
                            return true;
                        }

                        current = current.ParentElement;
                    }

                    return false;
                }, PseudoClassNames.HostContext + "(" + selector.Text + ")");
            }
        }

        /// <summary>
        /// Argument parser for the dir pseudo-class. Accepts a single
        /// identifier surrounded by optional whitespace.
        /// </summary>
        private sealed class DirFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public DirFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident)
                {
                    _value = token.Data;
                    return false;
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }

                if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                string pseudoClass = PseudoClassNames.Dir + "(" + _value + ")";

                return SimpleSelector.PseudoClass(delegate(IElement el)
                {
                    // Only HTML elements expose a direction.
                    IHtmlElement html = el as IHtmlElement;
                    return html != null && _value.Isi(html.Direction);
                }, pseudoClass);
            }
        }

        /// <summary>
        /// Argument parser for the lang pseudo-class. Accepts a single
        /// identifier surrounded by optional whitespace.
        /// </summary>
        private sealed class LangFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public LangFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident)
                {
                    _value = token.Data;
                    return false;
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }

                if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                string pseudoClass = PseudoClassNames.Lang + "(" + _value + ")";

                return SimpleSelector.PseudoClass(delegate(IElement el)
                {
                    IHtmlElement html = el as IHtmlElement;

                    if (html == null)
                    {
                        return false;
                    }

                    // Defensive: treat a missing language as "no match"
                    // instead of dereferencing null.
                    string language = html.Language;
                    return language != null && language.StartsWith(_value, StringComparison.OrdinalIgnoreCase);
                }, pseudoClass);
            }
        }

        /// <summary>
        /// Argument parser for the contains pseudo-class. Accepts a single
        /// identifier or string surrounded by optional whitespace.
        /// </summary>
        private sealed class ContainsFunctionState : FunctionState
        {
            private bool _valid;
            private string _value;

            public ContainsFunctionState()
            {
                _valid = true;
                _value = null;
            }

            protected override bool OnToken(CssToken token)
            {
                if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String)
                {
                    _value = token.Data;
                    return false;
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }

                if (token.Type != CssTokenType.Whitespace)
                {
                    _valid = false;
                }

                return false;
            }

            public override ISelector Produce()
            {
                if (!_valid || _value == null)
                {
                    return null;
                }

                string pseudoClass = PseudoClassNames.Contains + "(" + _value + ")";
                return SimpleSelector.PseudoClass((IElement el) => el.TextContent.Contains(_value), pseudoClass);
            }
        }

        /// <summary>
        /// Argument parser for the nth-* family of pseudo-classes. Parses a
        /// step/offset expression ("odd", "even", "2n+1", "-n", "5", ...) and,
        /// where permitted, a trailing "of" followed by a selector.
        /// </summary>
        /// <typeparam name="T">The child selector type to instantiate.</typeparam>
        private sealed class ChildFunctionState<T> : FunctionState
            where T : ChildSelector, ISelector, new()
        {
            private enum ParseState
            {
                Initial,
                AfterInitialSign,
                Offset,
                BeforeOf,
                AfterOf
            }

            private readonly bool _allowOf;
            private bool _valid;
            private int _step;
            private int _offset;
            private int _sign;
            private ParseState _state;
            private CssSelectorConstructor _nested;

            /// <summary>
            /// Creates a new parser.
            /// </summary>
            /// <param name="withOptionalSelector">
            /// Whether a trailing "of" selector is allowed; if it is not
            /// allowed but present, the function is marked invalid.
            /// </param>
            public ChildFunctionState(bool withOptionalSelector = true)
            {
                _allowOf = withOptionalSelector;
                _valid = true;
                _sign = 1;
                _state = ParseState.Initial;
            }

            public override ISelector Produce()
            {
                // Validity of the nested constructor is read before ToPool(),
                // which is always called so that a nested constructor is handed
                // back even when the overall result is invalid.
                bool invalid = !_valid || (_nested != null && !_nested.IsValid);
                ISelector kind = (_nested != null) ? _nested.ToPool() : SimpleSelector.All;

                if (invalid)
                {
                    return null;
                }

                T selector = new T();
                return selector.With(_step, _offset, kind);
            }

            protected override bool OnToken(CssToken token)
            {
                switch (_state)
                {
                    case ParseState.Initial:
                        return OnInitial(token);
                    case ParseState.AfterInitialSign:
                        return OnAfterInitialSign(token);
                    case ParseState.Offset:
                        return OnOffset(token);
                    case ParseState.BeforeOf:
                        return OnBeforeOf(token);
                    default:
                        return OnAfter(token);
                }
            }

            /// <summary>
            /// Handles the first significant token: a keyword, a leading sign,
            /// or the step / offset itself.
            /// </summary>
            private bool OnInitial(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Data.Isi("odd"))
                {
                    _state = ParseState.BeforeOf;
                    _step = 2;
                    _offset = 1;
                    return false;
                }

                if (token.Data.Isi("even"))
                {
                    _state = ParseState.BeforeOf;
                    _step = 2;
                    _offset = 0;
                    return false;
                }

                if (token.Type == CssTokenType.Delim && (token.Data == "+" || token.Data == "-"))
                {
                    _sign = (token.Data == "-") ? -1 : 1;
                    _state = ParseState.AfterInitialSign;
                    return false;
                }

                return OnAfterInitialSign(token);
            }

            /// <summary>
            /// Handles the step part ("2n", "n", "-n") or falls through to the
            /// offset when only a plain number is given.
            /// </summary>
            private bool OnAfterInitialSign(CssToken token)
            {
                if (token.Type == CssTokenType.Number)
                {
                    return OnOffset(token);
                }

                if (token.Type == CssTokenType.Dimension)
                {
                    CssUnitToken unitToken = (CssUnitToken)token;

                    // Parse culture-invariantly: CSS numbers are not localized.
                    _valid = _valid
                        && unitToken.Unit.Isi("n")
                        && int.TryParse(token.Data, NumberStyles.Integer, CultureInfo.InvariantCulture, out _step);
                    _step *= _sign;
                    _sign = 1;
                    _state = ParseState.Offset;
                    return false;
                }

                if (token.Type == CssTokenType.Ident && token.Data.Isi("n"))
                {
                    _step = _sign;
                    _sign = 1;
                    _state = ParseState.Offset;
                    return false;
                }

                // "-n" arrives as a single identifier, so it is only accepted
                // when no explicit sign delimiter has been seen before.
                if (_state == ParseState.Initial && token.Type == CssTokenType.Ident && token.Data.Isi("-n"))
                {
                    _step = -1;
                    _state = ParseState.Offset;
                    return false;
                }

                _valid = false;
                return token.Type == CssTokenType.RoundBracketClose;
            }

            /// <summary>
            /// Handles the offset part of the expression.
            /// </summary>
            private bool OnOffset(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Type == CssTokenType.Number)
                {
                    // Parse culture-invariantly: CSS numbers are not localized.
                    _valid = _valid
                        && ((CssNumberToken)token).IsInteger
                        && int.TryParse(token.Data, NumberStyles.Integer, CultureInfo.InvariantCulture, out _offset);
                    _offset *= _sign;
                    _state = ParseState.BeforeOf;
                    return false;
                }

                return OnBeforeOf(token);
            }

            /// <summary>
            /// Handles what follows the step/offset: either the closing bracket
            /// or the "of" keyword that introduces a nested selector.
            /// </summary>
            private bool OnBeforeOf(CssToken token)
            {
                if (token.Type == CssTokenType.Whitespace)
                {
                    return false;
                }

                if (token.Data.Isi("of"))
                {
                    // When "of" is not allowed the nested selector is still
                    // consumed, but the overall result is marked invalid.
                    _valid = _allowOf;
                    _state = ParseState.AfterOf;
                    _nested = Pool.NewSelectorConstructor();
                    return false;
                }

                if (token.Type == CssTokenType.RoundBracketClose)
                {
                    return true;
                }

                _valid = false;
                return false;
            }

            /// <summary>
            /// Forwards tokens after "of" to the nested constructor until the
            /// closing bracket is seen while the nested constructor is idle.
            /// </summary>
            private bool OnAfter(CssToken token)
            {
                if (token.Type == CssTokenType.RoundBracketClose && _nested._state == State.Data)
                {
                    return true;
                }

                _nested.Apply(token);
                return false;
            }
        }

        #endregion

        #region Fields

        /// <summary>
        /// Maps the names of the functional pseudo-classes (case-insensitive)
        /// to factories for their argument parsers.
        /// </summary>
        private static readonly Dictionary<string, Func<FunctionState>> pseudoClassFunctions = new Dictionary<string, Func<FunctionState>>(StringComparer.OrdinalIgnoreCase)
        {
            { PseudoClassNames.NthChild, () => new ChildFunctionState<FirstChildSelector>(true) },
            { PseudoClassNames.NthLastChild, () => new ChildFunctionState<LastChildSelector>(true) },
            { PseudoClassNames.NthOfType, () => new ChildFunctionState<FirstTypeSelector>(false) },
            { PseudoClassNames.NthLastOfType, () => new ChildFunctionState<LastTypeSelector>(false) },
            { PseudoClassNames.NthColumn, () => new ChildFunctionState<FirstColumnSelector>(false) },
            { PseudoClassNames.NthLastColumn, () => new ChildFunctionState<LastColumnSelector>(false) },
            { PseudoClassNames.Not, () => new NotFunctionState() },
            { PseudoClassNames.Dir, () => new DirFunctionState() },
            { PseudoClassNames.Lang, () => new LangFunctionState() },
            { PseudoClassNames.Contains, () => new ContainsFunctionState() },
            { PseudoClassNames.Has, () => new HasFunctionState() },
            { PseudoClassNames.Matches, () => new MatchesFunctionState() },
            { PseudoClassNames.HostContext, () => new HostContextFunctionState() }
        };

        private readonly Stack<CssCombinator> _combinators;
        private State _state;
        private ISelector _temp;
        private ListSelector _group;
        private ComplexSelector _complex;
        private string _attrName;
        private string _attrValue;
        private string _attrOp;
        private string _attrNs;
        private bool _valid;
        private bool _ready;

        #endregion

        #region Properties

        /// <summary>
        /// Gets whether the tokens seen so far form a valid and complete selector.
        /// </summary>
        public bool IsValid
        {
            get { return _valid && _ready; }
        }

        /// <summary>
        /// Gets or sets whether this constructor parses the argument of
        /// another selector. Nested constructors reject pseudo-elements and
        /// nested negations.
        /// </summary>
        public bool IsNested { get; set; }

        #endregion

        #region ctor

        /// <summary>
        /// Creates a new constructor in its initial state.
        /// </summary>
        public CssSelectorConstructor()
        {
            _combinators = new Stack<CssCombinator>();
            Reset();
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Concludes pending selectors and returns the constructed selector.
        /// </summary>
        /// <returns>
        /// An <see cref="UnknownSelector"/> if the input was invalid; otherwise
        /// the single selector, the selector list, or the universal selector
        /// if nothing was consumed.
        /// </returns>
        public ISelector GetResult()
        {
            if (!IsValid)
            {
                return new UnknownSelector();
            }

            if (_complex != null)
            {
                _complex.ConcludeSelector(_temp);
                _temp = _complex;
                _complex = null;
            }

            if (_group == null || _group.Length == 0)
            {
                return _temp ?? SimpleSelector.All;
            }

            if (_temp == null && _group.Length == 1)
            {
                return _group[0];
            }

            if (_temp != null)
            {
                _group.Add(_temp);
                _temp = null;
            }

            return _group;
        }

        /// <summary>
        /// Consumes the next token. Comment tokens are ignored.
        /// </summary>
        /// <param name="token">The token to consume.</param>
        public void Apply(CssToken token)
        {
            if (token.Type == CssTokenType.Comment)
            {
                return;
            }

            switch (_state)
            {
                case State.Data:
                    OnData(token);
                    break;
                case State.Class:
                    OnClass(token);
                    break;
                case State.Attribute:
                    OnAttribute(token);
                    break;
                case State.AttributeOperator:
                    OnAttributeOperator(token);
                    break;
                case State.AttributeValue:
                    OnAttributeValue(token);
                    break;
                case State.AttributeEnd:
                    OnAttributeEnd(token);
                    break;
                case State.PseudoClass:
                    OnPseudoClass(token);
                    break;
                case State.PseudoElement:
                    OnPseudoElement(token);
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        /// <summary>
        /// Restores the initial state so that the instance can be reused.
        /// </summary>
        /// <returns>The current instance.</returns>
        public CssSelectorConstructor Reset()
        {
            _attrName = null;
            _attrValue = null;
            _attrNs = null;
            _attrOp = string.Empty;
            _state = State.Data;
            _combinators.Clear();
            _temp = null;
            _group = null;
            _complex = null;
            _valid = true;
            IsNested = false;
            _ready = true;
            return this;
        }

        #endregion

        #region States

        /// <summary>
        /// Handles a token in the neutral state.
        /// </summary>
        private void OnData(CssToken token)
        {
            switch (token.Type)
            {
                case CssTokenType.SquareBracketOpen:
                    _attrName = null;
                    _attrValue = null;
                    _attrOp = string.Empty;
                    _attrNs = null;
                    _state = State.Attribute;
                    _ready = false;
                    break;
                case CssTokenType.Colon:
                    _state = State.PseudoClass;
                    _ready = false;
                    break;
                case CssTokenType.Hash:
                    Insert(SimpleSelector.Id(token.Data));
                    _ready = true;
                    break;
                case CssTokenType.Ident:
                    Insert(SimpleSelector.Type(token.Data));
                    _ready = true;
                    break;
                case CssTokenType.Whitespace:
                    Insert(CssCombinator.Descendent);
                    break;
                case CssTokenType.Delim:
                    OnDelim(token);
                    break;
                case CssTokenType.Comma:
                    InsertOr();
                    _ready = false;
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        /// <summary>
        /// Handles a token right after "[": the attribute name, "*", or a
        /// namespace separator.
        /// </summary>
        private void OnAttribute(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String)
            {
                _state = State.AttributeOperator;
                _attrName = token.Data;
            }
            else if (token.Type == CssTokenType.Delim && token.ToValue() == "|")
            {
                _state = State.Attribute;
                _attrNs = string.Empty;
            }
            else if (token.Type == CssTokenType.Delim && token.ToValue() == "*")
            {
                _state = State.AttributeOperator;
                _attrName = token.ToValue();
            }
            else
            {
                _state = State.Data;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles the token after the attribute name: the operator, the
        /// namespace separator, or the closing bracket.
        /// </summary>
        private void OnAttributeOperator(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.SquareBracketClose)
            {
                _state = State.AttributeValue;
                OnAttributeEnd(token);
            }
            else if (token.Type == CssTokenType.Match || token.Type == CssTokenType.Delim)
            {
                _state = State.AttributeValue;
                _attrOp = token.ToValue();

                if (_attrOp == "|")
                {
                    // What was read as the name was actually the namespace
                    // prefix; go back and read the real attribute name.
                    _attrNs = _attrName;
                    _attrName = null;
                    _attrOp = string.Empty;
                    _state = State.Attribute;
                }
            }
            else
            {
                _state = State.AttributeEnd;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles the attribute value.
        /// </summary>
        private void OnAttributeValue(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            if (token.Type == CssTokenType.Ident || token.Type == CssTokenType.String || token.Type == CssTokenType.Number)
            {
                _state = State.AttributeEnd;
                _attrValue = token.Data;
            }
            else
            {
                _state = State.Data;
                _valid = false;
            }
        }

        /// <summary>
        /// Handles the end of an attribute selector, which must be "]".
        /// </summary>
        private void OnAttributeEnd(CssToken token)
        {
            if (token.Type == CssTokenType.Whitespace)
            {
                return;
            }

            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.SquareBracketClose)
            {
                SimpleSelector selector = CreateAttrSelector();
                Insert(selector);
            }
            else
            {
                _valid = false;
            }
        }

        /// <summary>
        /// Handles the token after a single colon.
        /// </summary>
        private void OnPseudoClass(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Colon)
            {
                // A second colon introduces a pseudo-element.
                _state = State.PseudoElement;
                return;
            }

            if (token.Type == CssTokenType.Function)
            {
                ISelector pseudoFunction = GetPseudoFunction(token as CssFunctionToken);

                if (pseudoFunction != null)
                {
                    Insert(pseudoFunction);
                    return;
                }
            }
            else if (token.Type == CssTokenType.Ident)
            {
                ISelector selector = Factory.PseudoClassSelector.Create(token.Data);

                if (selector != null)
                {
                    Insert(selector);
                    return;
                }
            }

            _valid = false;
        }

        /// <summary>
        /// Handles the token after a double colon. Pseudo-elements invalidate
        /// a nested constructor.
        /// </summary>
        private void OnPseudoElement(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Ident)
            {
                ISelector selector = Factory.PseudoElementSelector.Create(token.Data);

                if (selector != null)
                {
                    _valid = _valid && !IsNested;
                    Insert(selector);
                    return;
                }
            }

            _valid = false;
        }

        /// <summary>
        /// Handles the token after ".", which must be the class name.
        /// </summary>
        private void OnClass(CssToken token)
        {
            _state = State.Data;
            _ready = true;

            if (token.Type == CssTokenType.Ident)
            {
                Insert(SimpleSelector.Class(token.Data));
            }
            else
            {
                _valid = false;
            }
        }

        /// <summary>
        /// Handles a delimiter in the neutral state.
        /// </summary>
        private void OnDelim(CssToken token)
        {
            switch (token.Data[0])
            {
                case ',':
                    InsertOr();
                    _ready = false;
                    break;
                case '>':
                    Insert(CssCombinator.Child);
                    _ready = false;
                    break;
                case '+':
                    Insert(CssCombinator.AdjacentSibling);
                    _ready = false;
                    break;
                case '~':
                    Insert(CssCombinator.Sibling);
                    _ready = false;
                    break;
                case '*':
                    Insert(SimpleSelector.All);
                    _ready = true;
                    break;
                case '.':
                    _state = State.Class;
                    _ready = false;
                    break;
                case '|':
                    // A namespace separator preceded by whitespace has no
                    // prefix; an empty type selector stands in for it.
                    if (_combinators.Count > 0 && _combinators.Peek() == CssCombinator.Descendent)
                    {
                        Insert(SimpleSelector.Type(string.Empty));
                    }

                    Insert(CssCombinator.Namespace);
                    _ready = false;
                    break;
                default:
                    _valid = false;
                    break;
            }
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Creates the attribute selector for the collected name, operator,
        /// value and namespace. Unknown or missing operators yield a plain
        /// "attribute is present" selector.
        /// </summary>
        private SimpleSelector CreateAttrSelector()
        {
            switch (_attrOp)
            {
                case "=":
                    return SimpleSelector.AttrMatch(_attrName, _attrValue, _attrNs);
                case "~=":
                    return SimpleSelector.AttrList(_attrName, _attrValue, _attrNs);
                case "|=":
                    return SimpleSelector.AttrHyphen(_attrName, _attrValue, _attrNs);
                case "^=":
                    return SimpleSelector.AttrBegins(_attrName, _attrValue, _attrNs);
                case "$=":
                    return SimpleSelector.AttrEnds(_attrName, _attrValue, _attrNs);
                case "*=":
                    return SimpleSelector.AttrContains(_attrName, _attrValue, _attrNs);
                case "!=":
                    return SimpleSelector.AttrNotMatch(_attrName, _attrValue, _attrNs);
                default:
                    return SimpleSelector.AttrAvailable(_attrName, _attrNs);
            }
        }

        /// <summary>
        /// Closes the current selector and moves it into the selector list
        /// (called when a comma is encountered).
        /// </summary>
        private void InsertOr()
        {
            if (_temp == null)
            {
                return;
            }

            if (_group == null)
            {
                _group = new ListSelector();
            }

            if (_complex != null)
            {
                _complex.ConcludeSelector(_temp);
                _group.Add(_complex);
                _complex = null;
            }
            else
            {
                _group.Add(_temp);
            }

            _temp = null;
        }

        /// <summary>
        /// Adds a simple selector: starts a new selector, extends the current
        /// compound selector, or - if combinators are pending - appends the
        /// current selector to the complex selector and starts a new one.
        /// </summary>
        /// <param name="selector">The selector to insert.</param>
        private void Insert(ISelector selector)
        {
            if (_temp == null)
            {
                // Combinators without a left-hand side are dropped.
                _combinators.Clear();
                _temp = selector;
                return;
            }

            if (_combinators.Count == 0)
            {
                CompoundSelector compound = _temp as CompoundSelector;

                if (compound == null)
                {
                    compound = new CompoundSelector();
                    compound.Add(_temp);
                }

                compound.Add(selector);
                _temp = compound;
                return;
            }

            if (_complex == null)
            {
                _complex = new ComplexSelector();
            }

            CssCombinator combinator = GetCombinator();
            _complex.AppendSelector(_temp, combinator);
            _temp = selector;
        }

        /// <summary>
        /// Records a combinator to be resolved when the next selector arrives.
        /// </summary>
        /// <param name="cssCombinator">The combinator to record.</param>
        private void Insert(CssCombinator cssCombinator)
        {
            _combinators.Push(cssCombinator);
        }

        /// <summary>
        /// Reduces the pending combinators to the single effective one.
        /// Surrounding descendant combinators (whitespace) are discarded,
        /// repeated child / namespace combinators are folded into their
        /// multi-character forms, and any other leftover combinator
        /// invalidates the selector.
        /// </summary>
        /// <returns>The effective combinator.</returns>
        private CssCombinator GetCombinator()
        {
            // Trailing whitespace next to another combinator carries no meaning.
            while (_combinators.Count > 1 && _combinators.Peek() == CssCombinator.Descendent)
            {
                _combinators.Pop();
            }

            if (_combinators.Count <= 1)
            {
                return _combinators.Pop();
            }

            CssCombinator last = _combinators.Pop();
            CssCombinator previous = _combinators.Pop();

            if (last == CssCombinator.Child && previous == CssCombinator.Child)
            {
                if (_combinators.Count == 0 || _combinators.Peek() != CssCombinator.Child)
                {
                    // Two child combinators in a row.
                    last = CssCombinator.Descendent;
                }
                else if (_combinators.Pop() == CssCombinator.Child)
                {
                    // Three child combinators in a row.
                    last = CssCombinator.Deep;
                }
            }
            else if (last == CssCombinator.Namespace && previous == CssCombinator.Namespace)
            {
                last = CssCombinator.Column;
            }
            else
            {
                _combinators.Push(previous);
            }

            // Whatever remains must be plain whitespace; the stack is always
            // drained completely, even once the selector is known to be invalid.
            while (_combinators.Count > 0)
            {
                _valid = _combinators.Pop() == CssCombinator.Descendent && _valid;
            }

            return last;
        }

        /// <summary>
        /// Parses a functional pseudo-class.
        /// </summary>
        /// <param name="arguments">The function token holding name and argument tokens.</param>
        /// <returns>
        /// The selector, or null if the function is unknown, its arguments are
        /// invalid or unterminated, or a negation is used inside a nested selector.
        /// </returns>
        private ISelector GetPseudoFunction(CssFunctionToken arguments)
        {
            Func<FunctionState> creator;

            if (arguments == null || !pseudoClassFunctions.TryGetValue(arguments.Data, out creator))
            {
                return null;
            }

            FunctionState functionState = creator();

            // Stays false if the argument list is never closed.
            _ready = false;

            foreach (CssToken argument in arguments)
            {
                if (functionState.Finished(argument))
                {
                    ISelector result = functionState.Produce();

                    if (IsNested && functionState is NotFunctionState)
                    {
                        result = null;
                    }

                    _ready = true;
                    return result;
                }
            }

            return null;
        }

        #endregion
    }
}