AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="0.9.8" />

 XmlDomBuilder

sealed class XmlDomBuilder
Represents the Tree construction as specified in the official W3C specification for XML: http://www.w3.org/TR/REC-xml/
using AngleSharp.Dom;
using AngleSharp.Dom.Xml;
using AngleSharp.Extensions;
using AngleSharp.Services;
using AngleSharp.Xml;
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp.Parser.Xml
{
    /// <summary>
    /// Builds a document tree from the tokens produced by an
    /// <see cref="XmlTokenizer"/>. The builder is a small state machine
    /// (see <see cref="XmlTreeMode"/>): each token is dispatched to the
    /// handler of the current mode, which appends nodes to the document
    /// and/or switches the mode.
    /// </summary>
    internal sealed class XmlDomBuilder
    {
        private readonly XmlTokenizer _tokenizer;
        private readonly Document _document;

        // Stack of elements whose end tag has not been seen yet; last entry is the innermost one.
        private readonly List<Element> _openElements;

        // Factory used to create an element for every start tag.
        private readonly Func<Document, string, string, Element> _creator;

        private XmlParserOptions _options;
        private XmlTreeMode _currentMode;

        // Value of the "standalone" flag taken from the XML declaration (only written here).
        private bool _standalone;

        /// <summary>
        /// Gets the node new children are appended to: the innermost open
        /// element, or the document itself when no element is open.
        /// </summary>
        public Node CurrentNode
        {
            get
            {
                if (_openElements.Count > 0)
                {
                    return _openElements[_openElements.Count - 1];
                }

                return _document;
            }
        }

        /// <summary>
        /// Creates a new builder that fills the given document from its own source.
        /// </summary>
        /// <param name="document">The document to populate; its Source is tokenized.</param>
        /// <param name="creator">
        /// Optional element factory (document, name, prefix). When omitted,
        /// <see cref="XmlElement"/> instances are created.
        /// </param>
        internal XmlDomBuilder(Document document, Func<Document, string, string, Element> creator = null)
        {
            // Prefer an entity provider registered in the document's options; otherwise use the default resolver.
            IEntityProvider resolver = document.Options.GetProvider<IEntityProvider>() ?? XmlEntityService.Resolver;

            _tokenizer = new XmlTokenizer(document.Source, resolver);
            _document = document;
            _standalone = false;
            _openElements = new List<Element>();
            _currentMode = XmlTreeMode.Initial;
            _creator = creator ?? new Func<Document, string, string, Element>(CreateElement);
        }

        /// <summary>
        /// Parses the document's source asynchronously, prefetching more of the
        /// source whenever little unread content is left.
        /// </summary>
        /// <param name="options">The options to use while building the tree.</param>
        /// <param name="cancelToken">Token forwarded to the prefetch operation.</param>
        /// <returns>A task yielding the populated document.</returns>
        public async Task<Document> ParseAsync(XmlParserOptions options, CancellationToken cancelToken)
        {
            TextSource source = _document.Source;
            XmlToken token;

            _options = options;

            do
            {
                // Fewer than 1024 unread positions left: ask the source for more before tokenizing.
                if (source.Length - source.Index < 1024)
                {
                    await source.PrefetchAsync(8192, cancelToken).ConfigureAwait(false);
                }

                token = _tokenizer.Get();
                Consume(token);
            }
            while (token.Type != XmlTokenType.EndOfFile);

            return _document;
        }

        /// <summary>
        /// Parses the document's source synchronously.
        /// </summary>
        /// <param name="options">The options to use while building the tree.</param>
        /// <returns>The populated document.</returns>
        public Document Parse(XmlParserOptions options)
        {
            XmlToken token;

            // Store the options so the mode handlers honour them (e.g. IsSuppressingErrors),
            // exactly as ParseAsync does. Without this the argument would be silently ignored.
            _options = options;

            do
            {
                token = _tokenizer.Get();
                Consume(token);
            }
            while (token.Type != XmlTokenType.EndOfFile);

            return _document;
        }

        /// <summary>
        /// Dispatches a token to the handler of the current tree mode.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void Consume(XmlToken token)
        {
            switch (_currentMode)
            {
                case XmlTreeMode.Initial:
                    Initial(token);
                    break;

                case XmlTreeMode.Prolog:
                    BeforeDoctype(token);
                    break;

                case XmlTreeMode.Misc:
                    InMisc(token);
                    break;

                case XmlTreeMode.Body:
                    InBody(token);
                    break;

                case XmlTreeMode.After:
                    AfterBody(token);
                    break;
            }
        }

        /// <summary>
        /// Initial mode: handles an XML declaration; any other token moves the
        /// builder to the prolog mode and is reprocessed there.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void Initial(XmlToken token)
        {
            if (token.Type != XmlTokenType.Declaration)
            {
                _currentMode = XmlTreeMode.Prolog;
                BeforeDoctype(token);
                return;
            }

            XmlDeclarationToken declaration = (XmlDeclarationToken)token;
            _standalone = declaration.Standalone;

            if (!declaration.IsEncodingMissing)
            {
                SetEncoding(declaration.Encoding);
            }

            if (!CheckVersion(declaration.Version) && !_options.IsSuppressingErrors)
            {
                throw XmlParseError.XmlDeclarationVersionUnsupported.At(token.Position);
            }
        }

        /// <summary>
        /// Prolog mode: a doctype token is appended to the document as a
        /// DocumentType node and the builder moves to the misc mode; every
        /// other token is handled like in the misc mode (the mode itself is
        /// left unchanged by this method in that case).
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void BeforeDoctype(XmlToken token)
        {
            if (token.Type != XmlTokenType.Doctype)
            {
                InMisc(token);
                return;
            }

            XmlDoctypeToken doctype = (XmlDoctypeToken)token;
            DocumentType node = new DocumentType(_document, doctype.Name)
            {
                SystemIdentifier = doctype.SystemIdentifier,
                PublicIdentifier = doctype.PublicIdentifier
            };

            _document.AppendChild(node);
            _currentMode = XmlTreeMode.Misc;
        }

        /// <summary>
        /// Misc mode: appends comments and processing instructions to the
        /// current node; a start tag switches to the body mode and is
        /// reprocessed there. Any other non-ignorable token is reported as a
        /// missing root unless errors are suppressed.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void InMisc(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.Comment:
                {
                    XmlCommentToken commentToken = (XmlCommentToken)token;
                    IComment comment = _document.CreateComment(commentToken.Data);
                    CurrentNode.AppendChild(comment);
                    break;
                }

                case XmlTokenType.ProcessingInstruction:
                {
                    XmlPIToken piToken = (XmlPIToken)token;
                    IProcessingInstruction instruction = _document.CreateProcessingInstruction(piToken.Target, piToken.Content);
                    CurrentNode.AppendChild(instruction);
                    break;
                }

                case XmlTokenType.StartTag:
                    _currentMode = XmlTreeMode.Body;
                    InBody(token);
                    break;

                default:
                    if (!token.IsIgnorable && !_options.IsSuppressingErrors)
                    {
                        throw XmlParseError.XmlMissingRoot.At(token.Position);
                    }

                    break;
            }
        }

        /// <summary>
        /// Body mode: builds the element tree from start/end tags and text,
        /// and reports tokens that are not allowed inside the root element
        /// (unless errors are suppressed).
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void InBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.CharacterReference:
                    // No action is taken for this token type here.
                    break;

                case XmlTokenType.StartTag:
                {
                    XmlTagToken tag = (XmlTagToken)token;
                    Element element = _creator(_document, tag.Name, null);
                    CurrentNode.AppendChild(element);

                    if (!tag.IsSelfClosing)
                    {
                        _openElements.Add(element);
                    }
                    else if (_openElements.Count == 0)
                    {
                        // A self-closing element with nothing open is a complete root element.
                        _currentMode = XmlTreeMode.After;
                    }

                    for (int i = 0; i < tag.Attributes.Count; i++)
                    {
                        KeyValuePair<string, string> attribute = tag.Attributes[i];
                        element.SetAttribute(attribute.Key, attribute.Value.Trim());
                    }

                    break;
                }

                case XmlTokenType.EndTag:
                {
                    XmlTagToken tag = (XmlTagToken)token;

                    if (!CurrentNode.NodeName.Is(tag.Name))
                    {
                        // Mismatching end tag: an error, or silently skipped when suppressing errors.
                        if (!_options.IsSuppressingErrors)
                        {
                            throw XmlParseError.TagClosingMismatch.At(token.Position);
                        }
                    }
                    else
                    {
                        _openElements.RemoveAt(_openElements.Count - 1);

                        // Closing the last open element finishes the root element.
                        if (_openElements.Count == 0)
                        {
                            _currentMode = XmlTreeMode.After;
                        }
                    }

                    break;
                }

                case XmlTokenType.Comment:
                case XmlTokenType.ProcessingInstruction:
                    InMisc(token);
                    break;

                case XmlTokenType.CData:
                {
                    XmlCDataToken cdata = (XmlCDataToken)token;
                    CurrentNode.AppendText(cdata.Data);
                    break;
                }

                case XmlTokenType.Character:
                {
                    XmlCharacterToken character = (XmlCharacterToken)token;
                    CurrentNode.AppendText(character.Data);
                    break;
                }

                case XmlTokenType.EndOfFile:
                    // The input ended while the builder is still in body mode.
                    if (!_options.IsSuppressingErrors)
                    {
                        throw XmlParseError.EOF.At(token.Position);
                    }

                    break;

                case XmlTokenType.Doctype:
                    if (!_options.IsSuppressingErrors)
                    {
                        throw XmlParseError.XmlDoctypeAfterContent.At(token.Position);
                    }

                    break;

                case XmlTokenType.Declaration:
                    if (!_options.IsSuppressingErrors)
                    {
                        throw XmlParseError.XmlDeclarationMisplaced.At(token.Position);
                    }

                    break;
            }
        }

        /// <summary>
        /// After mode (root element completed): accepts end-of-file, comments
        /// and processing instructions. Any other non-ignorable token raises
        /// the XmlMissingRoot error unless errors are suppressed.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void AfterBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.EndOfFile:
                    break;

                case XmlTokenType.Comment:
                case XmlTokenType.ProcessingInstruction:
                    InMisc(token);
                    break;

                default:
                    if (!token.IsIgnorable && !_options.IsSuppressingErrors)
                    {
                        throw XmlParseError.XmlMissingRoot.At(token.Position);
                    }

                    break;
            }
        }

        /// <summary>
        /// Default element factory: creates an <see cref="XmlElement"/>.
        /// </summary>
        /// <param name="document">The owner document.</param>
        /// <param name="name">The element name.</param>
        /// <param name="prefix">The element prefix.</param>
        /// <returns>The created element.</returns>
        private static Element CreateElement(Document document, string name, string prefix)
        {
            return new XmlElement(document, name, prefix);
        }

        /// <summary>
        /// Checks whether the declared XML version is supported, i.e. parses
        /// to a number in the range [1, 2). The value 0 is passed to ToDouble
        /// (presumably the fallback for unparsable input — confirm).
        /// </summary>
        /// <param name="ver">The version string from the XML declaration.</param>
        /// <returns>True if the version is at least 1 and below 2.</returns>
        private bool CheckVersion(string ver)
        {
            double version = ver.ToDouble(0);
            return version >= 1 && version < 2;
        }

        /// <summary>
        /// Applies the encoding named in the XML declaration to the document's
        /// source, if that encoding is supported and can be resolved.
        /// </summary>
        /// <param name="charSet">The encoding name from the XML declaration.</param>
        private void SetEncoding(string charSet)
        {
            if (!TextEncoding.IsSupported(charSet))
            {
                return;
            }

            Encoding encoding = TextEncoding.Resolve(charSet);

            if (encoding == null)
            {
                return;
            }

            try
            {
                _document.Source.CurrentEncoding = encoding;
            }
            catch (NotSupportedException)
            {
                // The source rejected the encoding switch. Reset the builder to its
                // initial state; ReplaceAll(null, true) presumably discards the nodes
                // built so far so parsing can start over — confirm against Document.
                _currentMode = XmlTreeMode.Initial;
                _document.ReplaceAll(null, true);
                _openElements.Clear();
            }
        }
    }
}