AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.8.4.1" />

 XmlParser

public sealed class XmlParser
For more details, see the W3C Recommendation (http://www.w3.org/TR/REC-xml/) and the description of the XML parser in the XHTML context (http://www.w3.org/html/wg/drafts/html/master/the-xhtml-syntax.html#xml-parser).
using AngleSharp.Dom;
using AngleSharp.Dom.Xml;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp.Parser.Xml
{
    /// <summary>
    /// Builds a document tree by pulling tokens from an XmlTokenizer and
    /// dispatching each one according to the current tree construction mode
    /// (Initial, Prolog, Misc, Body, After).
    /// The source is parsed at most once per instance: the first call to
    /// <see cref="Parse"/> or <see cref="ParseAsync()"/> starts the parse and
    /// later calls only hand back the existing result.
    /// </summary>
    [DebuggerStepThrough]
    public sealed class XmlParser
    {
        private readonly XmlTokenizer _tokenizer;
        private readonly Document _document;

        // Stack of elements whose end tag has not been seen yet; the last
        // entry is the element currently receiving children.
        private readonly List<Element> _openElements;

        // Guards the "start parsing exactly once" decision.
        private readonly object _syncGuard;

        private bool _started;
        private XmlTreeMode _currentMode;

        // Only assigned by ParseAsync; stays null when parsing ran synchronously.
        private Task<IDocument> _parsing;

        private bool _standalone;

        /// <summary>
        /// Gets the node that receives new children: the innermost open
        /// element, or the document itself when no element is open.
        /// </summary>
        internal Node CurrentNode
        {
            get
            {
                if (_openElements.Count > 0)
                {
                    return _openElements[_openElements.Count - 1];
                }

                return _document;
            }
        }

        /// <summary>
        /// Gets the document that is being (or has been) populated.
        /// </summary>
        public IDocument Result => _document;

        /// <summary>
        /// Gets the standalone flag taken from the XML declaration; false
        /// until a declaration has been processed.
        /// </summary>
        public bool IsStandalone => _standalone;

        /// <summary>
        /// Gets whether parsing was started through <see cref="ParseAsync()"/>,
        /// i.e. whether a parsing task exists.
        /// </summary>
        public bool IsAsync => _parsing != null;

        /// <summary>
        /// Creates a parser for the given source code.
        /// </summary>
        /// <param name="source">The XML source text.</param>
        /// <param name="configuration">Optional configuration used to create the browsing context.</param>
        public XmlParser(string source, IConfiguration configuration = null)
            : this(new XmlDocument(BrowsingContext.New(configuration), new TextSource(source)))
        {
        }

        /// <summary>
        /// Creates a parser that reads the source from the given stream, using
        /// the configuration's default encoding.
        /// </summary>
        /// <param name="stream">The stream to read the XML source from.</param>
        /// <param name="configuration">Optional configuration used to create the browsing context.</param>
        public XmlParser(Stream stream, IConfiguration configuration = null)
            : this(new XmlDocument(BrowsingContext.New(configuration), new TextSource(stream, configuration.DefaultEncoding())))
        {
        }

        /// <summary>
        /// Creates a parser that fills the given document from the document's own source.
        /// </summary>
        /// <param name="document">The document to populate.</param>
        internal XmlParser(Document document)
        {
            _tokenizer = new XmlTokenizer(document.Source, document.Options.Events);
            _syncGuard = new object();
            _started = false;
            _document = document;
            _standalone = false;
            _openElements = new List<Element>();
            _currentMode = XmlTreeMode.Initial;
        }

        /// <summary>
        /// Parses the source asynchronously without cancellation support.
        /// </summary>
        /// <returns>The task that yields the document.</returns>
        public Task<IDocument> ParseAsync()
        {
            return ParseAsync(CancellationToken.None);
        }

        /// <summary>
        /// Parses the source asynchronously. Only the first call starts the
        /// parse; the token of later calls is not used.
        /// </summary>
        /// <param name="cancelToken">The token passed on to the source prefetching.</param>
        /// <returns>
        /// The task of the running or finished asynchronous parse. If parsing
        /// was already started through <see cref="Parse"/>, an already
        /// completed task wrapping the document is returned.
        /// </returns>
        public Task<IDocument> ParseAsync(CancellationToken cancelToken)
        {
            lock (_syncGuard)
            {
                if (!_started)
                {
                    _started = true;
                    _parsing = KernelAsync(cancelToken);
                }

                // Started but no task: Parse() did the work synchronously.
                // Returning the null field would make every awaiting caller
                // fail with a NullReferenceException, so wrap the document.
                // The field itself is left untouched to keep IsAsync false.
                if (_parsing == null)
                {
                    return Task.FromResult<IDocument>(_document);
                }

                return _parsing;
            }
        }

        /// <summary>
        /// Parses the source synchronously. If parsing was already started
        /// (by this method or by <see cref="ParseAsync()"/>), nothing is
        /// parsed again and the document is returned in its current state.
        /// </summary>
        /// <returns>The document.</returns>
        public IDocument Parse()
        {
            lock (_syncGuard)
            {
                if (!_started)
                {
                    _started = true;
                    Kernel();
                }
            }

            return _document;
        }

        /// <summary>
        /// Routes a token to the handler of the current tree mode.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void Consume(XmlToken token)
        {
            switch (_currentMode)
            {
                case XmlTreeMode.Initial:
                    Initial(token);
                    break;
                case XmlTreeMode.Prolog:
                    BeforeDoctype(token);
                    break;
                case XmlTreeMode.Misc:
                    InMisc(token);
                    break;
                case XmlTreeMode.Body:
                    InBody(token);
                    break;
                case XmlTreeMode.After:
                    AfterBody(token);
                    break;
            }
        }

        /// <summary>
        /// Handles the very first token. An XML declaration is consumed here
        /// (standalone flag, encoding, version check); anything else is passed
        /// on to the prolog handling. In both cases the initial mode is left,
        /// so a declaration is accepted only as the first token.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void Initial(XmlToken token)
        {
            if (token.Type != XmlTokenType.Declaration)
            {
                _currentMode = XmlTreeMode.Prolog;
                BeforeDoctype(token);
                return;
            }

            XmlDeclarationToken declaration = (XmlDeclarationToken)token;

            // Advance BEFORE changing the encoding: SetEncoding may put the
            // mode back to Initial (its NotSupportedException path) and that
            // reset has to win over this assignment.
            _currentMode = XmlTreeMode.Prolog;
            _standalone = declaration.Standalone;

            if (!declaration.IsEncodingMissing)
            {
                SetEncoding(declaration.Encoding);
            }

            if (!CheckVersion(declaration.Version))
            {
                throw XmlParseError.XmlDeclarationVersionUnsupported.At(token.Position);
            }
        }

        /// <summary>
        /// Handles tokens before the doctype. A doctype token is appended to
        /// the document and switches to the Misc mode; every other token is
        /// treated like a token in the Misc mode.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void BeforeDoctype(XmlToken token)
        {
            if (token.Type != XmlTokenType.Doctype)
            {
                InMisc(token);
                return;
            }

            XmlDoctypeToken doctype = (XmlDoctypeToken)token;
            _document.AppendChild(new DocumentType(_document, doctype.Name)
            {
                SystemIdentifier = doctype.SystemIdentifier,
                PublicIdentifier = doctype.PublicIdentifier
            });
            _currentMode = XmlTreeMode.Misc;
        }

        /// <summary>
        /// Handles comments and processing instructions (appended to the
        /// current node) and detects the root element: a start tag switches to
        /// the Body mode. A declaration at this point is misplaced; any other
        /// token that is not ignorable means the root element is missing.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void InMisc(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.Comment:
                {
                    XmlCommentToken commentToken = (XmlCommentToken)token;
                    IComment comment = _document.CreateComment(commentToken.Data);
                    CurrentNode.AppendChild(comment);
                    break;
                }
                case XmlTokenType.ProcessingInstruction:
                {
                    XmlPIToken piToken = (XmlPIToken)token;
                    IProcessingInstruction instruction = _document.CreateProcessingInstruction(piToken.Target, piToken.Content);
                    CurrentNode.AppendChild(instruction);
                    break;
                }
                case XmlTokenType.StartTag:
                    _currentMode = XmlTreeMode.Body;
                    InBody(token);
                    break;
                case XmlTokenType.Declaration:
                    // Same error as InBody raises for a late declaration.
                    throw XmlParseError.XmlDeclarationMisplaced.At(token.Position);
                default:
                    if (!token.IsIgnorable)
                    {
                        throw XmlParseError.XmlMissingRoot.At(token.Position);
                    }

                    break;
            }
        }

        /// <summary>
        /// Handles tokens inside the root element: opens and closes elements,
        /// appends text, and rejects tokens that may not appear in content.
        /// Switches to the After mode once the root element is closed.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void InBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.CharacterReference:
                    // Produces no node here.
                    break;
                case XmlTokenType.StartTag:
                {
                    XmlTagToken startTag = (XmlTagToken)token;
                    XmlElement element = new XmlElement(_document, startTag.Name, null);
                    CurrentNode.AppendChild(element);

                    if (!startTag.IsSelfClosing)
                    {
                        _openElements.Add(element);
                    }
                    else if (_openElements.Count == 0)
                    {
                        // A self-closing root element is the whole body.
                        _currentMode = XmlTreeMode.After;
                    }

                    for (int i = 0; i < startTag.Attributes.Count; i++)
                    {
                        // Values are stored with surrounding whitespace trimmed.
                        element.SetAttribute(startTag.Attributes[i].Key, startTag.Attributes[i].Value.Trim());
                    }

                    break;
                }
                case XmlTokenType.EndTag:
                {
                    XmlTagToken endTag = (XmlTagToken)token;

                    if (CurrentNode.NodeName != endTag.Name)
                    {
                        throw XmlParseError.TagClosingMismatch.At(token.Position);
                    }

                    _openElements.RemoveAt(_openElements.Count - 1);

                    if (_openElements.Count == 0)
                    {
                        _currentMode = XmlTreeMode.After;
                    }

                    break;
                }
                case XmlTokenType.Comment:
                case XmlTokenType.ProcessingInstruction:
                    InMisc(token);
                    break;
                case XmlTokenType.Entity:
                {
                    XmlEntityToken entityToken = (XmlEntityToken)token;
                    string entity = entityToken.GetEntity();
                    CurrentNode.AppendText(entity);
                    break;
                }
                case XmlTokenType.CData:
                {
                    XmlCDataToken cdataToken = (XmlCDataToken)token;
                    CurrentNode.AppendText(cdataToken.Data);
                    break;
                }
                case XmlTokenType.Character:
                {
                    XmlCharacterToken characterToken = (XmlCharacterToken)token;
                    CurrentNode.AppendText(characterToken.Data.ToString());
                    break;
                }
                case XmlTokenType.EndOfFile:
                    // The input ended while elements were still open.
                    throw XmlParseError.EOF.At(token.Position);
                case XmlTokenType.Doctype:
                    throw XmlParseError.XmlDoctypeAfterContent.At(token.Position);
                case XmlTokenType.Declaration:
                    throw XmlParseError.XmlDeclarationMisplaced.At(token.Position);
            }
        }

        /// <summary>
        /// Handles tokens after the root element was closed. Only comments,
        /// processing instructions, ignorable tokens and the end of the file
        /// are accepted.
        /// </summary>
        /// <param name="token">The token to process.</param>
        private void AfterBody(XmlToken token)
        {
            switch (token.Type)
            {
                case XmlTokenType.EndOfFile:
                    break;
                case XmlTokenType.Comment:
                case XmlTokenType.ProcessingInstruction:
                    InMisc(token);
                    break;
                default:
                    if (!token.IsIgnorable)
                    {
                        throw XmlParseError.XmlMissingRoot.At(token.Position);
                    }

                    break;
            }
        }

        /// <summary>
        /// Checks whether the declared version is supported, i.e. parses
        /// (invariant culture) to a number in the range [1, 2).
        /// </summary>
        /// <param name="ver">The version string of the XML declaration.</param>
        /// <returns>True if the version is supported, otherwise false.</returns>
        private bool CheckVersion(string ver)
        {
            double version;

            return double.TryParse(ver, NumberStyles.Any, CultureInfo.InvariantCulture, out version)
                && version >= 1
                && version < 2;
        }

        /// <summary>
        /// Synchronous parse loop: consumes tokens until the end-of-file token
        /// has been processed.
        /// </summary>
        private void Kernel()
        {
            XmlToken token;

            do
            {
                token = _tokenizer.Get();
                Consume(token);
            }
            while (token.Type != XmlTokenType.EndOfFile);
        }

        /// <summary>
        /// Asynchronous parse loop: same as <see cref="Kernel"/>, but asks the
        /// source to prefetch 8192 more characters whenever fewer than 1024
        /// remain between the read index and the current length.
        /// </summary>
        /// <param name="cancelToken">The token passed on to the prefetching.</param>
        /// <returns>The task that yields the populated document.</returns>
        private async Task<IDocument> KernelAsync(CancellationToken cancelToken)
        {
            TextSource source = _document.Source;
            XmlToken token;

            do
            {
                if (source.Length - source.Index < 1024)
                {
                    await source.Prefetch(8192, cancelToken).ConfigureAwait(false);
                }

                token = _tokenizer.Get();
                Consume(token);
            }
            while (token.Type != XmlTokenType.EndOfFile);

            return _document;
        }

        /// <summary>
        /// Applies the encoding named in the XML declaration to the source.
        /// Unsupported or unresolvable names are ignored.
        /// </summary>
        /// <param name="charSet">The encoding name of the XML declaration.</param>
        private void SetEncoding(string charSet)
        {
            if (!TextEncoding.IsSupported(charSet))
            {
                return;
            }

            Encoding encoding = TextEncoding.Resolve(charSet);

            if (encoding == null)
            {
                return;
            }

            try
            {
                _document.Source.CurrentEncoding = encoding;
            }
            catch (NotSupportedException)
            {
                // The source refused the switch: discard everything built so
                // far and return to the initial state (presumably so that the
                // input is processed again from the start - confirm against
                // TextSource).
                _currentMode = XmlTreeMode.Initial;
                _document.ReplaceAll(null, true);
                _openElements.Clear();
            }
        }
    }
}