// XmlDomBuilder
// Represents the tree construction stage as specified in the official W3C
// specification for XML:
// http://www.w3.org/TR/REC-xml/
using AngleSharp.Dom;
using AngleSharp.Dom.Xml;
using AngleSharp.Extensions;
using AngleSharp.Services;
using AngleSharp.Xml;
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace AngleSharp.Parser.Xml
{
/// <summary>
/// Builds the node tree of an XML document from the tokens delivered by an
/// <see cref="XmlTokenizer"/>. Each token is dispatched to a handler chosen
/// by the current tree construction mode (initial, prolog, misc, body, after).
/// </summary>
internal sealed class XmlDomBuilder
{
    private readonly XmlTokenizer _tokenizer;
    private readonly Document _document;
    // Stack of elements that have been opened but not yet closed; the last
    // entry is the innermost open element.
    private readonly List<Element> _openElements;
    // Factory invoked as (document, name, prefix) for every start tag.
    private readonly Func<Document, string, string, Element> _creator;
    private XmlParserOptions _options;
    private XmlTreeMode _currentMode;
    // Standalone flag copied from the XML declaration; only written in this class.
    private bool _standalone;

    /// <summary>
    /// Gets the node that receives new children: the innermost open element,
    /// or the document itself when no element is open.
    /// </summary>
    public Node CurrentNode {
        get {
            if (_openElements.Count > 0) {
                return _openElements[_openElements.Count - 1];
            }

            return _document;
        }
    }

    /// <summary>
    /// Creates a builder that reads from the source of the given document.
    /// </summary>
    /// <param name="document">The document to populate.</param>
    /// <param name="creator">
    /// Optional element factory; when null, elements are created as
    /// <see cref="XmlElement"/> instances.
    /// </param>
    internal XmlDomBuilder(Document document, Func<Document, string, string, Element> creator = null)
    {
        // Prefer the entity provider registered in the document options and
        // fall back to the default XML entity resolver.
        IEntityProvider resolver = document.Options.GetProvider<IEntityProvider>() ?? XmlEntityService.Resolver;
        _tokenizer = new XmlTokenizer(document.Source, resolver);
        _document = document;
        _standalone = false;
        _openElements = new List<Element>();
        _currentMode = XmlTreeMode.Initial;
        _creator = creator ?? new Func<Document, string, string, Element>(CreateElement);
    }

    /// <summary>
    /// Parses the document asynchronously, prefetching more source content
    /// whenever the remaining buffered content runs low.
    /// </summary>
    /// <param name="options">The options to use while parsing.</param>
    /// <param name="cancelToken">Token passed on to the prefetch operation.</param>
    /// <returns>A task yielding the populated document.</returns>
    public async Task<Document> ParseAsync(XmlParserOptions options, CancellationToken cancelToken)
    {
        TextSource source = _document.Source;
        _options = options;
        XmlToken token;

        do {
            // Top up the buffer when less than 1024 units remain between the
            // current index and the buffered length.
            if (source.Length - source.Index < 1024) {
                await source.PrefetchAsync(8192, cancelToken).ConfigureAwait(false);
            }

            token = _tokenizer.Get();
            Consume(token);
        } while (token.Type != XmlTokenType.EndOfFile);

        return _document;
    }

    /// <summary>
    /// Parses the document synchronously until the end-of-file token has
    /// been consumed.
    /// </summary>
    /// <param name="options">The options to use while parsing.</param>
    /// <returns>The populated document.</returns>
    public Document Parse(XmlParserOptions options)
    {
        // Store the options so that the error-suppression checks in the mode
        // handlers honor what the caller asked for (as ParseAsync does).
        _options = options;
        XmlToken token = null;

        do {
            token = _tokenizer.Get();
            Consume(token);
        } while (token.Type != XmlTokenType.EndOfFile);

        return _document;
    }

    /// <summary>
    /// Dispatches the token to the handler of the current mode.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void Consume(XmlToken token)
    {
        switch (_currentMode) {
            case XmlTreeMode.Initial:
                Initial(token);
                break;
            case XmlTreeMode.Prolog:
                BeforeDoctype(token);
                break;
            case XmlTreeMode.Misc:
                InMisc(token);
                break;
            case XmlTreeMode.Body:
                InBody(token);
                break;
            case XmlTreeMode.After:
                AfterBody(token);
                break;
        }
    }

    /// <summary>
    /// Handles a token in the initial mode: an XML declaration is evaluated
    /// here, anything else switches to the prolog mode and is reprocessed.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void Initial(XmlToken token)
    {
        if (token.Type == XmlTokenType.Declaration) {
            XmlDeclarationToken declaration = (XmlDeclarationToken)token;
            _standalone = declaration.Standalone;

            if (!declaration.IsEncodingMissing) {
                SetEncoding(declaration.Encoding);
            }

            if (!CheckVersion(declaration.Version) && !_options.IsSuppressingErrors) {
                throw XmlParseError.XmlDeclarationVersionUnsupported.At(token.Position);
            }
        } else {
            _currentMode = XmlTreeMode.Prolog;
            BeforeDoctype(token);
        }
    }

    /// <summary>
    /// Handles a token in the prolog mode: a doctype is appended to the
    /// document and the mode advances to misc; other tokens are treated as
    /// misc content.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void BeforeDoctype(XmlToken token)
    {
        if (token.Type == XmlTokenType.Doctype) {
            XmlDoctypeToken doctype = (XmlDoctypeToken)token;
            DocumentType doctypeNode = new DocumentType(_document, doctype.Name) {
                SystemIdentifier = doctype.SystemIdentifier,
                PublicIdentifier = doctype.PublicIdentifier
            };
            _document.AppendChild(doctypeNode);
            _currentMode = XmlTreeMode.Misc;
        } else {
            InMisc(token);
        }
    }

    /// <summary>
    /// Handles comments and processing instructions by appending them to the
    /// current node; a start tag switches to the body mode. Any other
    /// non-ignorable token raises a missing-root error unless errors are
    /// suppressed.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void InMisc(XmlToken token)
    {
        switch (token.Type) {
            case XmlTokenType.Comment: {
                XmlCommentToken commentToken = (XmlCommentToken)token;
                IComment comment = _document.CreateComment(commentToken.Data);
                CurrentNode.AppendChild(comment);
                break;
            }
            case XmlTokenType.ProcessingInstruction: {
                XmlPIToken piToken = (XmlPIToken)token;
                IProcessingInstruction instruction = _document.CreateProcessingInstruction(piToken.Target, piToken.Content);
                CurrentNode.AppendChild(instruction);
                break;
            }
            case XmlTokenType.StartTag:
                _currentMode = XmlTreeMode.Body;
                InBody(token);
                break;
            default:
                if (!token.IsIgnorable && !_options.IsSuppressingErrors) {
                    throw XmlParseError.XmlMissingRoot.At(token.Position);
                }

                break;
        }
    }

    /// <summary>
    /// Handles a token while inside the root element: opens and closes
    /// elements, appends text, and reports misplaced tokens.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void InBody(XmlToken token)
    {
        switch (token.Type) {
            case XmlTokenType.CharacterReference:
                // Intentionally produces no node here.
                break;
            case XmlTokenType.StartTag: {
                XmlTagToken tag = (XmlTagToken)token;
                Element element = _creator(_document, tag.Name, null);
                CurrentNode.AppendChild(element);

                if (!tag.IsSelfClosing) {
                    _openElements.Add(element);
                } else if (_openElements.Count == 0) {
                    // A self-closing root element completes the body.
                    _currentMode = XmlTreeMode.After;
                }

                for (int i = 0; i < tag.Attributes.Count; i++) {
                    KeyValuePair<string, string> attribute = tag.Attributes[i];
                    // Attribute values are stored with surrounding whitespace removed.
                    element.SetAttribute(attribute.Key, attribute.Value.Trim());
                }

                break;
            }
            case XmlTokenType.EndTag: {
                XmlTagToken tag = (XmlTagToken)token;

                if (CurrentNode.NodeName.Is(tag.Name)) {
                    _openElements.RemoveAt(_openElements.Count - 1);

                    // Closing the last open element ends the body.
                    if (_openElements.Count == 0) {
                        _currentMode = XmlTreeMode.After;
                    }
                } else if (!_options.IsSuppressingErrors) {
                    throw XmlParseError.TagClosingMismatch.At(token.Position);
                }

                break;
            }
            case XmlTokenType.Comment:
            case XmlTokenType.ProcessingInstruction:
                InMisc(token);
                break;
            case XmlTokenType.CData: {
                XmlCDataToken cdata = (XmlCDataToken)token;
                CurrentNode.AppendText(cdata.Data);
                break;
            }
            case XmlTokenType.Character: {
                XmlCharacterToken character = (XmlCharacterToken)token;
                CurrentNode.AppendText(character.Data);
                break;
            }
            case XmlTokenType.EndOfFile:
                // The input ended while elements were still open.
                if (!_options.IsSuppressingErrors) {
                    throw XmlParseError.EOF.At(token.Position);
                }

                break;
            case XmlTokenType.Doctype:
                if (!_options.IsSuppressingErrors) {
                    throw XmlParseError.XmlDoctypeAfterContent.At(token.Position);
                }

                break;
            case XmlTokenType.Declaration:
                if (!_options.IsSuppressingErrors) {
                    throw XmlParseError.XmlDeclarationMisplaced.At(token.Position);
                }

                break;
        }
    }

    /// <summary>
    /// Handles a token after the root element has been closed: comments and
    /// processing instructions are still accepted, end-of-file is ignored,
    /// and any other non-ignorable token raises an error unless errors are
    /// suppressed.
    /// </summary>
    /// <param name="token">The token to consume.</param>
    private void AfterBody(XmlToken token)
    {
        switch (token.Type) {
            case XmlTokenType.EndOfFile:
                break;
            case XmlTokenType.Comment:
            case XmlTokenType.ProcessingInstruction:
                InMisc(token);
                break;
            default:
                if (!token.IsIgnorable && !_options.IsSuppressingErrors) {
                    throw XmlParseError.XmlMissingRoot.At(token.Position);
                }

                break;
        }
    }

    /// <summary>
    /// Default element factory used when no custom creator is supplied.
    /// </summary>
    /// <param name="document">The owning document.</param>
    /// <param name="name">The element name.</param>
    /// <param name="prefix">The element prefix.</param>
    /// <returns>A new <see cref="XmlElement"/>.</returns>
    private static Element CreateElement(Document document, string name, string prefix)
    {
        return new XmlElement(document, name, prefix);
    }

    /// <summary>
    /// Checks whether the declared version is supported, i.e. lies in the
    /// range [1, 2). Strings that cannot be converted yield 0 and are
    /// therefore rejected.
    /// </summary>
    /// <param name="ver">The version string of the XML declaration.</param>
    /// <returns>True if the version is supported, otherwise false.</returns>
    private bool CheckVersion(string ver)
    {
        double version = ver.ToDouble(0);
        return version >= 1 && version < 2;
    }

    /// <summary>
    /// Tries to switch the source to the encoding named in the XML
    /// declaration. Unsupported or unresolvable names are ignored.
    /// </summary>
    /// <param name="charSet">The encoding name from the declaration.</param>
    private void SetEncoding(string charSet)
    {
        if (!TextEncoding.IsSupported(charSet)) {
            return;
        }

        Encoding encoding = TextEncoding.Resolve(charSet);

        if (encoding == null) {
            return;
        }

        try {
            _document.Source.CurrentEncoding = encoding;
        } catch (NotSupportedException) {
            // The source rejected the change: reset the builder state so
            // construction starts over from the initial mode.
            // NOTE(review): ReplaceAll(null, true) presumably discards the
            // nodes built so far - confirm against Document.
            _currentMode = XmlTreeMode.Initial;
            _document.ReplaceAll(null, true);
            _openElements.Clear();
        }
    }
}
}