// BaseTokenizer
// Common methods and variables of all tokenizers.
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Text;
namespace AngleSharp.Parser
{
/// <summary>
/// Common methods and variables of all tokenizers.
/// </summary>
/// <remarks>
/// Wraps a <see cref="TextSource"/> and moves over it one character at a
/// time, forwards or backwards, while keeping a row / column position.
/// A "\r\n" pair and a lone '\r' are both reported as a single '\n'.
/// </remarks>
internal abstract class BaseTokenizer : IDisposable
{
    // NOTE(review): the end-of-input literal was empty ('') in the reviewed
    // text and did not compile. char.MaxValue is assumed to be the value
    // TextSource.ReadCharacter yields once the input is exhausted - confirm
    // against TextSource before merging.
    private const char EndOfFile = char.MaxValue;

    // Value of Current while the source index is 0, i.e. before any character
    // has been read. NOTE(review): the reviewed text shows a space here;
    // confirm it was not meant to be '\0'.
    private const char InitialCharacter = ' ';

    // Column that was active at each line break passed while advancing, so
    // stepping back over that break can restore it.
    private readonly Stack<ushort> _columns;
    private readonly TextSource _source;
    private ushort _column;
    private ushort _row;
    private char _current;

    /// <summary>
    /// Gets or sets the scratch buffer; it is null once the tokenizer has
    /// been disposed.
    /// </summary>
    protected StringBuilder StringBuffer { get; set; }

    /// <summary>
    /// Gets the underlying text source.
    /// </summary>
    public TextSource Source => _source;

    /// <summary>
    /// Gets or sets the index in the source. Setting it walks character by
    /// character to the requested index so that row, column and the current
    /// character stay in sync.
    /// </summary>
    public int InsertionPoint {
        get {
            return _source.Index;
        }
        protected set {
            // Positive distance: the target lies behind us; negative: ahead.
            int distance = _source.Index - value;
            while (distance > 0) {
                BackUnsafe();
                distance--;
            }
            while (distance < 0) {
                AdvanceUnsafe();
                distance++;
            }
        }
    }

    /// <summary>
    /// Gets the current row; the first row is 1.
    /// </summary>
    public ushort Line => _row;

    /// <summary>
    /// Gets the current column; it is 0 while nothing has been read.
    /// </summary>
    public ushort Column => _column;

    /// <summary>
    /// Gets the index in the source.
    /// </summary>
    public int Position => _source.Index;

    /// <summary>
    /// Gets the most recently read (normalized) character.
    /// </summary>
    protected char Current => _current;

    /// <summary>
    /// Creates a tokenizer positioned before the first character of the
    /// given source.
    /// </summary>
    /// <param name="source">The text to tokenize.</param>
    public BaseTokenizer(TextSource source)
    {
        StringBuffer = Pool.NewStringBuilder();
        _columns = new Stack<ushort>();
        _source = source;
        _current = InitialCharacter;
        _column = 0;
        _row = 1;
    }

    /// <summary>
    /// Returns the content of the string buffer and empties the buffer.
    /// </summary>
    /// <returns>The text that was buffered.</returns>
    public string FlushBuffer()
    {
        string content = StringBuffer.ToString();
        StringBuffer.Clear();
        return content;
    }

    /// <summary>
    /// Disposes the source and hands the string buffer back to the pool.
    /// Calls after the first one do nothing.
    /// </summary>
    public void Dispose()
    {
        // A null buffer marks an already disposed tokenizer.
        if (StringBuffer == null) {
            return;
        }

        ((IDisposable)_source)?.Dispose();
        StringBuffer.Clear().ToPool();
        StringBuffer = null;
    }

    /// <summary>
    /// Captures row, column and source index as a text position.
    /// </summary>
    /// <returns>The position built from the current state.</returns>
    public TextPosition GetCurrentPosition()
    {
        return new TextPosition(_row, _column, Position);
    }

    /// <summary>
    /// Checks whether the upcoming text, starting at the current character,
    /// matches the given string according to the Isi extension.
    /// </summary>
    /// <param name="s">The text to look for.</param>
    /// <returns>True if the source continues with the text.</returns>
    protected bool ContinuesWithInsensitive(string s)
    {
        string upcoming = PeekString(s.Length);
        return upcoming.Length == s.Length && upcoming.Isi(s);
    }

    /// <summary>
    /// Checks whether the upcoming text, starting at the current character,
    /// is exactly the given string (ordinal, case-sensitive).
    /// </summary>
    /// <param name="s">The text to look for.</param>
    /// <returns>True if the source continues with the text.</returns>
    protected bool ContinuesWithSensitive(string s)
    {
        // This used to call Isi just like the insensitive variant, which made
        // both methods behave the same; compare ordinally instead.
        string upcoming = PeekString(s.Length);
        return string.Equals(upcoming, s, StringComparison.Ordinal);
    }

    /// <summary>
    /// Reads up to <paramref name="length"/> characters beginning one
    /// position before the source index and then restores the index.
    /// </summary>
    /// <param name="length">The number of characters to read.</param>
    /// <returns>The characters delivered by the source.</returns>
    protected string PeekString(int length)
    {
        int restore = _source.Index;
        // Presumably the index sits just past Current, so stepping back one
        // makes the peek start at Current - verify against TextSource.
        _source.Index--;
        string peeked = _source.ReadCharacters(length);
        _source.Index = restore;
        return peeked;
    }

    /// <summary>
    /// Advances at least once and keeps advancing while the character read
    /// is a space character.
    /// </summary>
    /// <returns>The first character that is not a space character.</returns>
    protected char SkipSpaces()
    {
        char candidate;
        do {
            candidate = GetNext();
        } while (candidate.IsSpaceCharacter());
        return candidate;
    }

    /// <summary>
    /// Advances by one character and returns the new current character.
    /// </summary>
    /// <returns>The current character after advancing.</returns>
    protected char GetNext()
    {
        Advance();
        return _current;
    }

    /// <summary>
    /// Steps back by one character and returns the new current character.
    /// </summary>
    /// <returns>The current character after stepping back.</returns>
    protected char GetPrevious()
    {
        Back();
        return _current;
    }

    /// <summary>
    /// Advances by one character unless the end of the input was reached.
    /// </summary>
    protected void Advance()
    {
        if (_current != EndOfFile) {
            AdvanceUnsafe();
        }
    }

    /// <summary>
    /// Advances by up to <paramref name="n"/> characters, stopping early at
    /// the end of the input.
    /// </summary>
    /// <param name="n">The number of characters to advance.</param>
    protected void Advance(int n)
    {
        for (; n > 0 && _current != EndOfFile; n--) {
            AdvanceUnsafe();
        }
    }

    /// <summary>
    /// Steps back by one character unless the source index is already 0.
    /// </summary>
    protected void Back()
    {
        if (InsertionPoint > 0) {
            BackUnsafe();
        }
    }

    /// <summary>
    /// Steps back by up to <paramref name="n"/> characters, stopping early
    /// when the source index reaches 0.
    /// </summary>
    /// <param name="n">The number of characters to step back.</param>
    protected void Back(int n)
    {
        for (; n > 0 && InsertionPoint > 0; n--) {
            BackUnsafe();
        }
    }

    /// <summary>
    /// Reads the next character without checking for the end of the input
    /// and updates row and column.
    /// </summary>
    private void AdvanceUnsafe()
    {
        if (_current == '\n') {
            // Leaving a line: remember its last column for a later step back.
            _columns.Push(_column);
            _column = 1;
            _row++;
        } else {
            _column++;
        }

        _current = NormalizeForward(_source.ReadCharacter());
    }

    /// <summary>
    /// Moves the source index back by one without a bounds check and
    /// recomputes the current character, row and column.
    /// </summary>
    private void BackUnsafe()
    {
        _source.Index--;

        if (_source.Index == 0) {
            // Back at the very start: nothing has been read any more.
            _column = 0;
            _current = InitialCharacter;
            return;
        }

        char previous = _source[_source.Index - 1];

        if (previous == '\r') {
            if (_source.Index < _source.Length && _source[_source.Index] == '\n') {
                // The step only crossed the '\n' of a "\r\n" pair. The pair
                // counts as one character, so step once more; that nested
                // call does all the bookkeeping. (A sentinel return value was
                // used for this before, which made a real space in the input
                // look like "already handled".)
                BackUnsafe();
                return;
            }

            // A lone '\r' is reported as '\n', as when reading forwards.
            previous = '\n';
        }

        if (previous == '\n') {
            // Back on a line break: restore the column saved when it was passed.
            _column = (ushort)((_columns.Count == 0) ? 1 : _columns.Pop());
            _row--;
        } else {
            _column--;
        }

        _current = previous;
    }

    /// <summary>
    /// Maps a '\r' that was just read to '\n', consuming a directly
    /// following '\n' as part of the same line break.
    /// </summary>
    /// <param name="p">The character that was just read.</param>
    /// <returns>The character to expose as current.</returns>
    private char NormalizeForward(char p)
    {
        if (p == '\r') {
            // Look at the next character; un-read it unless it is the '\n'
            // that completes a "\r\n" pair.
            if (_source.ReadCharacter() != '\n') {
                _source.Index--;
            }

            return '\n';
        }

        return p;
    }
}
}