// BaseTokenizer
// Common methods and variables of all tokenizers.
using AngleSharp.Events;
using AngleSharp.Extensions;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
namespace AngleSharp.Parser
{
/// <summary>
/// Common methods and variables of all tokenizers: a cursor over a
/// <see cref="TextSource"/> that tracks the current character together with
/// its row and column, folds CR and CR LF into a single line feed, and owns a
/// scratch <see cref="StringBuilder"/>.
/// </summary>
[DebuggerStepThrough]
internal abstract class BaseTokenizer : IDisposable
{
    /// <summary>
    /// Sentinel meaning "no character": the value of <see cref="Current"/>
    /// at the start of the source, and the marker
    /// <see cref="NormalizeBackward"/> returns once it has already stepped
    /// back over a CR LF pair.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this file previously used a plain space here, which made
    /// stepping back over an ordinary space leave the current character and
    /// column stale. NUL is assumed to be the intended sentinel - confirm
    /// against the project's symbol constants.
    /// </remarks>
    private const char NullCharacter = '\0';

    /// <summary>
    /// Marker that stops forward movement in the guarded Advance overloads.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the original literal was lost from this file (the
    /// comparison was an empty character literal, which does not compile).
    /// char.MaxValue is assumed - confirm that it matches what
    /// TextSource.ReadCharacter returns once the input is exhausted.
    /// </remarks>
    private const char EndOfFile = char.MaxValue;

    private const char LineFeed = '\n';
    private const char CarriageReturn = '\r';

    /// <summary>Scratch buffer shared with derived tokenizers.</summary>
    protected readonly StringBuilder _stringBuffer;

    /// <summary>Event aggregator supplied to the constructor.</summary>
    protected readonly IEventAggregator _events;

    // Column values saved whenever a line feed is passed going forward, so
    // that stepping back across that line feed can restore the column.
    private readonly Stack<ushort> _columns;
    private readonly TextSource _source;
    private ushort _column;
    private ushort _row;
    private char _current;

    /// <summary>
    /// Gets or sets the index of the underlying source. Setting it moves the
    /// cursor one step at a time so that row, column and current character
    /// are updated along the way.
    /// </summary>
    public int InsertionPoint
    {
        get
        {
            return _source.Index;
        }
        set
        {
            // The step count is computed once up front: positive means the
            // cursor is ahead of the target and has to step back.
            var delta = _source.Index - value;

            while (delta > 0)
            {
                BackUnsafe();
                delta--;
            }

            while (delta < 0)
            {
                AdvanceUnsafe();
                delta++;
            }
        }
    }

    /// <summary>Gets the current row; it starts at 1.</summary>
    public ushort Line => _row;

    /// <summary>
    /// Gets the current column; it is 0 at the start of the source and is
    /// reset to 1 when advancing past a line feed.
    /// </summary>
    public ushort Column => _column;

    /// <summary>Gets the index of the underlying source.</summary>
    public int Position => _source.Index;

    /// <summary>Gets the character the cursor is currently on.</summary>
    protected char Current => _current;

    /// <summary>
    /// Creates a tokenizer over the given source, positioned before the
    /// first character (row 1, column 0).
    /// </summary>
    /// <param name="source">The text source to read from.</param>
    /// <param name="events">The event aggregator exposed to derived classes.</param>
    public BaseTokenizer(TextSource source, IEventAggregator events)
    {
        _stringBuffer = Pool.NewStringBuilder();
        _events = events;
        _columns = new Stack<ushort>();
        _source = source;
        _current = NullCharacter;
        _column = 0;
        _row = 1;
    }

    /// <summary>
    /// Returns the content of the string buffer and clears the buffer.
    /// </summary>
    /// <returns>The text accumulated so far.</returns>
    public string FlushBuffer()
    {
        var content = _stringBuffer.ToString();
        _stringBuffer.Clear();
        return content;
    }

    /// <summary>
    /// Disposes the text source (if any) and calls ToPool() on the string
    /// buffer - presumably returning it to the pool it was taken from in the
    /// constructor.
    /// </summary>
    public void Dispose()
    {
        ((IDisposable)_source)?.Dispose();
        _stringBuffer.ToPool();
    }

    /// <summary>
    /// Captures the current row, column and source index.
    /// </summary>
    /// <returns>The current text position.</returns>
    public TextPosition GetCurrentPosition()
    {
        return new TextPosition(_row, _column, Position);
    }

    /// <summary>
    /// Checks whether the source, starting one index before the source's
    /// current index, reads as the given string. The source index is
    /// restored afterwards.
    /// </summary>
    /// <param name="s">The text to look for.</param>
    /// <param name="ignoreCase">
    /// True for an ordinal case-insensitive comparison, false for an exact
    /// ordinal comparison.
    /// </param>
    /// <returns>
    /// True if as many characters as <paramref name="s"/> holds could be
    /// read and they match; otherwise false.
    /// </returns>
    protected bool ContinuesWith(string s, bool ignoreCase = true)
    {
        var mark = _source.Index;
        _source.Index--;
        var text = _source.ReadCharacters(s.Length);
        _source.Index = mark;

        // A shorter read means the source ran out before s.Length characters.
        if (text.Length != s.Length)
        {
            return false;
        }

        var comparison = ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal;
        return text.Equals(s, comparison);
    }

    /// <summary>
    /// Moves the insertion point to the length of the source.
    /// </summary>
    public void ResetInsertionPoint()
    {
        InsertionPoint = _source.Length;
    }

    /// <summary>
    /// Advances at least once, then keeps advancing while the character read
    /// is a space character.
    /// </summary>
    /// <returns>The first character read that is not a space character.</returns>
    protected char SkipSpaces()
    {
        var c = GetNext();

        while (c.IsSpaceCharacter())
        {
            c = GetNext();
        }

        return c;
    }

    /// <summary>
    /// Advances by one character (unless at the end marker) and returns the
    /// resulting current character.
    /// </summary>
    /// <returns>The current character after advancing.</returns>
    protected char GetNext()
    {
        Advance();
        return _current;
    }

    /// <summary>
    /// Steps back by one character (unless at index 0) and returns the
    /// resulting current character.
    /// </summary>
    /// <returns>The current character after stepping back.</returns>
    protected char GetPrevious()
    {
        Back();
        return _current;
    }

    /// <summary>
    /// Advances by one character unless the current character is the end
    /// marker.
    /// </summary>
    protected void Advance()
    {
        if (_current != EndOfFile)
        {
            AdvanceUnsafe();
        }
    }

    /// <summary>
    /// Advances by up to <paramref name="n"/> characters, stopping early once
    /// the current character is the end marker.
    /// </summary>
    /// <param name="n">The maximum number of characters to advance.</param>
    protected void Advance(int n)
    {
        for (var i = 0; i < n && _current != EndOfFile; i++)
        {
            AdvanceUnsafe();
        }
    }

    /// <summary>
    /// Steps back by one character unless the insertion point is already 0.
    /// </summary>
    protected void Back()
    {
        if (InsertionPoint > 0)
        {
            BackUnsafe();
        }
    }

    /// <summary>
    /// Steps back by up to <paramref name="n"/> characters, stopping early
    /// once the insertion point reaches 0.
    /// </summary>
    /// <param name="n">The maximum number of characters to step back.</param>
    protected void Back(int n)
    {
        for (var i = 0; i < n && InsertionPoint > 0; i++)
        {
            BackUnsafe();
        }
    }

    /// <summary>
    /// Reads the next character without checking for the end marker and
    /// updates row and column based on the character being left.
    /// </summary>
    private void AdvanceUnsafe()
    {
        if (_current == LineFeed)
        {
            // Leaving a line feed starts a new row; remember the column so
            // BackUnsafe can restore it when it crosses this line feed again.
            _columns.Push(_column);
            _column = 1;
            _row++;
        }
        else
        {
            _column++;
        }

        _current = NormalizeForward(_source.ReadCharacter());
    }

    /// <summary>
    /// Moves the source index back by one without bounds checking and
    /// re-establishes the current character, row and column.
    /// </summary>
    private void BackUnsafe()
    {
        _source.Index--;

        if (_source.Index == 0)
        {
            // Back at the very start: same state as a fresh tokenizer,
            // except that the row is left untouched.
            _column = 0;
            _current = NullCharacter;
            return;
        }

        var previous = NormalizeBackward(_source[_source.Index - 1]);

        if (previous == LineFeed)
        {
            // Crossing a line break backwards: restore the column saved when
            // it was crossed forwards, or fall back to 1 if none was saved.
            _column = _columns.Count == 0 ? (ushort)1 : _columns.Pop();
            _row--;
            _current = previous;
        }
        else if (previous != NullCharacter)
        {
            _current = previous;
            _column--;
        }

        // previous == NullCharacter: NormalizeBackward already performed the
        // extra step back (and its state update) for a CR LF pair.
    }

    /// <summary>
    /// Folds CR and CR LF into a single line feed while reading forwards.
    /// </summary>
    /// <param name="p">The character just read.</param>
    /// <returns>
    /// <paramref name="p"/> itself unless it is a carriage return, in which
    /// case a line feed.
    /// </returns>
    private char NormalizeForward(char p)
    {
        if (p != CarriageReturn)
        {
            return p;
        }

        // Consume a directly following LF; anything else is un-read again.
        if (_source.ReadCharacter() != LineFeed)
        {
            _source.Index--;
        }

        return LineFeed;
    }

    /// <summary>
    /// Counterpart of <see cref="NormalizeForward"/> for stepping backwards.
    /// </summary>
    /// <param name="p">The character just before the source index.</param>
    /// <returns>
    /// <paramref name="p"/> itself unless it is a carriage return. A lone CR
    /// yields a line feed. A CR followed by LF triggers one more step back
    /// and yields the "no character" sentinel so the caller leaves the state
    /// established by that nested step alone.
    /// </returns>
    private char NormalizeBackward(char p)
    {
        if (p != CarriageReturn)
        {
            return p;
        }

        if (_source.Index < _source.Length && _source[_source.Index] == LineFeed)
        {
            // The index sits between CR and LF; move before the CR as well.
            BackUnsafe();
            return NullCharacter;
        }

        return LineFeed;
    }
}
}