TextSource
A stream abstraction that decodes bytes into buffered text, handling byte-order-mark detection, encoding changes, and CRLF normalization.
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace AngleSharp
{
/// <summary>
/// Buffers text decoded from a byte stream (or taken from a string) and exposes it
/// through a movable read index. Stream content is read in chunks of
/// <see cref="BufferSize"/> bytes; a byte order mark in the first chunk selects the
/// encoding, and "\r\n" sequences are collapsed to "\n".
/// </summary>
[DebuggerStepThrough]
internal sealed class TextSource : ITextSource, IDisposable
{
    /// <summary>Number of bytes requested from the stream per read.</summary>
    private const int BufferSize = 4096;

    // Sentinel returned when reading past the end of the content.
    // NOTE(review): the literal was unreadable in the original source; restored as
    // U+FFFF (char.MaxValue) - confirm this matches the consumer's end-of-file symbol.
    private const char EndOfFile = char.MaxValue;

    private readonly Stream _baseStream;
    private readonly StringBuilder _content;
    private readonly byte[] _buffer;
    private readonly char[] _chars;
    private bool _finished;
    // True when the previous chunk ended with '\r' that was held back because the
    // matching '\n' may be the first character of the next chunk.
    private bool _pendingCarriageReturn;
    private Encoding _encoding;
    private int _index;

    /// <summary>Gets the already buffered character at the given position.</summary>
    public char this[int index] => _content[index];

    /// <summary>
    /// Gets or sets the encoding used to decode the stream. Changing it re-interprets
    /// the buffered text from the current index onwards: that text is encoded back to
    /// bytes with the old encoding and decoded again with the new one.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public Encoding CurrentEncoding
    {
        get
        {
            return _encoding;
        }
        set
        {
            if (value == null)
            {
                // Reject early; otherwise the buffer would be truncated before failing.
                throw new ArgumentNullException(nameof(value));
            }

            if (value == _encoding)
            {
                return;
            }

            // The index may have been advanced past the end by reads at end-of-file;
            // in that case there is nothing buffered that needs re-decoding.
            int remaining = _content.Length - _index;

            if (remaining > 0)
            {
                string unread = _content.ToString(_index, remaining);
                byte[] raw = _encoding.GetBytes(unread);
                _content.Remove(_index, remaining);
                _content.Append(value.GetString(raw, 0, raw.Length));
            }

            _encoding = value;
        }
    }

    /// <summary>Gets or sets the position of the next character to read.</summary>
    public int Index
    {
        get { return _index; }
        set { _index = value; }
    }

    /// <summary>Gets the number of characters buffered so far.</summary>
    public int Length => _content.Length;

    private TextSource(Encoding encoding)
    {
        _buffer = new byte[BufferSize];
        _chars = new char[BufferSize];
        _index = 0;
        _encoding = encoding ?? Encoding.UTF8;
    }

    /// <summary>
    /// Creates a source over a complete string. "\r\n" is replaced by "\n" and no
    /// stream is ever read.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public TextSource(string source)
        : this(Encoding.UTF8)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        _finished = true;
        _content = new StringBuilder(source.Replace("\r\n", "\n"));
    }

    /// <summary>
    /// Creates a source that lazily decodes the given stream.
    /// </summary>
    /// <param name="baseStream">The stream to read; disposed together with this instance.</param>
    /// <param name="encoding">Initial encoding; UTF-8 when null. A byte order mark overrides it.</param>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
    public TextSource(Stream baseStream, Encoding encoding = null)
        : this(encoding)
    {
        if (baseStream == null)
        {
            throw new ArgumentNullException(nameof(baseStream));
        }

        _baseStream = baseStream;
        _content = new StringBuilder();
    }

    /// <summary>Disposes the underlying stream, if there is one.</summary>
    public void Dispose()
    {
        _baseStream?.Dispose();
    }

    /// <summary>
    /// Reads the next character and advances the index, reading more of the stream
    /// when the buffer is exhausted.
    /// </summary>
    /// <returns>The character, or the end-of-file sentinel when no more content exists.</returns>
    public char ReadCharacter()
    {
        if (_index >= _content.Length)
        {
            ExpandBuffer(BufferSize);
        }

        return NextBufferedCharacter();
    }

    /// <summary>
    /// Reads up to <paramref name="characters"/> characters and advances the index by
    /// the requested amount (even if fewer characters were available).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="characters"/> is negative.</exception>
    public string ReadCharacters(int characters)
    {
        if (characters < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(characters));
        }

        if (_index + characters > _content.Length)
        {
            ExpandBuffer(Math.Max(BufferSize, characters));
        }

        return TakeBufferedCharacters(characters);
    }

    /// <summary>Asynchronous counterpart of <see cref="ReadCharacter"/>.</summary>
    public async Task<char> ReadCharacterAsync(CancellationToken cancellationToken)
    {
        if (_index >= _content.Length)
        {
            await ExpandBufferAsync(BufferSize, cancellationToken).ConfigureAwait(false);
        }

        return NextBufferedCharacter();
    }

    /// <summary>Asynchronous counterpart of <see cref="ReadCharacters"/>.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="characters"/> is negative.</exception>
    public async Task<string> ReadCharactersAsync(int characters, CancellationToken cancellationToken)
    {
        if (characters < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(characters));
        }

        if (_index + characters > _content.Length)
        {
            await ExpandBufferAsync(Math.Max(BufferSize, characters), cancellationToken).ConfigureAwait(false);
        }

        return TakeBufferedCharacters(characters);
    }

    /// <summary>
    /// Reads from the stream until at least <paramref name="length"/> characters past
    /// the current index are buffered or the stream ends.
    /// </summary>
    public Task Prefetch(int length, CancellationToken cancellationToken)
    {
        return ExpandBufferAsync(length, cancellationToken);
    }

    /// <summary>
    /// Inserts text at the current index, or appends it when the index is at or
    /// beyond the end of the buffered content. The index itself is not moved.
    /// </summary>
    public void InsertText(string content)
    {
        if (_index < _content.Length)
        {
            _content.Insert(_index, content);
        }
        else
        {
            _content.Append(content);
        }
    }

    // Returns the character at the index (or the sentinel) and always advances the index.
    private char NextBufferedCharacter()
    {
        int position = _index++;
        return position < _content.Length ? _content[position] : EndOfFile;
    }

    // Returns what is buffered of the requested span; the index advances by the full request.
    private string TakeBufferedCharacters(int characters)
    {
        int start = _index;
        _index += characters;
        int available = Math.Min(characters, _content.Length - start);

        // start may already lie beyond the content after earlier reads at end-of-file.
        return available > 0 ? _content.ToString(start, available) : string.Empty;
    }

    // Inspects the first `count` bytes of the buffer for a byte order mark, switches
    // the encoding accordingly and returns the number of bytes the mark occupies.
    // The UTF-32LE test must precede UTF-16LE because both start with FF FE.
    private int DetectByteOrderMark(int count)
    {
        if (count > 2 && _buffer[0] == 0xEF && _buffer[1] == 0xBB && _buffer[2] == 0xBF)
        {
            _encoding = DocumentEncoding.UTF8;
            return 3;
        }

        if (count > 3 && _buffer[0] == 0xFF && _buffer[1] == 0xFE && _buffer[2] == 0x00 && _buffer[3] == 0x00)
        {
            _encoding = DocumentEncoding.UTF32LE;
            return 4;
        }

        if (count > 3 && _buffer[0] == 0x00 && _buffer[1] == 0x00 && _buffer[2] == 0xFE && _buffer[3] == 0xFF)
        {
            _encoding = DocumentEncoding.UTF32BE;
            return 4;
        }

        if (count > 1 && _buffer[0] == 0xFE && _buffer[1] == 0xFF)
        {
            _encoding = DocumentEncoding.UTF16BE;
            return 2;
        }

        if (count > 1 && _buffer[0] == 0xFF && _buffer[1] == 0xFE)
        {
            _encoding = DocumentEncoding.UTF16LE;
            return 2;
        }

        if (count > 3 && _buffer[0] == 0x84 && _buffer[1] == 0x31 && _buffer[2] == 0x95 && _buffer[3] == 0x33)
        {
            _encoding = DocumentEncoding.GB18030;
            return 4;
        }

        return 0;
    }

    // Reads the first chunk, honouring cancellation, and strips a byte order mark.
    private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
    {
        int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
        AppendContentFromBuffer(count, DetectByteOrderMark(count));
    }

    private async Task ExpandBufferAsync(long size, CancellationToken cancellationToken)
    {
        // Nothing buffered yet: the next chunk is treated as the start of the stream.
        if (!_finished && _content.Length == 0)
        {
            await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
        }

        while (size + _index > _content.Length && !_finished)
        {
            await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
        }
    }

    private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
    {
        int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
        AppendContentFromBuffer(count, 0);
    }

    // Synchronous twin of ExpandBufferAsync. Uses Stream.Read directly instead of
    // blocking on the asynchronous calls, which could deadlock under a
    // synchronization context.
    private void ExpandBuffer(long size)
    {
        if (!_finished && _content.Length == 0)
        {
            int count = _baseStream.Read(_buffer, 0, BufferSize);
            AppendContentFromBuffer(count, DetectByteOrderMark(count));
        }

        while (size + _index > _content.Length && !_finished)
        {
            ReadIntoBuffer();
        }
    }

    private void ReadIntoBuffer()
    {
        int count = _baseStream.Read(_buffer, 0, BufferSize);
        AppendContentFromBuffer(count, 0);
    }

    // Decodes the bytes [offset, size) of the buffer and appends them to the content
    // with "\r\n" collapsed to "\n". A read of zero bytes marks the end of the stream.
    //   size   - number of valid bytes in the buffer (as returned by the read)
    //   offset - number of leading bytes to skip (length of a byte order mark)
    private void AppendContentFromBuffer(int size, int offset = 0)
    {
        _finished = size == 0;

        // Only size - offset bytes follow the skipped prefix; passing size here would
        // decode past the data that was actually read.
        // NOTE(review): decoding is stateless, so a multi-byte sequence that straddles
        // two chunks is not reassembled; a Decoder instance would be needed for that.
        int decoded = _encoding.GetChars(_buffer, offset, size - offset, _chars, 0);

        // Resolve a '\r' held back from the previous chunk: it is dropped when this
        // chunk starts with '\n' (CRLF), otherwise it was a lone CR and is kept.
        if (_pendingCarriageReturn && (_finished || (decoded > 0 && _chars[0] != '\n')))
        {
            _content.Append('\r');
        }

        if (_finished || decoded > 0)
        {
            _pendingCarriageReturn = false;
        }

        // Single-pass compaction: copy every character except a '\r' directly
        // followed by '\n'.
        int kept = 0;

        for (int read = 0; read < decoded; read++)
        {
            char current = _chars[read];

            if (current == '\r' && read + 1 < decoded && _chars[read + 1] == '\n')
            {
                continue;
            }

            _chars[kept++] = current;
        }

        // A trailing '\r' may be the first half of a CRLF split across chunks.
        if (kept > 0 && _chars[kept - 1] == '\r')
        {
            kept--;
            _pendingCarriageReturn = true;
        }

        _content.Append(_chars, 0, kept);
    }
}
}