TextSource
A stream abstraction to handle encoding and more.
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace AngleSharp
{
[DebuggerStepThrough]
internal sealed class TextSource : IDisposable
{
/// <summary>
/// Describes how much trust is placed in the currently selected encoding.
/// </summary>
private enum EncodingConfidence
{
// Initial state of stream-backed sources; the only state in which the
// CurrentEncoding setter does anything.
Tentative,
// Assigned after a byte order mark was recognised, or after the
// CurrentEncoding setter settled on an encoding.
Certain,
// Assigned for sources constructed directly from a string.
Irrelevant
}
// Chunk size in bytes (4096).
private const int BufferSize = 4096;
// Stream the bytes are read from; null when the source was built from a string.
private readonly Stream _baseStream;
// Decoded characters accumulated so far.
private readonly StringBuilder _content;
// Copy of every byte handed to the decoder, kept so the content can be
// decoded again when the encoding is changed.
private readonly MemoryStream _raw;
// Scratch buffer receiving bytes read from the base stream.
private readonly byte[] _buffer;
// Scratch buffer receiving the characters decoded from _buffer.
private readonly char[] _chars;
// How much the current encoding is trusted.
private EncodingConfidence _confidence;
// True once a read delivered zero bytes, or for string-backed sources.
private bool _finished;
// Encoding currently used for decoding.
private Encoding _encoding;
// Decoder obtained from _encoding; replaced whenever _encoding changes.
private Decoder _decoder;
// Position of the next character to read within _content.
private int _index;
/// <summary>
/// Gets everything decoded so far as a single string.
/// </summary>
public string Text
{
    get { return _content.ToString(); }
}
/// <summary>
/// Gets the decoded character stored at the given position.
/// </summary>
/// <param name="index">Zero-based position within the decoded content.</param>
public char this[int index] => _content[index];
/// <summary>
/// Gets or sets the encoding used to decode the underlying bytes.
/// </summary>
/// <remarks>
/// The setter only acts while the confidence is still tentative. When the
/// new encoding differs from the current one, all raw bytes collected so
/// far are decoded again with it.
/// </remarks>
/// <exception cref="NotSupportedException">
/// The re-decoded text differs from the characters that were already
/// consumed. Before throwing, the content is replaced by the re-decoded
/// text and the index is reset to zero.
/// </exception>
public Encoding CurrentEncoding
{
    get
    {
        return _encoding;
    }
    set
    {
        // Once the encoding is certain (or irrelevant) it can no longer be revised.
        if (_confidence != EncodingConfidence.Tentative)
        {
            return;
        }

        // A Unicode encoding already in use is kept and simply confirmed.
        if (_encoding.IsUnicode())
        {
            _confidence = EncodingConfidence.Certain;
            return;
        }

        // Any requested Unicode encoding is replaced by UTF-8.
        if (value.IsUnicode())
        {
            value = TextEncoding.Utf8;
        }

        if (value == _encoding)
        {
            _confidence = EncodingConfidence.Certain;
            return;
        }

        _encoding = value;
        _decoder = value.GetDecoder();

        // Decode everything received so far again with the new encoding.
        byte[] rawBytes = _raw.ToArray();
        string decoded = _encoding.GetString(rawBytes, 0, rawBytes.Length);
        int consumed = Math.Min(_index, decoded.Length);

        if (!decoded.Substring(0, consumed).Equals(_content.ToString(0, consumed)))
        {
            // The consumed prefix changed: start over with the re-decoded
            // text and signal the caller.
            _index = 0;
            _content.Clear().Append(decoded);
            throw new NotSupportedException();
        }

        // The consumed prefix is unchanged; only the unread tail is replaced.
        _confidence = EncodingConfidence.Certain;
        _content.Remove(consumed, _content.Length - consumed);
        _content.Append(decoded.Substring(consumed));
    }
}
/// <summary>
/// Gets or sets the position of the next character to be read.
/// </summary>
public int Index
{
    get { return _index; }
    set { _index = value; }
}
/// <summary>
/// Gets the number of characters decoded so far.
/// </summary>
public int Length
{
    get { return _content.Length; }
}
/// <summary>
/// Allocates the buffers and decoder shared by all public constructors.
/// </summary>
/// <param name="encoding">Initial encoding; UTF-8 is used when null.</param>
private TextSource(Encoding encoding)
{
    _buffer = new byte[4096];
    // One slot more than the byte buffer: a decoder that still holds the
    // leading bytes of a multi-byte sequence from the previous chunk can
    // emit one character more than the number of bytes in the current
    // chunk (e.g. a UTF-8 surrogate pair completed by a single byte).
    _chars = new char[4096 + 1];
    _raw = new MemoryStream();
    _index = 0;
    _encoding = encoding ?? TextEncoding.Utf8;
    _decoder = _encoding.GetDecoder();
}
/// <summary>
/// Creates a source over an already decoded string.
/// </summary>
/// <param name="source">Text to expose; every "\r\n" pair is stored as "\n".</param>
public TextSource(string source)
    : this(null, TextEncoding.Utf8)
{
    // There is no stream to read from, so the source is complete from the start.
    _finished = true;
    string normalized = source.Replace("\r\n", "\n");
    _content.Append(normalized);
    _confidence = EncodingConfidence.Irrelevant;
}
/// <summary>
/// Creates a source that decodes its text from a stream.
/// </summary>
/// <param name="baseStream">Stream delivering the raw bytes.</param>
/// <param name="encoding">Initial encoding guess; may be null.</param>
public TextSource(Stream baseStream, Encoding encoding = null)
    : this(encoding)
{
    _confidence = EncodingConfidence.Tentative;
    _content = Pool.NewStringBuilder();
    _baseStream = baseStream;
}
/// <summary>
/// Releases the raw byte store and hands the emptied content builder to the pool.
/// </summary>
public void Dispose()
{
    _raw.Dispose();

    // Clear() returns the same builder, so the chained call is split in two.
    _content.Clear();
    _content.ToPool();
}
/// <summary>
/// Reads the next character and advances the index by one.
/// </summary>
/// <returns>
/// The next character, or <see cref="char.MaxValue"/> when no further
/// content is available.
/// </returns>
public char ReadCharacter()
{
    if (_index < _content.Length)
    {
        return _content[_index++];
    }

    // Nothing buffered at the current position: try to decode more.
    ExpandBuffer(4096);
    int position = _index++;

    // NOTE(review): the end-of-input sentinel was an empty (invalid)
    // character literal here; U+FFFF is assumed - confirm against the
    // consumer's end-of-file symbol.
    return position < _content.Length ? _content[position] : char.MaxValue;
}
/// <summary>
/// Reads up to the given number of characters and advances the index by
/// the requested amount.
/// </summary>
/// <param name="characters">Number of characters requested.</param>
/// <returns>The characters that were available, starting at the previous index.</returns>
public string ReadCharacters(int characters)
{
    int origin = _index;

    // Decode more only when the request reaches past the buffered content.
    if (origin + characters > _content.Length)
    {
        ExpandBuffer(Math.Max(4096, characters));
    }

    _index += characters;
    int available = Math.Min(characters, _content.Length - origin);
    return _content.ToString(origin, available);
}
/// <summary>
/// Asynchronously reads the next character and advances the index by one.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the buffer expansion.</param>
/// <returns>
/// The next character, or <see cref="char.MaxValue"/> when no further
/// content is available.
/// </returns>
public async Task<char> ReadCharacterAsync(CancellationToken cancellationToken)
{
    if (_index < _content.Length)
    {
        return _content[_index++];
    }

    // Nothing buffered at the current position: try to decode more.
    await AwaitExtensions.ConfigureAwait(ExpandBufferAsync(4096, cancellationToken), false);
    int position = _index++;

    // NOTE(review): the end-of-input sentinel was an empty (invalid)
    // character literal here; U+FFFF is assumed - confirm against the
    // consumer's end-of-file symbol.
    return position < _content.Length ? _content[position] : char.MaxValue;
}
/// <summary>
/// Asynchronously reads up to the given number of characters and advances
/// the index by the requested amount.
/// </summary>
/// <param name="characters">Number of characters requested.</param>
/// <param name="cancellationToken">Token forwarded to the buffer expansion.</param>
/// <returns>The characters that were available, starting at the previous index.</returns>
public async Task<string> ReadCharactersAsync(int characters, CancellationToken cancellationToken)
{
    int origin = _index;

    // Decode more only when the request reaches past the buffered content.
    if (origin + characters > _content.Length)
    {
        Task expansion = ExpandBufferAsync(Math.Max(4096, characters), cancellationToken);
        await AwaitExtensions.ConfigureAwait(expansion, false);
    }

    _index += characters;
    int available = Math.Min(characters, _content.Length - origin);
    return _content.ToString(origin, available);
}
/// <summary>
/// Starts decoding ahead so that the given number of characters beyond the
/// current index is buffered, if the stream provides that much.
/// </summary>
/// <param name="length">Number of characters to make available.</param>
/// <param name="cancellationToken">Token forwarded to the buffer expansion.</param>
/// <returns>The task of the underlying buffer expansion.</returns>
public Task Prefetch(int length, CancellationToken cancellationToken)
    => ExpandBufferAsync(length, cancellationToken);
/// <summary>
/// Places the given text at the current index, or at the end of the
/// content when the index is not inside it.
/// </summary>
/// <param name="content">Text to insert.</param>
public void InsertText(string content)
{
    bool atOrPastEnd = _index >= _content.Length;

    if (atOrPastEnd)
    {
        _content.Append(content);
    }
    else
    {
        _content.Insert(_index, content);
    }
}
/// <summary>
/// Reads the first chunk of the stream, recognises a leading byte order
/// mark and decodes the rest of the chunk.
/// </summary>
/// <remarks>
/// When a mark is found, the encoding and decoder are switched
/// accordingly, the confidence becomes certain, and the mark itself is
/// removed from the buffer before decoding.
/// </remarks>
/// <param name="cancellationToken">Token used to cancel the initial read.</param>
private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
{
    // The token is forwarded so the first read is cancellable like every later one.
    int count = await AwaitExtensions.ConfigureAwait(AsyncExtensions.ReadAsync(_baseStream, _buffer, 0, _buffer.Length, cancellationToken), false);
    int markLength = 0;

    // The four-byte UTF-32 LE mark is tested before UTF-16 LE because it
    // begins with the same two bytes.
    if (count > 2 && _buffer[0] == 0xEF && _buffer[1] == 0xBB && _buffer[2] == 0xBF)
    {
        _encoding = TextEncoding.Utf8;
        markLength = 3;
    }
    else if (count > 3 && _buffer[0] == 0xFF && _buffer[1] == 0xFE && _buffer[2] == 0x00 && _buffer[3] == 0x00)
    {
        _encoding = TextEncoding.Utf32Le;
        markLength = 4;
    }
    else if (count > 3 && _buffer[0] == 0x00 && _buffer[1] == 0x00 && _buffer[2] == 0xFE && _buffer[3] == 0xFF)
    {
        _encoding = TextEncoding.Utf32Be;
        markLength = 4;
    }
    else if (count > 1 && _buffer[0] == 0xFE && _buffer[1] == 0xFF)
    {
        _encoding = TextEncoding.Utf16Be;
        markLength = 2;
    }
    else if (count > 1 && _buffer[0] == 0xFF && _buffer[1] == 0xFE)
    {
        _encoding = TextEncoding.Utf16Le;
        markLength = 2;
    }
    else if (count > 3 && _buffer[0] == 0x84 && _buffer[1] == 0x31 && _buffer[2] == 0x95 && _buffer[3] == 0x33)
    {
        _encoding = TextEncoding.Gb18030;
        markLength = 4;
    }

    if (markLength > 0)
    {
        // Drop the mark so that only payload bytes get decoded and stored.
        count -= markLength;
        Array.Copy(_buffer, markLength, _buffer, 0, count);
        _decoder = _encoding.GetDecoder();
        _confidence = EncodingConfidence.Certain;
    }

    AppendContentFromBuffer(count);

    // A first chunk that held nothing but the mark leaves zero payload
    // bytes, which must not be mistaken for the end of the stream.
    if (markLength > 0 && count == 0)
    {
        _finished = false;
    }
}
/// <summary>
/// Asynchronously decodes more of the stream until the requested number
/// of characters beyond the current index is buffered or the stream ends.
/// </summary>
/// <param name="size">Number of characters wanted past the current index.</param>
/// <param name="cancellationToken">Token forwarded to the reads.</param>
private async Task ExpandBufferAsync(long size, CancellationToken cancellationToken)
{
    // With no content yet, the first chunk goes through byte order mark detection.
    bool nothingDecodedYet = !_finished && _content.Length == 0;

    if (nothingDecodedYet)
    {
        await AwaitExtensions.ConfigureAwait(DetectByteOrderMarkAsync(cancellationToken), false);
    }

    while (!_finished && size + _index > _content.Length)
    {
        await AwaitExtensions.ConfigureAwait(ReadIntoBufferAsync(cancellationToken), false);
    }
}
/// <summary>
/// Asynchronously reads one chunk from the stream and appends its decoded content.
/// </summary>
/// <param name="cancellationToken">Token used to cancel the read.</param>
private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
{
    Task<int> pendingRead = AsyncExtensions.ReadAsync(_baseStream, _buffer, 0, 4096, cancellationToken);
    int received = await AwaitExtensions.ConfigureAwait(pendingRead, false);
    AppendContentFromBuffer(received);
}
/// <summary>
/// Decodes more of the stream until the requested number of characters
/// beyond the current index is buffered or the stream ends.
/// </summary>
/// <remarks>
/// This blocks the calling thread while waiting on the asynchronous
/// byte order mark detection and reads.
/// </remarks>
/// <param name="size">Number of characters wanted past the current index.</param>
private void ExpandBuffer(long size)
{
    // With no content yet, the first chunk goes through byte order mark detection.
    bool nothingDecodedYet = !_finished && _content.Length == 0;

    if (nothingDecodedYet)
    {
        Task detection = DetectByteOrderMarkAsync(CancellationToken.None);
        detection.Wait();
    }

    while (!_finished && size + _index > _content.Length)
    {
        ReadIntoBuffer();
    }
}
/// <summary>
/// Reads one chunk from the stream, blocking until the read completes,
/// and appends its decoded content.
/// </summary>
private void ReadIntoBuffer()
{
    Task<int> pendingRead = AsyncExtensions.ReadAsync(_baseStream, _buffer, 0, 4096);
    AppendContentFromBuffer(pendingRead.Result);
}
/// <summary>
/// Decodes the leading bytes of the byte buffer, records them in the raw
/// store and appends the decoded characters to the content.
/// </summary>
/// <param name="size">
/// Number of valid bytes in the buffer; zero marks the source as finished.
/// </param>
private void AppendContentFromBuffer(int size)
{
    _finished = size == 0;

    int decodedCount = _decoder.GetChars(_buffer, 0, size, _chars, 0);

    // The raw bytes are kept so they can be decoded again later.
    _raw.Write(_buffer, 0, size);
    _content.Append(_chars, 0, decodedCount);
}
}
}