AngleSharp by AngleSharp

<PackageReference Include="AngleSharp" Version="0.9.6" />

 TextSource

sealed class TextSource : IDisposable
A stream abstraction to handle encoding and more.
using AngleSharp.Extensions;
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp
{
    /// <summary>
    /// A stream abstraction to handle encoding and more. Bytes are pulled from
    /// an underlying stream in fixed-size chunks, decoded with the current
    /// encoding and accumulated in a character buffer that can be read
    /// sequentially or by index.
    /// </summary>
    [DebuggerStepThrough]
    internal sealed class TextSource : IDisposable
    {
        /// <summary>
        /// How sure the source is about the encoding currently in use.
        /// </summary>
        private enum EncodingConfidence
        {
            Tentative,
            Certain,
            Irrelevant
        }

        /// <summary>Number of bytes requested from the stream per read.</summary>
        private const int BufferSize = 4096;

        /// <summary>Character returned when reading past the end of the content.</summary>
        private const char EndOfFile = '\uffff';

        private readonly Stream _baseStream;
        private readonly MemoryStream _raw;
        private readonly byte[] _buffer;
        private readonly char[] _chars;
        private StringBuilder _content;
        private EncodingConfidence _confidence;
        private bool _finished;
        private Encoding _encoding;
        private Decoder _decoder;
        private int _index;

        /// <summary>
        /// Gets the full text decoded so far.
        /// </summary>
        public string Text => _content.ToString();

        /// <summary>
        /// Gets the already decoded character at the given position.
        /// </summary>
        /// <param name="index">Position within the decoded content.</param>
        public char this[int index]
        {
            get { return _content[index]; }
        }

        /// <summary>
        /// Gets or sets the encoding used for decoding. Setting only has an
        /// effect while the confidence is still tentative.
        /// </summary>
        /// <exception cref="NotSupportedException">
        /// Thrown when the characters consumed so far decode differently under
        /// the new encoding. The content is replaced with the re-decoded text
        /// and the index is reset to 0 before throwing.
        /// </exception>
        public Encoding CurrentEncoding
        {
            get { return _encoding; }
            set
            {
                if (_confidence != EncodingConfidence.Tentative)
                {
                    return;
                }

                if (_encoding.IsUnicode())
                {
                    _confidence = EncodingConfidence.Certain;
                    return;
                }

                if (value.IsUnicode())
                {
                    value = TextEncoding.Utf8;
                }

                if (value == _encoding)
                {
                    _confidence = EncodingConfidence.Certain;
                    return;
                }

                _encoding = value;
                _decoder = value.GetDecoder();

                // Re-decode every raw byte seen so far with the new encoding.
                byte[] rawBytes = _raw.ToArray();
                char[] decoded = new char[_encoding.GetMaxCharCount(rawBytes.Length)];
                int decodedCount = _decoder.GetChars(rawBytes, 0, rawBytes.Length, decoded, 0);
                string text = new string(decoded, 0, decodedCount);
                int consumed = Math.Min(_index, text.Length);

                if (!text.Substring(0, consumed).Is(_content.ToString(0, consumed)))
                {
                    // The part that was already read changed: start over.
                    _index = 0;
                    _content.Clear().Append(text);
                    throw new NotSupportedException();
                }

                // The consumed prefix is unchanged; only swap the unread tail.
                _confidence = EncodingConfidence.Certain;
                _content.Remove(consumed, _content.Length - consumed);
                _content.Append(text.Substring(consumed));
            }
        }

        /// <summary>
        /// Gets or sets the current read position.
        /// </summary>
        public int Index
        {
            get { return _index; }
            set { _index = value; }
        }

        /// <summary>
        /// Gets the number of characters decoded so far.
        /// </summary>
        public int Length => _content.Length;

        /// <summary>
        /// Allocates the buffers and the decoder shared by all constructors.
        /// </summary>
        /// <param name="encoding">Initial encoding; UTF-8 when null.</param>
        private TextSource(Encoding encoding)
        {
            _buffer = new byte[BufferSize];
            // One char more than the byte buffer, as in the original sizing.
            _chars = new char[BufferSize + 1];
            _raw = new MemoryStream();
            _index = 0;
            _encoding = encoding ?? TextEncoding.Utf8;
            _decoder = _encoding.GetDecoder();
        }

        /// <summary>
        /// Creates a source over an in-memory string. No stream is attached
        /// and the source is finished immediately.
        /// </summary>
        /// <param name="source">The complete text.</param>
        public TextSource(string source)
            : this(null, TextEncoding.Utf8)
        {
            _finished = true;
            _content.Append(source);
            _confidence = EncodingConfidence.Irrelevant;
        }

        /// <summary>
        /// Creates a source that decodes the given stream on demand.
        /// </summary>
        /// <param name="baseStream">The stream to read bytes from.</param>
        /// <param name="encoding">Initial (tentative) encoding; UTF-8 when null.</param>
        public TextSource(Stream baseStream, Encoding encoding = null)
            : this(encoding)
        {
            _baseStream = baseStream;
            _content = Pool.NewStringBuilder();
            _confidence = EncodingConfidence.Tentative;
        }

        /// <summary>
        /// Releases the raw byte copy and hands the character buffer back to
        /// the pool. Calling it again is a no-op.
        /// </summary>
        public void Dispose()
        {
            if (_content != null)
            {
                _raw.Dispose();
                _content.Clear().ToPool();
                _content = null;
            }
        }

        /// <summary>
        /// Reads the next character, fetching more data if required.
        /// </summary>
        /// <returns>The character, or U+FFFF when no more content is available.</returns>
        public char ReadCharacter()
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            ExpandBuffer(BufferSize);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>
        /// Reads up to the given number of characters, fetching more data if
        /// required. The index always advances by the requested amount.
        /// </summary>
        /// <param name="characters">The number of characters requested.</param>
        /// <returns>The characters that were available.</returns>
        public string ReadCharacters(int characters)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            ExpandBuffer(Math.Max(BufferSize, characters));
            _index += characters;
            characters = Math.Min(characters, _content.Length - start);
            return _content.ToString(start, characters);
        }

        /// <summary>
        /// Asynchronously reads the next character, fetching more data if required.
        /// </summary>
        /// <param name="cancellationToken">Token to cancel the underlying read.</param>
        /// <returns>The character, or U+FFFF when no more content is available.</returns>
        public async Task<char> ReadCharacterAsync(CancellationToken cancellationToken)
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            await ExpandBufferAsync(BufferSize, cancellationToken).ConfigureAwait(false);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>
        /// Asynchronously reads up to the given number of characters. The
        /// index always advances by the requested amount.
        /// </summary>
        /// <param name="characters">The number of characters requested.</param>
        /// <param name="cancellationToken">Token to cancel the underlying reads.</param>
        /// <returns>The characters that were available.</returns>
        public async Task<string> ReadCharactersAsync(int characters, CancellationToken cancellationToken)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            await ExpandBufferAsync(Math.Max(BufferSize, characters), cancellationToken).ConfigureAwait(false);
            _index += characters;
            characters = Math.Min(characters, _content.Length - start);
            return _content.ToString(start, characters);
        }

        /// <summary>
        /// Ensures that, if the stream has that much, at least
        /// <paramref name="length"/> characters beyond the index are decoded.
        /// </summary>
        /// <param name="length">The number of characters to make available.</param>
        /// <param name="cancellationToken">Token to cancel the underlying reads.</param>
        public Task PrefetchAsync(int length, CancellationToken cancellationToken)
        {
            return ExpandBufferAsync(length, cancellationToken);
        }

        /// <summary>
        /// Reads and decodes the remainder of the stream.
        /// </summary>
        /// <param name="cancellationToken">Token to cancel the underlying reads.</param>
        public async Task PrefetchAllAsync(CancellationToken cancellationToken)
        {
            // The !_finished guard matches ExpandBufferAsync: a finished source
            // (e.g. one built from an empty string) has no stream to inspect.
            if (!_finished && _content.Length == 0)
            {
                await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
            }

            while (!_finished)
            {
                await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>
        /// Inserts text at the current index (or appends it when the index is
        /// outside the content) and moves the index past the inserted text.
        /// </summary>
        /// <param name="content">The text to insert.</param>
        public void InsertText(string content)
        {
            if (_index >= 0 && _index < _content.Length)
            {
                _content.Insert(_index, content);
            }
            else
            {
                _content.Append(content);
            }

            _index += content.Length;
        }

        /// <summary>
        /// Asynchronously reads the first chunk and checks it for a byte order mark.
        /// </summary>
        /// <param name="cancellationToken">Token to cancel the read.</param>
        private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
        {
            int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            ProcessFirstChunk(count);
        }

        /// <summary>
        /// Synchronously reads the first chunk and checks it for a byte order mark.
        /// </summary>
        private void DetectByteOrderMark()
        {
            int count = _baseStream.Read(_buffer, 0, BufferSize);
            ProcessFirstChunk(count);
        }

        /// <summary>
        /// Inspects the first <paramref name="count"/> bytes of the buffer for
        /// a known byte order mark, switches encoding accordingly, strips the
        /// mark and decodes the remaining bytes.
        /// </summary>
        /// <param name="count">Number of bytes the first read returned.</param>
        private void ProcessFirstChunk(int count)
        {
            // Decide end-of-stream from the raw read, not the count left after
            // stripping the mark: a first read returning only the mark must
            // not end the source.
            bool endOfStream = count == 0;
            int offset = 0;

            // Order matters: the UTF-32 LE mark starts with the UTF-16 LE mark.
            if (count > 2 && _buffer[0] == 0xEF && _buffer[1] == 0xBB && _buffer[2] == 0xBF)
            {
                _encoding = TextEncoding.Utf8;
                offset = 3;
            }
            else if (count > 3 && _buffer[0] == 0xFF && _buffer[1] == 0xFE && _buffer[2] == 0x00 && _buffer[3] == 0x00)
            {
                _encoding = TextEncoding.Utf32Le;
                offset = 4;
            }
            else if (count > 3 && _buffer[0] == 0x00 && _buffer[1] == 0x00 && _buffer[2] == 0xFE && _buffer[3] == 0xFF)
            {
                _encoding = TextEncoding.Utf32Be;
                offset = 4;
            }
            else if (count > 1 && _buffer[0] == 0xFE && _buffer[1] == 0xFF)
            {
                _encoding = TextEncoding.Utf16Be;
                offset = 2;
            }
            else if (count > 1 && _buffer[0] == 0xFF && _buffer[1] == 0xFE)
            {
                _encoding = TextEncoding.Utf16Le;
                offset = 2;
            }
            else if (count > 3 && _buffer[0] == 0x84 && _buffer[1] == 0x31 && _buffer[2] == 0x95 && _buffer[3] == 0x33)
            {
                _encoding = TextEncoding.Gb18030;
                offset = 4;
            }

            if (offset > 0)
            {
                // Drop the mark and lock in the detected encoding.
                count -= offset;
                Array.Copy(_buffer, offset, _buffer, 0, count);
                _decoder = _encoding.GetDecoder();
                _confidence = EncodingConfidence.Certain;
            }

            AppendContentFromBuffer(count);
            _finished = endOfStream;
        }

        /// <summary>
        /// Asynchronously reads until <paramref name="size"/> characters beyond
        /// the index are available or the stream is exhausted.
        /// </summary>
        /// <param name="size">Number of characters wanted beyond the index.</param>
        /// <param name="cancellationToken">Token to cancel the underlying reads.</param>
        private async Task ExpandBufferAsync(long size, CancellationToken cancellationToken)
        {
            if (!_finished && _content.Length == 0)
            {
                await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
            }

            while (size + _index > _content.Length && !_finished)
            {
                await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>
        /// Asynchronously reads one chunk from the stream and decodes it.
        /// </summary>
        /// <param name="cancellationToken">Token to cancel the read.</param>
        private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
        {
            int size = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            AppendContentFromBuffer(size);
        }

        /// <summary>
        /// Synchronously reads until <paramref name="size"/> characters beyond
        /// the index are available or the stream is exhausted.
        /// </summary>
        /// <param name="size">Number of characters wanted beyond the index.</param>
        private void ExpandBuffer(long size)
        {
            if (!_finished && _content.Length == 0)
            {
                // Stay synchronous instead of blocking on the async variant.
                DetectByteOrderMark();
            }

            while (size + _index > _content.Length && !_finished)
            {
                ReadIntoBuffer();
            }
        }

        /// <summary>
        /// Synchronously reads one chunk from the stream and decodes it.
        /// </summary>
        private void ReadIntoBuffer()
        {
            int size = _baseStream.Read(_buffer, 0, BufferSize);
            AppendContentFromBuffer(size);
        }

        /// <summary>
        /// Decodes the first <paramref name="size"/> bytes of the byte buffer
        /// and appends the characters to the content. A size of 0 marks the
        /// source as finished.
        /// </summary>
        /// <param name="size">Number of valid bytes in the byte buffer.</param>
        private void AppendContentFromBuffer(int size)
        {
            _finished = size == 0;
            int charCount = _decoder.GetChars(_buffer, 0, size, _chars, 0);

            // Keep the raw bytes while the encoding may still change, so the
            // content can be re-decoded later.
            if (_confidence != EncodingConfidence.Certain)
            {
                _raw.Write(_buffer, 0, size);
            }

            _content.Append(_chars, 0, charCount);
        }
    }
}