AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.8.3" />

 TextSource

sealed class TextSource : IDisposable
A stream abstraction to handle encoding and more.
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp
{
    /// <summary>
    /// A stream abstraction to handle encoding and more. Text is decoded
    /// lazily, chunk by chunk, from an underlying byte stream (or taken
    /// directly from a string) into a character buffer addressed by
    /// <see cref="Index"/>.
    /// </summary>
    [DebuggerStepThrough]
    internal sealed class TextSource : IDisposable
    {
        /// <summary>
        /// How much trust is put into the currently selected encoding.
        /// </summary>
        private enum EncodingConfidence
        {
            Tentative,
            Certain,
            Irrelevant
        }

        /// <summary>Number of bytes requested from the stream per read.</summary>
        private const int BufferSize = 4096;

        // Sentinel returned when reading past the end of the content.
        // NOTE(review): the character literal was empty in the listing this
        // file was restored from; U+FFFF is assumed here. Confirm against the
        // end-of-file symbol the consumers of this class compare with.
        private const char EndOfFile = char.MaxValue;

        private readonly Stream _baseStream;
        private readonly StringBuilder _content;

        // Every byte handed to the decoder is mirrored here so the text can
        // be decoded again from scratch when the encoding is switched.
        private readonly MemoryStream _raw;
        private readonly byte[] _buffer;
        private readonly char[] _chars;
        private EncodingConfidence _confidence;
        private bool _finished;
        private Encoding _encoding;
        private Decoder _decoder;
        private int _index;

        /// <summary>Gets all text that has been decoded so far.</summary>
        public string Text => _content.ToString();

        /// <summary>Gets the decoded character at the given position.</summary>
        /// <param name="index">Zero-based position in the decoded content.</param>
        public char this[int index]
        {
            get { return _content[index]; }
        }

        /// <summary>
        /// Gets or sets the encoding used to decode the stream. The setter
        /// only has an effect while the confidence is still tentative.
        /// </summary>
        /// <exception cref="NotSupportedException">
        /// The text consumed so far decodes differently under the new
        /// encoding. Before throwing, the content is replaced by the newly
        /// decoded text and <see cref="Index"/> is reset to 0.
        /// </exception>
        public Encoding CurrentEncoding
        {
            get { return _encoding; }
            set
            {
                if (_confidence != EncodingConfidence.Tentative)
                {
                    return;
                }

                if (_encoding.IsUnicode())
                {
                    // A Unicode encoding that is already in use is kept.
                    _confidence = EncodingConfidence.Certain;
                    return;
                }

                if (value.IsUnicode())
                {
                    value = TextEncoding.Utf8;
                }

                if (value == _encoding)
                {
                    _confidence = EncodingConfidence.Certain;
                    return;
                }

                _encoding = value;
                _decoder = value.GetDecoder();

                byte[] raw = _raw.ToArray();
                string decoded = _encoding.GetString(raw, 0, raw.Length);

                // _index can sit past the end of the content after an
                // end-of-file read, so clamp against both lengths to keep
                // the comparison below inside valid bounds.
                int consumed = Math.Min(Math.Min(_index, decoded.Length), _content.Length);

                if (!string.Equals(decoded.Substring(0, consumed), _content.ToString(0, consumed), StringComparison.Ordinal))
                {
                    // What was already read is different under the new
                    // encoding: swap in the new text, rewind and report it.
                    _index = 0;
                    _content.Clear().Append(decoded);
                    throw new NotSupportedException();
                }

                // The consumed prefix is unaffected; only replace the rest.
                _confidence = EncodingConfidence.Certain;
                _content.Remove(consumed, _content.Length - consumed);
                _content.Append(decoded.Substring(consumed));
            }
        }

        /// <summary>Gets or sets the current read position.</summary>
        public int Index
        {
            get { return _index; }
            set { _index = value; }
        }

        /// <summary>Gets the number of characters decoded so far.</summary>
        public int Length => _content.Length;

        /// <summary>
        /// Sets up the buffers and the decoder shared by all public
        /// constructors.
        /// </summary>
        /// <param name="encoding">Initial encoding; UTF-8 when null.</param>
        private TextSource(Encoding encoding)
        {
            _buffer = new byte[BufferSize];

            // One extra slot: a decoder that still holds the leading bytes of
            // a multi-byte sequence from the previous chunk can produce one
            // more char than the number of bytes passed in.
            _chars = new char[BufferSize + 1];
            _raw = new MemoryStream();
            _index = 0;
            _encoding = encoding ?? TextEncoding.Utf8;
            _decoder = _encoding.GetDecoder();
        }

        /// <summary>
        /// Creates a source over an in-memory string. CR LF pairs are
        /// replaced by LF and no stream is ever read.
        /// </summary>
        /// <param name="source">The complete text.</param>
        public TextSource(string source)
            : this(null, TextEncoding.Utf8)
        {
            _finished = true;
            _content.Append(source.Replace("\r\n", "\n"));
            _confidence = EncodingConfidence.Irrelevant;
        }

        /// <summary>
        /// Creates a source that decodes the given stream on demand.
        /// </summary>
        /// <param name="baseStream">The stream to read bytes from.</param>
        /// <param name="encoding">Initial (tentative) encoding; UTF-8 when null.</param>
        public TextSource(Stream baseStream, Encoding encoding = null)
            : this(encoding)
        {
            _baseStream = baseStream;
            _content = Pool.NewStringBuilder();
            _confidence = EncodingConfidence.Tentative;
        }

        /// <summary>
        /// Disposes the raw byte copy and hands the cleared content builder
        /// to <c>ToPool()</c>. The base stream is not touched.
        /// </summary>
        public void Dispose()
        {
            _raw.Dispose();
            _content.Clear().ToPool();
        }

        /// <summary>
        /// Reads the character at the current index and advances the index
        /// by one, reading from the stream if necessary.
        /// </summary>
        /// <returns>The character, or U+FFFF when no more content exists.</returns>
        public char ReadCharacter()
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            ExpandBuffer(BufferSize);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>
        /// Reads up to <paramref name="characters"/> characters starting at
        /// the current index and advances the index by that amount.
        /// </summary>
        /// <param name="characters">Number of characters requested.</param>
        /// <returns>The text read; shorter than requested at the end.</returns>
        public string ReadCharacters(int characters)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            ExpandBuffer(Math.Max(BufferSize, characters));
            return TakeAvailable(start, characters);
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="ReadCharacter"/>.
        /// </summary>
        /// <param name="cancellationToken">Token passed to the stream reads.</param>
        /// <returns>The character, or U+FFFF when no more content exists.</returns>
        public async Task<char> ReadCharacterAsync(CancellationToken cancellationToken)
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            await ExpandBufferAsync(BufferSize, cancellationToken).ConfigureAwait(false);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="ReadCharacters"/>.
        /// </summary>
        /// <param name="characters">Number of characters requested.</param>
        /// <param name="cancellationToken">Token passed to the stream reads.</param>
        /// <returns>The text read; shorter than requested at the end.</returns>
        public async Task<string> ReadCharactersAsync(int characters, CancellationToken cancellationToken)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            await ExpandBufferAsync(Math.Max(BufferSize, characters), cancellationToken).ConfigureAwait(false);
            return TakeAvailable(start, characters);
        }

        /// <summary>
        /// Reads ahead until <paramref name="length"/> characters past the
        /// current index are decoded or the stream ends.
        /// </summary>
        /// <param name="length">Number of characters to have available.</param>
        /// <param name="cancellationToken">Token passed to the stream reads.</param>
        public Task Prefetch(int length, CancellationToken cancellationToken)
        {
            return ExpandBufferAsync(length, cancellationToken);
        }

        /// <summary>
        /// Inserts text at the current index, or appends it when the index
        /// is at or beyond the end of the content. The index is not moved.
        /// </summary>
        /// <param name="content">The text to insert.</param>
        public void InsertText(string content)
        {
            if (_index < _content.Length)
            {
                _content.Insert(_index, content);
            }
            else
            {
                _content.Append(content);
            }
        }

        /// <summary>
        /// Shared tail of the bulk reads: advances the index by the full
        /// request and returns whatever part of it is actually available.
        /// </summary>
        private string TakeAvailable(int start, int characters)
        {
            _index += characters;

            // The start can lie beyond the content (e.g. right after an
            // end-of-file read); there is nothing to return in that case.
            if (start >= _content.Length)
            {
                return string.Empty;
            }

            return _content.ToString(start, Math.Min(characters, _content.Length - start));
        }

        /// <summary>
        /// Performs the first read and checks the bytes for a byte order
        /// mark. A recognised mark selects the encoding, makes it certain
        /// and is removed from the buffer before decoding.
        /// </summary>
        private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
        {
            int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            int offset = 0;

            // The 4-byte UTF-32 LE mark starts with the 2-byte UTF-16 LE
            // mark, so the longer pattern has to be tested first.
            if (count > 2 && _buffer[0] == 0xEF && _buffer[1] == 0xBB && _buffer[2] == 0xBF)
            {
                _encoding = TextEncoding.Utf8;
                offset = 3;
            }
            else if (count > 3 && _buffer[0] == 0xFF && _buffer[1] == 0xFE && _buffer[2] == 0x00 && _buffer[3] == 0x00)
            {
                _encoding = TextEncoding.Utf32Le;
                offset = 4;
            }
            else if (count > 3 && _buffer[0] == 0x00 && _buffer[1] == 0x00 && _buffer[2] == 0xFE && _buffer[3] == 0xFF)
            {
                _encoding = TextEncoding.Utf32Be;
                offset = 4;
            }
            else if (count > 1 && _buffer[0] == 0xFE && _buffer[1] == 0xFF)
            {
                _encoding = TextEncoding.Utf16Be;
                offset = 2;
            }
            else if (count > 1 && _buffer[0] == 0xFF && _buffer[1] == 0xFE)
            {
                _encoding = TextEncoding.Utf16Le;
                offset = 2;
            }
            else if (count > 3 && _buffer[0] == 0x84 && _buffer[1] == 0x31 && _buffer[2] == 0x95 && _buffer[3] == 0x33)
            {
                _encoding = TextEncoding.Gb18030;
                offset = 4;
            }

            if (offset > 0)
            {
                count -= offset;
                Array.Copy(_buffer, offset, _buffer, 0, count);
                _decoder = _encoding.GetDecoder();
                _confidence = EncodingConfidence.Certain;
            }

            AppendContentFromBuffer(count);

            if (offset > 0)
            {
                // The read returned data (at least the mark), so the stream
                // has not ended even if nothing was left after stripping it.
                _finished = false;
            }
        }

        /// <summary>
        /// Reads from the stream until <paramref name="size"/> characters
        /// past the current index are decoded or the stream ends.
        /// </summary>
        private async Task ExpandBufferAsync(long size, CancellationToken cancellationToken)
        {
            if (!_finished && _content.Length == 0)
            {
                await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
            }

            while (size + _index > _content.Length && !_finished)
            {
                await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>Reads one chunk from the stream and decodes it.</summary>
        private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
        {
            int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            AppendContentFromBuffer(count);
        }

        /// <summary>
        /// Blocking counterpart of <see cref="ExpandBufferAsync"/>; it waits
        /// on the asynchronous reads.
        /// </summary>
        private void ExpandBuffer(long size)
        {
            if (!_finished && _content.Length == 0)
            {
                DetectByteOrderMarkAsync(CancellationToken.None).Wait();
            }

            while (size + _index > _content.Length && !_finished)
            {
                ReadIntoBuffer();
            }
        }

        /// <summary>Blocking counterpart of <see cref="ReadIntoBufferAsync"/>.</summary>
        private void ReadIntoBuffer()
        {
            int count = _baseStream.ReadAsync(_buffer, 0, BufferSize).Result;
            AppendContentFromBuffer(count);
        }

        /// <summary>
        /// Decodes the first <paramref name="size"/> bytes of the read buffer,
        /// appends the characters to the content and records the raw bytes.
        /// A size of zero marks the source as finished.
        /// </summary>
        private void AppendContentFromBuffer(int size)
        {
            _finished = size == 0;
            int charCount = _decoder.GetChars(_buffer, 0, size, _chars, 0);
            _raw.Write(_buffer, 0, size);
            _content.Append(_chars, 0, charCount);
        }
    }
}