AngleSharp by Florian Rappl

<PackageReference Include="AngleSharp" Version="0.7.0" />

 TextSource

A stream abstraction to handle encoding and more.
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AngleSharp
{
    /// <summary>
    /// A character source that is either a fixed string or a byte stream
    /// which is decoded lazily, chunk by chunk, into an internal text buffer.
    /// CR LF pairs are collapsed to a single LF while the text is loaded.
    /// </summary>
    [DebuggerStepThrough]
    internal sealed class TextSource : ITextSource, IDisposable
    {
        /// <summary>Number of bytes requested from the stream per read.</summary>
        private const int BufferSize = 4096;

        // Value returned when reading past the end of the content.
        // NOTE(review): the literal was lost in the source this was taken from
        // (an empty char literal, which does not compile). U+FFFF is assumed
        // to be the end-of-file sentinel - confirm against the consumer.
        private const char EndOfFile = '\uffff';

        private readonly Stream _baseStream;
        private readonly StringBuilder _content;
        private readonly byte[] _buffer;
        private char[] _chars;
        private bool _finished;
        private Encoding _encoding;
        private Decoder _decoder;
        private int _index;

        /// <summary>Gets the already loaded character at the given position.</summary>
        /// <param name="index">Zero-based position in the loaded content.</param>
        public char this[int index]
        {
            get { return _content[index]; }
        }

        /// <summary>
        /// Gets or sets the encoding used to decode the stream. Assigning a
        /// different encoding re-interprets the loaded text from the current
        /// index onwards: it is turned back into bytes with the old encoding
        /// and decoded again with the new one.
        /// </summary>
        /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
        public Encoding CurrentEncoding
        {
            get { return _encoding; }
            set
            {
                if (value == null)
                {
                    // Fail before touching any state; previously this ended in a
                    // NullReferenceException after the content was already cut.
                    throw new ArgumentNullException(nameof(value));
                }

                if (value != _encoding)
                {
                    int length = _content.Length - _index;
                    string unread = _content.ToString(_index, length);
                    byte[] raw = _encoding.GetBytes(unread);

                    _encoding = value;
                    // The old decoder (and any partial sequence it holds) belongs
                    // to the previous encoding and must not be reused.
                    ResetDecoder();

                    _content.Remove(_index, length);
                    _content.Append(_encoding.GetString(raw, 0, raw.Length));
                }
            }
        }

        /// <summary>
        /// Gets or sets the read position. The setter performs no validation.
        /// </summary>
        public int Index
        {
            get { return _index; }
            set { _index = value; }
        }

        /// <summary>Gets the number of characters loaded so far.</summary>
        public int Length => _content.Length;

        /// <summary>Shared initialization: buffers, start index and decoder.</summary>
        /// <param name="encoding">Initial encoding; UTF-8 when null.</param>
        private TextSource(Encoding encoding)
        {
            _buffer = new byte[BufferSize];
            _index = 0;
            _encoding = encoding ?? Encoding.UTF8;
            ResetDecoder();
        }

        /// <summary>
        /// Creates a source over a complete string. CR LF pairs are replaced
        /// by LF and the source is marked as finished immediately.
        /// </summary>
        /// <param name="source">The full text.</param>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        public TextSource(string source)
            : this(Encoding.UTF8)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            _finished = true;
            _content = new StringBuilder(source.Replace("\r\n", "\n"));
        }

        /// <summary>Creates a source that decodes the given stream on demand.</summary>
        /// <param name="baseStream">The stream to read; disposed together with this instance.</param>
        /// <param name="encoding">Initial encoding; UTF-8 when null. A byte order mark overrides it.</param>
        public TextSource(Stream baseStream, Encoding encoding = null)
            : this(encoding)
        {
            _baseStream = baseStream;
            _content = new StringBuilder();
        }

        /// <summary>Disposes the underlying stream, if there is one.</summary>
        public void Dispose()
        {
            _baseStream?.Dispose();
        }

        /// <summary>
        /// Reads the next character and advances the index by one, loading
        /// more content from the stream if necessary.
        /// </summary>
        /// <returns>The character, or the end-of-file sentinel when nothing is left.</returns>
        public char ReadCharacter()
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            ExpandBuffer(BufferSize);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>
        /// Reads up to the given number of characters and advances the index
        /// by <paramref name="characters"/> (even if fewer were available).
        /// </summary>
        /// <param name="characters">Number of characters requested.</param>
        /// <returns>The available characters; empty when the index is at or past the end.</returns>
        public string ReadCharacters(int characters)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            ExpandBuffer(Math.Max(BufferSize, characters));
            return TakeAvailable(start, characters);
        }

        /// <summary>Asynchronous counterpart of <see cref="ReadCharacter"/>.</summary>
        /// <param name="cancellationToken">Token used to cancel the stream reads.</param>
        /// <returns>The character, or the end-of-file sentinel when nothing is left.</returns>
        public async Task<char> ReadCharacterAsync(CancellationToken cancellationToken)
        {
            if (_index < _content.Length)
            {
                return _content[_index++];
            }

            await ExpandBufferAsync(BufferSize, cancellationToken).ConfigureAwait(false);
            int position = _index++;
            return position < _content.Length ? _content[position] : EndOfFile;
        }

        /// <summary>Asynchronous counterpart of <see cref="ReadCharacters"/>.</summary>
        /// <param name="characters">Number of characters requested.</param>
        /// <param name="cancellationToken">Token used to cancel the stream reads.</param>
        /// <returns>The available characters; empty when the index is at or past the end.</returns>
        public async Task<string> ReadCharactersAsync(int characters, CancellationToken cancellationToken)
        {
            int start = _index;

            if (start + characters <= _content.Length)
            {
                _index += characters;
                return _content.ToString(start, characters);
            }

            await ExpandBufferAsync(Math.Max(BufferSize, characters), cancellationToken).ConfigureAwait(false);
            return TakeAvailable(start, characters);
        }

        /// <summary>
        /// Loads content until at least <paramref name="length"/> characters
        /// beyond the current index are available or the stream has ended.
        /// </summary>
        /// <param name="length">Number of characters wanted ahead of the index.</param>
        /// <param name="cancellationToken">Token used to cancel the stream reads.</param>
        public Task Prefetch(int length, CancellationToken cancellationToken)
        {
            return ExpandBufferAsync(length, cancellationToken);
        }

        /// <summary>
        /// Inserts text at the current index, or appends it when the index is
        /// at or past the end of the loaded content. The index is not moved.
        /// </summary>
        /// <param name="content">The text to insert.</param>
        public void InsertText(string content)
        {
            if (_index < _content.Length)
            {
                _content.Insert(_index, content);
            }
            else
            {
                _content.Append(content);
            }
        }

        /// <summary>
        /// Advances the index by the requested amount and returns whatever part
        /// of the range [start, start + characters) is actually loaded.
        /// </summary>
        private string TakeAvailable(int start, int characters)
        {
            _index += characters;
            int available = _content.Length - start;

            if (available <= 0)
            {
                // The index may legitimately sit past the end (reads advance it
                // unconditionally); there is simply nothing left to return.
                return string.Empty;
            }

            return _content.ToString(start, Math.Min(characters, available));
        }

        /// <summary>
        /// Creates a fresh decoder for the current encoding and makes sure the
        /// character buffer can hold the worst-case output of one byte chunk.
        /// </summary>
        private void ResetDecoder()
        {
            _decoder = _encoding.GetDecoder();
            int required = _encoding.GetMaxCharCount(BufferSize);

            if (_chars == null || _chars.Length < required)
            {
                _chars = new char[required];
            }
        }

        /// <summary>
        /// Performs the first read from the stream, switches the encoding if
        /// the data starts with a known byte order mark, and appends the
        /// decoded remainder of the chunk.
        /// </summary>
        private async Task DetectByteOrderMarkAsync(CancellationToken cancellationToken)
        {
            // ConfigureAwait(false) matters here: the synchronous path blocks on
            // this task, so the continuation must not need the caller's context.
            int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            int offset = ApplyByteOrderMark(count);
            AppendContentFromBuffer(count, offset);
        }

        /// <summary>
        /// Checks the first <paramref name="count"/> buffered bytes for a byte
        /// order mark and, if found, switches to the matching encoding.
        /// </summary>
        /// <returns>The number of bytes occupied by the mark (0 when none).</returns>
        private int ApplyByteOrderMark(int count)
        {
            // Order matters: the UTF-32 LE mark starts with the UTF-16 LE mark.
            if (StartsWith(count, 0xEF, 0xBB, 0xBF))
            {
                return SwitchTo(DocumentEncoding.UTF8, 3);
            }

            if (StartsWith(count, 0xFF, 0xFE, 0x00, 0x00))
            {
                return SwitchTo(DocumentEncoding.UTF32LE, 4);
            }

            if (StartsWith(count, 0x00, 0x00, 0xFE, 0xFF))
            {
                return SwitchTo(DocumentEncoding.UTF32BE, 4);
            }

            if (StartsWith(count, 0xFE, 0xFF))
            {
                return SwitchTo(DocumentEncoding.UTF16BE, 2);
            }

            if (StartsWith(count, 0xFF, 0xFE))
            {
                return SwitchTo(DocumentEncoding.UTF16LE, 2);
            }

            if (StartsWith(count, 0x84, 0x31, 0x95, 0x33))
            {
                return SwitchTo(DocumentEncoding.GB18030, 4);
            }

            return 0;
        }

        /// <summary>Tells whether the buffer holds at least the given bytes at its start.</summary>
        private bool StartsWith(int count, params byte[] mark)
        {
            if (count < mark.Length)
            {
                return false;
            }

            for (int i = 0; i < mark.Length; i++)
            {
                if (_buffer[i] != mark[i])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Adopts the detected encoding and returns the length of its mark.</summary>
        private int SwitchTo(Encoding encoding, int markLength)
        {
            _encoding = encoding;
            ResetDecoder();
            return markLength;
        }

        /// <summary>
        /// Reads from the stream until <paramref name="size"/> characters past
        /// the index are loaded or the stream has ended.
        /// </summary>
        private async Task ExpandBufferAsync(long size, CancellationToken cancellationToken)
        {
            if (!_finished && _content.Length == 0)
            {
                await DetectByteOrderMarkAsync(cancellationToken).ConfigureAwait(false);
            }

            while (size + _index > _content.Length && !_finished)
            {
                await ReadIntoBufferAsync(cancellationToken).ConfigureAwait(false);
            }
        }

        /// <summary>Reads one chunk from the stream and appends its decoded text.</summary>
        private async Task ReadIntoBufferAsync(CancellationToken cancellationToken)
        {
            int count = await _baseStream.ReadAsync(_buffer, 0, BufferSize, cancellationToken).ConfigureAwait(false);
            AppendContentFromBuffer(count);
        }

        /// <summary>Blocking counterpart of <see cref="ExpandBufferAsync"/>.</summary>
        private void ExpandBuffer(long size)
        {
            if (!_finished && _content.Length == 0)
            {
                // NOTE(review): blocking on the asynchronous read is kept so that
                // failures still surface wrapped in an AggregateException, as
                // before. Consider Stream.Read if that contract can change.
                DetectByteOrderMarkAsync(CancellationToken.None).Wait();
            }

            while (size + _index > _content.Length && !_finished)
            {
                ReadIntoBuffer();
            }
        }

        /// <summary>Blocking counterpart of <see cref="ReadIntoBufferAsync"/>.</summary>
        private void ReadIntoBuffer()
        {
            int count = _baseStream.ReadAsync(_buffer, 0, BufferSize).Result;
            AppendContentFromBuffer(count);
        }

        /// <summary>
        /// Decodes the freshly read bytes and appends them to the content.
        /// </summary>
        /// <param name="size">Number of valid bytes in the buffer; 0 marks the end of the stream.</param>
        /// <param name="offset">Number of leading bytes to skip (a byte order mark).</param>
        private void AppendContentFromBuffer(int size, int offset = 0)
        {
            _finished = size == 0;

            // Only size - offset bytes follow the skipped prefix. The stateful
            // decoder keeps a multi-byte sequence that is split across two
            // chunks intact; it is flushed once the stream has ended.
            int decoded = _decoder.GetChars(_buffer, offset, size - offset, _chars, 0, _finished);
            int length = CollapseLineBreaks(_chars, decoded);
            _content.Append(_chars, 0, length);
        }

        /// <summary>
        /// Removes the CR of every CR LF pair within the first
        /// <paramref name="count"/> characters, compacting the array in place.
        /// </summary>
        /// <returns>The number of characters that remain.</returns>
        private static int CollapseLineBreaks(char[] chars, int count)
        {
            int write = 0;

            for (int read = 0; read < count; read++)
            {
                bool startsCrLf = chars[read] == '\r' && read + 1 < count && chars[read + 1] == '\n';

                if (!startsCrLf)
                {
                    chars[write++] = chars[read];
                }
            }

            // A CR in the very last position is dropped too, since its LF may be
            // the first character of the next chunk.
            // NOTE(review): a CR that is NOT followed by LF is lost as well.
            if (write > 0 && chars[write - 1] == '\r')
            {
                write--;
            }

            return write;
        }
    }
}