Refactoring the ARM Hardware Intrinsics based on the latest design decisions. (#26895)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / Text / EncoderNLS.cs
blobff664f2209d515dc8643b5e670687a1467d1a84a
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Buffers;
6 using System.Diagnostics;
7 using System.Runtime.InteropServices;
9 namespace System.Text
11 // An Encoder is used to encode a sequence of blocks of characters into
12 // a sequence of blocks of bytes. Following instantiation of an encoder,
13 // sequential blocks of characters are converted into blocks of bytes through
14 // calls to the GetBytes method. The encoder maintains state between the
15 // conversions, allowing it to correctly encode character sequences that span
16 // adjacent blocks.
18 // Instances of specific implementations of the Encoder abstract base
19 // class are typically obtained through calls to the GetEncoder method
20 // of Encoding objects.
23 internal class EncoderNLS : Encoder
// The Encoding that created this encoder; every conversion call is forwarded to it.
private readonly Encoding _encoding;

// Copy of the 'flush' argument of the most recent GetByteCount/GetBytes/Convert call.
// ClearMustFlush() can switch it back off.
private bool _mustFlush;

// Holds the last char left over from a previous call (most of our encodings use this).
// (char)0 means there is nothing pending.
internal char _charLeftOver;

// Set to true by GetByteCount/GetBytes and to false by Convert.
internal bool _throwOnOverflow;

// Convert zeroes this before calling into the encoding and reports its value
// afterwards through the 'charsUsed' out parameter.
internal int _charsUsed;
// Creates an encoder bound to 'encoding', adopts that encoding's EncoderFallback
// and starts out in the reset state.
internal EncoderNLS(Encoding encoding)
{
    _encoding = encoding;
    _fallback = encoding.EncoderFallback;
    Reset();
}
// Forgets any pending leftover char and, if a fallback buffer has already been
// created, resets that buffer too.
public override void Reset()
{
    _charLeftOver = '\0';
    _fallbackBuffer?.Reset();
}
// Array overload: validates the (chars, index, count) window and then defers to the
// pointer-based GetByteCount overload.
public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > chars.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Pin through a span reference. NOTE(review): presumably this is done so that an empty
    // array still produces a non-null pointer, since the pointer overload rejects null - confirm.
    Span<char> source = chars;
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    {
        return GetByteCount(pSource + index, count, flush);
    }
}
// Pointer overload: records the flush request, turns on throw-on-overflow and asks the
// owning encoding to compute the byte count with this encoder's state.
public override unsafe int GetByteCount(char* chars, int count, bool flush)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    _throwOnOverflow = true;
    _mustFlush = flush;

    Debug.Assert(_encoding != null);
    return _encoding.GetByteCount(chars, count, this);
}
// Array overload: validates both buffers and then defers to the pointer-based GetBytes
// overload, offering it everything from byteIndex to the end of 'bytes' as output space.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, bool flush)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteIndex < 0 || bytes.Length < byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Output capacity is whatever remains after byteIndex; charCount is the number of
    // chars to encode, not the size of the array.
    int bytesAvailable = bytes.Length - byteIndex;

    Span<char> source = chars;
    Span<byte> destination = bytes;
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    fixed (byte* pDestination = &MemoryMarshal.GetReference(destination))
    {
        return GetBytes(pSource + charIndex, charCount, pDestination + byteIndex, bytesAvailable, flush);
    }
}
// Pointer overload: records the flush request, turns on throw-on-overflow and lets the
// owning encoding do the conversion with this encoder's state.
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    // byteCount is reported ahead of charCount when both are negative.
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    _throwOnOverflow = true;
    _mustFlush = flush;

    Debug.Assert(_encoding != null);
    return _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
}
// Use this when the output buffer might be too small to hold the whole result.
// Array overload: validates both windows and hands off to the pointer-based Convert.
public override unsafe void Convert(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    Span<char> source = chars;
    Span<byte> destination = bytes;
    fixed (char* pSource = &MemoryMarshal.GetReference(source))
    fixed (byte* pDestination = &MemoryMarshal.GetReference(destination))
    {
        Convert(pSource + charIndex, charCount, pDestination + byteIndex, byteCount, flush,
            out charsUsed, out bytesUsed, out completed);
    }
}
// Pointer-based Convert. Sets up this encoder's bookkeeping fields, lets the owning
// encoding produce the bytes, and then reports how far the conversion got.
public override unsafe void Convert(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    // 'bytes' is reported ahead of 'chars' when both are null.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Convert must not throw when the output fills up, so overflow throwing is switched off
    // and the consumed-char counter is zeroed for this call.
    _charsUsed = 0;
    _throwOnOverflow = false;
    _mustFlush = flush;

    Debug.Assert(_encoding != null);
    bytesUsed = _encoding.GetBytes(chars, charCount, bytes, byteCount, this);

    int consumed = _charsUsed;
    charsUsed = consumed;

    // 'completed' means all of the input was converted and stored in the output: every
    // input char was consumed, no encoder state is pending, and the fallback buffer (if
    // one exists) has nothing left to emit.
    if (consumed != charCount)
    {
        completed = false;
    }
    else if (HasState)
    {
        completed = false;
    }
    else
    {
        completed = _fallbackBuffer is null || _fallbackBuffer.Remaining == 0;
    }
}
// The Encoding this encoder was created for.
public Encoding Encoding
{
    get
    {
        Encoding owner = _encoding;
        Debug.Assert(owner != null);
        return owner;
    }
}
// The 'flush' value recorded by the most recent conversion call (false again after ClearMustFlush).
public bool MustFlush
{
    get { return _mustFlush; }
}
/// <summary>
/// Indicates whether <see cref="Encoding.GetBytes(char*, int, byte*, int, EncoderNLS)"/> has to drain
/// pending data from this <see cref="EncoderNLS"/> instance (a leftover char or unread
/// fallback-buffer contents) before it processes new input.
/// </summary>
internal bool HasLeftoverData
{
    get
    {
        if (_charLeftOver != default)
        {
            return true;
        }

        return _fallbackBuffer != null && _fallbackBuffer.Remaining > 0;
    }
}
// True while a leftover char is being held. Virtual, so derived encoders can override it.
internal virtual bool HasState
{
    get { return _charLeftOver != '\0'; }
}
// Lets the encoding switch the flush request off rather than throwing
// (used from ThrowBytesOverflow).
internal void ClearMustFlush() => _mustFlush = false;
// Computes how many bytes the leftover high surrogate from a previous call would produce,
// pairing it with the first char of 'chars' when that yields a valid scalar value.
// 'charsConsumed' is 1 when the first input char completed the surrogate pair, else 0.
// _charLeftOver is deliberately left untouched: GetByteCount is meant to be non-mutating
// and the next Convert call still needs the value.
internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed)
{
    // Fallback data still pending from an earlier call would be swallowed by the counting
    // below and corrupt whatever Convert call is in progress, so reject that situation.
    var pendingFallback = _fallbackBuffer;
    if (pendingFallback != null && pendingFallback.Remaining > 0)
    {
        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, Encoding.EncodingName, pendingFallback.GetType()));
    }

    charsConsumed = 0; // adjusted below if the first input char gets paired up

    char highSurrogate = _charLeftOver;
    if (highSurrogate == default(char))
    {
        return 0; // nothing pending - done
    }

    char nextChar;
    if (!chars.IsEmpty)
    {
        nextChar = chars[0];
    }
    else if (MustFlush)
    {
        // No more input and a flush was requested: the lone surrogate goes through
        // the fallback mechanism on its own.
        nextChar = default;
    }
    else
    {
        return 0; // empty input without flush is a successful no-op
    }

    // An index of -1 tells the fallback that the invalid sequence started one char
    // before the current buffer (i.e. at the leftover surrogate).
    if (!Rune.TryCreate(highSurrogate, nextChar, out Rune scalar))
    {
        _ = FallbackBuffer.Fallback(highSurrogate, index: -1);
    }
    else
    {
        charsConsumed = 1; // the first input char completed the pair

        Debug.Assert(_encoding != null);
        if (_encoding.TryGetByteCount(scalar, out int encodedLength))
        {
            Debug.Assert(encodedLength >= 0, "Encoding shouldn't have returned a negative byte count.");
            return encodedLength;
        }

        _ = FallbackBuffer.Fallback(highSurrogate, nextChar, index: -1);
    }

    // Count the bytes the fallback output would have produced.
    Debug.Assert(_fallbackBuffer != null);
    return _fallbackBuffer.DrainRemainingDataForGetByteCount();
}
// Writes out data carried over from a previous call before new input is encoded: either a
// leftover high surrogate (paired with the first char of 'chars' when possible) or unread
// fallback-buffer contents. The two are never expected to be pending at the same time.
//
//   chars         - the new input; only its first char is examined.
//   bytes         - destination for the encoded leftover data.
//   charsConsumed - 1 if the first input char formed a valid pair with the leftover
//                   surrogate, otherwise 0.
//   bytesWritten  - value reported by the encoding's EncodeRune or by the fallback-buffer drain.
//
// Returns true when there was nothing to do or the leftover data was handled; when fallback
// data had to be drained, returns the result of TryDrainRemainingDataForGetBytes.
internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten)
{
    charsConsumed = 0; // could be incorrect, will fix up later in the method
    bytesWritten = 0; // could be incorrect, will fix up later in the method

    if (_charLeftOver != default)
    {
        char secondChar = default;

        if (chars.IsEmpty)
        {
            // Empty input without a flush request is a successful no-op. With a flush
            // request, the lone surrogate goes through the fallback mechanism by itself.
            if (!MustFlush)
            {
                charsConsumed = 0;
                bytesWritten = 0;
                return true;
            }
        }
        else
        {
            secondChar = chars[0];
        }

        // Whenever the data has to go through fallback, the fallback buffer is populated here
        // and drained by the "remaining fallback data" step at the end of the method.
        if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
        {
            charsConsumed = 1; // at the very least, we consumed 1 char from the input
            Debug.Assert(_encoding != null);
            switch (_encoding.EncodeRune(rune, bytes, out bytesWritten))
            {
                case OperationStatus.Done:
                    _charLeftOver = default; // we just consumed this char
                    return true; // that's all - we've handled the leftover data

                case OperationStatus.DestinationTooSmall:
                    _charLeftOver = default; // we just consumed this char
                    _encoding.ThrowBytesOverflow(this, nothingEncoded: true); // will throw
                    break;

                case OperationStatus.InvalidData:
                    // index: -1 => the invalid sequence began one char before the current buffer.
                    FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1);

                    // The surrogate has been handed to the fallback buffer; clear it so it is not
                    // processed a second time by a later call and cannot coexist with fallback data.
                    _charLeftOver = default;
                    break;

                default:
                    Debug.Fail("Unknown return value.");
                    break;
            }
        }
        else
        {
            // index: -1 => the invalid sequence began one char before the current buffer.
            FallbackBuffer.Fallback(_charLeftOver, index: -1);

            // Same reasoning as above: the lone surrogate now lives in the fallback buffer.
            _charLeftOver = default;
        }
    }

    // Now check the fallback buffer for any remaining data.
    if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
    {
        return _fallbackBuffer.TryDrainRemainingDataForGetBytes(bytes, out bytesWritten);
    }

    // And we're done!
    return true; // success
}