1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 using System
.Diagnostics
;
7 using System
.Runtime
.InteropServices
;
11 // An Encoder is used to encode a sequence of blocks of characters into
12 // a sequence of blocks of bytes. Following instantiation of an encoder,
13 // sequential blocks of characters are converted into blocks of bytes through
14 // calls to the GetBytes method. The encoder maintains state between the
15 // conversions, allowing it to correctly encode character sequences that span adjacent blocks.
18 // Instances of specific implementations of the Encoder abstract base
19 // class are typically obtained through calls to the GetEncoder method
20 // of Encoding objects.
23 internal class EncoderNLS
: Encoder
25 // Need a place for the last left over character, most of our encodings use this
// _charLeftOver is compared against default / (char)0 to detect pending state (HasState,
// HasLeftoverData) and is paired with the next input char via Rune.TryCreate when draining.
26 internal char _charLeftOver
;
// The encoding that the GetByteCount / GetBytes / Convert overloads forward to.
27 private readonly Encoding _encoding
;
// Backing field for the MustFlush property.
28 private bool _mustFlush
;
// Set to true by the pointer-based GetByteCount / GetBytes and to false by Convert before
// calling into the encoding. NOTE(review): presumably read by the encoding to choose between
// throwing and stopping early when the output is too small - confirm in Encoding.
29 internal bool _throwOnOverflow
;
// Copied into Convert's charsUsed output after the encoding call returns.
// NOTE(review): presumably written by the encoding during that call - confirm.
30 internal int _charsUsed
;
/// <summary>
/// Creates an encoder bound to <paramref name="encoding"/> and adopts that encoding's
/// <see cref="Encoding.EncoderFallback"/> as this encoder's fallback.
/// </summary>
/// <param name="encoding">The encoding that this encoder forwards its conversion calls to.</param>
internal EncoderNLS(Encoding encoding)
{
    // _encoding is readonly, so this is the only place it can be set; it must be assigned
    // before it is dereferenced on the next line.
    _encoding = encoding;
    _fallback = _encoding.EncoderFallback;

    // Begin from a clean state: no leftover char, and any existing fallback buffer reset.
    Reset();
}
/// <summary>
/// Discards pending encoder state: forgets the leftover character and, when a fallback
/// buffer exists, resets it.
/// </summary>
public override void Reset()
{
    _charLeftOver = '\0';
    _fallbackBuffer?.Reset();
}
/// <summary>
/// Validates the array arguments and forwards to the pointer-based
/// <c>GetByteCount(char*, int, bool)</c> overload.
/// </summary>
/// <param name="chars">The array containing the characters to measure.</param>
/// <param name="index">The index of the first character to measure.</param>
/// <param name="count">The number of characters to measure.</param>
/// <param name="flush">Passed through unchanged to the pointer-based overload.</param>
/// <returns>The value returned by the pointer-based overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="index"/> or <paramref name="count"/> is negative, or the two together
/// run past the end of <paramref name="chars"/>.
/// </exception>
public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0 || count < 0)
    {
        throw new ArgumentOutOfRangeException(index < 0 ? nameof(index) : nameof(count),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (chars.Length - index < count)
    {
        throw new ArgumentOutOfRangeException(nameof(chars),
            SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Just call the pointer version. Pinning via MemoryMarshal.GetReference (rather than
    // "fixed (char* p = chars)") keeps the pointer non-null for a zero-length array, so the
    // pointer overload's null check is not tripped by empty input.
    fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
    {
        return GetByteCount(pChars + index, count, flush);
    }
}
/// <summary>
/// Validates the pointer arguments, records the call's flush / overflow settings on this
/// encoder, and asks the encoding for the byte count.
/// </summary>
/// <param name="chars">Pointer to the first character to measure.</param>
/// <param name="count">The number of characters to measure.</param>
/// <param name="flush">Stored in the flag exposed by <c>MustFlush</c> for the duration of the call.</param>
/// <returns>The value returned by <c>_encoding.GetByteCount(chars, count, this)</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
public override unsafe int GetByteCount(char* chars, int count, bool flush)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Publish the caller's flush request; the drain helpers consult MustFlush to decide
    // whether a lone leftover surrogate should be pushed through fallback.
    _mustFlush = flush;
    _throwOnOverflow = true;

    Debug.Assert(_encoding != null);
    return _encoding.GetByteCount(chars, count, this);
}
/// <summary>
/// Validates the array arguments and forwards to the pointer-based
/// <c>GetBytes(char*, int, byte*, int, bool)</c> overload, offering it all of
/// <paramref name="bytes"/> from <paramref name="byteIndex"/> to the end as output space.
/// </summary>
/// <param name="chars">The array containing the characters to encode.</param>
/// <param name="charIndex">The index of the first character to encode.</param>
/// <param name="charCount">The number of characters to encode.</param>
/// <param name="bytes">The array that receives the encoded bytes.</param>
/// <param name="byteIndex">The index in <paramref name="bytes"/> at which writing starts.</param>
/// <param name="flush">Passed through unchanged to the pointer-based overload.</param>
/// <returns>The value returned by the pointer-based overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">An index or count is negative or outside its array.</exception>
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex, bool flush)
{
    // Null checks; chars is reported first when both arrays are null.
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    // Sign checks; charIndex is reported first when both values are negative.
    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // The requested char range must fit inside the array.
    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // byteIndex may equal bytes.Length (zero bytes of output space) but not exceed it.
    bool byteIndexInRange = byteIndex >= 0 && byteIndex <= bytes.Length;
    if (!byteIndexInRange)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    int bytesAvailable = bytes.Length - byteIndex;

    // Hand off to the pointer version. charCount is the number of chars to encode,
    // not the size of the array.
    Span<char> source = chars;
    Span<byte> destination = bytes;
    fixed (char* sourcePtr = &MemoryMarshal.GetReference(source))
    fixed (byte* destinationPtr = &MemoryMarshal.GetReference(destination))
    {
        return GetBytes(sourcePtr + charIndex, charCount,
                        destinationPtr + byteIndex, bytesAvailable, flush);
    }
}
/// <summary>
/// Validates the pointer arguments, records the call's flush / overflow settings on this
/// encoder, and asks the encoding to perform the conversion.
/// </summary>
/// <param name="chars">Pointer to the first character to encode.</param>
/// <param name="charCount">The number of characters to encode.</param>
/// <param name="bytes">Pointer to the start of the output buffer.</param>
/// <param name="byteCount">The size of the output buffer, in bytes.</param>
/// <param name="flush">Stored in the flag exposed by <c>MustFlush</c> for the duration of the call.</param>
/// <returns>The value returned by <c>_encoding.GetBytes(chars, charCount, bytes, byteCount, this)</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> or <paramref name="charCount"/> is negative.</exception>
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)
{
    // Validate parameters
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
            SR.ArgumentNull_Array);
    }

    if (byteCount < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException(byteCount < 0 ? nameof(byteCount) : nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Publish the caller's flush request; without this the flush argument would be ignored
    // and MustFlush would keep whatever value an earlier call left behind.
    _mustFlush = flush;
    _throwOnOverflow = true;

    Debug.Assert(_encoding != null);
    return _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
}
/// <summary>
/// Array-based conversion for callers whose output buffer might not be large enough for the
/// entire result. Validates the arguments and forwards to the pointer-based <c>Convert</c>
/// overload, which produces the three output values.
/// </summary>
/// <param name="chars">The array containing the characters to encode.</param>
/// <param name="charIndex">The index of the first character to encode.</param>
/// <param name="charCount">The number of characters to encode.</param>
/// <param name="bytes">The array that receives the encoded bytes.</param>
/// <param name="byteIndex">The index in <paramref name="bytes"/> at which writing starts.</param>
/// <param name="byteCount">The maximum number of bytes to write.</param>
/// <param name="flush">Passed through unchanged to the pointer-based overload.</param>
/// <param name="charsUsed">Set by the pointer-based overload.</param>
/// <param name="bytesUsed">Set by the pointer-based overload.</param>
/// <param name="completed">Set by the pointer-based overload.</param>
/// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">An index or count is negative or outside its array.</exception>
public override unsafe void Convert(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex, int byteCount, bool flush,
                                    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Null checks; chars is reported first when both arrays are null.
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    // Sign checks on the char range; charIndex is reported first.
    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Sign checks on the byte range; byteIndex is reported first.
    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Both ranges must fit inside their arrays.
    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Hand off to the pointer version (can't do this for non-msft encoders)
    Span<char> source = chars;
    Span<byte> destination = bytes;
    fixed (char* sourcePtr = &MemoryMarshal.GetReference(source))
    fixed (byte* destinationPtr = &MemoryMarshal.GetReference(destination))
    {
        Convert(sourcePtr + charIndex, charCount,
                destinationPtr + byteIndex, byteCount, flush,
                out charsUsed, out bytesUsed, out completed);
    }
}
/// <summary>
/// Pointer-based conversion. Sets this encoder's per-call state, then calls the encoding's
/// worker to get the bytes, and reports how much input was consumed and whether the
/// conversion is complete.
/// </summary>
/// <param name="chars">Pointer to the first character to encode.</param>
/// <param name="charCount">The number of characters to encode.</param>
/// <param name="bytes">Pointer to the start of the output buffer.</param>
/// <param name="byteCount">The size of the output buffer, in bytes.</param>
/// <param name="flush">Stored in the flag exposed by <c>MustFlush</c> for the duration of the call.</param>
/// <param name="charsUsed">Receives the value of <c>_charsUsed</c> after the encoding call.</param>
/// <param name="bytesUsed">Receives the value returned by the encoding's <c>GetBytes</c>.</param>
/// <param name="completed">
/// True only when every input char was consumed, the encoder holds no pending state, and the
/// fallback buffer (if any) has nothing remaining.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> or <paramref name="byteCount"/> is negative.</exception>
public override unsafe void Convert(char* chars, int charCount,
                                    byte* bytes, int byteCount, bool flush,
                                    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Validate input parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
            SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(charCount < 0 ? nameof(charCount) : nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // We don't want to throw
    _mustFlush = flush;
    _throwOnOverflow = false;

    // Clear the counter first so a stale value from an earlier call can never be reported
    // as this call's charsUsed.
    _charsUsed = 0;

    // Do conversion
    Debug.Assert(_encoding != null);
    bytesUsed = _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
    charsUsed = _charsUsed;

    // Per MSDN, "The completed output parameter indicates whether all the data in the input
    // buffer was converted and stored in the output buffer." That means we've successfully
    // consumed all the input _and_ there's no pending state or fallback data remaining to be output.
    completed = (charsUsed == charCount)
        && !HasState
        && (_fallbackBuffer is null || _fallbackBuffer.Remaining == 0);

    // Our data thingys are now full, we can return
}
/// <summary>
/// Gets the encoding this encoder was constructed with.
/// </summary>
public Encoding Encoding
{
    get
    {
        Debug.Assert(_encoding != null);
        return _encoding;
    }
}
/// <summary>
/// Gets the current value of the encoder's must-flush flag.
/// </summary>
public bool MustFlush
{
    get { return _mustFlush; }
}
/// <summary>
/// States whether a call to <see cref="Encoding.GetBytes(char*, int, byte*, int, EncoderNLS)"/> must first drain data on this <see cref="EncoderNLS"/> instance.
/// </summary>
/// <remarks>
/// True when a leftover character is stored, or when a fallback buffer exists and still
/// reports remaining data.
/// </remarks>
internal bool HasLeftoverData
{
    get
    {
        if (_charLeftOver != default)
        {
            return true;
        }

        return _fallbackBuffer != null && _fallbackBuffer.Remaining > 0;
    }
}
/// <summary>
/// Anything left in our encoder? True when a leftover character is currently stored.
/// </summary>
internal virtual bool HasState
{
    get { return _charLeftOver != '\0'; }
}
/// <summary>
/// Allow encoding to clear our must flush instead of throwing (in ThrowBytesOverflow).
/// </summary>
internal void ClearMustFlush()
{
    _mustFlush = false;
}
/// <summary>
/// Computes how many bytes the encoder's leftover character (if any) would contribute,
/// combining it with the first char of <paramref name="chars"/> when one is available.
/// </summary>
/// <param name="chars">The input about to be measured; only its first element is examined.</param>
/// <param name="charsConsumed">
/// Receives 1 when the leftover character formed a valid pair with <c>chars[0]</c>; otherwise 0.
/// </param>
/// <returns>
/// 0 when there is no leftover character, or when the input is empty and no flush was
/// requested; the encoding's byte count for the combined pair when it can supply one;
/// otherwise the count reported by the fallback buffer.
/// </returns>
/// <exception cref="ArgumentException">The fallback buffer still holds data from a previous operation.</exception>
internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed)
{
    // Quick check: we _should not_ have leftover fallback data from a previous invocation,
    // as we'd end up consuming any such data and would corrupt whatever Convert call happens
    // to be in progress.

    if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
    {
        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, Encoding.EncodingName, _fallbackBuffer.GetType()));
    }

    // If we have a leftover high surrogate from a previous operation, consume it now.
    // We won't clear the _charLeftOver field since GetByteCount is supposed to be
    // a non-mutating operation, and we need the field to retain its value for the
    // next call to Convert.

    charsConsumed = 0; // could be incorrect, will fix up later in the method

    if (_charLeftOver == default)
    {
        return 0; // no leftover high surrogate char - short-circuit and finish
    }

    char secondChar = default;

    if (chars.IsEmpty)
    {
        // If the input buffer is empty and we're not being asked to flush, no-op and return
        // success to our caller. If we're being asked to flush, the leftover high surrogate from
        // the previous operation will go through the fallback mechanism by itself.

        if (!MustFlush)
        {
            return 0; // no-op = success
        }
    }
    else
    {
        secondChar = chars[0];
    }

    // If we have to fallback the chars we're reading immediately below, populate the
    // fallback buffer with the invalid data. We'll just fall through to the "consume
    // fallback buffer" logic at the end of the method.

    if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
    {
        charsConsumed = 1; // consumed the leftover high surrogate + the first char in the input buffer

        Debug.Assert(_encoding != null);
        if (_encoding.TryGetByteCount(rune, out int byteCount))
        {
            Debug.Assert(byteCount >= 0, "Encoding shouldn't have returned a negative byte count.");
            return byteCount;
        }

        // The fallback mechanism relies on a negative index to convey "the start of the invalid
        // sequence was some number of chars back before the current buffer." In this block and
        // in the block immediately thereafter, we know we have a single leftover high surrogate
        // character from a previous operation, so we provide an index of -1 to convey that the
        // char immediately before the current buffer was the start of the invalid sequence.

        FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1);
    }
    else
    {
        FallbackBuffer.Fallback(_charLeftOver, index: -1);
    }

    // Now tally the number of bytes that would've been emitted as part of fallback.
    Debug.Assert(_fallbackBuffer != null);
    return _fallbackBuffer.DrainRemainingDataForGetByteCount();
}
/// <summary>
/// Writes out whatever this encoder is still holding from a previous operation - a leftover
/// character, or data remaining in the fallback buffer - before new input is encoded.
/// </summary>
/// <param name="chars">The input about to be encoded; only its first element is examined.</param>
/// <param name="bytes">The destination for any bytes produced while draining.</param>
/// <param name="charsConsumed">
/// Receives 1 when the leftover character formed a valid pair with <c>chars[0]</c>; otherwise 0.
/// </param>
/// <param name="bytesWritten">Receives the number of bytes written to <paramref name="bytes"/>.</param>
/// <returns>
/// True when there was nothing to drain or the leftover data was handled; otherwise the
/// result of the fallback buffer's <c>TryDrainRemainingDataForGetBytes</c>.
/// NOTE(review): presumably false means <paramref name="bytes"/> was too small - confirm
/// against the fallback buffer implementation.
/// </returns>
internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten)
{
    // We may have a leftover high surrogate data from a previous invocation, or we may have leftover
    // data in the fallback buffer, or we may have neither, but we will never have both. Check for these
    // conditions and handle them now.

    charsConsumed = 0; // could be incorrect, will fix up later in the method
    bytesWritten = 0; // could be incorrect, will fix up later in the method

    if (_charLeftOver != default)
    {
        char secondChar = default;

        if (chars.IsEmpty)
        {
            // If the input buffer is empty and we're not being asked to flush, no-op and return
            // success to our caller. If we're being asked to flush, the leftover high surrogate from
            // the previous operation will go through the fallback mechanism by itself.

            if (!MustFlush)
            {
                return true; // no-op = success
            }
        }
        else
        {
            secondChar = chars[0];
        }

        // If we have to fallback the chars we're reading immediately below, populate the
        // fallback buffer with the invalid data. We'll just fall through to the "consume
        // fallback buffer" logic at the end of the method.

        if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
        {
            charsConsumed = 1; // at the very least, we consumed 1 char from the input
            Debug.Assert(_encoding != null);
            switch (_encoding.EncodeRune(rune, bytes, out bytesWritten))
            {
                case OperationStatus.Done:
                    _charLeftOver = default; // we just consumed this char
                    return true; // that's all - we've handled the leftover data

                case OperationStatus.DestinationTooSmall:
                    _charLeftOver = default; // we just consumed this char
                    _encoding.ThrowBytesOverflow(this, nothingEncoded: true); // will throw
                    break;

                case OperationStatus.InvalidData:
                    FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1); // see comment in DrainLeftoverDataForGetByteCount
                    break;

                default:
                    Debug.Fail("Unknown return value.");
                    break;
            }
        }
        else
        {
            FallbackBuffer.Fallback(_charLeftOver, index: -1); // see comment in DrainLeftoverDataForGetByteCount
        }
    }

    // Now check the fallback buffer for any remaining data.

    if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
    {
        return _fallbackBuffer.TryDrainRemainingDataForGetBytes(bytes, out bytesWritten);
    }

    return true; // success
}