1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
7 using System
.Diagnostics
;
8 using System
.Runtime
.InteropServices
;
12 // A Decoder is used to decode a sequence of blocks of bytes into a
13 // sequence of blocks of characters. Following instantiation of a decoder,
14 // sequential blocks of bytes are converted into blocks of characters through
15 // calls to the GetChars method. The decoder maintains state between the
// conversions, allowing it to correctly decode byte sequences that span
// adjacent blocks.
19 // Instances of specific implementations of the Decoder abstract base
20 // class are typically obtained through calls to the GetDecoder method
21 // of Encoding objects.
23 public abstract class Decoder
// Fallback assigned to this decoder; null until one is set.
internal DecoderFallback? _fallback;

// Fallback buffer belonging to this decoder; null until one is created.
internal DecoderFallbackBuffer? _fallbackBuffer;
31 // We don't call default reset because default reset probably isn't good if we aren't initialized.
// Gets or sets the fallback used by this decoder.
//
// Setting the property:
//   - throws ArgumentNullException when value is null;
//   - throws ArgumentException when the current fallback buffer still reports
//     Remaining > 0;
//   - otherwise stores the new fallback and drops the current fallback buffer.
public DecoderFallback? Fallback
{
    get => _fallback;
    set
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        // Can't change fallback if buffer is wrong
        if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(
                SR.Argument_FallbackBufferNotEmpty, nameof(value));
        }

        _fallback = value;

        // The old buffer came from the previous fallback, so it is discarded here.
        _fallbackBuffer = null;
    }
}
// Gets the fallback buffer for this decoder, creating it on first access.
// The buffer is created from _fallback when that is set; otherwise it is created
// from DecoderFallback.ReplacementFallback.
//
// Note: we don't test for threading here because async access to Encoders and Decoders
// doesn't work anyway.
public DecoderFallbackBuffer FallbackBuffer
{
    get
    {
        if (_fallbackBuffer == null)
        {
            // Exactly one source is used: the configured fallback, or the replacement fallback.
            DecoderFallback source = _fallback ?? DecoderFallback.ReplacementFallback;
            _fallbackBuffer = source.CreateFallbackBuffer();
        }

        return _fallbackBuffer;
    }
}
// True when a fallback buffer has already been created for this decoder.
// Reading this property never creates one.
internal bool InternalHasFallbackBuffer
{
    get { return _fallbackBuffer != null; }
}
// Flushes the decoder by decoding an empty input with flush = true, then resets the
// fallback buffer if one has been created.
//
// Normally, if GetChars() throws, the state of the Decoder is not changed. That lets the
// caller correct the error condition (for example by supplying a bigger buffer) and try
// again. A caller that does not want to try again after GetChars() throws needs to call
// Reset().
//
// This virtual implementation has to call GetChars with flush and a buffer big enough to
// clear a 0 byte input. GetMaxCharCount() is avoided because a) we can't call the base
// encoder and b) it might be really big.
public virtual void Reset()
{
    byte[] noBytes = Array.Empty<byte>();

    // Ask how many chars flushing would produce so the scratch buffer is sized exactly.
    int pendingChars = GetCharCount(noBytes, 0, 0, true);
    char[] scratch = new char[pendingChars];

    GetChars(noBytes, 0, 0, scratch, 0, true);

    if (_fallbackBuffer != null)
    {
        _fallbackBuffer.Reset();
    }
}
// Returns the number of characters the next call to GetChars will
// produce if presented with the given range of bytes. The returned value
// takes into account the state in which the decoder was left following the
// last call to GetChars. The state of the decoder is not affected
// by a call to this method.
//
//   bytes - the array holding the bytes to examine.
//   index - position in bytes where the range starts.
//   count - number of bytes in the range.
//
// Abstract: each concrete decoder supplies the implementation.
public abstract int GetCharCount(byte[] bytes, int index, int count);
// Overload of GetCharCount that also takes a flush flag.
// This default implementation does not use flush: it forwards bytes, index and count
// to the three-argument overload and returns that result.
public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
    => GetCharCount(bytes, index, count);
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation.
//
// Copies count bytes from the bytes pointer into a temporary array and returns the
// result of the array-based GetCharCount for that array.
//
// Throws ArgumentNullException when bytes is null, and ArgumentOutOfRangeException
// when count is negative.
//
// NOTE(review): flush is accepted but not forwarded -- the call at the end uses the
// three-argument overload. Confirm that is intended before changing it.
[CLSCompliant(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
{
    // Validate input parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes),
            SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Copy the unmanaged input into a managed array for the array-based overload.
    byte[] arrbyte = new byte[count];
    for (int index = 0; index < count; index++)
    {
        arrbyte[index] = bytes[index];
    }

    return GetCharCount(arrbyte, 0, count);
}
// Span-based overload: pins the span and forwards to the pointer-based GetCharCount,
// passing the span's Length as the count.
// NOTE(review): GetNonNullPinnableReference presumably yields a non-null reference even
// for an empty span, so the pointer overload's null check does not fire -- confirm
// against MemoryMarshal.
public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes, bool flush)
{
    fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
    {
        int charCount = GetCharCount(pinnedBytes, bytes.Length, flush);
        return charCount;
    }
}
// Decodes a range of bytes in a byte array into a range of characters
// in a character array. The method decodes byteCount bytes from
// bytes starting at index byteIndex, storing the resulting
// characters in chars starting at index charIndex. The
// decoding takes into account the state in which the decoder was left
// following the last call to this method.
//
// An exception occurs if the character array is not large enough to
// hold the complete decoding of the bytes. The GetCharCount method
// can be used to determine the exact number of characters that will be
// produced for a given range of bytes. Alternatively, the
// GetMaxCharCount method of the Encoding that produced this
// decoder can be used to determine the maximum number of characters that
// will be produced for a given number of bytes, regardless of the actual
// byte values.
//
// Abstract: each concrete decoder supplies the implementation. The int result is
// used by callers in this class as the number of characters written.
public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex);
// Overload of GetChars that also takes a flush flag.
// This default implementation does not use flush: it forwards the remaining arguments
// to the five-argument overload and returns that result.
public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex, bool flush)
    => GetChars(bytes, byteIndex, byteCount, chars, charIndex);
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation.
//
// WARNING WARNING WARNING
//
// WARNING: If this breaks it could be a security threat. Obviously we
// call this internally, so you need to make sure that your pointers, counts
// and indexes are correct when you call this method.
//
// In addition, we have internal code, which will be marked as "safe" calling
// this code. However this code is dependent upon the implementation of an
// external GetChars() method, which could be overridden by a third party and
// the results of which cannot be guaranteed. We use that result to copy
// the char[] to our char* output buffer. If the result count was wrong, we
// could easily overflow our output buffer. Therefore we do an extra test
// when we copy the buffer so that we don't overflow charCount either.
//
// Returns the number of characters copied to chars, which is never more than charCount.
// Throws ArgumentNullException when chars or bytes is null, and
// ArgumentOutOfRangeException when byteCount or charCount is negative.
[CLSCompliant(false)]
public virtual unsafe int GetChars(byte* bytes, int byteCount,
    char* chars, int charCount, bool flush)
{
    // Validate input parameters
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
            SR.ArgumentNull_Array);
    }

    if (byteCount < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Get the byte array to convert
    byte[] arrByte = new byte[byteCount];
    for (int index = 0; index < byteCount; index++)
    {
        arrByte[index] = bytes[index];
    }

    // Get the char array to fill
    char[] arrChar = new char[charCount];

    // Do the work through the array-based overload
    int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);

    Debug.Assert(result <= charCount, "Returned more chars than we have space for");

    // Copy the char array
    // WARNING: We MUST make sure that we don't copy too many chars. We can't
    // rely on result because it could be a 3rd party implementation. We need
    // to make sure we never copy more than charCount chars no matter the value
    // of result.
    if (result < charCount)
    {
        charCount = result;
    }

    // We check both result and charCount so that we don't accidentally overrun
    // our pointer buffer just because of an issue in GetChars
    for (int index = 0; index < charCount; index++)
    {
        chars[index] = arrChar[index];
    }

    return charCount;
}
// Span-based overload: pins both spans and forwards to the pointer-based GetChars,
// passing each span's Length as the corresponding count.
// NOTE(review): GetNonNullPinnableReference presumably yields a non-null reference even
// for an empty span, so the pointer overload's null checks do not fire -- confirm
// against MemoryMarshal.
public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)
{
    fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
    {
        fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
        {
            int written = GetChars(pinnedBytes, bytes.Length, pinnedChars, chars.Length, flush);
            return written;
        }
    }
}
// This method is used when the output buffer might not be large enough.
// It will decode until it runs out of bytes, and then it will return
// true if the entire input was converted. In either case it
// will also return the number of converted bytes and output characters used.
// It will only throw a buffer overflow exception if the entire length of chars[] is
// too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
// We're done processing this buffer only if completed returns true.
//
// Might consider checking Max...Count to avoid the extra counting step.
//
// Note that if all of the input bytes are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
//
// Outputs:
//   bytesUsed - number of input bytes that were decoded.
//   charsUsed - value returned by GetChars for the decoded range.
//   completed - true only when every requested byte was decoded and the fallback
//               buffer (if one exists) has nothing remaining.
//
// Throws ArgumentNullException for a null array, ArgumentOutOfRangeException for a
// negative index/count or a range that does not fit its array, and ArgumentException
// (conversion overflow) when no prefix of the input fits in charCount characters.
//
// NOTE(review): when byteCount is 0 the loop body never runs, so the method falls
// through to the overflow exception -- confirm that is intended.
public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex, int charCount, bool flush,
    out int bytesUsed, out int charsUsed, out bool completed)
{
    // Validate parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
            SR.ArgumentNull_Array);
    }

    if (byteIndex < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charIndex < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (bytes.Length - byteIndex < byteCount)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes),
            SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException(nameof(chars),
            SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    bytesUsed = byteCount;

    // It's easy to do if it won't overrun our buffer.
    while (bytesUsed > 0)
    {
        if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
        {
            charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
            completed = (bytesUsed == byteCount &&
                (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
            return;
        }

        // Try again with 1/2 the count, won't flush then 'cause won't read it all
        flush = false;
        bytesUsed /= 2;
    }

    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(SR.Argument_ConversionOverflow);
}
// This is the version that uses *.
// We're done processing this buffer only if completed returns true.
//
// Might consider checking Max...Count to avoid the extra counting step.
//
// Note that if all of the input bytes are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many bytes as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
//
// Outputs:
//   bytesUsed - number of input bytes that were decoded.
//   charsUsed - value returned by the pointer-based GetChars for the decoded range.
//   completed - true only when every requested byte was decoded and the fallback
//               buffer (if one exists) has nothing remaining.
//
// Throws ArgumentNullException when chars or bytes is null, ArgumentOutOfRangeException
// when byteCount or charCount is negative, and ArgumentException (conversion overflow)
// when no prefix of the input fits in charCount characters.
//
// NOTE(review): when byteCount is 0 the loop body never runs, so the method falls
// through to the overflow exception -- confirm that is intended.
[CLSCompliant(false)]
public virtual unsafe void Convert(byte* bytes, int byteCount,
    char* chars, int charCount, bool flush,
    out int bytesUsed, out int charsUsed, out bool completed)
{
    // Validate input parameters
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
            SR.ArgumentNull_Array);
    }

    if (byteCount < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Get ready to do it
    bytesUsed = byteCount;

    // It's easy to do if it won't overrun our buffer.
    while (bytesUsed > 0)
    {
        if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
        {
            charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
            completed = (bytesUsed == byteCount &&
                (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
            return;
        }

        // Try again with 1/2 the count, won't flush then 'cause won't read it all
        flush = false;
        bytesUsed /= 2;
    }

    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(SR.Argument_ConversionOverflow);
}
// Span-based overload: pins both spans and forwards to the pointer-based Convert,
// passing each span's Length as the corresponding count. The three out values are
// whatever the pointer-based overload assigns.
// NOTE(review): GetNonNullPinnableReference presumably yields a non-null reference even
// for an empty span, so the pointer overload's null checks do not fire -- confirm
// against MemoryMarshal.
public virtual unsafe void Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush,
    out int bytesUsed, out int charsUsed, out bool completed)
{
    int inputLength = bytes.Length;
    int outputLength = chars.Length;

    fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
    {
        fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
        {
            Convert(pinnedBytes, inputLength, pinnedChars, outputLength, flush,
                out bytesUsed, out charsUsed, out completed);
        }
    }
}