1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System
.Diagnostics
;
6 using System
.Runtime
.InteropServices
;
10 // An Encoder is used to encode a sequence of blocks of characters into
11 // a sequence of blocks of bytes. Following instantiation of an encoder,
12 // sequential blocks of characters are converted into blocks of bytes through
// calls to the GetBytes method. The encoder maintains state between the
// conversions, allowing it to correctly encode character sequences that span
// adjacent blocks.
17 // Instances of specific implementations of the Encoder abstract base
18 // class are typically obtained through calls to the GetEncoder method
19 // of Encoding objects.
21 public abstract class Encoder
// Fallback that FallbackBuffer uses to create the buffer; null until one is assigned.
internal EncoderFallback? _fallback;

// Buffer created on demand by FallbackBuffer; reset to null when the fallback is replaced.
internal EncoderFallbackBuffer? _fallbackBuffer;
29 // We don't call default reset because default reset probably isn't good if we aren't initialized.
// Gets or sets the fallback this encoder uses to create its fallback buffer.
// The setter throws ArgumentNullException for a null value and ArgumentException
// while the current fallback buffer still reports remaining data.
public EncoderFallback? Fallback
{
    get => _fallback;
    set
    {
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        // Can't change fallback if the existing buffer still has data pending.
        if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(
                SR.Argument_FallbackBufferNotEmpty, nameof(value));
        }

        _fallback = value;

        // Drop the old buffer so the next FallbackBuffer access builds one from the new fallback.
        _fallbackBuffer = null;
    }
}
50 // Note: we don't test for threading here because async access to Encoders and Decoders
51 // doesn't work anyway.
// Returns the encoder's fallback buffer, creating it on first access.
// The buffer comes from the assigned fallback when there is one; otherwise
// from EncoderFallback.ReplacementFallback.
public EncoderFallbackBuffer FallbackBuffer
{
    get
    {
        // Only fall back to the replacement fallback when no fallback was assigned;
        // an assigned fallback must never be overridden by the default.
        _fallbackBuffer ??= (_fallback ?? EncoderFallback.ReplacementFallback).CreateFallbackBuffer();

        return _fallbackBuffer;
    }
}
// True once a fallback buffer exists; reading this never creates one.
internal bool InternalHasFallbackBuffer
{
    get { return !(_fallbackBuffer is null); }
}
72 // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder. This
73 // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
75 // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
77 // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
78 // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
// Clears encoder state by running a flushing, zero-character GetBytes call,
// then resets the fallback buffer if one has been created.
public virtual void Reset()
{
    char[] noChars = Array.Empty<char>();

    // Size the scratch output from GetByteCount so the flushing call below has room.
    int flushedByteCount = GetByteCount(noChars, 0, 0, true);
    byte[] scratch = new byte[flushedByteCount];

    GetBytes(noChars, 0, 0, scratch, 0, true);

    _fallbackBuffer?.Reset();
}
// Returns the number of bytes the next call to GetBytes will
// produce if presented with the given range of characters and the given
// value of the flush parameter. The returned value takes into
// account the state in which the encoder was left following the last call
// to GetBytes. The state of the encoder is not affected by a call
// to this method.
// Array-based overload that concrete encoders must implement; the default
// pointer overload of GetByteCount forwards to it.
public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
97 // We expect this to be the workhorse for NLS encodings
98 // unfortunately for existing overrides, it has to call the [] version,
99 // which is really slow, so avoid this method if you might be calling external encodings.
// Default pointer-based byte count: copies the input into a managed array and
// forwards to the abstract char[] overload.
// Throws ArgumentNullException when chars is null and
// ArgumentOutOfRangeException when count is negative.
[CLSCompliant(false)]
public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars),
            SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // The array overload is the only one guaranteed to be implemented,
    // so the characters are copied into a managed array first.
    char[] arrChar = new char[count];
    new ReadOnlySpan<char>(chars, count).CopyTo(arrChar);

    return GetByteCount(arrChar, 0, count, flush);
}
// Span-based byte count: pins the span and forwards to the pointer overload.
public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars, bool flush)
{
    int charCount = chars.Length;

    // NOTE(review): GetNonNullPinnableReference presumably yields a non-null reference
    // even for an empty span, so the pointer overload's null check passes — confirm.
    fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
    {
        int byteCount = GetByteCount(pChars, charCount, flush);
        return byteCount;
    }
}
129 // Encodes a range of characters in a character array into a range of bytes
130 // in a byte array. The method encodes charCount characters from
131 // chars starting at index charIndex, storing the resulting
132 // bytes in bytes starting at index byteIndex. The encoding
133 // takes into account the state in which the encoder was left following the
134 // last call to this method. The flush parameter indicates whether
135 // the encoder should flush any shift-states and partial characters at the
136 // end of the conversion. To ensure correct termination of a sequence of
137 // blocks of encoded bytes, the last call to GetBytes should specify
138 // a value of true for the flush parameter.
140 // An exception occurs if the byte array is not large enough to hold the
141 // complete encoding of the characters. The GetByteCount method can
142 // be used to determine the exact number of bytes that will be produced for
143 // a given range of characters. Alternatively, the GetMaxByteCount
144 // method of the Encoding that produced this encoder can be used to
145 // determine the maximum number of bytes that will be produced for a given
146 // number of characters, regardless of the actual character values.
// Array-based overload that concrete encoders must implement; the default
// pointer overload of GetBytes forwards to it.
public abstract int GetBytes(
    char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex,
    bool flush);
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation.
154 // WARNING WARNING WARNING
156 // WARNING: If this breaks it could be a security threat. Obviously we
157 // call this internally, so you need to make sure that your pointers, counts
158 // and indexes are correct when you call this method.
160 // In addition, we have internal code, which will be marked as "safe" calling
161 // this code. However this code is dependent upon the implementation of an
162 // external GetBytes() method, which could be overridden by a third party and
163 // the results of which cannot be guaranteed. We use that result to copy
164 // the byte[] to our byte* output buffer. If the result count was wrong, we
165 // could easily overflow our output buffer. Therefore we do an extra test
166 // when we copy the buffer so that we don't overflow byteCount either.
// Default pointer-based encode: copies the input into a managed array, calls the
// abstract array overload, then copies the produced bytes to the output pointer.
// Returns the number of bytes written to the output, never more than byteCount.
// Throws ArgumentNullException when either pointer is null and
// ArgumentOutOfRangeException when either count is negative.
[CLSCompliant(false)]
public virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush)
{
    // Validate input parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
            SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(charCount < 0 ? nameof(charCount) : nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Get the char array to convert
    char[] arrChar = new char[charCount];
    new ReadOnlySpan<char>(chars, charCount).CopyTo(arrChar);

    // Get the byte array to fill
    byte[] arrByte = new byte[byteCount];

    // Do the work through the array overload
    int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);

    Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");

    // WARNING: result comes from an overridable method and cannot be trusted.
    // Clamp it so we never copy more than byteCount bytes, whatever it claims.
    int bytesToCopy = Math.Min(result, byteCount);

    // A plain loop keeps a bogus negative result harmless: nothing is copied.
    for (int i = 0; i < bytesToCopy; i++)
    {
        bytes[i] = arrByte[i];
    }

    return bytesToCopy;
}
// Span-based encode: pins both spans and forwards to the pointer overload.
public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, bool flush)
{
    int charCount = chars.Length;
    int byteCapacity = bytes.Length;

    fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
    {
        fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
        {
            int written = GetBytes(pChars, charCount, pBytes, byteCapacity, flush);
            return written;
        }
    }
}
// This method is used to avoid running out of output buffer space.
// It will encode until it runs out of chars, and then it will return
// true if the entire input was converted. In either case it
// will also return the number of converted chars and output bytes used.
// It will only throw a buffer overflow exception if the entire length of bytes[] is
// too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
// We're done processing this buffer only if completed returns true.
227 // Might consider checking Max...Count to avoid the extra counting step.
// Note that if all of the input chars are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many chars as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
// Encodes as many of the requested chars as fit into byteCount bytes.
// charsUsed receives the number of chars consumed, bytesUsed the number of bytes
// produced, and completed is true only when every requested char was consumed and
// the fallback buffer (if any) has nothing remaining.
// Throws ArgumentNullException for null arrays, ArgumentOutOfRangeException for
// negative or out-of-bounds index/count values, and ArgumentException when no
// chars could be converted into the available output space.
public virtual void Convert(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Validate parameters
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
            SR.ArgumentNull_Array);
    }

    if (charIndex < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException(charIndex < 0 ? nameof(charIndex) : nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteIndex < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException(nameof(chars),
            SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (bytes.Length - byteIndex < byteCount)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes),
            SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    charsUsed = charCount;

    // It's easy to do if it won't overrun our buffer.
    // Note: We don't want to call the unsafe version because that might be an untrusted
    // version which could be really unsafe and we don't want to mix it up.
    while (charsUsed > 0)
    {
        if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
        {
            bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);
            completed = charsUsed == charCount &&
                (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0);
            return;
        }

        // Try again with 1/2 the count; don't flush then, because we won't read it all.
        flush = false;
        charsUsed /= 2;
    }

    // NOTE(review): when charCount is 0 the loop body never runs, so this exception is
    // thrown even though there was nothing to convert — confirm that is intended.
    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(SR.Argument_ConversionOverflow);
}
281 // Same thing, but using pointers
283 // Might consider checking Max...Count to avoid the extra counting step.
// Note that if all of the input chars are not consumed, then we'll do a /2, which means
// that it's likely that we didn't consume as many chars as we could have. For some
// applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
// Pointer-based counterpart of the array Convert overload.
// charsUsed receives the number of chars consumed, bytesUsed the number of bytes
// produced, and completed is true only when every requested char was consumed and
// the fallback buffer (if any) has nothing remaining.
// Throws ArgumentNullException for null pointers, ArgumentOutOfRangeException for
// negative counts, and ArgumentException when no chars could be converted into
// the available output space.
[CLSCompliant(false)]
public virtual unsafe void Convert(char* chars, int charCount,
    byte* bytes, int byteCount, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    // Validate input parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
            SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(charCount < 0 ? nameof(charCount) : nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Get ready to do it
    charsUsed = charCount;

    // It's easy to do if it won't overrun our buffer.
    while (charsUsed > 0)
    {
        if (GetByteCount(chars, charsUsed, flush) <= byteCount)
        {
            bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);
            completed = charsUsed == charCount &&
                (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0);
            return;
        }

        // Try again with 1/2 the count; don't flush then, because we won't read it all.
        flush = false;
        charsUsed /= 2;
    }

    // NOTE(review): when charCount is 0 the loop body never runs, so this exception is
    // thrown even though there was nothing to convert — confirm that is intended.
    // Oops, we didn't have anything, we'll have to throw an overflow
    throw new ArgumentException(SR.Argument_ConversionOverflow);
}
// Span-based Convert: pins both spans and forwards to the pointer overload,
// which assigns charsUsed, bytesUsed and completed.
public virtual unsafe void Convert(ReadOnlySpan<char> chars, Span<byte> bytes, bool flush,
    out int charsUsed, out int bytesUsed, out bool completed)
{
    int charCount = chars.Length;
    int byteCapacity = bytes.Length;

    fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
    {
        fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
        {
            Convert(pChars, charCount, pBytes, byteCapacity, flush,
                out charsUsed, out bytesUsed, out completed);
        }
    }
}