netcore/System.Private.CoreLib/shared/System/Text/Encoder.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Text;
   6 using System;
   7 using System.Diagnostics;
   8 using System.Runtime.InteropServices;
   9
  10 namespace System.Text
  11 {
  12     // An Encoder is used to encode a sequence of blocks of characters into
  13     // a sequence of blocks of bytes. Following instantiation of an encoder,
  14     // sequential blocks of characters are converted into blocks of bytes through
  15     // calls to the GetBytes method. The encoder maintains state between the
  16     // conversions, allowing it to correctly encode character sequences that span
  17     // adjacent blocks.
  18     //
  19     // Instances of specific implementations of the Encoder abstract base
  20     // class are typically obtained through calls to the GetEncoder method
  21     // of Encoding objects.
  22     //
  23     public abstract class Encoder
  24     {
  25         internal EncoderFallback? _fallback = null;
  26
  27         internal EncoderFallbackBuffer? _fallbackBuffer = null;
  28
  29         protected Encoder()
  30         {
  31             // We don't call default reset because default reset probably isn't good if we aren't initialized.
  32         }
  33
  34         public EncoderFallback? Fallback
  35         {
  36             get
  37             {
  38                 return _fallback;
  39             }
  40
  41             set
  42             {
  43                 if (value == null)
  44                     throw new ArgumentNullException(nameof(value));
  45
  46                 // Can't change fallback if buffer is wrong
  47                 if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
  48                     throw new ArgumentException(
  49                       SR.Argument_FallbackBufferNotEmpty, nameof(value));
  50
  51                 _fallback = value;
  52                 _fallbackBuffer = null;
  53             }
  54         }
  55
  56         // Note: we don't test for threading here because async access to Encoders and Decoders
  57         // doesn't work anyway.
  58         public EncoderFallbackBuffer FallbackBuffer
  59         {
  60             get
  61             {
  62                 if (_fallbackBuffer == null)
  63                 {
  64                     if (_fallback != null)
  65                         _fallbackBuffer = _fallback.CreateFallbackBuffer();
  66                     else
  67                         _fallbackBuffer = EncoderFallback.ReplacementFallback.CreateFallbackBuffer();
  68                 }
  69
  70                 return _fallbackBuffer;
  71             }
  72         }
  73
  74         internal bool InternalHasFallbackBuffer => _fallbackBuffer != null;
  75
  76         // Reset the Encoder
  77         //
  78         // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder.  This
  79         // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  80         //
  81         // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
  82         //
  83         // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
  84         // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
  85         public virtual void Reset()
  86         {
  87             char[] charTemp = Array.Empty<char>();
  88             byte[] byteTemp = new byte[GetByteCount(charTemp, 0, 0, true)];
  89             GetBytes(charTemp, 0, 0, byteTemp, 0, true);
  90             if (_fallbackBuffer != null)
  91                 _fallbackBuffer.Reset();
  92         }
  93
  94         // Returns the number of bytes the next call to GetBytes will
  95         // produce if presented with the given range of characters and the given
  96         // value of the flush parameter. The returned value takes into
  97         // account the state in which the encoder was left following the last call
  98         // to GetBytes. The state of the encoder is not affected by a call
  99         // to this method.
 100         //
 101         public abstract int GetByteCount(char[] chars, int index, int count, bool flush);
 102
 103         // We expect this to be the workhorse for NLS encodings
 104         // unfortunately for existing overrides, it has to call the [] version,
 105         // which is really slow, so avoid this method if you might be calling external encodings.
 106         [CLSCompliant(false)]
 107         public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
 108         {
 109             // Validate input parameters
 110             if (chars == null)
 111                 throw new ArgumentNullException(nameof(chars),
 112                       SR.ArgumentNull_Array);
 113
 114             if (count < 0)
 115                 throw new ArgumentOutOfRangeException(nameof(count),
 116                       SR.ArgumentOutOfRange_NeedNonNegNum);
 117
 118             char[] arrChar = new char[count];
 119             int index;
 120
 121             for (index = 0; index < count; index++)
 122                 arrChar[index] = chars[index];
 123
 124             return GetByteCount(arrChar, 0, count, flush);
 125         }
 126
 127         public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars, bool flush)
 128         {
 129             fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
 130             {
 131                 return GetByteCount(charsPtr, chars.Length, flush);
 132             }
 133         }
 134
 135         // Encodes a range of characters in a character array into a range of bytes
 136         // in a byte array. The method encodes charCount characters from
 137         // chars starting at index charIndex, storing the resulting
 138         // bytes in bytes starting at index byteIndex. The encoding
 139         // takes into account the state in which the encoder was left following the
 140         // last call to this method. The flush parameter indicates whether
 141         // the encoder should flush any shift-states and partial characters at the
 142         // end of the conversion. To ensure correct termination of a sequence of
 143         // blocks of encoded bytes, the last call to GetBytes should specify
 144         // a value of true for the flush parameter.
 145         //
 146         // An exception occurs if the byte array is not large enough to hold the
 147         // complete encoding of the characters. The GetByteCount method can
 148         // be used to determine the exact number of bytes that will be produced for
 149         // a given range of characters. Alternatively, the GetMaxByteCount
 150         // method of the Encoding that produced this encoder can be used to
 151         // determine the maximum number of bytes that will be produced for a given
 152         // number of characters, regardless of the actual character values.
 153         //
 154         public abstract int GetBytes(char[] chars, int charIndex, int charCount,
 155                                         byte[] bytes, int byteIndex, bool flush);
 156
 157         // We expect this to be the workhorse for NLS Encodings, but for existing
 158         // ones we need a working (if slow) default implementation)
 159         //
 160         // WARNING WARNING WARNING
 161         //
 162         // WARNING: If this breaks it could be a security threat.  Obviously we
 163         // call this internally, so you need to make sure that your pointers, counts
 164         // and indexes are correct when you call this method.
 165         //
 166         // In addition, we have internal code, which will be marked as "safe" calling
 167         // this code.  However this code is dependent upon the implementation of an
 168         // external GetBytes() method, which could be overridden by a third party and
 169         // the results of which cannot be guaranteed.  We use that result to copy
 170         // the byte[] to our byte* output buffer.  If the result count was wrong, we
 171         // could easily overflow our output buffer.  Therefore we do an extra test
 172         // when we copy the buffer so that we don't overflow byteCount either.
 173         [CLSCompliant(false)]
 174         public virtual unsafe int GetBytes(char* chars, int charCount,
 175                                               byte* bytes, int byteCount, bool flush)
 176         {
 177             // Validate input parameters
 178             if (bytes == null || chars == null)
 179                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
 180                     SR.ArgumentNull_Array);
 181
 182             if (charCount < 0 || byteCount < 0)
 183                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
 184                     SR.ArgumentOutOfRange_NeedNonNegNum);
 185
 186             // Get the char array to convert
 187             char[] arrChar = new char[charCount];
 188
 189             int index;
 190             for (index = 0; index < charCount; index++)
 191                 arrChar[index] = chars[index];
 192
 193             // Get the byte array to fill
 194             byte[] arrByte = new byte[byteCount];
 195
 196             // Do the work
 197             int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);
 198
 199             Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");
 200
 201             // Copy the byte array
 202             // WARNING: We MUST make sure that we don't copy too many bytes.  We can't
 203             // rely on result because it could be a 3rd party implementation.  We need
 204             // to make sure we never copy more than byteCount bytes no matter the value
 205             // of result
 206             if (result < byteCount)
 207                 byteCount = result;
 208
 209             // Don't copy too many bytes!
 210             for (index = 0; index < byteCount; index++)
 211                 bytes[index] = arrByte[index];
 212
 213             return byteCount;
 214         }
 215
 216         public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, bool flush)
 217         {
 218             fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
 219             fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
 220             {
 221                 return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length, flush);
 222             }
 223         }
 224
 225         // This method is used to avoid running out of output buffer space.
 226         // It will encode until it runs out of chars, and then it will return
 227         // true if it the entire input was converted.  In either case it
 228         // will also return the number of converted chars and output bytes used.
 229         // It will only throw a buffer overflow exception if the entire lenght of bytes[] is
 230         // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
 231         // We're done processing this buffer only if completed returns true.
 232         //
 233         // Might consider checking Max...Count to avoid the extra counting step.
 234         //
 235         // Note that if all of the input chars are not consumed, then we'll do a /2, which means
 236         // that its likely that we didn't consume as many chars as we could have.  For some
 237         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 238         public virtual void Convert(char[] chars, int charIndex, int charCount,
 239                                       byte[] bytes, int byteIndex, int byteCount, bool flush,
 240                                       out int charsUsed, out int bytesUsed, out bool completed)
 241         {
 242             // Validate parameters
 243             if (chars == null || bytes == null)
 244                 throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
 245                       SR.ArgumentNull_Array);
 246
 247             if (charIndex < 0 || charCount < 0)
 248                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
 249                       SR.ArgumentOutOfRange_NeedNonNegNum);
 250
 251             if (byteIndex < 0 || byteCount < 0)
 252                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
 253                       SR.ArgumentOutOfRange_NeedNonNegNum);
 254
 255             if (chars.Length - charIndex < charCount)
 256                 throw new ArgumentOutOfRangeException(nameof(chars),
 257                       SR.ArgumentOutOfRange_IndexCountBuffer);
 258
 259             if (bytes.Length - byteIndex < byteCount)
 260                 throw new ArgumentOutOfRangeException(nameof(bytes),
 261                       SR.ArgumentOutOfRange_IndexCountBuffer);
 262
 263             charsUsed = charCount;
 264
 265             // Its easy to do if it won't overrun our buffer.
 266             // Note: We don't want to call unsafe version because that might be an untrusted version
 267             // which could be really unsafe and we don't want to mix it up.
 268             while (charsUsed > 0)
 269             {
 270                 if (GetByteCount(chars, charIndex, charsUsed, flush) <= byteCount)
 271                 {
 272                     bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);
 273                     completed = (charsUsed == charCount &&
 274                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
 275                     return;
 276                 }
 277
 278                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 279                 flush = false;
 280                 charsUsed /= 2;
 281             }
 282
 283             // Oops, we didn't have anything, we'll have to throw an overflow
 284             throw new ArgumentException(SR.Argument_ConversionOverflow);
 285         }
 286
 287         // Same thing, but using pointers
 288         //
 289         // Might consider checking Max...Count to avoid the extra counting step.
 290         //
 291         // Note that if all of the input chars are not consumed, then we'll do a /2, which means
 292         // that its likely that we didn't consume as many chars as we could have.  For some
 293         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
 294         [CLSCompliant(false)]
 295         public virtual unsafe void Convert(char* chars, int charCount,
 296                                              byte* bytes, int byteCount, bool flush,
 297                                              out int charsUsed, out int bytesUsed, out bool completed)
 298         {
 299             // Validate input parameters
 300             if (bytes == null || chars == null)
 301                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
 302                     SR.ArgumentNull_Array);
 303             if (charCount < 0 || byteCount < 0)
 304                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
 305                     SR.ArgumentOutOfRange_NeedNonNegNum);
 306
 307             // Get ready to do it
 308             charsUsed = charCount;
 309
 310             // Its easy to do if it won't overrun our buffer.
 311             while (charsUsed > 0)
 312             {
 313                 if (GetByteCount(chars, charsUsed, flush) <= byteCount)
 314                 {
 315                     bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);
 316                     completed = (charsUsed == charCount &&
 317                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
 318                     return;
 319                 }
 320
 321                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
 322                 flush = false;
 323                 charsUsed /= 2;
 324             }
 325
 326             // Oops, we didn't have anything, we'll have to throw an overflow
 327             throw new ArgumentException(SR.Argument_ConversionOverflow);
 328         }
 329
 330         public virtual unsafe void Convert(ReadOnlySpan<char> chars, Span<byte> bytes, bool flush, out int charsUsed, out int bytesUsed, out bool completed)
 331         {
 332             fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
 333             fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
 334             {
 335                 Convert(charsPtr, chars.Length, bytesPtr, bytes.Length, flush, out charsUsed, out bytesUsed, out completed);
 336             }
 337         }
 338     }
 339 }