here, have a Thumb back-end
[mozilla-central.git] / intl / uconv / ucvlatin / nsUTF32ToUnicode.cpp
blobe659205fdf72dfdcb25329502864b1ea305ee2ef
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:expandtab:shiftwidth=2:tabstop=2:
3 */
4 /* ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
17 * The Original Code is Mozilla Communicator client code.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
24 * Contributor(s):
25 * Jungshik Shin <jshin@mailaps.org>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
41 #include "nsUCSupport.h"
42 #include "nsUTF32ToUnicode.h"
43 #include "nsCharTraits.h"
44 #include <string.h>
46 //----------------------------------------------------------------------
47 // static functions and macro definition common to nsUTF32(BE|LE)ToUnicode
// Assemble one 32-bit UCS-4 value from the four bytes starting at |s|.
//
// LE_STRING_TO_UCS4 treats s[0] as the least significant byte,
// BE_STRING_TO_UCS4 treats s[0] as the most significant byte.
//
// The bytes are always read one at a time, so the result does not depend on
// the host byte order and |s| does not need to be 4-byte aligned: |s| is an
// arbitrary position in a byte stream, and dereferencing it as a PRUint32*
// is a misaligned access on strict-alignment CPUs as well as an aliasing
// violation. Every byte is widened to PRUint32 *before* it is shifted so
// that a byte >= 0x80 is never shifted into the sign bit of a promoted int.
//
// |s| may be any pointer to a character type (char, PRUint8, ...); it is
// evaluated more than once, so pass an expression without side effects.
#define UTF32_BYTE_AT(s, i) (PRUint32(PRUint8((s)[(i)])))

#define LE_STRING_TO_UCS4(s)                                       \
        ( UTF32_BYTE_AT(s, 0)        | (UTF32_BYTE_AT(s, 1) <<  8) | \
         (UTF32_BYTE_AT(s, 2) << 16) | (UTF32_BYTE_AT(s, 3) << 24))

#define BE_STRING_TO_UCS4(s)                                       \
        ( UTF32_BYTE_AT(s, 3)        | (UTF32_BYTE_AT(s, 2) <<  8) | \
         (UTF32_BYTE_AT(s, 1) << 16) | (UTF32_BYTE_AT(s, 0) << 24))
65 static nsresult ConvertCommon(const char * aSrc,
66 PRInt32 * aSrcLength,
67 PRUnichar * aDest,
68 PRInt32 * aDestLength,
69 PRUint16 * aState,
70 PRUint8 * aBuffer,
71 PRBool aIsLE)
74 NS_ENSURE_TRUE(*aState < 4, NS_ERROR_INVALID_ARG);
75 NS_ENSURE_TRUE(*aDestLength > 0, NS_ERROR_INVALID_ARG);
77 const char *src = aSrc;
78 const char *srcEnd = aSrc + *aSrcLength;
80 PRUnichar *dest = aDest;
81 PRUnichar *destEnd = aDest + *aDestLength;
83 if (*aState > *aSrcLength)
85 memcpy(aBuffer + 4 - *aState, src, *aSrcLength);
86 *aDestLength = 0;
87 *aState -= *aSrcLength;
88 return NS_OK_UDEC_MOREINPUT;
91 PRUint32 ucs4;
93 // prev. run left a partial UTF-32 seq.
94 if (*aState > 0)
96 memcpy(aBuffer + 4 - *aState, src, *aState);
97 ucs4 = aIsLE ? LE_STRING_TO_UCS4(aBuffer) : BE_STRING_TO_UCS4(aBuffer);
98 if (ucs4 < 0x10000L) // BMP
100 *dest++= IS_SURROGATE(ucs4) ? UCS2_REPLACEMENT_CHAR : PRUnichar(ucs4);
102 else if (ucs4 < 0x110000L) // plane 1 through plane 16
104 if (destEnd - dest < 2)
106 *aSrcLength = 0;
107 *aDestLength = 0;
108 return NS_OK_UDEC_MOREOUTPUT;
110 *dest++= H_SURROGATE(ucs4);
111 *dest++= L_SURROGATE(ucs4);
113 // Codepoints in plane 17 and higher (> 0x10ffff)
114 // are not representable in UTF-16 we use for the internal
115 // character representation. This is not a problem
116 // because Unicode/ISO 10646 will never assign characters
117 // in plane 17 and higher. Therefore, we convert them
118 // to Unicode replacement character (0xfffd).
119 else
120 *dest++ = UCS2_REPLACEMENT_CHAR;
121 src += *aState;
122 *aState = 0;
125 nsresult rv = NS_OK; // conversion result
127 for ( ; src < srcEnd && dest < destEnd; src += 4)
129 if (srcEnd - src < 4)
131 // fill up aBuffer until src buffer gets exhausted.
132 memcpy(aBuffer, src, srcEnd - src);
133 *aState = 4 - (srcEnd - src); // set add. char to read in next run
134 src = srcEnd;
135 rv = NS_OK_UDEC_MOREINPUT;
136 break;
139 ucs4 = aIsLE ? LE_STRING_TO_UCS4(src) : BE_STRING_TO_UCS4(src);
140 if (ucs4 < 0x10000L) // BMP
142 *dest++= IS_SURROGATE(ucs4) ? UCS2_REPLACEMENT_CHAR : PRUnichar(ucs4);
144 else if (ucs4 < 0x110000L) // plane 1 through plane 16
146 if (destEnd - dest < 2)
147 break;
148 // ((ucs4 - 0x10000) >> 10) + 0xd800;
149 *dest++= H_SURROGATE(ucs4);
150 *dest++= L_SURROGATE(ucs4);
152 else // plane 17 and higher
153 *dest++ = UCS2_REPLACEMENT_CHAR;
156 //output not finished, output buffer too short
157 if((NS_OK == rv) && (src < srcEnd) && (dest >= destEnd))
158 rv = NS_OK_UDEC_MOREOUTPUT;
160 *aSrcLength = src - aSrc;
161 *aDestLength = dest - aDest;
163 return rv;
167 //----------------------------------------------------------------------
168 // Class nsUTF32ToUnicode [implementation]
170 nsUTF32ToUnicode::nsUTF32ToUnicode() : nsBasicDecoderSupport()
172 Reset();
175 //----------------------------------------------------------------------
176 // Subclassing of nsDecoderSupport class [implementation]
178 NS_IMETHODIMP nsUTF32ToUnicode::GetMaxLength(const char * aSrc,
179 PRInt32 aSrcLength,
180 PRInt32 * aDestLength)
182 // Non-BMP characters take two PRUnichars(a pair of surrogate codepoints)
183 // so that we have to divide by 2 instead of 4 for the worst case.
184 *aDestLength = aSrcLength / 2;
185 return NS_OK;
189 //----------------------------------------------------------------------
190 // Subclassing of nsBasicDecoderSupport class [implementation]
192 NS_IMETHODIMP nsUTF32ToUnicode::Reset()
194 // the number of additional bytes to read to complete UTF-32 4byte seq.
195 mState = 0;
196 memset(mBufferInc, 0, 4);
197 return NS_OK;
202 //----------------------------------------------------------------------
203 // Class nsUTF32BEToUnicode [implementation]
205 //----------------------------------------------------------------------
206 // Subclassing of nsUTF32ToUnicode class [implementation]
208 NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc,
209 PRInt32 * aSrcLength,
210 PRUnichar * aDest,
211 PRInt32 * aDestLength)
213 return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, &mState,
214 mBufferInc, PR_FALSE);
217 //----------------------------------------------------------------------
218 // Class nsUTF32LEToUnicode [implementation]
220 //----------------------------------------------------------------------
221 // Subclassing of nsUTF32ToUnicode class [implementation]
223 NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc,
224 PRInt32 * aSrcLength,
225 PRUnichar * aDest,
226 PRInt32 * aDestLength)
228 return ConvertCommon(aSrc, aSrcLength, aDest, aDestLength, &mState,
229 mBufferInc, PR_TRUE);
232 // XXX : What to do with 'unflushed' mBufferInc?? : Finish()