1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:expandtab:shiftwidth=2:tabstop=2:
4 /* ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
17 * The Original Code is Mozilla Communicator client code.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
25 * Jungshik Shin <jshin@mailaps.org>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
41 #include "nsUCSupport.h"
42 #include "nsUTF32ToUnicode.h"
43 #include "nsCharTraits.h"
46 //----------------------------------------------------------------------
47 // static functions and macro definition common to nsUTF32(BE|LE)ToUnicode
// LE_STRING_TO_UCS4(s): builds a 32-bit value from the four bytes starting at
// s, taking s[0] as the least significant byte and s[3] as the most
// significant. Each byte is converted to PRUint8 before it is shifted.
50 #define LE_STRING_TO_UCS4(s) \
51 (PRUint8(*(s)) | (PRUint8(*((s) + 1)) << 8) | \
52 (PRUint8(*((s) + 2)) << 16) | (PRUint8(*((s) + 3)) << 24))
// Alternative definition: reads the four bytes in one go through a PRUint32*.
// NOTE(review): the preprocessor conditionals that choose between the two
// definitions of each macro are not visible in this extract - presumably a
// host byte-order check; confirm. The cast form also presumably relies on s
// being suitably aligned for a PRUint32 load - verify against callers.
54 #define LE_STRING_TO_UCS4(s) (*(PRUint32*) (s))
// BE_STRING_TO_UCS4(s), direct-load form (same caveats as above).
58 #define BE_STRING_TO_UCS4(s) (*(PRUint32*) (s))
// BE_STRING_TO_UCS4(s), byte-wise form: s[0] is the most significant byte and
// s[3] the least significant.
60 #define BE_STRING_TO_UCS4(s) \
61 (PRUint8(*((s) + 3)) | (PRUint8(*((s) + 2)) << 8) | \
62 (PRUint8(*((s) + 1)) << 16) | (PRUint8(*(s)) << 24))
// ConvertCommon: conversion routine shared by the big-endian and little-endian
// UTF-32 decoders; their Convert() methods forward to it and differ only in
// the value passed for aIsLE.
//
// Arguments, as used by the visible statements:
//   aSrc        - start of the input bytes.
//   aSrcLength  - in: number of input bytes available;
//                 out: number of input bytes consumed (src - aSrc).
//   aDest       - start of the PRUnichar output buffer.
//   aDestLength - in: capacity of aDest in PRUnichars (must be > 0);
//                 out: number of PRUnichars written (dest - aDest).
//   aState      - number of bytes still needed to complete a 4-byte sequence
//                 left unfinished by a previous call (must be < 4).
//   aBuffer     - carry-over buffer holding the bytes of that unfinished
//                 sequence.
//   aIsLE       - selects LE_STRING_TO_UCS4 when true, BE_STRING_TO_UCS4
//                 otherwise.
//
// Status values appearing in the visible code: NS_ERROR_INVALID_ARG,
// NS_OK_UDEC_MOREINPUT, NS_OK_UDEC_MOREOUTPUT, and NS_OK as the initial value
// of rv.
//
// NOTE(review): several parameter declarations, all braces, and some
// statements of this definition (including the final return) are not visible
// in this extract; the comments below describe only what is shown.
65 static nsresult
ConvertCommon(const char * aSrc
,
68 PRInt32
* aDestLength
,
// Argument validation: the pending-byte count must be below 4 and the output
// buffer must have room for at least one PRUnichar.
74 NS_ENSURE_TRUE(*aState
< 4, NS_ERROR_INVALID_ARG
);
75 NS_ENSURE_TRUE(*aDestLength
> 0, NS_ERROR_INVALID_ARG
);
// Read cursor and one-past-the-end pointer for the input.
77 const char *src
= aSrc
;
78 const char *srcEnd
= aSrc
+ *aSrcLength
;
// Write cursor and one-past-the-end pointer for the output.
80 PRUnichar
*dest
= aDest
;
81 PRUnichar
*destEnd
= aDest
+ *aDestLength
;
// The input is too short to finish the pending sequence: append all of it to
// the carry-over buffer (the pending bytes occupy its first 4 - *aState
// positions), lower the outstanding count, and ask for more input.
83 if (*aState
> *aSrcLength
)
85 memcpy(aBuffer
+ 4 - *aState
, src
, *aSrcLength
);
87 *aState
-= *aSrcLength
;
88 return NS_OK_UDEC_MOREINPUT
;
// Complete the sequence left over from the previous run: copy the missing
// *aState bytes into the tail of aBuffer and decode the full 4-byte unit.
// NOTE(review): the condition guarding this section is not visible here -
// presumably it only runs when *aState is non-zero; confirm.
93 // prev. run left a partial UTF-32 seq.
96 memcpy(aBuffer
+ 4 - *aState
, src
, *aState
);
97 ucs4
= aIsLE
? LE_STRING_TO_UCS4(aBuffer
) : BE_STRING_TO_UCS4(aBuffer
);
// Values below 0x10000 fit in one PRUnichar; surrogate code points are not
// valid scalar values, so they are replaced by UCS2_REPLACEMENT_CHAR.
98 if (ucs4
< 0x10000L
) // BMP
100 *dest
++= IS_SURROGATE(ucs4
) ? UCS2_REPLACEMENT_CHAR
: PRUnichar(ucs4
);
// Values below 0x110000 are written as a high/low surrogate pair, which needs
// two free output slots; without them NS_OK_UDEC_MOREOUTPUT is returned.
102 else if (ucs4
< 0x110000L
) // plane 1 through plane 16
104 if (destEnd
- dest
< 2)
108 return NS_OK_UDEC_MOREOUTPUT
;
110 *dest
++= H_SURROGATE(ucs4
);
111 *dest
++= L_SURROGATE(ucs4
);
113 // Codepoints in plane 17 and higher (> 0x10ffff)
114 // are not representable in UTF-16 we use for the internal
115 // character representation. This is not a problem
116 // because Unicode/ISO 10646 will never assign characters
117 // in plane 17 and higher. Therefore, we convert them
118 // to Unicode replacement character (0xfffd).
120 *dest
++ = UCS2_REPLACEMENT_CHAR
;
125 nsresult rv
= NS_OK
; // conversion result
// Main loop: decode one 4-byte unit per iteration while both unread input and
// free output space remain.
127 for ( ; src
< srcEnd
&& dest
< destEnd
; src
+= 4)
// Fewer than 4 bytes are left: save them at the start of aBuffer, record in
// *aState how many more bytes the next call must supply, and flag that more
// input is needed.
129 if (srcEnd
- src
< 4)
131 // fill up aBuffer until src buffer gets exhausted.
132 memcpy(aBuffer
, src
, srcEnd
- src
);
133 *aState
= 4 - (srcEnd
- src
); // set add. char to read in next run
135 rv
= NS_OK_UDEC_MOREINPUT
;
// Decode the next unit straight from the input; the three-way classification
// below mirrors the one used for the carried-over sequence above.
139 ucs4
= aIsLE
? LE_STRING_TO_UCS4(src
) : BE_STRING_TO_UCS4(src
);
140 if (ucs4
< 0x10000L
) // BMP
142 *dest
++= IS_SURROGATE(ucs4
) ? UCS2_REPLACEMENT_CHAR
: PRUnichar(ucs4
);
144 else if (ucs4
< 0x110000L
) // plane 1 through plane 16
// A surrogate pair needs two free output slots.
// NOTE(review): the statement controlled by this check is not visible in this
// extract (presumably an early exit from the loop). If so, rv is still NS_OK
// at that point and, with exactly one slot left, the post-loop test
// `dest >= destEnd` is false, so NS_OK_UDEC_MOREOUTPUT would not be reported
// even though input remains - confirm against the full source.
146 if (destEnd
- dest
< 2)
148 // ((ucs4 - 0x10000) >> 10) + 0xd800;
149 *dest
++= H_SURROGATE(ucs4
);
150 *dest
++= L_SURROGATE(ucs4
);
152 else // plane 17 and higher
153 *dest
++ = UCS2_REPLACEMENT_CHAR
;
// Input remains but the output buffer is full: report that more output space
// is required (only when no other status has been recorded in rv).
156 //output not finished, output buffer too short
157 if((NS_OK
== rv
) && (src
< srcEnd
) && (dest
>= destEnd
))
158 rv
= NS_OK_UDEC_MOREOUTPUT
;
// Report back how many input bytes were consumed and how many PRUnichars
// were produced.
160 *aSrcLength
= src
- aSrc
;
161 *aDestLength
= dest
- aDest
;
167 //----------------------------------------------------------------------
168 // Class nsUTF32ToUnicode [implementation]
// Default constructor; explicitly invokes the nsBasicDecoderSupport base-class
// constructor.
// NOTE(review): the constructor body is not visible in this extract.
170 nsUTF32ToUnicode::nsUTF32ToUnicode() : nsBasicDecoderSupport()
175 //----------------------------------------------------------------------
176 // Subclassing of nsDecoderSupport class [implementation]
178 NS_IMETHODIMP
nsUTF32ToUnicode::GetMaxLength(const char * aSrc
,
180 PRInt32
* aDestLength
)
182 // Non-BMP characters take two PRUnichars(a pair of surrogate codepoints)
183 // so that we have to divide by 2 instead of 4 for the worst case.
184 *aDestLength
= aSrcLength
/ 2;
189 //----------------------------------------------------------------------
190 // Subclassing of nsBasicDecoderSupport class [implementation]
// Reset: returns the decoder to its initial state. The visible code zeroes the
// 4-byte carry-over buffer mBufferInc.
// NOTE(review): the statement that the comment below refers to (presumably
// clearing the pending-byte counter mState) and the return statement are not
// visible in this extract - confirm against the full source.
192 NS_IMETHODIMP
nsUTF32ToUnicode::Reset()
194 // the number of additional bytes to read to complete UTF-32 4byte seq.
// Clear any bytes saved from an unfinished sequence.
196 memset(mBufferInc
, 0, 4);
202 //----------------------------------------------------------------------
203 // Class nsUTF32BEToUnicode [implementation]
205 //----------------------------------------------------------------------
206 // Subclassing of nsUTF32ToUnicode class [implementation]
208 NS_IMETHODIMP
nsUTF32BEToUnicode::Convert(const char * aSrc
,
209 PRInt32
* aSrcLength
,
211 PRInt32
* aDestLength
)
213 return ConvertCommon(aSrc
, aSrcLength
, aDest
, aDestLength
, &mState
,
214 mBufferInc
, PR_FALSE
);
217 //----------------------------------------------------------------------
218 // Class nsUTF32LEToUnicode [implementation]
220 //----------------------------------------------------------------------
221 // Subclassing of nsUTF32ToUnicode class [implementation]
223 NS_IMETHODIMP
nsUTF32LEToUnicode::Convert(const char * aSrc
,
224 PRInt32
* aSrcLength
,
226 PRInt32
* aDestLength
)
228 return ConvertCommon(aSrc
, aSrcLength
, aDest
, aDestLength
, &mState
,
229 mBufferInc
, PR_TRUE
);
232 // XXX: Open question - what should Finish() do with bytes still held in mBufferInc (an incomplete trailing UTF-32 sequence that was never flushed)?