1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "convertbig5hkscs.hxx"
30 #include "converter.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
36 struct ImplBig5HkscsToUnicodeContext
38 sal_Int32 m_nRow
; // 0--255; 0 means none
43 void * ImplCreateBig5HkscsToUnicodeContext()
45 ImplBig5HkscsToUnicodeContext
* pContext
=
46 new ImplBig5HkscsToUnicodeContext
;
51 void ImplResetBig5HkscsToUnicodeContext(void * pContext
)
54 static_cast< ImplBig5HkscsToUnicodeContext
* >(pContext
)->m_nRow
= 0;
57 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext
)
59 delete static_cast< ImplBig5HkscsToUnicodeContext
* >(pContext
);
// Converts a run of Big5-HKSCS bytes into sal_Unicode units.
//
// NOTE(review): this copy of the function is fragmentary -- the original
// file's line numbers are fused into the text, and the lines in between
// (braces, several parameters, whole branches) are absent.  The comments
// below describe only what is visible here; confirm against a complete copy
// of the source before relying on them.
//
// Visible contract:
//  - pData is cast to ImplBig5HkscsConverterData and supplies the tables.
//  - pContext is cast to ImplBig5HkscsToUnicodeContext; its m_nRow is read
//    before the loop and written back after it.
//  - Source bytes are read through pSrcBuf, bounded by nSrcBytes.
//  - Output goes to pDestBuf, bounded by nDestChars units.
//  - *pSrcCvtBytes receives nConverted; the return value is the number of
//    units stored (pDestBufPtr - pDestBuf).
62 sal_Size
ImplConvertBig5HkscsToUnicode(void const * pData
,
66 sal_Unicode
* pDestBuf
,
70 sal_Size
* pSrcCvtBytes
)
// Lookup tables taken from the converter data behind pData.
72 sal_uInt16
const * pBig5Hkscs2001Data
73 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
74 m_pBig5Hkscs2001ToUnicodeData
;
75 sal_Int32
const * pBig5Hkscs2001RowOffsets
76 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
77 m_pBig5Hkscs2001ToUnicodeRowOffsets
;
78 ImplDBCSToUniLeadTab
const * pBig5Data
79 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
// nConverted is the loop counter over the source bytes.
83 sal_Size nConverted
= 0;
// Write cursor and one-past-the-end limit of the destination buffer.
84 sal_Unicode
* pDestBufPtr
= pDestBuf
;
85 sal_Unicode
* pDestBufEnd
= pDestBuf
+ nDestChars
;
// startOfCurrentChar is set to nConverted + 1 each time output is stored,
// and nConverted is later reset to it on some bad-input paths.
86 sal_Size startOfCurrentChar
= 0;
// Pick up the row value saved in the context.
89 nRow
= static_cast< ImplBig5HkscsToUnicodeContext
* >(pContext
)->m_nRow
;
// Main loop: one source byte per iteration.
91 for (; nConverted
< nSrcBytes
; ++nConverted
)
// bUndefined is passed to the bad-input handler further down.
93 bool bUndefined
= true;
// Read the next source byte as an unsigned value and advance pSrcBuf.
94 sal_uInt32 nChar
= *reinterpret_cast<unsigned char const *>(pSrcBuf
++);
// With room left in the destination, the byte value itself is stored as one
// unit.  (The condition that selects this path is not visible here.)
97 if (pDestBufPtr
!= pDestBufEnd
) {
98 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nChar
);
99 startOfCurrentChar
= nConverted
+ 1;
// Byte values 0x81..0xFE are singled out here.
// NOTE(review): presumably the byte is remembered as nRow for the next
// iteration -- the branch body is not visible; confirm.
102 else if (nChar
>= 0x81 && nChar
<= 0xFE)
// Byte values 0x40..0x7E and 0xA1..0xFE are accepted on this path.
110 if ((nChar
>= 0x40 && nChar
<= 0x7E)
111 || (nChar
>= 0xA1 && nChar
<= 0xFE))
// 0xFFFF serves as the "nothing found yet" marker for nUnicode; each later
// lookup stage is entered only while nUnicode still equals it.
113 sal_uInt32 nUnicode
= 0xFFFF;
// Offset into the HKSCS-2001 table for the current row.
114 sal_Int32 nOffset
= pBig5Hkscs2001RowOffsets
[nRow
];
// The entry at nOffset packs two bounds: the low 8 bits give nFirst, the
// remaining bits give nLast.  nOffset then points past that entry.
119 sal_uInt32 nFirstLast
= pBig5Hkscs2001Data
[nOffset
++];
120 nFirst
= nFirstLast
& 0xFF;
121 nLast
= nFirstLast
>> 8;
// When nChar lies within [nFirst, nLast], the table entry at
// nOffset + (nChar - nFirst) is read.
122 if (nChar
>= nFirst
&& nChar
<= nLast
)
124 = pBig5Hkscs2001Data
[nOffset
+ (nChar
- nFirst
)];
// Nothing found yet: try the pBig5Data entry for the current row, using
// its mnTrailStart..mnTrailEnd range and mpToUniTrailTab table.
126 if (nUnicode
== 0xFFFF)
128 sal_uInt32 n
= pBig5Data
[nRow
].mnTrailStart
;
129 if (nChar
>= n
&& nChar
<= pBig5Data
[nRow
].mnTrailEnd
)
131 nUnicode
= pBig5Data
[nRow
].mpToUniTrailTab
[nChar
- n
];
// A value obtained this way must not be a high surrogate.
134 assert(!rtl::isHighSurrogate(nUnicode
));
// Still nothing found: walk the ImplDBCSEUDCData entries.
137 if (nUnicode
== 0xFFFF)
139 ImplDBCSEUDCData
const * p
140 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
143 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
146 for (i
= 0; i
< nCount
; ++i
)
// An entry is considered when nRow lies within its lead range.
148 if (nRow
>= p
->mnLeadStart
&& nRow
<= p
->mnLeadEnd
)
// nChar is then tested against up to three trail ranges (mnTrail1..3).  The
// visible arithmetic combines (nRow - mnLeadStart) * mnTrailRangeCount with
// the offset inside the matching range, plus terms built from the extents of
// the preceding ranges.  (The start of each expression is not visible.)
150 if (nChar
< p
->mnTrail1Start
)
152 if (nChar
<= p
->mnTrail1End
)
156 + (nRow
- p
->mnLeadStart
)
157 * p
->mnTrailRangeCount
158 + (nChar
- p
->mnTrail1Start
);
// The second trail range is tested against mnTrailCount < 2.
161 if (p
->mnTrailCount
< 2
162 || nChar
< p
->mnTrail2Start
)
164 if (nChar
<= p
->mnTrail2End
)
168 + (nRow
- p
->mnLeadStart
)
169 * p
->mnTrailRangeCount
170 + (nChar
- p
->mnTrail2Start
)
171 + (p
->mnTrail1End
- p
->mnTrail1Start
// The third trail range is tested against mnTrailCount < 3.
175 if (p
->mnTrailCount
< 3
176 || nChar
< p
->mnTrail3Start
)
178 if (nChar
<= p
->mnTrail3End
)
182 + (nRow
- p
->mnLeadStart
)
183 * p
->mnTrailRangeCount
184 + (nChar
- p
->mnTrail3Start
)
185 + (p
->mnTrail1End
- p
->mnTrail1Start
187 + (p
->mnTrail2End
- p
->mnTrail2Start
// As above, a high surrogate is not expected at this point.
195 assert(!rtl::isHighSurrogate(nUnicode
));
197 if (nUnicode
== 0xFFFF)
// A high-surrogate result is handled only when at least two units of room
// remain.  nOffset is advanced past the current run (nLast - nFirst + 1
// entries), a new nFirst is read, nUnicode is stored, and a further table
// entry at nOffset + (nChar - nFirst) is converted to sal_Unicode.
199 if (rtl::isHighSurrogate(nUnicode
))
200 if (pDestBufEnd
- pDestBufPtr
>= 2)
202 nOffset
+= nLast
- nFirst
+ 1;
203 nFirst
= pBig5Hkscs2001Data
[nOffset
++];
204 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
206 = static_cast<sal_Unicode
>(pBig5Hkscs2001Data
[
207 nOffset
+ (nChar
- nFirst
)]);
208 startOfCurrentChar
= nConverted
+ 1;
// Single-unit result: stored when the destination has room.
213 if (pDestBufPtr
!= pDestBufEnd
) {
214 *pDestBufPtr
++ = static_cast<sal_Unicode
>(nUnicode
);
215 startOfCurrentChar
= nConverted
+ 1;
// Bad input is delegated to the shared handler; its result selects one of
// the cases below.  (The bodies of these cases are only partly visible.)
228 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
229 bUndefined
, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
232 case sal::detail::textenc::BAD_INPUT_STOP
:
234 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0) {
237 nConverted
= startOfCurrentChar
;
241 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
243 startOfCurrentChar
= nConverted
+ 1;
246 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
253 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Handling after the loop; this part of the condition tests the error and
// destination-too-small bits of nInfo.
258 && (nInfo
& (RTL_TEXTTOUNICODE_INFO_ERROR
259 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
))
// Without the FLUSH flag, report that the source buffer was too small.
262 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) == 0)
263 nInfo
|= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL
;
// The bad-input handler is called again, this time with `false` as its
// first argument.
265 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
266 false, true, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
,
269 case sal::detail::textenc::BAD_INPUT_STOP
:
// With FLUSH set, the consumed count is reset to startOfCurrentChar.
270 if ((nFlags
& RTL_TEXTTOUNICODE_FLAGS_FLUSH
) != 0) {
271 nConverted
= startOfCurrentChar
;
274 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
278 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
279 nInfo
|= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
;
// Save the row value back into the context.
285 static_cast< ImplBig5HkscsToUnicodeContext
* >(pContext
)->m_nRow
= nRow
;
// Report the value of nConverted to the caller.
289 *pSrcCvtBytes
= nConverted
;
// Number of sal_Unicode units written to pDestBuf.
291 return pDestBufPtr
- pDestBuf
;
// Converts a run of sal_Unicode units into Big5-HKSCS bytes.
//
// NOTE(review): this copy of the function is fragmentary -- the original
// file's line numbers are fused into the text, and the lines in between
// (braces, several parameters, whole branches) are absent.  The comments
// below describe only what is visible here; confirm against a complete copy
// of the source before relying on them.
//
// Visible contract:
//  - pData is cast to ImplBig5HkscsConverterData and supplies the tables.
//  - pContext is cast to ImplUnicodeToTextContext; its m_nHighSurrogate is
//    accessed before and after the loop.
//  - Source units are read through pSrcBuf, bounded by nSrcChars.
//  - Output goes to pDestBuf, bounded by nDestBytes bytes.
//  - *pSrcCvtChars receives nConverted; the return value is the number of
//    bytes stored (pDestBufPtr - pDestBuf).
294 sal_Size
ImplConvertUnicodeToBig5Hkscs(void const * pData
,
296 sal_Unicode
const * pSrcBuf
,
302 sal_Size
* pSrcCvtChars
)
// Lookup tables taken from the converter data behind pData.
304 sal_uInt16
const * pBig5Hkscs2001Data
305 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
306 m_pUnicodeToBig5Hkscs2001Data
;
307 sal_Int32
const * pBig5Hkscs2001PageOffsets
308 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
309 m_pUnicodeToBig5Hkscs2001PageOffsets
;
310 sal_Int32
const * pBig5Hkscs2001PlaneOffsets
311 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
312 m_pUnicodeToBig5Hkscs2001PlaneOffsets
;
313 ImplUniToDBCSHighTab
const * pBig5Data
314 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
315 m_pUnicodeToBig5Data
;
// nHighSurrogate == 0 is the "no high surrogate pending" state tested in
// the loop.
316 sal_Unicode nHighSurrogate
= 0;
317 sal_uInt32 nInfo
= 0;
// nConverted is the loop counter over the source units.
318 sal_Size nConverted
= 0;
// Write cursor and one-past-the-end limit of the destination buffer.
319 char * pDestBufPtr
= pDestBuf
;
320 char * pDestBufEnd
= pDestBuf
+ nDestBytes
;
// Value read from the context's m_nHighSurrogate.
// NOTE(review): presumably assigned to nHighSurrogate -- the target of the
// assignment is not visible; confirm.
324 = static_cast<ImplUnicodeToTextContext
*>(pContext
)->m_nHighSurrogate
;
// Main loop: one source unit per iteration.
326 for (; nConverted
< nSrcChars
; ++nConverted
)
// bUndefined is passed to the bad-input handler further down.
328 bool bUndefined
= true;
329 sal_uInt32 nChar
= *pSrcBuf
++;
// No high surrogate pending: a high surrogate is remembered in
// nHighSurrogate; a low surrogate is tested for separately.
330 if (nHighSurrogate
== 0)
332 if (rtl::isHighSurrogate(nChar
))
334 nHighSurrogate
= static_cast<sal_Unicode
>(nChar
);
337 else if (rtl::isLowSurrogate(nChar
))
// A low surrogate is combined with nHighSurrogate into one code point.
343 else if (rtl::isLowSurrogate(nChar
))
344 nChar
= rtl::combineSurrogates(nHighSurrogate
, nChar
);
// From here on nChar is required to be a Unicode scalar value.
351 assert(rtl::isUnicodeScalarValue(nChar
));
// With room left in the destination, nChar is stored as a single char.
// (The condition that selects this path is not visible here.)
354 if (pDestBufPtr
!= pDestBufEnd
)
355 *pDestBufPtr
++ = static_cast< char >(nChar
);
// nBytes collects the encoded result.  The HKSCS-2001 lookup starts with
// the plane offset selected by nChar >> 16.
360 sal_uInt32 nBytes
= 0;
361 sal_Int32 nOffset
= pBig5Hkscs2001PlaneOffsets
[nChar
>> 16];
// Next level: the page offset, indexed using nOffset and (nChar & 0xFF00).
365 = pBig5Hkscs2001PageOffsets
[nOffset
+ ((nChar
& 0xFF00)
// The entry at nOffset packs two bounds: the low 8 bits give nFirst, the
// remaining bits give nLast.  nOffset then points past that entry.
369 sal_uInt32 nFirstLast
= pBig5Hkscs2001Data
[nOffset
++];
370 sal_uInt32 nFirst
= nFirstLast
& 0xFF;
371 sal_uInt32 nLast
= nFirstLast
>> 8;
372 sal_uInt32 nIndex
= nChar
& 0xFF;
// When nIndex lies within [nFirst, nLast], the table entry at
// nOffset + (nIndex - nFirst) is read.
373 if (nIndex
>= nFirst
&& nIndex
<= nLast
)
376 = pBig5Hkscs2001Data
[nOffset
+ (nIndex
- nFirst
)];
// Lookup in pBig5Data: nChar >> 8 selects the entry, and nChar & 0xFF must
// lie within that entry's mnLowStart..mnLowEnd range.
382 sal_uInt32 nIndex1
= nChar
>> 8;
385 sal_uInt32 nIndex2
= nChar
& 0xFF;
386 sal_uInt32 nFirst
= pBig5Data
[nIndex1
].mnLowStart
;
387 if (nIndex2
>= nFirst
388 && nIndex2
<= pBig5Data
[nIndex1
].mnLowEnd
)
389 nBytes
= pBig5Data
[nIndex1
].
390 mpToUniTrailTab
[nIndex2
- nFirst
];
// Walk the ImplDBCSEUDCData entries.
395 ImplDBCSEUDCData
const * p
396 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
399 = static_cast< ImplBig5HkscsConverterData
const * >(pData
)->
402 for (i
= 0; i
< nCount
; ++i
) {
// An entry is considered when nChar lies within mnUniStart..mnUniEnd.
403 if (nChar
>= p
->mnUniStart
&& nChar
<= p
->mnUniEnd
)
// The index relative to mnUniStart is split by mnTrailRangeCount into a
// lead offset (quotient) and a trail offset (remainder).
405 sal_uInt32 nIndex
= nChar
- p
->mnUniStart
;
406 sal_uInt32 nLeadOff
= nIndex
/ p
->mnTrailRangeCount
;
407 sal_uInt32 nTrailOff
= nIndex
% p
->mnTrailRangeCount
;
// The high byte of nBytes is mnLeadStart + nLeadOff.
409 nBytes
= (p
->mnLeadStart
+ nLeadOff
) << 8;
// nTrailOff is compared with the size of trail range 1 and then of trail
// range 2; the low byte is a range start plus nTrailOff.  (Any adjustment
// of nTrailOff between the ranges is not visible here.)
410 nSize
= p
->mnTrail1End
- p
->mnTrail1Start
+ 1;
411 if (nTrailOff
< nSize
)
413 nBytes
|= p
->mnTrail1Start
+ nTrailOff
;
417 nSize
= p
->mnTrail2End
- p
->mnTrail2Start
+ 1;
418 if (nTrailOff
< nSize
)
420 nBytes
|= p
->mnTrail2Start
+ nTrailOff
;
424 nBytes
|= p
->mnTrail3Start
+ nTrailOff
;
// A two-byte result needs two bytes of room; the high byte is written first.
432 if (pDestBufEnd
- pDestBufPtr
>= 2)
434 *pDestBufPtr
++ = static_cast< char >(nBytes
>> 8);
435 *pDestBufPtr
++ = static_cast< char >(nBytes
& 0xFF);
// Bad input is delegated to the shared handler; its result selects one of
// the cases below.  (The bodies of these cases are only partly visible.)
444 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
445 bUndefined
, nChar
, nFlags
, &pDestBufPtr
, pDestBufEnd
,
446 &nInfo
, nullptr, 0, nullptr))
448 case sal::detail::textenc::BAD_INPUT_STOP
:
452 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
456 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
// Record that the destination buffer was too small.
463 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// Handling after the loop when a high surrogate is still pending; this part
// of the condition also tests the error and destination-too-small bits.
467 if (nHighSurrogate
!= 0
468 && (nInfo
& (RTL_UNICODETOTEXT_INFO_ERROR
469 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
))
// With the FLUSH flag set, the source-buffer-too-small bit is recorded.
472 if ((nFlags
& RTL_UNICODETOTEXT_FLAGS_FLUSH
) != 0)
473 nInfo
|= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL
;
// The bad-input handler is called again, this time with `false` and a zero
// character.
475 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
476 false, 0, nFlags
, &pDestBufPtr
, pDestBufEnd
, &nInfo
,
477 nullptr, 0, nullptr))
479 case sal::detail::textenc::BAD_INPUT_STOP
:
480 case sal::detail::textenc::BAD_INPUT_CONTINUE
:
484 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT
:
485 nInfo
|= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
;
// The context's m_nHighSurrogate is addressed here.
// NOTE(review): presumably to save nHighSurrogate for the next call -- the
// right-hand side is not visible; confirm.
491 static_cast<ImplUnicodeToTextContext
*>(pContext
)->m_nHighSurrogate
// Report the value of nConverted to the caller.
496 *pSrcCvtChars
= nConverted
;
// Number of bytes written to pDestBuf.
498 return pDestBufPtr
- pDestBuf
;
501 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */