Tahoma -> DejaVu Sans for reproducible tests
[LibreOffice.git] / sal / textenc / convertbig5hkscs.cxx
blobbf3a2e2cf59d515e64235836c81fe9080c7778c9
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sal/config.h>
22 #include <cassert>
24 #include <rtl/character.hxx>
25 #include <rtl/textcvt.h>
26 #include <sal/types.h>
28 #include "context.hxx"
29 #include "convertbig5hkscs.hxx"
30 #include "converter.hxx"
31 #include "tenchelp.hxx"
32 #include "unichars.hxx"
34 namespace {
36 struct ImplBig5HkscsToUnicodeContext
38 sal_Int32 m_nRow; // 0--255; 0 means none
43 void * ImplCreateBig5HkscsToUnicodeContext()
45 ImplBig5HkscsToUnicodeContext * pContext =
46 new ImplBig5HkscsToUnicodeContext;
47 pContext->m_nRow = 0;
48 return pContext;
51 void ImplResetBig5HkscsToUnicodeContext(void * pContext)
53 if (pContext)
54 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = 0;
57 void ImplDestroyBig5HkscsToUnicodeContext(void * pContext)
59 delete static_cast< ImplBig5HkscsToUnicodeContext * >(pContext);
62 sal_Size ImplConvertBig5HkscsToUnicode(void const * pData,
63 void * pContext,
64 char const * pSrcBuf,
65 sal_Size nSrcBytes,
66 sal_Unicode * pDestBuf,
67 sal_Size nDestChars,
68 sal_uInt32 nFlags,
69 sal_uInt32 * pInfo,
70 sal_Size * pSrcCvtBytes)
72 sal_uInt16 const * pBig5Hkscs2001Data
73 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
74 m_pBig5Hkscs2001ToUnicodeData;
75 sal_Int32 const * pBig5Hkscs2001RowOffsets
76 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
77 m_pBig5Hkscs2001ToUnicodeRowOffsets;
78 ImplDBCSToUniLeadTab const * pBig5Data
79 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
80 m_pBig5ToUnicodeData;
81 sal_Int32 nRow = 0;
82 sal_uInt32 nInfo = 0;
83 sal_Size nConverted = 0;
84 sal_Unicode * pDestBufPtr = pDestBuf;
85 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
86 sal_Size startOfCurrentChar = 0;
88 if (pContext)
89 nRow = static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow;
91 for (; nConverted < nSrcBytes; ++nConverted)
93 bool bUndefined = true;
94 sal_uInt32 nChar = *reinterpret_cast<unsigned char const *>(pSrcBuf++);
95 if (nRow == 0)
96 if (nChar < 0x80)
97 if (pDestBufPtr != pDestBufEnd) {
98 *pDestBufPtr++ = static_cast<sal_Unicode>(nChar);
99 startOfCurrentChar = nConverted + 1;
100 } else
101 goto no_output;
102 else if (nChar >= 0x81 && nChar <= 0xFE)
103 nRow = nChar;
104 else
106 bUndefined = false;
107 goto bad_input;
109 else
110 if ((nChar >= 0x40 && nChar <= 0x7E)
111 || (nChar >= 0xA1 && nChar <= 0xFE))
113 sal_uInt32 nUnicode = 0xFFFF;
114 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow];
115 sal_uInt32 nFirst=0;
116 sal_uInt32 nLast=0;
117 if (nOffset != -1)
119 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
120 nFirst = nFirstLast & 0xFF;
121 nLast = nFirstLast >> 8;
122 if (nChar >= nFirst && nChar <= nLast)
123 nUnicode
124 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)];
126 if (nUnicode == 0xFFFF)
128 sal_uInt32 n = pBig5Data[nRow].mnTrailStart;
129 if (nChar >= n && nChar <= pBig5Data[nRow].mnTrailEnd)
131 nUnicode = pBig5Data[nRow].mpToUniTrailTab[nChar - n];
132 if (nUnicode == 0)
133 nUnicode = 0xFFFF;
134 assert(!rtl::isHighSurrogate(nUnicode));
137 if (nUnicode == 0xFFFF)
139 ImplDBCSEUDCData const * p
140 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
141 m_pEudcData;
142 sal_uInt32 nCount
143 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
144 m_nEudcCount;
145 sal_uInt32 i;
146 for (i = 0; i < nCount; ++i)
148 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd)
150 if (nChar < p->mnTrail1Start)
151 break;
152 if (nChar <= p->mnTrail1End)
154 nUnicode
155 = p->mnUniStart
156 + (nRow - p->mnLeadStart)
157 * p->mnTrailRangeCount
158 + (nChar - p->mnTrail1Start);
159 break;
161 if (p->mnTrailCount < 2
162 || nChar < p->mnTrail2Start)
163 break;
164 if (nChar <= p->mnTrail2End)
166 nUnicode
167 = p->mnUniStart
168 + (nRow - p->mnLeadStart)
169 * p->mnTrailRangeCount
170 + (nChar - p->mnTrail2Start)
171 + (p->mnTrail1End - p->mnTrail1Start
172 + 1);
173 break;
175 if (p->mnTrailCount < 3
176 || nChar < p->mnTrail3Start)
177 break;
178 if (nChar <= p->mnTrail3End)
180 nUnicode
181 = p->mnUniStart
182 + (nRow - p->mnLeadStart)
183 * p->mnTrailRangeCount
184 + (nChar - p->mnTrail3Start)
185 + (p->mnTrail1End - p->mnTrail1Start
186 + 1)
187 + (p->mnTrail2End - p->mnTrail2Start
188 + 1);
189 break;
191 break;
193 ++p;
195 assert(!rtl::isHighSurrogate(nUnicode));
197 if (nUnicode == 0xFFFF)
198 goto bad_input;
199 if (rtl::isHighSurrogate(nUnicode))
200 if (pDestBufEnd - pDestBufPtr >= 2)
202 nOffset += nLast - nFirst + 1;
203 nFirst = pBig5Hkscs2001Data[nOffset++];
204 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
205 *pDestBufPtr++
206 = static_cast<sal_Unicode>(pBig5Hkscs2001Data[
207 nOffset + (nChar - nFirst)]);
208 startOfCurrentChar = nConverted + 1;
210 else
211 goto no_output;
212 else
213 if (pDestBufPtr != pDestBufEnd) {
214 *pDestBufPtr++ = static_cast<sal_Unicode>(nUnicode);
215 startOfCurrentChar = nConverted + 1;
216 } else
217 goto no_output;
218 nRow = 0;
220 else
222 bUndefined = false;
223 goto bad_input;
225 continue;
227 bad_input:
228 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
229 bUndefined, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
230 &nInfo))
232 case sal::detail::textenc::BAD_INPUT_STOP:
233 nRow = 0;
234 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
235 ++nConverted;
236 } else {
237 nConverted = startOfCurrentChar;
239 break;
241 case sal::detail::textenc::BAD_INPUT_CONTINUE:
242 nRow = 0;
243 startOfCurrentChar = nConverted + 1;
244 continue;
246 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
247 goto no_output;
249 break;
251 no_output:
252 --pSrcBuf;
253 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
254 break;
257 if (nRow != 0
258 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
259 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
260 == 0)
262 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
263 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
264 else
265 switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
266 false, true, 0, nFlags, &pDestBufPtr, pDestBufEnd,
267 &nInfo))
269 case sal::detail::textenc::BAD_INPUT_STOP:
270 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) != 0) {
271 nConverted = startOfCurrentChar;
273 [[fallthrough]];
274 case sal::detail::textenc::BAD_INPUT_CONTINUE:
275 nRow = 0;
276 break;
278 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
279 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
280 break;
284 if (pContext)
285 static_cast< ImplBig5HkscsToUnicodeContext * >(pContext)->m_nRow = nRow;
286 if (pInfo)
287 *pInfo = nInfo;
288 if (pSrcCvtBytes)
289 *pSrcCvtBytes = nConverted;
291 return pDestBufPtr - pDestBuf;
294 sal_Size ImplConvertUnicodeToBig5Hkscs(void const * pData,
295 void * pContext,
296 sal_Unicode const * pSrcBuf,
297 sal_Size nSrcChars,
298 char * pDestBuf,
299 sal_Size nDestBytes,
300 sal_uInt32 nFlags,
301 sal_uInt32 * pInfo,
302 sal_Size * pSrcCvtChars)
304 sal_uInt16 const * pBig5Hkscs2001Data
305 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
306 m_pUnicodeToBig5Hkscs2001Data;
307 sal_Int32 const * pBig5Hkscs2001PageOffsets
308 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
309 m_pUnicodeToBig5Hkscs2001PageOffsets;
310 sal_Int32 const * pBig5Hkscs2001PlaneOffsets
311 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
312 m_pUnicodeToBig5Hkscs2001PlaneOffsets;
313 ImplUniToDBCSHighTab const * pBig5Data
314 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
315 m_pUnicodeToBig5Data;
316 sal_Unicode nHighSurrogate = 0;
317 sal_uInt32 nInfo = 0;
318 sal_Size nConverted = 0;
319 char * pDestBufPtr = pDestBuf;
320 char * pDestBufEnd = pDestBuf + nDestBytes;
322 if (pContext)
323 nHighSurrogate
324 = static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate;
326 for (; nConverted < nSrcChars; ++nConverted)
328 bool bUndefined = true;
329 sal_uInt32 nChar = *pSrcBuf++;
330 if (nHighSurrogate == 0)
332 if (rtl::isHighSurrogate(nChar))
334 nHighSurrogate = static_cast<sal_Unicode>(nChar);
335 continue;
337 else if (rtl::isLowSurrogate(nChar))
339 bUndefined = false;
340 goto bad_input;
343 else if (rtl::isLowSurrogate(nChar))
344 nChar = rtl::combineSurrogates(nHighSurrogate, nChar);
345 else
347 bUndefined = false;
348 goto bad_input;
351 assert(rtl::isUnicodeScalarValue(nChar));
353 if (nChar < 0x80)
354 if (pDestBufPtr != pDestBufEnd)
355 *pDestBufPtr++ = static_cast< char >(nChar);
356 else
357 goto no_output;
358 else
360 sal_uInt32 nBytes = 0;
361 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16];
362 if (nOffset != -1)
364 nOffset
365 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00)
366 >> 8)];
367 if (nOffset != -1)
369 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++];
370 sal_uInt32 nFirst = nFirstLast & 0xFF;
371 sal_uInt32 nLast = nFirstLast >> 8;
372 sal_uInt32 nIndex = nChar & 0xFF;
373 if (nIndex >= nFirst && nIndex <= nLast)
375 nBytes
376 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)];
380 if (nBytes == 0)
382 sal_uInt32 nIndex1 = nChar >> 8;
383 if (nIndex1 < 0x100)
385 sal_uInt32 nIndex2 = nChar & 0xFF;
386 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart;
387 if (nIndex2 >= nFirst
388 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd)
389 nBytes = pBig5Data[nIndex1].
390 mpToUniTrailTab[nIndex2 - nFirst];
393 if (nBytes == 0)
395 ImplDBCSEUDCData const * p
396 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
397 m_pEudcData;
398 sal_uInt32 nCount
399 = static_cast< ImplBig5HkscsConverterData const * >(pData)->
400 m_nEudcCount;
401 sal_uInt32 i;
402 for (i = 0; i < nCount; ++i) {
403 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd)
405 sal_uInt32 nIndex = nChar - p->mnUniStart;
406 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount;
407 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount;
408 sal_uInt32 nSize;
409 nBytes = (p->mnLeadStart + nLeadOff) << 8;
410 nSize = p->mnTrail1End - p->mnTrail1Start + 1;
411 if (nTrailOff < nSize)
413 nBytes |= p->mnTrail1Start + nTrailOff;
414 break;
416 nTrailOff -= nSize;
417 nSize = p->mnTrail2End - p->mnTrail2Start + 1;
418 if (nTrailOff < nSize)
420 nBytes |= p->mnTrail2Start + nTrailOff;
421 break;
423 nTrailOff -= nSize;
424 nBytes |= p->mnTrail3Start + nTrailOff;
425 break;
427 ++p;
430 if (nBytes == 0)
431 goto bad_input;
432 if (pDestBufEnd - pDestBufPtr >= 2)
434 *pDestBufPtr++ = static_cast< char >(nBytes >> 8);
435 *pDestBufPtr++ = static_cast< char >(nBytes & 0xFF);
437 else
438 goto no_output;
440 nHighSurrogate = 0;
441 continue;
443 bad_input:
444 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
445 bUndefined, nChar, nFlags, &pDestBufPtr, pDestBufEnd,
446 &nInfo, nullptr, 0, nullptr))
448 case sal::detail::textenc::BAD_INPUT_STOP:
449 nHighSurrogate = 0;
450 break;
452 case sal::detail::textenc::BAD_INPUT_CONTINUE:
453 nHighSurrogate = 0;
454 continue;
456 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
457 goto no_output;
459 break;
461 no_output:
462 --pSrcBuf;
463 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
464 break;
467 if (nHighSurrogate != 0
468 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
469 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
470 == 0)
472 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
473 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
474 else
475 switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
476 false, 0, nFlags, &pDestBufPtr, pDestBufEnd, &nInfo,
477 nullptr, 0, nullptr))
479 case sal::detail::textenc::BAD_INPUT_STOP:
480 case sal::detail::textenc::BAD_INPUT_CONTINUE:
481 nHighSurrogate = 0;
482 break;
484 case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
485 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
486 break;
490 if (pContext)
491 static_cast<ImplUnicodeToTextContext *>(pContext)->m_nHighSurrogate
492 = nHighSurrogate;
493 if (pInfo)
494 *pInfo = nInfo;
495 if (pSrcCvtChars)
496 *pSrcCvtChars = nConverted;
498 return pDestBufPtr - pDestBuf;
501 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */