tdf#155486 Adding fonts to .odt when there is "no perfect match"
[LibreOffice.git] / sal / rtl / ustring.cxx
blob64f13cabc7ee514c3a82602fc0553da1f2cb5dcb
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <cassert>
23 #include <cstdlib>
24 #include <limits>
25 #include <stdexcept>
26 #include <string>
28 #include <osl/diagnose.h>
29 #include <osl/interlck.h>
30 #include <osl/mutex.h>
31 #include <rtl/tencinfo.h>
33 #include <string.h>
34 #include <sal/alloca.h>
35 #include <sal/log.hxx>
37 #include "hash.hxx"
38 #include "strimp.hxx"
39 #include <rtl/character.hxx>
40 #include <rtl/ustring.h>
42 #include <rtl/math.h>
#if defined _WIN32
// Temporary check: confirms that the #pragma pack around rtl_uString is indeed cargo cult and
// can safely be removed.
static_assert(sizeof (rtl_uString) == 12);
static_assert(alignof (rtl_uString) == 4);
#endif
51 /* ======================================================================= */
53 #if USE_SDT_PROBES
54 #define RTL_LOG_STRING_BITS 16
55 #endif
57 #include "strtmpl.hxx"
59 /* static data to be referenced by all empty strings
60 * the refCount is predefined to 1 and must never become 0 !
62 template<>
63 rtl_uString rtl::str::EmptyStringImpl<rtl_uString>::data =
65 sal_Int32(SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */
66 0, /*sal_Int32 length; */
67 { 0 } /*sal_Unicode buffer[1];*/
70 /* ======================================================================= */
72 sal_Int32 rtl_ustr_indexOfAscii_WithLength(
73 sal_Unicode const * str, sal_Int32 len,
74 char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C()
76 assert(len >= 0);
77 assert(subLen >= 0);
78 if (subLen > 0 && subLen <= len)
80 sal_Unicode const* end = str + len;
81 sal_Unicode const* cursor = str;
83 while(cursor < end)
85 cursor = std::char_traits<sal_Unicode>::find(cursor, end - cursor, *subStr);
86 if(!cursor || (end - cursor < subLen))
88 /* no enough left to actually have a match */
89 break;
91 /* now it is worth trying a full match */
92 if (rtl_ustr_asciil_reverseEquals_WithLength(cursor, subStr, subLen))
94 return cursor - str;
96 cursor += 1;
99 return -1;
102 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
103 sal_Unicode const * str, sal_Int32 len,
104 char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C()
106 assert(len >= 0);
107 assert(subLen >= 0);
108 if (subLen > 0 && subLen <= len) {
109 sal_Int32 i;
110 for (i = len - subLen; i >= 0; --i) {
111 if (rtl_ustr_asciil_reverseEquals_WithLength(
112 str + i, subStr, subLen))
114 return i;
118 return -1;
121 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
122 SAL_THROW_EXTERN_C()
124 assert(pStr);
125 rtl_uString * pResult = nullptr;
126 sal_Int32 nLen;
127 rtl_math_doubleToUString(
128 &pResult, nullptr, 0, f, rtl_math_StringFormat_G,
129 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
130 0, true);
131 nLen = pResult->length;
132 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
133 memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
134 rtl_uString_release(pResult);
135 return nLen;
138 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
139 SAL_THROW_EXTERN_C()
141 assert(pStr);
142 rtl_uString * pResult = nullptr;
143 sal_Int32 nLen;
144 rtl_math_doubleToUString(
145 &pResult, nullptr, 0, d, rtl_math_StringFormat_G,
146 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
147 0, true);
148 nLen = pResult->length;
149 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
150 memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
151 rtl_uString_release(pResult);
152 return nLen;
namespace {

// Narrows a double to float, mapping values beyond the finite float range to
// the infinity of the matching sign. This avoids the -fsanitize=undefined
// warning e.g. "runtime error: value 1e+99 is outside the range of
// representable values of type 'float'". Anything else (NaN included, since
// both comparisons are false for it) goes through a plain static_cast.
float doubleToFloat(double x) {
    constexpr float fLargest = std::numeric_limits<float>::max();
    constexpr float fInfinity = std::numeric_limits<float>::infinity();
    if (x < -fLargest) {
        return -fInfinity;
    }
    if (x > fLargest) {
        return fInfinity;
    }
    return static_cast<float>(x);
}

}
170 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) SAL_THROW_EXTERN_C()
172 assert(pStr);
173 return doubleToFloat(rtl_math_uStringToDouble(pStr,
174 pStr + rtl_ustr_getLength(pStr),
175 '.', 0, nullptr, nullptr));
178 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) SAL_THROW_EXTERN_C()
180 assert(pStr);
181 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
182 0, nullptr, nullptr);
185 /* ======================================================================= */
187 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
188 const char* pStr2 )
189 SAL_THROW_EXTERN_C()
191 assert(pStr1);
192 assert(pStr2);
193 sal_Int32 nRet;
194 for (;;)
196 nRet = static_cast<sal_Int32>(*pStr1)-
197 static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
198 if (!(nRet == 0 && *pStr2 ))
199 break;
200 /* Check ASCII range */
201 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
202 "rtl_ustr_ascii_compare - Found char > 127" );
203 pStr1++;
204 pStr2++;
207 return nRet;
210 /* ----------------------------------------------------------------------- */
212 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
213 sal_Int32 nStr1Len,
214 const char* pStr2 )
215 SAL_THROW_EXTERN_C()
217 assert(pStr1);
218 assert(nStr1Len >= 0);
219 assert(pStr2);
220 sal_Int32 nRet = 0;
221 for (;;)
223 nRet = (nStr1Len ? static_cast<sal_Int32>(*pStr1) : 0) -
224 static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
225 if (!(nRet == 0 && nStr1Len && *pStr2 ))
226 break;
227 /* Check ASCII range */
228 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
229 "rtl_ustr_ascii_compare_WithLength - Found char > 127" );
230 pStr1++;
231 pStr2++;
232 nStr1Len--;
235 return nRet;
238 /* ----------------------------------------------------------------------- */
240 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
241 sal_Int32 nStr1Len,
242 const char* pStr2,
243 sal_Int32 nShortenedLength )
244 SAL_THROW_EXTERN_C()
246 assert(nStr1Len >= 0);
247 assert(nShortenedLength >= 0);
248 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
249 sal_Int32 nRet;
250 while ( (nShortenedLength > 0) &&
251 (pStr1 < pStr1End) && *pStr2 )
253 /* Check ASCII range */
254 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
255 "rtl_ustr_ascii_shortenedCompare_WithLength - Found char > 127" );
257 nRet = static_cast<sal_Int32>(*pStr1)-
258 static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
259 if ( nRet != 0 )
260 return nRet;
262 nShortenedLength--;
263 pStr1++;
264 pStr2++;
267 if ( nShortenedLength <= 0 )
268 return 0;
270 if ( *pStr2 )
272 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
273 // first is a substring of the second string => less (negative value)
274 nRet = -1;
276 else
278 // greater or equal
279 nRet = pStr1End - pStr1;
282 return nRet;
285 /* ----------------------------------------------------------------------- */
287 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
288 sal_Int32 nStr1Len,
289 const char* pStr2,
290 sal_Int32 nStr2Len )
291 SAL_THROW_EXTERN_C()
293 assert(nStr1Len >= 0 && nStr2Len >= 0);
294 const sal_Unicode* pStr1Run = pStr1+nStr1Len;
295 const char* pStr2Run = pStr2+nStr2Len;
296 sal_Int32 nRet;
297 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
299 /* Check ASCII range */
300 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
301 "rtl_ustr_asciil_reverseCompare_WithLength - Found char > 127" );
302 pStr1Run--;
303 pStr2Run--;
304 nRet = static_cast<sal_Int32>(*pStr1Run)- static_cast<sal_Int32>(*pStr2Run);
305 if ( nRet )
306 return nRet;
309 return nStr1Len - nStr2Len;
312 /* ----------------------------------------------------------------------- */
314 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
315 const char* pStr2,
316 sal_Int32 nStrLen )
317 SAL_THROW_EXTERN_C()
319 assert(nStrLen >= 0);
320 const sal_Unicode* pStr1Run = pStr1+nStrLen;
321 const char* pStr2Run = pStr2+nStrLen;
322 while ( pStr1 < pStr1Run )
324 /* Check ASCII range */
325 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
326 "rtl_ustr_asciil_reverseEquals_WithLength - Found char > 127" );
327 pStr1Run--;
328 pStr2Run--;
329 if( *pStr1Run != static_cast<sal_Unicode>(*pStr2Run) )
330 return false;
333 return true;
336 /* ----------------------------------------------------------------------- */
338 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
339 const char* pStr2 )
340 SAL_THROW_EXTERN_C()
342 assert(pStr1);
343 assert(pStr2);
344 sal_Int32 nRet;
345 sal_Int32 c1;
346 sal_Int32 c2;
349 /* Check ASCII range */
350 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
351 "rtl_ustr_ascii_compareIgnoreAsciiCase - Found char > 127" );
352 /* If character between 'A' and 'Z', then convert it to lowercase */
353 c1 = static_cast<sal_Int32>(*pStr1);
354 c2 = static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
355 if ( (c1 >= 65) && (c1 <= 90) )
356 c1 += 32;
357 if ( (c2 >= 65) && (c2 <= 90) )
358 c2 += 32;
359 nRet = c1-c2;
360 if ( nRet != 0 )
361 return nRet;
363 pStr1++;
364 pStr2++;
366 while ( c2 );
368 return 0;
371 /* ----------------------------------------------------------------------- */
373 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
374 sal_Int32 nStr1Len,
375 const char* pStr2 )
376 SAL_THROW_EXTERN_C()
378 assert(nStr1Len >= 0);
379 assert(pStr2);
380 sal_Int32 nRet;
381 sal_Int32 c1;
382 sal_Int32 c2;
385 /* Check ASCII range */
386 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
387 "rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength - Found char > 127" );
388 if ( !nStr1Len )
389 return *pStr2 == '\0' ? 0 : -1;
391 /* If character between 'A' and 'Z', then convert it to lowercase */
392 c1 = static_cast<sal_Int32>(*pStr1);
393 c2 = static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
394 if ( (c1 >= 65) && (c1 <= 90) )
395 c1 += 32;
396 if ( (c2 >= 65) && (c2 <= 90) )
397 c2 += 32;
398 nRet = c1-c2;
399 if ( nRet != 0 )
400 return nRet;
402 pStr1++;
403 pStr2++;
404 nStr1Len--;
406 while( c2 );
408 return 0;
411 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
412 sal_Unicode const * first, sal_Int32 firstLen,
413 char const * second, sal_Int32 secondLen) SAL_THROW_EXTERN_C()
415 assert(firstLen >= 0 && secondLen >= 0);
416 sal_Int32 i;
417 sal_Int32 len = std::min(firstLen, secondLen);
418 for (i = 0; i < len; ++i) {
419 /* Check ASCII range */
420 SAL_WARN_IF( (static_cast<unsigned char>(*second)) > 127, "rtl.string",
421 "rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths - Found char > 127" );
422 sal_Int32 c1 = *first++;
423 sal_Int32 c2 = static_cast<unsigned char>(*second++);
424 sal_Int32 d;
425 if (c1 >= 65 && c1 <= 90) {
426 c1 += 32;
428 if (c2 >= 65 && c2 <= 90) {
429 c2 += 32;
431 d = c1 - c2;
432 if (d != 0) {
433 return d;
436 return firstLen - secondLen;
439 /* ----------------------------------------------------------------------- */
441 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
442 sal_Int32 nStr1Len,
443 const char* pStr2,
444 sal_Int32 nShortenedLength )
445 SAL_THROW_EXTERN_C()
447 assert(nStr1Len >= 0);
448 assert(nShortenedLength >= 0);
449 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
450 sal_Int32 nRet;
451 sal_Int32 c1;
452 sal_Int32 c2;
453 while ( (nShortenedLength > 0) &&
454 (pStr1 < pStr1End) && *pStr2 )
456 /* Check ASCII range */
457 SAL_WARN_IF( (static_cast<unsigned char>(*pStr2)) > 127, "rtl.string",
458 "rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength - Found char > 127" );
460 /* If character between 'A' and 'Z', then convert it to lowercase */
461 c1 = static_cast<sal_Int32>(*pStr1);
462 c2 = static_cast<sal_Int32>(static_cast<unsigned char>(*pStr2));
463 if ( (c1 >= 65) && (c1 <= 90) )
464 c1 += 32;
465 if ( (c2 >= 65) && (c2 <= 90) )
466 c2 += 32;
467 nRet = c1-c2;
468 if ( nRet != 0 )
469 return nRet;
471 nShortenedLength--;
472 pStr1++;
473 pStr2++;
476 if ( nShortenedLength <= 0 )
477 return 0;
479 if ( *pStr2 )
481 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
482 // first is a substring of the second string => less (negative value)
483 nRet = -1;
485 else
487 // greater or equal
488 nRet = pStr1End - pStr1;
491 return nRet;
494 /* ----------------------------------------------------------------------- */
496 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
497 const char* pCharStr )
498 SAL_THROW_EXTERN_C()
500 assert(ppThis);
501 sal_Int32 nLen;
503 if ( pCharStr )
505 const char* pTempStr = pCharStr;
506 while( *pTempStr )
507 pTempStr++;
508 nLen = pTempStr-pCharStr;
510 else
511 nLen = 0;
513 if ( !nLen )
515 rtl_uString_new( ppThis );
516 return;
519 if ( *ppThis )
520 rtl_uString_release( *ppThis );
522 *ppThis = rtl_uString_ImplAlloc( nLen );
523 OSL_ASSERT(*ppThis != nullptr);
524 if ( !(*ppThis) )
525 return;
527 sal_Unicode* pBuffer = (*ppThis)->buffer;
530 assert(static_cast<unsigned char>(*pCharStr) < 0x80); // ASCII range
531 *pBuffer = *pCharStr;
532 pBuffer++;
533 pCharStr++;
535 while ( *pCharStr );
537 RTL_LOG_STRING_NEW( *ppThis );
540 void SAL_CALL rtl_uString_newFromCodePoints(
541 rtl_uString ** newString, sal_uInt32 const * codePoints,
542 sal_Int32 codePointCount) SAL_THROW_EXTERN_C()
544 sal_Int32 n;
545 sal_Int32 i;
546 sal_Unicode * p;
547 assert(newString != nullptr);
548 assert((codePoints != nullptr || codePointCount == 0) && codePointCount >= 0);
549 if (codePointCount == 0) {
550 rtl_uString_new(newString);
551 return;
553 if (*newString != nullptr) {
554 rtl_uString_release(*newString);
556 n = codePointCount;
557 for (i = 0; i < codePointCount; ++i) {
558 OSL_ASSERT(rtl::isUnicodeCodePoint(codePoints[i]));
559 if (codePoints[i] >= 0x10000) {
560 ++n;
563 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
564 representation with wrap around (the necessary number of UTF-16 code
565 units will be no larger than 2 * SAL_MAX_INT32, represented as
566 sal_Int32 -2): */
567 if (n < 0) {
568 // coverity[dead_error_begin] - assumes wrap around
569 *newString = nullptr;
570 return;
572 *newString = rtl_uString_ImplAlloc(n);
573 if (*newString == nullptr) {
574 return;
576 p = (*newString)->buffer;
577 for (i = 0; i < codePointCount; ++i) {
578 p += rtl::splitSurrogates(codePoints[i], p);
580 RTL_LOG_STRING_NEW( *newString );
583 void rtl_uString_newConcatAsciiL(
584 rtl_uString ** newString, rtl_uString * left, char const * right,
585 sal_Int32 rightLength)
587 assert(newString != nullptr);
588 assert(left != nullptr);
589 assert(right != nullptr);
590 assert(rightLength >= 0);
591 if (left->length > std::numeric_limits<sal_Int32>::max() - rightLength) {
592 #if !defined(__COVERITY__)
593 throw std::length_error("rtl_uString_newConcatAsciiL");
594 #else
595 //coverity doesn't report std::bad_alloc as an unhandled exception when
596 //potentially thrown from destructors but does report std::length_error
597 throw std::bad_alloc();
598 #endif
600 sal_Int32 n = left->length + rightLength;
601 rtl_uString_assign(newString, left);
602 rtl_uString_ensureCapacity(newString, n);
603 sal_Unicode * p = (*newString)->buffer + (*newString)->length;
604 for (sal_Int32 i = 0; i != rightLength; ++i) {
605 p[i] = static_cast<unsigned char>(right[i]);
607 (*newString)->buffer[n] = 0;
608 (*newString)->length = n;
611 void rtl_uString_newConcatUtf16L(
612 rtl_uString ** newString, rtl_uString * left, sal_Unicode const * right,
613 sal_Int32 rightLength)
615 assert(newString != nullptr);
616 assert(left != nullptr);
617 assert(right != nullptr || rightLength == 0);
618 assert(rightLength >= 0);
619 if (left->length > std::numeric_limits<sal_Int32>::max() - rightLength) {
620 #if !defined(__COVERITY__)
621 throw std::length_error("rtl_uString_newConcatUtf16L");
622 #else
623 //coverity doesn't report std::bad_alloc as an unhandled exception when
624 //potentially thrown from destructors but does report std::length_error
625 throw std::bad_alloc();
626 #endif
628 sal_Int32 n = left->length + rightLength;
629 rtl_uString_assign(newString, left);
630 rtl_uString_ensureCapacity(newString, n);
631 if (rightLength != 0) {
632 memcpy(
633 (*newString)->buffer + (*newString)->length, right,
634 rightLength * sizeof (sal_Unicode));
636 (*newString)->buffer[n] = 0;
637 (*newString)->length = n;
640 /* ======================================================================= */
642 static int rtl_ImplGetFastUTF8UnicodeLen( const char* pStr, sal_Int32 nLen, bool * ascii )
644 int n;
645 const char* pEndStr;
647 *ascii = true;
648 n = 0;
649 pEndStr = pStr+nLen;
650 while ( pStr < pEndStr )
652 unsigned char c = static_cast<unsigned char>(*pStr);
654 if ( !(c & 0x80) )
655 pStr++;
656 else
658 if ( (c & 0xE0) == 0xC0 )
659 pStr += 2;
660 else if ( (c & 0xF0) == 0xE0 )
661 pStr += 3;
662 else if ( (c & 0xF8) == 0xF0 )
663 pStr += 4;
664 else if ( (c & 0xFC) == 0xF8 )
665 pStr += 5;
666 else if ( (c & 0xFE) == 0xFC )
667 pStr += 6;
668 else
669 pStr++;
670 *ascii = false;
673 n++;
676 return n;
679 /* ----------------------------------------------------------------------- */
681 static void rtl_string2UString_status( rtl_uString** ppThis,
682 const char* pStr,
683 sal_Int32 nLen,
684 rtl_TextEncoding eTextEncoding,
685 sal_uInt32 nCvtFlags,
686 sal_uInt32 *pInfo )
688 OSL_ENSURE(nLen == 0 || rtl_isOctetTextEncoding(eTextEncoding),
689 "rtl_string2UString_status() - Wrong TextEncoding" );
691 if ( !nLen )
693 rtl_uString_new( ppThis );
694 if (pInfo != nullptr) {
695 *pInfo = 0;
698 else
700 if ( *ppThis )
701 rtl_uString_release( *ppThis );
703 /* Optimization for US-ASCII */
704 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
706 sal_Unicode* pBuffer;
707 *ppThis = rtl_uString_ImplAlloc( nLen );
708 if (*ppThis == nullptr) {
709 if (pInfo != nullptr) {
710 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
711 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
713 return;
715 pBuffer = (*ppThis)->buffer;
716 sal_Int32 nLenCopy(nLen);
717 const char *pStrCopy(pStr);
720 /* Check ASCII range */
721 if (static_cast<unsigned char>(*pStrCopy) > 127)
723 rtl_uString_release(*ppThis);
724 goto retry; // cancel loop - try again with the converter
727 *pBuffer = *pStrCopy;
728 pBuffer++;
729 pStrCopy++;
730 nLenCopy--;
732 while (nLenCopy);
733 if (pInfo != nullptr) {
734 *pInfo = 0;
736 RTL_LOG_STRING_NEW( *ppThis );
737 return;
739 retry:
741 rtl_uString* pTemp;
742 rtl_uString* pTemp2 = nullptr;
743 rtl_TextToUnicodeConverter hConverter;
744 sal_uInt32 nInfo;
745 sal_Size nSrcBytes;
746 sal_Size nDestChars;
747 sal_Size nNewLen;
749 /* Optimization for UTF-8 - we try to calculate the exact length */
750 /* For all other encoding we try the maximum - and reallocate
751 the buffer if needed */
752 if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
754 bool ascii;
755 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen, &ascii );
756 /* Includes the string only ASCII, then we could copy
757 the buffer faster */
758 if ( ascii )
760 sal_Unicode* pBuffer;
761 *ppThis = rtl_uString_ImplAlloc( nLen );
762 if (*ppThis == nullptr)
764 if (pInfo != nullptr) {
765 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
766 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
768 return;
770 pBuffer = (*ppThis)->buffer;
773 assert((static_cast<unsigned char>(*pStr)) <= 127);
774 *pBuffer = *pStr;
775 pBuffer++;
776 pStr++;
777 nLen--;
779 while ( nLen );
780 if (pInfo != nullptr) {
781 *pInfo = 0;
783 RTL_LOG_STRING_NEW( *ppThis );
784 return;
787 else
788 nNewLen = nLen;
790 nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
791 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
793 pTemp = rtl_uString_ImplAlloc( nNewLen );
794 if (pTemp == nullptr) {
795 if (pInfo != nullptr) {
796 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
797 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
799 return;
801 nDestChars = rtl_convertTextToUnicode( hConverter, nullptr,
802 pStr, nLen,
803 pTemp->buffer, nNewLen,
804 nCvtFlags,
805 &nInfo, &nSrcBytes );
807 /* Buffer not big enough, try again with enough space */
808 /* Shouldn't be the case, but if we get textencoding which
809 could results in more unicode characters we have this
810 code here. Could be the case for apple encodings */
811 while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL )
813 rtl_freeString( pTemp );
814 nNewLen += 8;
815 pTemp = rtl_uString_ImplAlloc( nNewLen );
816 if (pTemp == nullptr) {
817 if (pInfo != nullptr) {
818 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
819 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
821 return;
823 nDestChars = rtl_convertTextToUnicode( hConverter, nullptr,
824 pStr, nLen,
825 pTemp->buffer, nNewLen,
826 nCvtFlags,
827 &nInfo, &nSrcBytes );
830 if (pInfo)
831 *pInfo = nInfo;
833 /* Set the buffer to the correct size or if there is too
834 much overhead, reallocate to the correct size */
835 if ( nNewLen > nDestChars+8 )
837 pTemp2 = rtl_uString_ImplAlloc( nDestChars );
839 if (pTemp2 != nullptr)
841 rtl::str::Copy(pTemp2->buffer, pTemp->buffer, nDestChars);
842 rtl_freeString(pTemp);
843 pTemp = pTemp2;
845 else
847 pTemp->length = nDestChars;
848 pTemp->buffer[nDestChars] = 0;
851 rtl_destroyTextToUnicodeConverter( hConverter );
852 *ppThis = pTemp;
854 /* Results the conversion in an empty buffer -
855 create an empty string */
856 if ( pTemp && !nDestChars )
857 rtl_uString_new( ppThis );
860 RTL_LOG_STRING_NEW( *ppThis );
863 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
864 const char* pStr,
865 sal_Int32 nLen,
866 rtl_TextEncoding eTextEncoding,
867 sal_uInt32 nCvtFlags ) SAL_THROW_EXTERN_C()
869 assert(ppThis);
870 assert(nLen >= 0);
871 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
872 nCvtFlags, nullptr );
875 /* ----------------------------------------------------------------------- */
namespace {

// Passed to rtl_ustring_intern_internal() and on to rtl_str_hash_intern().
// With CAN_RETURN, rtl_ustring_intern_internal() frees the input string when
// the pool returned a different string.
enum StrLifecycle {
    CANNOT_RETURN = 0,
    CAN_RETURN = 1
};

}
886 static oslMutex
887 getInternMutex()
889 static oslMutex pPoolGuard = osl_createMutex();
891 return pPoolGuard;
894 /* returns true if we found a dup in the pool */
895 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
896 rtl_uString * str,
897 StrLifecycle can_return )
899 oslMutex pPoolMutex;
901 pPoolMutex = getInternMutex();
903 osl_acquireMutex( pPoolMutex );
905 *newStr = rtl_str_hash_intern (str, can_return);
907 osl_releaseMutex( pPoolMutex );
909 RTL_LOG_STRING_INTERN_NEW(*newStr, str);
911 if( can_return && *newStr != str )
912 { /* we dupped, then found a match */
913 rtl_freeString( str );
917 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
918 rtl_uString * str) SAL_THROW_EXTERN_C()
920 assert(newStr);
921 assert(str);
922 if (SAL_STRING_IS_INTERN(str))
924 rtl::str::acquire(str);
925 *newStr = str;
927 else
929 rtl_uString *pOrg = *newStr;
930 *newStr = nullptr;
931 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
932 if (pOrg)
933 rtl_uString_release (pOrg);
937 static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
939 // FIXME: Maybe we should use a bit flag in the higher bits of the
940 // eTextEncoding value itself to determine the encoding type. But if we
941 // do, be sure to mask the value in certain places that expect the values
942 // to be numbered serially from 0 and up. One such place is
943 // Impl_getTextEncodingData().
945 switch ( eTextEncoding )
947 // 1 to 1 (with no zero elements)
948 case RTL_TEXTENCODING_IBM_437:
949 case RTL_TEXTENCODING_IBM_850:
950 case RTL_TEXTENCODING_IBM_860:
951 case RTL_TEXTENCODING_IBM_861:
952 case RTL_TEXTENCODING_IBM_863:
953 case RTL_TEXTENCODING_IBM_865:
954 return len;
956 return 0;
959 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
960 const char * str,
961 sal_Int32 len,
962 rtl_TextEncoding eTextEncoding,
963 sal_uInt32 convertFlags,
964 sal_uInt32 * pInfo )
965 SAL_THROW_EXTERN_C()
967 assert(newStr);
968 assert(len >= 0);
969 rtl_uString *scratch;
971 if (*newStr)
973 rtl_uString_release (*newStr);
974 *newStr = nullptr;
977 if ( len < 256 )
978 { // try various optimisations
979 sal_Int32 ulen;
980 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
982 int i;
983 rtl_uString *pScratch;
984 pScratch = static_cast< rtl_uString * >(
985 alloca(sizeof (rtl_uString) + len * sizeof (sal_Unicode)));
986 for (i = 0; i < len; i++)
988 /* Check ASCII range */
989 SAL_WARN_IF( (static_cast<unsigned char>(str[i])) > 127, "rtl.string",
990 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
991 pScratch->buffer[i] = str[i];
993 pScratch->length = len;
994 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
995 return;
997 if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
999 rtl_uString *pScratch;
1000 rtl_TextToUnicodeConverter hConverter;
1001 sal_Size nSrcBytes;
1002 sal_uInt32 nInfo;
1004 pScratch = static_cast< rtl_uString * >(
1005 alloca(
1006 sizeof (rtl_uString) + ulen * sizeof (sal_Unicode)));
1008 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
1009 rtl_convertTextToUnicode(
1010 hConverter, nullptr, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
1011 rtl_destroyTextToUnicodeConverter( hConverter );
1013 if (pInfo)
1014 *pInfo = nInfo;
1016 pScratch->length = ulen;
1017 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
1018 return;
1021 /* FIXME: we want a nice UTF-8 / alloca shortcut here */
1024 scratch = nullptr;
1025 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
1026 pInfo );
1027 if (!scratch) {
1028 return;
1030 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
1033 static void
1034 internRelease (rtl_uString *pThis)
1036 rtl_uString *pFree = nullptr;
1037 if ( SAL_STRING_REFCOUNT(
1038 osl_atomic_decrement( &(pThis->refCount) ) ) == 0)
1040 RTL_LOG_STRING_INTERN_DELETE(pThis);
1041 oslMutex pPoolMutex = getInternMutex();
1042 osl_acquireMutex( pPoolMutex );
1044 rtl_str_hash_remove (pThis);
1046 /* May have been separately acquired */
1047 if ( SAL_STRING_REFCOUNT(
1048 osl_atomic_increment( &(pThis->refCount) ) ) == 1 )
1050 /* we got the last ref */
1051 pFree = pThis;
1053 else /* very unusual */
1055 internRelease (pThis);
1058 osl_releaseMutex( pPoolMutex );
1060 if (pFree)
1061 rtl_freeString (pFree);
1064 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
1065 rtl_uString const * string, sal_Int32 * indexUtf16,
1066 sal_Int32 incrementCodePoints)
1068 sal_Int32 n;
1069 sal_Unicode cu;
1070 sal_uInt32 cp;
1071 assert(string != nullptr && indexUtf16 != nullptr);
1072 n = *indexUtf16;
1073 assert(n >= 0 && n <= string->length);
1074 while (incrementCodePoints < 0) {
1075 assert(n > 0);
1076 cu = string->buffer[--n];
1077 if (rtl::isLowSurrogate(cu) && n != 0 &&
1078 rtl::isHighSurrogate(string->buffer[n - 1]))
1080 --n;
1082 ++incrementCodePoints;
1084 assert(n >= 0 && n < string->length);
1085 cu = string->buffer[n];
1086 if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
1087 rtl::isLowSurrogate(string->buffer[n + 1]))
1089 cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
1090 } else {
1091 cp = cu;
1093 while (incrementCodePoints > 0) {
1094 assert(n < string->length);
1095 cu = string->buffer[n++];
1096 if (rtl::isHighSurrogate(cu) && n != string->length &&
1097 rtl::isLowSurrogate(string->buffer[n]))
1099 ++n;
1101 --incrementCodePoints;
1103 assert(n >= 0 && n <= string->length);
1104 *indexUtf16 = n;
1105 return cp;
1108 sal_Bool rtl_convertStringToUString(
1109 rtl_uString ** target, char const * source, sal_Int32 length,
1110 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
1112 assert(target);
1113 assert(length >= 0);
1114 sal_uInt32 info;
1115 rtl_string2UString_status(target, source, length, encoding, flags, &info);
1116 return (info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0;
1119 void rtl_uString_newReplaceFirst(
1120 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1121 rtl_uString const * to, sal_Int32 * index) SAL_THROW_EXTERN_C()
1123 assert(str != nullptr);
1124 assert(index != nullptr);
1125 assert(*index >= 0 && *index <= str->length);
1126 assert(from != nullptr);
1127 assert(to != nullptr);
1128 sal_Int32 i = rtl_ustr_indexOfStr_WithLength(
1129 str->buffer + *index, str->length - *index, from->buffer, from->length);
1130 if (i == -1) {
1131 rtl_uString_assign(newStr, str);
1132 } else {
1133 assert(i <= str->length - *index);
1134 i += *index;
1135 assert(from->length <= str->length);
1136 if (str->length - from->length > SAL_MAX_INT32 - to->length) {
1137 std::abort();
1139 sal_Int32 n = str->length - from->length + to->length;
1140 rtl_uString_acquire(str); // in case *newStr == str
1141 rtl_uString_new_WithLength(newStr, n);
1142 if (n != 0) {
1143 (*newStr)->length = n;
1144 assert(i >= 0 && i < str->length);
1145 memcpy(
1146 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1147 memcpy(
1148 (*newStr)->buffer + i, to->buffer,
1149 to->length * sizeof (sal_Unicode));
1150 memcpy(
1151 (*newStr)->buffer + i + to->length,
1152 str->buffer + i + from->length,
1153 (str->length - i - from->length) * sizeof (sal_Unicode));
1155 rtl_uString_release(str);
1157 *index = i;
1160 void rtl_uString_newReplaceFirstAsciiL(
1161 rtl_uString ** newStr, rtl_uString * str, char const * from,
1162 sal_Int32 fromLength, rtl_uString const * to, sal_Int32 * index)
1163 SAL_THROW_EXTERN_C()
1165 assert(str != nullptr);
1166 assert(index != nullptr);
1167 assert(*index >= 0 && *index <= str->length);
1168 assert(fromLength >= 0);
1169 assert(to != nullptr);
1170 sal_Int32 i = rtl_ustr_indexOfAscii_WithLength(
1171 str->buffer + *index, str->length - *index, from, fromLength);
1172 if (i == -1) {
1173 rtl_uString_assign(newStr, str);
1174 } else {
1175 assert(i <= str->length - *index);
1176 i += *index;
1177 assert(fromLength <= str->length);
1178 if (str->length - fromLength > SAL_MAX_INT32 - to->length) {
1179 std::abort();
1181 sal_Int32 n = str->length - fromLength + to->length;
1182 rtl_uString_acquire(str); // in case *newStr == str
1183 rtl_uString_new_WithLength(newStr, n);
1184 if (n != 0) {
1185 (*newStr)->length = n;
1186 assert(i >= 0 && i < str->length);
1187 memcpy(
1188 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1189 memcpy(
1190 (*newStr)->buffer + i, to->buffer,
1191 to->length * sizeof (sal_Unicode));
1192 memcpy(
1193 (*newStr)->buffer + i + to->length,
1194 str->buffer + i + fromLength,
1195 (str->length - i - fromLength) * sizeof (sal_Unicode));
1197 rtl_uString_release(str);
1199 *index = i;
1202 void rtl_uString_newReplaceFirstToAsciiL(
1203 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1204 char const * to, sal_Int32 toLength, sal_Int32 * index)
1205 SAL_THROW_EXTERN_C()
1207 assert(str != nullptr);
1208 assert(index != nullptr);
1209 assert(*index >= 0 && *index <= str->length);
1210 assert(from != nullptr);
1211 assert(toLength >= 0);
1212 sal_Int32 i = rtl_ustr_indexOfStr_WithLength(
1213 str->buffer + *index, str->length - *index, from->buffer, from->length);
1214 if (i == -1) {
1215 rtl_uString_assign(newStr, str);
1216 } else {
1217 assert(i <= str->length - *index);
1218 i += *index;
1219 assert(from->length <= str->length);
1220 if (str->length - from->length > SAL_MAX_INT32 - toLength) {
1221 std::abort();
1223 sal_Int32 n = str->length - from->length + toLength;
1224 rtl_uString_acquire(str); // in case *newStr == str
1225 rtl_uString_new_WithLength(newStr, n);
1226 if (n != 0) {
1227 (*newStr)->length = n;
1228 assert(i >= 0 && i < str->length);
1229 memcpy(
1230 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1231 for (sal_Int32 j = 0; j != toLength; ++j) {
1232 assert(static_cast< unsigned char >(to[j]) <= 0x7F);
1233 (*newStr)->buffer[i + j] = to[j];
1235 memcpy(
1236 (*newStr)->buffer + i + toLength,
1237 str->buffer + i + from->length,
1238 (str->length - i - from->length) * sizeof (sal_Unicode));
1240 rtl_uString_release(str);
1242 *index = i;
1245 void rtl_uString_newReplaceFirstAsciiLAsciiL(
1246 rtl_uString ** newStr, rtl_uString * str, char const * from,
1247 sal_Int32 fromLength, char const * to, sal_Int32 toLength,
1248 sal_Int32 * index) SAL_THROW_EXTERN_C()
1250 assert(str != nullptr);
1251 assert(index != nullptr);
1252 assert(*index >= 0 && *index <= str->length);
1253 assert(fromLength >= 0);
1254 assert(to != nullptr);
1255 assert(toLength >= 0);
1256 sal_Int32 i = rtl_ustr_indexOfAscii_WithLength(
1257 str->buffer + *index, str->length - *index, from, fromLength);
1258 if (i == -1) {
1259 rtl_uString_assign(newStr, str);
1260 } else {
1261 assert(i <= str->length - *index);
1262 i += *index;
1263 assert(fromLength <= str->length);
1264 if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
1265 std::abort();
1267 sal_Int32 n = str->length - fromLength + toLength;
1268 rtl_uString_acquire(str); // in case *newStr == str
1269 rtl_uString_new_WithLength(newStr, n);
1270 if (n != 0) {
1271 (*newStr)->length = n;
1272 assert(i >= 0 && i < str->length);
1273 memcpy(
1274 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1275 for (sal_Int32 j = 0; j != toLength; ++j) {
1276 assert(static_cast< unsigned char >(to[j]) <= 0x7F);
1277 (*newStr)->buffer[i + j] = to[j];
1279 memcpy(
1280 (*newStr)->buffer + i + toLength,
1281 str->buffer + i + fromLength,
1282 (str->length - i - fromLength) * sizeof (sal_Unicode));
1284 rtl_uString_release(str);
1286 *index = i;
1289 void rtl_uString_newReplaceFirstAsciiLUtf16L(
1290 rtl_uString ** newStr, rtl_uString * str, char const * from,
1291 sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength,
1292 sal_Int32 * index) SAL_THROW_EXTERN_C()
1294 assert(str != nullptr);
1295 assert(index != nullptr);
1296 assert(*index >= 0 && *index <= str->length);
1297 assert(fromLength >= 0);
1298 assert(to != nullptr || toLength == 0);
1299 assert(toLength >= 0);
1300 sal_Int32 i = rtl_ustr_indexOfAscii_WithLength(
1301 str->buffer + *index, str->length - *index, from, fromLength);
1302 if (i == -1) {
1303 rtl_uString_assign(newStr, str);
1304 } else {
1305 assert(i <= str->length - *index);
1306 i += *index;
1307 assert(fromLength <= str->length);
1308 if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
1309 rtl_uString_release(*newStr);
1310 *newStr = nullptr;
1311 } else {
1312 sal_Int32 n = str->length - fromLength + toLength;
1313 rtl_uString_acquire(str); // in case *newStr == str
1314 rtl_uString_new_WithLength(newStr, n);
1315 if (n != 0 && /*TODO:*/ *newStr != nullptr) {
1316 (*newStr)->length = n;
1317 assert(i >= 0 && i < str->length);
1318 memcpy(
1319 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1320 if (toLength != 0) {
1321 memcpy(
1322 (*newStr)->buffer + i, to, toLength * sizeof (sal_Unicode));
1324 memcpy(
1325 (*newStr)->buffer + i + toLength,
1326 str->buffer + i + fromLength,
1327 (str->length - i - fromLength) * sizeof (sal_Unicode));
1329 rtl_uString_release(str);
1332 *index = i;
1335 void rtl_uString_newReplaceFirstUtf16LAsciiL(
1336 rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from,
1337 sal_Int32 fromLength, char const * to, sal_Int32 toLength,
1338 sal_Int32 * index) SAL_THROW_EXTERN_C()
1340 assert(str != nullptr);
1341 assert(index != nullptr);
1342 assert(*index >= 0 && *index <= str->length);
1343 assert(fromLength >= 0);
1344 assert(to != nullptr);
1345 assert(toLength >= 0);
1346 sal_Int32 i = rtl_ustr_indexOfStr_WithLength(
1347 str->buffer + *index, str->length - *index, from, fromLength);
1348 if (i == -1) {
1349 rtl_uString_assign(newStr, str);
1350 } else {
1351 assert(i <= str->length - *index);
1352 i += *index;
1353 assert(fromLength <= str->length);
1354 if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
1355 rtl_uString_release(*newStr);
1356 *newStr = nullptr;
1357 } else {
1358 sal_Int32 n = str->length - fromLength + toLength;
1359 rtl_uString_acquire(str); // in case *newStr == str
1360 rtl_uString_new_WithLength(newStr, n);
1361 if (n != 0 && /*TODO:*/ *newStr != nullptr) {
1362 (*newStr)->length = n;
1363 assert(i >= 0 && i < str->length);
1364 memcpy(
1365 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1366 for (sal_Int32 j = 0; j != toLength; ++j) {
1367 assert(static_cast< unsigned char >(to[j]) <= 0x7F);
1368 (*newStr)->buffer[i + j] = to[j];
1370 memcpy(
1371 (*newStr)->buffer + i + toLength,
1372 str->buffer + i + fromLength,
1373 (str->length - i - fromLength) * sizeof (sal_Unicode));
1375 rtl_uString_release(str);
1378 *index = i;
1381 void rtl_uString_newReplaceFirstUtf16LUtf16L(
1382 rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from,
1383 sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength,
1384 sal_Int32 * index) SAL_THROW_EXTERN_C()
1386 assert(str != nullptr);
1387 assert(index != nullptr);
1388 assert(*index >= 0 && *index <= str->length);
1389 assert(from != nullptr || fromLength == 0);
1390 assert(fromLength >= 0);
1391 assert(to != nullptr || toLength == 0);
1392 assert(toLength >= 0);
1393 sal_Int32 i = rtl_ustr_indexOfStr_WithLength(
1394 str->buffer + *index, str->length - *index, from, fromLength);
1395 if (i == -1) {
1396 rtl_uString_assign(newStr, str);
1397 } else {
1398 assert(i <= str->length - *index);
1399 i += *index;
1400 assert(fromLength <= str->length);
1401 if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
1402 rtl_uString_release(*newStr);
1403 *newStr = nullptr;
1404 } else {
1405 sal_Int32 n = str->length - fromLength + toLength;
1406 rtl_uString_acquire(str); // in case *newStr == str
1407 rtl_uString_new_WithLength(newStr, n);
1408 if (n != 0 && /*TODO:*/ *newStr != nullptr) {
1409 (*newStr)->length = n;
1410 assert(i >= 0 && i < str->length);
1411 memcpy(
1412 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1413 if (toLength != 0) {
1414 memcpy(
1415 (*newStr)->buffer + i, to, toLength * sizeof (sal_Unicode));
1417 memcpy(
1418 (*newStr)->buffer + i + toLength,
1419 str->buffer + i + fromLength,
1420 (str->length - i - fromLength) * sizeof (sal_Unicode));
1422 rtl_uString_release(str);
1425 *index = i;
1428 void rtl_uString_newReplaceAll(
1429 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1430 rtl_uString const * to) SAL_THROW_EXTERN_C()
1432 rtl_uString_newReplaceAllFromIndex( newStr, str, from, to, 0 );
1435 void rtl_uString_newReplaceAllFromIndex(
1436 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1437 rtl_uString const * to, sal_Int32 fromIndex) SAL_THROW_EXTERN_C()
1439 assert(to != nullptr);
1440 assert(fromIndex >= 0 && fromIndex <= str->length);
1441 rtl_uString_assign(newStr, str);
1442 for (sal_Int32 i = fromIndex;; i += to->length) {
1443 rtl_uString_newReplaceFirst(newStr, *newStr, from, to, &i);
1444 if (i == -1) {
1445 break;
1450 void rtl_uString_newReplaceAllAsciiL(
1451 rtl_uString ** newStr, rtl_uString * str, char const * from,
1452 sal_Int32 fromLength, rtl_uString const * to) SAL_THROW_EXTERN_C()
1454 assert(to != nullptr);
1455 rtl_uString_assign(newStr, str);
1456 for (sal_Int32 i = 0;; i += to->length) {
1457 rtl_uString_newReplaceFirstAsciiL(
1458 newStr, *newStr, from, fromLength, to, &i);
1459 if (i == -1) {
1460 break;
1465 void rtl_uString_newReplaceAllToAsciiL(
1466 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1467 char const * to, sal_Int32 toLength) SAL_THROW_EXTERN_C()
1469 assert(from != nullptr);
1470 rtl_uString_assign(newStr, str);
1471 for (sal_Int32 i = 0;; i += toLength) {
1472 rtl_uString_newReplaceFirstToAsciiL(
1473 newStr, *newStr, from, to, toLength, &i);
1474 if (i == -1) {
1475 break;
1480 void rtl_uString_newReplaceAllAsciiLAsciiL(
1481 rtl_uString ** newStr, rtl_uString * str, char const * from,
1482 sal_Int32 fromLength, char const * to, sal_Int32 toLength)
1483 SAL_THROW_EXTERN_C()
1485 assert(toLength >= 0);
1486 rtl_uString_assign(newStr, str);
1487 for (sal_Int32 i = 0;; i += toLength) {
1488 rtl_uString_newReplaceFirstAsciiLAsciiL(
1489 newStr, *newStr, from, fromLength, to, toLength, &i);
1490 if (i == -1) {
1491 break;
1496 void rtl_uString_newReplaceAllAsciiLUtf16L(
1497 rtl_uString ** newStr, rtl_uString * str, char const * from,
1498 sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength)
1499 SAL_THROW_EXTERN_C()
1501 assert(toLength >= 0);
1502 rtl_uString_assign(newStr, str);
1503 for (sal_Int32 i = 0;; i += toLength) {
1504 rtl_uString_newReplaceFirstAsciiLUtf16L(
1505 newStr, *newStr, from, fromLength, to, toLength, &i);
1506 if (i == -1 || *newStr == nullptr) {
1507 break;
1512 void rtl_uString_newReplaceAllUtf16LAsciiL(
1513 rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from,
1514 sal_Int32 fromLength, char const * to, sal_Int32 toLength)
1515 SAL_THROW_EXTERN_C()
1517 assert(toLength >= 0);
1518 rtl_uString_assign(newStr, str);
1519 for (sal_Int32 i = 0;; i += toLength) {
1520 rtl_uString_newReplaceFirstUtf16LAsciiL(
1521 newStr, *newStr, from, fromLength, to, toLength, &i);
1522 if (i == -1 || *newStr == nullptr) {
1523 break;
1528 void rtl_uString_newReplaceAllUtf16LUtf16L(
1529 rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from,
1530 sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength)
1531 SAL_THROW_EXTERN_C()
1533 rtl_uString_newReplaceAllFromIndexUtf16LUtf16L(newStr, str, from, fromLength, to, toLength, 0);
1536 void rtl_uString_newReplaceAllFromIndexUtf16LUtf16L(
1537 rtl_uString ** newStr, rtl_uString * str, sal_Unicode const * from,
1538 sal_Int32 fromLength, sal_Unicode const * to, sal_Int32 toLength, sal_Int32 fromIndex)
1539 SAL_THROW_EXTERN_C()
1541 assert(toLength >= 0);
1542 assert(fromIndex >= 0 && fromIndex <= str->length);
1543 rtl_uString_assign(newStr, str);
1544 for (sal_Int32 i = fromIndex;; i += toLength) {
1545 rtl_uString_newReplaceFirstUtf16LUtf16L(
1546 newStr, *newStr, from, fromLength, to, toLength, &i);
1547 if (i == -1 || *newStr == nullptr) {
1548 break;
1553 sal_Int32 SAL_CALL rtl_ustr_getLength(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1555 return rtl::str::getLength(pStr);
1558 sal_Int32 SAL_CALL rtl_ustr_compare(const sal_Unicode* pStr1, const sal_Unicode* pStr2)
1559 SAL_THROW_EXTERN_C()
1561 return rtl::str::compare(pStr1, pStr2);
1564 sal_Int32 SAL_CALL rtl_ustr_compare_WithLength(const sal_Unicode* pStr1, sal_Int32 nStr1Len,
1565 const sal_Unicode* pStr2, sal_Int32 nStr2Len)
1566 SAL_THROW_EXTERN_C()
1568 return rtl::str::compare_WithLength(pStr1, nStr1Len, pStr2, nStr2Len);
1571 sal_Int32 SAL_CALL rtl_ustr_shortenedCompare_WithLength(
1572 const sal_Unicode* pStr1, sal_Int32 nStr1Len, const sal_Unicode* pStr2, sal_Int32 nStr2Len,
1573 sal_Int32 nShortenedLength) SAL_THROW_EXTERN_C()
1575 return rtl::str::shortenedCompare_WithLength(pStr1, nStr1Len, pStr2, nStr2Len, nShortenedLength);
1578 sal_Int32 SAL_CALL rtl_ustr_reverseCompare_WithLength(const sal_Unicode* pStr1, sal_Int32 nStr1Len,
1579 const sal_Unicode* pStr2, sal_Int32 nStr2Len)
1580 SAL_THROW_EXTERN_C()
1582 return rtl::str::reverseCompare_WithLength(pStr1, nStr1Len, pStr2, nStr2Len);
1585 sal_Int32 SAL_CALL rtl_ustr_compareIgnoreAsciiCase(const sal_Unicode* pStr1,
1586 const sal_Unicode* pStr2) SAL_THROW_EXTERN_C()
1588 return rtl::str::compareIgnoreAsciiCase(pStr1, pStr2);
1591 sal_Int32 SAL_CALL rtl_ustr_compareIgnoreAsciiCase_WithLength(const sal_Unicode* pStr1,
1592 sal_Int32 nStr1Len,
1593 const sal_Unicode* pStr2,
1594 sal_Int32 nStr2Len)
1595 SAL_THROW_EXTERN_C()
1597 return rtl::str::compareIgnoreAsciiCase_WithLength(pStr1, nStr1Len, pStr2, nStr2Len);
1600 sal_Int32 SAL_CALL rtl_ustr_shortenedCompareIgnoreAsciiCase_WithLength(
1601 const sal_Unicode* pStr1, sal_Int32 nStr1Len, const sal_Unicode* pStr2, sal_Int32 nStr2Len,
1602 sal_Int32 nShortenedLength) SAL_THROW_EXTERN_C()
1604 return rtl::str::shortenedCompareIgnoreAsciiCase_WithLength(pStr1, nStr1Len, pStr2, nStr2Len,
1605 nShortenedLength);
1608 sal_Int32 SAL_CALL rtl_ustr_hashCode(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1610 return rtl::str::hashCode(pStr);
1613 sal_Int32 SAL_CALL rtl_ustr_hashCode_WithLength(const sal_Unicode* pStr, sal_Int32 nLen)
1614 SAL_THROW_EXTERN_C()
1616 return rtl::str::hashCode_WithLength(pStr, nLen);
1619 sal_Int32 SAL_CALL rtl_ustr_indexOfChar(const sal_Unicode* pStr, sal_Unicode c) SAL_THROW_EXTERN_C()
1621 return rtl::str::indexOfChar(pStr, c);
1624 sal_Int32 SAL_CALL rtl_ustr_indexOfChar_WithLength(const sal_Unicode* pStr, sal_Int32 nLen,
1625 sal_Unicode c) SAL_THROW_EXTERN_C()
1627 return rtl::str::indexOfChar_WithLength(pStr, nLen, c);
1630 sal_Int32 SAL_CALL rtl_ustr_lastIndexOfChar(const sal_Unicode* pStr, sal_Unicode c)
1631 SAL_THROW_EXTERN_C()
1633 return rtl::str::lastIndexOfChar(pStr, c);
1636 sal_Int32 SAL_CALL rtl_ustr_lastIndexOfChar_WithLength(const sal_Unicode* pStr, sal_Int32 nLen,
1637 sal_Unicode c) SAL_THROW_EXTERN_C()
1639 return rtl::str::lastIndexOfChar_WithLength(pStr, nLen, c);
1642 sal_Int32 SAL_CALL rtl_ustr_indexOfStr(const sal_Unicode* pStr, const sal_Unicode* pSubStr)
1643 SAL_THROW_EXTERN_C()
1645 return rtl::str::indexOfStr(pStr, pSubStr);
1648 sal_Int32 SAL_CALL rtl_ustr_indexOfStr_WithLength(const sal_Unicode* pStr, sal_Int32 nStrLen,
1649 const sal_Unicode* pSubStr, sal_Int32 nSubLen)
1650 SAL_THROW_EXTERN_C()
1652 return rtl::str::indexOfStr_WithLength(pStr, nStrLen, pSubStr, nSubLen);
1655 sal_Int32 SAL_CALL rtl_ustr_lastIndexOfStr(const sal_Unicode* pStr, const sal_Unicode* pSubStr)
1656 SAL_THROW_EXTERN_C()
1658 return rtl::str::lastIndexOfStr(pStr, pSubStr);
1661 sal_Int32 SAL_CALL rtl_ustr_lastIndexOfStr_WithLength(const sal_Unicode* pStr, sal_Int32 nStrLen,
1662 const sal_Unicode* pSubStr, sal_Int32 nSubLen)
1663 SAL_THROW_EXTERN_C()
1665 return rtl::str::lastIndexOfStr_WithLength(pStr, nStrLen, pSubStr, nSubLen);
1668 void SAL_CALL rtl_ustr_replaceChar(sal_Unicode* pStr, sal_Unicode cOld, sal_Unicode cNew)
1669 SAL_THROW_EXTERN_C()
1671 return rtl::str::replaceChar(pStr, cOld, cNew);
1674 void SAL_CALL rtl_ustr_replaceChar_WithLength(sal_Unicode* pStr, sal_Int32 nLen, sal_Unicode cOld,
1675 sal_Unicode cNew) SAL_THROW_EXTERN_C()
1677 return rtl::str::replaceChar_WithLength(pStr, nLen, cOld, cNew);
1680 void SAL_CALL rtl_ustr_toAsciiLowerCase(sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1682 return rtl::str::toAsciiLowerCase(pStr);
1685 void SAL_CALL rtl_ustr_toAsciiLowerCase_WithLength(sal_Unicode* pStr, sal_Int32 nLen)
1686 SAL_THROW_EXTERN_C()
1688 return rtl::str::toAsciiLowerCase_WithLength(pStr, nLen);
1691 void SAL_CALL rtl_ustr_toAsciiUpperCase(sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1693 return rtl::str::toAsciiUpperCase(pStr);
1696 void SAL_CALL rtl_ustr_toAsciiUpperCase_WithLength(sal_Unicode* pStr, sal_Int32 nLen)
1697 SAL_THROW_EXTERN_C()
1699 return rtl::str::toAsciiUpperCase_WithLength(pStr, nLen);
1702 sal_Int32 SAL_CALL rtl_ustr_trim(sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1704 return rtl::str::trim(pStr);
1707 sal_Int32 SAL_CALL rtl_ustr_trim_WithLength(sal_Unicode* pStr, sal_Int32 nLen) SAL_THROW_EXTERN_C()
1709 return rtl::str::trim_WithLength(pStr, nLen);
1712 sal_Int32 SAL_CALL rtl_ustr_valueOfBoolean(sal_Unicode* pStr, sal_Bool b) SAL_THROW_EXTERN_C()
1714 return rtl::str::valueOfBoolean(pStr, b);
1717 sal_Int32 SAL_CALL rtl_ustr_valueOfChar(sal_Unicode* pStr, sal_Unicode c) SAL_THROW_EXTERN_C()
1719 return rtl::str::valueOfChar(pStr, c);
1722 sal_Int32 SAL_CALL rtl_ustr_valueOfInt32(sal_Unicode* pStr, sal_Int32 n, sal_Int16 nRadix)
1723 SAL_THROW_EXTERN_C()
1725 return rtl::str::valueOfInt32(pStr, n, nRadix);
1728 sal_Int32 SAL_CALL rtl_ustr_valueOfInt64(sal_Unicode* pStr, sal_Int64 n, sal_Int16 nRadix)
1729 SAL_THROW_EXTERN_C()
1731 return rtl::str::valueOfInt64(pStr, n, nRadix);
1734 sal_Int32 SAL_CALL rtl_ustr_valueOfUInt64(sal_Unicode* pStr, sal_uInt64 n, sal_Int16 nRadix)
1735 SAL_THROW_EXTERN_C()
1737 return rtl::str::valueOfUInt64(pStr, n, nRadix);
1740 sal_Bool SAL_CALL rtl_ustr_toBoolean(const sal_Unicode* pStr) SAL_THROW_EXTERN_C()
1742 return rtl::str::toBoolean(pStr);
1745 sal_Int32 SAL_CALL rtl_ustr_toInt32(const sal_Unicode* pStr, sal_Int16 nRadix) SAL_THROW_EXTERN_C()
1747 return rtl::str::toInt32(pStr, nRadix);
1750 sal_Int64 SAL_CALL rtl_ustr_toInt64(const sal_Unicode* pStr, sal_Int16 nRadix) SAL_THROW_EXTERN_C()
1752 return rtl::str::toInt64(pStr, nRadix);
1755 sal_Int64 SAL_CALL rtl_ustr_toInt64_WithLength(const sal_Unicode* pStr, sal_Int16 nRadix,
1756 sal_Int32 nStrLength) SAL_THROW_EXTERN_C()
1758 return rtl::str::toInt64_WithLength(pStr, nRadix, nStrLength);
1761 sal_uInt32 SAL_CALL rtl_ustr_toUInt32(const sal_Unicode* pStr, sal_Int16 nRadix)
1762 SAL_THROW_EXTERN_C()
1764 return rtl::str::toUInt32(pStr, nRadix);
1767 sal_uInt64 SAL_CALL rtl_ustr_toUInt64(const sal_Unicode* pStr, sal_Int16 nRadix)
1768 SAL_THROW_EXTERN_C()
1770 return rtl::str::toUInt64(pStr, nRadix);
1773 rtl_uString* rtl_uString_ImplAlloc(sal_Int32 nLen)
1775 return rtl::str::Alloc<rtl_uString>(nLen);
1778 void SAL_CALL rtl_uString_acquire(rtl_uString* pThis) SAL_THROW_EXTERN_C()
1780 return rtl::str::acquire(pThis);
1783 void SAL_CALL rtl_uString_release(rtl_uString* pThis) SAL_THROW_EXTERN_C()
1785 return rtl::str::release(pThis);
1788 void SAL_CALL rtl_uString_new(rtl_uString** ppThis) SAL_THROW_EXTERN_C()
1790 return rtl::str::new_(ppThis);
1793 rtl_uString* SAL_CALL rtl_uString_alloc(sal_Int32 nLen) SAL_THROW_EXTERN_C()
1795 assert(nLen >= 0);
1796 return rtl::str::Alloc<rtl_uString>(nLen);
1799 void SAL_CALL rtl_uString_new_WithLength(rtl_uString** ppThis, sal_Int32 nLen) SAL_THROW_EXTERN_C()
1801 rtl::str::new_WithLength(ppThis, nLen);
1804 void SAL_CALL rtl_uString_newFromString(rtl_uString** ppThis, const rtl_uString* pStr)
1805 SAL_THROW_EXTERN_C()
1807 rtl::str::newFromString(ppThis, pStr);
1810 void SAL_CALL rtl_uString_newFromStr(rtl_uString** ppThis, const sal_Unicode* pCharStr)
1811 SAL_THROW_EXTERN_C()
1813 rtl::str::newFromStr(ppThis, pCharStr);
1816 void SAL_CALL rtl_uString_newFromStr_WithLength(rtl_uString** ppThis, const sal_Unicode* pCharStr,
1817 sal_Int32 nLen) SAL_THROW_EXTERN_C()
1819 rtl::str::newFromStr_WithLength(ppThis, pCharStr, nLen);
1822 void SAL_CALL rtl_uString_newFromSubString(rtl_uString** ppThis, const rtl_uString* pFrom,
1823 sal_Int32 beginIndex, sal_Int32 count)
1824 SAL_THROW_EXTERN_C()
1826 rtl::str::newFromSubString(ppThis, pFrom, beginIndex, count);
1829 // Used when creating from string literals.
1830 void SAL_CALL rtl_uString_newFromLiteral(rtl_uString** ppThis, const char* pCharStr, sal_Int32 nLen,
1831 sal_Int32 allocExtra) SAL_THROW_EXTERN_C()
1833 rtl::str::newFromLiteral(ppThis, pCharStr, nLen, allocExtra);
1836 void SAL_CALL rtl_uString_assign(rtl_uString** ppThis, rtl_uString* pStr) SAL_THROW_EXTERN_C()
1838 rtl::str::assign(ppThis, pStr);
1841 sal_Int32 SAL_CALL rtl_uString_getLength(const rtl_uString* pThis) SAL_THROW_EXTERN_C()
1843 return rtl::str::getLength(pThis);
1846 sal_Unicode* SAL_CALL rtl_uString_getStr(rtl_uString* pThis) SAL_THROW_EXTERN_C()
1848 return rtl::str::getStr(pThis);
1851 void SAL_CALL rtl_uString_newConcat(rtl_uString** ppThis, rtl_uString* pLeft, rtl_uString* pRight)
1852 SAL_THROW_EXTERN_C()
1854 rtl::str::newConcat(ppThis, pLeft, pRight);
1857 void SAL_CALL rtl_uString_ensureCapacity(rtl_uString** ppThis, sal_Int32 size) SAL_THROW_EXTERN_C()
1859 rtl::str::ensureCapacity(ppThis, size);
1862 void SAL_CALL rtl_uString_newReplaceStrAt(rtl_uString** ppThis, rtl_uString* pStr, sal_Int32 nIndex,
1863 sal_Int32 nCount, rtl_uString* pNewSubStr)
1864 SAL_THROW_EXTERN_C()
1866 rtl::str::newReplaceStrAt(ppThis, pStr, nIndex, nCount, pNewSubStr);
1869 void SAL_CALL rtl_uString_newReplaceStrAtUtf16L(rtl_uString** ppThis, rtl_uString* pStr, sal_Int32 nIndex,
1870 sal_Int32 nCount, sal_Unicode const * subStr, sal_Int32 substrLen)
1871 SAL_THROW_EXTERN_C()
1873 rtl::str::newReplaceStrAt(ppThis, pStr, nIndex, nCount, subStr, substrLen);
1876 void SAL_CALL rtl_uString_newReplace(rtl_uString** ppThis, rtl_uString* pStr, sal_Unicode cOld,
1877 sal_Unicode cNew) SAL_THROW_EXTERN_C()
1879 rtl::str::newReplace(ppThis, pStr, cOld, cNew);
1882 void SAL_CALL rtl_uString_newToAsciiLowerCase(rtl_uString** ppThis, rtl_uString* pStr)
1883 SAL_THROW_EXTERN_C()
1885 rtl::str::newToAsciiLowerCase(ppThis, pStr);
1888 void SAL_CALL rtl_uString_newToAsciiUpperCase(rtl_uString** ppThis, rtl_uString* pStr)
1889 SAL_THROW_EXTERN_C()
1891 rtl::str::newToAsciiUpperCase(ppThis, pStr);
1894 void SAL_CALL rtl_uString_newTrim(rtl_uString** ppThis, rtl_uString* pStr) SAL_THROW_EXTERN_C()
1896 rtl::str::newTrim(ppThis, pStr);
1899 sal_Int32 SAL_CALL rtl_uString_getToken(rtl_uString** ppThis, rtl_uString* pStr, sal_Int32 nToken,
1900 sal_Unicode cTok, sal_Int32 nIndex) SAL_THROW_EXTERN_C()
1902 return rtl::str::getToken(ppThis, pStr, nToken, cTok, nIndex);
1905 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */