src/corelib/tools/qchar.cpp

   1 /****************************************************************************
   2 **
   3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
   4 ** Contact: Qt Software Information (qt-info@nokia.com)
   5 **
   6 ** This file is part of the QtCore module of the Qt Toolkit.
   7 **
   8 ** $QT_BEGIN_LICENSE:LGPL$
   9 ** No Commercial Usage
  10 ** This file contains pre-release code and may not be distributed.
  11 ** You may use this file in accordance with the terms and conditions
  12 ** contained in the either Technology Preview License Agreement or the
  13 ** Beta Release License Agreement.
  14 **
  15 ** GNU Lesser General Public License Usage
  16 ** Alternatively, this file may be used under the terms of the GNU Lesser
  17 ** General Public License version 2.1 as published by the Free Software
  18 ** Foundation and appearing in the file LICENSE.LGPL included in the
  19 ** packaging of this file.  Please review the following information to
  20 ** ensure the GNU Lesser General Public License version 2.1 requirements
  21 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  22 **
  23 ** In addition, as a special exception, Nokia gives you certain
  24 ** additional rights. These rights are described in the Nokia Qt LGPL
  25 ** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
  26 ** package.
  27 **
  28 ** GNU General Public License Usage
  29 ** Alternatively, this file may be used under the terms of the GNU
  30 ** General Public License version 3.0 as published by the Free Software
  31 ** Foundation and appearing in the file LICENSE.GPL included in the
  32 ** packaging of this file.  Please review the following information to
  33 ** ensure the GNU General Public License version 3.0 requirements will be
  34 ** met: http://www.gnu.org/copyleft/gpl.html.
  35 **
  36 ** If you are unsure which license is appropriate for your use, please
  37 ** contact the sales department at qt-sales@nokia.com.
  38 ** $QT_END_LICENSE$
  39 **
  40 ****************************************************************************/
  41
  42 // Don't define it while compiling this module, or USERS of Qt will
  43 // not be able to link.
  44 #ifdef QT_NO_CAST_FROM_ASCII
  45 #undef QT_NO_CAST_FROM_ASCII
  46 #endif
  47 #ifdef QT_NO_CAST_TO_ASCII
  48 #undef QT_NO_CAST_TO_ASCII
  49 #endif
  50 #include "qchar.h"
  51 #include "qdatastream.h"
  52 #include "qtextcodec.h"
  53
  54 #include "qunicodetables_p.h"
  55
  56 #include "qunicodetables.cpp"
  57
  58 QT_BEGIN_NAMESPACE
  59
// Largest valid Unicode code point. The uint (UCS-4) overloads below compare
// their argument against this value and return a fallback instead of
// consulting the property tables when the argument is larger.
#define LAST_UNICODE_CHAR 0x10ffff
  61
// A build without QTextCodec cannot provide a codec for C strings either, so
// QT_NO_TEXTCODEC implies QT_NO_CODEC_FOR_C_STRINGS.
#ifndef QT_NO_CODEC_FOR_C_STRINGS
#ifdef QT_NO_TEXTCODEC
#define QT_NO_CODEC_FOR_C_STRINGS
#endif
#endif
  67
// Turns a small non-negative integer (in this file: a QChar::Category value)
// into a single-bit mask, so that membership in a set of categories can be
// tested with one bitwise AND.
#define FLAG(x) (1 << (x))
  69
  70 /*! \class QLatin1Char
  71     \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
  72
  73     \ingroup text
  74
  75     This class is only useful to avoid the codec for C strings business
  76     in the QChar(ch) constructor. You can avoid it by writing
  77     QChar(ch, 0).
  78
  79     \sa QChar, QLatin1String, QString
  80 */
  81
  82 /*!
  83     \fn const char QLatin1Char::toLatin1() const
  84
  85     Converts a Latin-1 character to an 8-bit ASCII representation of
  86     the character.
  87 */
  88
  89 /*!
  90     \fn const ushort QLatin1Char::unicode() const
  91
  92     Converts a Latin-1 character to a 16-bit-encoded Unicode representation
  93     of the character.
  94 */
  95
  96 /*!
  97     \fn QLatin1Char::QLatin1Char(char c)
  98
  99     Constructs a Latin-1 character for \a c. This constructor should be
 100     used when the encoding of the input character is known to be Latin-1.
 101 */
 102
 103 /*!
 104     \class QChar
 105     \brief The QChar class provides a 16-bit Unicode character.
 106
 107     \ingroup text
 108     \reentrant
 109
 110     In Qt, Unicode characters are 16-bit entities without any markup
 111     or structure. This class represents such an entity. It is
 112     lightweight, so it can be used everywhere. Most compilers treat
 113     it like an \c{unsigned short}.
 114
 115     QChar provides a full complement of testing/classification
 116     functions, converting to and from other formats, converting from
 117     composed to decomposed Unicode, and trying to compare and
 118     case-convert if you ask it to.
 119
 120     The classification functions include functions like those in the
 121     standard C++ header \<cctype\> (formerly \<ctype.h\>), but
 122     operating on the full range of Unicode characters. They all
 123     return true if the character is a certain type of character;
 124     otherwise they return false. These classification functions are
 125     isNull() (returns true if the character is '\\0'), isPrint()
 126     (true if the character is any sort of printable character,
 127     including whitespace), isPunct() (any sort of punctuation),
 128     isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
 129     sort of numeric character, not just 0-9), isLetterOrNumber(), and
 130     isDigit() (decimal digits). All of these are wrappers around
 131     category() which returns the Unicode-defined category of each
 132     character.
 133
 134     QChar also provides direction(), which indicates the "natural"
 135     writing direction of this character. The joining() function
 136     indicates how the character joins with its neighbors (needed
 137     mostly for Arabic) and finally hasMirrored(), which indicates
 138     whether the character needs to be mirrored when it is printed in
 139     its "unnatural" writing direction.
 140
 141     Composed Unicode characters (like \aring) can be converted to
 142     decomposed Unicode ("a" followed by "ring above") by using
 143     decomposition().
 144
 145     In Unicode, comparison is not necessarily possible and case
 146     conversion is very difficult at best. Unicode, covering the
 147     "entire" world, also includes most of the world's case and
 148     sorting problems. operator==() and friends will do comparison
 149     based purely on the numeric Unicode value (code point) of the
 150     characters, and toUpper() and toLower() will do case changes when
 151     the character has a well-defined uppercase/lowercase equivalent.
 152     For locale-dependent comparisons, use
 153     QString::localeAwareCompare().
 154
 155     The conversion functions include unicode() (to a scalar),
 156     toLatin1() (to scalar, but converts all non-Latin-1 characters to
 157     0), row() (gives the Unicode row), cell() (gives the Unicode
 158     cell), digitValue() (gives the integer value of any of the
 159     numerous digit characters), and a host of constructors.
 160
 161     QChar provides constructors and cast operators that make it easy
 162     to convert to and from traditional 8-bit \c{char}s. If you
 163     defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
 164     explained in the QString documentation, you will need to
 165     explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
 166     to construct a QChar from an 8-bit \c char, and you will need to
 167     call toAscii() or toLatin1() to get the 8-bit value back.
 168
 169     \sa QString, Unicode, QLatin1Char
 170 */
 171
 172 /*!
 173     \enum QChar::UnicodeVersion
 174
 175     Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
 176     introduced a certain character.
 177
 178     \value Unicode_1_1  Version 1.1
 179     \value Unicode_2_0  Version 2.0
 180     \value Unicode_2_1_2  Version 2.1.2
 181     \value Unicode_3_0  Version 3.0
 182     \value Unicode_3_1  Version 3.1
 183     \value Unicode_3_2  Version 3.2
 184     \value Unicode_4_0  Version 4.0
 185     \value Unicode_4_1  Version 4.1
 186     \value Unicode_5_0  Version 5.0
 187     \value Unicode_Unassigned  The value is not assigned to any character
 188         in version 5.0 of Unicode.
 189
 190     \sa unicodeVersion()
 191 */
 192
 193 /*!
 194     \enum QChar::Category
 195
 196     This enum maps the Unicode character categories.
 197
 198     The following characters are normative in Unicode:
 199
 200     \value Mark_NonSpacing  Unicode class name Mn
 201
 202     \value Mark_SpacingCombining  Unicode class name Mc
 203
 204     \value Mark_Enclosing  Unicode class name Me
 205
 206     \value Number_DecimalDigit  Unicode class name Nd
 207
 208     \value Number_Letter  Unicode class name Nl
 209
 210     \value Number_Other  Unicode class name No
 211
 212     \value Separator_Space  Unicode class name Zs
 213
 214     \value Separator_Line  Unicode class name Zl
 215
 216     \value Separator_Paragraph  Unicode class name Zp
 217
 218     \value Other_Control  Unicode class name Cc
 219
 220     \value Other_Format  Unicode class name Cf
 221
 222     \value Other_Surrogate  Unicode class name Cs
 223
 224     \value Other_PrivateUse  Unicode class name Co
 225
 226     \value Other_NotAssigned  Unicode class name Cn
 227
 228
 229     The following categories are informative in Unicode:
 230
 231     \value Letter_Uppercase  Unicode class name Lu
 232
 233     \value Letter_Lowercase  Unicode class name Ll
 234
 235     \value Letter_Titlecase  Unicode class name Lt
 236
 237     \value Letter_Modifier  Unicode class name Lm
 238
 239     \value Letter_Other Unicode class name Lo
 240
 241     \value Punctuation_Connector  Unicode class name Pc
 242
 243     \value Punctuation_Dash  Unicode class name Pd
 244
 245     \value Punctuation_Open  Unicode class name Ps
 246
 247     \value Punctuation_Close  Unicode class name Pe
 248
 249     \value Punctuation_InitialQuote  Unicode class name Pi
 250
 251     \value Punctuation_FinalQuote  Unicode class name Pf
 252
 253     \value Punctuation_Other  Unicode class name Po
 254
 255     \value Symbol_Math  Unicode class name Sm
 256
 257     \value Symbol_Currency  Unicode class name Sc
 258
 259     \value Symbol_Modifier  Unicode class name Sk
 260
 261     \value Symbol_Other  Unicode class name So
 262
 263     \value NoCategory  Qt cannot find an appropriate category for the character.
 264
 265     \omitvalue Punctuation_Dask
 266
 267     \sa category()
 268 */
 269
 270 /*!
 271     \enum QChar::Direction
 272
 273     This enum type defines the Unicode direction attributes. See the
 274     \l{http://www.unicode.org/}{Unicode Standard} for a description
 275     of the values.
 276
 277     In order to conform to C/C++ naming conventions "Dir" is prepended
 278     to the codes used in the Unicode Standard.
 279
 280     \value DirAL
 281     \value DirAN
 282     \value DirB
 283     \value DirBN
 284     \value DirCS
 285     \value DirEN
 286     \value DirES
 287     \value DirET
 288     \value DirL
 289     \value DirLRE
 290     \value DirLRO
 291     \value DirNSM
 292     \value DirON
 293     \value DirPDF
 294     \value DirR
 295     \value DirRLE
 296     \value DirRLO
 297     \value DirS
 298     \value DirWS
 299
 300     \sa direction()
 301 */
 302
 303 /*!
 304     \enum QChar::Decomposition
 305
 306     This enum type defines the Unicode decomposition attributes. See
 307     the \l{http://www.unicode.org/}{Unicode Standard} for a
 308     description of the values.
 309
 310     \value NoDecomposition
 311     \value Canonical
 312     \value Circle
 313     \value Compat
 314     \value Final
 315     \value Font
 316     \value Fraction
 317     \value Initial
 318     \value Isolated
 319     \value Medial
 320     \value Narrow
 321     \value NoBreak
 322     \value Small
 323     \value Square
 324     \value Sub
 325     \value Super
 326     \value Vertical
 327     \value Wide
 328
 329     \omitvalue Single
 330
 331     \sa decomposition()
 332 */
 333
 334 /*!
 335     \enum QChar::Joining
 336
 337     This enum type defines the Unicode joining attributes. See the
 338     \l{http://www.unicode.org/}{Unicode Standard} for a description
 339     of the values.
 340
 341     \value Center
 342     \value Dual
 343     \value OtherJoining
 344     \value Right
 345
 346     \sa joining()
 347 */
 348
 349 /*!
 350     \enum QChar::CombiningClass
 351
 352     \internal
 353
 354     This enum type defines names for some of the Unicode combining
 355     classes. See the \l{http://www.unicode.org/}{Unicode Standard}
 356     for a description of the values.
 357
 358     \value Combining_Above
 359     \value Combining_AboveAttached
 360     \value Combining_AboveLeft
 361     \value Combining_AboveLeftAttached
 362     \value Combining_AboveRight
 363     \value Combining_AboveRightAttached
 364     \value Combining_Below
 365     \value Combining_BelowAttached
 366     \value Combining_BelowLeft
 367     \value Combining_BelowLeftAttached
 368     \value Combining_BelowRight
 369     \value Combining_BelowRightAttached
 370     \value Combining_DoubleAbove
 371     \value Combining_DoubleBelow
 372     \value Combining_IotaSubscript
 373     \value Combining_Left
 374     \value Combining_LeftAttached
 375     \value Combining_Right
 376     \value Combining_RightAttached
 377 */
 378
 379 /*!
 380     \enum QChar::SpecialCharacter
 381
 382     \value Null A QChar with this value isNull().
 383     \value Nbsp Non-breaking space.
 384     \value ReplacementCharacter
 385     \value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used.
 386     \value ByteOrderMark
 387     \value ByteOrderSwapped
 388     \value ParagraphSeparator
 389     \value LineSeparator
 390
 391     \omitvalue null
 392     \omitvalue replacement
 393     \omitvalue byteOrderMark
 394     \omitvalue byteOrderSwapped
 395     \omitvalue nbsp
 396 */
 397
 398 /*!
 399     \fn void QChar::setCell(uchar cell)
 400     \internal
 401 */
 402
 403 /*!
 404     \fn void QChar::setRow(uchar row)
 405     \internal
 406 */
 407
 408 /*!
 409     \fn QChar::QChar()
 410
 411     Constructs a null QChar ('\\0').
 412
 413     \sa isNull()
 414 */
 415
 416 /*!
 417     \fn QChar::QChar(QLatin1Char ch)
 418
 419     Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
 420 */
 421
 422 /*!
 423     \fn QChar::QChar(SpecialCharacter ch)
 424
 425     Constructs a QChar for the predefined character value \a ch.
 426 */
 427
 428 /*!
 429     Constructs a QChar corresponding to ASCII/Latin-1 character \a
 430     ch.
 431 */
 432 QChar::QChar(char ch)
 433 {
 434 #ifndef QT_NO_CODEC_FOR_C_STRINGS
 435     if (QTextCodec::codecForCStrings())
 436         // #####
 437         ucs =  QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
 438     else
 439 #endif
 440         ucs = uchar(ch);
 441 }
 442
 443 /*!
 444     Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
 445 */
 446 QChar::QChar(uchar ch)
 447 {
 448 #ifndef QT_NO_CODEC_FOR_C_STRINGS
 449     if (QTextCodec::codecForCStrings()) {
 450         // #####
 451         char c = char(ch);
 452         ucs =  QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
 453     } else
 454 #endif
 455         ucs = ch;
 456 }
 457
 458 /*!
 459     \fn QChar::QChar(uchar cell, uchar row)
 460
 461     Constructs a QChar for Unicode cell \a cell in row \a row.
 462
 463     \sa cell(), row()
 464 */
 465
 466 /*!
 467     \fn QChar::QChar(ushort code)
 468
 469     Constructs a QChar for the character with Unicode code point \a
 470     code.
 471 */
 472
 473
 474 /*!
 475     \fn QChar::QChar(short code)
 476
 477     Constructs a QChar for the character with Unicode code point \a
 478     code.
 479 */
 480
 481
 482 /*!
 483     \fn QChar::QChar(uint code)
 484
 485     Constructs a QChar for the character with Unicode code point \a
 486     code.
 487 */
 488
 489
 490 /*!
 491     \fn QChar::QChar(int code)
 492
 493     Constructs a QChar for the character with Unicode code point \a
 494     code.
 495 */
 496
 497
 498 /*!
 499     \fn bool QChar::isNull() const
 500
 501     Returns true if the character is the Unicode character 0x0000
 502     ('\\0'); otherwise returns false.
 503 */
 504
 505 /*!
 506     \fn uchar QChar::cell() const
 507
 508     Returns the cell (least significant byte) of the Unicode
 509     character.
 510
 511     \sa row()
 512 */
 513
 514 /*!
 515     \fn uchar QChar::row() const
 516
 517     Returns the row (most significant byte) of the Unicode character.
 518
 519     \sa cell()
 520 */
 521
 522 /*!
 523     Returns true if the character is a printable character; otherwise
 524     returns false. This is any character not of category Cc or Cn.
 525
 526     Note that this gives no indication of whether the character is
 527     available in a particular font.
 528 */
 529 bool QChar::isPrint() const
 530 {
 531     const int test = FLAG(Other_Control) |
 532                      FLAG(Other_NotAssigned);
 533     return !(FLAG(qGetProp(ucs)->category) & test);
 534 }
 535
 536 /*!
 537     Returns true if the character is a separator character
 538     (Separator_* categories); otherwise returns false.
 539 */
 540 bool QChar::isSpace() const
 541 {
 542     if(ucs >= 9 && ucs <=13)
 543         return true;
 544     const int test = FLAG(Separator_Space) |
 545                      FLAG(Separator_Line) |
 546                      FLAG(Separator_Paragraph);
 547     return FLAG(qGetProp(ucs)->category) & test;
 548 }
 549
 550 /*!
 551     Returns true if the character is a mark (Mark_* categories);
 552     otherwise returns false.
 553
 554     See QChar::Category for more information regarding marks.
 555 */
 556 bool QChar::isMark() const
 557 {
 558     const int test = FLAG(Mark_NonSpacing) |
 559                      FLAG(Mark_SpacingCombining) |
 560                      FLAG(Mark_Enclosing);
 561     return FLAG(qGetProp(ucs)->category) & test;
 562 }
 563
 564 /*!
 565     Returns true if the character is a punctuation mark (Punctuation_*
 566     categories); otherwise returns false.
 567 */
 568 bool QChar::isPunct() const
 569 {
 570     const int test = FLAG(Punctuation_Connector) |
 571                      FLAG(Punctuation_Dash) |
 572                      FLAG(Punctuation_Open) |
 573                      FLAG(Punctuation_Close) |
 574                      FLAG(Punctuation_InitialQuote) |
 575                      FLAG(Punctuation_FinalQuote) |
 576                      FLAG(Punctuation_Other);
 577     return FLAG(qGetProp(ucs)->category) & test;
 578 }
 579
 580 /*!
 581     Returns true if the character is a letter (Letter_* categories);
 582     otherwise returns false.
 583 */
 584 bool QChar::isLetter() const
 585 {
 586     const int test = FLAG(Letter_Uppercase) |
 587                      FLAG(Letter_Lowercase) |
 588                      FLAG(Letter_Titlecase) |
 589                      FLAG(Letter_Modifier) |
 590                      FLAG(Letter_Other);
 591     return FLAG(qGetProp(ucs)->category) & test;
 592 }
 593
 594 /*!
 595     Returns true if the character is a number (Number_* categories,
 596     not just 0-9); otherwise returns false.
 597
 598     \sa isDigit()
 599 */
 600 bool QChar::isNumber() const
 601 {
 602     const int test = FLAG(Number_DecimalDigit) |
 603                      FLAG(Number_Letter) |
 604                      FLAG(Number_Other);
 605     return FLAG(qGetProp(ucs)->category) & test;
 606 }
 607
 608 /*!
 609     Returns true if the character is a letter or number (Letter_* or
 610     Number_* categories); otherwise returns false.
 611 */
 612 bool QChar::isLetterOrNumber() const
 613 {
 614     const int test = FLAG(Letter_Uppercase) |
 615                      FLAG(Letter_Lowercase) |
 616                      FLAG(Letter_Titlecase) |
 617                      FLAG(Letter_Modifier) |
 618                      FLAG(Letter_Other) |
 619                      FLAG(Number_DecimalDigit) |
 620                      FLAG(Number_Letter) |
 621                      FLAG(Number_Other);
 622     return FLAG(qGetProp(ucs)->category) & test;
 623 }
 624
 625
 626 /*!
 627     Returns true if the character is a decimal digit
 628     (Number_DecimalDigit); otherwise returns false.
 629 */
 630 bool QChar::isDigit() const
 631 {
 632     return (qGetProp(ucs)->category == Number_DecimalDigit);
 633 }
 634
 635
 636 /*!
 637     Returns true if the character is a symbol (Symbol_* categories);
 638     otherwise returns false.
 639 */
 640 bool QChar::isSymbol() const
 641 {
 642     const int test = FLAG(Symbol_Math) |
 643                      FLAG(Symbol_Currency) |
 644                      FLAG(Symbol_Modifier) |
 645                      FLAG(Symbol_Other);
 646     return FLAG(qGetProp(ucs)->category) & test;
 647 }
 648
 649 /*!
 650   \fn bool QChar::isHighSurrogate() const
 651
 652   Returns true if the QChar is the high part of a utf16 surrogate
 653   (ie. if its code point is between 0xd800 and 0xdbff).
 654 */
 655
 656 /*!
 657   \fn bool QChar::isLowSurrogate() const
 658
 659   Returns true if the QChar is the low part of a utf16 surrogate
 660   (ie. if its code point is between 0xdc00 and 0xdfff).
 661 */
 662
 663 /*!
 664   \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
 665
 666   Converts a UTF16 surrogate pair with the given \a high and \a low values
 667   to its UCS-4 code point.
 668 */
 669
 670 /*!
 671   \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
 672
 673   Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
 674   point.
 675 */
 676
 677 /*!
 678   \fn static ushort QChar::highSurrogate(uint ucs4)
 679
 680   Returns the high surrogate value of a ucs4 code point.
 681   The returned result is undefined if \a ucs4 is smaller than 0x10000.
 682 */
 683
 684 /*!
 685   \fn static ushort QChar::lowSurrogate(uint ucs4)
 686
 687   Returns the low surrogate value of a ucs4 code point.
 688   The returned result is undefined if \a ucs4 is smaller than 0x10000.
 689 */
 690
 691 /*!
 692     Returns the numeric value of the digit, or -1 if the character is
 693     not a digit.
 694 */
 695 int QChar::digitValue() const
 696 {
 697     return qGetProp(ucs)->digitValue;
 698 }
 699
 700 /*!
 701     \overload
 702     Returns the numeric value of the digit, specified by the UCS-2-encoded
 703     character, \a ucs2, or -1 if the character is not a digit.
 704 */
 705 int QChar::digitValue(ushort ucs2)
 706 {
 707     return qGetProp(ucs2)->digitValue;
 708 }
 709
 710 /*!
 711     \overload
 712     Returns the numeric value of the digit specified by the UCS-4-encoded
 713     character, \a ucs4, or -1 if the character is not a digit.
 714 */
 715 int QChar::digitValue(uint ucs4)
 716 {
 717     if (ucs4 > LAST_UNICODE_CHAR)
 718         return 0;
 719     return qGetProp(ucs4)->digitValue;
 720 }
 721
 722 /*!
 723     Returns the character's category.
 724 */
 725 QChar::Category QChar::category() const
 726 {
 727     return (QChar::Category) qGetProp(ucs)->category;
 728 }
 729
 730 /*!
 731     \overload
 732     \since 4.3
 733     Returns the category of the UCS-4-encoded character specified by \a ucs4.
 734  */
 735 QChar::Category QChar::category(uint ucs4)
 736 {
 737     if (ucs4 > LAST_UNICODE_CHAR)
 738         return QChar::NoCategory;
 739     return (QChar::Category) qGetProp(ucs4)->category;
 740 }
 741
 742 /*!
 743     \overload
 744     Returns the category of the UCS-2-encoded character specified by \a ucs2.
 745  */
 746 QChar::Category QChar::category(ushort ucs2)
 747 {
 748     return (QChar::Category) qGetProp(ucs2)->category;
 749 }
 750
 751
 752 /*!
 753     Returns the character's direction.
 754 */
 755 QChar::Direction QChar::direction() const
 756 {
 757     return (QChar::Direction) qGetProp(ucs)->direction;
 758 }
 759
 760 /*!
 761 \overload
 762 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
 763  */
 764 QChar::Direction QChar::direction(uint ucs4)
 765 {
 766     if (ucs4 > LAST_UNICODE_CHAR)
 767         return QChar::DirL;
 768     return (QChar::Direction) qGetProp(ucs4)->direction;
 769 }
 770
 771 /*!
 772 \overload
 773 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
 774  */
 775 QChar::Direction QChar::direction(ushort ucs2)
 776 {
 777     return (QChar::Direction) qGetProp(ucs2)->direction;
 778 }
 779
 780 /*!
 781     Returns information about the joining properties of the character
 782     (needed for certain languages such as Arabic).
 783 */
 784 QChar::Joining QChar::joining() const
 785 {
 786     return (QChar::Joining) qGetProp(ucs)->joining;
 787 }
 788
 789 /*!
 790 \overload
 791 Returns information about the joining properties of the UCS-4-encoded
 792 character specified by \a ucs4 (needed for certain languages such as
 793 Arabic).
 794  */
 795 QChar::Joining QChar::joining(uint ucs4)
 796 {
 797     if (ucs4 > LAST_UNICODE_CHAR)
 798         return QChar::OtherJoining;
 799     return (QChar::Joining) qGetProp(ucs4)->joining;
 800 }
 801
 802 /*!
 803 \overload
 804 Returns information about the joining properties of the UCS-2-encoded
 805 character specified by \a ucs2 (needed for certain languages such as
 806 Arabic).
 807  */
 808 QChar::Joining QChar::joining(ushort ucs2)
 809 {
 810     return (QChar::Joining) qGetProp(ucs2)->joining;
 811 }
 812
 813
 814 /*!
 815     Returns true if the character should be reversed if the text
 816     direction is reversed; otherwise returns false.
 817
 818     Same as (ch.mirroredChar() != ch).
 819
 820     \sa mirroredChar()
 821 */
 822 bool QChar::hasMirrored() const
 823 {
 824     return qGetProp(ucs)->mirrorDiff != 0;
 825 }
 826
 827 /*!
 828     \fn bool QChar::isLower() const
 829
 830     Returns true if the character is a lowercase letter, i.e.
 831     category() is Letter_Lowercase.
 832
 833     \sa isUpper(), toLower(), toUpper()
 834 */
 835
 836 /*!
 837     \fn bool QChar::isUpper() const
 838
 839     Returns true if the character is an uppercase letter, i.e.
 840     category() is Letter_Uppercase.
 841
 842     \sa isLower(), toUpper(), toLower()
 843 */
 844
 845 /*!
 846     \fn bool QChar::isTitleCase() const
 847     \since 4.3
 848
 849     Returns true if the character is a titlecase letter, i.e.
 850     category() is Letter_Titlecase.
 851
 852     \sa isLower(), toUpper(), toLower(), toTitleCase()
 853 */
 854
 855 /*!
 856     Returns the mirrored character if this character is a mirrored
 857     character; otherwise returns the character itself.
 858
 859     \sa hasMirrored()
 860 */
 861 QChar QChar::mirroredChar() const
 862 {
 863     return ucs + qGetProp(ucs)->mirrorDiff;
 864 }
 865
 866 /*! \overload
 867 Returns the mirrored character if the UCS-4-encoded character specified
 868 by \a ucs4 is a mirrored character; otherwise returns the character itself.
 869
 870 \sa hasMirrored()
 871  */
 872 uint QChar::mirroredChar(uint ucs4)
 873 {
 874     if (ucs4 > LAST_UNICODE_CHAR)
 875         return ucs4;
 876     return ucs4 + qGetProp(ucs4)->mirrorDiff;
 877 }
 878
 879 /*!
 880 \overload
 881 Returns the mirrored character if the UCS-2-encoded character specified
 882 by \a ucs2 is a mirrored character; otherwise returns the character itself.
 883
 884 \sa hasMirrored()
 885  */
 886 ushort QChar::mirroredChar(ushort ucs2)
 887 {
 888     return ucs2 + qGetProp(ucs2)->mirrorDiff;
 889 }
 890
 891
 892 enum {
 893     Hangul_SBase = 0xac00,
 894     Hangul_LBase = 0x1100,
 895     Hangul_VBase = 0x1161,
 896     Hangul_TBase = 0x11a7,
 897     Hangul_SCount = 11172,
 898     Hangul_LCount = 19,
 899     Hangul_VCount = 21,
 900     Hangul_TCount = 28,
 901     Hangul_NCount = 21*28
 902 };
 903
 904 // buffer has to have a length of 3. It's needed for Hangul decomposition
 905 static const unsigned short * QT_FASTCALL decompositionHelper
 906     (uint ucs4, int *length, int *tag, unsigned short *buffer)
 907 {
 908     *length = 0;
 909     if (ucs4 > LAST_UNICODE_CHAR)
 910         return 0;
 911     if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
 912         int SIndex = ucs4 - Hangul_SBase;
 913         buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
 914         buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
 915         buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
 916         *length = buffer[2] == Hangul_TBase ? 2 : 3;
 917         *tag = QChar::Canonical;
 918         return buffer;
 919     }
 920
 921     const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
 922     if (index == 0xffff)
 923         return 0;
 924     const unsigned short *decomposition = uc_decomposition_map+index;
 925     *tag = (*decomposition) & 0xff;
 926     *length = (*decomposition) >> 8;
 927     return decomposition+1;
 928 }
 929
 930 /*!
 931     Decomposes a character into its parts. Returns an empty string if
 932     no decomposition exists.
 933 */
 934 QString QChar::decomposition() const
 935 {
 936     return decomposition(ucs);
 937 }
 938
 939 /*!
 940 \overload
 941 Decomposes the UCS-4-encoded character specified by \a ucs4 into its
 942 constituent parts. Returns an empty string if no decomposition exists.
 943  */
 944 QString QChar::decomposition(uint ucs4)
 945 {
 946     unsigned short buffer[3];
 947     int length;
 948     int tag;
 949     const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
 950     return QString::fromUtf16(d, length);
 951 }
 952
 953 /*!
 954     Returns the tag defining the composition of the character. Returns
 955     QChar::Single if no decomposition exists.
 956 */
 957 QChar::Decomposition QChar::decompositionTag() const
 958 {
 959     return decompositionTag(ucs);
 960 }
 961
 962 /*!
 963 \overload
 964 Returns the tag defining the composition of the UCS-4-encoded character
 965 specified by \a ucs4. Returns QChar::Single if no decomposition exists.
 966  */
 967 QChar::Decomposition QChar::decompositionTag(uint ucs4)
 968 {
 969     if (ucs4 > LAST_UNICODE_CHAR)
 970         return QChar::NoDecomposition;
 971     const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
 972     if (index == 0xffff)
 973         return QChar::NoDecomposition;
 974     return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
 975 }
 976
 977 /*!
 978     Returns the combining class for the character as defined in the
 979     Unicode standard. This is mainly useful as a positioning hint for
 980     marks attached to a base character.
 981
 982     The Qt text rendering engine uses this information to correctly
 983     position non-spacing marks around a base character.
 984 */
 985 unsigned char QChar::combiningClass() const
 986 {
 987     return (unsigned char) qGetProp(ucs)->combiningClass;
 988 }
 989
 990 /*! \overload
 991 Returns the combining class for the UCS-4-encoded character specified by
 992 \a ucs4, as defined in the Unicode standard.
 993  */
 994 unsigned char QChar::combiningClass(uint ucs4)
 995 {
 996     if (ucs4 > LAST_UNICODE_CHAR)
 997         return 0;
 998     return (unsigned char) qGetProp(ucs4)->combiningClass;
 999 }
1000
1001 /*! \overload
1002 Returns the combining class for the UCS-2-encoded character specified by
1003 \a ucs2, as defined in the Unicode standard.
1004  */
1005 unsigned char QChar::combiningClass(ushort ucs2)
1006 {
1007     return (unsigned char) qGetProp(ucs2)->combiningClass;
1008 }
1009
1010
1011 /*!
1012     Returns the Unicode version that introduced this character.
1013 */
1014 QChar::UnicodeVersion QChar::unicodeVersion() const
1015 {
1016     return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1017 }
1018
1019 /*! \overload
1020 Returns the Unicode version that introduced the character specified in
1021 its UCS-4-encoded form as \a ucs4.
1022  */
1023 QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1024 {
1025     if (ucs4 > LAST_UNICODE_CHAR)
1026         return QChar::Unicode_Unassigned;
1027     return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1028 }
1029
1030 /*! \overload
1031 Returns the Unicode version that introduced the character specified in
1032 its UCS-2-encoded form as \a ucs2.
1033  */
1034 QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1035 {
1036     return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1037 }
1038
1039
1040 /*!
1041     Returns the lowercase equivalent if the character is uppercase or titlecase;
1042     otherwise returns the character itself.
1043 */
1044 QChar QChar::toLower() const
1045 {
1046     const QUnicodeTables::Properties *p = qGetProp(ucs);
1047     if (!p->lowerCaseSpecial)
1048         return ucs + p->lowerCaseDiff;
1049     return ucs;
1050 }
1051
1052 /*! \overload
1053 Returns the lowercase equivalent of the UCS-4-encoded character specified
1054 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1055 the character itself.
1056  */
1057 uint QChar::toLower(uint ucs4)
1058 {
1059     if (ucs4 > LAST_UNICODE_CHAR)
1060         return ucs4;
1061     const QUnicodeTables::Properties *p = qGetProp(ucs4);
1062     if (!p->lowerCaseSpecial)
1063         return ucs4 + p->lowerCaseDiff;
1064     return ucs4;
1065 }
1066
1067 /*! \overload
1068 Returns the lowercase equivalent of the UCS-2-encoded character specified
1069 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1070 the character itself.
1071  */
1072 ushort QChar::toLower(ushort ucs2)
1073 {
1074     const QUnicodeTables::Properties *p = qGetProp(ucs2);
1075     if (!p->lowerCaseSpecial)
1076         return ucs2 + p->lowerCaseDiff;
1077     return ucs2;
1078 }
1079
1080 /*!
1081     Returns the uppercase equivalent if the character is lowercase or titlecase;
1082     otherwise returns the character itself.
1083 */
1084 QChar QChar::toUpper() const
1085 {
1086     const QUnicodeTables::Properties *p = qGetProp(ucs);
1087     if (!p->upperCaseSpecial)
1088         return ucs + p->upperCaseDiff;
1089     return ucs;
1090 }
1091
1092 /*! \overload
1093 Returns the uppercase equivalent of the UCS-4-encoded character specified
1094 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1095 the character itself.
1096  */
1097 uint QChar::toUpper(uint ucs4)
1098 {
1099     if (ucs4 > LAST_UNICODE_CHAR)
1100         return ucs4;
1101     const QUnicodeTables::Properties *p = qGetProp(ucs4);
1102     if (!p->upperCaseSpecial)
1103         return ucs4 + p->upperCaseDiff;
1104     return ucs4;
1105 }
1106
1107 /*! \overload
1108 Returns the uppercase equivalent of the UCS-2-encoded character specified
1109 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1110 the character itself.
1111  */
1112 ushort QChar::toUpper(ushort ucs2)
1113 {
1114     const QUnicodeTables::Properties *p = qGetProp(ucs2);
1115     if (!p->upperCaseSpecial)
1116         return ucs2 + p->upperCaseDiff;
1117     return ucs2;
1118 }
1119
1120 /*!
1121     Returns the title case equivalent if the character is lowercase or uppercase;
1122     otherwise returns the character itself.
1123 */
1124 QChar QChar::toTitleCase() const
1125 {
1126     const QUnicodeTables::Properties *p = qGetProp(ucs);
1127     if (!p->titleCaseSpecial)
1128         return ucs + p->titleCaseDiff;
1129     return ucs;
1130 }
1131
1132 /*!
1133     \overload
1134     Returns the title case equivalent of the UCS-4-encoded character specified
1135     by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1136     the character itself.
1137 */
1138 uint QChar::toTitleCase(uint ucs4)
1139 {
1140     if (ucs4 > LAST_UNICODE_CHAR)
1141         return ucs4;
1142     const QUnicodeTables::Properties *p = qGetProp(ucs4);
1143     if (!p->titleCaseSpecial)
1144         return ucs4 + p->titleCaseDiff;
1145     return ucs4;
1146 }
1147
1148 /*!
1149     \overload
1150     Returns the title case equivalent of the UCS-2-encoded character specified
1151     by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1152     the character itself.
1153 */
1154 ushort QChar::toTitleCase(ushort ucs2)
1155 {
1156     const QUnicodeTables::Properties *p = qGetProp(ucs2);
1157     if (!p->titleCaseSpecial)
1158         return ucs2 + p->titleCaseDiff;
1159     return ucs2;
1160 }
1161
1162
1163 static inline uint foldCase(const ushort *ch, const ushort *start)
1164 {
1165     uint c = *ch;
1166     if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1167         c = QChar::surrogateToUcs4(*(ch - 1), c);
1168     return *ch + qGetProp(c)->caseFoldDiff;
1169 }
1170
1171 static inline uint foldCase(uint ch, uint &last)
1172 {
1173     uint c = ch;
1174     if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1175         c = QChar::surrogateToUcs4(last, c);
1176     last = ch;
1177     return ch + qGetProp(c)->caseFoldDiff;
1178 }
1179
1180 static inline ushort foldCase(ushort ch)
1181 {
1182     return ch + qGetProp(ch)->caseFoldDiff;
1183 }
1184
1185 /*!
1186     Returns the case folded equivalent of the character. For most Unicode characters this
    is the same as toLower().
1188 */
1189 QChar QChar::toCaseFolded() const
1190 {
1191     return ucs + qGetProp(ucs)->caseFoldDiff;
1192 }
1193
1194 /*!
1195     \overload
1196     Returns the case folded equivalent of the UCS-4-encoded character specified
    by \a ucs4. For most Unicode characters this is the same as toLower().
1198 */
1199 uint QChar::toCaseFolded(uint ucs4)
1200 {
1201     if (ucs4 > LAST_UNICODE_CHAR)
1202         return ucs4;
1203     return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1204 }
1205
1206 /*!
1207     \overload
1208     Returns the case folded equivalent of the UCS-2-encoded character specified
    by \a ucs2. For most Unicode characters this is the same as toLower().
1210 */
1211 ushort QChar::toCaseFolded(ushort ucs2)
1212 {
1213     return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1214 }
1215
1216
1217 /*!
1218     \fn char QChar::latin1() const
1219
1220     Use toLatin1() instead.
1221 */
1222
1223 /*!
1224     \fn char QChar::ascii() const
1225
1226     Use toAscii() instead.
1227 */
1228
1229 /*!
1230     \fn char QChar::toLatin1() const
1231
1232     Returns the Latin-1 character equivalent to the QChar, or 0. This
1233     is mainly useful for non-internationalized software.
1234
1235     \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
1236 */
1237
1238 /*!
1239     \fn char QChar::toAscii() const
1240     Returns the character value of the QChar obtained using the current
1241     codec used to read C strings, or 0 if the character is not representable
1242     using this codec. The default codec handles Latin-1 encoded text,
1243     but this can be changed to assist developers writing source code using
1244     other encodings.
1245
1246     The main purpose of this function is to preserve ASCII characters used
1247     in C strings. This is mainly useful for developers of non-internationalized
1248     software.
1249
1250     \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
1251 */
1252 #ifdef Q_COMPILER_MANGLES_RETURN_TYPE
1253 const char QChar::toAscii() const
1254 #else
1255 char QChar::toAscii() const
1256 #endif
1257 {
1258 #ifndef QT_NO_CODEC_FOR_C_STRINGS
1259     if (QTextCodec::codecForCStrings())
1260         // #####
1261         return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
1262 #endif
1263     return ucs > 0xff ? 0 : char(ucs);
1264 }
1265
1266 /*!
1267     \fn QChar QChar::fromLatin1(char c)
1268
1269     Converts the Latin-1 character \a c to its equivalent QChar. This
1270     is mainly useful for non-internationalized software.
1271
1272     \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
1273 */
1274
1275 /*!
1276     Converts the ASCII character \a c to its equivalent QChar. This
1277     is mainly useful for non-internationalized software.
1278
1279     An alternative is to use QLatin1Char.
1280
1281     \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
1282 */
1283 QChar QChar::fromAscii(char c)
1284 {
1285 #ifndef QT_NO_CODEC_FOR_C_STRINGS
1286     if (QTextCodec::codecForCStrings())
1287         // #####
1288         return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
1289 #endif
1290     return QChar(ushort((uchar)c));
1291 }
1292
1293 #ifndef QT_NO_DATASTREAM
1294 /*!
1295   \relates QChar
1296
1297   Writes the char \a chr to the stream \a out.
1298
1299   \sa {Format of the QDataStream operators}
1300  */
1301
1302 QDataStream &operator<<(QDataStream &out, const QChar &chr)
1303 {
1304     out << quint16(chr.unicode());
1305     return out;
1306 }
1307
1308
1309 /*!
1310   \relates QChar
1311
1312   Reads a char from the stream \a in into char \a chr.
1313
1314   \sa {Format of the QDataStream operators}
1315  */
1316
1317 QDataStream &operator>>(QDataStream &in, QChar &chr)
1318 {
1319     quint16 u;
1320     in >> u;
1321     chr.unicode() = ushort(u);
1322     return in;
1323 }
1324 #endif // QT_NO_DATASTREAM
1325
1326 /*!
1327     \fn ushort & QChar::unicode()
1328
1329     Returns a reference to the numeric Unicode value of the QChar.
1330 */
1331
1332 /*!
1333     \fn ushort QChar::unicode() const
1334
1335     \overload
1336 */
1337
1338 /*****************************************************************************
1339   Documentation of QChar related functions
1340  *****************************************************************************/
1341
1342 /*!
1343     \fn bool operator==(QChar c1, QChar c2)
1344
1345     \relates QChar
1346
1347     Returns true if \a c1 and \a c2 are the same Unicode character;
1348     otherwise returns false.
1349 */
1350
1351 /*!
1352     \fn int operator!=(QChar c1, QChar c2)
1353
1354     \relates QChar
1355
1356     Returns true if \a c1 and \a c2 are not the same Unicode
1357     character; otherwise returns false.
1358 */
1359
1360 /*!
1361     \fn int operator<=(QChar c1, QChar c2)
1362
1363     \relates QChar
1364
1365     Returns true if the numeric Unicode value of \a c1 is less than
1366     or equal to that of \a c2; otherwise returns false.
1367 */
1368
1369 /*!
1370     \fn int operator>=(QChar c1, QChar c2)
1371
1372     \relates QChar
1373
1374     Returns true if the numeric Unicode value of \a c1 is greater than
1375     or equal to that of \a c2; otherwise returns false.
1376 */
1377
1378 /*!
1379     \fn int operator<(QChar c1, QChar c2)
1380
1381     \relates QChar
1382
1383     Returns true if the numeric Unicode value of \a c1 is less than
1384     that of \a c2; otherwise returns false.
1385 */
1386
1387 /*!
1388     \fn int operator>(QChar c1, QChar c2)
1389
1390     \relates QChar
1391
1392     Returns true if the numeric Unicode value of \a c1 is greater than
1393     that of \a c2; otherwise returns false.
1394 */
1395
1396 /*!
1397     \fn bool QChar::mirrored() const
1398
1399     Use hasMirrored() instead.
1400 */
1401
1402 /*!
1403     \fn QChar QChar::lower() const
1404
1405     Use toLower() instead.
1406 */
1407
1408 /*!
1409     \fn QChar QChar::upper() const
1410
1411     Use toUpper() instead.
1412 */
1413
1414 /*!
1415     \fn bool QChar::networkOrdered()
1416
1417     See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
1418 */
1419
1420
1421 // ---------------------------------------------------------------------------
1422
1423
1424 static QString decomposeHelper
1425     (const QString &str, bool canonical, QChar::UnicodeVersion version)
1426 {
1427     unsigned short buffer[3];
1428
1429     QString s = str;
1430
1431     const unsigned short *utf16 = s.utf16();
1432     const unsigned short *uc = utf16 + s.length();
1433     while (uc != utf16) {
1434         uint ucs4 = *(--uc);
1435         if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1436             ushort high = *(uc - 1);
1437             if (QChar(high).isHighSurrogate()) {
1438                 --uc;
1439                 ucs4 = QChar::surrogateToUcs4(high, ucs4);
1440             }
1441         }
1442         if (QChar::unicodeVersion(ucs4) > version)
1443             continue;
1444         int length;
1445         int tag;
1446         const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1447         if (!d || (canonical && tag != QChar::Canonical))
1448             continue;
1449
1450         s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
1451         // since the insert invalidates the pointers and we do decomposition recursive
1452         int pos = uc - utf16;
1453         utf16 = s.utf16();
1454         uc = utf16 + pos + length;
1455     }
1456
1457     return s;
1458 }
1459
1460
1461 static ushort ligatureHelper(ushort u1, ushort u2)
1462 {
1463     // hangul L-V pair
1464     int LIndex = u1 - Hangul_LBase;
1465     if (0 <= LIndex && LIndex < Hangul_LCount) {
1466         int VIndex = u2 - Hangul_VBase;
1467         if (0 <= VIndex && VIndex < Hangul_VCount)
1468             return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
1469     }
1470
1471     // hangul LV-T pair
1472     int SIndex = u1 - Hangul_SBase;
1473     if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1474         int TIndex = u2 - Hangul_TBase;
1475         if (0 <= TIndex && TIndex <= Hangul_TCount)
1476             return u1 + TIndex;
1477     }
1478
1479     const unsigned short index = GET_LIGATURE_INDEX(u2);
1480     if (index == 0xffff)
1481         return 0;
1482     const unsigned short *ligatures = uc_ligature_map+index;
1483     ushort length = *ligatures;
1484     ++ligatures;
1485     // ### use bsearch
1486     for (uint i = 0; i < length; ++i)
1487         if (ligatures[2*i] == u1)
1488             return ligatures[2*i+1];
1489     return 0;
1490 }
1491
// Returns a copy of \a str in which each (starter, following character) pair
// that ligatureHelper() can combine has been replaced by the resulting
// ligature. Strings shorter than two code units are returned unchanged.
static QString composeHelper(const QString &str)
{
    QString s = str;

    if (s.length() < 2)
        return s;

    // the loop can partly ignore high Unicode as all ligatures are in the BMP
    int starter = 0;        // index of the most recent character with combining class 0
    int lastCombining = 0;  // combining class of the previously examined character
    int pos = 0;
    while (pos < s.length()) {
        uint uc = s.utf16()[pos];
        // Decode a surrogate pair; pos then refers to the low surrogate.
        if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
            ushort low = s.utf16()[pos+1];
            if (QChar(low).isLowSurrogate()) {
                uc = QChar::surrogateToUcs4(uc, low);
                ++pos;
            }
        }
        int combining = QChar::combiningClass(uc);
        // A ligature is attempted only when the character directly follows the
        // starter, or its combining class is higher than the previous one.
        if (starter == pos - 1 || combining > lastCombining) {
            // allowed to form ligature with S
            // NOTE(review): ligatureHelper() takes ushort arguments, so a code
            // point above 0xffff is narrowed here - confirm this is intended.
            QChar ligature = ligatureHelper(s.utf16()[starter], uc);
            if (ligature.unicode()) {
                // Store the ligature at the starter and drop the consumed unit.
                // pos is not advanced: the unit that slides into pos is
                // examined next against the same starter.
                s[starter] = ligature;
                s.remove(pos, 1);
                continue;
            }
        }
        if (!combining)
            starter = pos;
        lastCombining = combining;
        ++pos;
    }
    return s;
}
1529
1530
1531 static QString canonicalOrderHelper
1532     (const QString &str, QChar::UnicodeVersion version)
1533 {
1534     QString s = str;
1535     const int l = s.length()-1;
1536     int pos = 0;
1537     while (pos < l) {
1538         int p2 = pos+1;
1539         uint u1 = s.at(pos).unicode();
1540         if (QChar(u1).isHighSurrogate()) {
1541             ushort low = s.at(pos+1).unicode();
1542             if (QChar(low).isLowSurrogate()) {
1543                 p2++;
1544                 u1 = QChar::surrogateToUcs4(u1, low);
1545                 if (p2 >= l)
1546                     break;
1547             }
1548         }
1549         uint u2 = s.at(p2).unicode();
1550         if (QChar(u2).isHighSurrogate() && p2 < l-1) {
1551             ushort low = s.at(p2+1).unicode();
1552             if (QChar(low).isLowSurrogate()) {
1553                 p2++;
1554                 u2 = QChar::surrogateToUcs4(u2, low);
1555             }
1556         }
1557
1558         int c2 = QChar::combiningClass(u2);
1559         if (QChar::unicodeVersion(u2) > version)
1560             c2 = 0;
1561
1562         if (c2 == 0) {
1563             pos = p2+1;
1564             continue;
1565         }
1566         int c1 = QChar::combiningClass(u1);
1567         if (QChar::unicodeVersion(u1) > version)
1568             c1 = 0;
1569
1570         if (c1 > c2) {
1571             QChar *uc = s.data();
1572             int p = pos;
1573             // exchange characters
1574             if (u2 < 0x10000) {
1575                 uc[p++] = u2;
1576             } else {
1577                 uc[p++] = QChar::highSurrogate(u2);
1578                 uc[p++] = QChar::lowSurrogate(u2);
1579             }
1580             if (u1 < 0x10000) {
1581                 uc[p++] = u1;
1582             } else {
1583                 uc[p++] = QChar::highSurrogate(u1);
1584                 uc[p++] = QChar::lowSurrogate(u1);
1585             }
1586             if (pos > 0)
1587                 --pos;
1588             if (pos > 0 && s.at(pos).isLowSurrogate())
1589                 --pos;
1590         } else {
1591             ++pos;
1592             if (u1 > 0x10000)
1593                 ++pos;
1594         }
1595     }
1596     return s;
1597 }
1598
1599 int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
1600 {
1601     if (uc > 0xffff)
1602         return Common;
1603     int script = uc_scripts[uc >> 7];
1604     if (script < ScriptSentinel)
1605         return script;
1606     script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);
1607     script = uc_scripts[script + (uc & 0x7f)];
1608     return script;
1609 }
1610
1611
1612 Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
1613 {
1614     return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
1615 }
1616
1617
1618 QT_END_NAMESPACE