1 /****************************************************************************
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4 ** Contact: Qt Software Information (qt-info@nokia.com)
6 ** This file is part of the QtCore module of the Qt Toolkit.
8 ** $QT_BEGIN_LICENSE:LGPL$
10 ** This file contains pre-release code and may not be distributed.
11 ** You may use this file in accordance with the terms and conditions
12 ** contained in the either Technology Preview License Agreement or the
13 ** Beta Release License Agreement.
15 ** GNU Lesser General Public License Usage
16 ** Alternatively, this file may be used under the terms of the GNU Lesser
17 ** General Public License version 2.1 as published by the Free Software
18 ** Foundation and appearing in the file LICENSE.LGPL included in the
19 ** packaging of this file. Please review the following information to
20 ** ensure the GNU Lesser General Public License version 2.1 requirements
21 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23 ** In addition, as a special exception, Nokia gives you certain
24 ** additional rights. These rights are described in the Nokia Qt LGPL
25 ** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
28 ** GNU General Public License Usage
29 ** Alternatively, this file may be used under the terms of the GNU
30 ** General Public License version 3.0 as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL included in the
32 ** packaging of this file. Please review the following information to
33 ** ensure the GNU General Public License version 3.0 requirements will be
34 ** met: http://www.gnu.org/copyleft/gpl.html.
36 ** If you are unsure which license is appropriate for your use, please
37 ** contact the sales department at qt-sales@nokia.com.
40 ****************************************************************************/
42 // Don't define it while compiling this module, or USERS of Qt will
43 // not be able to link.
44 #ifdef QT_NO_CAST_FROM_ASCII
45 #undef QT_NO_CAST_FROM_ASCII
47 #ifdef QT_NO_CAST_TO_ASCII
48 #undef QT_NO_CAST_TO_ASCII
51 #include "qdatastream.h"
52 #include "qtextcodec.h"
54 #include "qunicodetables_p.h"
56 #include "qunicodetables.cpp"
60 #define LAST_UNICODE_CHAR 0x10ffff
62 #ifndef QT_NO_CODEC_FOR_C_STRINGS
63 #ifdef QT_NO_TEXTCODEC
64 #define QT_NO_CODEC_FOR_C_STRINGS
68 #define FLAG(x) (1 << (x))
70 /*! \class QLatin1Char
71 \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
75 This class is only useful to avoid the codec for C strings business
76 in the QChar(ch) constructor. You can avoid it by writing
79 \sa QChar, QLatin1String, QString
83 \fn const char QLatin1Char::toLatin1() const
85 Converts a Latin-1 character to an 8-bit ASCII representation of
90 \fn const ushort QLatin1Char::unicode() const
92 Converts a Latin-1 character to a 16-bit-encoded Unicode representation
97 \fn QLatin1Char::QLatin1Char(char c)
99 Constructs a Latin-1 character for \a c. This constructor should be
100 used when the encoding of the input character is known to be Latin-1.
105 \brief The QChar class provides a 16-bit Unicode character.
110 In Qt, Unicode characters are 16-bit entities without any markup
111 or structure. This class represents such an entity. It is
112 lightweight, so it can be used everywhere. Most compilers treat
113 it like a \c{unsigned short}.
115 QChar provides a full complement of testing/classification
116 functions, converting to and from other formats, converting from
117 composed to decomposed Unicode, and trying to compare and
118 case-convert if you ask it to.
120 The classification functions include functions like those in the
121 standard C++ header \<cctype\> (formerly \<ctype.h\>), but
122 operating on the full range of Unicode characters. They all
123 return true if the character is a certain type of character;
124 otherwise they return false. These classification functions are
125 isNull() (returns true if the character is '\\0'), isPrint()
126 (true if the character is any sort of printable character,
127 including whitespace), isPunct() (any sort of punctuation),
128 isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
129 sort of numeric character, not just 0-9), isLetterOrNumber(), and
130 isDigit() (decimal digits). All of these are wrappers around
131 category() which returns the Unicode-defined category of each
134 QChar also provides direction(), which indicates the "natural"
135 writing direction of this character. The joining() function
136 indicates how the character joins with its neighbors (needed
137 mostly for Arabic) and finally hasMirrored(), which indicates
138 whether the character needs to be mirrored when it is printed in
139 its "unnatural" writing direction.
141 Composed Unicode characters (like \aring) can be converted to
142 decomposed Unicode ("a" followed by "ring above") by using
145 In Unicode, comparison is not necessarily possible and case
146 conversion is very difficult at best. Unicode, covering the
147 "entire" world, also includes most of the world's case and
148 sorting problems. operator==() and friends will do comparison
149 based purely on the numeric Unicode value (code point) of the
150 characters, and toUpper() and toLower() will do case changes when
151 the character has a well-defined uppercase/lowercase equivalent.
152 For locale-dependent comparisons, use
153 QString::localeAwareCompare().
155 The conversion functions include unicode() (to a scalar),
156 toLatin1() (to scalar, but converts all non-Latin-1 characters to
157 0), row() (gives the Unicode row), cell() (gives the Unicode
158 cell), digitValue() (gives the integer value of any of the
159 numerous digit characters), and a host of constructors.
161 QChar provides constructors and cast operators that make it easy
162 to convert to and from traditional 8-bit \c{char}s. If you
163 defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
164 explained in the QString documentation, you will need to
165 explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
166 to construct a QChar from an 8-bit \c char, and you will need to
167 call toAscii() or toLatin1() to get the 8-bit value back.
169 \sa QString, Unicode, QLatin1Char
173 \enum QChar::UnicodeVersion
175 Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
176 introduced a certain character.
178 \value Unicode_1_1 Version 1.1
179 \value Unicode_2_0 Version 2.0
180 \value Unicode_2_1_2 Version 2.1.2
181 \value Unicode_3_0 Version 3.0
182 \value Unicode_3_1 Version 3.1
183 \value Unicode_3_2 Version 3.2
184 \value Unicode_4_0 Version 4.0
185 \value Unicode_4_1 Version 4.1
186 \value Unicode_5_0 Version 5.0
187 \value Unicode_Unassigned The value is not assigned to any character
188 in version 5.0 of Unicode.
194 \enum QChar::Category
196 This enum maps the Unicode character categories.
198 The following characters are normative in Unicode:
200 \value Mark_NonSpacing Unicode class name Mn
202 \value Mark_SpacingCombining Unicode class name Mc
204 \value Mark_Enclosing Unicode class name Me
206 \value Number_DecimalDigit Unicode class name Nd
208 \value Number_Letter Unicode class name Nl
210 \value Number_Other Unicode class name No
212 \value Separator_Space Unicode class name Zs
214 \value Separator_Line Unicode class name Zl
216 \value Separator_Paragraph Unicode class name Zp
218 \value Other_Control Unicode class name Cc
220 \value Other_Format Unicode class name Cf
222 \value Other_Surrogate Unicode class name Cs
224 \value Other_PrivateUse Unicode class name Co
226 \value Other_NotAssigned Unicode class name Cn
229 The following categories are informative in Unicode:
231 \value Letter_Uppercase Unicode class name Lu
233 \value Letter_Lowercase Unicode class name Ll
235 \value Letter_Titlecase Unicode class name Lt
237 \value Letter_Modifier Unicode class name Lm
239 \value Letter_Other Unicode class name Lo
241 \value Punctuation_Connector Unicode class name Pc
243 \value Punctuation_Dash Unicode class name Pd
245 \value Punctuation_Open Unicode class name Ps
247 \value Punctuation_Close Unicode class name Pe
249 \value Punctuation_InitialQuote Unicode class name Pi
251 \value Punctuation_FinalQuote Unicode class name Pf
253 \value Punctuation_Other Unicode class name Po
255 \value Symbol_Math Unicode class name Sm
257 \value Symbol_Currency Unicode class name Sc
259 \value Symbol_Modifier Unicode class name Sk
261 \value Symbol_Other Unicode class name So
263 \value NoCategory Qt cannot find an appropriate category for the character.
265 \omitvalue Punctuation_Dask
271 \enum QChar::Direction
273 This enum type defines the Unicode direction attributes. See the
274 \l{http://www.unicode.org/}{Unicode Standard} for a description
277 In order to conform to C/C++ naming conventions "Dir" is prepended
278 to the codes used in the Unicode Standard.
304 \enum QChar::Decomposition
306 This enum type defines the Unicode decomposition attributes. See
307 the \l{http://www.unicode.org/}{Unicode Standard} for a
308 description of the values.
310 \value NoDecomposition
337 This enum type defines the Unicode joining attributes. See the
338 \l{http://www.unicode.org/}{Unicode Standard} for a description
350 \enum QChar::CombiningClass
354 This enum type defines names for some of the Unicode combining
355 classes. See the \l{http://www.unicode.org/}{Unicode Standard}
356 for a description of the values.
358 \value Combining_Above
359 \value Combining_AboveAttached
360 \value Combining_AboveLeft
361 \value Combining_AboveLeftAttached
362 \value Combining_AboveRight
363 \value Combining_AboveRightAttached
364 \value Combining_Below
365 \value Combining_BelowAttached
366 \value Combining_BelowLeft
367 \value Combining_BelowLeftAttached
368 \value Combining_BelowRight
369 \value Combining_BelowRightAttached
370 \value Combining_DoubleAbove
371 \value Combining_DoubleBelow
372 \value Combining_IotaSubscript
373 \value Combining_Left
374 \value Combining_LeftAttached
375 \value Combining_Right
376 \value Combining_RightAttached
380 \enum QChar::SpecialCharacter
382 \value Null A QChar with this value isNull().
383 \value Nbsp Non-breaking space.
384 \value ReplacementCharacter
385 \value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used.
387 \value ByteOrderSwapped
388 \value ParagraphSeparator
392 \omitvalue replacement
393 \omitvalue byteOrderMark
394 \omitvalue byteOrderSwapped
399 \fn void QChar::setCell(uchar cell)
404 \fn void QChar::setRow(uchar row)
411 Constructs a null QChar ('\\0').
417 \fn QChar::QChar(QLatin1Char ch)
419 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
423 \fn QChar::QChar(SpecialCharacter ch)
425 Constructs a QChar for the predefined character value \a ch.
429 Constructs a QChar corresponding to ASCII/Latin-1 character \a
432 QChar::QChar(char ch
)
434 #ifndef QT_NO_CODEC_FOR_C_STRINGS
435 if (QTextCodec::codecForCStrings())
437 ucs
= QTextCodec::codecForCStrings()->toUnicode(&ch
, 1).at(0).unicode();
444 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
446 QChar::QChar(uchar ch
)
448 #ifndef QT_NO_CODEC_FOR_C_STRINGS
449 if (QTextCodec::codecForCStrings()) {
452 ucs
= QTextCodec::codecForCStrings()->toUnicode(&c
, 1).at(0).unicode();
459 \fn QChar::QChar(uchar cell, uchar row)
461 Constructs a QChar for Unicode cell \a cell in row \a row.
467 \fn QChar::QChar(ushort code)
469 Constructs a QChar for the character with Unicode code point \a
475 \fn QChar::QChar(short code)
477 Constructs a QChar for the character with Unicode code point \a
483 \fn QChar::QChar(uint code)
485 Constructs a QChar for the character with Unicode code point \a
491 \fn QChar::QChar(int code)
493 Constructs a QChar for the character with Unicode code point \a
499 \fn bool QChar::isNull() const
501 Returns true if the character is the Unicode character 0x0000
502 ('\\0'); otherwise returns false.
506 \fn uchar QChar::cell() const
508 Returns the cell (least significant byte) of the Unicode
515 \fn uchar QChar::row() const
517 Returns the row (most significant byte) of the Unicode character.
523 Returns true if the character is a printable character; otherwise
524 returns false. This is any character not of category Cc or Cn.
526 Note that this gives no indication of whether the character is
527 available in a particular font.
529 bool QChar::isPrint() const
531 const int test
= FLAG(Other_Control
) |
532 FLAG(Other_NotAssigned
);
533 return !(FLAG(qGetProp(ucs
)->category
) & test
);
537 Returns true if the character is a separator character
538 (Separator_* categories); otherwise returns false.
540 bool QChar::isSpace() const
542 if(ucs
>= 9 && ucs
<=13)
544 const int test
= FLAG(Separator_Space
) |
545 FLAG(Separator_Line
) |
546 FLAG(Separator_Paragraph
);
547 return FLAG(qGetProp(ucs
)->category
) & test
;
551 Returns true if the character is a mark (Mark_* categories);
552 otherwise returns false.
554 See QChar::Category for more information regarding marks.
556 bool QChar::isMark() const
558 const int test
= FLAG(Mark_NonSpacing
) |
559 FLAG(Mark_SpacingCombining
) |
560 FLAG(Mark_Enclosing
);
561 return FLAG(qGetProp(ucs
)->category
) & test
;
565 Returns true if the character is a punctuation mark (Punctuation_*
566 categories); otherwise returns false.
568 bool QChar::isPunct() const
570 const int test
= FLAG(Punctuation_Connector
) |
571 FLAG(Punctuation_Dash
) |
572 FLAG(Punctuation_Open
) |
573 FLAG(Punctuation_Close
) |
574 FLAG(Punctuation_InitialQuote
) |
575 FLAG(Punctuation_FinalQuote
) |
576 FLAG(Punctuation_Other
);
577 return FLAG(qGetProp(ucs
)->category
) & test
;
581 Returns true if the character is a letter (Letter_* categories);
582 otherwise returns false.
584 bool QChar::isLetter() const
586 const int test
= FLAG(Letter_Uppercase
) |
587 FLAG(Letter_Lowercase
) |
588 FLAG(Letter_Titlecase
) |
589 FLAG(Letter_Modifier
) |
591 return FLAG(qGetProp(ucs
)->category
) & test
;
595 Returns true if the character is a number (Number_* categories,
596 not just 0-9); otherwise returns false.
600 bool QChar::isNumber() const
602 const int test
= FLAG(Number_DecimalDigit
) |
603 FLAG(Number_Letter
) |
605 return FLAG(qGetProp(ucs
)->category
) & test
;
609 Returns true if the character is a letter or number (Letter_* or
610 Number_* categories); otherwise returns false.
612 bool QChar::isLetterOrNumber() const
614 const int test
= FLAG(Letter_Uppercase
) |
615 FLAG(Letter_Lowercase
) |
616 FLAG(Letter_Titlecase
) |
617 FLAG(Letter_Modifier
) |
619 FLAG(Number_DecimalDigit
) |
620 FLAG(Number_Letter
) |
622 return FLAG(qGetProp(ucs
)->category
) & test
;
627 Returns true if the character is a decimal digit
628 (Number_DecimalDigit); otherwise returns false.
630 bool QChar::isDigit() const
632 return (qGetProp(ucs
)->category
== Number_DecimalDigit
);
637 Returns true if the character is a symbol (Symbol_* categories);
638 otherwise returns false.
640 bool QChar::isSymbol() const
642 const int test
= FLAG(Symbol_Math
) |
643 FLAG(Symbol_Currency
) |
644 FLAG(Symbol_Modifier
) |
646 return FLAG(qGetProp(ucs
)->category
) & test
;
650 \fn bool QChar::isHighSurrogate() const
652 Returns true if the QChar is the high part of a utf16 surrogate
653 (i.e. if its code point is between 0xd800 and 0xdbff).
657 \fn bool QChar::isLowSurrogate() const
659 Returns true if the QChar is the low part of a utf16 surrogate
660 (i.e. if its code point is between 0xdc00 and 0xdfff).
664 \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
666 Converts a UTF16 surrogate pair with the given \a high and \a low values
667 to its UCS-4 code point.
671 \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
673 Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
678 \fn static ushort QChar::highSurrogate(uint ucs4)
680 Returns the high surrogate value of a ucs4 code point.
681 The returned result is undefined if \a ucs4 is smaller than 0x10000.
685 \fn static ushort QChar::lowSurrogate(uint ucs4)
687 Returns the low surrogate value of a ucs4 code point.
688 The returned result is undefined if \a ucs4 is smaller than 0x10000.
692 Returns the numeric value of the digit, or -1 if the character is
695 int QChar::digitValue() const
697 return qGetProp(ucs
)->digitValue
;
702 Returns the numeric value of the digit, specified by the UCS-2-encoded
703 character, \a ucs2, or -1 if the character is not a digit.
705 int QChar::digitValue(ushort ucs2
)
707 return qGetProp(ucs2
)->digitValue
;
712 Returns the numeric value of the digit specified by the UCS-4-encoded
713 character, \a ucs4, or -1 if the character is not a digit.
715 int QChar::digitValue(uint ucs4
)
717 if (ucs4
> LAST_UNICODE_CHAR
)
719 return qGetProp(ucs4
)->digitValue
;
723 Returns the character's category.
725 QChar::Category
QChar::category() const
727 return (QChar::Category
) qGetProp(ucs
)->category
;
733 Returns the category of the UCS-4-encoded character specified by \a ucs4.
735 QChar::Category
QChar::category(uint ucs4
)
737 if (ucs4
> LAST_UNICODE_CHAR
)
738 return QChar::NoCategory
;
739 return (QChar::Category
) qGetProp(ucs4
)->category
;
744 Returns the category of the UCS-2-encoded character specified by \a ucs2.
746 QChar::Category
QChar::category(ushort ucs2
)
748 return (QChar::Category
) qGetProp(ucs2
)->category
;
753 Returns the character's direction.
755 QChar::Direction
QChar::direction() const
757 return (QChar::Direction
) qGetProp(ucs
)->direction
;
762 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
764 QChar::Direction
QChar::direction(uint ucs4
)
766 if (ucs4
> LAST_UNICODE_CHAR
)
768 return (QChar::Direction
) qGetProp(ucs4
)->direction
;
773 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
775 QChar::Direction
QChar::direction(ushort ucs2
)
777 return (QChar::Direction
) qGetProp(ucs2
)->direction
;
781 Returns information about the joining properties of the character
782 (needed for certain languages such as Arabic).
784 QChar::Joining
QChar::joining() const
786 return (QChar::Joining
) qGetProp(ucs
)->joining
;
791 Returns information about the joining properties of the UCS-4-encoded
792 character specified by \a ucs4 (needed for certain languages such as
795 QChar::Joining
QChar::joining(uint ucs4
)
797 if (ucs4
> LAST_UNICODE_CHAR
)
798 return QChar::OtherJoining
;
799 return (QChar::Joining
) qGetProp(ucs4
)->joining
;
804 Returns information about the joining properties of the UCS-2-encoded
805 character specified by \a ucs2 (needed for certain languages such as
808 QChar::Joining
QChar::joining(ushort ucs2
)
810 return (QChar::Joining
) qGetProp(ucs2
)->joining
;
815 Returns true if the character should be reversed if the text
816 direction is reversed; otherwise returns false.
818 Same as (ch.mirroredChar() != ch).
822 bool QChar::hasMirrored() const
824 return qGetProp(ucs
)->mirrorDiff
!= 0;
828 \fn bool QChar::isLower() const
830 Returns true if the character is a lowercase letter, i.e.
831 category() is Letter_Lowercase.
833 \sa isUpper(), toLower(), toUpper()
837 \fn bool QChar::isUpper() const
839 Returns true if the character is an uppercase letter, i.e.
840 category() is Letter_Uppercase.
842 \sa isLower(), toUpper(), toLower()
846 \fn bool QChar::isTitleCase() const
849 Returns true if the character is a titlecase letter, i.e.
850 category() is Letter_Titlecase.
852 \sa isLower(), toUpper(), toLower(), toTitleCase()
856 Returns the mirrored character if this character is a mirrored
857 character; otherwise returns the character itself.
861 QChar
QChar::mirroredChar() const
863 return ucs
+ qGetProp(ucs
)->mirrorDiff
;
867 Returns the mirrored character if the UCS-4-encoded character specified
868 by \a ucs4 is a mirrored character; otherwise returns the character itself.
872 uint
QChar::mirroredChar(uint ucs4
)
874 if (ucs4
> LAST_UNICODE_CHAR
)
876 return ucs4
+ qGetProp(ucs4
)->mirrorDiff
;
881 Returns the mirrored character if the UCS-2-encoded character specified
882 by \a ucs2 is a mirrored character; otherwise returns the character itself.
886 ushort
QChar::mirroredChar(ushort ucs2
)
888 return ucs2
+ qGetProp(ucs2
)->mirrorDiff
;
// Constants for algorithmic Hangul syllable (de)composition.
// S = precomposed syllable, L = leading consonant, V = vowel,
// T = trailing consonant. NCount is the number of V/T combinations per
// leading consonant, and SCount == LCount * NCount.
enum {
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,
    Hangul_SCount = 11172,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = 21*28
};
904 // buffer has to have a length of 3. It's needed for Hangul decomposition
905 static const unsigned short * QT_FASTCALL decompositionHelper
906 (uint ucs4
, int *length
, int *tag
, unsigned short *buffer
)
909 if (ucs4
> LAST_UNICODE_CHAR
)
911 if (ucs4
>= Hangul_SBase
&& ucs4
< Hangul_SBase
+ Hangul_SCount
) {
912 int SIndex
= ucs4
- Hangul_SBase
;
913 buffer
[0] = Hangul_LBase
+ SIndex
/ Hangul_NCount
; // L
914 buffer
[1] = Hangul_VBase
+ (SIndex
% Hangul_NCount
) / Hangul_TCount
; // V
915 buffer
[2] = Hangul_TBase
+ SIndex
% Hangul_TCount
; // T
916 *length
= buffer
[2] == Hangul_TBase
? 2 : 3;
917 *tag
= QChar::Canonical
;
921 const unsigned short index
= GET_DECOMPOSITION_INDEX(ucs4
);
924 const unsigned short *decomposition
= uc_decomposition_map
+index
;
925 *tag
= (*decomposition
) & 0xff;
926 *length
= (*decomposition
) >> 8;
927 return decomposition
+1;
931 Decomposes a character into its parts. Returns an empty string if
932 no decomposition exists.
934 QString
QChar::decomposition() const
936 return decomposition(ucs
);
941 Decomposes the UCS-4-encoded character specified by \a ucs4 into its
942 constituent parts. Returns an empty string if no decomposition exists.
944 QString
QChar::decomposition(uint ucs4
)
946 unsigned short buffer
[3];
949 const unsigned short *d
= decompositionHelper(ucs4
, &length
, &tag
, buffer
);
950 return QString::fromUtf16(d
, length
);
954 Returns the tag defining the composition of the character. Returns
955 QChar::Single if no decomposition exists.
957 QChar::Decomposition
QChar::decompositionTag() const
959 return decompositionTag(ucs
);
964 Returns the tag defining the composition of the UCS-4-encoded character
965 specified by \a ucs4. Returns QChar::Single if no decomposition exists.
967 QChar::Decomposition
QChar::decompositionTag(uint ucs4
)
969 if (ucs4
> LAST_UNICODE_CHAR
)
970 return QChar::NoDecomposition
;
971 const unsigned short index
= GET_DECOMPOSITION_INDEX(ucs4
);
973 return QChar::NoDecomposition
;
974 return (QChar::Decomposition
)(uc_decomposition_map
[index
] & 0xff);
978 Returns the combining class for the character as defined in the
979 Unicode standard. This is mainly useful as a positioning hint for
980 marks attached to a base character.
982 The Qt text rendering engine uses this information to correctly
983 position non-spacing marks around a base character.
985 unsigned char QChar::combiningClass() const
987 return (unsigned char) qGetProp(ucs
)->combiningClass
;
991 Returns the combining class for the UCS-4-encoded character specified by
992 \a ucs4, as defined in the Unicode standard.
994 unsigned char QChar::combiningClass(uint ucs4
)
996 if (ucs4
> LAST_UNICODE_CHAR
)
998 return (unsigned char) qGetProp(ucs4
)->combiningClass
;
1002 Returns the combining class for the UCS-2-encoded character specified by
1003 \a ucs2, as defined in the Unicode standard.
1005 unsigned char QChar::combiningClass(ushort ucs2
)
1007 return (unsigned char) qGetProp(ucs2
)->combiningClass
;
1012 Returns the Unicode version that introduced this character.
1014 QChar::UnicodeVersion
QChar::unicodeVersion() const
1016 return (QChar::UnicodeVersion
) qGetProp(ucs
)->unicodeVersion
;
1020 Returns the Unicode version that introduced the character specified in
1021 its UCS-4-encoded form as \a ucs4.
1023 QChar::UnicodeVersion
QChar::unicodeVersion(uint ucs4
)
1025 if (ucs4
> LAST_UNICODE_CHAR
)
1026 return QChar::Unicode_Unassigned
;
1027 return (QChar::UnicodeVersion
) qGetProp(ucs4
)->unicodeVersion
;
1031 Returns the Unicode version that introduced the character specified in
1032 its UCS-2-encoded form as \a ucs2.
1034 QChar::UnicodeVersion
QChar::unicodeVersion(ushort ucs2
)
1036 return (QChar::UnicodeVersion
) qGetProp(ucs2
)->unicodeVersion
;
1041 Returns the lowercase equivalent if the character is uppercase or titlecase;
1042 otherwise returns the character itself.
1044 QChar
QChar::toLower() const
1046 const QUnicodeTables::Properties
*p
= qGetProp(ucs
);
1047 if (!p
->lowerCaseSpecial
)
1048 return ucs
+ p
->lowerCaseDiff
;
1053 Returns the lowercase equivalent of the UCS-4-encoded character specified
1054 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1055 the character itself.
1057 uint
QChar::toLower(uint ucs4
)
1059 if (ucs4
> LAST_UNICODE_CHAR
)
1061 const QUnicodeTables::Properties
*p
= qGetProp(ucs4
);
1062 if (!p
->lowerCaseSpecial
)
1063 return ucs4
+ p
->lowerCaseDiff
;
1068 Returns the lowercase equivalent of the UCS-2-encoded character specified
1069 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1070 the character itself.
1072 ushort
QChar::toLower(ushort ucs2
)
1074 const QUnicodeTables::Properties
*p
= qGetProp(ucs2
);
1075 if (!p
->lowerCaseSpecial
)
1076 return ucs2
+ p
->lowerCaseDiff
;
1081 Returns the uppercase equivalent if the character is lowercase or titlecase;
1082 otherwise returns the character itself.
1084 QChar
QChar::toUpper() const
1086 const QUnicodeTables::Properties
*p
= qGetProp(ucs
);
1087 if (!p
->upperCaseSpecial
)
1088 return ucs
+ p
->upperCaseDiff
;
1093 Returns the uppercase equivalent of the UCS-4-encoded character specified
1094 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1095 the character itself.
1097 uint
QChar::toUpper(uint ucs4
)
1099 if (ucs4
> LAST_UNICODE_CHAR
)
1101 const QUnicodeTables::Properties
*p
= qGetProp(ucs4
);
1102 if (!p
->upperCaseSpecial
)
1103 return ucs4
+ p
->upperCaseDiff
;
1108 Returns the uppercase equivalent of the UCS-2-encoded character specified
1109 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1110 the character itself.
1112 ushort
QChar::toUpper(ushort ucs2
)
1114 const QUnicodeTables::Properties
*p
= qGetProp(ucs2
);
1115 if (!p
->upperCaseSpecial
)
1116 return ucs2
+ p
->upperCaseDiff
;
1121 Returns the title case equivalent if the character is lowercase or uppercase;
1122 otherwise returns the character itself.
1124 QChar
QChar::toTitleCase() const
1126 const QUnicodeTables::Properties
*p
= qGetProp(ucs
);
1127 if (!p
->titleCaseSpecial
)
1128 return ucs
+ p
->titleCaseDiff
;
1134 Returns the title case equivalent of the UCS-4-encoded character specified
1135 by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1136 the character itself.
1138 uint
QChar::toTitleCase(uint ucs4
)
1140 if (ucs4
> LAST_UNICODE_CHAR
)
1142 const QUnicodeTables::Properties
*p
= qGetProp(ucs4
);
1143 if (!p
->titleCaseSpecial
)
1144 return ucs4
+ p
->titleCaseDiff
;
1150 Returns the title case equivalent of the UCS-2-encoded character specified
1151 by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1152 the character itself.
1154 ushort
QChar::toTitleCase(ushort ucs2
)
1156 const QUnicodeTables::Properties
*p
= qGetProp(ucs2
);
1157 if (!p
->titleCaseSpecial
)
1158 return ucs2
+ p
->titleCaseDiff
;
1163 static inline uint
foldCase(const ushort
*ch
, const ushort
*start
)
1166 if (QChar(c
).isLowSurrogate() && ch
> start
&& QChar(*(ch
- 1)).isHighSurrogate())
1167 c
= QChar::surrogateToUcs4(*(ch
- 1), c
);
1168 return *ch
+ qGetProp(c
)->caseFoldDiff
;
1171 static inline uint
foldCase(uint ch
, uint
&last
)
1174 if (QChar(c
).isLowSurrogate() && QChar(last
).isHighSurrogate())
1175 c
= QChar::surrogateToUcs4(last
, c
);
1177 return ch
+ qGetProp(c
)->caseFoldDiff
;
1180 static inline ushort
foldCase(ushort ch
)
1182 return ch
+ qGetProp(ch
)->caseFoldDiff
;
1186 Returns the case folded equivalent of the character. For most Unicode characters this
1187 is the same as toLowerCase().
1189 QChar
QChar::toCaseFolded() const
1191 return ucs
+ qGetProp(ucs
)->caseFoldDiff
;
1196 Returns the case folded equivalent of the UCS-4-encoded character specified
1197 by \a ucs4. For most Unicode characters this is the same as toLowerCase().
1199 uint
QChar::toCaseFolded(uint ucs4
)
1201 if (ucs4
> LAST_UNICODE_CHAR
)
1203 return ucs4
+ qGetProp(ucs4
)->caseFoldDiff
;
1208 Returns the case folded equivalent of the UCS-2-encoded character specified
1209 by \a ucs2. For most Unicode characters this is the same as toLowerCase().
1211 ushort
QChar::toCaseFolded(ushort ucs2
)
1213 return ucs2
+ qGetProp(ucs2
)->caseFoldDiff
;
1218 \fn char QChar::latin1() const
1220 Use toLatin1() instead.
1224 \fn char QChar::ascii() const
1226 Use toAscii() instead.
1230 \fn char QChar::toLatin1() const
1232 Returns the Latin-1 character equivalent to the QChar, or 0. This
1233 is mainly useful for non-internationalized software.
1235 \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
1239 \fn char QChar::toAscii() const
1240 Returns the character value of the QChar obtained using the current
1241 codec used to read C strings, or 0 if the character is not representable
1242 using this codec. The default codec handles Latin-1 encoded text,
1243 but this can be changed to assist developers writing source code using
1246 The main purpose of this function is to preserve ASCII characters used
1247 in C strings. This is mainly useful for developers of non-internationalized
1250 \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
1252 #ifdef Q_COMPILER_MANGLES_RETURN_TYPE
1253 const char QChar::toAscii() const
1255 char QChar::toAscii() const
1258 #ifndef QT_NO_CODEC_FOR_C_STRINGS
1259 if (QTextCodec::codecForCStrings())
1261 return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
1263 return ucs
> 0xff ? 0 : char(ucs
);
1267 \fn QChar QChar::fromLatin1(char c)
1269 Converts the Latin-1 character \a c to its equivalent QChar. This
1270 is mainly useful for non-internationalized software.
1272 \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
1276 Converts the ASCII character \a c to its equivalent QChar. This
1277 is mainly useful for non-internationalized software.
1279 An alternative is to use QLatin1Char.
1281 \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
1283 QChar
QChar::fromAscii(char c
)
1285 #ifndef QT_NO_CODEC_FOR_C_STRINGS
1286 if (QTextCodec::codecForCStrings())
1288 return QTextCodec::codecForCStrings()->toUnicode(&c
, 1).at(0).unicode();
1290 return QChar(ushort((uchar
)c
));
1293 #ifndef QT_NO_DATASTREAM
1297 Writes the char \a chr to the stream \a out.
1299 \sa {Format of the QDataStream operators}
1302 QDataStream
&operator<<(QDataStream
&out
, const QChar
&chr
)
1304 out
<< quint16(chr
.unicode());
1312 Reads a char from the stream \a in into char \a chr.
1314 \sa {Format of the QDataStream operators}
1317 QDataStream
&operator>>(QDataStream
&in
, QChar
&chr
)
1321 chr
.unicode() = ushort(u
);
1324 #endif // QT_NO_DATASTREAM
1327 \fn ushort & QChar::unicode()
1329 Returns a reference to the numeric Unicode value of the QChar.
1333 \fn ushort QChar::unicode() const
1338 /*****************************************************************************
1339 Documentation of QChar related functions
1340 *****************************************************************************/
1343 \fn bool operator==(QChar c1, QChar c2)
1347 Returns true if \a c1 and \a c2 are the same Unicode character;
1348 otherwise returns false.
1352 \fn int operator!=(QChar c1, QChar c2)
1356 Returns true if \a c1 and \a c2 are not the same Unicode
1357 character; otherwise returns false.
1361 \fn int operator<=(QChar c1, QChar c2)
1365 Returns true if the numeric Unicode value of \a c1 is less than
1366 or equal to that of \a c2; otherwise returns false.
1370 \fn int operator>=(QChar c1, QChar c2)
1374 Returns true if the numeric Unicode value of \a c1 is greater than
1375 or equal to that of \a c2; otherwise returns false.
1379 \fn bool operator<(QChar c1, QChar c2)
1383 Returns true if the numeric Unicode value of \a c1 is less than
1384 that of \a c2; otherwise returns false.
1388 \fn bool operator>(QChar c1, QChar c2)
1392 Returns true if the numeric Unicode value of \a c1 is greater than
1393 that of \a c2; otherwise returns false.
1397 \fn bool QChar::mirrored() const
1399 Use hasMirrored() instead.
1403 \fn QChar QChar::lower() const
1405 Use toLower() instead.
1409 \fn QChar QChar::upper() const
1411 Use toUpper() instead.
1415 \fn bool QChar::networkOrdered()
1417 See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
1421 // ---------------------------------------------------------------------------
// Applies character decompositions to a string, scanning its UTF-16 data
// from the last code unit towards the first.
//
//   str       - the input string.
//   canonical - when true, the decomposition's tag is compared against
//               QChar::Canonical (see the check at line 1447).
//   version   - compared against QChar::unicodeVersion() of each code point
//               (see the check at line 1442).
//
// NOTE(review): this listing omits several original lines (the embedded line
// numbers jump). Not visible here: the declaration of `s` (presumably a
// working copy of str), the declarations of `length` and `tag`, the
// statements controlled by the two `if`s at 1442 and 1447 (presumably they
// skip the current code point), the refresh of `utf16` after the replace, and
// the return statement. Confirm against the full file.
1424 static QString decomposeHelper
1425 (const QString
&str
, bool canonical
, QChar::UnicodeVersion version
)
// Passed to decompositionHelper() below; presumably scratch space for
// decompositions that are computed rather than looked up - TODO confirm.
1427 unsigned short buffer
[3];
1431 const unsigned short *utf16
= s
.utf16();
// Start one past the last code unit and walk backwards.
1432 const unsigned short *uc
= utf16
+ s
.length();
1433 while (uc
!= utf16
) {
1434 uint ucs4
= *(--uc
);
// A low surrogate preceded by a high surrogate forms one code point.
1435 if (QChar(ucs4
).isLowSurrogate() && uc
!= utf16
) {
1436 ushort high
= *(uc
- 1);
1437 if (QChar(high
).isHighSurrogate()) {
1439 ucs4
= QChar::surrogateToUcs4(high
, ucs4
);
1442 if (QChar::unicodeVersion(ucs4
) > version
)
1446 const unsigned short *d
= decompositionHelper(ucs4
, &length
, &tag
, buffer
);
1447 if (!d
|| (canonical
&& tag
!= QChar::Canonical
))
// Replace the code point (1 or 2 code units) at offset uc - utf16 with the
// `length` code units of its decomposition.
// NOTE(review): the test `ucs4 > 0x10000` treats U+10000 itself as a single
// code unit, although it is encoded as a surrogate pair in UTF-16; `>=` looks
// intended - confirm.
1450 s
.replace(uc
- utf16
, ucs4
> 0x10000 ? 2 : 1, (const QChar
*)d
, length
);
1451 // since the replace invalidates the pointers and we do decomposition recursively
1452 int pos
= uc
- utf16
;
// Continue from the end of the text just inserted, so that the backwards
// scan visits the inserted code units as well.
1454 uc
= utf16
+ pos
+ length
;
// Looks up the composed character for the pair (u1, u2).
//
// Three cases are visible:
//   1. Hangul L + V: when u1 is in the Hangul_LBase range and u2 is in the
//      Hangul_VBase range, the syllable is computed arithmetically.
//   2. Hangul LV + T: when u1 is a syllable whose index is a multiple of
//      Hangul_TCount and u2 is in the Hangul_TBase range.
//   3. Table lookup: GET_LIGATURE_INDEX(u2) selects an entry in
//      uc_ligature_map; its first value is a pair count, and the pairs are
//      searched linearly for a first element equal to u1, returning the
//      second element.
//
// NOTE(review): this listing omits several original lines (the embedded line
// numbers jump). Not visible here: the return for case 2, the statement
// controlled by `index == 0xffff`, any adjustment of `ligatures` between
// reading the count and the loop, and the final fall-through return -
// presumably 0 means "no composition"; confirm against the full file.
1461 static ushort
ligatureHelper(ushort u1
, ushort u2
)
1464 int LIndex
= u1
- Hangul_LBase
;
1465 if (0 <= LIndex
&& LIndex
< Hangul_LCount
) {
1466 int VIndex
= u2
- Hangul_VBase
;
1467 if (0 <= VIndex
&& VIndex
< Hangul_VCount
)
1468 return Hangul_SBase
+ (LIndex
* Hangul_VCount
+ VIndex
) * Hangul_TCount
;
1472 int SIndex
= u1
- Hangul_SBase
;
1473 if (0 <= SIndex
&& SIndex
< Hangul_SCount
&& (SIndex
% Hangul_TCount
) == 0) {
1474 int TIndex
= u2
- Hangul_TBase
;
// NOTE(review): this range check is inclusive at both ends (0 <= TIndex <=
// Hangul_TCount), unlike the half-open checks used for LIndex and VIndex
// above. The Hangul composition algorithm in the Unicode Standard uses
// 0 < TIndex < TCount - confirm whether the boundary values are intended.
1475 if (0 <= TIndex
&& TIndex
<= Hangul_TCount
)
1479 const unsigned short index
= GET_LIGATURE_INDEX(u2
);
1480 if (index
== 0xffff)
1482 const unsigned short *ligatures
= uc_ligature_map
+index
;
// The first value of the entry is the number of (first, result) pairs.
1483 ushort length
= *ligatures
;
1486 for (uint i
= 0; i
< length
; ++i
)
1487 if (ligatures
[2*i
] == u1
)
1488 return ligatures
[2*i
+1];
// Composes characters in a string: walks the string by position, and for
// each code point tries to combine it with the character at index `starter`
// using ligatureHelper(). A non-zero result is written back to s[starter].
//
// A combination is attempted when the current position directly follows the
// starter, or when the current combining class is greater than the previous
// one (lastCombining).
//
// NOTE(review): this listing omits several original lines (the embedded line
// numbers jump). Not visible here: the declarations of `s`, `starter` and
// `pos`, any early return, the statements that follow a successful
// composition, the update of `starter`, the position increments, and the
// return statement. Confirm against the full file.
1492 static QString
composeHelper(const QString
&str
)
1499 // the loop can partly ignore high Unicode as all ligatures are in the BMP
1501 int lastCombining
= 0;
1503 while (pos
< s
.length()) {
1504 uint uc
= s
.utf16()[pos
];
// A high surrogate followed by a low surrogate is combined into one code
// point; the length check keeps pos+1 inside the string.
1505 if (QChar(uc
).isHighSurrogate() && pos
< s
.length()-1) {
1506 ushort low
= s
.utf16()[pos
+1];
1507 if (QChar(low
).isLowSurrogate()) {
1508 uc
= QChar::surrogateToUcs4(uc
, low
);
1512 int combining
= QChar::combiningClass(uc
);
1513 if (starter
== pos
- 1 || combining
> lastCombining
) {
1514 // allowed to form ligature with S
// NOTE(review): uc is a uint and may hold a code point above 0xffff here,
// while ligatureHelper() takes ushort parameters, so the value would be
// narrowed in the call - confirm this cannot produce a false match.
1515 QChar ligature
= ligatureHelper(s
.utf16()[starter
], uc
);
1516 if (ligature
.unicode()) {
1517 s
[starter
] = ligature
;
1524 lastCombining
= combining
;
// Reorders adjacent code points of a string by combining class.
//
//   str     - the input string.
//   version - compared against QChar::unicodeVersion() of each of the two
//             code points being examined.
//
// For a pair of neighbouring code points u1 (at pos) and u2 (at p2), each
// possibly encoded as a surrogate pair, the combining classes c1 and c2 are
// looked up. In the exchange branch, u2 is written first and u1 after it,
// starting at index p of the string's data.
//
// NOTE(review): this listing omits several original lines (the embedded line
// numbers jump). Not visible here: the declarations of `s`, `pos`, `p2` and
// `p`, the loop header, the statements controlled by the two unicodeVersion
// checks (presumably the class is treated as 0), the comparison of c1 and c2
// that guards the exchange, the writes for code points below 0x10000, the
// position updates, and the return statement. Confirm against the full file.
1531 static QString canonicalOrderHelper
1532 (const QString
&str
, QChar::UnicodeVersion version
)
// l is the index of the last code unit, not the length.
1535 const int l
= s
.length()-1;
1539 uint u1
= s
.at(pos
).unicode();
1540 if (QChar(u1
).isHighSurrogate()) {
1541 ushort low
= s
.at(pos
+1).unicode();
1542 if (QChar(low
).isLowSurrogate()) {
1544 u1
= QChar::surrogateToUcs4(u1
, low
);
1549 uint u2
= s
.at(p2
).unicode();
// NOTE(review): since l is already length()-1, index p2+1 is valid whenever
// p2 < l. The stricter bound `p2 < l-1` means a surrogate pair occupying the
// last two code units is not combined here - confirm this is intended.
1550 if (QChar(u2
).isHighSurrogate() && p2
< l
-1) {
1551 ushort low
= s
.at(p2
+1).unicode();
1552 if (QChar(low
).isLowSurrogate()) {
1554 u2
= QChar::surrogateToUcs4(u2
, low
);
1558 int c2
= QChar::combiningClass(u2
);
1559 if (QChar::unicodeVersion(u2
) > version
)
1566 int c1
= QChar::combiningClass(u1
);
1567 if (QChar::unicodeVersion(u1
) > version
)
1571 QChar
*uc
= s
.data();
1573 // exchange characters
// u2 is written first (as a surrogate pair in this branch) ...
1577 uc
[p
++] = QChar::highSurrogate(u2
);
1578 uc
[p
++] = QChar::lowSurrogate(u2
);
// ... followed by u1.
1583 uc
[p
++] = QChar::highSurrogate(u1
);
1584 uc
[p
++] = QChar::lowSurrogate(u1
);
// presumably steps back once more so pos does not rest on the second half of
// a surrogate pair - the controlled statement is not visible; confirm.
1588 if (pos
> 0 && s
.at(pos
).isLowSurrogate())
// Looks up the script value for the code point uc in the uc_scripts table.
//
// The lookup has two stages. The first index is uc >> 7, i.e. one entry per
// 128 code points. The entry is compared with ScriptSentinel; for the second
// stage, (entry - ScriptSentinel) * UnicodeBlockSize + UnicodeBlockCount
// gives the base of a sub-table in the same array, which is indexed with the
// low 7 bits of uc.
//
// NOTE(review): this listing omits several original lines (the embedded line
// numbers jump). Not visible here: anything before the first table access
// (e.g. a range check on uc), the statement controlled by
// `script < ScriptSentinel` (presumably the first-stage value is returned
// directly), and the final return. Confirm against the full file.
1599 int QT_FASTCALL
QUnicodeTables::script(unsigned int uc
)
1603 int script
= uc_scripts
[uc
>> 7];
1604 if (script
< ScriptSentinel
)
1606 script
= (((script
- ScriptSentinel
) * UnicodeBlockSize
) + UnicodeBlockCount
);
1607 script
= uc_scripts
[script
+ (uc
& 0x7f)];
// Returns the line_break_class field of the properties record that
// qGetProp() yields for the code point ucs4, cast to
// QUnicodeTables::LineBreakClass.
1612 Q_CORE_EXPORT
QUnicodeTables::LineBreakClass QT_FASTCALL
QUnicodeTables::lineBreakClass(uint ucs4
)
1614 return (QUnicodeTables::LineBreakClass
) qGetProp(ucs4
)->line_break_class
;