1 /* Test of titlecase mapping for UTF-16 strings.
2 Copyright (C) 2009-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
/* Run u16_totitle on INPUT (INPUT_LENGTH units) and compare the outcome with
   EXPECTED (EXPECTED_LENGTH units).  ISO639_LANGUAGE and NF are handed to
   u16_totitle unchanged.  The call is made three times, each with a
   different result buffer: none at all, one that is one unit too short, and
   one of exactly the expected length.
   Callers in this file treat a return value of 0 as success.
   NOTE(review): the return statements and the cleanup of RESULT and
   PREALLOCATED are not visible in this excerpt -- confirm the failure codes
   and the ownership handling against the complete file.  */
30 check (const uint16_t *input
, size_t input_length
,
31 const char *iso639_language
, uninorm_t nf
,
32 const uint16_t *expected
, size_t expected_length
)
37 /* Test return conventions with resultbuf == NULL. */
/* With no buffer supplied, the call must still yield a non-NULL result whose
   length and contents match EXPECTED.  */
38 result
= u16_totitle (input
, input_length
, iso639_language
, nf
, NULL
, &length
);
39 if (!(result
!= NULL
))
41 if (!(length
== expected_length
))
43 if (!(u16_cmp (result
, expected
, expected_length
) == 0))
47 /* Test return conventions with resultbuf too small. */
/* Only meaningful when there is at least one unit of output, because the
   buffer is sized one unit short of EXPECTED_LENGTH.  */
48 if (expected_length
> 0)
50 uint16_t *preallocated
;
52 length
= expected_length
- 1;
53 preallocated
= (uint16_t *) malloc (length
* sizeof (uint16_t));
54 result
= u16_totitle (input
, input_length
, iso639_language
, nf
, preallocated
, &length
);
55 if (!(result
!= NULL
))
/* The supplied buffer cannot hold the output, so the result must be some
   other storage than PREALLOCATED.  */
57 if (!(result
!= preallocated
))
/* LENGTH went in as the buffer capacity and must come back as the full
   output length.  */
59 if (!(length
== expected_length
))
61 if (!(u16_cmp (result
, expected
, expected_length
) == 0))
67 /* Test return conventions with resultbuf large enough. */
69 uint16_t *preallocated
;
71 length
= expected_length
;
72 preallocated
= (uint16_t *) malloc (length
* sizeof (uint16_t));
73 result
= u16_totitle (input
, input_length
, iso639_language
, nf
, preallocated
, &length
);
74 if (!(result
!= NULL
))
/* If malloc returned NULL (possible for a zero-length request) there was no
   buffer to reuse; otherwise the result must be the supplied buffer.  */
76 if (!(preallocated
== NULL
|| result
== preallocated
))
78 if (!(length
== expected_length
))
80 if (!(u16_cmp (result
, expected
, expected_length
) == 0))
/* A NULL input of length 0 must give an output of length 0, both without
   normalization (nf == NULL) and with UNINORM_NFC.  */
92 ASSERT (check (NULL
, 0, NULL
, NULL
, NULL
, 0) == 0);
93 ASSERT (check (NULL
, 0, NULL
, UNINORM_NFC
, NULL
, 0) == 0);
/* All-uppercase sentence mixing Latin, Cyrillic, symbols, CJK and Hangul.
   In the expected output the first letter of each word stays uppercase and
   the letters following it are lowercased (e.g. "SQRT" -> "Sqrt",
   "4AC" -> "4Ac"); U+00DF, digits, punctuation, the CJK ideographs and the
   Hangul syllables are identical in input and output.  No language and no
   normalization are requested.  */
97 { /* "GRÜß GOTT. ЗДРАВСТВУЙТЕ! X=(-B±SQRT(B²-4AC))/(2A) 日本語,中文,한글" */
98 static const uint16_t input
[] =
99 { 'G', 'R', 0x00DC, 0x00DF, ' ', 'G', 'O', 'T', 'T', '.', ' ',
100 0x0417, 0x0414, 0x0420, 0x0410, 0x0412, 0x0421, 0x0422, 0x0412, 0x0423,
101 0x0419, 0x0422, 0x0415, '!', ' ',
102 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'Q', 'R', 'T', '(', 'B', 0x00B2,
103 '-', '4', 'A', 'C', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
104 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
106 static const uint16_t casemapped
[] =
107 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
108 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
109 0x0439, 0x0442, 0x0435, '!', ' ',
110 'X', '=', '(', '-', 'B', 0x00B1, 'S', 'q', 'r', 't', '(', 'B', 0x00B2,
111 '-', '4', 'A', 'c', ')', ')', '/', '(', '2', 'A', ')', ' ', ' ',
112 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
114 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
117 /* Case mapping can increase the number of Unicode characters. */
/* Each input below is a single code unit whose expected titlecase form is
   longer: U+0149 -> U+02BC U+004E, and U+0390 -> U+0399 U+0308 U+0301.
   Through check() this also exercises the "result buffer too small" path
   with a buffer shorter than the output.  */
118 { /* LATIN SMALL LETTER N PRECEDED BY APOSTROPHE */
119 static const uint16_t input
[] = { 0x0149 };
120 static const uint16_t casemapped
[] = { 0x02BC, 0x004E };
121 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
123 { /* GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS */
124 static const uint16_t input
[] = { 0x0390 };
125 static const uint16_t casemapped
[] = { 0x0399, 0x0308, 0x0301 };
126 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
129 /* Turkish letters i İ ı I */
/* Each of the four letters is checked twice: with no language and with the
   language "tr".  Only U+0069 has a language-dependent expectation
   (U+0049 by default, U+0130 for "tr"); for the other three letters the
   same expected output is used in both calls.  */
130 { /* LATIN CAPITAL LETTER I */
131 static const uint16_t input
[] = { 0x0049 };
132 static const uint16_t casemapped
[] = { 0x0049 };
133 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
134 ASSERT (check (input
, SIZEOF (input
), "tr", NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
136 { /* LATIN SMALL LETTER I */
137 static const uint16_t input
[] = { 0x0069 };
138 static const uint16_t casemapped
[] = { 0x0049 };
/* Expected result when the language is "tr": capital I with dot above.  */
139 static const uint16_t casemapped_tr
[] = { 0x0130 };
140 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
141 ASSERT (check (input
, SIZEOF (input
), "tr", NULL
, casemapped_tr
, SIZEOF (casemapped_tr
)) == 0);
143 { /* LATIN CAPITAL LETTER I WITH DOT ABOVE */
144 static const uint16_t input
[] = { 0x0130 };
145 static const uint16_t casemapped
[] = { 0x0130 };
146 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
147 ASSERT (check (input
, SIZEOF (input
), "tr", NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
149 { /* LATIN SMALL LETTER DOTLESS I */
150 static const uint16_t input
[] = { 0x0131 };
151 static const uint16_t casemapped
[] = { 0x0049 };
152 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
153 ASSERT (check (input
, SIZEOF (input
), "tr", NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
/* The word "topkapı", ending in U+0131 LATIN SMALL LETTER DOTLESS I.
   Only the initial 't' is expected to change (to 'T'); the trailing U+0131
   is expected unchanged, both with no language and with "tr".  */
156 static const uint16_t input
[] =
157 { 0x0074, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
158 static const uint16_t casemapped
[] =
159 { 0x0054, 0x006F, 0x0070, 0x006B, 0x0061, 0x0070, 0x0131 };
160 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
161 ASSERT (check (input
, SIZEOF (input
), "tr", NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
164 /* Uppercasing can increase the number of Unicode characters, but this titlecase expectation keeps the length: in "heiß" only the initial 'h' becomes 'H' and the trailing U+00DF is expected unchanged. */
166 static const uint16_t input
[] = { 0x0068, 0x0065, 0x0069, 0x00DF };
167 static const uint16_t casemapped
[] = { 0x0048, 0x0065, 0x0069, 0x00DF };
168 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
171 /* Case mappings for some characters can depend on the surrounding characters. */
/* Two lowercase Greek words separated by a space.  The expected output
   differs from the input only in the first letter of each word
   (U+03C0 -> U+03A0); every other unit, including the U+03C2 that ends
   each word, is expected unchanged.  */
172 { /* "περισσότερες πληροφορίες" */
173 static const uint16_t input
[] =
175 0x03C0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
176 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03C0, 0x03BB, 0x03B7,
177 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
179 static const uint16_t casemapped
[] =
181 0x03A0, 0x03B5, 0x03C1, 0x03B9, 0x03C3, 0x03C3, 0x03CC, 0x03C4,
182 0x03B5, 0x03C1, 0x03B5, 0x03C2, 0x0020, 0x03A0, 0x03BB, 0x03B7,
183 0x03C1, 0x03BF, 0x03C6, 0x03BF, 0x03C1, 0x03AF, 0x03B5, 0x03C2
185 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
188 /* Case mapping can require subsequent normalization. */
/* U+01F0 is expected to expand to U+004A U+030C.  Without normalization
   the following U+0323 stays after U+030C; with UNINORM_NFC the expected
   output has the two combining marks in the opposite order
   (U+0323 before U+030C).  */
189 { /* LATIN SMALL LETTER J WITH CARON, COMBINING DOT BELOW */
190 static const uint16_t input
[] = { 0x01F0, 0x0323 };
191 static const uint16_t casemapped
[] = { 0x004A, 0x030C, 0x0323 };
192 static const uint16_t casemapped_normalized
[] = { 0x004A, 0x0323, 0x030C };
193 ASSERT (check (input
, SIZEOF (input
), NULL
, NULL
, casemapped
, SIZEOF (casemapped
)) == 0);
194 ASSERT (check (input
, SIZEOF (input
), NULL
, UNINORM_NFC
, casemapped_normalized
, SIZEOF (casemapped_normalized
)) == 0);