* gcc.dg/guality/guality.exp: Skip on AIX.
[official-gcc.git] / libgo / go / unicode / letter_test.go
blobe4d5572a0fecae31eb8e9c65794647a182a1aae8
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package unicode_test
7 import (
8 "flag"
9 "fmt"
10 "runtime"
11 "sort"
12 "testing"
13 . "unicode"
// upperTest holds runes that IsUpper must accept. TestIsLetter also feeds
// this table to IsLetter and expects true for every entry.
var upperTest = []rune{
	// Code points up to U+FFFF.
	0x41, 0xc0, 0xd8, 0x100, 0x139, 0x14a, 0x178, 0x181,
	0x376, 0x3cf, 0x1f2a, 0x2102, 0x2c00, 0x2c10, 0x2c20,
	0xa650, 0xa722, 0xff3a,
	// Code points above U+FFFF.
	0x10400, 0x1d400, 0x1d7ca,
}
// notupperTest holds runes that IsUpper must reject (see TestIsUpper).
var notupperTest = []rune{
	0x40, 0x5b, 0x61, 0x185, 0x1b0,
	0x377, 0x387, 0x2150, 0xffff, 0x10000,
}
// letterTest holds runes that IsLetter must accept. TestIsSpace reuses the
// table as a set of runes that IsSpace must reject.
var letterTest = []rune{
	// Code points up to U+FFFF.
	0x41, 0x61, 0xaa, 0xba, 0xc8, 0xdb, 0xf9, 0x2ec,
	0x535, 0x620, 0x6e6, 0x93d, 0xa15, 0xb99, 0xdc0, 0xedd,
	0x1000, 0x1200, 0x1312, 0x1401, 0x1885, 0x2c00,
	0xa800, 0xf900, 0xfa30, 0xffda, 0xffdc,
	// Code points above U+FFFF.
	0x10000, 0x10300, 0x10400, 0x20000, 0x2f800, 0x2fa1d,
}
// notletterTest holds runes that both IsLetter and IsUpper must reject
// (see TestIsLetter and TestIsUpper).
var notletterTest = []rune{
	0x20, 0x35, 0x375, 0x619,
	0x700, 0xfffe, 0x1ffff, 0x10ffff,
}
// spaceTest holds runes that IsSpace must accept. It contains all the
// special-cased Latin-1 characters plus two runes beyond Latin-1.
var spaceTest = []rune{
	// Latin-1.
	0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x20, 0x85, 0xA0,
	// Beyond Latin-1.
	0x2000, 0x3000,
}
// caseT is one case-mapping expectation: mapping in with the case selector
// cas must yield out.
type caseT struct {
	cas int  // case selector passed to To (UpperCase, LowerCase, TitleCase, or an invalid value)
	in  rune // rune to be mapped
	out rune // expected result of the mapping
}
119 var caseTest = []caseT{
120 // errors
121 {-1, '\n', 0xFFFD},
122 {UpperCase, -1, -1},
123 {UpperCase, 1 << 30, 1 << 30},
125 // ASCII (special-cased so test carefully)
126 {UpperCase, '\n', '\n'},
127 {UpperCase, 'a', 'A'},
128 {UpperCase, 'A', 'A'},
129 {UpperCase, '7', '7'},
130 {LowerCase, '\n', '\n'},
131 {LowerCase, 'a', 'a'},
132 {LowerCase, 'A', 'a'},
133 {LowerCase, '7', '7'},
134 {TitleCase, '\n', '\n'},
135 {TitleCase, 'a', 'A'},
136 {TitleCase, 'A', 'A'},
137 {TitleCase, '7', '7'},
139 // Latin-1: easy to read the tests!
140 {UpperCase, 0x80, 0x80},
141 {UpperCase, 'Å', 'Å'},
142 {UpperCase, 'å', 'Å'},
143 {LowerCase, 0x80, 0x80},
144 {LowerCase, 'Å', 'å'},
145 {LowerCase, 'å', 'å'},
146 {TitleCase, 0x80, 0x80},
147 {TitleCase, 'Å', 'Å'},
148 {TitleCase, 'å', 'Å'},
150 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
151 {UpperCase, 0x0131, 'I'},
152 {LowerCase, 0x0131, 0x0131},
153 {TitleCase, 0x0131, 'I'},
155 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
156 {UpperCase, 0x0133, 0x0132},
157 {LowerCase, 0x0133, 0x0133},
158 {TitleCase, 0x0133, 0x0132},
160 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
161 {UpperCase, 0x212A, 0x212A},
162 {LowerCase, 0x212A, 'k'},
163 {TitleCase, 0x212A, 0x212A},
165 // From an UpperLower sequence
166 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
167 {UpperCase, 0xA640, 0xA640},
168 {LowerCase, 0xA640, 0xA641},
169 {TitleCase, 0xA640, 0xA640},
170 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
171 {UpperCase, 0xA641, 0xA640},
172 {LowerCase, 0xA641, 0xA641},
173 {TitleCase, 0xA641, 0xA640},
174 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
175 {UpperCase, 0xA64E, 0xA64E},
176 {LowerCase, 0xA64E, 0xA64F},
177 {TitleCase, 0xA64E, 0xA64E},
178 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
179 {UpperCase, 0xA65F, 0xA65E},
180 {LowerCase, 0xA65F, 0xA65F},
181 {TitleCase, 0xA65F, 0xA65E},
183 // From another UpperLower sequence
184 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
185 {UpperCase, 0x0139, 0x0139},
186 {LowerCase, 0x0139, 0x013A},
187 {TitleCase, 0x0139, 0x0139},
188 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
189 {UpperCase, 0x013f, 0x013f},
190 {LowerCase, 0x013f, 0x0140},
191 {TitleCase, 0x013f, 0x013f},
192 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
193 {UpperCase, 0x0148, 0x0147},
194 {LowerCase, 0x0148, 0x0148},
195 {TitleCase, 0x0148, 0x0147},
197 // Last block in the 5.1.0 table
198 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
199 {UpperCase, 0x10400, 0x10400},
200 {LowerCase, 0x10400, 0x10428},
201 {TitleCase, 0x10400, 0x10400},
202 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
203 {UpperCase, 0x10427, 0x10427},
204 {LowerCase, 0x10427, 0x1044F},
205 {TitleCase, 0x10427, 0x10427},
206 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
207 {UpperCase, 0x10428, 0x10400},
208 {LowerCase, 0x10428, 0x10428},
209 {TitleCase, 0x10428, 0x10400},
210 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
211 {UpperCase, 0x1044F, 0x10427},
212 {LowerCase, 0x1044F, 0x1044F},
213 {TitleCase, 0x1044F, 0x10427},
215 // First one not in the 5.1.0 table
216 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
217 {UpperCase, 0x10450, 0x10450},
218 {LowerCase, 0x10450, 0x10450},
219 {TitleCase, 0x10450, 0x10450},
221 // Non-letters with case.
222 {LowerCase, 0x2161, 0x2171},
223 {UpperCase, 0x0345, 0x0399},
226 func TestIsLetter(t *testing.T) {
227 for _, r := range upperTest {
228 if !IsLetter(r) {
229 t.Errorf("IsLetter(U+%04X) = false, want true", r)
232 for _, r := range letterTest {
233 if !IsLetter(r) {
234 t.Errorf("IsLetter(U+%04X) = false, want true", r)
237 for _, r := range notletterTest {
238 if IsLetter(r) {
239 t.Errorf("IsLetter(U+%04X) = true, want false", r)
244 func TestIsUpper(t *testing.T) {
245 for _, r := range upperTest {
246 if !IsUpper(r) {
247 t.Errorf("IsUpper(U+%04X) = false, want true", r)
250 for _, r := range notupperTest {
251 if IsUpper(r) {
252 t.Errorf("IsUpper(U+%04X) = true, want false", r)
255 for _, r := range notletterTest {
256 if IsUpper(r) {
257 t.Errorf("IsUpper(U+%04X) = true, want false", r)
// caseString returns the printable name of the case selector c for use in
// failure messages. Any value other than UpperCase, LowerCase or TitleCase
// is reported as "ErrorCase".
func caseString(c int) string {
	names := map[int]string{
		UpperCase: "UpperCase",
		LowerCase: "LowerCase",
		TitleCase: "TitleCase",
	}
	if name, known := names[c]; known {
		return name
	}
	return "ErrorCase"
}
274 func TestTo(t *testing.T) {
275 for _, c := range caseTest {
276 r := To(c.cas, c.in)
277 if c.out != r {
278 t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
283 func TestToUpperCase(t *testing.T) {
284 for _, c := range caseTest {
285 if c.cas != UpperCase {
286 continue
288 r := ToUpper(c.in)
289 if c.out != r {
290 t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
295 func TestToLowerCase(t *testing.T) {
296 for _, c := range caseTest {
297 if c.cas != LowerCase {
298 continue
300 r := ToLower(c.in)
301 if c.out != r {
302 t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
307 func TestToTitleCase(t *testing.T) {
308 for _, c := range caseTest {
309 if c.cas != TitleCase {
310 continue
312 r := ToTitle(c.in)
313 if c.out != r {
314 t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
319 func TestIsSpace(t *testing.T) {
320 for _, c := range spaceTest {
321 if !IsSpace(c) {
322 t.Errorf("IsSpace(U+%04X) = false; want true", c)
325 for _, c := range letterTest {
326 if IsSpace(c) {
327 t.Errorf("IsSpace(U+%04X) = true; want false", c)
// TestLetterOptimizations verifies that the dedicated predicates and
// mappers (IsLetter, IsUpper, ToUpper, ...) give the same answers as the
// table-driven Is and To. Only the Latin-1 range needs to be checked.
func TestLetterOptimizations(t *testing.T) {
	for r := rune(0); r <= MaxLatin1; r++ {
		// One row per comparison, in the order the failures are reported.
		checks := []struct {
			agree  bool
			format string
		}{
			{Is(Letter, r) == IsLetter(r), "IsLetter(U+%04X) disagrees with Is(Letter)"},
			{Is(Upper, r) == IsUpper(r), "IsUpper(U+%04X) disagrees with Is(Upper)"},
			{Is(Lower, r) == IsLower(r), "IsLower(U+%04X) disagrees with Is(Lower)"},
			{Is(Title, r) == IsTitle(r), "IsTitle(U+%04X) disagrees with Is(Title)"},
			{Is(White_Space, r) == IsSpace(r), "IsSpace(U+%04X) disagrees with Is(White_Space)"},
			{To(UpperCase, r) == ToUpper(r), "ToUpper(U+%04X) disagrees with To(Upper)"},
			{To(LowerCase, r) == ToLower(r), "ToLower(U+%04X) disagrees with To(Lower)"},
			{To(TitleCase, r) == ToTitle(r), "ToTitle(U+%04X) disagrees with To(Title)"},
		}
		for _, c := range checks {
			if !c.agree {
				t.Errorf(c.format, r)
			}
		}
	}
}
// TestTurkishCase checks the TurkishCase special mapping over the Turkish
// alphabet. lower and upper are parallel: upper[i] is the expected upper-
// and title-case form of lower[i], and lower[i] the expected lower-case form
// of upper[i]. Mapping a rune to the case it already has must leave it
// unchanged.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	for i, l := range lower {
		u := upper[i]
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		// Report the value that was actually compared: the mapping of u,
		// not of l.
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
// simpleFoldTests lists case-folding orbits for TestSimpleFold. Each string
// is read as a cycle: SimpleFold of the last rune must yield the first rune,
// and SimpleFold of each rune must yield the one after it.
//
// SimpleFold could order its results in any order it wants, but we know it
// orders them in increasing order starting at the input and looping around
// from MaxRune to 0.
//
// Entries containing code points that are invisible or indistinguishable
// from a neighbour when rendered are written with \u escapes so that the
// rotations can be told apart in review.
var simpleFoldTests = []string{
	// Easy cases.
	"Aa",
	"aA",
	"δΔ",
	"Δδ",

	// ASCII special cases.
	// U+212A is KELVIN SIGN, which renders like 'K'.
	"Kk\u212A",
	"k\u212AK",
	"\u212AKk",
	"Ssſ",
	"sſS",
	"ſSs",

	// Non-ASCII special cases.
	// U+03C1 small rho, U+03F1 rho symbol, U+03A1 capital rho (renders like 'P').
	"\u03C1\u03F1\u03A1",
	"\u03F1\u03A1\u03C1",
	"\u03A1\u03C1\u03F1",
	// U+0345 combining ypogegrammeni, U+0399 capital iota,
	// U+03B9 small iota, U+1FBE prosgegrammeni (renders like small iota).
	"\u0345\u0399\u03B9\u1FBE",
	"\u0399\u03B9\u1FBE\u0345",
	"\u03B9\u1FBE\u0345\u0399",
	"\u1FBE\u0345\u0399\u03B9",

	// Extra special cases: has lower/upper but no case fold.
	"İ",
	"ı",
}
422 func TestSimpleFold(t *testing.T) {
423 for _, tt := range simpleFoldTests {
424 cycle := []rune(tt)
425 r := cycle[len(cycle)-1]
426 for _, out := range cycle {
427 if r := SimpleFold(r); r != out {
428 t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out)
430 r = out
// calibrate enables TestCalibrate; it is off by default.
//
// Running 'go test -calibrate' searches for a plausible cutoff between
// linear and binary search of a range list. A fake table is built and a
// sequence of lookups is timed over it, covering every position relative to
// the ranges: before all of them, inside each, between each pair, and after
// all of them. This treats all runes as equally likely. In practice most
// runes are ASCII, so the estimate is conservative and the real cutoff
// could probably be set higher than what the calibration recommends.
var calibrate = flag.Bool(
	"calibrate",
	false,
	"compute crossover for linear vs. binary search",
)
447 func TestCalibrate(t *testing.T) {
448 if !*calibrate {
449 return
452 if runtime.GOARCH == "amd64" {
453 fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
456 // Find the point where binary search wins by more than 10%.
457 // The 10% bias gives linear search an edge when they're close,
458 // because on predominantly ASCII inputs linear search is even
459 // better than our benchmarks measure.
460 n := sort.Search(64, func(n int) bool {
461 tab := fakeTable(n)
462 blinear := func(b *testing.B) {
463 tab := tab
464 max := n*5 + 20
465 for i := 0; i < b.N; i++ {
466 for j := 0; j <= max; j++ {
467 linear(tab, uint16(j))
471 bbinary := func(b *testing.B) {
472 tab := tab
473 max := n*5 + 20
474 for i := 0; i < b.N; i++ {
475 for j := 0; j <= max; j++ {
476 binary(tab, uint16(j))
480 bmlinear := testing.Benchmark(blinear)
481 bmbinary := testing.Benchmark(bbinary)
482 fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
483 return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
485 fmt.Printf("calibration: linear cutoff = %d\n", n)
// fakeTable builds a synthetic table of n ranges for the calibration
// benchmark. Range k covers [5k+10, 5k+12] with stride 1, so consecutive
// ranges are separated by two uncovered values. The result is nil when
// n <= 0.
func fakeTable(n int) []Range16 {
	var table []Range16
	for k := 0; k < n; k++ {
		lo := uint16(k*5 + 10)
		hi := uint16(k*5 + 12)
		table = append(table, Range16{Lo: lo, Hi: hi, Stride: 1})
	}
	return table
}
// linear reports whether r is a member of ranges, scanning from the front.
// The scan stops at the first range whose Lo exceeds r, so ranges is
// expected to be sorted in increasing order. Within a range, r is a member
// only if it lies a whole number of strides above Lo.
func linear(ranges []Range16, r uint16) bool {
	for i := range ranges {
		rg := &ranges[i]
		switch {
		case r < rg.Lo:
			// Every remaining range starts above r.
			return false
		case r <= rg.Hi:
			return (r-rg.Lo)%rg.Stride == 0
		}
	}
	return false
}
// binary reports whether r is a member of ranges using binary search over
// the half-open index window [lo, hi). ranges is expected to be sorted in
// increasing order. Within a range, r is a member only if it lies a whole
// number of strides above Lo.
func binary(ranges []Range16, r uint16) bool {
	lo, hi := 0, len(ranges)
	for lo < hi {
		mid := lo + (hi-lo)/2
		rg := &ranges[mid]
		switch {
		case r < rg.Lo:
			hi = mid
		case r > rg.Hi:
			lo = mid + 1
		default:
			// rg.Lo <= r <= rg.Hi
			return (r-rg.Lo)%rg.Stride == 0
		}
	}
	return false
}
// TestLatinOffset checks, for every table in Categories, FoldCategory,
// FoldScript, Properties and Scripts, that LatinOffset equals the number of
// leading R16 entries whose Hi does not exceed MaxLatin1.
func TestLatinOffset(t *testing.T) {
	groups := []map[string]*RangeTable{
		Categories,
		FoldCategory,
		FoldScript,
		Properties,
		Scripts,
	}
	for _, group := range groups {
		for name, tab := range group {
			// Count the leading ranges that lie entirely within Latin-1.
			want := 0
			for _, r16 := range tab.R16 {
				if r16.Hi > MaxLatin1 {
					break
				}
				want++
			}
			if tab.LatinOffset != want {
				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, want)
			}
		}
	}
}