libgo: Update to Go 1.3 release.
[official-gcc.git] / libgo / go / unicode / letter_test.go
blob4ee11fb364f58dab8ea41e237651992473977fe1
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package unicode_test
7 import (
8 "flag"
9 "fmt"
10 "runtime"
11 "sort"
12 "testing"
13 . "unicode"
// upperTest holds code points that the tests below require both IsUpper and
// IsLetter to accept.
var upperTest = []rune{
	// Latin and Greek
	0x41, 0xc0, 0xd8, 0x100, 0x139, 0x14a, 0x178, 0x181, 0x376, 0x3cf,
	// Remaining samples from the Basic Multilingual Plane
	0x1f2a, 0x2102, 0x2c00, 0x2c10, 0x2c20, 0xa650, 0xa722, 0xff3a,
	// Supplementary planes
	0x10400, 0x1d400, 0x1d7ca,
}
// notupperTest holds code points that TestIsUpper requires IsUpper to reject.
var notupperTest = []rune{
	0x40, 0x5b, 0x61,
	0x185, 0x1b0, 0x377, 0x387,
	0x2150, 0xffff, 0x10000,
}
// letterTest holds code points that TestIsLetter requires IsLetter to accept.
// TestIsSpace reuses the table as a set of runes that must not be reported as
// space.
var letterTest = []rune{
	// Latin-1 and below
	0x41, 0x61, 0xaa, 0xba, 0xc8, 0xdb, 0xf9,
	// Rest of the Basic Multilingual Plane
	0x2ec, 0x535, 0x620, 0x6e6, 0x93d, 0xa15, 0xb99, 0xdc0, 0xedd,
	0x1000, 0x1200, 0x1312, 0x1401, 0x1885, 0x2c00,
	0xa800, 0xf900, 0xfa30, 0xffda, 0xffdc,
	// Supplementary planes
	0x10000, 0x10300, 0x10400, 0x20000, 0x2f800, 0x2fa1d,
}
// notletterTest holds code points that the tests below require both IsLetter
// and IsUpper to reject.
var notletterTest = []rune{
	0x20, 0x35, 0x375, 0x619,
	0x700, 0xfffe, 0x1ffff, 0x10ffff,
}
// spaceTest holds code points that TestIsSpace requires IsSpace to accept.
// It contains all the special-cased Latin-1 characters, followed by two
// samples from outside Latin-1.
var spaceTest = []rune{
	0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x20, 0x85, 0xA0,
	0x2000, 0x3000,
}
// caseT is one case-mapping expectation: mapping the rune in with the case
// selector cas should yield out.
type caseT struct {
	cas int  // case selector handed to To (UpperCase, LowerCase, TitleCase, ...)
	in  rune // rune to map
	out rune // expected result
}
119 var caseTest = []caseT{
120 // errors
121 {-1, '\n', 0xFFFD},
122 {UpperCase, -1, -1},
123 {UpperCase, 1 << 30, 1 << 30},
125 // ASCII (special-cased so test carefully)
126 {UpperCase, '\n', '\n'},
127 {UpperCase, 'a', 'A'},
128 {UpperCase, 'A', 'A'},
129 {UpperCase, '7', '7'},
130 {LowerCase, '\n', '\n'},
131 {LowerCase, 'a', 'a'},
132 {LowerCase, 'A', 'a'},
133 {LowerCase, '7', '7'},
134 {TitleCase, '\n', '\n'},
135 {TitleCase, 'a', 'A'},
136 {TitleCase, 'A', 'A'},
137 {TitleCase, '7', '7'},
139 // Latin-1: easy to read the tests!
140 {UpperCase, 0x80, 0x80},
141 {UpperCase, 'Å', 'Å'},
142 {UpperCase, 'å', 'Å'},
143 {LowerCase, 0x80, 0x80},
144 {LowerCase, 'Å', 'å'},
145 {LowerCase, 'å', 'å'},
146 {TitleCase, 0x80, 0x80},
147 {TitleCase, 'Å', 'Å'},
148 {TitleCase, 'å', 'Å'},
150 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
151 {UpperCase, 0x0131, 'I'},
152 {LowerCase, 0x0131, 0x0131},
153 {TitleCase, 0x0131, 'I'},
155 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
156 {UpperCase, 0x0133, 0x0132},
157 {LowerCase, 0x0133, 0x0133},
158 {TitleCase, 0x0133, 0x0132},
160 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
161 {UpperCase, 0x212A, 0x212A},
162 {LowerCase, 0x212A, 'k'},
163 {TitleCase, 0x212A, 0x212A},
165 // From an UpperLower sequence
166 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
167 {UpperCase, 0xA640, 0xA640},
168 {LowerCase, 0xA640, 0xA641},
169 {TitleCase, 0xA640, 0xA640},
170 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
171 {UpperCase, 0xA641, 0xA640},
172 {LowerCase, 0xA641, 0xA641},
173 {TitleCase, 0xA641, 0xA640},
174 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
175 {UpperCase, 0xA64E, 0xA64E},
176 {LowerCase, 0xA64E, 0xA64F},
177 {TitleCase, 0xA64E, 0xA64E},
178 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
179 {UpperCase, 0xA65F, 0xA65E},
180 {LowerCase, 0xA65F, 0xA65F},
181 {TitleCase, 0xA65F, 0xA65E},
183 // From another UpperLower sequence
184 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
185 {UpperCase, 0x0139, 0x0139},
186 {LowerCase, 0x0139, 0x013A},
187 {TitleCase, 0x0139, 0x0139},
188 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
189 {UpperCase, 0x013f, 0x013f},
190 {LowerCase, 0x013f, 0x0140},
191 {TitleCase, 0x013f, 0x013f},
192 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
193 {UpperCase, 0x0148, 0x0147},
194 {LowerCase, 0x0148, 0x0148},
195 {TitleCase, 0x0148, 0x0147},
197 // Last block in the 5.1.0 table
198 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
199 {UpperCase, 0x10400, 0x10400},
200 {LowerCase, 0x10400, 0x10428},
201 {TitleCase, 0x10400, 0x10400},
202 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
203 {UpperCase, 0x10427, 0x10427},
204 {LowerCase, 0x10427, 0x1044F},
205 {TitleCase, 0x10427, 0x10427},
206 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
207 {UpperCase, 0x10428, 0x10400},
208 {LowerCase, 0x10428, 0x10428},
209 {TitleCase, 0x10428, 0x10400},
210 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
211 {UpperCase, 0x1044F, 0x10427},
212 {LowerCase, 0x1044F, 0x1044F},
213 {TitleCase, 0x1044F, 0x10427},
215 // First one not in the 5.1.0 table
216 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
217 {UpperCase, 0x10450, 0x10450},
218 {LowerCase, 0x10450, 0x10450},
219 {TitleCase, 0x10450, 0x10450},
221 // Non-letters with case.
222 {LowerCase, 0x2161, 0x2171},
223 {UpperCase, 0x0345, 0x0399},
226 func TestIsLetter(t *testing.T) {
227 for _, r := range upperTest {
228 if !IsLetter(r) {
229 t.Errorf("IsLetter(U+%04X) = false, want true", r)
232 for _, r := range letterTest {
233 if !IsLetter(r) {
234 t.Errorf("IsLetter(U+%04X) = false, want true", r)
237 for _, r := range notletterTest {
238 if IsLetter(r) {
239 t.Errorf("IsLetter(U+%04X) = true, want false", r)
244 func TestIsUpper(t *testing.T) {
245 for _, r := range upperTest {
246 if !IsUpper(r) {
247 t.Errorf("IsUpper(U+%04X) = false, want true", r)
250 for _, r := range notupperTest {
251 if IsUpper(r) {
252 t.Errorf("IsUpper(U+%04X) = true, want false", r)
255 for _, r := range notletterTest {
256 if IsUpper(r) {
257 t.Errorf("IsUpper(U+%04X) = true, want false", r)
// caseString returns the name of the case selector c for use in test
// diagnostics. Any value other than UpperCase, LowerCase or TitleCase is
// reported as "ErrorCase".
func caseString(c int) string {
	names := [...]string{
		UpperCase: "UpperCase",
		LowerCase: "LowerCase",
		TitleCase: "TitleCase",
	}
	if c < 0 || c >= len(names) {
		return "ErrorCase"
	}
	return names[c]
}
274 func TestTo(t *testing.T) {
275 for _, c := range caseTest {
276 r := To(c.cas, c.in)
277 if c.out != r {
278 t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out)
283 func TestToUpperCase(t *testing.T) {
284 for _, c := range caseTest {
285 if c.cas != UpperCase {
286 continue
288 r := ToUpper(c.in)
289 if c.out != r {
290 t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
295 func TestToLowerCase(t *testing.T) {
296 for _, c := range caseTest {
297 if c.cas != LowerCase {
298 continue
300 r := ToLower(c.in)
301 if c.out != r {
302 t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
307 func TestToTitleCase(t *testing.T) {
308 for _, c := range caseTest {
309 if c.cas != TitleCase {
310 continue
312 r := ToTitle(c.in)
313 if c.out != r {
314 t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out)
319 func TestIsSpace(t *testing.T) {
320 for _, c := range spaceTest {
321 if !IsSpace(c) {
322 t.Errorf("IsSpace(U+%04X) = false; want true", c)
325 for _, c := range letterTest {
326 if IsSpace(c) {
327 t.Errorf("IsSpace(U+%04X) = true; want false", c)
// TestLetterOptimizations verifies, for every rune from 0 through MaxLatin1,
// that the dedicated predicates and mappings (IsLetter, ToUpper, ...) give the
// same answer as the generic table-driven Is and To. Only the Latin-1 range
// needs checking.
func TestLetterOptimizations(t *testing.T) {
	for r := rune(0); r <= MaxLatin1; r++ {
		// Category and property predicates versus Is.
		if IsLetter(r) != Is(Letter, r) {
			t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", r)
		}
		if IsUpper(r) != Is(Upper, r) {
			t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", r)
		}
		if IsLower(r) != Is(Lower, r) {
			t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", r)
		}
		if IsTitle(r) != Is(Title, r) {
			t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", r)
		}
		if IsSpace(r) != Is(White_Space, r) {
			t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", r)
		}

		// Case mappings versus To.
		if ToUpper(r) != To(UpperCase, r) {
			t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", r)
		}
		if ToLower(r) != To(LowerCase, r) {
			t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", r)
		}
		if ToTitle(r) != To(TitleCase, r) {
			t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", r)
		}
	}
}
// TestTurkishCase checks the TurkishCase special mapping against the Turkish
// alphabet. lower and upper list the same letters in the same order, so the
// letter at index i of one table is the expected mapping of the letter at
// index i of the other. Letters already in the requested case must be
// returned unchanged, and ToTitle is expected to produce the upper-case form.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	for i, l := range lower {
		u := upper[i]
		// Each result is captured once so that the value reported on failure
		// is exactly the value that was compared.
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		// The message previously printed ToLower(l) here, i.e. a value that
		// was never the one under test.
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
// simpleFoldTests lists case-folding orbits for TestSimpleFold. Each string
// is one orbit in ascending rune order: SimpleFold of a rune must yield the
// next rune in the string, and SimpleFold of the last rune wraps around to
// the first.
//
// Runes that are combining marks or that look identical to another rune in
// the same orbit are written as \u escapes, so the table stays unambiguous
// and survives Unicode normalization and copy-paste.
var simpleFoldTests = []string{
	// SimpleFold(x) returns the next equivalent rune > x or wraps
	// around to smaller values.

	// Easy cases.
	"Aa",
	"δΔ",

	// ASCII special cases.
	"Kk\u212A", // U+212A KELVIN SIGN
	"Ssſ",

	// Non-ASCII special cases.
	"ρϱ\u03A1",                 // U+03A1 GREEK CAPITAL LETTER RHO
	"\u0345\u0399\u03B9\u1FBE", // ypogegrammeni, capital iota, small iota, prosgegrammeni

	// Extra special cases: has lower/upper but no case fold.
	"İ",
	"ı",
}
410 func TestSimpleFold(t *testing.T) {
411 for _, tt := range simpleFoldTests {
412 cycle := []rune(tt)
413 r := cycle[len(cycle)-1]
414 for _, out := range cycle {
415 if r := SimpleFold(r); r != out {
416 t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out)
418 r = out
// Calibration
//
// Running 'go test -calibrate' looks for a plausible cutoff between linear
// and binary search of a range list. A fake table is built, and a sequence
// of searches over it is timed for every possible input position relative to
// the ranges: before all of them, inside each, between each pair, and after
// all of them.
//
// This treats all runes as equally likely. In practice most runes are ASCII,
// so the result is a conservative estimate of an effective cutoff, and the
// real cutoff could probably be set higher than what is recommended here.

// calibrate is the -calibrate command-line flag; it is off by default.
var calibrate = flag.Bool(
	"calibrate",
	false,
	"compute crossover for linear vs. binary search",
)
435 func TestCalibrate(t *testing.T) {
436 if !*calibrate {
437 return
440 if runtime.GOARCH == "amd64" {
441 fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
444 // Find the point where binary search wins by more than 10%.
445 // The 10% bias gives linear search an edge when they're close,
446 // because on predominantly ASCII inputs linear search is even
447 // better than our benchmarks measure.
448 n := sort.Search(64, func(n int) bool {
449 tab := fakeTable(n)
450 blinear := func(b *testing.B) {
451 tab := tab
452 max := n*5 + 20
453 for i := 0; i < b.N; i++ {
454 for j := 0; j <= max; j++ {
455 linear(tab, uint16(j))
459 bbinary := func(b *testing.B) {
460 tab := tab
461 max := n*5 + 20
462 for i := 0; i < b.N; i++ {
463 for j := 0; j <= max; j++ {
464 binary(tab, uint16(j))
468 bmlinear := testing.Benchmark(blinear)
469 bmbinary := testing.Benchmark(bbinary)
470 fmt.Printf("n=%d: linear=%d binary=%d\n", n, bmlinear.NsPerOp(), bmbinary.NsPerOp())
471 return bmlinear.NsPerOp()*100 > bmbinary.NsPerOp()*110
473 fmt.Printf("calibration: linear cutoff = %d\n", n)
// fakeTable builds a synthetic table of n ranges for the calibration
// benchmark. Range i covers [i*5+10, i*5+12] with stride 1, so consecutive
// ranges are separated by a gap. The result is nil when n <= 0.
func fakeTable(n int) []Range16 {
	var table []Range16
	for lo := 10; len(table) < n; lo += 5 {
		table = append(table, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
	}
	return table
}
// linear reports whether r belongs to one of ranges, scanning the ranges in
// order. The scan stops at the first range whose Lo is above r or whose Hi is
// at or above r, so it relies on the ranges being sorted. A value inside a
// range's bounds is a member only if it lies on that range's stride.
func linear(ranges []Range16, r uint16) bool {
	for i := range ranges {
		cur := &ranges[i]
		switch {
		case r < cur.Lo:
			// r falls before this range, so it cannot be in a later one.
			return false
		case r <= cur.Hi:
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
// binary reports whether r belongs to one of ranges, using binary search over
// the ranges (which must therefore be sorted). A value inside a range's
// bounds is a member only if it lies on that range's stride.
func binary(ranges []Range16, r uint16) bool {
	low, high := 0, len(ranges)
	for low < high {
		mid := low + (high-low)/2
		cur := &ranges[mid]
		switch {
		case r < cur.Lo:
			// r can only be in the lower half.
			high = mid
		case r > cur.Hi:
			// r can only be in the upper half.
			low = mid + 1
		default:
			// cur.Lo <= r <= cur.Hi
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}
// TestLatinOffset checks the LatinOffset field of every table in Categories,
// FoldCategory, FoldScript, Properties and Scripts. For each table the
// expected value is the number of leading R16 entries whose Hi does not
// exceed MaxLatin1.
func TestLatinOffset(t *testing.T) {
	var maps = []map[string]*RangeTable{
		Categories,
		FoldCategory,
		FoldScript,
		Properties,
		Scripts,
	}
	for _, m := range maps {
		for name, tab := range m {
			// Count the leading ranges that lie entirely within Latin-1.
			// The increment is required for the scan to advance and
			// terminate.
			i := 0
			for i < len(tab.R16) && tab.R16[i].Hi <= MaxLatin1 {
				i++
			}
			if tab.LatinOffset != i {
				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, i)
			}
		}
	}
}