1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 var upperTest
= []rune
{
40 var notupperTest
= []rune
{
53 var letterTest
= []rune
{
89 var notletterTest
= []rune
{
100 // Contains all the special cased Latin-1 chars.
101 var spaceTest
= []rune
{
119 var caseTest
= []caseT
{
123 {UpperCase
, 1 << 30, 1 << 30},
125 // ASCII (special-cased so test carefully)
126 {UpperCase
, '\n', '\n'},
127 {UpperCase
, 'a', 'A'},
128 {UpperCase
, 'A', 'A'},
129 {UpperCase
, '7', '7'},
130 {LowerCase
, '\n', '\n'},
131 {LowerCase
, 'a', 'a'},
132 {LowerCase
, 'A', 'a'},
133 {LowerCase
, '7', '7'},
134 {TitleCase
, '\n', '\n'},
135 {TitleCase
, 'a', 'A'},
136 {TitleCase
, 'A', 'A'},
137 {TitleCase
, '7', '7'},
139 // Latin-1: easy to read the tests!
140 {UpperCase
, 0x80, 0x80},
141 {UpperCase
, 'Å', 'Å'},
142 {UpperCase
, 'å', 'Å'},
143 {LowerCase
, 0x80, 0x80},
144 {LowerCase
, 'Å', 'å'},
145 {LowerCase
, 'å', 'å'},
146 {TitleCase
, 0x80, 0x80},
147 {TitleCase
, 'Å', 'Å'},
148 {TitleCase
, 'å', 'Å'},
150 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
151 {UpperCase
, 0x0131, 'I'},
152 {LowerCase
, 0x0131, 0x0131},
153 {TitleCase
, 0x0131, 'I'},
155 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
156 {UpperCase
, 0x0133, 0x0132},
157 {LowerCase
, 0x0133, 0x0133},
158 {TitleCase
, 0x0133, 0x0132},
160 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
161 {UpperCase
, 0x212A, 0x212A},
162 {LowerCase
, 0x212A, 'k'},
163 {TitleCase
, 0x212A, 0x212A},
165 // From an UpperLower sequence
166 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
167 {UpperCase
, 0xA640, 0xA640},
168 {LowerCase
, 0xA640, 0xA641},
169 {TitleCase
, 0xA640, 0xA640},
170 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
171 {UpperCase
, 0xA641, 0xA640},
172 {LowerCase
, 0xA641, 0xA641},
173 {TitleCase
, 0xA641, 0xA640},
174 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
175 {UpperCase
, 0xA64E, 0xA64E},
176 {LowerCase
, 0xA64E, 0xA64F},
177 {TitleCase
, 0xA64E, 0xA64E},
178 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
179 {UpperCase
, 0xA65F, 0xA65E},
180 {LowerCase
, 0xA65F, 0xA65F},
181 {TitleCase
, 0xA65F, 0xA65E},
183 // From another UpperLower sequence
184 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
185 {UpperCase
, 0x0139, 0x0139},
186 {LowerCase
, 0x0139, 0x013A},
187 {TitleCase
, 0x0139, 0x0139},
188 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
189 {UpperCase
, 0x013f, 0x013f},
190 {LowerCase
, 0x013f, 0x0140},
191 {TitleCase
, 0x013f, 0x013f},
192 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
193 {UpperCase
, 0x0148, 0x0147},
194 {LowerCase
, 0x0148, 0x0148},
195 {TitleCase
, 0x0148, 0x0147},
197 // Last block in the 5.1.0 table
198 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
199 {UpperCase
, 0x10400, 0x10400},
200 {LowerCase
, 0x10400, 0x10428},
201 {TitleCase
, 0x10400, 0x10400},
202 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
203 {UpperCase
, 0x10427, 0x10427},
204 {LowerCase
, 0x10427, 0x1044F},
205 {TitleCase
, 0x10427, 0x10427},
206 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
207 {UpperCase
, 0x10428, 0x10400},
208 {LowerCase
, 0x10428, 0x10428},
209 {TitleCase
, 0x10428, 0x10400},
210 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
211 {UpperCase
, 0x1044F, 0x10427},
212 {LowerCase
, 0x1044F, 0x1044F},
213 {TitleCase
, 0x1044F, 0x10427},
215 // First one not in the 5.1.0 table
216 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
217 {UpperCase
, 0x10450, 0x10450},
218 {LowerCase
, 0x10450, 0x10450},
219 {TitleCase
, 0x10450, 0x10450},
221 // Non-letters with case.
222 {LowerCase
, 0x2161, 0x2171},
223 {UpperCase
, 0x0345, 0x0399},
226 func TestIsLetter(t
*testing
.T
) {
227 for _
, r
:= range upperTest
{
229 t
.Errorf("IsLetter(U+%04X) = false, want true", r
)
232 for _
, r
:= range letterTest
{
234 t
.Errorf("IsLetter(U+%04X) = false, want true", r
)
237 for _
, r
:= range notletterTest
{
239 t
.Errorf("IsLetter(U+%04X) = true, want false", r
)
244 func TestIsUpper(t
*testing
.T
) {
245 for _
, r
:= range upperTest
{
247 t
.Errorf("IsUpper(U+%04X) = false, want true", r
)
250 for _
, r
:= range notupperTest
{
252 t
.Errorf("IsUpper(U+%04X) = true, want false", r
)
255 for _
, r
:= range notletterTest
{
257 t
.Errorf("IsUpper(U+%04X) = true, want false", r
)
262 func caseString(c
int) string {
274 func TestTo(t
*testing
.T
) {
275 for _
, c
:= range caseTest
{
278 t
.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c
.in
, caseString(c
.cas
), r
, c
.out
)
283 func TestToUpperCase(t
*testing
.T
) {
284 for _
, c
:= range caseTest
{
285 if c
.cas
!= UpperCase
{
290 t
.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
295 func TestToLowerCase(t
*testing
.T
) {
296 for _
, c
:= range caseTest
{
297 if c
.cas
!= LowerCase
{
302 t
.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
307 func TestToTitleCase(t
*testing
.T
) {
308 for _
, c
:= range caseTest
{
309 if c
.cas
!= TitleCase
{
314 t
.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
319 func TestIsSpace(t
*testing
.T
) {
320 for _
, c
:= range spaceTest
{
322 t
.Errorf("IsSpace(U+%04X) = false; want true", c
)
325 for _
, c
:= range letterTest
{
327 t
.Errorf("IsSpace(U+%04X) = true; want false", c
)
// TestLetterOptimizations verifies that the specialized predicates and case
// mappings (IsLetter, IsUpper, IsLower, IsTitle, IsSpace, ToUpper, ToLower,
// ToTitle) return the same answer as the generic Is and To entry points.
// Only the runes from 0 through MaxLatin1 are examined.
func TestLetterOptimizations(t *testing.T) {
	for r := rune(0); r <= MaxLatin1; r++ {
		// Category predicates versus Is on the corresponding range table.
		if fast, generic := IsLetter(r), Is(Letter, r); fast != generic {
			t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", r)
		}
		if fast, generic := IsUpper(r), Is(Upper, r); fast != generic {
			t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", r)
		}
		if fast, generic := IsLower(r), Is(Lower, r); fast != generic {
			t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", r)
		}
		if fast, generic := IsTitle(r), Is(Title, r); fast != generic {
			t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", r)
		}
		if fast, generic := IsSpace(r), Is(White_Space, r); fast != generic {
			t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", r)
		}

		// Case mappings versus To with the corresponding case constant.
		if fast, generic := ToUpper(r), To(UpperCase, r); fast != generic {
			t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", r)
		}
		if fast, generic := ToLower(r), To(LowerCase, r); fast != generic {
			t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", r)
		}
		if fast, generic := ToTitle(r), To(TitleCase, r); fast != generic {
			t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", r)
		}
	}
}
// TestTurkishCase checks the TurkishCase special-case mappings against two
// parallel alphabets: lower[i] and upper[i] are the lowercase and uppercase
// forms of the same letter. For every pair it verifies that
//
//   - ToLower leaves the lowercase form alone and maps upper to lower,
//   - ToUpper leaves the uppercase form alone and maps lower to upper,
//   - ToTitle leaves the uppercase form alone and maps lower to upper.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	// The two alphabets are indexed in lockstep; fail clearly rather than
	// panic with an index error if they ever drift apart.
	if len(lower) != len(upper) {
		t.Fatalf("alphabet length mismatch: %d lower, %d upper", len(lower), len(upper))
	}
	for i, l := range lower {
		u := upper[i]
		// Each result is computed once and reused in the message, so the
		// reported value is always the one that was actually compared.
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
389 var simpleFoldTests
= []string{
390 // SimpleFold(x) returns the next equivalent rune > x or wraps
391 // around to smaller values.
397 // ASCII special cases.
401 // Non-ASCII special cases.
405 // Extra special cases: has lower/upper but no case fold.
410 func TestSimpleFold(t
*testing
.T
) {
411 for _
, tt
:= range simpleFoldTests
{
413 r
:= cycle
[len(cycle
)-1]
414 for _
, out
:= range cycle
{
415 if r
:= SimpleFold(r
); r
!= out
{
416 t
.Errorf("SimpleFold(%#U) = %#U, want %#U", r
, r
, out
)
423 // Running 'go test -calibrate' runs the calibration to find a plausible
424 // cutoff point for linear search of a range list vs. binary search.
425 // We create a fake table and then time how long it takes to do a
426 // sequence of searches within that table, for all possible inputs
427 // relative to the ranges (something before all, in each, between each, after all).
428 // This assumes that all possible runes are equally likely.
429 // In practice most runes are ASCII so this is a conservative estimate
430 // of an effective cutoff value. In practice we could probably set it higher
431 // than what this function recommends.
// calibrate is the -calibrate command-line flag (default false). Its usage
// string describes it as computing the crossover between linear and binary
// search.
var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")
435 func TestCalibrate(t
*testing
.T
) {
440 if runtime
.GOARCH
== "amd64" {
441 fmt
.Printf("warning: running calibration on %s\n", runtime
.GOARCH
)
444 // Find the point where binary search wins by more than 10%.
445 // The 10% bias gives linear search an edge when they're close,
446 // because on predominantly ASCII inputs linear search is even
447 // better than our benchmarks measure.
448 n
:= sort
.Search(64, func(n
int) bool {
450 blinear
:= func(b
*testing
.B
) {
453 for i
:= 0; i
< b
.N
; i
++ {
454 for j
:= 0; j
<= max
; j
++ {
455 linear(tab
, uint16(j
))
459 bbinary
:= func(b
*testing
.B
) {
462 for i
:= 0; i
< b
.N
; i
++ {
463 for j
:= 0; j
<= max
; j
++ {
464 binary(tab
, uint16(j
))
468 bmlinear
:= testing
.Benchmark(blinear
)
469 bmbinary
:= testing
.Benchmark(bbinary
)
470 fmt
.Printf("n=%d: linear=%d binary=%d\n", n
, bmlinear
.NsPerOp(), bmbinary
.NsPerOp())
471 return bmlinear
.NsPerOp()*100 > bmbinary
.NsPerOp()*110
473 fmt
.Printf("calibration: linear cutoff = %d\n", n
)
// fakeTable builds a synthetic table of n Range16 entries, each with stride 1.
// Entry i covers [5*i+10, 5*i+12], so consecutive ranges are separated by a
// gap of two values. The result is nil when n <= 0.
func fakeTable(n int) []Range16 {
	var table []Range16
	for lo := 10; len(table) < n; lo += 5 {
		table = append(table, Range16{Lo: uint16(lo), Hi: uint16(lo + 2), Stride: 1})
	}
	return table
}
484 func linear(ranges
[]Range16
, r
uint16) bool {
485 for i
:= range ranges
{
491 return (r
-range_
.Lo
)%range_
.Stride
== 0
497 func binary(ranges
[]Range16
, r
uint16) bool {
498 // binary search over ranges
504 if range_
.Lo
<= r
&& r
<= range_
.Hi
{
505 return (r
-range_
.Lo
)%range_
.Stride
== 0
516 func TestLatinOffset(t
*testing
.T
) {
517 var maps
= []map[string]*RangeTable
{
524 for _
, m
:= range maps
{
525 for name
, tab
:= range m
{
527 for i
< len(tab
.R16
) && tab
.R16
[i
].Hi
<= MaxLatin1
{
530 if tab
.LatinOffset
!= i
{
531 t
.Errorf("%s: LatinOffset=%d, want %d", name
, tab
.LatinOffset
, i
)