1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// upperTest holds runes that TestIsLetter and TestIsUpper range over; the
// failure messages in those tests expect IsLetter and IsUpper to report true
// for every entry.
// NOTE(review): the element lists of the tables below are not visible here.
16 var upperTest
= []rune
{
// notupperTest holds runes for which TestIsUpper's failure message expects
// IsUpper to report false.
41 var notupperTest
= []rune
{
// letterTest holds runes for which TestIsLetter's failure message expects
// true and TestIsSpace's failure message expects false.
55 var letterTest
= []rune
{
// notletterTest holds runes for which the failure messages in TestIsLetter
// and TestIsUpper expect false.
90 var notletterTest
= []rune
{
// spaceTest holds runes for which TestIsSpace's failure message expects
// IsSpace to report true.
102 // Contains all the special cased Latin-1 chars.
103 var spaceTest
= []rune
{
// caseTest is the table of case-mapping expectations shared by TestTo,
// TestToUpperCase, TestToLowerCase and TestToTitleCase. Those tests read the
// fields cas, in and out of each entry.
// NOTE(review): caseT is declared outside this view; the positional literals
// presumably follow the order {cas, in, out} -- confirm against its definition.
121 var caseTest
= []caseT
{
// Input equals expected output: 1<<30 lies above the largest code point
// (U+10FFFF), so no mapping is expected to apply.
125 {UpperCase
, 1 << 30, 1 << 30},
127 // ASCII (special-cased so test carefully)
128 {UpperCase
, '\n', '\n'},
129 {UpperCase
, 'a', 'A'},
130 {UpperCase
, 'A', 'A'},
131 {UpperCase
, '7', '7'},
132 {LowerCase
, '\n', '\n'},
133 {LowerCase
, 'a', 'a'},
134 {LowerCase
, 'A', 'a'},
135 {LowerCase
, '7', '7'},
136 {TitleCase
, '\n', '\n'},
137 {TitleCase
, 'a', 'A'},
138 {TitleCase
, 'A', 'A'},
139 {TitleCase
, '7', '7'},
141 // Latin-1: easy to read the tests!
142 {UpperCase
, 0x80, 0x80},
143 {UpperCase
, 'Å', 'Å'},
144 {UpperCase
, 'å', 'Å'},
145 {LowerCase
, 0x80, 0x80},
146 {LowerCase
, 'Å', 'å'},
147 {LowerCase
, 'å', 'å'},
148 {TitleCase
, 0x80, 0x80},
149 {TitleCase
, 'Å', 'Å'},
150 {TitleCase
, 'å', 'Å'},
152 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
153 {UpperCase
, 0x0131, 'I'},
154 {LowerCase
, 0x0131, 0x0131},
155 {TitleCase
, 0x0131, 'I'},
157 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132
158 {UpperCase
, 0x0133, 0x0132},
159 {LowerCase
, 0x0133, 0x0133},
160 {TitleCase
, 0x0133, 0x0132},
162 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
163 {UpperCase
, 0x212A, 0x212A},
164 {LowerCase
, 0x212A, 'k'},
165 {TitleCase
, 0x212A, 0x212A},
167 // From an UpperLower sequence
168 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641;
169 {UpperCase
, 0xA640, 0xA640},
170 {LowerCase
, 0xA640, 0xA641},
171 {TitleCase
, 0xA640, 0xA640},
172 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640
173 {UpperCase
, 0xA641, 0xA640},
174 {LowerCase
, 0xA641, 0xA641},
175 {TitleCase
, 0xA641, 0xA640},
176 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F;
177 {UpperCase
, 0xA64E, 0xA64E},
178 {LowerCase
, 0xA64E, 0xA64F},
179 {TitleCase
, 0xA64E, 0xA64E},
180 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E
181 {UpperCase
, 0xA65F, 0xA65E},
182 {LowerCase
, 0xA65F, 0xA65F},
183 {TitleCase
, 0xA65F, 0xA65E},
185 // From another UpperLower sequence
186 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A;
187 {UpperCase
, 0x0139, 0x0139},
188 {LowerCase
, 0x0139, 0x013A},
189 {TitleCase
, 0x0139, 0x0139},
190 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140;
191 {UpperCase
, 0x013f, 0x013f},
192 {LowerCase
, 0x013f, 0x0140},
193 {TitleCase
, 0x013f, 0x013f},
194 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147
195 {UpperCase
, 0x0148, 0x0147},
196 {LowerCase
, 0x0148, 0x0148},
197 {TitleCase
, 0x0148, 0x0147},
199 // Lowercase lower than uppercase.
200 // AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8
201 {UpperCase
, 0xab78, 0x13a8},
202 {LowerCase
, 0xab78, 0xab78},
203 {TitleCase
, 0xab78, 0x13a8},
204 {UpperCase
, 0x13a8, 0x13a8},
205 {LowerCase
, 0x13a8, 0xab78},
206 {TitleCase
, 0x13a8, 0x13a8},
208 // Last block in the 5.1.0 table
209 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
210 {UpperCase
, 0x10400, 0x10400},
211 {LowerCase
, 0x10400, 0x10428},
212 {TitleCase
, 0x10400, 0x10400},
213 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F;
214 {UpperCase
, 0x10427, 0x10427},
215 {LowerCase
, 0x10427, 0x1044F},
216 {TitleCase
, 0x10427, 0x10427},
217 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400
218 {UpperCase
, 0x10428, 0x10400},
219 {LowerCase
, 0x10428, 0x10428},
220 {TitleCase
, 0x10428, 0x10400},
221 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427
222 {UpperCase
, 0x1044F, 0x10427},
223 {LowerCase
, 0x1044F, 0x1044F},
224 {TitleCase
, 0x1044F, 0x10427},
226 // First one not in the 5.1.0 table
227 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;;
228 {UpperCase
, 0x10450, 0x10450},
229 {LowerCase
, 0x10450, 0x10450},
230 {TitleCase
, 0x10450, 0x10450},
232 // Non-letters with case.
233 {LowerCase
, 0x2161, 0x2171},
234 {UpperCase
, 0x0345, 0x0399},
// TestIsLetter ranges over upperTest, letterTest and notletterTest in turn and
// reports one failure per offending rune. Judging by the messages, entries of
// the first two tables are expected to be letters and entries of
// notletterTest are not.
// NOTE(review): the if-conditions guarding each t.Errorf are not visible in
// this view; presumably they call IsLetter(r) -- confirm.
237 func TestIsLetter(t
*testing
.T
) {
238 for _
, r
:= range upperTest
{
240 t
.Errorf("IsLetter(U+%04X) = false, want true", r
)
243 for _
, r
:= range letterTest
{
245 t
.Errorf("IsLetter(U+%04X) = false, want true", r
)
248 for _
, r
:= range notletterTest
{
250 t
.Errorf("IsLetter(U+%04X) = true, want false", r
)
// TestIsUpper ranges over upperTest, notupperTest and notletterTest in turn
// and reports one failure per offending rune. Judging by the messages, only
// the entries of upperTest are expected to be upper case.
// NOTE(review): the if-conditions guarding each t.Errorf are not visible in
// this view; presumably they call IsUpper(r) -- confirm.
255 func TestIsUpper(t
*testing
.T
) {
256 for _
, r
:= range upperTest
{
258 t
.Errorf("IsUpper(U+%04X) = false, want true", r
)
261 for _
, r
:= range notupperTest
{
263 t
.Errorf("IsUpper(U+%04X) = true, want false", r
)
266 for _
, r
:= range notletterTest
{
268 t
.Errorf("IsUpper(U+%04X) = true, want false", r
)
// caseString returns a string describing the case constant c. TestTo passes
// its result to the %s verb of its failure message.
// NOTE(review): the function body is not visible in this view.
273 func caseString(c
int) string {
// TestTo ranges over caseTest and reports, for an entry c, the input c.in,
// the case name from caseString(c.cas), the computed rune r and the expected
// rune c.out.
// NOTE(review): the statements that compute r and compare it with c.out are
// not visible here; the message suggests r is To(c.cas, c.in) -- confirm.
285 func TestTo(t
*testing
.T
) {
286 for _
, c
:= range caseTest
{
289 t
.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c
.in
, caseString(c
.cas
), r
, c
.out
)
// TestToUpperCase ranges over caseTest, tests each entry's cas field against
// UpperCase, and reports c.in, a computed rune r and the expected c.out on
// failure.
// NOTE(review): the body of the cas filter and the computation of r are not
// visible here; presumably non-UpperCase entries are skipped and r is
// ToUpper(c.in) -- confirm.
294 func TestToUpperCase(t
*testing
.T
) {
295 for _
, c
:= range caseTest
{
296 if c
.cas
!= UpperCase
{
301 t
.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
// TestToLowerCase ranges over caseTest, tests each entry's cas field against
// LowerCase, and reports c.in, a computed rune r and the expected c.out on
// failure.
// NOTE(review): the body of the cas filter and the computation of r are not
// visible here; presumably non-LowerCase entries are skipped and r is
// ToLower(c.in) -- confirm.
306 func TestToLowerCase(t
*testing
.T
) {
307 for _
, c
:= range caseTest
{
308 if c
.cas
!= LowerCase
{
313 t
.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
// TestToTitleCase ranges over caseTest, tests each entry's cas field against
// TitleCase, and reports c.in, a computed rune r and the expected c.out on
// failure.
// NOTE(review): the body of the cas filter and the computation of r are not
// visible here; presumably non-TitleCase entries are skipped and r is
// ToTitle(c.in) -- confirm.
318 func TestToTitleCase(t
*testing
.T
) {
319 for _
, c
:= range caseTest
{
320 if c
.cas
!= TitleCase
{
325 t
.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c
.in
, r
, c
.out
)
// TestIsSpace ranges over spaceTest and then letterTest, reporting one failure
// per offending rune. Judging by the messages, spaceTest entries are expected
// to be spaces and letterTest entries are not.
// NOTE(review): the if-conditions guarding each t.Errorf are not visible in
// this view; presumably they call IsSpace(c) -- confirm.
330 func TestIsSpace(t
*testing
.T
) {
331 for _
, c
:= range spaceTest
{
333 t
.Errorf("IsSpace(U+%04X) = false; want true", c
)
336 for _
, c
:= range letterTest
{
338 t
.Errorf("IsSpace(U+%04X) = true; want false", c
)
// TestLetterOptimizations cross-checks the specialised helpers (IsLetter,
// IsUpper, IsLower, IsTitle, IsSpace, ToUpper, ToLower, ToTitle) against the
// generic Is and To lookups for every rune from 0 through MaxLatin1, the only
// range this test needs to cover. Any disagreement is reported per rune, in
// the order the checks are listed.
func TestLetterOptimizations(t *testing.T) {
	for r := rune(0); r <= MaxLatin1; r++ {
		// Each entry pairs "generic and specialised results agree" with the
		// message to emit when they do not.
		checks := []struct {
			agree  bool
			format string
		}{
			{Is(Letter, r) == IsLetter(r), "IsLetter(U+%04X) disagrees with Is(Letter)"},
			{Is(Upper, r) == IsUpper(r), "IsUpper(U+%04X) disagrees with Is(Upper)"},
			{Is(Lower, r) == IsLower(r), "IsLower(U+%04X) disagrees with Is(Lower)"},
			{Is(Title, r) == IsTitle(r), "IsTitle(U+%04X) disagrees with Is(Title)"},
			{Is(White_Space, r) == IsSpace(r), "IsSpace(U+%04X) disagrees with Is(White_Space)"},
			{To(UpperCase, r) == ToUpper(r), "ToUpper(U+%04X) disagrees with To(Upper)"},
			{To(LowerCase, r) == ToLower(r), "ToLower(U+%04X) disagrees with To(Lower)"},
			{To(TitleCase, r) == ToTitle(r), "ToTitle(U+%04X) disagrees with To(Title)"},
		}
		for _, check := range checks {
			if !check.agree {
				t.Errorf(check.format, r)
			}
		}
	}
}
// TestTurkishCase checks TurkishCase against two parallel alphabets: lower[i]
// and upper[i] are the lowercase and uppercase forms of the same Turkish
// letter. For every pair (l, u) it verifies that
//   - ToLower leaves l unchanged and maps u to l,
//   - ToUpper leaves u unchanged and maps l to u,
//   - ToTitle leaves u unchanged and maps l to u.
//
// Each mapping is computed once and the same value is used for both the
// comparison and the failure message, so a failure always reports the result
// of the call that was actually checked.
func TestTurkishCase(t *testing.T) {
	lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz")
	upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ")
	for i, l := range lower {
		u := upper[i]
		if got := TurkishCase.ToLower(l); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, got, l)
		}
		if got := TurkishCase.ToUpper(u); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToUpper(l); got != u {
			t.Errorf("upper(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
		if got := TurkishCase.ToLower(u); got != l {
			t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, got, l)
		}
		if got := TurkishCase.ToTitle(u); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, got, u)
		}
		if got := TurkishCase.ToTitle(l); got != u {
			t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, got, u)
		}
	}
}
// simpleFoldTests is the list of strings that TestSimpleFold ranges over.
// NOTE(review): the string entries are not visible in this view; only the
// section comments of the literal remain.
400 var simpleFoldTests
= []string{
401 // SimpleFold(x) returns the next equivalent rune > x or wraps
402 // around to smaller values.
408 // ASCII special cases.
412 // Non-ASCII special cases.
416 // Extra special cases: has lower/upper but no case fold.
420 // Upper comes before lower (Cherokee).
424 func TestSimpleFold(t
*testing
.T
) {
425 for _
, tt
:= range simpleFoldTests
{
427 r
:= cycle
[len(cycle
)-1]
428 for _
, out
:= range cycle
{
429 if r
:= SimpleFold(r
); r
!= out
{
430 t
.Errorf("SimpleFold(%#U) = %#U, want %#U", r
, r
, out
)
437 // Running 'go test -calibrate' runs the calibration to find a plausible
438 // cutoff point for linear search of a range list vs. binary search.
439 // We create a fake table and then time how long it takes to do a
440 // sequence of searches within that table, for all possible inputs
441 // relative to the ranges (something before all, in each, between each, after all).
442 // This assumes that all possible runes are equally likely.
443 // In practice most runes are ASCII so this is a conservative estimate
444 // of an effective cutoff value. In practice we could probably set it higher
445 // than what this function recommends.
// calibrate is the boolean -calibrate command-line flag; it defaults to false.
447 var calibrate
= flag
.Bool("calibrate", false, "compute crossover for linear vs. binary search")
// TestCalibrate uses sort.Search over table sizes in [0, 64) to find the
// first size n at which the benchmarked linear search costs more than 110% of
// the benchmarked binary search (ns/op). It prints each measurement and the
// resulting cutoff.
// NOTE(review): several lines are not visible in this view, including the
// definitions of tab and max and any check of the calibrate flag.
449 func TestCalibrate(t
*testing
.T
) {
// NOTE(review): the warning is printed when the architecture IS amd64; the
// code does not state why -- confirm the intent.
454 if runtime
.GOARCH
== "amd64" {
455 fmt
.Printf("warning: running calibration on %s\n", runtime
.GOARCH
)
458 // Find the point where binary search wins by more than 10%.
459 // The 10% bias gives linear search an edge when they're close,
460 // because on predominantly ASCII inputs linear search is even
461 // better than our benchmarks measure.
462 n
:= sort
.Search(64, func(n
int) bool {
// blinear times linear(tab, j) for every j from 0 through max.
464 blinear
:= func(b
*testing
.B
) {
467 for i
:= 0; i
< b
.N
; i
++ {
468 for j
:= 0; j
<= max
; j
++ {
469 linear(tab
, uint16(j
))
// bbinary times binary(tab, j) over the same inputs as blinear.
473 bbinary
:= func(b
*testing
.B
) {
476 for i
:= 0; i
< b
.N
; i
++ {
477 for j
:= 0; j
<= max
; j
++ {
478 binary(tab
, uint16(j
))
482 bmlinear
:= testing
.Benchmark(blinear
)
483 bmbinary
:= testing
.Benchmark(bbinary
)
484 fmt
.Printf("n=%d: linear=%d binary=%d\n", n
, bmlinear
.NsPerOp(), bmbinary
.NsPerOp())
// True once linear is more than 10% slower than binary at this size.
485 return bmlinear
.NsPerOp()*100 > bmbinary
.NsPerOp()*110
487 fmt
.Printf("calibration: linear cutoff = %d\n", n
)
// fakeTable builds a slice of n Range16 values by appending to r16. The i-th
// entry is Range16{i*5 + 10, i*5 + 12, 1}, so consecutive entries start five
// apart.
// NOTE(review): the declaration and return of r16 are not visible in this view.
490 func fakeTable(n
int) []Range16
{
492 for i
:= 0; i
< n
; i
++ {
493 r16
= append(r16
, Range16
{uint16(i
*5 + 10), uint16(i
*5 + 12), 1})
// linear iterates over ranges by index and, for the selected range_, reports
// whether r falls on that range's stride, i.e. (r-Lo)%Stride == 0.
// NOTE(review): the lines that bind range_ and decide which range applies are
// not visible in this view.
498 func linear(ranges
[]Range16
, r
uint16) bool {
499 for i
:= range ranges
{
505 return (r
-range_
.Lo
)%range_
.Stride
== 0
// binary looks r up in ranges by binary search. When r lies within
// [range_.Lo, range_.Hi] it reports whether r falls on that range's stride,
// i.e. (r-Lo)%Stride == 0.
// NOTE(review): the search loop that selects range_ is not visible in this
// view.
511 func binary(ranges
[]Range16
, r
uint16) bool {
512 // binary search over ranges
518 if range_
.Lo
<= r
&& r
<= range_
.Hi
{
519 return (r
-range_
.Lo
)%range_
.Stride
== 0
// TestLatinOffset walks every named RangeTable in maps, advances i while the
// R16 entry at index i has Hi <= MaxLatin1, and reports an error when the
// table's LatinOffset field differs from the resulting i.
// NOTE(review): the contents of maps, the initialisation of i and the loop
// body that advances it are not visible in this view.
530 func TestLatinOffset(t
*testing
.T
) {
531 var maps
= []map[string]*RangeTable
{
538 for _
, m
:= range maps
{
539 for name
, tab
:= range m
{
541 for i
< len(tab
.R16
) && tab
.R16
[i
].Hi
<= MaxLatin1
{
544 if tab
.LatinOffset
!= i
{
545 t
.Errorf("%s: LatinOffset=%d, want %d", name
, tab
.LatinOffset
, i
)