// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import (
	"bytes"
	"strings"
	"testing"
)

type PositionTest struct {
	input  string
	pos    int
	buffer string // expected contents of reorderBuffer, if applicable
}

type positionFunc func(rb *reorderBuffer, s string) int
func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
	rb := reorderBuffer{}
	rb.init(f, nil)
	for i, test := range tests {
		rb.reset()
		rb.src = inputString(test.input)
		rb.nsrc = len(test.input)
		pos := fn(&rb, test.input)
		if pos != test.pos {
			t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
		}
		runes := []rune(test.buffer)
		if rb.nrune != len(runes) {
			t.Errorf("%s:%d: reorder buffer length is %d; want %d", name, i, rb.nrune, len(runes))
			continue
		}
		for j, want := range runes {
			found := rune(rb.runeAt(j))
			if found != want {
				t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
			}
		}
	}
}
var decomposeSegmentTests = []PositionTest{
	// illegal runes
	{"\xC0", 0, ""},
	{"\u00E0\x80", 2, "\u0061\u0300"},
	// starter
	{"a", 1, "a"},
	{"ab", 1, "a"},
	// starter + composing
	{"a\u0300", 3, "a\u0300"},
	{"a\u0300b", 3, "a\u0300"},
	// with decomposition
	{"\u00C0", 2, "A\u0300"},
	{"\u00C0b", 2, "A\u0300"},
	// long
	{strings.Repeat("\u0300", 31), 62, strings.Repeat("\u0300", 31)},
	// ends with incomplete UTF-8 encoding
	{"\xCC", 0, ""},
	{"\u0300\xCC", 2, "\u0300"},
}

func decomposeSegmentF(rb *reorderBuffer, s string) int {
	rb.src = inputString(s)
	rb.nsrc = len(s)
	return decomposeSegment(rb, 0)
}

func TestDecomposeSegment(t *testing.T) {
	runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
}
var firstBoundaryTests = []PositionTest{
	// no boundary
	{"", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	// illegal runes
	{"\xff", 0, ""},
	{"\u0300\xff", 2, ""},
	{"\u0300\xc0\x80\x80", 2, ""},
	// boundaries
	{"a", 0, ""},
	{"\u0300a", 2, ""},
	// Hangul
	{"\u1103\u1161", 0, ""},
	{"\u110B\u1173\u11B7", 0, ""},
	{"\u1161\u110B\u1173\u11B7", 3, ""},
	{"\u1173\u11B7\u1103\u1161", 6, ""},
	// too many combining characters.
	{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
	{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
	{strings.Repeat("\u0300", maxCombiningChars+1), 60, ""},
}

func firstBoundaryF(rb *reorderBuffer, s string) int {
	return rb.f.form.FirstBoundary([]byte(s))
}

func firstBoundaryStringF(rb *reorderBuffer, s string) int {
	return rb.f.form.FirstBoundaryInString(s)
}

func TestFirstBoundary(t *testing.T) {
	runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
	runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
}
var decomposeToLastTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	{"a\u0301\u0635", 5, ""},
	// ends with non-inert starter
	{"a", 0, "a"},
	{"a\u0301a", 3, "a"},
	{"a\u0301\u03B9", 3, "\u03B9"},
	{"a\u0327", 0, "a\u0327"},
	// illegal runes
	{"\xFF", 1, ""},
	{"aa\xFF", 3, ""},
	{"\xC0\x80\x80", 3, ""},
	{"\xCC\x80\x80", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"a\xCC", 2, ""},
	// ends with combining characters
	{"\u0300\u0301", 0, "\u0300\u0301"},
	{"a\u0300\u0301", 0, "a\u0300\u0301"},
	{"a\u0301\u0308", 0, "a\u0301\u0308"},
	{"a\u0308\u0301", 0, "a\u0308\u0301"},
	{"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
	{"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
	{"\u00C0", 0, "A\u0300"},
	{"a\u00C0", 1, "A\u0300"},
	// decomposing
	{"a\u0300\uFDC0", 3, "\u0645\u062C\u064A"},
	{"\uFDC0" + strings.Repeat("\u0300", 26), 0, "\u0645\u062C\u064A" + strings.Repeat("\u0300", 26)},
	// Hangul
	{"a\u1103", 1, "\u1103"},
	{"a\u110B", 1, "\u110B"},
	{"a\u110B\u1173", 1, "\u110B\u1173"},
	// See comment in composition.go:compBoundaryAfter.
	{"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
	{"a\uC73C", 1, "\u110B\u1173"},
	{"다음", 3, "\u110B\u1173\u11B7"},
	{"다", 0, "\u1103\u1161"},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
	{"다음음", 6, "\u110B\u1173\u11B7"},
	{"음다다", 6, "\u1103\u1161"},
	// buffer overflow
	{"a" + strings.Repeat("\u0300", 30), 3, strings.Repeat("\u0300", 29)},
	{"\uFDFA" + strings.Repeat("\u0300", 14), 3, strings.Repeat("\u0300", 14)},
	// weird UTF-8
	{"a\u0300\u11B7", 0, "a\u0300\u11B7"},
}

func decomposeToLast(rb *reorderBuffer, s string) int {
	buf := decomposeToLastBoundary(rb, []byte(s))
	return len(buf)
}

func TestDecomposeToLastBoundary(t *testing.T) {
	runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
}
var lastBoundaryTests = []PositionTest{
	// ends with inert character
	{"Hello!", 6, ""},
	{"\u0632", 2, ""},
	// ends with non-inert starter
	{"a", 0, ""},
	// illegal runes
	{"\xff", 1, ""},
	{"aa\xff", 3, ""},
	{"a\xff\u0300", 1, ""},
	{"\xc0\x80\x80", 3, ""},
	{"\xc0\x80\x80\u0300", 3, ""},
	// ends with incomplete UTF-8 encoding
	{"\xCC", -1, ""},
	{"\xE0\x80", -1, ""},
	{"\xF0\x80\x80", -1, ""},
	{"a\xCC", 0, ""},
	{"\x80\xCC", 1, ""},
	{"\xCC\xCC", 1, ""},
	// ends with combining characters
	{"a\u0300\u0301", 0, ""},
	{"aaaa\u0300\u0301", 3, ""},
	{"\u0300a\u0300\u0301", 2, ""},
	{"\u00C0", 0, ""},
	{"a\u00C0", 1, ""},
	// decomposition may recombine
	{"\u0226", 0, ""},
	// no boundary
	{"", -1, ""},
	{"\u0300\u0301", -1, ""},
	{"\u0300", -1, ""},
	{"\x80\x80", -1, ""},
	{"\x80\x80\u0301", -1, ""},
	// Hangul
	{"다음", 3, ""},
	{"다", 0, ""},
	{"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
	{"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
	// too many combining characters.
	{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
	{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
	{strings.Repeat("\u0300", maxCombiningChars+1), 62, ""},
}

func lastBoundaryF(rb *reorderBuffer, s string) int {
	return rb.f.form.LastBoundary([]byte(s))
}

func TestLastBoundary(t *testing.T) {
	runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
}
var quickSpanTests = []PositionTest{
	{"", 0, ""},
	// starters
	{"a", 1, ""},
	{"abc", 3, ""},
	{"\u043Eb", 3, ""},
	// incomplete last rune.
	{"\xCC", 1, ""},
	{"a\xCC", 2, ""},
	// incorrectly ordered combining characters
	{"\u0300\u0316", 0, ""},
	{"\u0300\u0316cd", 0, ""},
	// have a maximum number of combining characters.
	{strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
	{"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
	{"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
	{"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
}

var quickSpanNFDTests = []PositionTest{
	// needs decomposing
	{"\u00C0", 0, ""},
	{"abc\u00C0", 3, ""},
	// correctly ordered combining characters
	{"\u0300", 2, ""},
	{"ab\u0300", 4, ""},
	{"ab\u0300cd", 6, ""},
	{"\u0300cd", 4, ""},
	{"\u0316\u0300", 4, ""},
	{"ab\u0316\u0300", 6, ""},
	{"ab\u0316\u0300cd", 8, ""},
	{"ab\u0316\u0300\u00C0", 6, ""},
	{"\u0316\u0300cd", 6, ""},
	{"\u043E\u0308b", 5, ""},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", 1, ""}, // TODO: we could skip 'b' as well.
	{"ab\u0300\u0316cd", 1, ""},
	// Hangul
	{"같은", 0, ""},
}

var quickSpanNFCTests = []PositionTest{
	// okay composed
	{"\u00C0", 2, ""},
	{"abc\u00C0", 5, ""},
	// correctly ordered combining characters
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u00C0\u035D", 4, ""},
	// we do not special case leading combining characters
	{"\u0300cd", 0, ""},
	{"\u0300", 0, ""},
	{"\u0316\u0300", 0, ""},
	{"\u0316\u0300cd", 0, ""},
	// incorrectly ordered combining characters
	{"ab\u0300\u0316", 1, ""},
	{"ab\u0300\u0316cd", 1, ""},
	// Hangul
	{"같은", 6, ""},
}

func doQuickSpan(rb *reorderBuffer, s string) int {
	return rb.f.form.QuickSpan([]byte(s))
}

func doQuickSpanString(rb *reorderBuffer, s string) int {
	return rb.f.form.QuickSpanString(s)
}

func TestQuickSpan(t *testing.T) {
	runPosTests(t, "TestQuickSpanNFD1", NFD, doQuickSpan, quickSpanTests)
	runPosTests(t, "TestQuickSpanNFD2", NFD, doQuickSpan, quickSpanNFDTests)
	runPosTests(t, "TestQuickSpanNFC1", NFC, doQuickSpan, quickSpanTests)
	runPosTests(t, "TestQuickSpanNFC2", NFC, doQuickSpan, quickSpanNFCTests)

	runPosTests(t, "TestQuickSpanStringNFD1", NFD, doQuickSpanString, quickSpanTests)
	runPosTests(t, "TestQuickSpanStringNFD2", NFD, doQuickSpanString, quickSpanNFDTests)
	runPosTests(t, "TestQuickSpanStringNFC1", NFC, doQuickSpanString, quickSpanTests)
	runPosTests(t, "TestQuickSpanStringNFC2", NFC, doQuickSpanString, quickSpanNFCTests)
}
var isNormalTests = []PositionTest{
	{"", 1, ""},
	// illegal runes
	{"\xff", 1, ""},
	// starters
	{"a", 1, ""},
	{"abc", 1, ""},
	{"\u043Eb", 1, ""},
	// incorrectly ordered combining characters
	{"\u0300\u0316", 0, ""},
	{"ab\u0300\u0316", 0, ""},
	{"ab\u0300\u0316cd", 0, ""},
	{"\u0300\u0316cd", 0, ""},
}
var isNormalNFDTests = []PositionTest{
	// needs decomposing
	{"\u00C0", 0, ""},
	{"abc\u00C0", 0, ""},
	// correctly ordered combining characters
	{"\u0300", 1, ""},
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"\u0300cd", 1, ""},
	{"\u0316\u0300", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	{"\u043E\u0308b", 1, ""},
	// Hangul
	{"같은", 0, ""},
}
var isNormalNFCTests = []PositionTest{
	// okay composed
	{"\u00C0", 1, ""},
	{"abc\u00C0", 1, ""},
	// need reordering
	{"a\u0300", 0, ""},
	{"a\u0300cd", 0, ""},
	{"a\u0316\u0300", 0, ""},
	{"a\u0316\u0300cd", 0, ""},
	// correctly ordered combining characters
	{"ab\u0300", 1, ""},
	{"ab\u0300cd", 1, ""},
	{"ab\u0316\u0300", 1, ""},
	{"ab\u0316\u0300cd", 1, ""},
	{"\u00C0\u035D", 1, ""},
	{"\u0300", 1, ""},
	{"\u0316\u0300cd", 1, ""},
	// Hangul
	{"같은", 1, ""},
}

func isNormalF(rb *reorderBuffer, s string) int {
	if rb.f.form.IsNormal([]byte(s)) {
		return 1
	}
	return 0
}

func TestIsNormal(t *testing.T) {
	runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
	runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
	runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
}
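// A minimal direct-call sketch of the table-driven checks above (this test
// function is added purely for illustration and its name is not from the
// original suite): per the isNormalNFDTests and isNormalNFCTests entries for
// "\u00C0", the precomposed character is not NFD-normal but is NFC-normal.
func TestIsNormalDirectSketch(t *testing.T) {
	if NFD.IsNormal([]byte("\u00C0")) {
		t.Errorf("NFD.IsNormal(%+q) = true; want false", "\u00C0")
	}
	if !NFC.IsNormal([]byte("\u00C0")) {
		t.Errorf("NFC.IsNormal(%+q) = false; want true", "\u00C0")
	}
}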
type AppendTest struct {
	left  string
	right string
	out   string
}

type appendFunc func(f Form, out []byte, s string) []byte

func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
	for i, test := range tests {
		out := []byte(test.left)
		out = fn(f, out, test.right)
		outs := string(out)
		if len(outs) != len(test.out) {
			t.Errorf("%s:%d: length is %d; want %d", name, i, len(outs), len(test.out))
		}
		if outs != test.out {
			// Find first rune that differs and show context.
			ir := []rune(outs)
			ig := []rune(test.out)
			for j := 0; j < len(ir) && j < len(ig); j++ {
				if ir[j] == ig[j] {
					continue
				}
				if j -= 3; j < 0 {
					j = 0
				}
				for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
					t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, ir[j], ig[j])
				}
				break
			}
		}
	}
}
var appendTests = []AppendTest{
	// empty buffers
	{"", "", ""},
	{"a", "", "a"},
	{"", "a", "a"},
	{"", "\u0041\u0307\u0304", "\u01E0"},
	// segment split across buffers
	{"", "a\u0300b", "\u00E0b"},
	{"a", "\u0300b", "\u00E0b"},
	{"a", "\u0300\u0316", "\u00E0\u0316"},
	{"a", "\u0316\u0300", "\u00E0\u0316"},
	{"a", "\u0300a\u0300", "\u00E0\u00E0"},
	{"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
	{"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
	{"a\u0300", "\u0327", "\u00E0\u0327"},
	{"a\u0327", "\u0300", "\u00E0\u0327"},
	{"a\u0316", "\u0300", "\u00E0\u0316"},
	{"\u0041\u0307", "\u0304", "\u01E0"},
	// Hangul
	{"", "\u110B\u1173", "\uC73C"},
	{"", "\u1103\u1161", "\uB2E4"},
	{"", "\u110B\u1173\u11B7", "\uC74C"},
	{"", "\u320E", "\x28\uAC00\x29"},
	{"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
	{"\u1103", "\u1161", "\uB2E4"},
	{"\u110B", "\u1173\u11B7", "\uC74C"},
	{"\u110B\u1173", "\u11B7", "\uC74C"},
	{"\uC73C", "\u11B7", "\uC74C"},
	// UTF-8 encoding split across buffers
	{"a\xCC", "\x80", "\u00E0"},
	{"a\xCC", "\x80b", "\u00E0b"},
	{"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
	{"a\xCC", "\x80\x80", "\u00E0\x80"},
	{"a\xCC", "\x80\xCC", "\u00E0\xCC"},
	{"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
	// ending in incomplete UTF-8 encoding
	{"", "\xCC", "\xCC"},
	{"a", "\xCC", "a\xCC"},
	{"a", "b\xCC", "ab\xCC"},
	{"\u0226", "\xCC", "\u0226\xCC"},
	// illegal runes
	{"", "\x80", "\x80"},
	{"", "\x80\x80\x80", "\x80\x80\x80"},
	{"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
	{"", "a\x80", "a\x80"},
	{"", "a\x80\x80\x80", "a\x80\x80\x80"},
	{"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
	{"a", "\x80\x80\x80", "a\x80\x80\x80"},
	// overflow
	{"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
	{strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
	// overflow of combining characters
	{strings.Repeat("\u0300", 33), "", strings.Repeat("\u0300", 33)},
	// weird UTF-8
	{"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
	{"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
	{"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
	{"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
	{"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
	{"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
	{"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
	{"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
}
func appendF(f Form, out []byte, s string) []byte {
	return f.Append(out, []byte(s)...)
}

func appendStringF(f Form, out []byte, s string) []byte {
	return f.AppendString(out, s)
}

func bytesF(f Form, out []byte, s string) []byte {
	buf := []byte{}
	buf = append(buf, out...)
	buf = append(buf, s...)
	return f.Bytes(buf)
}

func stringF(f Form, out []byte, s string) []byte {
	outs := string(out) + s
	return []byte(f.String(outs))
}

func TestAppend(t *testing.T) {
	runAppendTests(t, "TestAppend", NFKC, appendF, appendTests)
	runAppendTests(t, "TestAppendString", NFKC, appendStringF, appendTests)
	runAppendTests(t, "TestBytes", NFKC, bytesF, appendTests)
	runAppendTests(t, "TestString", NFKC, stringF, appendTests)
}
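// A minimal direct-call sketch of what runAppendTests exercises through
// stringF (this test function is added for illustration only): the appendTests
// entry {"", "\u0041\u0307\u0304", "\u01E0"} corresponds to NFKC.String
// composing A + combining dot above + combining macron into U+01E0.
func TestStringDirectSketch(t *testing.T) {
	if got, want := NFKC.String("\u0041\u0307\u0304"), "\u01E0"; got != want {
		t.Errorf("NFKC.String(%+q) = %+q; want %+q", "\u0041\u0307\u0304", got, want)
	}
}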
func appendBench(f Form, in []byte) func() {
	buf := make([]byte, 0, 4*len(in))
	return func() {
		f.Append(buf, in...)
	}
}

func iterBench(f Form, in []byte) func() {
	buf := make([]byte, 4*len(in))
	iter := Iter{}
	return func() {
		iter.SetInput(f, in)
		for !iter.Done() {
			iter.Next(buf)
		}
	}
}

func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
	//bm = append(bm, appendBench(f, in))
	bm = append(bm, iterBench(f, in))
	return bm
}

func doFormBenchmark(b *testing.B, inf, f Form, s string) {
	b.StopTimer()
	in := inf.Bytes([]byte(s))
	bm := appendBenchmarks(nil, f, in)
	b.SetBytes(int64(len(in) * len(bm)))
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		for _, fn := range bm {
			fn()
		}
	}
}
var ascii = strings.Repeat("There is nothing to change here! ", 500)

func BenchmarkNormalizeAsciiNFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, ascii)
}
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, ascii)
}
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
	doFormBenchmark(b, NFC, NFKC, ascii)
}
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
	doFormBenchmark(b, NFC, NFKD, ascii)
}

func BenchmarkNormalizeNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_all)
}
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_all)
}
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_all)
}
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_all)
}

// Hangul is often special-cased, so we test it separately.
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
	doFormBenchmark(b, NFC, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
	doFormBenchmark(b, NFC, NFD, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
	doFormBenchmark(b, NFD, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
	doFormBenchmark(b, NFD, NFD, txt_kr)
}
var forms = []Form{NFC, NFD, NFKC, NFKD}

func doTextBenchmark(b *testing.B, s string) {
	b.StopTimer()
	in := []byte(s)
	bm := []func(){}
	for _, f := range forms {
		bm = appendBenchmarks(bm, f, in)
	}
	b.SetBytes(int64(len(s) * len(bm)))
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		for _, f := range bm {
			f()
		}
	}
}

func BenchmarkCanonicalOrdering(b *testing.B) {
	doTextBenchmark(b, txt_canon)
}
func BenchmarkExtendedLatin(b *testing.B) {
	doTextBenchmark(b, txt_vn)
}
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
	doTextBenchmark(b, twoByteUtf8)
}
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
	doTextBenchmark(b, threeByteUtf8)
}
func BenchmarkHangul(b *testing.B) {
	doTextBenchmark(b, txt_kr)
}
func BenchmarkJapanese(b *testing.B) {
	doTextBenchmark(b, txt_jp)
}
func BenchmarkChinese(b *testing.B) {
	doTextBenchmark(b, txt_cn)
}
func BenchmarkOverflow(b *testing.B) {
	doTextBenchmark(b, overflow)
}
var overflow = string(bytes.Repeat([]byte("\u035D"), 4096)) + "\u035B"

// Tests sampled from the Canonical ordering tests (Part 2) of
// http://unicode.org/Public/UNIDATA/NormalizationTest.txt
const txt_canon = `\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062
\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062
\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062
\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062
\u0061\u059A\u0316\u302A\u0339 \u0061\u0341\u0315\u0300\u05AE\u0062
\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062
\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062
\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062
\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062
\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062
\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062
\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062
\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062
\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062
\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062
\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062
\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062
\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062`
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
một giấy phép khác có các điều khoản tương tự như giấy phép này
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
chúng theo quy định của pháp luật thì tình trạng của nó không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`

// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
const txt_ru = `При обязательном соблюдении следующих условий:
Attribution — Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`

// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`

const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il
// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
라이선스와 동일한 라이선스를 적용해야 합니다.`

// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`

const threeByteUtf8 = txt_th

// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
const txt_jp = `あなたの従うべき条件は以下の通りです。
表示 — あなたは原著作者のクレジットを表示しなければなりません。
継承 — もしあなたがこの作品を改変、変形または加工した場合、
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
頒布することができます。`

// http://creativecommons.org/licenses/by-sa/2.5/cn/
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件:
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`

const txt_cjk = txt_cn + txt_jp + txt_kr
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk