* gcc.dg/guality/guality.exp: Skip on AIX.
[official-gcc.git] / libgo / go / strings / replace_test.go
blobd33dea95b09bedae242c195a5f13c1c1c312a91a
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 package strings_test
7 import (
8 "bytes"
9 "fmt"
10 . "strings"
11 "testing"
// htmlEscaper replaces the five HTML special characters with their
// entity references. Every old string is a single byte while the new
// strings vary in length.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)
// htmlUnescaper is the inverse of the HTML escaping table: it maps the
// five entity references back to the characters they stand for. Its old
// strings have varying lengths.
var htmlUnescaper = NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&apos;", "'",
)
// oldHTMLEscape is the http package's old HTML escaping function.
// It runs one full Replace pass per special character and is kept in
// this form as the baseline for BenchmarkHTMLEscapeOld. The "&" pass
// must come first so that the ampersands introduced by the later
// passes are not escaped again.
func oldHTMLEscape(s string) string {
	s = Replace(s, "&", "&amp;", -1)
	s = Replace(s, "<", "&lt;", -1)
	s = Replace(s, ">", "&gt;", -1)
	s = Replace(s, `"`, "&quot;", -1)
	s = Replace(s, "'", "&apos;", -1)
	return s
}
// capitalLetters upper-cases the bytes "a" and "b" and leaves every
// other byte alone. All old and new strings are a single byte.
var capitalLetters = NewReplacer("a", "A", "b", "B")
42 // TestReplacer tests the replacer implementations.
43 func TestReplacer(t *testing.T) {
44 type testCase struct {
45 r *Replacer
46 in, out string
48 var testCases []testCase
50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
51 str := func(b byte) string {
52 return string([]byte{b})
54 var s []string
56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
57 s = nil
58 for i := 0; i < 256; i++ {
59 s = append(s, str(byte(i)), str(byte(i+1)))
61 inc := NewReplacer(s...)
63 // Test cases with 1-byte old strings, 1-byte new strings.
64 testCases = append(testCases,
65 testCase{capitalLetters, "brad", "BrAd"},
66 testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
67 testCase{capitalLetters, "", ""},
69 testCase{inc, "brad", "csbe"},
70 testCase{inc, "\x00\xff", "\x01\x00"},
71 testCase{inc, "", ""},
73 testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
77 s = nil
78 for i := 0; i < 256; i++ {
79 n := i + 1 - 'a'
80 if n < 1 {
81 n = 1
83 s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
85 repeat := NewReplacer(s...)
87 // Test cases with 1-byte old strings, variable length new strings.
88 testCases = append(testCases,
89 testCase{htmlEscaper, "No changes", "No changes"},
90 testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
91 testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
92 testCase{htmlEscaper, "", ""},
94 testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95 testCase{repeat, "abba", "abbbba"},
96 testCase{repeat, "", ""},
98 testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
101 // The remaining test cases have variable length old strings.
103 testCases = append(testCases,
104 testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
105 testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
106 testCase{htmlUnescaper, "", ""},
108 testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
110 testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
112 testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
115 // gen1 has multiple old strings of variable length. There is no
116 // overall non-empty common prefix, but some pairwise common prefixes.
117 gen1 := NewReplacer(
118 "aaa", "3[aaa]",
119 "aa", "2[aa]",
120 "a", "1[a]",
121 "i", "i",
122 "longerst", "most long",
123 "longer", "medium",
124 "long", "short",
125 "xx", "xx",
126 "x", "X",
127 "X", "Y",
128 "Y", "Z",
130 testCases = append(testCases,
131 testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
132 testCase{gen1, "long, longerst, longer", "short, most long, medium"},
133 testCase{gen1, "xxxxx", "xxxxX"},
134 testCase{gen1, "XiX", "YiY"},
135 testCase{gen1, "", ""},
138 // gen2 has multiple old strings with no pairwise common prefix.
139 gen2 := NewReplacer(
140 "roses", "red",
141 "violets", "blue",
142 "sugar", "sweet",
144 testCases = append(testCases,
145 testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
146 testCase{gen2, "", ""},
149 // gen3 has multiple old strings with an overall common prefix.
150 gen3 := NewReplacer(
151 "abracadabra", "poof",
152 "abracadabrakazam", "splat",
153 "abraham", "lincoln",
154 "abrasion", "scrape",
155 "abraham", "isaac",
157 testCases = append(testCases,
158 testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
159 testCase{gen3, "abrasion abracad", "scrape abracad"},
160 testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
161 testCase{gen3, "", ""},
164 // foo{1,2,3,4} have multiple old strings with an overall common prefix
165 // and 1- or 2- byte extensions from the common prefix.
166 foo1 := NewReplacer(
167 "foo1", "A",
168 "foo2", "B",
169 "foo3", "C",
171 foo2 := NewReplacer(
172 "foo1", "A",
173 "foo2", "B",
174 "foo31", "C",
175 "foo32", "D",
177 foo3 := NewReplacer(
178 "foo11", "A",
179 "foo12", "B",
180 "foo31", "C",
181 "foo32", "D",
183 foo4 := NewReplacer(
184 "foo12", "B",
185 "foo32", "D",
187 testCases = append(testCases,
188 testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
189 testCase{foo1, "", ""},
191 testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
192 testCase{foo2, "", ""},
194 testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
195 testCase{foo3, "", ""},
197 testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
198 testCase{foo4, "", ""},
201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
202 allBytes := make([]byte, 256)
203 for i := range allBytes {
204 allBytes[i] = byte(i)
206 allString := string(allBytes)
207 genAll := NewReplacer(
208 allString, "[all]",
209 "\xff", "[ff]",
210 "\x00", "[00]",
212 testCases = append(testCases,
213 testCase{genAll, allString, "[all]"},
214 testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
215 testCase{genAll, "", ""},
218 // Test cases with empty old strings.
220 blankToX1 := NewReplacer("", "X")
221 blankToX2 := NewReplacer("", "X", "", "")
222 blankHighPriority := NewReplacer("", "X", "o", "O")
223 blankLowPriority := NewReplacer("o", "O", "", "X")
224 blankNoOp1 := NewReplacer("", "")
225 blankNoOp2 := NewReplacer("", "", "", "A")
226 blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
227 testCases = append(testCases,
228 testCase{blankToX1, "foo", "XfXoXoX"},
229 testCase{blankToX1, "", "X"},
231 testCase{blankToX2, "foo", "XfXoXoX"},
232 testCase{blankToX2, "", "X"},
234 testCase{blankHighPriority, "oo", "XOXOX"},
235 testCase{blankHighPriority, "ii", "XiXiX"},
236 testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
237 testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
238 testCase{blankHighPriority, "", "X"},
240 testCase{blankLowPriority, "oo", "OOX"},
241 testCase{blankLowPriority, "ii", "XiXiX"},
242 testCase{blankLowPriority, "oiio", "OXiXiOX"},
243 testCase{blankLowPriority, "iooi", "XiOOXiX"},
244 testCase{blankLowPriority, "", "X"},
246 testCase{blankNoOp1, "foo", "foo"},
247 testCase{blankNoOp1, "", ""},
249 testCase{blankNoOp2, "foo", "foo"},
250 testCase{blankNoOp2, "", ""},
252 testCase{blankFoo, "foobarfoobaz", "XRXZX"},
253 testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
254 testCase{blankFoo, "", "X"},
257 // single string replacer
259 abcMatcher := NewReplacer("abc", "[match]")
261 testCases = append(testCases,
262 testCase{abcMatcher, "", ""},
263 testCase{abcMatcher, "ab", "ab"},
264 testCase{abcMatcher, "abcd", "[match]d"},
265 testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
268 // No-arg test cases.
270 nop := NewReplacer()
271 testCases = append(testCases,
272 testCase{nop, "abc", "abc"},
273 testCase{nop, "", ""},
276 // Run the test cases.
278 for i, tc := range testCases {
279 if s := tc.r.Replace(tc.in); s != tc.out {
280 t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
282 var buf bytes.Buffer
283 n, err := tc.r.WriteString(&buf, tc.in)
284 if err != nil {
285 t.Errorf("%d. WriteString: %v", i, err)
286 continue
288 got := buf.String()
289 if got != tc.out {
290 t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
291 continue
293 if n != len(tc.out) {
294 t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
295 i, tc.in, n, len(tc.out), tc.out)
300 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
301 func TestPickAlgorithm(t *testing.T) {
302 testCases := []struct {
303 r *Replacer
304 want string
306 {capitalLetters, "*strings.byteReplacer"},
307 {htmlEscaper, "*strings.byteStringReplacer"},
308 {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
309 {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
310 {NewReplacer("", "X"), "*strings.genericReplacer"},
311 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
313 for i, tc := range testCases {
314 got := fmt.Sprintf("%T", tc.r.Replacer())
315 if got != tc.want {
316 t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
321 // TestGenericTrieBuilding verifies the structure of the generated trie. There
322 // is one node per line, and the key ending with the current line is in the
323 // trie if it ends with a "+".
324 func TestGenericTrieBuilding(t *testing.T) {
325 testCases := []struct{ in, out string }{
326 {"abc;abdef;abdefgh;xx;xy;z", `-
329 ..c+
330 ..d-
331 ...ef+
332 .....gh+
338 {"abracadabra;abracadabrakazam;abraham;abrasion", `-
340 .bra-
341 ....c-
342 .....adabra+
343 ...........kazam+
344 ....h-
345 .....am+
346 ....s-
347 .....ion+
349 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
354 ..a+
357 .ong+
358 ....er+
359 ......st+
363 {"foo;;foo;foo1", `+
365 .oo+
366 ...1+
370 for _, tc := range testCases {
371 keys := Split(tc.in, ";")
372 args := make([]string, len(keys)*2)
373 for i, key := range keys {
374 args[i*2] = key
377 got := NewReplacer(args...).PrintTrie()
378 // Remove tabs from tc.out
379 wantbuf := make([]byte, 0, len(tc.out))
380 for i := 0; i < len(tc.out); i++ {
381 if tc.out[i] != '\t' {
382 wantbuf = append(wantbuf, tc.out[i])
385 want := string(wantbuf)
387 if got != want {
388 t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
// BenchmarkGenericNoMatch times a replacer with old strings of varying
// lengths on a 200-byte input that contains none of them.
func BenchmarkGenericNoMatch(b *testing.B) {
	str := Repeat("A", 100) + Repeat("B", 100)
	generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
	for i := 0; i < b.N; i++ {
		generic.Replace(str)
	}
}
// BenchmarkGenericMatch1 times a replacer with old strings of varying
// lengths on a 200-byte input in which every byte is replaced.
func BenchmarkGenericMatch1(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	generic := NewReplacer("a", "A", "b", "B", "12", "123")
	for i := 0; i < b.N; i++ {
		generic.Replace(str)
	}
}
409 func BenchmarkGenericMatch2(b *testing.B) {
410 str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
411 for i := 0; i < b.N; i++ {
412 htmlUnescaper.Replace(str)
// benchmarkSingleString times replacing every occurrence of pattern in
// text with "[match]". The replacer is built before the timer is reset
// so construction cost is excluded, and len(text) is reported as the
// number of bytes processed per iteration.
func benchmarkSingleString(b *testing.B, pattern, text string) {
	r := NewReplacer(pattern, "[match]")
	b.SetBytes(int64(len(text)))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r.Replace(text)
	}
}
425 func BenchmarkSingleMaxSkipping(b *testing.B) {
426 benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
429 func BenchmarkSingleLongSuffixFail(b *testing.B) {
430 benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
433 func BenchmarkSingleMatch(b *testing.B) {
434 benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
437 func BenchmarkByteByteNoMatch(b *testing.B) {
438 str := Repeat("A", 100) + Repeat("B", 100)
439 for i := 0; i < b.N; i++ {
440 capitalLetters.Replace(str)
444 func BenchmarkByteByteMatch(b *testing.B) {
445 str := Repeat("a", 100) + Repeat("b", 100)
446 for i := 0; i < b.N; i++ {
447 capitalLetters.Replace(str)
451 func BenchmarkByteStringMatch(b *testing.B) {
452 str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
453 for i := 0; i < b.N; i++ {
454 htmlEscaper.Replace(str)
458 func BenchmarkHTMLEscapeNew(b *testing.B) {
459 str := "I <3 to escape HTML & other text too."
460 for i := 0; i < b.N; i++ {
461 htmlEscaper.Replace(str)
465 func BenchmarkHTMLEscapeOld(b *testing.B) {
466 str := "I <3 to escape HTML & other text too."
467 for i := 0; i < b.N; i++ {
468 oldHTMLEscape(str)
// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
// It performs the same "a"->"A", "b"->"B" rewrite as
// BenchmarkByteByteMatch, but as two nested Replace calls.
func BenchmarkByteByteReplaces(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	for i := 0; i < b.N; i++ {
		Replace(Replace(str, "a", "A", -1), "b", "B", -1)
	}
}
// BenchmarkByteByteMap compares byteByteImpl against Map.
// It performs the same "a"->"A", "b"->"B" rewrite as
// BenchmarkByteByteMatch, but through a rune-mapping function.
func BenchmarkByteByteMap(b *testing.B) {
	str := Repeat("a", 100) + Repeat("b", 100)
	fn := func(r rune) rune {
		switch r {
		case 'a':
			return 'A'
		case 'b':
			return 'B'
		}
		return r
	}
	for i := 0; i < b.N; i++ {
		Map(fn, str)
	}
}