1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 var htmlEscaper
= NewReplacer(
22 var htmlUnescaper
= NewReplacer(
30 // The http package's old HTML escaping function.
31 func oldHTMLEscape(s
string) string {
32 s
= Replace(s
, "&", "&", -1)
33 s
= Replace(s
, "<", "<", -1)
34 s
= Replace(s
, ">", ">", -1)
35 s
= Replace(s
, `"`, """, -1)
36 s
= Replace(s
, "'", "'", -1)
40 var capitalLetters
= NewReplacer("a", "A", "b", "B")
42 // TestReplacer tests the replacer implementations.
43 func TestReplacer(t
*testing
.T
) {
44 type testCase
struct {
48 var testCases
[]testCase
50 // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
51 str
:= func(b
byte) string {
52 return string([]byte{b
})
56 // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
58 for i
:= 0; i
< 256; i
++ {
59 s
= append(s
, str(byte(i
)), str(byte(i
+1)))
61 inc
:= NewReplacer(s
...)
63 // Test cases with 1-byte old strings, 1-byte new strings.
64 testCases
= append(testCases
,
65 testCase
{capitalLetters
, "brad", "BrAd"},
66 testCase
{capitalLetters
, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
67 testCase
{capitalLetters
, "", ""},
69 testCase
{inc
, "brad", "csbe"},
70 testCase
{inc
, "\x00\xff", "\x01\x00"},
71 testCase
{inc
, "", ""},
73 testCase
{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
76 // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
78 for i
:= 0; i
< 256; i
++ {
83 s
= append(s
, str(byte(i
)), Repeat(str(byte(i
)), n
))
85 repeat
:= NewReplacer(s
...)
87 // Test cases with 1-byte old strings, variable length new strings.
88 testCases
= append(testCases
,
89 testCase
{htmlEscaper
, "No changes", "No changes"},
90 testCase
{htmlEscaper
, "I <3 escaping & stuff", "I <3 escaping & stuff"},
91 testCase
{htmlEscaper
, "&&&", "&&&"},
92 testCase
{htmlEscaper
, "", ""},
94 testCase
{repeat
, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
95 testCase
{repeat
, "abba", "abbbba"},
96 testCase
{repeat
, "", ""},
98 testCase
{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
101 // The remaining test cases have variable length old strings.
103 testCases
= append(testCases
,
104 testCase
{htmlUnescaper
, "&amp;", "&"},
105 testCase
{htmlUnescaper
, "<b>HTML's neat</b>", "<b>HTML's neat</b>"},
106 testCase
{htmlUnescaper
, "", ""},
108 testCase
{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
110 testCase
{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
112 testCase
{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
115 // gen1 has multiple old strings of variable length. There is no
116 // overall non-empty common prefix, but some pairwise common prefixes.
122 "longerst", "most long",
130 testCases
= append(testCases
,
131 testCase
{gen1
, "fooaaabar", "foo3[aaa]b1[a]r"},
132 testCase
{gen1
, "long, longerst, longer", "short, most long, medium"},
133 testCase
{gen1
, "xxxxx", "xxxxX"},
134 testCase
{gen1
, "XiX", "YiY"},
135 testCase
{gen1
, "", ""},
138 // gen2 has multiple old strings with no pairwise common prefix.
144 testCases
= append(testCases
,
145 testCase
{gen2
, "roses are red, violets are blue...", "red are red, blue are blue..."},
146 testCase
{gen2
, "", ""},
149 // gen3 has multiple old strings with an overall common prefix.
151 "abracadabra", "poof",
152 "abracadabrakazam", "splat",
153 "abraham", "lincoln",
154 "abrasion", "scrape",
157 testCases
= append(testCases
,
158 testCase
{gen3
, "abracadabrakazam abraham", "poofkazam lincoln"},
159 testCase
{gen3
, "abrasion abracad", "scrape abracad"},
160 testCase
{gen3
, "abba abram abrasive", "abba abram abrasive"},
161 testCase
{gen3
, "", ""},
164 // foo{1,2,3,4} have multiple old strings with an overall common prefix
165 // and 1- or 2- byte extensions from the common prefix.
187 testCases
= append(testCases
,
188 testCase
{foo1
, "fofoofoo12foo32oo", "fofooA2C2oo"},
189 testCase
{foo1
, "", ""},
191 testCase
{foo2
, "fofoofoo12foo32oo", "fofooA2Doo"},
192 testCase
{foo2
, "", ""},
194 testCase
{foo3
, "fofoofoo12foo32oo", "fofooBDoo"},
195 testCase
{foo3
, "", ""},
197 testCase
{foo4
, "fofoofoo12foo32oo", "fofooBDoo"},
198 testCase
{foo4
, "", ""},
201 // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
202 allBytes
:= make([]byte, 256)
203 for i
:= range allBytes
{
204 allBytes
[i
] = byte(i
)
206 allString
:= string(allBytes
)
207 genAll
:= NewReplacer(
212 testCases
= append(testCases
,
213 testCase
{genAll
, allString
, "[all]"},
214 testCase
{genAll
, "a\xff" + allString
+ "\x00", "a[ff][all][00]"},
215 testCase
{genAll
, "", ""},
218 // Test cases with empty old strings.
220 blankToX1
:= NewReplacer("", "X")
221 blankToX2
:= NewReplacer("", "X", "", "")
222 blankHighPriority
:= NewReplacer("", "X", "o", "O")
223 blankLowPriority
:= NewReplacer("o", "O", "", "X")
224 blankNoOp1
:= NewReplacer("", "")
225 blankNoOp2
:= NewReplacer("", "", "", "A")
226 blankFoo
:= NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
227 testCases
= append(testCases
,
228 testCase
{blankToX1
, "foo", "XfXoXoX"},
229 testCase
{blankToX1
, "", "X"},
231 testCase
{blankToX2
, "foo", "XfXoXoX"},
232 testCase
{blankToX2
, "", "X"},
234 testCase
{blankHighPriority
, "oo", "XOXOX"},
235 testCase
{blankHighPriority
, "ii", "XiXiX"},
236 testCase
{blankHighPriority
, "oiio", "XOXiXiXOX"},
237 testCase
{blankHighPriority
, "iooi", "XiXOXOXiX"},
238 testCase
{blankHighPriority
, "", "X"},
240 testCase
{blankLowPriority
, "oo", "OOX"},
241 testCase
{blankLowPriority
, "ii", "XiXiX"},
242 testCase
{blankLowPriority
, "oiio", "OXiXiOX"},
243 testCase
{blankLowPriority
, "iooi", "XiOOXiX"},
244 testCase
{blankLowPriority
, "", "X"},
246 testCase
{blankNoOp1
, "foo", "foo"},
247 testCase
{blankNoOp1
, "", ""},
249 testCase
{blankNoOp2
, "foo", "foo"},
250 testCase
{blankNoOp2
, "", ""},
252 testCase
{blankFoo
, "foobarfoobaz", "XRXZX"},
253 testCase
{blankFoo
, "foobar-foobaz", "XRX-XZX"},
254 testCase
{blankFoo
, "", "X"},
257 // single string replacer
259 abcMatcher
:= NewReplacer("abc", "[match]")
261 testCases
= append(testCases
,
262 testCase
{abcMatcher
, "", ""},
263 testCase
{abcMatcher
, "ab", "ab"},
264 testCase
{abcMatcher
, "abc", "[match]"},
265 testCase
{abcMatcher
, "abcd", "[match]d"},
266 testCase
{abcMatcher
, "cabcabcdabca", "c[match][match]d[match]a"},
269 // Issue 6659 cases (more single string replacer)
271 noHello
:= NewReplacer("Hello", "")
272 testCases
= append(testCases
,
273 testCase
{noHello
, "Hello", ""},
274 testCase
{noHello
, "Hellox", "x"},
275 testCase
{noHello
, "xHello", "x"},
276 testCase
{noHello
, "xHellox", "xx"},
279 // No-arg test cases.
282 testCases
= append(testCases
,
283 testCase
{nop
, "abc", "abc"},
284 testCase
{nop
, "", ""},
287 // Run the test cases.
289 for i
, tc
:= range testCases
{
290 if s
:= tc
.r
.Replace(tc
.in
); s
!= tc
.out
{
291 t
.Errorf("%d. Replace(%q) = %q, want %q", i
, tc
.in
, s
, tc
.out
)
294 n
, err
:= tc
.r
.WriteString(&buf
, tc
.in
)
296 t
.Errorf("%d. WriteString: %v", i
, err
)
301 t
.Errorf("%d. WriteString(%q) wrote %q, want %q", i
, tc
.in
, got
, tc
.out
)
304 if n
!= len(tc
.out
) {
305 t
.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
306 i
, tc
.in
, n
, len(tc
.out
), tc
.out
)
311 var algorithmTestCases
= []struct {
315 {capitalLetters
, "*strings.byteReplacer"},
316 {htmlEscaper
, "*strings.byteStringReplacer"},
317 {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
318 {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
319 {NewReplacer("", "X"), "*strings.genericReplacer"},
320 {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
323 // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
324 func TestPickAlgorithm(t
*testing
.T
) {
325 for i
, tc
:= range algorithmTestCases
{
326 got
:= fmt
.Sprintf("%T", tc
.r
.Replacer())
328 t
.Errorf("%d. algorithm = %s, want %s", i
, got
, tc
.want
)
333 type errWriter
struct{}
335 func (errWriter
) Write(p
[]byte) (n
int, err error
) {
336 return 0, fmt
.Errorf("unwritable")
339 // TestWriteStringError tests that WriteString returns an error
340 // received from the underlying io.Writer.
341 func TestWriteStringError(t
*testing
.T
) {
342 for i
, tc
:= range algorithmTestCases
{
343 n
, err
:= tc
.r
.WriteString(errWriter
{}, "abc")
344 if n
!= 0 || err
== nil || err
.Error() != "unwritable" {
345 t
.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i
, n
, err
)
350 // TestGenericTrieBuilding verifies the structure of the generated trie. There
351 // is one node per line, and the key ending with the current line is in the
352 // trie if it ends with a "+".
353 func TestGenericTrieBuilding(t
*testing
.T
) {
354 testCases
:= []struct{ in
, out
string }{
355 {"abc;abdef;abdefgh;xx;xy;z", `-
367 {"abracadabra;abracadabrakazam;abraham;abrasion", `-
378 {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
399 for _
, tc
:= range testCases
{
400 keys
:= Split(tc
.in
, ";")
401 args
:= make([]string, len(keys
)*2)
402 for i
, key
:= range keys
{
406 got
:= NewReplacer(args
...).PrintTrie()
407 // Remove tabs from tc.out
408 wantbuf
:= make([]byte, 0, len(tc
.out
))
409 for i
:= 0; i
< len(tc
.out
); i
++ {
410 if tc
.out
[i
] != '\t' {
411 wantbuf
= append(wantbuf
, tc
.out
[i
])
414 want
:= string(wantbuf
)
417 t
.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc
.in
, got
, want
)
422 func BenchmarkGenericNoMatch(b
*testing
.B
) {
423 str
:= Repeat("A", 100) + Repeat("B", 100)
424 generic
:= NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
425 for i
:= 0; i
< b
.N
; i
++ {
430 func BenchmarkGenericMatch1(b
*testing
.B
) {
431 str
:= Repeat("a", 100) + Repeat("b", 100)
432 generic
:= NewReplacer("a", "A", "b", "B", "12", "123")
433 for i
:= 0; i
< b
.N
; i
++ {
438 func BenchmarkGenericMatch2(b
*testing
.B
) {
439 str
:= Repeat("It's <b>HTML</b>!", 100)
440 for i
:= 0; i
< b
.N
; i
++ {
441 htmlUnescaper
.Replace(str
)
445 func benchmarkSingleString(b
*testing
.B
, pattern
, text
string) {
446 r
:= NewReplacer(pattern
, "[match]")
447 b
.SetBytes(int64(len(text
)))
449 for i
:= 0; i
< b
.N
; i
++ {
454 func BenchmarkSingleMaxSkipping(b
*testing
.B
) {
455 benchmarkSingleString(b
, Repeat("b", 25), Repeat("a", 10000))
458 func BenchmarkSingleLongSuffixFail(b
*testing
.B
) {
459 benchmarkSingleString(b
, "b"+Repeat("a", 500), Repeat("a", 1002))
462 func BenchmarkSingleMatch(b
*testing
.B
) {
463 benchmarkSingleString(b
, "abcdef", Repeat("abcdefghijklmno", 1000))
466 func BenchmarkByteByteNoMatch(b
*testing
.B
) {
467 str
:= Repeat("A", 100) + Repeat("B", 100)
468 for i
:= 0; i
< b
.N
; i
++ {
469 capitalLetters
.Replace(str
)
473 func BenchmarkByteByteMatch(b
*testing
.B
) {
474 str
:= Repeat("a", 100) + Repeat("b", 100)
475 for i
:= 0; i
< b
.N
; i
++ {
476 capitalLetters
.Replace(str
)
480 func BenchmarkByteStringMatch(b
*testing
.B
) {
481 str
:= "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
482 for i
:= 0; i
< b
.N
; i
++ {
483 htmlEscaper
.Replace(str
)
487 func BenchmarkHTMLEscapeNew(b
*testing
.B
) {
488 str
:= "I <3 to escape HTML & other text too."
489 for i
:= 0; i
< b
.N
; i
++ {
490 htmlEscaper
.Replace(str
)
494 func BenchmarkHTMLEscapeOld(b
*testing
.B
) {
495 str
:= "I <3 to escape HTML & other text too."
496 for i
:= 0; i
< b
.N
; i
++ {
501 func BenchmarkByteStringReplacerWriteString(b
*testing
.B
) {
502 str
:= Repeat("I <3 to escape HTML & other text too.", 100)
503 buf
:= new(bytes
.Buffer
)
504 for i
:= 0; i
< b
.N
; i
++ {
505 htmlEscaper
.WriteString(buf
, str
)
510 func BenchmarkByteReplacerWriteString(b
*testing
.B
) {
511 str
:= Repeat("abcdefghijklmnopqrstuvwxyz", 100)
512 buf
:= new(bytes
.Buffer
)
513 for i
:= 0; i
< b
.N
; i
++ {
514 capitalLetters
.WriteString(buf
, str
)
519 // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
520 func BenchmarkByteByteReplaces(b
*testing
.B
) {
521 str
:= Repeat("a", 100) + Repeat("b", 100)
522 for i
:= 0; i
< b
.N
; i
++ {
523 Replace(Replace(str
, "a", "A", -1), "b", "B", -1)
527 // BenchmarkByteByteMap compares byteByteImpl against Map.
528 func BenchmarkByteByteMap(b
*testing
.B
) {
529 str
:= Repeat("a", 100) + Repeat("b", 100)
530 fn
:= func(r rune
) rune
{
539 for i
:= 0; i
< b
.N
; i
++ {
544 var mapdata
= []struct{ name
, data
string }{
545 {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
546 {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
549 func BenchmarkMap(b
*testing
.B
) {
550 mapidentity
:= func(r rune
) rune
{
554 b
.Run("identity", func(b
*testing
.B
) {
555 for _
, md
:= range mapdata
{
556 b
.Run(md
.name
, func(b
*testing
.B
) {
557 for i
:= 0; i
< b
.N
; i
++ {
558 Map(mapidentity
, md
.data
)
564 mapchange
:= func(r rune
) rune
{
565 if 'a' <= r
&& r
<= 'z' {
568 if 'α' <= r
&& r
<= 'ω' {
574 b
.Run("change", func(b
*testing
.B
) {
575 for _
, md
:= range mapdata
{
576 b
.Run(md
.name
, func(b
*testing
.B
) {
577 for i
:= 0; i
< b
.N
; i
++ {
578 Map(mapchange
, md
.data
)