1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // Replacer replaces a list of strings with replacements.
10 // It is safe for concurrent use by multiple goroutines.
11 type Replacer
struct {
15 // replacer is the interface that a replacement algorithm needs to implement.
16 type replacer
interface {
17 Replace(s
string) string
18 WriteString(w io
.Writer
, s
string) (n
int, err error
)
21 // NewReplacer returns a new Replacer from a list of old, new string pairs.
22 // Replacements are performed in order, without overlapping matches.
23 func NewReplacer(oldnew
...string) *Replacer
{
24 if len(oldnew
)%2
== 1 {
25 panic("strings.NewReplacer: odd argument count")
28 if len(oldnew
) == 2 && len(oldnew
[0]) > 1 {
29 return &Replacer
{r
: makeSingleStringReplacer(oldnew
[0], oldnew
[1])}
33 for i
:= 0; i
< len(oldnew
); i
+= 2 {
34 if len(oldnew
[i
]) != 1 {
35 return &Replacer
{r
: makeGenericReplacer(oldnew
)}
37 if len(oldnew
[i
+1]) != 1 {
47 // The first occurrence of old->new map takes precedence
48 // over the others with the same old string.
49 for i
:= len(oldnew
) - 2; i
>= 0; i
-= 2 {
54 return &Replacer
{r
: &r
}
57 r
:= byteStringReplacer
{}
58 // The first occurrence of old->new map takes precedence
59 // over the others with the same old string.
60 for i
:= len(oldnew
) - 2; i
>= 0; i
-= 2 {
65 return &Replacer
{r
: &r
}
68 // Replace returns a copy of s with all replacements performed.
69 func (r
*Replacer
) Replace(s
string) string {
73 // WriteString writes s to w with all replacements performed.
74 func (r
*Replacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
75 return r
.r
.WriteString(w
, s
)
78 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
79 // and values may be empty. For example, the trie containing keys "ax", "ay",
80 // "bcbc", "x" and "xy" could have eight nodes:
91 // n0 is the root node, and its children are n1, n4 and n6; n1's children are
92 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
93 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
94 // (marked with a trailing "+") are complete keys.
95 type trieNode
struct {
96 // value is the value of the trie node's key/value pair. It is empty if
97 // this node is not a complete key.
99 // priority is the priority (higher is more important) of the trie node's
100 // key/value pair; keys are not necessarily matched shortest- or longest-
101 // first. Priority is positive if this node is a complete key, and zero
102 // otherwise. In the example above, positive/zero priorities are marked
103 // with a trailing "+" or "-".
106 // A trie node may have zero, one or more child nodes:
107 // * if the remaining fields are zero, there are no children.
108 // * if prefix and next are non-zero, there is one child in next.
109 // * if table is non-zero, it defines all the children.
111 // Prefixes are preferred over tables when there is one child, but the
112 // root node always uses a table for lookup efficiency.
114 // prefix is the difference in keys between this trie node and the next.
115 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
116 // Node n5 has no children and so has zero prefix, next and table fields.
120 // table is a lookup table indexed by the next byte in the key, after
121 // remapping that byte through genericReplacer.mapping to create a dense
122 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
123 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
124 // genericReplacer.tableSize will be 5. Node n0's table will be
125 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
130 func (t
*trieNode
) add(key
, val
string, priority
int, r
*genericReplacer
) {
134 t
.priority
= priority
140 // Need to split the prefix among multiple nodes.
141 var n
int // length of the longest common prefix
142 for ; n
< len(t
.prefix
) && n
< len(key
); n
++ {
143 if t
.prefix
[n
] != key
[n
] {
147 if n
== len(t
.prefix
) {
148 t
.next
.add(key
[n
:], val
, priority
, r
)
150 // First byte differs, start a new lookup table here. Looking up
151 // what is currently t.prefix[0] will lead to prefixNode, and
152 // looking up key[0] will lead to keyNode.
153 var prefixNode
*trieNode
154 if len(t
.prefix
) == 1 {
157 prefixNode
= &trieNode
{
158 prefix
: t
.prefix
[1:],
162 keyNode
:= new(trieNode
)
163 t
.table
= make([]*trieNode
, r
.tableSize
)
164 t
.table
[r
.mapping
[t
.prefix
[0]]] = prefixNode
165 t
.table
[r
.mapping
[key
[0]]] = keyNode
168 keyNode
.add(key
[1:], val
, priority
, r
)
170 // Insert new node after the common section of the prefix.
172 prefix
: t
.prefix
[n
:],
175 t
.prefix
= t
.prefix
[:n
]
177 next
.add(key
[n
:], val
, priority
, r
)
179 } else if t
.table
!= nil {
180 // Insert into existing table.
181 m
:= r
.mapping
[key
[0]]
182 if t
.table
[m
] == nil {
183 t
.table
[m
] = new(trieNode
)
185 t
.table
[m
].add(key
[1:], val
, priority
, r
)
188 t
.next
= new(trieNode
)
189 t
.next
.add("", val
, priority
, r
)
193 func (r
*genericReplacer
) lookup(s
string, ignoreRoot
bool) (val
string, keylen
int, found
bool) {
194 // Iterate down the trie to the end, and grab the value and keylen with
195 // the highest priority.
200 if node
.priority
> bestPriority
&& !(ignoreRoot
&& node
== &r
.root
) {
201 bestPriority
= node
.priority
210 if node
.table
!= nil {
211 index
:= r
.mapping
[s
[0]]
212 if int(index
) == r
.tableSize
{
215 node
= node
.table
[index
]
218 } else if node
.prefix
!= "" && HasPrefix(s
, node
.prefix
) {
219 n
+= len(node
.prefix
)
220 s
= s
[len(node
.prefix
):]
229 // genericReplacer is the fully generic algorithm.
230 // It's used as a fallback when nothing faster can be used.
231 type genericReplacer
struct {
233 // tableSize is the size of a trie node's lookup table. It is the number
234 // of unique key bytes.
236 // mapping maps from key bytes to a dense index for trieNode.table.
240 func makeGenericReplacer(oldnew
[]string) *genericReplacer
{
241 r
:= new(genericReplacer
)
242 // Find each byte used, then assign them each an index.
243 for i
:= 0; i
< len(oldnew
); i
+= 2 {
245 for j
:= 0; j
< len(key
); j
++ {
246 r
.mapping
[key
[j
]] = 1
250 for _
, b
:= range r
.mapping
{
251 r
.tableSize
+= int(b
)
255 for i
, b
:= range r
.mapping
{
257 r
.mapping
[i
] = byte(r
.tableSize
)
263 // Ensure root node uses a lookup table (for performance).
264 r
.root
.table
= make([]*trieNode
, r
.tableSize
)
266 for i
:= 0; i
< len(oldnew
); i
+= 2 {
267 r
.root
.add(oldnew
[i
], oldnew
[i
+1], len(oldnew
)-i
, r
)
272 type appendSliceWriter
[]byte
274 // Write writes to the buffer to satisfy io.Writer.
275 func (w
*appendSliceWriter
) Write(p
[]byte) (int, error
) {
276 *w
= append(*w
, p
...)
280 // WriteString writes to the buffer without string->[]byte->string allocations.
281 func (w
*appendSliceWriter
) WriteString(s
string) (int, error
) {
282 *w
= append(*w
, s
...)
286 type stringWriterIface
interface {
287 WriteString(string) (int, error
)
290 type stringWriter
struct {
294 func (w stringWriter
) WriteString(s
string) (int, error
) {
295 return w
.w
.Write([]byte(s
))
298 func getStringWriter(w io
.Writer
) stringWriterIface
{
299 sw
, ok
:= w
.(stringWriterIface
)
306 func (r
*genericReplacer
) Replace(s
string) string {
307 buf
:= make(appendSliceWriter
, 0, len(s
))
308 r
.WriteString(&buf
, s
)
312 func (r
*genericReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
313 sw
:= getStringWriter(w
)
315 var prevMatchEmpty
bool
316 for i
:= 0; i
<= len(s
); {
317 // Fast path: s[i] is not a prefix of any pattern.
318 if i
!= len(s
) && r
.root
.priority
== 0 {
319 index
:= int(r
.mapping
[s
[i
]])
320 if index
== r
.tableSize || r
.root
.table
[index
] == nil {
326 // Ignore the empty match iff the previous loop found the empty match.
327 val
, keylen
, match
:= r
.lookup(s
[i
:], prevMatchEmpty
)
328 prevMatchEmpty
= match
&& keylen
== 0
330 wn
, err
= sw
.WriteString(s
[last
:i
])
335 wn
, err
= sw
.WriteString(val
)
347 wn
, err
= sw
.WriteString(s
[last
:])
353 // singleStringReplacer is the implementation that's used when there is only
354 // one string to replace (and that string has more than one byte).
355 type singleStringReplacer
struct {
357 // value is the new string that replaces that pattern when it's found.
361 func makeSingleStringReplacer(pattern
string, value
string) *singleStringReplacer
{
362 return &singleStringReplacer
{finder
: makeStringFinder(pattern
), value
: value
}
365 func (r
*singleStringReplacer
) Replace(s
string) string {
367 i
, matched
:= 0, false
369 match
:= r
.finder
.next(s
[i
:])
374 buf
= append(buf
, s
[i
:i
+match
]...)
375 buf
= append(buf
, r
.value
...)
376 i
+= match
+ len(r
.finder
.pattern
)
381 buf
= append(buf
, s
[i
:]...)
385 func (r
*singleStringReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
386 sw
:= getStringWriter(w
)
389 match
:= r
.finder
.next(s
[i
:])
393 wn
, err
= sw
.WriteString(s
[i
: i
+match
])
398 wn
, err
= sw
.WriteString(r
.value
)
403 i
+= match
+ len(r
.finder
.pattern
)
405 wn
, err
= sw
.WriteString(s
[i
:])
410 // byteReplacer is the implementation that's used when all the "old"
411 // and "new" values are single ASCII bytes.
412 // The array contains replacement bytes indexed by old byte.
413 type byteReplacer
[256]byte
415 func (r
*byteReplacer
) Replace(s
string) string {
416 var buf
[]byte // lazily allocated
417 for i
:= 0; i
< len(s
); i
++ {
432 func (r
*byteReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
433 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
435 if len(s
) < bufsize
{
438 buf
:= make([]byte, bufsize
)
441 ncopy
:= copy(buf
, s
[:])
443 for i
, b
:= range buf
[:ncopy
] {
446 wn
, err
:= w
.Write(buf
[:ncopy
])
455 // byteStringReplacer is the implementation that's used when all the
456 // "old" values are single ASCII bytes but the "new" values vary in size.
457 // The array contains replacement byte slices indexed by old byte.
458 // A nil []byte means that the old byte should not be replaced.
459 type byteStringReplacer
[256][]byte
461 func (r
*byteStringReplacer
) Replace(s
string) string {
464 for i
:= 0; i
< len(s
); i
++ {
468 // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
469 newSize
+= len(r
[b
]) - 1
475 buf
:= make([]byte, newSize
)
477 for i
:= 0; i
< len(s
); i
++ {
490 func (r
*byteStringReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
491 sw
:= getStringWriter(w
)
493 for i
:= 0; i
< len(s
); i
++ {
499 nw
, err
:= sw
.WriteString(s
[last
:i
])
506 nw
, err
:= w
.Write(r
[b
])
514 nw
, err
= sw
.WriteString(s
[last
:])