1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // A Replacer replaces a list of strings with replacements.
10 type Replacer
struct {
14 // replacer is the interface that a replacement algorithm needs to implement.
15 type replacer
interface {
16 Replace(s
string) string
17 WriteString(w io
.Writer
, s
string) (n
int, err error
)
20 // byteBitmap represents bytes which are sought for replacement.
21 // byteBitmap is 256 bits wide, with a bit set for each old byte to be
23 type byteBitmap
[256 / 32]uint32
25 func (m
*byteBitmap
) set(b
byte) {
26 m
[b
>>5] |
= uint32(1 << (b
& 31))
29 // NewReplacer returns a new Replacer from a list of old, new string pairs.
30 // Replacements are performed in order, without overlapping matches.
31 func NewReplacer(oldnew
...string) *Replacer
{
32 if len(oldnew
)%2
== 1 {
33 panic("strings.NewReplacer: odd argument count")
36 if len(oldnew
) == 2 && len(oldnew
[0]) > 1 {
37 return &Replacer
{r
: makeSingleStringReplacer(oldnew
[0], oldnew
[1])}
41 for i
:= 0; i
< len(oldnew
); i
+= 2 {
42 if len(oldnew
[i
]) != 1 {
43 return &Replacer
{r
: makeGenericReplacer(oldnew
)}
45 if len(oldnew
[i
+1]) != 1 {
52 for i
:= 0; i
< len(oldnew
); i
+= 2 {
53 o
, n
:= oldnew
[i
][0], oldnew
[i
+1][0]
54 if bb
.old
[o
>>5]&uint32(1<<(o
&31)) != 0 {
55 // Later old->new maps do not override previous ones with the same old string.
61 return &Replacer
{r
: bb
}
64 bs
:= &byteStringReplacer
{}
65 for i
:= 0; i
< len(oldnew
); i
+= 2 {
66 o
, new := oldnew
[i
][0], oldnew
[i
+1]
67 if bs
.old
[o
>>5]&uint32(1<<(o
&31)) != 0 {
68 // Later old->new maps do not override previous ones with the same old string.
72 bs
.new[o
] = []byte(new)
74 return &Replacer
{r
: bs
}
77 // Replace returns a copy of s with all replacements performed.
78 func (r
*Replacer
) Replace(s
string) string {
82 // WriteString writes s to w with all replacements performed.
83 func (r
*Replacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
84 return r
.r
.WriteString(w
, s
)
87 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
88 // and values may be empty. For example, the trie containing keys "ax", "ay",
89 // "bcbc", "x" and "xy" could have eight nodes:
100 // n0 is the root node, and its children are n1, n4 and n6; n1's children are
101 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
102 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
103 // (marked with a trailing "+") are complete keys.
104 type trieNode
struct {
105 // value is the value of the trie node's key/value pair. It is empty if
106 // this node is not a complete key.
108 // priority is the priority (higher is more important) of the trie node's
109 // key/value pair; keys are not necessarily matched shortest- or longest-
110 // first. Priority is positive if this node is a complete key, and zero
111 // otherwise. In the example above, positive/zero priorities are marked
112 // with a trailing "+" or "-".
115 // A trie node may have zero, one or more child nodes:
116 // * if the remaining fields are zero, there are no children.
117 // * if prefix and next are non-zero, there is one child in next.
118 // * if table is non-zero, it defines all the children.
120 // Prefixes are preferred over tables when there is one child, but the
121 // root node always uses a table for lookup efficiency.
123 // prefix is the difference in keys between this trie node and the next.
124 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
125 // Node n5 has no children and so has zero prefix, next and table fields.
129 // table is a lookup table indexed by the next byte in the key, after
130 // remapping that byte through genericReplacer.mapping to create a dense
131 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
132 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
133 // genericReplacer.tableSize will be 5. Node n0's table will be
134 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
139 func (t
*trieNode
) add(key
, val
string, priority
int, r
*genericReplacer
) {
143 t
.priority
= priority
149 // Need to split the prefix among multiple nodes.
150 var n
int // length of the longest common prefix
151 for ; n
< len(t
.prefix
) && n
< len(key
); n
++ {
152 if t
.prefix
[n
] != key
[n
] {
156 if n
== len(t
.prefix
) {
157 t
.next
.add(key
[n
:], val
, priority
, r
)
159 // First byte differs, start a new lookup table here. Looking up
160 // what is currently t.prefix[0] will lead to prefixNode, and
161 // looking up key[0] will lead to keyNode.
162 var prefixNode
*trieNode
163 if len(t
.prefix
) == 1 {
166 prefixNode
= &trieNode
{
167 prefix
: t
.prefix
[1:],
171 keyNode
:= new(trieNode
)
172 t
.table
= make([]*trieNode
, r
.tableSize
)
173 t
.table
[r
.mapping
[t
.prefix
[0]]] = prefixNode
174 t
.table
[r
.mapping
[key
[0]]] = keyNode
177 keyNode
.add(key
[1:], val
, priority
, r
)
179 // Insert new node after the common section of the prefix.
181 prefix
: t
.prefix
[n
:],
184 t
.prefix
= t
.prefix
[:n
]
186 next
.add(key
[n
:], val
, priority
, r
)
188 } else if t
.table
!= nil {
189 // Insert into existing table.
190 m
:= r
.mapping
[key
[0]]
191 if t
.table
[m
] == nil {
192 t
.table
[m
] = new(trieNode
)
194 t
.table
[m
].add(key
[1:], val
, priority
, r
)
197 t
.next
= new(trieNode
)
198 t
.next
.add("", val
, priority
, r
)
202 func (r
*genericReplacer
) lookup(s
string, ignoreRoot
bool) (val
string, keylen
int, found
bool) {
203 // Iterate down the trie to the end, and grab the value and keylen with
204 // the highest priority.
209 if node
.priority
> bestPriority
&& !(ignoreRoot
&& node
== &r
.root
) {
210 bestPriority
= node
.priority
219 if node
.table
!= nil {
220 index
:= r
.mapping
[s
[0]]
221 if int(index
) == r
.tableSize
{
224 node
= node
.table
[index
]
227 } else if node
.prefix
!= "" && HasPrefix(s
, node
.prefix
) {
228 n
+= len(node
.prefix
)
229 s
= s
[len(node
.prefix
):]
238 // genericReplacer is the fully generic algorithm.
239 // It's used as a fallback when nothing faster can be used.
240 type genericReplacer
struct {
242 // tableSize is the size of a trie node's lookup table. It is the number
243 // of unique key bytes.
245 // mapping maps from key bytes to a dense index for trieNode.table.
249 func makeGenericReplacer(oldnew
[]string) *genericReplacer
{
250 r
:= new(genericReplacer
)
251 // Find each byte used, then assign them each an index.
252 for i
:= 0; i
< len(oldnew
); i
+= 2 {
254 for j
:= 0; j
< len(key
); j
++ {
255 r
.mapping
[key
[j
]] = 1
259 for _
, b
:= range r
.mapping
{
260 r
.tableSize
+= int(b
)
264 for i
, b
:= range r
.mapping
{
266 r
.mapping
[i
] = byte(r
.tableSize
)
272 // Ensure root node uses a lookup table (for performance).
273 r
.root
.table
= make([]*trieNode
, r
.tableSize
)
275 for i
:= 0; i
< len(oldnew
); i
+= 2 {
276 r
.root
.add(oldnew
[i
], oldnew
[i
+1], len(oldnew
)-i
, r
)
281 type appendSliceWriter
[]byte
283 // Write writes to the buffer to satisfy io.Writer.
284 func (w
*appendSliceWriter
) Write(p
[]byte) (int, error
) {
285 *w
= append(*w
, p
...)
289 // WriteString writes to the buffer without string->[]byte->string allocations.
290 func (w
*appendSliceWriter
) WriteString(s
string) (int, error
) {
291 *w
= append(*w
, s
...)
295 type stringWriterIface
interface {
296 WriteString(string) (int, error
)
299 type stringWriter
struct {
303 func (w stringWriter
) WriteString(s
string) (int, error
) {
304 return w
.w
.Write([]byte(s
))
307 func getStringWriter(w io
.Writer
) stringWriterIface
{
308 sw
, ok
:= w
.(stringWriterIface
)
315 func (r
*genericReplacer
) Replace(s
string) string {
316 buf
:= make(appendSliceWriter
, 0, len(s
))
317 r
.WriteString(&buf
, s
)
321 func (r
*genericReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
322 sw
:= getStringWriter(w
)
324 var prevMatchEmpty
bool
325 for i
:= 0; i
<= len(s
); {
326 // Ignore the empty match iff the previous loop found the empty match.
327 val
, keylen
, match
:= r
.lookup(s
[i
:], prevMatchEmpty
)
328 prevMatchEmpty
= match
&& keylen
== 0
330 wn
, err
= sw
.WriteString(s
[last
:i
])
335 wn
, err
= sw
.WriteString(val
)
347 wn
, err
= sw
.WriteString(s
[last
:])
353 // singleStringReplacer is the implementation that's used when there is only
354 // one string to replace (and that string has more than one byte).
355 type singleStringReplacer
struct {
357 // value is the new string that replaces that pattern when it's found.
361 func makeSingleStringReplacer(pattern
string, value
string) *singleStringReplacer
{
362 return &singleStringReplacer
{finder
: makeStringFinder(pattern
), value
: value
}
365 func (r
*singleStringReplacer
) Replace(s
string) string {
367 i
, matched
:= 0, false
369 match
:= r
.finder
.next(s
[i
:])
374 buf
= append(buf
, s
[i
:i
+match
]...)
375 buf
= append(buf
, r
.value
...)
376 i
+= match
+ len(r
.finder
.pattern
)
381 buf
= append(buf
, s
[i
:]...)
385 func (r
*singleStringReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
386 sw
:= getStringWriter(w
)
389 match
:= r
.finder
.next(s
[i
:])
393 wn
, err
= sw
.WriteString(s
[i
: i
+match
])
398 wn
, err
= sw
.WriteString(r
.value
)
403 i
+= match
+ len(r
.finder
.pattern
)
405 wn
, err
= sw
.WriteString(s
[i
:])
410 // byteReplacer is the implementation that's used when all the "old"
411 // and "new" values are single ASCII bytes.
412 type byteReplacer
struct {
413 // old has a bit set for each old byte that should be replaced.
416 // replacement byte, indexed by old byte. only valid if
417 // corresponding old bit is set.
421 func (r
*byteReplacer
) Replace(s
string) string {
422 var buf
[]byte // lazily allocated
423 for i
:= 0; i
< len(s
); i
++ {
425 if r
.old
[b
>>5]&uint32(1<<(b
&31)) != 0 {
438 func (r
*byteReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
439 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
441 if len(s
) < bufsize
{
444 buf
:= make([]byte, bufsize
)
447 ncopy
:= copy(buf
, s
[:])
449 for i
, b
:= range buf
[:ncopy
] {
450 if r
.old
[b
>>5]&uint32(1<<(b
&31)) != 0 {
454 wn
, err
:= w
.Write(buf
[:ncopy
])
463 // byteStringReplacer is the implementation that's used when all the
464 // "old" values are single ASCII bytes but the "new" values vary in
466 type byteStringReplacer
struct {
467 // old has a bit set for each old byte that should be replaced.
470 // replacement string, indexed by old byte. only valid if
471 // corresponding old bit is set.
475 func (r
*byteStringReplacer
) Replace(s
string) string {
478 for i
:= 0; i
< len(s
); i
++ {
480 if r
.old
[b
>>5]&uint32(1<<(b
&31)) != 0 {
482 newSize
+= len(r
.new[b
])
490 buf
:= make([]byte, newSize
)
492 for i
:= 0; i
< len(s
); i
++ {
494 if r
.old
[b
>>5]&uint32(1<<(b
&31)) != 0 {
495 n
:= copy(bi
[:], r
.new[b
])
505 // WriteString maintains one buffer that's at most 32KB. The bytes in
506 // s are enumerated and the buffer is filled. If it reaches its
507 // capacity or a byte has a replacement, the buffer is flushed to w.
508 func (r
*byteStringReplacer
) WriteString(w io
.Writer
, s
string) (n
int, err error
) {
509 // TODO(bradfitz): use io.WriteString with slices of s instead.
511 if len(s
) < bufsize
{
514 buf
:= make([]byte, bufsize
)
517 for i
:= 0; i
< len(s
); i
++ {
520 if r
.old
[b
>>5]&uint32(1<<(b
&31)) != 0 {
525 if len(bi
) == cap(bi
) ||
(len(bi
) > 0 && len(new) > 0) {
526 nw
, err
:= w
.Write(bi
)
534 nw
, err
:= w
.Write(new)
542 nw
, err
:= w
.Write(bi
)