LWG 3035. std::allocator's constructors should be constexpr
[official-gcc.git] / libgo / go / strings / replace.go
blob4752641be0c9008d311581fcd028074ec258350d
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package strings
7 import "io"
9 // Replacer replaces a list of strings with replacements.
10 // It is safe for concurrent use by multiple goroutines.
11 type Replacer struct {
12 r replacer
15 // replacer is the interface that a replacement algorithm needs to implement.
16 type replacer interface {
17 Replace(s string) string
18 WriteString(w io.Writer, s string) (n int, err error)
21 // NewReplacer returns a new Replacer from a list of old, new string pairs.
22 // Replacements are performed in order, without overlapping matches.
23 func NewReplacer(oldnew ...string) *Replacer {
24 if len(oldnew)%2 == 1 {
25 panic("strings.NewReplacer: odd argument count")
28 if len(oldnew) == 2 && len(oldnew[0]) > 1 {
29 return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
32 allNewBytes := true
33 for i := 0; i < len(oldnew); i += 2 {
34 if len(oldnew[i]) != 1 {
35 return &Replacer{r: makeGenericReplacer(oldnew)}
37 if len(oldnew[i+1]) != 1 {
38 allNewBytes = false
42 if allNewBytes {
43 r := byteReplacer{}
44 for i := range r {
45 r[i] = byte(i)
47 // The first occurrence of old->new map takes precedence
48 // over the others with the same old string.
49 for i := len(oldnew) - 2; i >= 0; i -= 2 {
50 o := oldnew[i][0]
51 n := oldnew[i+1][0]
52 r[o] = n
54 return &Replacer{r: &r}
57 r := byteStringReplacer{}
58 // The first occurrence of old->new map takes precedence
59 // over the others with the same old string.
60 for i := len(oldnew) - 2; i >= 0; i -= 2 {
61 o := oldnew[i][0]
62 n := oldnew[i+1]
63 r[o] = []byte(n)
65 return &Replacer{r: &r}
68 // Replace returns a copy of s with all replacements performed.
69 func (r *Replacer) Replace(s string) string {
70 return r.r.Replace(s)
73 // WriteString writes s to w with all replacements performed.
74 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
75 return r.r.WriteString(w, s)
78 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
79 // and values may be empty. For example, the trie containing keys "ax", "ay",
80 // "bcbc", "x" and "xy" could have eight nodes:
82 // n0 -
83 // n1 a-
84 // n2 .x+
85 // n3 .y+
86 // n4 b-
87 // n5 .cbc+
88 // n6 x+
89 // n7 .y+
91 // n0 is the root node, and its children are n1, n4 and n6; n1's children are
92 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
93 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
94 // (marked with a trailing "+") are complete keys.
95 type trieNode struct {
96 // value is the value of the trie node's key/value pair. It is empty if
97 // this node is not a complete key.
98 value string
99 // priority is the priority (higher is more important) of the trie node's
100 // key/value pair; keys are not necessarily matched shortest- or longest-
101 // first. Priority is positive if this node is a complete key, and zero
102 // otherwise. In the example above, positive/zero priorities are marked
103 // with a trailing "+" or "-".
104 priority int
106 // A trie node may have zero, one or more child nodes:
107 // * if the remaining fields are zero, there are no children.
108 // * if prefix and next are non-zero, there is one child in next.
109 // * if table is non-zero, it defines all the children.
111 // Prefixes are preferred over tables when there is one child, but the
112 // root node always uses a table for lookup efficiency.
114 // prefix is the difference in keys between this trie node and the next.
115 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
116 // Node n5 has no children and so has zero prefix, next and table fields.
117 prefix string
118 next *trieNode
120 // table is a lookup table indexed by the next byte in the key, after
121 // remapping that byte through genericReplacer.mapping to create a dense
122 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
123 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
124 // genericReplacer.tableSize will be 5. Node n0's table will be
125 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
126 // 'a', 'b' and 'x'.
127 table []*trieNode
130 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
131 if key == "" {
132 if t.priority == 0 {
133 t.value = val
134 t.priority = priority
136 return
139 if t.prefix != "" {
140 // Need to split the prefix among multiple nodes.
141 var n int // length of the longest common prefix
142 for ; n < len(t.prefix) && n < len(key); n++ {
143 if t.prefix[n] != key[n] {
144 break
147 if n == len(t.prefix) {
148 t.next.add(key[n:], val, priority, r)
149 } else if n == 0 {
150 // First byte differs, start a new lookup table here. Looking up
151 // what is currently t.prefix[0] will lead to prefixNode, and
152 // looking up key[0] will lead to keyNode.
153 var prefixNode *trieNode
154 if len(t.prefix) == 1 {
155 prefixNode = t.next
156 } else {
157 prefixNode = &trieNode{
158 prefix: t.prefix[1:],
159 next: t.next,
162 keyNode := new(trieNode)
163 t.table = make([]*trieNode, r.tableSize)
164 t.table[r.mapping[t.prefix[0]]] = prefixNode
165 t.table[r.mapping[key[0]]] = keyNode
166 t.prefix = ""
167 t.next = nil
168 keyNode.add(key[1:], val, priority, r)
169 } else {
170 // Insert new node after the common section of the prefix.
171 next := &trieNode{
172 prefix: t.prefix[n:],
173 next: t.next,
175 t.prefix = t.prefix[:n]
176 t.next = next
177 next.add(key[n:], val, priority, r)
179 } else if t.table != nil {
180 // Insert into existing table.
181 m := r.mapping[key[0]]
182 if t.table[m] == nil {
183 t.table[m] = new(trieNode)
185 t.table[m].add(key[1:], val, priority, r)
186 } else {
187 t.prefix = key
188 t.next = new(trieNode)
189 t.next.add("", val, priority, r)
193 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
194 // Iterate down the trie to the end, and grab the value and keylen with
195 // the highest priority.
196 bestPriority := 0
197 node := &r.root
198 n := 0
199 for node != nil {
200 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
201 bestPriority = node.priority
202 val = node.value
203 keylen = n
204 found = true
207 if s == "" {
208 break
210 if node.table != nil {
211 index := r.mapping[s[0]]
212 if int(index) == r.tableSize {
213 break
215 node = node.table[index]
216 s = s[1:]
218 } else if node.prefix != "" && HasPrefix(s, node.prefix) {
219 n += len(node.prefix)
220 s = s[len(node.prefix):]
221 node = node.next
222 } else {
223 break
226 return
229 // genericReplacer is the fully generic algorithm.
230 // It's used as a fallback when nothing faster can be used.
231 type genericReplacer struct {
232 root trieNode
233 // tableSize is the size of a trie node's lookup table. It is the number
234 // of unique key bytes.
235 tableSize int
236 // mapping maps from key bytes to a dense index for trieNode.table.
237 mapping [256]byte
240 func makeGenericReplacer(oldnew []string) *genericReplacer {
241 r := new(genericReplacer)
242 // Find each byte used, then assign them each an index.
243 for i := 0; i < len(oldnew); i += 2 {
244 key := oldnew[i]
245 for j := 0; j < len(key); j++ {
246 r.mapping[key[j]] = 1
250 for _, b := range r.mapping {
251 r.tableSize += int(b)
254 var index byte
255 for i, b := range r.mapping {
256 if b == 0 {
257 r.mapping[i] = byte(r.tableSize)
258 } else {
259 r.mapping[i] = index
260 index++
263 // Ensure root node uses a lookup table (for performance).
264 r.root.table = make([]*trieNode, r.tableSize)
266 for i := 0; i < len(oldnew); i += 2 {
267 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
269 return r
272 type appendSliceWriter []byte
274 // Write writes to the buffer to satisfy io.Writer.
275 func (w *appendSliceWriter) Write(p []byte) (int, error) {
276 *w = append(*w, p...)
277 return len(p), nil
280 // WriteString writes to the buffer without string->[]byte->string allocations.
281 func (w *appendSliceWriter) WriteString(s string) (int, error) {
282 *w = append(*w, s...)
283 return len(s), nil
286 type stringWriterIface interface {
287 WriteString(string) (int, error)
290 type stringWriter struct {
291 w io.Writer
294 func (w stringWriter) WriteString(s string) (int, error) {
295 return w.w.Write([]byte(s))
298 func getStringWriter(w io.Writer) stringWriterIface {
299 sw, ok := w.(stringWriterIface)
300 if !ok {
301 sw = stringWriter{w}
303 return sw
306 func (r *genericReplacer) Replace(s string) string {
307 buf := make(appendSliceWriter, 0, len(s))
308 r.WriteString(&buf, s)
309 return string(buf)
312 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
313 sw := getStringWriter(w)
314 var last, wn int
315 var prevMatchEmpty bool
316 for i := 0; i <= len(s); {
317 // Fast path: s[i] is not a prefix of any pattern.
318 if i != len(s) && r.root.priority == 0 {
319 index := int(r.mapping[s[i]])
320 if index == r.tableSize || r.root.table[index] == nil {
322 continue
326 // Ignore the empty match iff the previous loop found the empty match.
327 val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
328 prevMatchEmpty = match && keylen == 0
329 if match {
330 wn, err = sw.WriteString(s[last:i])
331 n += wn
332 if err != nil {
333 return
335 wn, err = sw.WriteString(val)
336 n += wn
337 if err != nil {
338 return
340 i += keylen
341 last = i
342 continue
346 if last != len(s) {
347 wn, err = sw.WriteString(s[last:])
348 n += wn
350 return
353 // singleStringReplacer is the implementation that's used when there is only
354 // one string to replace (and that string has more than one byte).
355 type singleStringReplacer struct {
356 finder *stringFinder
357 // value is the new string that replaces that pattern when it's found.
358 value string
361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
362 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
365 func (r *singleStringReplacer) Replace(s string) string {
366 var buf []byte
367 i, matched := 0, false
368 for {
369 match := r.finder.next(s[i:])
370 if match == -1 {
371 break
373 matched = true
374 buf = append(buf, s[i:i+match]...)
375 buf = append(buf, r.value...)
376 i += match + len(r.finder.pattern)
378 if !matched {
379 return s
381 buf = append(buf, s[i:]...)
382 return string(buf)
385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
386 sw := getStringWriter(w)
387 var i, wn int
388 for {
389 match := r.finder.next(s[i:])
390 if match == -1 {
391 break
393 wn, err = sw.WriteString(s[i : i+match])
394 n += wn
395 if err != nil {
396 return
398 wn, err = sw.WriteString(r.value)
399 n += wn
400 if err != nil {
401 return
403 i += match + len(r.finder.pattern)
405 wn, err = sw.WriteString(s[i:])
406 n += wn
407 return
410 // byteReplacer is the implementation that's used when all the "old"
411 // and "new" values are single ASCII bytes.
412 // The array contains replacement bytes indexed by old byte.
413 type byteReplacer [256]byte
415 func (r *byteReplacer) Replace(s string) string {
416 var buf []byte // lazily allocated
417 for i := 0; i < len(s); i++ {
418 b := s[i]
419 if r[b] != b {
420 if buf == nil {
421 buf = []byte(s)
423 buf[i] = r[b]
426 if buf == nil {
427 return s
429 return string(buf)
432 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
433 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
434 bufsize := 32 << 10
435 if len(s) < bufsize {
436 bufsize = len(s)
438 buf := make([]byte, bufsize)
440 for len(s) > 0 {
441 ncopy := copy(buf, s[:])
442 s = s[ncopy:]
443 for i, b := range buf[:ncopy] {
444 buf[i] = r[b]
446 wn, err := w.Write(buf[:ncopy])
447 n += wn
448 if err != nil {
449 return n, err
452 return n, nil
455 // byteStringReplacer is the implementation that's used when all the
456 // "old" values are single ASCII bytes but the "new" values vary in size.
457 // The array contains replacement byte slices indexed by old byte.
458 // A nil []byte means that the old byte should not be replaced.
459 type byteStringReplacer [256][]byte
461 func (r *byteStringReplacer) Replace(s string) string {
462 newSize := len(s)
463 anyChanges := false
464 for i := 0; i < len(s); i++ {
465 b := s[i]
466 if r[b] != nil {
467 anyChanges = true
468 // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
469 newSize += len(r[b]) - 1
472 if !anyChanges {
473 return s
475 buf := make([]byte, newSize)
476 bi := buf
477 for i := 0; i < len(s); i++ {
478 b := s[i]
479 if r[b] != nil {
480 n := copy(bi, r[b])
481 bi = bi[n:]
482 } else {
483 bi[0] = b
484 bi = bi[1:]
487 return string(buf)
490 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
491 sw := getStringWriter(w)
492 last := 0
493 for i := 0; i < len(s); i++ {
494 b := s[i]
495 if r[b] == nil {
496 continue
498 if last != i {
499 nw, err := sw.WriteString(s[last:i])
500 n += nw
501 if err != nil {
502 return n, err
505 last = i + 1
506 nw, err := w.Write(r[b])
507 n += nw
508 if err != nil {
509 return n, err
512 if last != len(s) {
513 var nw int
514 nw, err = sw.WriteString(s[last:])
515 n += nw
517 return