2014-04-11 Marc Glisse <marc.glisse@inria.fr>
[official-gcc.git] / libgo / go / strings / replace.go
blob54c9323e0489137a5b19f8f5ab2974c8d2d89014
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package strings
7 import "io"
9 // A Replacer replaces a list of strings with replacements.
10 type Replacer struct {
11 r replacer
14 // replacer is the interface that a replacement algorithm needs to implement.
15 type replacer interface {
16 Replace(s string) string
17 WriteString(w io.Writer, s string) (n int, err error)
20 // byteBitmap represents bytes which are sought for replacement.
21 // byteBitmap is 256 bits wide, with a bit set for each old byte to be
22 // replaced.
23 type byteBitmap [256 / 32]uint32
25 func (m *byteBitmap) set(b byte) {
26 m[b>>5] |= uint32(1 << (b & 31))
29 // NewReplacer returns a new Replacer from a list of old, new string pairs.
30 // Replacements are performed in order, without overlapping matches.
31 func NewReplacer(oldnew ...string) *Replacer {
32 if len(oldnew)%2 == 1 {
33 panic("strings.NewReplacer: odd argument count")
36 if len(oldnew) == 2 && len(oldnew[0]) > 1 {
37 return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
40 allNewBytes := true
41 for i := 0; i < len(oldnew); i += 2 {
42 if len(oldnew[i]) != 1 {
43 return &Replacer{r: makeGenericReplacer(oldnew)}
45 if len(oldnew[i+1]) != 1 {
46 allNewBytes = false
50 if allNewBytes {
51 bb := &byteReplacer{}
52 for i := 0; i < len(oldnew); i += 2 {
53 o, n := oldnew[i][0], oldnew[i+1][0]
54 if bb.old[o>>5]&uint32(1<<(o&31)) != 0 {
55 // Later old->new maps do not override previous ones with the same old string.
56 continue
58 bb.old.set(o)
59 bb.new[o] = n
61 return &Replacer{r: bb}
64 bs := &byteStringReplacer{}
65 for i := 0; i < len(oldnew); i += 2 {
66 o, new := oldnew[i][0], oldnew[i+1]
67 if bs.old[o>>5]&uint32(1<<(o&31)) != 0 {
68 // Later old->new maps do not override previous ones with the same old string.
69 continue
71 bs.old.set(o)
72 bs.new[o] = []byte(new)
74 return &Replacer{r: bs}
77 // Replace returns a copy of s with all replacements performed.
78 func (r *Replacer) Replace(s string) string {
79 return r.r.Replace(s)
82 // WriteString writes s to w with all replacements performed.
83 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
84 return r.r.WriteString(w, s)
87 // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
88 // and values may be empty. For example, the trie containing keys "ax", "ay",
89 // "bcbc", "x" and "xy" could have eight nodes:
91 // n0 -
92 // n1 a-
93 // n2 .x+
94 // n3 .y+
95 // n4 b-
96 // n5 .cbc+
97 // n6 x+
98 // n7 .y+
100 // n0 is the root node, and its children are n1, n4 and n6; n1's children are
101 // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
102 // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
103 // (marked with a trailing "+") are complete keys.
104 type trieNode struct {
105 // value is the value of the trie node's key/value pair. It is empty if
106 // this node is not a complete key.
107 value string
108 // priority is the priority (higher is more important) of the trie node's
109 // key/value pair; keys are not necessarily matched shortest- or longest-
110 // first. Priority is positive if this node is a complete key, and zero
111 // otherwise. In the example above, positive/zero priorities are marked
112 // with a trailing "+" or "-".
113 priority int
115 // A trie node may have zero, one or more child nodes:
116 // * if the remaining fields are zero, there are no children.
117 // * if prefix and next are non-zero, there is one child in next.
118 // * if table is non-zero, it defines all the children.
120 // Prefixes are preferred over tables when there is one child, but the
121 // root node always uses a table for lookup efficiency.
123 // prefix is the difference in keys between this trie node and the next.
124 // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
125 // Node n5 has no children and so has zero prefix, next and table fields.
126 prefix string
127 next *trieNode
129 // table is a lookup table indexed by the next byte in the key, after
130 // remapping that byte through genericReplacer.mapping to create a dense
131 // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
132 // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
133 // genericReplacer.tableSize will be 5. Node n0's table will be
134 // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
135 // 'a', 'b' and 'x'.
136 table []*trieNode
139 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
140 if key == "" {
141 if t.priority == 0 {
142 t.value = val
143 t.priority = priority
145 return
148 if t.prefix != "" {
149 // Need to split the prefix among multiple nodes.
150 var n int // length of the longest common prefix
151 for ; n < len(t.prefix) && n < len(key); n++ {
152 if t.prefix[n] != key[n] {
153 break
156 if n == len(t.prefix) {
157 t.next.add(key[n:], val, priority, r)
158 } else if n == 0 {
159 // First byte differs, start a new lookup table here. Looking up
160 // what is currently t.prefix[0] will lead to prefixNode, and
161 // looking up key[0] will lead to keyNode.
162 var prefixNode *trieNode
163 if len(t.prefix) == 1 {
164 prefixNode = t.next
165 } else {
166 prefixNode = &trieNode{
167 prefix: t.prefix[1:],
168 next: t.next,
171 keyNode := new(trieNode)
172 t.table = make([]*trieNode, r.tableSize)
173 t.table[r.mapping[t.prefix[0]]] = prefixNode
174 t.table[r.mapping[key[0]]] = keyNode
175 t.prefix = ""
176 t.next = nil
177 keyNode.add(key[1:], val, priority, r)
178 } else {
179 // Insert new node after the common section of the prefix.
180 next := &trieNode{
181 prefix: t.prefix[n:],
182 next: t.next,
184 t.prefix = t.prefix[:n]
185 t.next = next
186 next.add(key[n:], val, priority, r)
188 } else if t.table != nil {
189 // Insert into existing table.
190 m := r.mapping[key[0]]
191 if t.table[m] == nil {
192 t.table[m] = new(trieNode)
194 t.table[m].add(key[1:], val, priority, r)
195 } else {
196 t.prefix = key
197 t.next = new(trieNode)
198 t.next.add("", val, priority, r)
202 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
203 // Iterate down the trie to the end, and grab the value and keylen with
204 // the highest priority.
205 bestPriority := 0
206 node := &r.root
207 n := 0
208 for node != nil {
209 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
210 bestPriority = node.priority
211 val = node.value
212 keylen = n
213 found = true
216 if s == "" {
217 break
219 if node.table != nil {
220 index := r.mapping[s[0]]
221 if int(index) == r.tableSize {
222 break
224 node = node.table[index]
225 s = s[1:]
227 } else if node.prefix != "" && HasPrefix(s, node.prefix) {
228 n += len(node.prefix)
229 s = s[len(node.prefix):]
230 node = node.next
231 } else {
232 break
235 return
238 // genericReplacer is the fully generic algorithm.
239 // It's used as a fallback when nothing faster can be used.
240 type genericReplacer struct {
241 root trieNode
242 // tableSize is the size of a trie node's lookup table. It is the number
243 // of unique key bytes.
244 tableSize int
245 // mapping maps from key bytes to a dense index for trieNode.table.
246 mapping [256]byte
249 func makeGenericReplacer(oldnew []string) *genericReplacer {
250 r := new(genericReplacer)
251 // Find each byte used, then assign them each an index.
252 for i := 0; i < len(oldnew); i += 2 {
253 key := oldnew[i]
254 for j := 0; j < len(key); j++ {
255 r.mapping[key[j]] = 1
259 for _, b := range r.mapping {
260 r.tableSize += int(b)
263 var index byte
264 for i, b := range r.mapping {
265 if b == 0 {
266 r.mapping[i] = byte(r.tableSize)
267 } else {
268 r.mapping[i] = index
269 index++
272 // Ensure root node uses a lookup table (for performance).
273 r.root.table = make([]*trieNode, r.tableSize)
275 for i := 0; i < len(oldnew); i += 2 {
276 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
278 return r
281 type appendSliceWriter []byte
283 // Write writes to the buffer to satisfy io.Writer.
284 func (w *appendSliceWriter) Write(p []byte) (int, error) {
285 *w = append(*w, p...)
286 return len(p), nil
289 // WriteString writes to the buffer without string->[]byte->string allocations.
290 func (w *appendSliceWriter) WriteString(s string) (int, error) {
291 *w = append(*w, s...)
292 return len(s), nil
295 type stringWriterIface interface {
296 WriteString(string) (int, error)
299 type stringWriter struct {
300 w io.Writer
303 func (w stringWriter) WriteString(s string) (int, error) {
304 return w.w.Write([]byte(s))
307 func getStringWriter(w io.Writer) stringWriterIface {
308 sw, ok := w.(stringWriterIface)
309 if !ok {
310 sw = stringWriter{w}
312 return sw
315 func (r *genericReplacer) Replace(s string) string {
316 buf := make(appendSliceWriter, 0, len(s))
317 r.WriteString(&buf, s)
318 return string(buf)
321 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
322 sw := getStringWriter(w)
323 var last, wn int
324 var prevMatchEmpty bool
325 for i := 0; i <= len(s); {
326 // Ignore the empty match iff the previous loop found the empty match.
327 val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
328 prevMatchEmpty = match && keylen == 0
329 if match {
330 wn, err = sw.WriteString(s[last:i])
331 n += wn
332 if err != nil {
333 return
335 wn, err = sw.WriteString(val)
336 n += wn
337 if err != nil {
338 return
340 i += keylen
341 last = i
342 continue
346 if last != len(s) {
347 wn, err = sw.WriteString(s[last:])
348 n += wn
350 return
353 // singleStringReplacer is the implementation that's used when there is only
354 // one string to replace (and that string has more than one byte).
355 type singleStringReplacer struct {
356 finder *stringFinder
357 // value is the new string that replaces that pattern when it's found.
358 value string
361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
362 return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
365 func (r *singleStringReplacer) Replace(s string) string {
366 var buf []byte
367 i, matched := 0, false
368 for {
369 match := r.finder.next(s[i:])
370 if match == -1 {
371 break
373 matched = true
374 buf = append(buf, s[i:i+match]...)
375 buf = append(buf, r.value...)
376 i += match + len(r.finder.pattern)
378 if !matched {
379 return s
381 buf = append(buf, s[i:]...)
382 return string(buf)
385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
386 sw := getStringWriter(w)
387 var i, wn int
388 for {
389 match := r.finder.next(s[i:])
390 if match == -1 {
391 break
393 wn, err = sw.WriteString(s[i : i+match])
394 n += wn
395 if err != nil {
396 return
398 wn, err = sw.WriteString(r.value)
399 n += wn
400 if err != nil {
401 return
403 i += match + len(r.finder.pattern)
405 wn, err = sw.WriteString(s[i:])
406 n += wn
407 return
410 // byteReplacer is the implementation that's used when all the "old"
411 // and "new" values are single ASCII bytes.
412 type byteReplacer struct {
413 // old has a bit set for each old byte that should be replaced.
414 old byteBitmap
416 // replacement byte, indexed by old byte. only valid if
417 // corresponding old bit is set.
418 new [256]byte
421 func (r *byteReplacer) Replace(s string) string {
422 var buf []byte // lazily allocated
423 for i := 0; i < len(s); i++ {
424 b := s[i]
425 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
426 if buf == nil {
427 buf = []byte(s)
429 buf[i] = r.new[b]
432 if buf == nil {
433 return s
435 return string(buf)
438 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
439 // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
440 bufsize := 32 << 10
441 if len(s) < bufsize {
442 bufsize = len(s)
444 buf := make([]byte, bufsize)
446 for len(s) > 0 {
447 ncopy := copy(buf, s[:])
448 s = s[ncopy:]
449 for i, b := range buf[:ncopy] {
450 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
451 buf[i] = r.new[b]
454 wn, err := w.Write(buf[:ncopy])
455 n += wn
456 if err != nil {
457 return n, err
460 return n, nil
463 // byteStringReplacer is the implementation that's used when all the
464 // "old" values are single ASCII bytes but the "new" values vary in
465 // size.
466 type byteStringReplacer struct {
467 // old has a bit set for each old byte that should be replaced.
468 old byteBitmap
470 // replacement string, indexed by old byte. only valid if
471 // corresponding old bit is set.
472 new [256][]byte
475 func (r *byteStringReplacer) Replace(s string) string {
476 newSize := 0
477 anyChanges := false
478 for i := 0; i < len(s); i++ {
479 b := s[i]
480 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
481 anyChanges = true
482 newSize += len(r.new[b])
483 } else {
484 newSize++
487 if !anyChanges {
488 return s
490 buf := make([]byte, newSize)
491 bi := buf
492 for i := 0; i < len(s); i++ {
493 b := s[i]
494 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
495 n := copy(bi[:], r.new[b])
496 bi = bi[n:]
497 } else {
498 bi[0] = b
499 bi = bi[1:]
502 return string(buf)
505 // WriteString maintains one buffer that's at most 32KB. The bytes in
506 // s are enumerated and the buffer is filled. If it reaches its
507 // capacity or a byte has a replacement, the buffer is flushed to w.
508 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
509 // TODO(bradfitz): use io.WriteString with slices of s instead.
510 bufsize := 32 << 10
511 if len(s) < bufsize {
512 bufsize = len(s)
514 buf := make([]byte, bufsize)
515 bi := buf[:0]
517 for i := 0; i < len(s); i++ {
518 b := s[i]
519 var new []byte
520 if r.old[b>>5]&uint32(1<<(b&31)) != 0 {
521 new = r.new[b]
522 } else {
523 bi = append(bi, b)
525 if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) {
526 nw, err := w.Write(bi)
527 n += nw
528 if err != nil {
529 return n, err
531 bi = buf[:0]
533 if len(new) > 0 {
534 nw, err := w.Write(new)
535 n += nw
536 if err != nil {
537 return n, err
541 if len(bi) > 0 {
542 nw, err := w.Write(bi)
543 n += nw
544 if err != nil {
545 return n, err
548 return n, nil