libgo/go/strings/replace.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package strings
   6
   7 import "io"
   8
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
type Replacer struct {
	// r is the replacement algorithm chosen by NewReplacer. Replace and
	// WriteString forward to it unchanged.
	r replacer
}
  14
// replacer is the interface that a replacement algorithm needs to implement.
// Replacer delegates its two public methods to a value of this type.
type replacer interface {
	// Replace returns s with the algorithm's replacements applied.
	Replace(s string) string
	// WriteString writes s to w with the algorithm's replacements applied.
	// It returns the number of bytes written and the first write error.
	WriteString(w io.Writer, s string) (n int, err error)
}
  20
  21 // NewReplacer returns a new Replacer from a list of old, new string pairs.
  22 // Replacements are performed in order, without overlapping matches.
  23 func NewReplacer(oldnew ...string) *Replacer {
  24         if len(oldnew)%2 == 1 {
  25                 panic("strings.NewReplacer: odd argument count")
  26         }
  27
  28         if len(oldnew) == 2 && len(oldnew[0]) > 1 {
  29                 return &Replacer{r: makeSingleStringReplacer(oldnew[0], oldnew[1])}
  30         }
  31
  32         allNewBytes := true
  33         for i := 0; i < len(oldnew); i += 2 {
  34                 if len(oldnew[i]) != 1 {
  35                         return &Replacer{r: makeGenericReplacer(oldnew)}
  36                 }
  37                 if len(oldnew[i+1]) != 1 {
  38                         allNewBytes = false
  39                 }
  40         }
  41
  42         if allNewBytes {
  43                 r := byteReplacer{}
  44                 for i := range r {
  45                         r[i] = byte(i)
  46                 }
  47                 // The first occurrence of old->new map takes precedence
  48                 // over the others with the same old string.
  49                 for i := len(oldnew) - 2; i >= 0; i -= 2 {
  50                         o := oldnew[i][0]
  51                         n := oldnew[i+1][0]
  52                         r[o] = n
  53                 }
  54                 return &Replacer{r: &r}
  55         }
  56
  57         r := byteStringReplacer{}
  58         // The first occurrence of old->new map takes precedence
  59         // over the others with the same old string.
  60         for i := len(oldnew) - 2; i >= 0; i -= 2 {
  61                 o := oldnew[i][0]
  62                 n := oldnew[i+1]
  63                 r[o] = []byte(n)
  64         }
  65         return &Replacer{r: &r}
  66 }
  67
  68 // Replace returns a copy of s with all replacements performed.
  69 func (r *Replacer) Replace(s string) string {
  70         return r.r.Replace(s)
  71 }
  72
  73 // WriteString writes s to w with all replacements performed.
  74 func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
  75         return r.r.WriteString(w, s)
  76 }
  77
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//  n0  -
//  n1  a-
//  n2  .x+
//  n3  .y+
//  n4  b-
//  n5  .cbc+
//  n6  x+
//  n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
	// value is the value of the trie node's key/value pair. It is empty if
	// this node is not a complete key.
	value string
	// priority is the priority (higher is more important) of the trie node's
	// key/value pair; keys are not necessarily matched shortest- or longest-
	// first. Priority is positive if this node is a complete key, and zero
	// otherwise. In the example above, positive/zero priorities are marked
	// with a trailing "+" or "-".
	priority int

	// A trie node may have zero, one or more child nodes:
	//  * if the remaining fields are zero, there are no children.
	//  * if prefix and next are non-zero, there is one child in next.
	//  * if table is non-zero, it defines all the children.
	//
	// Prefixes are preferred over tables when there is one child, but the
	// root node always uses a table for lookup efficiency.

	// prefix is the difference in keys between this trie node and the next;
	// next is the child reached once all of prefix has been matched.
	// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
	// Node n5 has no children and so has zero prefix, next and table fields.
	prefix string
	next   *trieNode

	// table is a lookup table indexed by the next byte in the key, after
	// remapping that byte through genericReplacer.mapping to create a dense
	// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
	// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
	// genericReplacer.tableSize will be 5. Node n0's table will be
	// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
	// 'a', 'b' and 'x'. A nil entry means no child exists for that byte.
	table []*trieNode
}
 129
 130 func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
 131         if key == "" {
 132                 if t.priority == 0 {
 133                         t.value = val
 134                         t.priority = priority
 135                 }
 136                 return
 137         }
 138
 139         if t.prefix != "" {
 140                 // Need to split the prefix among multiple nodes.
 141                 var n int // length of the longest common prefix
 142                 for ; n < len(t.prefix) && n < len(key); n++ {
 143                         if t.prefix[n] != key[n] {
 144                                 break
 145                         }
 146                 }
 147                 if n == len(t.prefix) {
 148                         t.next.add(key[n:], val, priority, r)
 149                 } else if n == 0 {
 150                         // First byte differs, start a new lookup table here. Looking up
 151                         // what is currently t.prefix[0] will lead to prefixNode, and
 152                         // looking up key[0] will lead to keyNode.
 153                         var prefixNode *trieNode
 154                         if len(t.prefix) == 1 {
 155                                 prefixNode = t.next
 156                         } else {
 157                                 prefixNode = &trieNode{
 158                                         prefix: t.prefix[1:],
 159                                         next:   t.next,
 160                                 }
 161                         }
 162                         keyNode := new(trieNode)
 163                         t.table = make([]*trieNode, r.tableSize)
 164                         t.table[r.mapping[t.prefix[0]]] = prefixNode
 165                         t.table[r.mapping[key[0]]] = keyNode
 166                         t.prefix = ""
 167                         t.next = nil
 168                         keyNode.add(key[1:], val, priority, r)
 169                 } else {
 170                         // Insert new node after the common section of the prefix.
 171                         next := &trieNode{
 172                                 prefix: t.prefix[n:],
 173                                 next:   t.next,
 174                         }
 175                         t.prefix = t.prefix[:n]
 176                         t.next = next
 177                         next.add(key[n:], val, priority, r)
 178                 }
 179         } else if t.table != nil {
 180                 // Insert into existing table.
 181                 m := r.mapping[key[0]]
 182                 if t.table[m] == nil {
 183                         t.table[m] = new(trieNode)
 184                 }
 185                 t.table[m].add(key[1:], val, priority, r)
 186         } else {
 187                 t.prefix = key
 188                 t.next = new(trieNode)
 189                 t.next.add("", val, priority, r)
 190         }
 191 }
 192
 193 func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
 194         // Iterate down the trie to the end, and grab the value and keylen with
 195         // the highest priority.
 196         bestPriority := 0
 197         node := &r.root
 198         n := 0
 199         for node != nil {
 200                 if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
 201                         bestPriority = node.priority
 202                         val = node.value
 203                         keylen = n
 204                         found = true
 205                 }
 206
 207                 if s == "" {
 208                         break
 209                 }
 210                 if node.table != nil {
 211                         index := r.mapping[s[0]]
 212                         if int(index) == r.tableSize {
 213                                 break
 214                         }
 215                         node = node.table[index]
 216                         s = s[1:]
 217                         n++
 218                 } else if node.prefix != "" && HasPrefix(s, node.prefix) {
 219                         n += len(node.prefix)
 220                         s = s[len(node.prefix):]
 221                         node = node.next
 222                 } else {
 223                         break
 224                 }
 225         }
 226         return
 227 }
 228
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
	// root is the root of the key trie. makeGenericReplacer always gives it
	// a lookup table.
	root trieNode
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	// Bytes that occur in no key are mapped to tableSize.
	mapping [256]byte
}
 239
 240 func makeGenericReplacer(oldnew []string) *genericReplacer {
 241         r := new(genericReplacer)
 242         // Find each byte used, then assign them each an index.
 243         for i := 0; i < len(oldnew); i += 2 {
 244                 key := oldnew[i]
 245                 for j := 0; j < len(key); j++ {
 246                         r.mapping[key[j]] = 1
 247                 }
 248         }
 249
 250         for _, b := range r.mapping {
 251                 r.tableSize += int(b)
 252         }
 253
 254         var index byte
 255         for i, b := range r.mapping {
 256                 if b == 0 {
 257                         r.mapping[i] = byte(r.tableSize)
 258                 } else {
 259                         r.mapping[i] = index
 260                         index++
 261                 }
 262         }
 263         // Ensure root node uses a lookup table (for performance).
 264         r.root.table = make([]*trieNode, r.tableSize)
 265
 266         for i := 0; i < len(oldnew); i += 2 {
 267                 r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
 268         }
 269         return r
 270 }
 271
 272 type appendSliceWriter []byte
 273
 274 // Write writes to the buffer to satisfy io.Writer.
 275 func (w *appendSliceWriter) Write(p []byte) (int, error) {
 276         *w = append(*w, p...)
 277         return len(p), nil
 278 }
 279
 280 // WriteString writes to the buffer without string->[]byte->string allocations.
 281 func (w *appendSliceWriter) WriteString(s string) (int, error) {
 282         *w = append(*w, s...)
 283         return len(s), nil
 284 }
 285
 286 type stringWriterIface interface {
 287         WriteString(string) (int, error)
 288 }
 289
 290 type stringWriter struct {
 291         w io.Writer
 292 }
 293
 294 func (w stringWriter) WriteString(s string) (int, error) {
 295         return w.w.Write([]byte(s))
 296 }
 297
 298 func getStringWriter(w io.Writer) stringWriterIface {
 299         sw, ok := w.(stringWriterIface)
 300         if !ok {
 301                 sw = stringWriter{w}
 302         }
 303         return sw
 304 }
 305
 306 func (r *genericReplacer) Replace(s string) string {
 307         buf := make(appendSliceWriter, 0, len(s))
 308         r.WriteString(&buf, s)
 309         return string(buf)
 310 }
 311
 312 func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
 313         sw := getStringWriter(w)
 314         var last, wn int
 315         var prevMatchEmpty bool
 316         for i := 0; i <= len(s); {
 317                 // Fast path: s[i] is not a prefix of any pattern.
 318                 if i != len(s) && r.root.priority == 0 {
 319                         index := int(r.mapping[s[i]])
 320                         if index == r.tableSize || r.root.table[index] == nil {
 321                                 i++
 322                                 continue
 323                         }
 324                 }
 325
 326                 // Ignore the empty match iff the previous loop found the empty match.
 327                 val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
 328                 prevMatchEmpty = match && keylen == 0
 329                 if match {
 330                         wn, err = sw.WriteString(s[last:i])
 331                         n += wn
 332                         if err != nil {
 333                                 return
 334                         }
 335                         wn, err = sw.WriteString(val)
 336                         n += wn
 337                         if err != nil {
 338                                 return
 339                         }
 340                         i += keylen
 341                         last = i
 342                         continue
 343                 }
 344                 i++
 345         }
 346         if last != len(s) {
 347                 wn, err = sw.WriteString(s[last:])
 348                 n += wn
 349         }
 350         return
 351 }
 352
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
	// finder locates occurrences of the pattern. It is created from the
	// pattern by makeStringFinder.
	finder *stringFinder
	// value is the new string that replaces that pattern when it's found.
	value string
}
 360
 361 func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
 362         return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
 363 }
 364
 365 func (r *singleStringReplacer) Replace(s string) string {
 366         var buf []byte
 367         i, matched := 0, false
 368         for {
 369                 match := r.finder.next(s[i:])
 370                 if match == -1 {
 371                         break
 372                 }
 373                 matched = true
 374                 buf = append(buf, s[i:i+match]...)
 375                 buf = append(buf, r.value...)
 376                 i += match + len(r.finder.pattern)
 377         }
 378         if !matched {
 379                 return s
 380         }
 381         buf = append(buf, s[i:]...)
 382         return string(buf)
 383 }
 384
 385 func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
 386         sw := getStringWriter(w)
 387         var i, wn int
 388         for {
 389                 match := r.finder.next(s[i:])
 390                 if match == -1 {
 391                         break
 392                 }
 393                 wn, err = sw.WriteString(s[i : i+match])
 394                 n += wn
 395                 if err != nil {
 396                         return
 397                 }
 398                 wn, err = sw.WriteString(r.value)
 399                 n += wn
 400                 if err != nil {
 401                         return
 402                 }
 403                 i += match + len(r.finder.pattern)
 404         }
 405         wn, err = sw.WriteString(s[i:])
 406         n += wn
 407         return
 408 }
 409
 410 // byteReplacer is the implementation that's used when all the "old"
 411 // and "new" values are single ASCII bytes.
 412 // The array contains replacement bytes indexed by old byte.
 413 type byteReplacer [256]byte
 414
 415 func (r *byteReplacer) Replace(s string) string {
 416         var buf []byte // lazily allocated
 417         for i := 0; i < len(s); i++ {
 418                 b := s[i]
 419                 if r[b] != b {
 420                         if buf == nil {
 421                                 buf = []byte(s)
 422                         }
 423                         buf[i] = r[b]
 424                 }
 425         }
 426         if buf == nil {
 427                 return s
 428         }
 429         return string(buf)
 430 }
 431
 432 func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
 433         // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
 434         bufsize := 32 << 10
 435         if len(s) < bufsize {
 436                 bufsize = len(s)
 437         }
 438         buf := make([]byte, bufsize)
 439
 440         for len(s) > 0 {
 441                 ncopy := copy(buf, s[:])
 442                 s = s[ncopy:]
 443                 for i, b := range buf[:ncopy] {
 444                         buf[i] = r[b]
 445                 }
 446                 wn, err := w.Write(buf[:ncopy])
 447                 n += wn
 448                 if err != nil {
 449                         return n, err
 450                 }
 451         }
 452         return n, nil
 453 }
 454
 455 // byteStringReplacer is the implementation that's used when all the
 456 // "old" values are single ASCII bytes but the "new" values vary in size.
 457 // The array contains replacement byte slices indexed by old byte.
 458 // A nil []byte means that the old byte should not be replaced.
 459 type byteStringReplacer [256][]byte
 460
 461 func (r *byteStringReplacer) Replace(s string) string {
 462         newSize := len(s)
 463         anyChanges := false
 464         for i := 0; i < len(s); i++ {
 465                 b := s[i]
 466                 if r[b] != nil {
 467                         anyChanges = true
 468                         // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
 469                         newSize += len(r[b]) - 1
 470                 }
 471         }
 472         if !anyChanges {
 473                 return s
 474         }
 475         buf := make([]byte, newSize)
 476         bi := buf
 477         for i := 0; i < len(s); i++ {
 478                 b := s[i]
 479                 if r[b] != nil {
 480                         n := copy(bi, r[b])
 481                         bi = bi[n:]
 482                 } else {
 483                         bi[0] = b
 484                         bi = bi[1:]
 485                 }
 486         }
 487         return string(buf)
 488 }
 489
 490 func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
 491         sw := getStringWriter(w)
 492         last := 0
 493         for i := 0; i < len(s); i++ {
 494                 b := s[i]
 495                 if r[b] == nil {
 496                         continue
 497                 }
 498                 if last != i {
 499                         nw, err := sw.WriteString(s[last:i])
 500                         n += nw
 501                         if err != nil {
 502                                 return n, err
 503                         }
 504                 }
 505                 last = i + 1
 506                 nw, err := w.Write(r[b])
 507                 n += nw
 508                 if err != nil {
 509                         return n, err
 510                 }
 511         }
 512         if last != len(s) {
 513                 var nw int
 514                 nw, err = sw.WriteString(s[last:])
 515                 n += nw
 516         }
 517         return
 518 }