Import GNU Classpath (20121202).
[official-gcc.git] / libjava / classpath / java / util / regex / Matcher.java
blob95a35535935ca1161c99095b13c7f59069cb7674
1 /* Matcher.java -- Instance of a regular expression applied to a char sequence.
2 Copyright (C) 2002, 2004, 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package java.util.regex;
41 import gnu.java.lang.CPStringBuilder;
43 import gnu.java.util.regex.CharIndexed;
44 import gnu.java.util.regex.RE;
45 import gnu.java.util.regex.REMatch;
47 /**
48 * Instance of a regular expression applied to a char sequence.
50 * @since 1.4
52 public final class Matcher implements MatchResult
54 private Pattern pattern;
55 private CharSequence input;
56 // We use CharIndexed as an input object to the getMatch method in order
57 // that /\G/ (the end of the previous match) may work. The information
58 // of the previous match is stored in the CharIndexed object.
59 private CharIndexed inputCharIndexed;
60 private int position;
61 private int appendPosition;
62 private REMatch match;
64 /**
65 * The start of the region of the input on which to match.
67 private int regionStart;
69 /**
70 * The end of the region of the input on which to match.
72 private int regionEnd;
74 /**
75 * True if the match process should look beyond the
76 * region marked by regionStart to regionEnd when
77 * performing lookAhead, lookBehind and boundary
78 * matching.
80 private boolean transparentBounds;
82 /**
83 * The flags that affect the anchoring bounds.
84 * If {@link #hasAnchoringBounds()} is {@code true},
85 * the match process will honour the
86 * anchoring bounds: ^, \A, \Z, \z and $. If
87 * {@link #hasAnchoringBounds()} is {@code false},
88 * the anchors are ignored and appropriate flags,
89 * stored in this variable, are used to provide this
90 * behaviour.
92 private int anchoringBounds;
/**
 * Creates a matcher applying {@code pattern} to {@code input}.  The region
 * initially covers the whole input, the bounds are opaque and no
 * anchoring flags are set.
 *
 * @param pattern the pattern to match with.
 * @param input the character sequence to match against.
 */
Matcher(Pattern pattern, CharSequence input)
{
  this.pattern = pattern;
  this.input = input;
  inputCharIndexed = RE.makeCharIndexed(input, 0);

  // Default region: the entire input.
  regionStart = 0;
  regionEnd = input.length();

  // Default bounds: opaque, with no extra anchoring flags.
  transparentBounds = false;
  anchoringBounds = 0;
}
106 * Changes the pattern used by the {@link Matcher} to
107 * the one specified. Existing match information is lost,
108 * but the input and the matcher's position within it is
109 * retained.
111 * @param newPattern the new pattern to use.
112 * @return this matcher.
113 * @throws IllegalArgumentException if {@code newPattern} is
114 * {@code null}.
115 * @since 1.5
117 public Matcher usePattern(Pattern newPattern)
119 if (newPattern == null)
120 throw new IllegalArgumentException("The new pattern was null.");
121 pattern = newPattern;
122 match = null;
124 return this;
128 * @param sb The target string buffer
129 * @param replacement The replacement string
131 * @exception IllegalStateException If no match has yet been attempted,
132 * or if the previous match operation failed
133 * @exception IndexOutOfBoundsException If the replacement string refers
134 * to a capturing group that does not exist in the pattern
136 public Matcher appendReplacement (StringBuffer sb, String replacement)
137 throws IllegalStateException
139 assertMatchOp();
140 sb.append(input.subSequence(appendPosition,
141 match.getStartIndex()).toString());
142 sb.append(RE.getReplacement(replacement, match,
143 RE.REG_REPLACE_USE_BACKSLASHESCAPE));
144 appendPosition = match.getEndIndex();
145 return this;
149 * @param sb The target string buffer
151 public StringBuffer appendTail (StringBuffer sb)
153 sb.append(input.subSequence(appendPosition, input.length()).toString());
154 return sb;
158 * @exception IllegalStateException If no match has yet been attempted,
159 * or if the previous match operation failed
161 public int end ()
162 throws IllegalStateException
164 assertMatchOp();
165 return match.getEndIndex();
169 * @param group The index of a capturing group in this matcher's pattern
171 * @exception IllegalStateException If no match has yet been attempted,
172 * or if the previous match operation failed
173 * @exception IndexOutOfBoundsException If the replacement string refers
174 * to a capturing group that does not exist in the pattern
176 public int end (int group)
177 throws IllegalStateException
179 assertMatchOp();
180 return match.getEndIndex(group);
183 public boolean find ()
185 boolean first = (match == null);
186 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
187 match = pattern.getRE().getMatch(inputCharIndexed, position, anchoringBounds);
188 else
189 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
190 position, anchoringBounds);
191 if (match != null)
193 int endIndex = match.getEndIndex();
194 // Is the match within input limits?
195 if (endIndex > input.length())
197 match = null;
198 return false;
200 // Are we stuck at the same position?
201 if (!first && endIndex == position)
203 match = null;
204 // Not at the end of the input yet?
205 if (position < input.length() - 1)
207 position++;
208 return find(position);
210 else
211 return false;
213 position = endIndex;
214 return true;
216 return false;
220 * @param start The index to start the new pattern matching
222 * @exception IndexOutOfBoundsException If the replacement string refers
223 * to a capturing group that does not exist in the pattern
225 public boolean find (int start)
227 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
228 match = pattern.getRE().getMatch(inputCharIndexed, start, anchoringBounds);
229 else
230 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd),
231 start, anchoringBounds);
232 if (match != null)
234 position = match.getEndIndex();
235 return true;
237 return false;
241 * @exception IllegalStateException If no match has yet been attempted,
242 * or if the previous match operation failed
244 public String group ()
246 assertMatchOp();
247 return match.toString();
251 * @param group The index of a capturing group in this matcher's pattern
253 * @exception IllegalStateException If no match has yet been attempted,
254 * or if the previous match operation failed
255 * @exception IndexOutOfBoundsException If the replacement string refers
256 * to a capturing group that does not exist in the pattern
258 public String group (int group)
259 throws IllegalStateException
261 assertMatchOp();
262 return match.toString(group);
266 * @param replacement The replacement string
268 public String replaceFirst (String replacement)
270 reset();
271 // Semantics might not quite match
272 return pattern.getRE().substitute(input, replacement, position,
273 RE.REG_REPLACE_USE_BACKSLASHESCAPE);
277 * @param replacement The replacement string
279 public String replaceAll (String replacement)
281 reset();
282 return pattern.getRE().substituteAll(input, replacement, position,
283 RE.REG_REPLACE_USE_BACKSLASHESCAPE);
286 public int groupCount ()
288 return pattern.getRE().getNumSubs();
291 public boolean lookingAt ()
293 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
294 match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
295 anchoringBounds|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
296 else
297 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
298 anchoringBounds|RE.REG_FIX_STARTING_POSITION);
299 if (match != null)
301 if (match.getStartIndex() == 0)
303 position = match.getEndIndex();
304 return true;
306 match = null;
308 return false;
312 * Attempts to match the entire input sequence against the pattern.
314 * If the match succeeds then more information can be obtained via the
315 * start, end, and group methods.
317 * @see #start()
318 * @see #end()
319 * @see #group()
321 public boolean matches ()
323 if (transparentBounds || (regionStart == 0 && regionEnd == input.length()))
324 match = pattern.getRE().getMatch(inputCharIndexed, regionStart,
325 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION|RE.REG_ANCHORINDEX);
326 else
327 match = pattern.getRE().getMatch(input.subSequence(regionStart, regionEnd), 0,
328 anchoringBounds|RE.REG_TRY_ENTIRE_MATCH|RE.REG_FIX_STARTING_POSITION);
329 if (match != null)
331 if (match.getStartIndex() == 0)
333 position = match.getEndIndex();
334 if (position == input.length())
335 return true;
337 match = null;
339 return false;
343 * Returns the Pattern that is interpreted by this Matcher
345 public Pattern pattern ()
347 return pattern;
351 * Resets the internal state of the matcher, including
352 * resetting the region to its default state of encompassing
353 * the whole input. The state of {@link #hasTransparentBounds()}
354 * and {@link #hasAnchoringBounds()} are unaffected.
356 * @return a reference to this matcher.
357 * @see #regionStart()
358 * @see #regionEnd()
359 * @see #hasTransparentBounds()
360 * @see #hasAnchoringBounds()
362 public Matcher reset ()
364 position = 0;
365 match = null;
366 regionStart = 0;
367 regionEnd = input.length();
368 appendPosition = 0;
369 return this;
373 * Resets the internal state of the matcher, including
374 * resetting the region to its default state of encompassing
375 * the whole input. The state of {@link #hasTransparentBounds()}
376 * and {@link #hasAnchoringBounds()} are unaffected.
378 * @param input The new input character sequence.
379 * @return a reference to this matcher.
380 * @see #regionStart()
381 * @see #regionEnd()
382 * @see #hasTransparentBounds()
383 * @see #hasAnchoringBounds()
385 public Matcher reset (CharSequence input)
387 this.input = input;
388 this.inputCharIndexed = RE.makeCharIndexed(input, 0);
389 return reset();
393 * @return the index of a capturing group in this matcher's pattern
395 * @exception IllegalStateException If no match has yet been attempted,
396 * or if the previous match operation failed
398 public int start ()
399 throws IllegalStateException
401 assertMatchOp();
402 return match.getStartIndex();
406 * @param group The index of a capturing group in this matcher's pattern
408 * @exception IllegalStateException If no match has yet been attempted,
409 * or if the previous match operation failed
410 * @exception IndexOutOfBoundsException If the replacement string refers
411 * to a capturing group that does not exist in the pattern
413 public int start (int group)
414 throws IllegalStateException
416 assertMatchOp();
417 return match.getStartIndex(group);
421 * @return True if and only if the matcher hit the end of input.
422 * @since 1.5
424 public boolean hitEnd()
426 return inputCharIndexed.hitEnd();
430 * @return A string expression of this matcher.
432 public String toString()
434 CPStringBuilder sb = new CPStringBuilder();
435 sb.append(this.getClass().getName())
436 .append("[pattern=").append(pattern.pattern())
437 .append(" region=").append(regionStart).append(",").append(regionEnd)
438 .append(" anchoringBounds=").append(anchoringBounds == 0)
439 .append(" transparentBounds=").append(transparentBounds)
440 .append(" lastmatch=").append(match == null ? "" : match.toString())
441 .append("]");
442 return sb.toString();
445 private void assertMatchOp()
447 if (match == null) throw new IllegalStateException();
451 * <p>
452 * Defines the region of the input on which to match.
453 * By default, the {@link Matcher} attempts to match
454 * the whole string (from 0 to the length of the input),
455 * but a region between {@code start} (inclusive) and
456 * {@code end} (exclusive) on which to match may instead
457 * be defined using this method.
458 * </p>
459 * <p>
460 * The behaviour of region matching is further affected
461 * by the use of transparent or opaque bounds (see
462 * {@link #useTransparentBounds(boolean)}) and whether or not
463 * anchors ({@code ^} and {@code $}) are in use
464 * (see {@link #useAnchoringBounds(boolean)}). With transparent
465 * bounds, the matcher is aware of input outside the bounds
466 * set by this method, whereas, with opaque bounds (the default)
467 * only the input within the bounds is used. The use of
468 * anchors are affected by this setting; with transparent
469 * bounds, anchors will match the beginning of the real input,
470 * while with opaque bounds they match the beginning of the
471 * region. {@link #useAnchoringBounds(boolean)} can be used
472 * to turn on or off the matching of anchors.
473 * </p>
475 * @param start the start of the region (inclusive).
476 * @param end the end of the region (exclusive).
477 * @return a reference to this matcher.
478 * @throws IndexOutOfBoundsException if either {@code start} or
479 * {@code end} are less than zero,
480 * if either {@code start} or
481 * {@code end} are greater than the
482 * length of the input, or if
483 * {@code start} is greater than
484 * {@code end}.
485 * @see #regionStart()
486 * @see #regionEnd()
487 * @see #hasTransparentBounds()
488 * @see #useTransparentBounds(boolean)
489 * @see #hasAnchoringBounds()
490 * @see #useAnchoringBounds(boolean)
491 * @since 1.5
493 public Matcher region(int start, int end)
495 int length = input.length();
496 if (start < 0)
497 throw new IndexOutOfBoundsException("The start position was less than zero.");
498 if (start >= length)
499 throw new IndexOutOfBoundsException("The start position is after the end of the input.");
500 if (end < 0)
501 throw new IndexOutOfBoundsException("The end position was less than zero.");
502 if (end > length)
503 throw new IndexOutOfBoundsException("The end position is after the end of the input.");
504 if (start > end)
505 throw new IndexOutOfBoundsException("The start position is after the end position.");
506 reset();
507 regionStart = start;
508 regionEnd = end;
509 return this;
513 * The start of the region on which to perform matches (inclusive).
515 * @return the start index of the region.
516 * @see #region(int,int)
517 * #see #regionEnd()
518 * @since 1.5
520 public int regionStart()
522 return regionStart;
526 * The end of the region on which to perform matches (exclusive).
528 * @return the end index of the region.
529 * @see #region(int,int)
530 * @see #regionStart()
531 * @since 1.5
533 public int regionEnd()
535 return regionEnd;
539 * Returns true if the bounds of the region marked by
540 * {@link #regionStart()} and {@link #regionEnd()} are
541 * transparent. When these bounds are transparent, the
542 * matching process can look beyond them to perform
543 * lookahead, lookbehind and boundary matching operations.
544 * By default, the bounds are opaque.
546 * @return true if the bounds of the matching region are
547 * transparent.
548 * @see #useTransparentBounds(boolean)
549 * @see #region(int,int)
550 * @see #regionStart()
551 * @see #regionEnd()
552 * @since 1.5
554 public boolean hasTransparentBounds()
556 return transparentBounds;
560 * Sets the transparency of the bounds of the region
561 * marked by {@link #regionStart()} and {@link #regionEnd()}.
562 * A value of {@code true} makes the bounds transparent,
563 * so the matcher can see beyond them to perform lookahead,
564 * lookbehind and boundary matching operations. A value
565 * of {@code false} (the default) makes the bounds opaque,
566 * restricting the match to the input region denoted
567 * by {@link #regionStart()} and {@link #regionEnd()}.
569 * @param transparent true if the bounds should be transparent.
570 * @return a reference to this matcher.
571 * @see #hasTransparentBounds()
572 * @see #region(int,int)
573 * @see #regionStart()
574 * @see #regionEnd()
575 * @since 1.5
577 public Matcher useTransparentBounds(boolean transparent)
579 transparentBounds = transparent;
580 return this;
584 * Returns true if the matcher will honour the use of
585 * the anchoring bounds: {@code ^}, {@code \A}, {@code \Z},
586 * {@code \z} and {@code $}. By default, the anchors
587 * are used. Note that the effect of the anchors is
588 * also affected by {@link #hasTransparentBounds()}.
590 * @return true if the matcher will attempt to match
591 * the anchoring bounds.
592 * @see #useAnchoringBounds(boolean)
593 * @see #hasTransparentBounds()
594 * @since 1.5
596 public boolean hasAnchoringBounds()
598 return anchoringBounds == 0;
602 * Enables or disables the use of the anchoring bounds:
603 * {@code ^}, {@code \A}, {@code \Z}, {@code \z} and
604 * {@code $}. By default, their use is enabled. When
605 * disabled, the matcher will not attempt to match
606 * the anchors.
608 * @param useAnchors true if anchoring bounds should be used.
609 * @return a reference to this matcher.
610 * @since 1.5
611 * @see #hasAnchoringBounds()
613 public Matcher useAnchoringBounds(boolean useAnchors)
615 if (useAnchors)
616 anchoringBounds = 0;
617 else
618 anchoringBounds = RE.REG_NOTBOL|RE.REG_NOTEOL;
619 return this;
623 * Returns a read-only snapshot of the current state of
624 * the {@link Matcher} as a {@link MatchResult}. Any
625 * subsequent changes to this instance are not reflected
626 * in the returned {@link MatchResult}.
628 * @return a {@link MatchResult} instance representing the
629 * current state of the {@link Matcher}.
631 public MatchResult toMatchResult()
633 Matcher snapshot = new Matcher(pattern, input);
634 if (match != null)
635 snapshot.match = (REMatch) match.clone();
636 return snapshot;
640 * Returns a literalized string of s where characters {@code $} and {@code
641 * \\} are escaped.
643 * @param s the string to literalize.
644 * @return the literalized string.
645 * @since 1.5
647 public static String quoteReplacement(String s)
649 if (s == null)
650 throw new NullPointerException();
651 CPStringBuilder sb = new CPStringBuilder();
652 for (int i = 0; i < s.length(); i++)
654 char ch = s.charAt(i);
655 if (ch == '$' || ch == '\\')
656 sb.append('\\');
657 sb.append(ch);
659 return sb.toString();