libjava/ChangeLog:
[official-gcc.git] / libjava / classpath / gnu / java / util / regex / RESyntax.java
blob38d70564d4aa79e6799eb2de5876e002b711d8c2
1 /* gnu/regexp/RESyntax.java
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package gnu.java.util.regex;
40 import java.io.Serializable;
41 import java.util.BitSet;
43 /**
44 * An RESyntax specifies the way a regular expression will be compiled.
45 * This class provides a number of predefined useful constants for
46 * emulating popular regular expression syntaxes. Additionally the
47 * user may construct his or her own syntax, using any combination of the
48 * syntax bit constants. The syntax is an optional argument to any of the
49 * matching methods on class RE.
51 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
54 public final class RESyntax implements Serializable
56 static final String DEFAULT_LINE_SEPARATOR =
57 System.getProperty ("line.separator");
59 private BitSet bits;
61 // true for the constant defined syntaxes
62 private boolean isFinal = false;
64 private String lineSeparator = DEFAULT_LINE_SEPARATOR;
66 // Values for constants are bit indexes
68 /**
69 * Syntax bit. Backslash is an escape character in lists.
71 public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
73 /**
74 * Syntax bit. Use \? instead of ? and \+ instead of +.
76 public static final int RE_BK_PLUS_QM = 1;
78 /**
79 * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
81 public static final int RE_CHAR_CLASSES = 2;
83 /**
84 * Syntax bit. ^ and $ are special everywhere.
85 * <B>Not implemented.</B>
87 public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
89 /**
90 * Syntax bit. Repetition operators are only special in valid positions.
91 * <B>Not implemented.</B>
93 public static final int RE_CONTEXT_INDEP_OPS = 4;
95 /**
96 * Syntax bit. Repetition and alternation operators are invalid
97 * at start and end of pattern and other places.
98 * <B>Not implemented</B>.
100 public static final int RE_CONTEXT_INVALID_OPS = 5;
103 * Syntax bit. Match-any-character operator (.) matches a newline.
105 public static final int RE_DOT_NEWLINE = 6;
108 * Syntax bit. Match-any-character operator (.) does not match a null.
110 public static final int RE_DOT_NOT_NULL = 7;
113 * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
115 public static final int RE_INTERVALS = 8;
118 * Syntax bit. No alternation (|), match one-or-more (+), or
119 * match zero-or-one (?) operators.
121 public static final int RE_LIMITED_OPS = 9;
124 * Syntax bit. Newline is an alternation operator.
126 public static final int RE_NEWLINE_ALT = 10; // impl.
129 * Syntax bit. Intervals use { } instead of \{ \}
131 public static final int RE_NO_BK_BRACES = 11;
134 * Syntax bit. Grouping uses ( ) instead of \( \).
136 public static final int RE_NO_BK_PARENS = 12;
139 * Syntax bit. Backreferences not allowed.
141 public static final int RE_NO_BK_REFS = 13;
144 * Syntax bit. Alternation uses | instead of \|
146 public static final int RE_NO_BK_VBAR = 14;
149 * Syntax bit. <B>Not implemented</B>.
151 public static final int RE_NO_EMPTY_RANGES = 15;
154 * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
155 * on RE_NO_BK_PARENS) will throw an exception when compiling.
157 public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
160 * Syntax bit. <B>Not implemented.</B>
162 public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
165 * Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?).
167 public static final int RE_STINGY_OPS = 18;
170 * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
172 public static final int RE_CHAR_CLASS_ESCAPES = 19;
175 * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
177 public static final int RE_PURE_GROUPING = 20;
180 * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
181 * to the text following the current position without consuming that text.
183 public static final int RE_LOOKAHEAD = 21;
186 * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
188 public static final int RE_STRING_ANCHORS = 22;
191 * Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
193 public static final int RE_COMMENTS = 23;
196 * Syntax bit. Allow character class escapes within lists, as in Perl5.
198 public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
201 * Syntax bit. Possessive matching is allowed (++, *+, ?+, {x,y}+).
203 public static final int RE_POSSESSIVE_OPS = 25;
206 * Syntax bit. Allow embedded flags, (?is-x), as in Perl5.
208 public static final int RE_EMBEDDED_FLAGS = 26;
211 * Syntax bit. Allow octal char (\0377), as in Perl5.
213 public static final int RE_OCTAL_CHAR = 27;
216 * Syntax bit. Allow hex char (\x1b), as in Perl5.
218 public static final int RE_HEX_CHAR = 28;
221 * Syntax bit. Allow Unicode char (\u1234), as in Java 1.4.
223 public static final int RE_UNICODE_CHAR = 29;
226 * Syntax bit. Allow named property (\p{P}, \P{p}), as in Perl5.
228 public static final int RE_NAMED_PROPERTY = 30;
231 * Syntax bit. Allow nested characterclass ([a-z&&[^p-r]]), as in Java 1.4.
233 public static final int RE_NESTED_CHARCLASS = 31;
235 private static final int BIT_TOTAL = 32;
238 * Predefined syntax.
239 * Emulates regular expression support in the awk utility.
241 public static final RESyntax RE_SYNTAX_AWK;
244 * Predefined syntax.
245 * Emulates regular expression support in the ed utility.
247 public static final RESyntax RE_SYNTAX_ED;
250 * Predefined syntax.
251 * Emulates regular expression support in the egrep utility.
253 public static final RESyntax RE_SYNTAX_EGREP;
256 * Predefined syntax.
257 * Emulates regular expression support in the GNU Emacs editor.
259 public static final RESyntax RE_SYNTAX_EMACS;
262 * Predefined syntax.
263 * Emulates regular expression support in the grep utility.
265 public static final RESyntax RE_SYNTAX_GREP;
268 * Predefined syntax.
269 * Emulates regular expression support in the POSIX awk specification.
271 public static final RESyntax RE_SYNTAX_POSIX_AWK;
274 * Predefined syntax.
275 * Emulates POSIX basic regular expression support.
277 public static final RESyntax RE_SYNTAX_POSIX_BASIC;
280 * Predefined syntax.
281 * Emulates regular expression support in the POSIX egrep specification.
283 public static final RESyntax RE_SYNTAX_POSIX_EGREP;
286 * Predefined syntax.
287 * Emulates POSIX extended regular expression support.
289 public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
292 * Predefined syntax.
293 * Emulates POSIX basic minimal regular expressions.
295 public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
298 * Predefined syntax.
299 * Emulates POSIX extended minimal regular expressions.
301 public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
304 * Predefined syntax.
305 * Emulates regular expression support in the sed utility.
307 public static final RESyntax RE_SYNTAX_SED;
310 * Predefined syntax.
311 * Emulates regular expression support in Larry Wall's perl, version 4,
313 public static final RESyntax RE_SYNTAX_PERL4;
316 * Predefined syntax.
317 * Emulates regular expression support in Larry Wall's perl, version 4,
318 * using single line mode (/s modifier).
320 public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
323 * Predefined syntax.
324 * Emulates regular expression support in Larry Wall's perl, version 5.
326 public static final RESyntax RE_SYNTAX_PERL5;
329 * Predefined syntax.
330 * Emulates regular expression support in Larry Wall's perl, version 5,
331 * using single line mode (/s modifier).
333 public static final RESyntax RE_SYNTAX_PERL5_S;
336 * Predefined syntax.
337 * Emulates regular expression support in Java 1.4's java.util.regex
338 * package.
340 public static final RESyntax RE_SYNTAX_JAVA_1_4;
342 static
344 // Define syntaxes
346 RE_SYNTAX_EMACS = new RESyntax ().makeFinal ();
348 RESyntax RE_SYNTAX_POSIX_COMMON =
349 new RESyntax ().set (RE_CHAR_CLASSES).set (RE_DOT_NEWLINE).
350 set (RE_DOT_NOT_NULL).set (RE_INTERVALS).set (RE_NO_EMPTY_RANGES).
351 makeFinal ();
353 RE_SYNTAX_POSIX_BASIC =
354 new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_BK_PLUS_QM).makeFinal ();
356 RE_SYNTAX_POSIX_EXTENDED =
357 new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
358 set (RE_CONTEXT_INDEP_OPS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).
359 set (RE_NO_BK_VBAR).set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
361 RE_SYNTAX_AWK =
362 new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).
363 set (RE_DOT_NOT_NULL).set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).
364 set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).
365 set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
367 RE_SYNTAX_POSIX_AWK =
368 new RESyntax (RE_SYNTAX_POSIX_EXTENDED).
369 set (RE_BACKSLASH_ESCAPE_IN_LISTS).makeFinal ();
371 RE_SYNTAX_GREP =
372 new RESyntax ().set (RE_BK_PLUS_QM).set (RE_CHAR_CLASSES).
373 set (RE_HAT_LISTS_NOT_NEWLINE).set (RE_INTERVALS).set (RE_NEWLINE_ALT).
374 makeFinal ();
376 RE_SYNTAX_EGREP =
377 new RESyntax ().set (RE_CHAR_CLASSES).set (RE_CONTEXT_INDEP_ANCHORS).
378 set (RE_CONTEXT_INDEP_OPS).set (RE_HAT_LISTS_NOT_NEWLINE).
379 set (RE_NEWLINE_ALT).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).
380 makeFinal ();
382 RE_SYNTAX_POSIX_EGREP =
383 new RESyntax (RE_SYNTAX_EGREP).set (RE_INTERVALS).set (RE_NO_BK_BRACES).
384 makeFinal ();
386 /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
388 RE_SYNTAX_ED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
390 RE_SYNTAX_SED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal ();
392 RE_SYNTAX_POSIX_MINIMAL_BASIC =
393 new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_LIMITED_OPS).makeFinal ();
395 /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
396 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
398 RE_SYNTAX_POSIX_MINIMAL_EXTENDED =
399 new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
400 set (RE_CONTEXT_INVALID_OPS).set (RE_NO_BK_BRACES).
401 set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).set (RE_NO_BK_VBAR).
402 set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
404 /* There is no official Perl spec, but here's a "best guess" */
406 RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently
407 .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
408 .makeFinal ();
410 RE_SYNTAX_PERL4_S =
411 new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal ();
413 RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:)
414 .set (RE_STINGY_OPS) // *?,??,+?,{}?
415 .set (RE_LOOKAHEAD) // (?=)(?!)
416 .set (RE_STRING_ANCHORS) // \A,\Z
417 .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within []
418 .set (RE_COMMENTS) // (?#)
419 .set (RE_EMBEDDED_FLAGS) // (?imsx-imsx)
420 .set (RE_OCTAL_CHAR) // \0377
421 .set (RE_HEX_CHAR) // \x1b
422 .set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop}
423 .makeFinal ();
425 RE_SYNTAX_PERL5_S =
426 new RESyntax (RE_SYNTAX_PERL5).set (RE_DOT_NEWLINE).makeFinal ();
428 RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5)
429 // XXX
430 .set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+
431 .set (RE_UNICODE_CHAR) // \u1234
432 .set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]]
433 .makeFinal ();
437 * Construct a new syntax object with all bits turned off.
438 * This is equivalent to RE_SYNTAX_EMACS.
440 public RESyntax ()
442 bits = new BitSet (BIT_TOTAL);
446 * Called internally when constructing predefined syntaxes
447 * so their interpretation cannot vary. Conceivably useful
448 * for your syntaxes as well. Causes IllegalAccessError to
449 * be thrown if any attempt to modify the syntax is made.
451 * @return this object for convenient chaining
453 public RESyntax makeFinal ()
455 isFinal = true;
456 return this;
460 * Construct a new syntax object with all bits set the same
461 * as the other syntax.
463 public RESyntax (RESyntax other)
465 bits = (BitSet) other.bits.clone ();
469 * Check if a given bit is set in this syntax.
471 public boolean get (int index)
473 return bits.get (index);
477 * Set a given bit in this syntax.
479 * @param index the constant (RESyntax.RE_xxx) bit to set.
480 * @return a reference to this object for easy chaining.
482 public RESyntax set (int index)
484 if (isFinal)
485 throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
486 bits.set (index);
487 return this;
491 * Clear a given bit in this syntax.
493 * @param index the constant (RESyntax.RE_xxx) bit to clear.
494 * @return a reference to this object for easy chaining.
496 public RESyntax clear (int index)
498 if (isFinal)
499 throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
500 bits.clear (index);
501 return this;
505 * Changes the line separator string for regular expressions
506 * created using this RESyntax. The default separator is the
507 * value returned by the system property "line.separator", which
508 * should be correct when reading platform-specific files from a
509 * filesystem. However, many programs may collect input from
510 * sources where the line separator is differently specified (for
511 * example, in the applet environment, the text box widget
512 * interprets line breaks as single-character newlines,
513 * regardless of the host platform.
515 * Note that setting the line separator to a character or
516 * characters that have specific meaning within the current syntax
517 * can cause unexpected chronosynclastic infundibula.
519 * @return this object for convenient chaining
521 public RESyntax setLineSeparator (String aSeparator)
523 if (isFinal)
524 throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
525 lineSeparator = aSeparator;
526 return this;
530 * Returns the currently active line separator string. The default
531 * is the platform-dependent system property "line.separator".
533 public String getLineSeparator ()
535 return lineSeparator;