1 /* regexprops.c -- document the properties of the regular expressions
4 Copyright 2005 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by James Youngman, <jay@gnu.org>. */
31 #include "regextype.h"
34 /* Name this program was run with. */
/* output: takes a string S and an integer ESCAPE.  Only the signature is
   visible in this view.  The visible callers that emit the "@menu" and
   "@end menu" lines pass 0 for ESCAPE.
   NOTE(review): what a non-zero ESCAPE does cannot be determined from
   here -- confirm against the body.  */
37 static void output(const char *s
, int escape
)
/* newline: takes no arguments and returns nothing.  The body is not
   visible in this view.
   NOTE(review): presumably ends the current output line -- confirm.  */
45 static void newline(void)
/* content: takes one string S.  Callers in this file pass fragments of
   descriptive Texinfo text, for example " regular expression syntax".
   NOTE(review): how this differs from literal() is not visible from
   here -- confirm against the body.  */
50 static void content(const char *s
)
/* literal: takes one string S.  Callers in this file pass text that
   already contains Texinfo markup such as @samp{...} and @{ / @}.
   NOTE(review): how this differs from content() is not visible from
   here -- confirm against the body.  */
55 static void literal(const char *s
)
/* directive: takes one string S.  Callers in this file pass Texinfo
   @-commands: "@subsection ", "@table ", "@end table", "@enumerate" and
   "@end enumerate".  The body is not visible in this view.  */
60 static void directive(const char *s
)
/* enum_item: takes one string S.  Callers in this file pass the text of
   a single list entry, for example "After a newline".
   NOTE(review): presumably emits an "@item" before S -- the body is not
   visible, confirm.  */
65 static void enum_item(const char *s
)
/* begin_subsection: starts the documentation for one regular expression
   type.  The parameter list is cut off after NAME in this view; the
   visible caller passes four arguments (name, next, previous, up).
   The visible statements emit " regular expression syntax" twice, with
   an "@subsection " directive between them.
   NOTE(review): the statements that print NAME itself and any node line
   are not visible -- confirm against the full body.  */
73 static void begin_subsection(const char *name
,
86 content(" regular expression syntax");
89 directive("@subsection ");
93 content(" regular expression syntax");
/* begintable_markup: opens a Texinfo table by emitting the "@table "
   directive.  MARKUP is the formatting command for the table; the one
   visible caller passes "@samp".
   NOTE(review): the statement that appends MARKUP after "@table " is
   not visible in this view -- confirm.  */
97 static void begintable_markup(char const *markup
)
100 directive("@table ");
/* endtable: closes a Texinfo table by emitting "@end table".  Any
   surrounding statements are not visible in this view.  */
105 static void endtable()
108 directive("@end table");
/* beginenum: opens a Texinfo numbered list by emitting "@enumerate".
   Any surrounding statements are not visible in this view.  */
112 static void beginenum()
115 directive("@enumerate");
/* endenum: closes a Texinfo numbered list by emitting "@end enumerate".
   Any surrounding statements are not visible in this view.  */
119 static void endenum()
122 directive("@end enumerate");
/* newpara: takes no arguments.  The body is not visible in this view.
   NOTE(review): presumably emits a paragraph break -- confirm.  */
126 static void newpara()
/* describe_regex_syntax: emit a Texinfo description of the regular
   expression dialect selected by OPTIONS, a bit mask tested against the
   RE_* syntax flags.  Each section below tests one or more flags and
   emits the matching sentence through content(), literal() or
   enum_item().
   NOTE(review): many lines (braces, else arms, table and list
   open/close calls) are missing from this view, so the pairing of the
   strings with the branches of each test must be confirmed against the
   complete file.  */
133 describe_regex_syntax(int options
)
/* What "." matches: the newline exception is mentioned when
   RE_DOT_NEWLINE is clear, and the null character is mentioned when
   RE_DOT_NOT_NULL is set.  */
136 content("The character @samp{.} matches any single character");
137 if ( (options
& RE_DOT_NEWLINE
) == 0 )
139 content(" except newline");
141 if (options
& RE_DOT_NOT_NULL
)
143 if ( (options
& RE_DOT_NEWLINE
) == 0 )
148 content(" the null character");
/* The "+" and "?" operators, described only when RE_LIMITED_OPS is
   clear.  RE_BK_PLUS_QM is tested to choose between two groups of
   strings; the else that separates them is not visible here.  */
153 if (!(options
& RE_LIMITED_OPS
))
155 begintable_markup("@samp");
156 if (options
& RE_BK_PLUS_QM
)
159 content("indicates that the regular expression should match one"
160 " or more occurrences of the previous atom or regexp. ");
162 content("indicates that the regular expression should match zero"
163 " or one occurrence of the previous atom or regexp. ");
164 enum_item("+ and ? ");
165 content("match themselves. ");
170 content("indicates that the regular expression should match one"
171 " or more occurrences of the previous atom or regexp. ");
173 content("indicates that the regular expression should match zero"
174 " or one occurrence of the previous atom or regexp. ");
176 literal("matches a @samp{+}");
178 literal("matches a @samp{?}. ");
/* Bracket expressions: backward ranges (RE_NO_EMPTY_RANGES), backslash
   inside brackets (RE_BACKSLASH_ESCAPE_IN_LISTS), character classes
   (RE_CHAR_CLASSES) and newline in non-matching lists
   (RE_HAT_LISTS_NOT_NEWLINE).  */
185 content("Bracket expressions are used to match ranges of characters. ");
186 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
187 if (options
& RE_NO_EMPTY_RANGES
)
193 if (options
& RE_BACKSLASH_ESCAPE_IN_LISTS
)
194 literal("Within square brackets, @samp{\\} can be used to quote "
195 "the following character. ");
197 literal("Within square brackets, @samp{\\} is taken literally. ");
199 if (options
& RE_CHAR_CLASSES
)
200 content("Character classes are supported; for example "
201 "@samp{[[:digit:]]} will match a single decimal digit. ");
203 literal("Character classes are not supported, so for example "
204 "you would need to use @samp{[0-9]} "
205 "instead of @samp{[[:digit:]]}. ");
207 if (options
& RE_HAT_LISTS_NOT_NEWLINE
)
209 literal("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
/* GNU extensions (\w, \W, \<, \>, \b, \B, \`, \'), selected by a test
   of RE_NO_GNU_OPS: one string says they are not supported, the other
   strings list each operator as a separate item.  */
212 if (options
& RE_NO_GNU_OPS
)
214 content("GNU extensions are not supported and so "
215 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
217 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
221 content("GNU extensions are supported:");
223 enum_item("@samp{\\w} matches a character within a word");
224 enum_item("@samp{\\W} matches a character which is not within a word");
225 enum_item("@samp{\\<} matches the beginning of a word");
226 enum_item("@samp{\\>} matches the end of a word");
227 enum_item("@samp{\\b} matches a word boundary");
228 enum_item("@samp{\\B} matches characters which are not a word boundary");
229 enum_item("@samp{\\`} matches the beginning of the whole input");
230 enum_item("@samp{\\'} matches the end of the whole input");
/* Grouping syntax: a test of RE_NO_BK_PARENS selects between the plain
   and the backslashed parenthesis wording;
   RE_UNMATCHED_RIGHT_PAREN_ORD adds the note about a lone ")".  */
237 if (options
& RE_NO_BK_PARENS
)
239 literal("Grouping is performed with parentheses @samp{()}. ");
241 if (options
& RE_UNMATCHED_RIGHT_PAREN_ORD
)
242 literal("An unmatched @samp{)} matches just itself. ");
246 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
/* Back-references, selected by a test of RE_NO_BK_REFS.  The long
   sentence ends by naming the opening-parenthesis token, which depends
   on RE_NO_BK_PARENS.  */
249 if (options
& RE_NO_BK_REFS
)
251 content("A backslash followed by a digit matches that digit. ");
255 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
256 if (options
& RE_NO_BK_PARENS
)
259 literal("@samp{\\(}");
/* Alternation operator, described only when RE_LIMITED_OPS is clear;
   a test of RE_NO_BK_VBAR selects between "|" and "\|".  */
265 if (!(options
& RE_LIMITED_OPS
))
267 if (options
& RE_NO_BK_VBAR
)
268 literal("The alternation operator is @samp{|}. ");
270 literal("The alternation operator is @samp{\\|}. ");
/* Anchors "^" and "$".  RE_CONTEXT_INDEP_ANCHORS is tested to choose
   between the "always represent" sentence and two lists of positions
   in which each anchor is special.  */
274 if (options
& RE_CONTEXT_INDEP_ANCHORS
)
276 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
/* Positions in which "^" is an anchor.  */
280 literal("The character @samp{^} only represents the beginning of a string when it appears:");
282 enum_item("\nAt the beginning of a regular expression");
283 enum_item("After an open-group, signified by ");
284 if (options
& RE_NO_BK_PARENS
)
290 literal("@samp{\\(}");
293 if (!(options
& RE_LIMITED_OPS
))
295 if (options
& RE_NEWLINE_ALT
)
296 enum_item("After a newline");
298 if (options
& RE_NO_BK_VBAR
)
299 enum_item("After the alternation operator @samp{|}");
301 enum_item("After the alternation operator @samp{\\|}");
/* Positions in which "$" is an anchor.  */
306 literal("The character @samp{$} only represents the end of a string when it appears:");
308 enum_item("At the end of a regular expression");
/* NOTE(review): "an close-group" in the next string looks like a typo
   for "a close-group".  It is emitted text, so it is left unchanged
   here; fix it together with the generated documentation.  */
309 enum_item("Before an close-group, signified by ");
310 if (options
& RE_NO_BK_PARENS
)
316 literal("@samp{\\)}");
318 if (!(options
& RE_LIMITED_OPS
))
320 if (options
& RE_NEWLINE_ALT
)
321 enum_item("Before a newline");
323 if (options
& RE_NO_BK_VBAR
)
324 enum_item("Before the alternation operator @samp{|}");
326 enum_item("Before the alternation operator @samp{\\|}");
/* Where the repetition operators are special, described only when
   RE_LIMITED_OPS is clear.  With RE_CONTEXT_INDEP_OPS set and
   RE_CONTEXT_INVALID_OPS clear the "special anywhere" sentence is
   used; the remaining strings list the exceptional positions.  */
331 if (!(options
& RE_LIMITED_OPS
) )
333 if ((options
& RE_CONTEXT_INDEP_OPS
)
334 && !(options
& RE_CONTEXT_INVALID_OPS
))
336 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
340 if (options
& RE_BK_PLUS_QM
)
341 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
343 literal("@samp{*}, @samp{+} and @samp{?} ");
345 if (options
& RE_CONTEXT_INVALID_OPS
)
347 content("are special at any point in a regular expression except the following places, where they are not allowed:");
351 content("are special at any point in a regular expression except:");
355 enum_item("At the beginning of a regular expression");
356 enum_item("After an open-group, signified by ");
357 if (options
& RE_NO_BK_PARENS
)
363 literal("@samp{\\(}");
365 if (!(options
& RE_LIMITED_OPS
))
367 if (options
& RE_NEWLINE_ALT
)
368 enum_item("After a newline");
370 if (options
& RE_NO_BK_VBAR
)
371 enum_item("After the alternation operator @samp{|}");
373 enum_item("After the alternation operator @samp{\\|}");
/* Interval syntax, described only when RE_INTERVALS is set.  A test
   of RE_NO_BK_BRACES selects between the plain and the backslashed
   brace wording, and RE_INVALID_INTERVAL_ORD selects how invalid
   intervals are described.  "@{" and "@}" are the Texinfo spellings
   of literal braces.  */
381 if (options
& RE_INTERVALS
)
383 if (options
& RE_NO_BK_BRACES
)
385 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
386 if (options
& RE_INVALID_INTERVAL_ORD
)
388 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
392 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
397 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
398 if (options
& RE_INVALID_INTERVAL_ORD
)
400 literal("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
404 literal("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
/* Match length: a test of RE_NO_POSIX_BACKTRACKING selects between
   the first-match and the longest-match sentence.  */
411 if (options
& RE_NO_POSIX_BACKTRACKING
)
413 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
417 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
/* Menu emitter; the function signature is not visible in this view.
   The visible statements write "@menu", look up the flags and name of
   regex type I with get_regex_type_flags and get_regex_type_name, emit
   " regular expression syntax", and finally write "@end menu".
   NOTE(review): the loop over I and the statement that prints each
   menu entry's name are not visible -- confirm.  */
430 output("@menu\n", 0);
432 options
= get_regex_type_flags(i
),
433 name
=get_regex_type_name(i
);
438 content(" regular expression syntax");
442 output("@end menu\n", 0);
/* describe_all: emit one subsection for a regular expression type.
   UP is forwarded unchanged as the last argument of begin_subsection.
   For index I the visible statements fetch the type's flags and name,
   fetch the name of type I+1 as NEXT, open the subsection, and look up
   a synonym index with get_regex_type_synonym.  The remaining visible
   calls either emit "This is a synonym for " followed by the parent
   type's name, or describe the syntax in full via
   describe_regex_syntax.
   NOTE(review): the loop header, the updates to PREVIOUS and the
   condition that chooses between the synonym text and the full
   description are not visible -- confirm.  */
447 describe_all(const char *up
)
449 const char *name
, *next
, *previous
;
458 options
= get_regex_type_flags(i
),
459 name
=get_regex_type_name(i
);
462 next
= get_regex_type_name(i
+1);
465 begin_subsection(name
, next
, previous
, up
);
466 parent
= get_regex_type_synonym(i
);
469 content("This is a synonym for ");
470 content(get_regex_type_name(parent
));
475 describe_regex_syntax(options
);
/* main: program entry point.  The only visible statement stores
   argv[0] in program_name; the rest of the body is not visible in
   this view.  */
483 int main (int argc
, char *argv
[])
486 program_name
= argv
[0];