Added new test cases for many of the actions and tests of find
[findutils.git] / lib / regexprops.c
blob6e8f3330ed663b582eca215bd59ef7eb3ed02f54
1 /* regexprops.c -- document the properties of the regular expressions
2 understood by gnulib.
4 Copyright 2005 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by James Youngman, <jay@gnu.org>. */
22 #if HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <errno.h>
30 #include "regex.h"
31 #include "regextype.h"
/* Write S to standard output exactly as given.
   ESCAPE lets callers distinguish plain text (nonzero) from markup
   (zero), but no escaping is performed: S is written unchanged
   whatever the value of ESCAPE.  */
static void output(const char *s, int escape)
{
  (void) escape;  /* Accepted for the callers' benefit; not acted upon.  */
  fputs(s, stdout);
}
/* End the current output line.  */
static void newline(void)
{
  static const char eol[] = "\n";

  output(eol, 0);
}
/* Emit S as body text.  It is handed to output() with the escape
   flag set.  */
static void content(const char *s)
{
  const int escape = 1;

  output(s, escape);
}
/* Emit S as-is.  It is handed to output() with the escape flag
   clear.  */
static void literal(const char *s)
{
  const int escape = 0;

  output(s, escape);
}
/* Emit the markup command S.  It is handed to output() with the
   escape flag clear.  */
static void directive(const char *s)
{
  const int escape = 0;

  output(s, escape);
}
/* Emit one list entry: a line break, then "@item " followed by S on
   the same line, then another line break.  */
static void enum_item(const char *s)
{
  newline();

  /* The item text shares the line with the @item command.  */
  directive("@item ");
  literal(s);

  newline();
}
/* Emit a bare "@item" line followed by S on a line of its own.  */
static void table_item(const char *s)
{
  /* The @item command gets a line to itself...  */
  directive("@item");
  newline();

  /* ...and the text follows on the next one.  */
  content(s);
  newline();
}
/* Emit S wrapped in "@code{" and "}".  */
static void code(const char *s)
{
  static const char opener[] = "@code{";
  static const char closer[] = "}";

  directive(opener);
  content(s);
  directive(closer);
}
/* Start the subsection describing the regular expression type NAME.
   Two lines are emitted after a leading line break:

     @node NAME regular expression syntax
     @subsection @samp{NAME} regular expression syntax

   NEXT, PREV and UP are accepted for the callers' benefit but are not
   written out: only the node name appears on the @node line.  */
static void begin_subsection(const char *name,
                             const char *next,
                             const char *prev,
                             const char *up)
{
  /* Not used; see the comment above.  */
  (void) next;
  (void) prev;
  (void) up;

  newline();

  directive("@node ");
  content(name);
  content(" regular expression syntax");
  newline();

  directive("@subsection ");
  output("@samp{", 0);
  content(name);
  output("}", 0);
  content(" regular expression syntax");
  newline();
}
/* Open a table whose items are printed as-is: emits "@table @asis"
   on a line of its own.  */
static void begintable_asis(void)
{
  newline();
  directive("@table @asis");
  newline();
}
/* Open a table whose items are formatted with MARKUP: emits
   "@table MARKUP" on a line of its own.  */
static void begintable_markup(char const *markup)
{
  newline();

  /* The formatting command shares the line with @table.  */
  directive("@table ");
  literal(markup);

  newline();
}
/* Close the current table: emits "@end table" on a line of its
   own.  */
static void endtable(void)
{
  newline();
  directive("@end table");
  newline();
}
/* Open a numbered list: emits "@enumerate" on a line of its own.  */
static void beginenum(void)
{
  newline();
  directive("@enumerate");
  newline();
}
/* Close the current numbered list: emits "@end enumerate" on a line
   of its own.  */
static void endenum(void)
{
  newline();
  directive("@end enumerate");
  newline();
}
/* Start a new paragraph by emitting two consecutive line breaks.  */
static void newpara(void)
{
  content("\n\n");
}
144 static int describe_regex_syntax(int options)
146 newpara();
147 content("The character @samp{.} matches any single character");
148 if ( (options & RE_DOT_NEWLINE) == 0 )
150 content(" except newline");
152 if (options & RE_DOT_NOT_NULL)
154 if ( (options & RE_DOT_NEWLINE) == 0 )
155 content(" and");
156 else
157 content(" except");
159 content(" the null character");
161 content(". ");
162 newpara();
164 if (!(options & RE_LIMITED_OPS))
166 begintable_markup("@samp");
167 if (options & RE_BK_PLUS_QM)
169 enum_item("\\+");
170 content("indicates that the regular expression should match one"
171 " or more occurrences of the previous atom or regexp. ");
172 enum_item("\\?");
173 content("indicates that the regular expression should match zero"
174 " or one occurrence of the previous atom or regexp. ");
175 enum_item("+ and ? ");
176 content("match themselves. ");
178 else
180 enum_item("+");
181 content("indicates that the regular expression should match one"
182 " or more occurrences of the previous atom or regexp. ");
183 enum_item("?");
184 content("indicates that the regular expression should match zero"
185 " or one occurrence of the previous atom or regexp. ");
186 enum_item("\\+");
187 literal("matches a @samp{+}");
188 enum_item("\\?");
189 literal("matches a @samp{?}. ");
191 endtable();
194 newpara();
196 content("Bracket expressions are used to match ranges of characters. ");
197 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
198 if (options & RE_NO_EMPTY_RANGES)
199 content("invalid");
200 else
201 content("ignored");
202 content(". ");
204 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
205 literal("Within square brackets, @samp{\\} can be used to quote "
206 "the following character. ");
207 else
208 literal("Within square brackets, @samp{\\} is taken literally. ");
210 if (options & RE_CHAR_CLASSES)
211 content("Character classes are supported; for example "
212 "@samp{[[:digit:]]} will match a single decimal digit. ");
213 else
214 literal("Character classes are not supported, so for example "
215 "you would need to use @samp{[0-9]} "
216 "instead of @samp{[[:digit:]]}. ");
218 if (options & RE_HAT_LISTS_NOT_NEWLINE)
220 literal("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
222 newpara();
223 if (options & RE_NO_GNU_OPS)
225 content("GNU extensions are not supported and so "
226 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
227 "match "
228 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
230 else
232 content("GNU extensions are supported:");
233 beginenum();
234 enum_item("@samp{\\w} matches a character within a word");
235 enum_item("@samp{\\W} matches a character which is not within a word");
236 enum_item("@samp{\\<} matches the beginning of a word");
237 enum_item("@samp{\\>} matches the end of a word");
238 enum_item("@samp{\\b} matches a word boundary");
239 enum_item("@samp{\\B} matches characters which are not a word boundary");
240 enum_item("@samp{\\`} matches the beginning of the whole input");
241 enum_item("@samp{\\'} matches the end of the whole input");
242 endenum();
245 newpara();
248 if (options & RE_NO_BK_PARENS)
250 literal("Grouping is performed with parentheses @samp{()}. ");
252 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
253 literal("An unmatched @samp{)} matches just itself. ");
255 else
257 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
260 if (options & RE_NO_BK_REFS)
262 content("A backslash followed by a digit matches that digit. ");
264 else
266 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
267 if (options & RE_NO_BK_PARENS)
268 literal("@samp{(}");
269 else
270 literal("@samp{\\(}");
271 content(". ");
275 newpara();
276 if (!(options & RE_LIMITED_OPS))
278 if (options & RE_NO_BK_VBAR)
279 literal("The alternation operator is @samp{|}. ");
280 else
281 literal("The alternation operator is @samp{\\|}. ");
283 newpara();
285 if (options & RE_CONTEXT_INDEP_ANCHORS)
287 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
289 else
291 literal("The character @samp{^} only represents the beginning of a string when it appears:");
292 beginenum();
293 enum_item("\nAt the beginning of a regular expression");
294 enum_item("After an open-group, signified by ");
295 if (options & RE_NO_BK_PARENS)
297 literal("@samp{(}");
299 else
301 literal("@samp{\\(}");
303 newline();
304 if (!(options & RE_LIMITED_OPS))
306 if (options & RE_NEWLINE_ALT)
307 enum_item("After a newline");
309 if (options & RE_NO_BK_VBAR )
310 enum_item("After the alternation operator @samp{|}");
311 else
312 enum_item("After the alternation operator @samp{\\|}");
314 endenum();
316 newpara();
317 literal("The character @samp{$} only represents the end of a string when it appears:");
318 beginenum();
319 enum_item("At the end of a regular expression");
320 enum_item("Before an close-group, signified by ");
321 if (options & RE_NO_BK_PARENS)
323 literal("@samp{)}");
325 else
327 literal("@samp{\\)}");
329 if (!(options & RE_LIMITED_OPS))
331 if (options & RE_NEWLINE_ALT)
332 enum_item("Before a newline");
334 if (options & RE_NO_BK_VBAR)
335 enum_item("Before the alternation operator @samp{|}");
336 else
337 enum_item("Before the alternation operator @samp{\\|}");
339 endenum();
341 newpara();
342 if (!(options & RE_LIMITED_OPS) )
344 if ((options & RE_CONTEXT_INDEP_OPS)
345 && !(options & RE_CONTEXT_INVALID_OPS))
347 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
349 else
351 if (options & RE_BK_PLUS_QM)
352 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
353 else
354 literal("@samp{*}, @samp{+} and @samp{?} ");
356 if (options & RE_CONTEXT_INVALID_OPS)
358 content("are special at any point in a regular expression except the following places, where they are illegal:");
360 else
362 content("are special at any point in a regular expression except:");
365 beginenum();
366 enum_item("At the beginning of a regular expression");
367 enum_item("After an open-group, signified by ");
368 if (options & RE_NO_BK_PARENS)
370 literal("@samp{(}");
372 else
374 literal("@samp{\\(}");
376 if (!(options & RE_LIMITED_OPS))
378 if (options & RE_NEWLINE_ALT)
379 enum_item("After a newline");
381 if (options & RE_NO_BK_VBAR)
382 enum_item("After the alternation operator @samp{|}");
383 else
384 enum_item("After the alternation operator @samp{\\|}");
386 endenum();
391 newpara();
392 if (options & RE_INTERVALS)
394 if (options & RE_NO_BK_BRACES)
396 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
397 if (options & RE_INVALID_INTERVAL_ORD)
399 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
401 else
403 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
406 else
408 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
409 if (options & RE_INVALID_INTERVAL_ORD)
411 literal("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
413 else
415 literal("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
421 newpara();
422 if (options & RE_NO_POSIX_BACKTRACKING)
424 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
426 else
428 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
430 newpara();
/* Write the Texinfo menu listing one node per regular expression
   type.  Types are enumerated by index, starting from 0, until
   get_regex_type_name() returns a null pointer.  Only the name of
   each type is needed here, so its syntax flags are not looked up.

   Always returns 0; the caller in this file ignores the result.  */
static int menu(void)
{
  int i;
  const char *name;

  output("@menu\n", 0);
  for (i = 0; (name = get_regex_type_name(i)) != NULL; ++i)
    {
      /* One entry per type: "* NAME regular expression syntax::".  */
      output("* ", 0);
      output(name, 0);
      content(" regular expression syntax");
      output("::", 0);
      newline();
    }
  output("@end menu\n", 0);

  return 0;
}
/* Write the menu and then one subsection per regular expression
   type.  Types are enumerated by index, starting from 0, until
   get_regex_type_name() returns a null pointer.  A type for which
   get_regex_type_synonym() returns a non-negative index is described
   as a synonym of that type; any other type gets a full description
   of its syntax.  UP is passed through to begin_subsection().

   Always returns 0; the caller in this file ignores the result.  */
static int describe_all(const char *up)
{
  const char *name, *next, *previous;
  int options;
  int i, parent;

  menu();

  previous = "";

  for (i = 0; (name = get_regex_type_name(i)) != NULL; ++i)
    {
      options = get_regex_type_flags(i);

      /* The last type has no successor; use an empty name for it.  */
      next = get_regex_type_name(i + 1);
      if (NULL == next)
        next = "";

      begin_subsection(name, next, previous, up);

      parent = get_regex_type_synonym(i);
      if (parent >= 0)
        {
          content("This is a synonym for ");
          content(get_regex_type_name(parent));
          content(".");
        }
      else
        {
          describe_regex_syntax(options);
        }

      previous = name;
    }

  return 0;
}
/* Entry point.  The first command-line argument, when present, is
   passed to describe_all() as the "up" name; otherwise an empty
   string is used.  Returns 0 once the description has been
   written.  */
int main (int argc, char *argv[])
{
  const char *up = (argc > 1) ? argv[1] : "";

  describe_all(up);
  return 0;
}