Ignore the usually-ignored files in git
[findutils.git] / lib / regexprops.c
blob85ad112d68f82d03617e6e453acdf110727464ce
1 /* regexprops.c -- document the properties of the regular expressions
2 understood by gnulib.
4 Copyright 2005 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by James Youngman, <jay@gnu.org>. */
22 #include <config.h>
25 #include <stdio.h>
26 #include <unistd.h>
27 #include <errno.h>
29 #include "regex.h"
30 #include "regextype.h"
/* The name under which this program was invoked; main() stores argv[0]
   here.  */
char *program_name;
/* Write S to standard output exactly as given.

   ESCAPE is accepted from callers (content() passes 1, literal() and
   directive() pass 0) but is currently ignored: no escaping of any
   kind is applied to S.  */
static void
output (const char *s, int escape)
{
  (void) escape;                /* Deliberately unused.  */

  fputs (s, stdout);
}
/* Emit a single line break on standard output.  */
static void
newline (void)
{
  output ("\n", 0);
}
/* Emit S as body text.  This is the only wrapper that sets the ESCAPE
   flag when calling output().  */
static void
content (const char *s)
{
  output (s, 1);
}
/* Emit S verbatim, with the ESCAPE flag of output() cleared.  */
static void
literal (const char *s)
{
  output (s, 0);
}
/* Emit the Texinfo directive text S (for example "@item "), with the
   ESCAPE flag of output() cleared.  */
static void
directive (const char *s)
{
  output (s, 0);
}
/* Emit one "@item" line on a line of its own: a line break, the
   directive "@item ", the item text S written verbatim, and a final
   line break.  */
static void
enum_item (const char *s)
{
  newline ();

  directive ("@item ");
  literal (s);

  newline ();
}
/* Start the Texinfo subsection describing the regular expression type
   NAME.  Two lines are produced after a leading line break:

     @node NAME regular expression syntax
     @subsection @samp{NAME} regular expression syntax

   NEXT, PREV and UP are accepted but not used; no node pointers are
   written.  */
static void
begin_subsection (const char *name,
                  const char *next,
                  const char *prev,
                  const char *up)
{
  static const char title_suffix[] = " regular expression syntax";

  (void) next;
  (void) prev;
  (void) up;

  newline ();

  /* The @node line.  */
  directive ("@node ");
  content (name);
  content (title_suffix);
  newline ();

  /* The @subsection line, with the type name wrapped in @samp{}.  */
  directive ("@subsection ");
  literal ("@samp{");
  content (name);
  literal ("}");
  content (title_suffix);
  newline ();
}
/* Open a Texinfo table whose items are formatted with MARKUP (for
   example "@samp"): emits "@table MARKUP" on a line of its own.  */
static void
begintable_markup (char const *markup)
{
  newline ();

  directive ("@table ");
  literal (markup);

  newline ();
}
/* Close the table opened by begintable_markup(): emits "@end table"
   on a line of its own.  Declared with a (void) prototype so that a
   call with arguments is diagnosed by the compiler.  */
static void
endtable (void)
{
  newline ();
  directive ("@end table");
  newline ();
}
/* Open an enumerated list: emits "@enumerate" on a line of its own.
   Declared with a (void) prototype so that a call with arguments is
   diagnosed by the compiler.  */
static void
beginenum (void)
{
  newline ();
  directive ("@enumerate");
  newline ();
}
/* Close the list opened by beginenum(): emits "@end enumerate" on a
   line of its own.  Declared with a (void) prototype so that a call
   with arguments is diagnosed by the compiler.  */
static void
endenum (void)
{
  newline ();
  directive ("@end enumerate");
  newline ();
}
/* Start a new paragraph by emitting two consecutive line breaks.
   Declared with a (void) prototype so that a call with arguments is
   diagnosed by the compiler.  */
static void
newpara (void)
{
  content ("\n\n");
}
131 static void
132 describe_regex_syntax(int options)
134 newpara();
135 content("The character @samp{.} matches any single character");
136 if ( (options & RE_DOT_NEWLINE) == 0 )
138 content(" except newline");
140 if (options & RE_DOT_NOT_NULL)
142 if ( (options & RE_DOT_NEWLINE) == 0 )
143 content(" and");
144 else
145 content(" except");
147 content(" the null character");
149 content(". ");
150 newpara();
152 if (!(options & RE_LIMITED_OPS))
154 begintable_markup("@samp");
155 if (options & RE_BK_PLUS_QM)
157 enum_item("\\+");
158 content("indicates that the regular expression should match one"
159 " or more occurrences of the previous atom or regexp. ");
160 enum_item("\\?");
161 content("indicates that the regular expression should match zero"
162 " or one occurrence of the previous atom or regexp. ");
163 enum_item("+ and ? ");
164 content("match themselves. ");
166 else
168 enum_item("+");
169 content("indicates that the regular expression should match one"
170 " or more occurrences of the previous atom or regexp. ");
171 enum_item("?");
172 content("indicates that the regular expression should match zero"
173 " or one occurrence of the previous atom or regexp. ");
174 enum_item("\\+");
175 literal("matches a @samp{+}");
176 enum_item("\\?");
177 literal("matches a @samp{?}. ");
179 endtable();
182 newpara();
184 content("Bracket expressions are used to match ranges of characters. ");
185 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
186 if (options & RE_NO_EMPTY_RANGES)
187 content("invalid");
188 else
189 content("ignored");
190 content(". ");
192 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
193 literal("Within square brackets, @samp{\\} can be used to quote "
194 "the following character. ");
195 else
196 literal("Within square brackets, @samp{\\} is taken literally. ");
198 if (options & RE_CHAR_CLASSES)
199 content("Character classes are supported; for example "
200 "@samp{[[:digit:]]} will match a single decimal digit. ");
201 else
202 literal("Character classes are not supported, so for example "
203 "you would need to use @samp{[0-9]} "
204 "instead of @samp{[[:digit:]]}. ");
206 if (options & RE_HAT_LISTS_NOT_NEWLINE)
208 literal("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
210 newpara();
211 if (options & RE_NO_GNU_OPS)
213 content("GNU extensions are not supported and so "
214 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
215 "match "
216 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
218 else
220 content("GNU extensions are supported:");
221 beginenum();
222 enum_item("@samp{\\w} matches a character within a word");
223 enum_item("@samp{\\W} matches a character which is not within a word");
224 enum_item("@samp{\\<} matches the beginning of a word");
225 enum_item("@samp{\\>} matches the end of a word");
226 enum_item("@samp{\\b} matches a word boundary");
227 enum_item("@samp{\\B} matches characters which are not a word boundary");
228 enum_item("@samp{\\`} matches the beginning of the whole input");
229 enum_item("@samp{\\'} matches the end of the whole input");
230 endenum();
233 newpara();
236 if (options & RE_NO_BK_PARENS)
238 literal("Grouping is performed with parentheses @samp{()}. ");
240 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
241 literal("An unmatched @samp{)} matches just itself. ");
243 else
245 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
248 if (options & RE_NO_BK_REFS)
250 content("A backslash followed by a digit matches that digit. ");
252 else
254 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
255 if (options & RE_NO_BK_PARENS)
256 literal("@samp{(}");
257 else
258 literal("@samp{\\(}");
259 content(". ");
263 newpara();
264 if (!(options & RE_LIMITED_OPS))
266 if (options & RE_NO_BK_VBAR)
267 literal("The alternation operator is @samp{|}. ");
268 else
269 literal("The alternation operator is @samp{\\|}. ");
271 newpara();
273 if (options & RE_CONTEXT_INDEP_ANCHORS)
275 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
277 else
279 literal("The character @samp{^} only represents the beginning of a string when it appears:");
280 beginenum();
281 enum_item("\nAt the beginning of a regular expression");
282 enum_item("After an open-group, signified by ");
283 if (options & RE_NO_BK_PARENS)
285 literal("@samp{(}");
287 else
289 literal("@samp{\\(}");
291 newline();
292 if (!(options & RE_LIMITED_OPS))
294 if (options & RE_NEWLINE_ALT)
295 enum_item("After a newline");
297 if (options & RE_NO_BK_VBAR )
298 enum_item("After the alternation operator @samp{|}");
299 else
300 enum_item("After the alternation operator @samp{\\|}");
302 endenum();
304 newpara();
305 literal("The character @samp{$} only represents the end of a string when it appears:");
306 beginenum();
307 enum_item("At the end of a regular expression");
308 enum_item("Before a close-group, signified by ");
309 if (options & RE_NO_BK_PARENS)
311 literal("@samp{)}");
313 else
315 literal("@samp{\\)}");
317 if (!(options & RE_LIMITED_OPS))
319 if (options & RE_NEWLINE_ALT)
320 enum_item("Before a newline");
322 if (options & RE_NO_BK_VBAR)
323 enum_item("Before the alternation operator @samp{|}");
324 else
325 enum_item("Before the alternation operator @samp{\\|}");
327 endenum();
329 newpara();
330 if (!(options & RE_LIMITED_OPS) )
332 if ((options & RE_CONTEXT_INDEP_OPS)
333 && !(options & RE_CONTEXT_INVALID_OPS))
335 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
337 else
339 if (options & RE_BK_PLUS_QM)
340 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
341 else
342 literal("@samp{*}, @samp{+} and @samp{?} ");
344 if (options & RE_CONTEXT_INVALID_OPS)
346 content("are special at any point in a regular expression except the following places, where they are not allowed:");
348 else
350 content("are special at any point in a regular expression except:");
353 beginenum();
354 enum_item("At the beginning of a regular expression");
355 enum_item("After an open-group, signified by ");
356 if (options & RE_NO_BK_PARENS)
358 literal("@samp{(}");
360 else
362 literal("@samp{\\(}");
364 if (!(options & RE_LIMITED_OPS))
366 if (options & RE_NEWLINE_ALT)
367 enum_item("After a newline");
369 if (options & RE_NO_BK_VBAR)
370 enum_item("After the alternation operator @samp{|}");
371 else
372 enum_item("After the alternation operator @samp{\\|}");
374 endenum();
379 newpara();
380 if (options & RE_INTERVALS)
382 if (options & RE_NO_BK_BRACES)
384 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
385 if (options & RE_INVALID_INTERVAL_ORD)
387 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
389 else
391 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
394 else
396 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
397 if (options & RE_INVALID_INTERVAL_ORD)
399 literal("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
401 else
403 literal("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
409 newpara();
410 if (options & RE_NO_POSIX_BACKTRACKING)
412 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
414 else
416 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
418 newpara();
/* Write a Texinfo "@menu" block with one entry per regular expression
   type.  Types are enumerated by index from 0 until
   get_regex_type_name() returns NULL.  Each entry has the form

     * NAME regular expression syntax::

   Only the type name is needed here, so the syntax flags are not
   fetched.  */
static void
menu (void)
{
  int i;
  const char *name;

  output ("@menu\n", 0);
  for (i = 0; (name = get_regex_type_name (i)) != NULL; ++i)
    {
      output ("* ", 0);
      output (name, 0);
      content (" regular expression syntax");
      output ("::", 0);
      newline ();
    }
  output ("@end menu\n", 0);
}
/* Write the menu followed by one subsection per regular expression
   type.  Types are visited by index from 0 until get_regex_type_name()
   returns NULL.  A type for which get_regex_type_synonym() returns a
   non-negative index gets a one-line cross reference to that type;
   every other type gets the full description of its syntax flags.
   UP is passed through to begin_subsection().  */
static void
describe_all (const char *up)
{
  const char *prev_name = "";
  int idx;

  menu ();

  for (idx = 0; ; ++idx)
    {
      /* Flags are fetched before the name, for every index up to and
         including the terminating one.  */
      const int flags = get_regex_type_flags (idx);
      const char *const name = get_regex_type_name (idx);
      const char *next_name;
      int synonym_of;

      if (!name)
        break;

      next_name = get_regex_type_name (idx + 1);
      if (!next_name)
        next_name = "";

      begin_subsection (name, next_name, prev_name, up);

      synonym_of = get_regex_type_synonym (idx);
      if (synonym_of < 0)
        {
          describe_regex_syntax (flags);
        }
      else
        {
          content ("This is a synonym for ");
          content (get_regex_type_name (synonym_of));
          content (".");
        }

      prev_name = name;
    }
}
482 int main (int argc, char *argv[])
484 const char *up = "";
485 program_name = argv[0];
487 if (argc > 1)
488 up = argv[1];
490 describe_all(up);
491 return 0;