Made Texinfo style corrections suggested by Karl Berry.
[findutils.git] / lib / regexprops.c
blob974ce6bbad00cf5daf17ded227405b38dcf167ab
1 /* regexprops.c -- document the properties of the regular expressions
2 understood by gnulib.
4 Copyright 2005 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by James Youngman, <jay@gnu.org>. */
22 #if HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <errno.h>
30 #include "regex.h"
31 #include "regextype.h"
/* Write the string S to standard output.
   ESCAPE is accepted so that callers can state whether S is ordinary
   text (1) or Texinfo markup (0), but it is currently ignored: S is
   always written unchanged.  */
static void output(const char *s, int escape)
{
  (void) escape;		/* No escaping is performed yet.  */
  fputs(s, stdout);
}
/* Emit a single line break.  */
static void newline(void)
{
  static const char line_break[] = "\n";

  output(line_break, 0);
}
/* Emit S as document text.  This is the only wrapper that passes a
   nonzero escape flag to output().  */
static void content(const char *s)
{
  const int escape = 1;

  output(s, escape);
}
/* Emit S with the escape flag clear; callers use this for strings
   that already contain Texinfo markup.  */
static void literal(const char *s)
{
  const int escape = 0;

  output(s, escape);
}
/* Emit the Texinfo command S (for example "@item") with the escape
   flag clear.  */
static void directive(const char *s)
{
  const int escape = 0;

  output(s, escape);
}
/* Start a new list entry: a line break, then "@item " followed by S on
   the same line, then another line break.  S is emitted through
   literal(), so it may contain Texinfo markup.  */
static void enum_item(const char *s)
{
  newline();

  /* "@item <s>" all on one line.  */
  directive("@item ");
  literal(s);

  newline();
}
/* Emit a bare "@item" line followed by S on a line of its own.
   Unlike enum_item(), S goes through content() and no line break
   precedes the "@item".  */
static void table_item(const char *s)
{
  /* The "@item" line.  */
  directive("@item");
  newline();

  /* The text line.  */
  content(s);
  newline();
}
/* Emit S wrapped in a Texinfo @code{...} group.  */
static void code(const char *s)
{
  directive("@code{");	/* open the group */
  content(s);
  directive("}");		/* close the group */
}
/* Open a new subsection called NAME.

   Output is a line break, then the line
       @node NAME,NEXT,PREV,UP
   and then the line
       @subsection NAME
   Every argument is emitted through content().  */
static void begin_subsection(const char *name,
			     const char *next,
			     const char *prev,
			     const char *up)
{
  newline();

  /* The @node line: four comma-separated fields.  */
  directive("@node ");
  content(name);
  content(",");
  content(next);
  content(",");
  content(prev);
  content(",");
  content(up);
  newline();

  /* The heading itself.  */
  directive("@subsection ");
  content(name);
  newline();
}
/* Open a Texinfo table whose item labels are printed as-is, by
   emitting "@table @asis" on a line of its own.  */
static void begintable_asis(void)
{
  newline();
  directive("@table @asis");
  newline();
}
/* Open a Texinfo table whose item labels are formatted with MARKUP
   (for example "@samp"), by emitting "@table MARKUP" on a line of its
   own.  MARKUP is emitted through literal().  */
static void begintable_markup(char const *markup)
{
  newline();

  /* "@table <markup>" on a single line.  */
  directive("@table ");
  literal(markup);

  newline();
}
/* Close a table by emitting "@end table" on a line of its own.  */
static void endtable(void)
{
  newline();
  directive("@end table");
  newline();
}
/* Open a numbered list by emitting "@enumerate" on a line of its
   own.  */
static void beginenum(void)
{
  newline();
  directive("@enumerate");
  newline();
}
/* Close a numbered list by emitting "@end enumerate" on a line of its
   own.  */
static void endenum(void)
{
  newline();
  directive("@end enumerate");
  newline();
}
/* Start a new paragraph by emitting a blank line (two line breaks).  */
static void newpara(void)
{
  content("\n\n");
}
/* Emit Texinfo prose describing one regular-expression dialect.
   OPTIONS is tested against the RE_* syntax bits; each test below picks
   the sentence (or list) that is written for that feature.

   NOTE(review): in this listing the lines that held only braces are
   missing, so the exact nesting of some statements cannot be read off
   the text -- check against the pristine file before restructuring.
   NOTE(review): the function is declared to return int, but no return
   statement is visible in this listing.  */
146 static int describe_regex_syntax(int options)
/* Grouping: bare parentheses or backslashed parentheses.  */
149 if (options & RE_NO_BK_PARENS)
151 literal("Grouping is performed with parentheses @samp{()}. ");
153 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
154 literal("An unmatched @samp{)} matches just itself. ");
156 else
158 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
/* Backslash-digit: either a plain digit or a back-reference.  */
161 if (options & RE_NO_BK_REFS)
163 content("A backslash followed by a digit matches that digit. ");
165 else
167 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
/* Finish the sentence with whichever open-group token this syntax uses.  */
168 if (options & RE_NO_BK_PARENS)
169 literal("@samp{(}");
170 else
171 literal("@samp{\\(}");
172 content(". ");
176 newpara();
/* Alternation is mentioned only when RE_LIMITED_OPS is clear.  */
177 if (!(options & RE_LIMITED_OPS))
179 if (options & RE_NO_BK_VBAR)
180 literal("The alternation operator is @samp{|}. ");
181 else
182 literal("The alternation operator is @samp{\\|}. ");
/* Bracket expressions: reversed ranges and backslash inside brackets.  */
185 content("Bracket expressions are used to match ranges of characters. ");
186 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
187 if (options & RE_NO_EMPTY_RANGES)
188 content("invalid");
189 else
190 content("ignored");
191 content(". ");
193 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
194 literal("Within square brackets, @samp{\\} can be used to quote "
195 "the following character. ");
196 else
197 literal("Within square brackets, @samp{\\} is taken literally. ");
199 newpara();
/* Table of the "+" and "?" operators, in backslashed or plain form
   depending on RE_BK_PLUS_QM.  */
200 if (!(options & RE_LIMITED_OPS))
202 begintable_markup("@samp");
203 if (options & RE_BK_PLUS_QM)
205 enum_item("\\+");
206 content("indicates that the regular expression should match one"
207 " or more occurrences of the previous atom or regexp. ");
208 enum_item("\\?");
209 content("indicates that the regular expression should match zero"
210 " or one occurrence of the previous atom or regexp. ");
211 enum_item("+ and ? ");
212 content("match themselves. ");
214 else
216 enum_item("+");
217 content("indicates that the regular expression should match one"
218 " or more occurrences of the previous atom or regexp. ");
219 enum_item("?");
220 content("indicates that the regular expression should match zero"
221 " or one occurrence of the previous atom or regexp. ");
222 enum_item("\\+");
/* NOTE(review): unlike the neighbouring entries, this text has no
   closing full stop.  */
223 literal("matches a @samp{+}");
224 enum_item("\\?");
225 literal("matches a @samp{?}. ");
227 endtable();
230 newpara();
/* Character classes.
   NOTE(review): the second message contains the duplicated word
   "not not".  */
231 if (options & RE_CHAR_CLASSES)
232 content("Character classes are supported. ");
233 else
234 literal("Character classes are not not supported, so for example you would need to use @samp{[0-9]} instead of @samp{[[:digit:]]}. ");
237 newpara();
/* Anchors: either one sentence covering both, or one list of positions
   for "^" and another for "$".  */
238 if (options & RE_CONTEXT_INDEP_ANCHORS)
240 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
242 else
244 literal("The character @samp{^} only represents the beginning of a string when it appears:");
245 beginenum();
/* NOTE(review): this item text starts with "\n" although enum_item()
   already emits a line break before "@item".  */
246 enum_item("\nAt the beginning of a regular expression");
247 enum_item("After an open-group, signified by ");
248 if (options & RE_NO_BK_PARENS)
250 literal("@samp{(}");
252 else
254 literal("@samp{\\(}");
256 newline();
257 if (!(options & RE_LIMITED_OPS))
259 if (options & RE_NEWLINE_ALT)
260 enum_item("After a newline");
262 if (options & RE_NO_BK_VBAR )
263 enum_item("After the alternation operator @samp{|}");
264 else
265 enum_item("After the alternation operator @samp{\\|}");
267 endenum();
269 newpara();
/* The matching list of positions for "$".  */
270 literal("The character @samp{$} only represents the end of a string when it appears:");
271 beginenum();
272 enum_item("At the end of a regular expression");
/* NOTE(review): "an close-group" is presumably meant to read
   "a close-group".  */
273 enum_item("Before an close-group, signified by ");
274 if (options & RE_NO_BK_PARENS)
276 literal("@samp{)}");
278 else
280 literal("@samp{\\)}");
282 if (!(options & RE_LIMITED_OPS))
284 if (options & RE_NEWLINE_ALT)
285 enum_item("Before a newline");
287 if (options & RE_NO_BK_VBAR)
288 enum_item("Before the alternation operator @samp{|}");
289 else
290 enum_item("Before the alternation operator @samp{\\|}");
292 endenum();
294 newpara();
/* Where the repetition operators are special.  They are described as
   special everywhere only when RE_CONTEXT_INDEP_OPS is set and
   RE_CONTEXT_INVALID_OPS is clear; otherwise a list of exceptions
   follows.  */
296 if (!(options & RE_LIMITED_OPS) )
298 if ((options & RE_CONTEXT_INDEP_OPS)
299 && !(options & RE_CONTEXT_INVALID_OPS))
301 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
303 else
305 if (options & RE_BK_PLUS_QM)
306 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
307 else
308 literal("@samp{*}, @samp{+} and @samp{?} ");
310 if (options & RE_CONTEXT_INVALID_OPS)
312 content("are special at any point in a regular expression except the following places, where they are illegal:");
314 else
316 content("are special at any point in a regular expression except:");
319 beginenum();
320 enum_item("At the beginning of a regular expression");
321 enum_item("After an open-group, signified by ");
322 if (options & RE_NO_BK_PARENS)
324 literal("@samp{(}");
326 else
328 literal("@samp{\\(}");
330 if (!(options & RE_LIMITED_OPS))
332 if (options & RE_NEWLINE_ALT)
333 enum_item("After a newline");
335 if (options & RE_NO_BK_VBAR)
336 enum_item("After the alternation operator @samp{|}");
337 else
338 enum_item("After the alternation operator @samp{\\|}");
340 endenum();
344 newpara();
/* The "." operator.  The sentence is assembled piecewise so that the
   exceptions read "except newline", "except newline and the null
   character" or "except the null character".  */
345 content("The character @samp{.} matches any single character");
346 if ( (options & RE_DOT_NEWLINE) == 0 )
348 content(" except newline");
350 if (options & RE_DOT_NOT_NULL)
352 if ( (options & RE_DOT_NEWLINE) == 0 )
353 content(" and");
354 else
355 content(" except");
357 content(" the null character");
359 content(". ");
/* Negated bracket expressions and newline.  */
361 if (options & RE_HAT_LISTS_NOT_NEWLINE)
363 literal("Non-matching lists @samp{[^.....]} do not ever match newline. ");
/* Interval (brace) syntax and the treatment of invalid intervals.  */
366 if (options & RE_INTERVALS)
368 if (options & RE_NO_BK_BRACES)
369 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
370 else
371 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
373 if (options & RE_INVALID_INTERVAL_ORD)
/* NOTE(review): this sentence has no closing full stop, unlike the
   alternative below.  */
375 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
377 else
379 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
382 newpara();
/* Matching policy: first complete match versus longest match.  */
383 if (options & RE_NO_POSIX_BACKTRACKING)
385 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
387 else
389 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
392 newpara();
/* GNU operators: either a note that they are plain characters or a
   list describing each one.  */
393 if (options & RE_NO_GNU_OPS)
395 content("GNU extensions are not supported and so "
396 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
397 "match "
398 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
400 else
402 content("GNU extensions are supported:");
403 beginenum();
404 enum_item("@samp{\\w} matches a character within a word");
405 enum_item("@samp{\\W} matches a character which is not within a word");
406 enum_item("@samp{\\<} matches the beginning of a word");
407 enum_item("@samp{\\>} matches the end of a word");
408 enum_item("@samp{\\b} matches a word boundary");
/* NOTE(review): "a word boundaries" mixes singular and plural.  */
409 enum_item("@samp{\\B} matches characters which are not a word boundaries");
410 enum_item("@samp{\\`} matches the beginning of the whole input");
411 enum_item("@samp{\\'} matches the end of the whole input");
412 endenum();
418 static int menu()
420 int i, options;
421 const char *name;
423 output("@menu\n", 0);
424 for (i=0;
425 options = get_regex_type_flags(i),
426 name=get_regex_type_name(i);
427 ++i)
429 output("* ", 0);
430 output(name, 0);
431 output("::", 0);
432 newline();
434 output("@end menu\n", 0);
438 static int describe_all(const char *up)
440 const char *name, *next, *previous;
441 int options;
442 int i, parent;
444 menu();
446 previous = "";
448 for (i=0;
449 options = get_regex_type_flags(i),
450 name=get_regex_type_name(i);
451 ++i)
453 next = get_regex_type_name(i+1);
454 if (NULL == next)
455 next = "";
456 begin_subsection(name, next, previous, up);
457 parent = get_regex_type_synonym(i);
458 if (parent >= 0)
460 content("This is a synonym for ");
461 content(get_regex_type_name(parent));
462 content(".");
464 else
466 describe_regex_syntax(options);
468 previous = name;
/* Write the generated Texinfo to standard output.  The optional first
   command-line argument is used as the parent ("up") node name; it
   defaults to the empty string.

   Returns 0 on success and 1 if standard output could not be written,
   so that a build rule redirecting the output does not silently keep a
   truncated file.  */
int main (int argc, char *argv[])
{
  const char *up = "";

  if (argc > 1)
    up = argv[1];

  describe_all(up);

  /* The output helpers do not check fputs(); catch any failure here.  */
  if (ferror(stdout) || fflush(stdout) != 0)
    {
      fputs("regexprops: error writing to standard output\n", stderr);
      return 1;
    }
  return 0;
}