Eliminate string data mutation in ruler-mode
[emacs.git] / lib-src / etags.c
blob84dfa527e986121e899aefc990944d53e2819e02
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2024 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <https://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 #ifdef DEBUG
82 # undef DEBUG
83 # define DEBUG true
84 #else
85 # define DEBUG false
86 #endif
88 #include <config.h>
90 #ifdef MSDOS
91 # undef MSDOS
92 # define MSDOS true
93 # include <sys/param.h>
94 #else
95 # define MSDOS false
96 #endif /* MSDOS */
98 #ifdef WINDOWSNT
99 # include <direct.h>
100 # undef HAVE_NTGUI
101 # undef DOS_NT
102 # define DOS_NT
103 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
104 # define O_CLOEXEC O_NOINHERIT
105 #endif /* WINDOWSNT */
107 #include <attribute.h>
108 #include <inttypes.h>
109 #include <limits.h>
110 #include <unistd.h>
111 #include <stdarg.h>
112 #include <stdckdint.h>
113 #include <stdlib.h>
114 #include <string.h>
115 #include <sysstdio.h>
116 #include <errno.h>
117 #include <fcntl.h>
118 #include <binary-io.h>
119 #include <intprops.h>
120 #include <unlocked-io.h>
121 #include <verify.h>
122 #include <c-ctype.h>
123 #include <c-strcase.h>
125 #include <assert.h>
126 #include <getopt.h>
127 #include <regex.h>
129 /* Define CTAGS to make the program "ctags" compatible with the usual one.
130 Leave it undefined to make the program "etags", which makes emacs-style
131 tag tables and tags typedefs, #defines and struct/union/enum by default. */
132 #ifdef CTAGS
133 # undef CTAGS
134 # define CTAGS true
135 #else
136 # define CTAGS false
137 #endif
/* MERCURY_HEURISTICS_RATIO is needed to disambiguate Mercury from
   Objective C, because both languages use the file extension .m.
   See the comments before function test_objc_is_mercury for details.  */
142 #ifndef MERCURY_HEURISTICS_RATIO
143 # define MERCURY_HEURISTICS_RATIO 0.5
144 #endif
146 /* Work around GCC bug 114882
147 <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114882>. */
148 #if GNUC_PREREQ (14, 0, 0)
149 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
150 #endif
/* Copy LEN bytes from SRC to DEST, then store a terminating NUL byte
   at DEST[LEN].  DEST therefore needs room for LEN + 1 bytes.  */
static void
memcpyz (void *dest, void const *src, ptrdiff_t len)
{
  char *out = dest;

  memcpy (out, src, len);
  out[len] = '\0';
}
/* Return true if the strings S and T are byte-for-byte equal.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}
/* Return true if S and T compare equal according to c_strcasecmp.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}
/* Return true if the first N bytes of S and T are equal, as judged
   by strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}
/* Return true if the first N bytes of S and T compare equal according
   to c_strncasecmp.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
/* Return true if C is a character that cannot be part of a name.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
/* Return true if C is a character that can start a token: '$', '@',
   '_', '~' or an unaccented Latin letter.  */
static bool
begtoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '@':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case '~':
      return true;

    default:
      return false;
    }
}
/* Return true if C is a character that can appear in the middle of a
   token: '$', '_', a decimal digit or an unaccented Latin letter.  */
static bool
intoken (unsigned char c)
{
  switch (c)
    {
    case '$':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
/* Return true if C is a character that can end a token.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
/*
 *	xrnew -- reallocate storage
 *
 * SYNOPSIS:	void xrnew (OldPointer, ptrdiff_t n, int multiplier);
 *
 * Passes OP, N and M times the size of one element of OP to
 * xnrealloc, and assigns the result back to OP.  OP is evaluated
 * more than once, so it must not have side effects.
 */
#define xrnew(op, n, m) \
  ((op) = xnrealloc ((op), (n), (m) * sizeof *(op)))
/* Type of a per-language parse function: it takes a FILE * and
   returns nothing.  */
typedef void Lang_function (FILE *stream);
/* Pairs a file name suffix with the command that decompresses files
   carrying that suffix.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;

  /* Command that takes one argument and decompresses to stdout.  */
  const char *command;
} compressor;
266 typedef struct
268 const char *name; /* language name */
269 const char *help; /* detailed help for the language */
270 Lang_function *function; /* parse function */
271 const char **suffixes; /* name suffixes of this language's files */
272 const char **filenames; /* names of this language's files */
273 const char **interpreters; /* interpreters for this language */
274 bool metasource; /* source used to generate other sources */
275 } language;
277 typedef struct fdesc
279 struct fdesc *next; /* for the linked list */
280 char *infname; /* uncompressed input file name */
281 char *infabsname; /* absolute uncompressed input file name */
282 char *infabsdir; /* absolute dir of input file */
283 char *taggedfname; /* file name to write in tagfile */
284 language *lang; /* language of file */
285 char *prop; /* file properties to write in tagfile */
286 bool usecharno; /* etags tags shall contain char number */
287 bool written; /* entry written in the tags file */
288 } fdesc;
290 typedef struct node_st
291 { /* sorting structure */
292 struct node_st *left, *right; /* left and right sons */
293 fdesc *fdp; /* description of file to whom tag belongs */
294 char *name; /* tag name */
295 char *regex; /* search regexp */
296 bool valid; /* write this tag on the tag file */
297 bool is_func; /* function tag: use regexp in CTAGS mode */
298 bool been_warned; /* warning already given for duplicated tag */
299 intmax_t lno; /* line number tag is on */
300 intmax_t cno; /* character number line starts on */
301 } node;
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads
 * a line from a stream into a linebuffer and works regardless of the
 * length of the line.
 */
typedef struct
{
  /* Size of BUFFER.  */
  ptrdiff_t size;

  /* Length of the string in BUFFER after readline reads it.  */
  ptrdiff_t len;

  /* The text itself.  */
  char *buffer;
} linebuffer;
317 /* Used to support mixing of --lang and file names. */
318 typedef struct
320 enum {
321 at_language, /* a language specification */
322 at_regexp, /* a regular expression */
323 at_filename, /* a file name */
324 at_stdin, /* read from stdin here */
325 at_end /* stop parsing the list */
326 } arg_type; /* argument type */
327 language *lang; /* language associated with the argument */
328 char *what; /* the argument itself */
329 } argument;
331 /* Structure defining a regular expression. */
332 typedef struct regexp
334 struct regexp *p_next; /* pointer to next in list */
335 language *lang; /* if set, use only for this language */
336 char *pattern; /* the regexp pattern */
337 char *name; /* tag name */
338 struct re_pattern_buffer *pat; /* the compiled pattern */
339 struct re_registers regs; /* re registers */
340 bool error_signaled; /* already signaled for this regexp */
341 bool ignore_case; /* ignore case when matching */
342 bool multi_line; /* do a multi-line match on the whole file */
343 } regexp;
346 /* Many compilers barf on this:
347 Lang_function Ada_funcs;
348 so let's write it this way */
349 static void Ada_funcs (FILE *);
350 static void Asm_labels (FILE *);
351 static void C_entries (int c_ext, FILE *);
352 static void default_C_entries (FILE *);
353 static void plain_C_entries (FILE *);
354 static void Cjava_entries (FILE *);
355 static void Cobol_paragraphs (FILE *);
356 static void Cplusplus_entries (FILE *);
357 static void Cstar_entries (FILE *);
358 static void Erlang_functions (FILE *);
359 static void Forth_words (FILE *);
360 static void Fortran_functions (FILE *);
361 static void Go_functions (FILE *);
362 static void HTML_labels (FILE *);
363 static void Lisp_functions (FILE *);
364 static void Lua_functions (FILE *);
365 static void Makefile_targets (FILE *);
366 static void Mercury_functions (FILE *);
367 static void Pascal_functions (FILE *);
368 static void Perl_functions (FILE *);
369 static void PHP_functions (FILE *);
370 static void PS_functions (FILE *);
371 static void Prolog_functions (FILE *);
372 static void Python_functions (FILE *);
373 static void Ruby_functions (FILE *);
374 static void Rust_entries (FILE *);
375 static void Scheme_functions (FILE *);
376 static void TeX_commands (FILE *);
377 static void Texinfo_nodes (FILE *);
378 static void Yacc_entries (FILE *);
379 static void just_read_file (FILE *);
381 static language *get_language_from_langname (const char *);
382 static void readline (linebuffer *, FILE *);
383 static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *, const bool);
384 static bool nocase_tail (const char *);
385 static void get_tag (char *, char **);
386 static void get_lispy_tag (char *);
387 static void test_objc_is_mercury (char *, language **);
389 static void analyze_regex (char *);
390 static void free_regexps (void);
391 static void regex_tag_multiline (void);
392 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
393 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
394 static _Noreturn void suggest_asking_for_help (void);
395 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
396 static _Noreturn void pfatal (const char *);
397 static void add_node (node *, node **);
399 static void process_file_name (char *, language *);
400 static void process_file (FILE *, char *, language *);
401 static void find_entries (FILE *);
402 static void free_tree (node *);
403 static void free_fdesc (fdesc *);
404 static void pfnote (char *, bool, char *, ptrdiff_t, intmax_t, intmax_t);
405 static void invalidate_nodes (fdesc *, node **);
406 static void put_entries (node *);
407 static void cleanup_tags_file (char const * const, char const * const);
409 #if !MSDOS && !defined (DOS_NT)
410 static char *escape_shell_arg_string (char *);
411 #endif
412 static void do_move_file (const char *, const char *);
413 static char *concat (const char *, const char *, const char *);
414 static char *skip_spaces (char *);
415 static char *skip_non_spaces (char *);
416 static char *skip_name (char *);
417 static char *savenstr (const char *, ptrdiff_t);
418 static char *savestr (const char *);
419 static char *etags_getcwd (void);
420 static char *relative_filename (char *, char *);
421 static char *absolute_filename (char *, char *);
422 static char *absolute_dirname (char *, char *);
423 static bool filename_is_absolute (char *f);
424 static void canonicalize_filename (char *);
425 static char *etags_mktmp (void);
426 static void linebuffer_init (linebuffer *);
427 static void linebuffer_setlen (linebuffer *, ptrdiff_t);
428 static void *xmalloc (ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1));
429 static void *xnmalloc (ptrdiff_t, ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1,2));
430 static void *xnrealloc (void *, ptrdiff_t, ptrdiff_t)
431 ATTRIBUTE_ALLOC_SIZE ((2,3));
434 static char searchar = '/'; /* use /.../ searches */
436 static char *tagfile; /* output file */
437 static char *progname; /* name this program was invoked with */
438 static char *cwd; /* current working directory */
439 static char *tagfiledir; /* directory of tagfile */
440 static FILE *tagf; /* ioptr for tags file */
441 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
443 static fdesc *fdhead; /* head of file description list */
444 static fdesc *curfdp; /* current file description */
445 static char *infilename; /* current input file name */
446 static intmax_t lineno; /* line number of current line */
447 static intmax_t charno; /* current character number */
448 static intmax_t linecharno; /* charno of start of current line */
449 static char *dbp; /* pointer to start of current tag */
451 static intmax_t const invalidcharno = -1;
453 static node *nodehead; /* the head of the binary tree of tags */
454 static node *last_node; /* the last node created */
456 static linebuffer lb; /* the current line */
457 static linebuffer filebuf; /* a buffer containing the whole file */
458 static linebuffer token_name; /* a buffer containing a tag name */
460 static bool append_to_tagfile; /* -a: append to tags */
461 /* The next five default to true in C and derived languages. */
462 static bool typedefs; /* -t: create tags for C and Ada typedefs */
463 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
464 /* 0 struct/enum/union decls, and C++ */
465 /* member functions. */
466 static bool constantypedefs; /* -d: create tags for C #define, enum */
467 /* constants and variables. */
468 /* -D: opposite of -d. Default under ctags. */
469 static int globals; /* create tags for global variables */
470 static int members; /* create tags for C member variables */
471 static int declarations; /* --declarations: tag them and extern in C&Co*/
472 static int no_line_directive; /* ignore #line directives (undocumented) */
473 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
474 static bool update; /* -u: update tags */
475 static bool vgrind_style; /* -v: create vgrind style index output */
476 static bool no_warnings; /* -w: suppress warnings (undocumented) */
477 static bool cxref_style; /* -x: create cxref style output */
478 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
479 static bool ignoreindent; /* -I: ignore indentation in C */
480 static int packages_only; /* --packages-only: in Ada, only tag packages*/
481 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
482 static int debug; /* --debug */
/* STDIN is defined in LynxOS system headers, so discard any earlier
   definition first.  (#undef of a name that is not currently a macro
   has no effect, so no #ifdef guard is needed.)  */
#undef STDIN

/* Returned by getopt_long on --parse-stdin.  */
#define STDIN 0x1001

/* --parse-stdin used.  */
static bool parsing_stdin;
492 static regexp *p_head; /* list of all regexps */
493 static bool need_filebuf; /* some regexes are multi-line */
495 static struct option longopts[] =
497 { "append", no_argument, NULL, 'a' },
498 { "packages-only", no_argument, &packages_only, 1 },
499 { "c++", no_argument, NULL, 'C' },
500 { "debug", no_argument, &debug, 1 },
501 { "declarations", no_argument, &declarations, 1 },
502 { "no-line-directive", no_argument, &no_line_directive, 1 },
503 { "no-duplicates", no_argument, &no_duplicates, 1 },
504 { "help", no_argument, NULL, 'h' },
505 { "help", no_argument, NULL, 'H' },
506 { "ignore-indentation", no_argument, NULL, 'I' },
507 { "language", required_argument, NULL, 'l' },
508 { "members", no_argument, &members, 1 },
509 { "no-members", no_argument, &members, 0 },
510 { "output", required_argument, NULL, 'o' },
511 { "class-qualify", no_argument, &class_qualify, 'Q' },
512 { "regex", required_argument, NULL, 'r' },
513 { "no-regex", no_argument, NULL, 'R' },
514 { "ignore-case-regex", required_argument, NULL, 'c' },
515 { "parse-stdin", required_argument, NULL, STDIN },
516 { "version", no_argument, NULL, 'V' },
518 #if CTAGS /* Ctags options */
519 { "backward-search", no_argument, NULL, 'B' },
520 { "cxref", no_argument, NULL, 'x' },
521 { "defines", no_argument, NULL, 'd' },
522 { "globals", no_argument, &globals, 1 },
523 { "typedefs", no_argument, NULL, 't' },
524 { "typedefs-and-c++", no_argument, NULL, 'T' },
525 { "update", no_argument, NULL, 'u' },
526 { "vgrind", no_argument, NULL, 'v' },
527 { "no-warn", no_argument, NULL, 'w' },
529 #else /* Etags options */
530 { "no-defines", no_argument, NULL, 'D' },
531 { "no-globals", no_argument, &globals, 0 },
532 { "include", required_argument, NULL, 'i' },
533 #endif
534 { NULL }
537 static compressor compressors[] =
539 { "z", "gzip -d -c"},
540 { "Z", "gzip -d -c"},
541 { "gz", "gzip -d -c"},
542 { "GZ", "gzip -d -c"},
543 { "bz2", "bzip2 -d -c" },
544 { "xz", "xz -d -c" },
545 { "zst", "zstd -d -c" },
546 { NULL }
550 * Language stuff.
553 /* Ada code */
554 static const char *Ada_suffixes [] =
555 { "ads", "adb", "ada", NULL };
556 static const char Ada_help [] =
557 "In Ada code, functions, procedures, packages, tasks and types are\n\
558 tags. Use the '--packages-only' option to create tags for\n\
559 packages only.\n\
560 Ada tag names have suffixes indicating the type of entity:\n\
561 Entity type: Qualifier:\n\
562 ------------ ----------\n\
563 function /f\n\
564 procedure /p\n\
565 package spec /s\n\
566 package body /b\n\
567 type /t\n\
568 task /k\n\
569 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
570 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
571 will just search for any tag 'bidule'.";
573 /* Assembly code */
574 static const char *Asm_suffixes [] =
575 { "a", /* Unix assembler */
576 "asm", /* Microcontroller assembly */
577 "def", /* BSO/Tasking definition includes */
578 "inc", /* Microcontroller include files */
579 "ins", /* Microcontroller include files */
580 "s", "sa", /* Unix assembler */
581 "S", /* cpp-processed Unix assembler */
582 "src", /* BSO/Tasking C compiler output */
583 NULL
585 static const char Asm_help [] =
586 "In assembler code, labels appearing at the beginning of a line,\n\
587 followed by a colon, are tags.";
590 /* Note that .c and .h can be considered C++, if the --c++ flag was
591 given, or if the `class' or `template' keywords are met inside the file.
592 That is why default_C_entries is called for these. */
593 static const char *default_C_suffixes [] =
594 { "c", "h", NULL };
595 #if CTAGS /* C help for Ctags */
596 static const char default_C_help [] =
597 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
598 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
599 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
600 Use --globals to tag global variables.\n\
601 You can tag function declarations and external variables by\n\
602 using '--declarations', and struct members by using '--members'.";
603 #else /* C help for Etags */
604 static const char default_C_help [] =
605 "In C code, any C function or typedef is a tag, and so are\n\
606 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
607 definitions and 'enum' constants are tags unless you specify\n\
608 '--no-defines'. Global variables are tags unless you specify\n\
609 '--no-globals' and so are struct members unless you specify\n\
610 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
611 '--no-members' can make the tags table file much smaller.\n\
612 You can tag function declarations and external variables by\n\
613 using '--declarations'.";
614 #endif /* C help for Ctags and Etags */
616 static const char *Cplusplus_suffixes [] =
617 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
618 "M", /* Objective C++ */
619 "pdb", /* PostScript with C syntax */
620 NULL };
621 static const char Cplusplus_help [] =
622 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
623 --help --lang=c --lang=c++ for full help.)\n\
624 In addition to C tags, member functions are also recognized. Member\n\
625 variables are recognized unless you use the '--no-members' option.\n\
626 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
627 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
628 'operator+'.";
/* File name suffixes recognized as Java.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
/* Help text for Java.  It is const like every other help string in
   this file; it is only ever read.  */
static const char Cjava_help [] =
"In Java code, all the tag constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* File name suffixes recognized as Cobol.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
/* Help text for Cobol.  It is const like every other help string in
   this file; it is only ever read.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
643 static const char *Cstar_suffixes [] =
644 { "cs", "hs", NULL };
646 static const char *Erlang_suffixes [] =
647 { "erl", "hrl", NULL };
648 static const char Erlang_help [] =
649 "In Erlang code, the tags are the functions, records and macros\n\
650 defined in the file.";
651 static const char *Erlang_interpreters [] =
652 { "escript", NULL };
654 static const char *Forth_suffixes [] =
655 { "fth", "tok", NULL };
656 static const char Forth_help [] =
657 "In Forth code, tags are words defined by ':',\n\
658 constant, code, create, defer, value, variable, buffer:, field.";
660 static const char *Fortran_suffixes [] =
661 { "F", "f", "f90", "for", NULL };
662 static const char Fortran_help [] =
663 "In Fortran code, functions, subroutines and block data are tags.";
665 static const char *Go_suffixes [] = {"go", NULL};
666 static const char Go_help [] =
667 "In Go code, functions, interfaces and packages are tags.";
669 static const char *HTML_suffixes [] =
670 { "htm", "html", "shtml", NULL };
671 static const char HTML_help [] =
672 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
673 'h3' headers. Also, tags are 'name=' in anchors and all\n\
674 occurrences of 'id='.";
676 static const char *Lisp_suffixes [] =
677 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
678 static const char Lisp_help [] =
679 "In Lisp code, any function defined with 'defun', any variable\n\
680 defined with 'defvar' or 'defconst', and in general the first\n\
681 argument of any expression that starts with '(def' in column zero\n\
682 is a tag.\n\
683 The '--declarations' option tags \"(defvar foo)\" constructs too.";
685 static const char *Lua_suffixes [] =
686 { "lua", "LUA", NULL };
687 static const char Lua_help [] =
688 "In Lua scripts, all functions are tags.";
689 static const char *Lua_interpreters [] =
690 { "lua", NULL };
692 static const char *Makefile_filenames [] =
693 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
694 static const char Makefile_help [] =
695 "In makefiles, targets are tags; additionally, variables are tags\n\
696 unless you specify '--no-globals'.";
/* Mercury and Objective C share the same .m file extension. */
699 static const char *Mercury_suffixes [] =
700 {"m",
701 NULL};
702 static const char Mercury_help [] =
703 "In Mercury code, tags are all declarations beginning a line with ':-'\n\
704 and optionally Prolog-like definitions (first rule for a predicate or \
705 function).\n\
706 To enable this behavior, run etags using --declarations.";
707 static bool with_mercury_definitions = false;
708 float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
710 static const char *Objc_suffixes [] =
711 { "lm", /* Objective lex file */
712 "m", /* By default, Objective C file will be assumed. */
713 NULL};
714 static const char Objc_help [] =
715 "In Objective C code, tags include Objective C definitions for classes,\n\
716 class categories, methods and protocols. Tags for variables and\n\
717 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
718 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
720 static const char *Pascal_suffixes [] =
721 { "p", "pas", NULL };
722 static const char Pascal_help [] =
723 "In Pascal code, the tags are the functions and procedures defined\n\
724 in the file.";
725 /* " // this is for working around an Emacs highlighting bug... */
727 static const char *Perl_suffixes [] =
728 { "pl", "pm", NULL };
729 static const char *Perl_interpreters [] =
730 { "perl", "@PERL@", NULL };
731 static const char Perl_help [] =
732 "In Perl code, the tags are the packages, subroutines and variables\n\
733 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
734 '--globals' if you want to tag global variables. Tags for\n\
735 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
736 defined in the default package is 'main::SUB'.";
738 static const char *PHP_suffixes [] =
739 { "php", "php3", "php4", NULL };
740 static const char PHP_help [] =
741 "In PHP code, tags are functions, classes and defines. Unless you use\n\
742 the '--no-members' option, vars are tags too.";
744 static const char *plain_C_suffixes [] =
745 { "pc", /* Pro*C file */
746 NULL };
748 static const char *PS_suffixes [] =
749 { "ps", "psw", NULL }; /* .psw is for PSWrap */
750 static const char PS_help [] =
751 "In PostScript code, the tags are the functions.";
753 static const char *Prolog_suffixes [] =
754 { "prolog", NULL };
755 static const char Prolog_help [] =
756 "In Prolog code, tags are predicates and rules at the beginning of\n\
757 line.";
758 static const char *Prolog_interpreters [] =
759 { "gprolog", "pl", "yap", "swipl", "prolog", NULL };
761 static const char *Python_suffixes [] =
762 { "py", NULL };
763 static const char Python_help [] =
764 "In Python code, 'def' or 'class' at the beginning of a line\n\
765 generate a tag.";
766 static const char *Python_interpreters [] =
767 { "python", NULL };
769 static const char *Ruby_suffixes [] =
770 { "rb", "ru", "rbw", NULL };
771 static const char *Ruby_filenames [] =
772 { "Rakefile", "Thorfile", NULL };
773 static const char Ruby_help [] =
774 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
775 a line generate a tag. Constants also generate a tag.";
776 static const char *Ruby_interpreters [] =
777 { "ruby", NULL };
779 static const char *Rust_suffixes [] =
780 { "rs", NULL };
781 static const char Rust_help [] =
782 "In Rust code, tags anything defined with 'fn', 'enum', \n\
783 'struct' or 'macro_rules!'.";
785 /* Can't do the `SCM' or `scm' prefix with a version number. */
786 static const char *Scheme_suffixes [] =
787 { "oak", "rkt", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
788 static const char Scheme_help [] =
789 "In Scheme code, tags include anything defined with 'def' or with a\n\
790 construct whose name starts with 'def'. They also include\n\
791 variables set with 'set!' at top level in the file.";
793 static const char *TeX_suffixes [] =
794 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
795 static const char TeX_help [] =
796 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
797 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
798 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
799 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
800 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
802 Other commands can be specified by setting the environment variable\n\
803 'TEXTAGS' to a colon-separated list like, for example,\n\
804 TEXTAGS=\"mycommand:myothercommand\".";
807 static const char *Texinfo_suffixes [] =
808 { "texi", "texinfo", "txi", NULL };
809 static const char Texinfo_help [] =
810 "for texinfo files, lines starting with @node are tagged.";
812 static const char *Yacc_suffixes [] =
813 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
814 static const char Yacc_help [] =
815 "In Bison or Yacc input files, each rule defines as a tag the\n\
816 nonterminal it constructs. The portions of the file that contain\n\
817 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
818 for full help).";
/* Help text for the pseudo-language 'auto'.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
824 static const char none_help [] =
825 "'none' is not a real language, it indicates to only do\n\
826 regexp processing on files.";
828 static const char no_lang_help [] =
829 "No detailed help available for this language.";
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */
/* One entry per supported language.  Each initializer lists, in order:
   the language name, its help text, its tag-finding function and its
   NULL-terminated suffix list; some entries continue with a list of
   whole file names and a list of interpreter names, and the yacc entry
   ends with a boolean (presumably the `metasource' flag tested in
   process_file and find_entries -- confirm against the definition of
   `language').  Omitted trailing members are zero-initialized.
   Lookups scan the table in order and return the first match, so the
   order of entries matters when two languages share a suffix.  The
   final { NULL } entry terminates the table.  */
839 static language lang_names [] =
841 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
842 { "asm", Asm_help, Asm_labels, Asm_suffixes },
843 { "c", default_C_help, default_C_entries, default_C_suffixes },
844 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
845 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
846 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
847 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes,
848 NULL, Erlang_interpreters },
849 { "forth", Forth_help, Forth_words, Forth_suffixes },
850 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
851 { "go", Go_help, Go_functions, Go_suffixes },
852 { "html", HTML_help, HTML_labels, HTML_suffixes },
853 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
854 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
855 { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
856 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
857 /* objc listed before mercury as it is a better default for .m extensions. */
858 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
859 { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes },
860 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
861 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
862 { "php", PHP_help, PHP_functions, PHP_suffixes },
863 { "postscript",PS_help, PS_functions, PS_suffixes },
864 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
865 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes,
866 NULL, Prolog_interpreters },
867 { "python", Python_help, Python_functions, Python_suffixes,
868 NULL, Python_interpreters },
869 { "ruby", Ruby_help, Ruby_functions, Ruby_suffixes,
870 Ruby_filenames, Ruby_interpreters },
871 { "rust", Rust_help, Rust_entries, Rust_suffixes },
872 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
873 { "tex", TeX_help, TeX_commands, TeX_suffixes },
874 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
875 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
876 { "auto", auto_help }, /* default guessing scheme */
877 { "none", none_help, just_read_file }, /* regexp matching only */
878 { NULL } /* end of list */
/* Print to standard output the list of languages in lang_names, each
   followed by its default whole file names and its dot suffixes, and
   then a short explanation of how the language of a file is guessed.
   Called from print_help.  */
882 static void
883 print_language_names (void)
885 language *lang;
886 const char **name, **ext;
888 puts ("\nThese are the currently supported languages, along with the\n\
889 default file names and dot suffixes:");
/* One output line per table entry: the name padded to 10 columns,
   then any file names, then any suffixes prefixed with a dot.  */
890 for (lang = lang_names; lang->name != NULL; lang++)
892 printf (" %-*s", 10, lang->name);
893 if (lang->filenames != NULL)
894 for (name = lang->filenames; *name != NULL; name++)
895 printf (" %s", *name);
896 if (lang->suffixes != NULL)
897 for (ext = lang->suffixes; *ext != NULL; ext++)
898 printf (" .%s", *ext);
899 puts ("");
901 puts ("where 'auto' means use default language for files based on file\n\
902 name suffix, and 'none' means only do regexp processing on files.\n\
903 If no language is specified and no matching suffix is found,\n\
904 the first line of the file is read for a sharp-bang (#!) sequence\n\
905 followed by the name of an interpreter. If no such sequence is found,\n\
906 Fortran is tried first; if no tags are found, C is tried next.\n\
907 When parsing any C file, a \"class\" or \"template\" keyword\n\
908 switches to C++.");
909 puts ("Compressed files are supported using gzip, bzip2, xz, and zstd.\n\
911 For detailed help on a given language use, for example,\n\
912 etags --help --lang=ada.");
915 #if CTAGS
916 # define PROGRAM_NAME "ctags"
917 #else
918 # define PROGRAM_NAME "etags"
919 #endif
920 static _Noreturn void
921 print_version (void)
923 fputs ((PROGRAM_NAME " (" PACKAGE_NAME " " PACKAGE_VERSION ")\n"
924 COPYRIGHT "\n"
925 "This program is distributed under the terms in ETAGS.README\n"),
926 stdout);
927 exit (EXIT_SUCCESS);
/* Unless overridden at build time, the help entries for the
   undocumented -w options are left out of the output of print_help.  */
930 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
931 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
932 #endif
/* Print help and exit with EXIT_SUCCESS; never returns.
   ARGBUFFER is the argument list recorded by main, terminated by an
   entry whose arg_type is at_end.  If it contains any at_language
   entries, only the detailed help of those languages is printed.
   Otherwise the general usage text is printed: the option list (some
   entries depend on whether this is built as ctags or etags), the
   table of supported languages and the bug-report address.  */
934 static _Noreturn void
935 print_help (argument *argbuffer)
937 bool help_for_lang = false;
/* Language-specific help: one block per --language option, separated
   by blank lines.  */
939 for (; argbuffer->arg_type != at_end; argbuffer++)
940 if (argbuffer->arg_type == at_language)
942 if (help_for_lang)
943 puts ("");
944 puts (argbuffer->lang->help);
945 help_for_lang = true;
/* If any language help was printed, that is all the user asked for.  */
948 if (help_for_lang)
949 exit (EXIT_SUCCESS);
/* General usage text.  */
951 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
953 These are the options accepted by %s.\n", progname, progname);
954 puts ("You may use unambiguous abbreviations for the long option names.");
955 puts (" A - as file name means read names from stdin (one per line).\n\
956 Absolute names are stored in the output file as they are.\n\
957 Relative ones are stored relative to the output file's directory.\n");
959 puts ("-a, --append\n\
960 Append tag entries to existing tags file.");
962 puts ("--packages-only\n\
963 For Ada files, only generate tags for packages.");
965 if (CTAGS)
966 puts ("-B, --backward-search\n\
967 Write the search commands for the tag entries using '?', the\n\
968 backward-search command instead of '/', the forward-search command.");
970 /* This option is mostly obsolete, because etags can now automatically
971 detect C++. Retained for backward compatibility and for debugging and
972 experimentation. In principle, we could want to tag as C++ even
973 before any "class" or "template" keyword.
974 puts ("-C, --c++\n\
975 Treat files whose name suffix defaults to C language as C++ files.");
978 puts ("--declarations\n\
979 In C and derived languages, create tags for function declarations,");
980 if (CTAGS)
981 puts ("\tand create tags for extern variables if --globals is used.");
982 else
983 puts
984 ("\tand create tags for extern variables unless --no-globals is used.");
986 puts ("\tIn Mercury, tag both declarations starting a line with ':-' and\n\
987 first predicates or functions in clauses.");
989 if (CTAGS)
990 puts ("-d, --defines\n\
991 Create tag entries for C #define constants and enum constants, too.");
992 else
993 puts ("-D, --no-defines\n\
994 Don't create tag entries for C #define constants and enum constants.\n\
995 This makes the tags file smaller.");
997 if (!CTAGS)
998 puts ("-i FILE, --include=FILE\n\
999 Include a note in tag file indicating that, when searching for\n\
1000 a tag, one should also consult the tags file FILE after\n\
1001 checking the current file.");
1003 puts ("-l LANG, --language=LANG\n\
1004 Force the following files to be considered as written in the\n\
1005 named language up to the next --language=LANG option.");
1007 if (CTAGS)
1008 puts ("--globals\n\
1009 Create tag entries for global variables in some languages.");
1010 else
1011 puts ("--no-globals\n\
1012 Do not create tag entries for global variables in some\n\
1013 languages. This makes the tags file smaller.");
1015 puts ("--no-line-directive\n\
1016 Ignore #line preprocessor directives in C and derived languages.");
1018 if (CTAGS)
1019 puts ("--members\n\
1020 Create tag entries for members of structures in some languages.");
1021 else
1022 puts ("--no-members\n\
1023 Do not create tag entries for members of structures\n\
1024 in some languages.");
1026 puts ("-Q, --class-qualify\n\
1027 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
1028 This produces tag names of the form \"class::member\" for C++,\n\
1029 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
1030 For Objective C, this also produces class methods qualified with\n\
1031 their arguments, as in \"foo:bar:baz:more\".\n\
1032 For Perl, this produces \"package::member\".");
1033 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1034 Make a tag for each line matching a regular expression pattern\n\
1035 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1036 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1037 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1038 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1039 puts (" If TAGNAME/ is present, the tags created are named.\n\
1040 For example Tcl named tags can be created with:\n\
1041 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1042 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
1043 'm' means to allow multi-line matches, 's' implies 'm' and\n\
1044 causes dot to match any character, including newline.");
1046 puts ("-R, --no-regex\n\
1047 Don't create tags from regexps for the following files.");
1049 puts ("-I, --ignore-indentation\n\
1050 In C and C++ do not assume that a closing brace in the first\n\
1051 column is the final brace of a function or structure definition.");
1053 puts ("-o FILE, --output=FILE\n\
1054 Write the tags to FILE.");
1056 puts ("--parse-stdin=NAME\n\
1057 Read from standard input and record tags as belonging to file NAME.");
1059 if (CTAGS)
1061 puts ("-t, --typedefs\n\
1062 Generate tag entries for C and Ada typedefs.");
1063 puts ("-T, --typedefs-and-c++\n\
1064 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1065 and C++ member functions.");
1068 if (CTAGS)
1069 puts ("-u, --update\n\
1070 Update the tag entries for the given files, leaving tag\n\
1071 entries for other files in place. Currently, this is\n\
1072 implemented by deleting the existing entries for the given\n\
1073 files and then rewriting the new entries at the end of the\n\
1074 tags file. It is often faster to simply rebuild the entire\n\
1075 tag file than to use this.");
1077 if (CTAGS)
1079 puts ("-v, --vgrind\n\
1080 Print on the standard output an index of items intended for\n\
1081 human consumption, similar to the output of vgrind. The index\n\
1082 is sorted, and gives the page number of each item.");
1084 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1085 puts ("-w, --no-duplicates\n\
1086 Do not create duplicate tag entries, for compatibility with\n\
1087 traditional ctags.");
1089 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1090 puts ("-w, --no-warn\n\
1091 Suppress warning messages about duplicate tag entries.");
1093 puts ("-x, --cxref\n\
1094 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1095 The output uses line numbers instead of page numbers, but\n\
1096 beyond that the differences are cosmetic; try both to see\n\
1097 which you like.");
1100 puts ("-V, --version\n\
1101 Print the version of the program.\n\
1102 -h, --help\n\
1103 Print this help message.\n\
1104 Followed by one or more '--language' options prints detailed\n\
1105 help about tag generation for the specified languages.");
1107 print_language_names ();
1109 puts ("");
1110 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1112 exit (EXIT_SUCCESS);
/* Program entry point.  Parses the command line with getopt_long,
   recording file names, --language switches and regexps in ARGBUFFER
   in the order they were given; prints help if it was requested; works
   out the tags file and the directory relative names are based on;
   processes every recorded argument in order; and finally writes the
   tags.  When built as ctags and not in cxref style, it can also purge
   the entries of the given files (--update) and sort the tags file by
   running an external `sort' command.  */
1117 main (int argc, char **argv)
1119 int i;
1120 int nincluded_files;
1121 char **included_files;
1122 argument *argbuffer;
1123 int current_arg, file_count;
1124 linebuffer filename_lb;
1125 bool help_asked = false;
1126 ptrdiff_t len;
1127 char *optstring;
1128 int opt;
1130 progname = argv[0];
1131 nincluded_files = 0;
1132 included_files = xnmalloc (argc, sizeof *included_files);
1133 current_arg = 0;
1134 file_count = 0;
1136 /* Allocate enough no matter what happens. Overkill, but each one
1137 is small. */
1138 argbuffer = xnmalloc (argc, sizeof *argbuffer);
1141 * Always find typedefs and structure tags.
1142 * Also default to find macro constants, enum constants, struct
1143 * members and global variables. Do it for both etags and ctags.
1145 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1146 globals = members = true;
1148 /* When the optstring begins with a '-' getopt_long does not rearrange the
1149 non-options arguments to be at the end, but leaves them alone. */
1150 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1151 (CTAGS) ? "BxdtTuvw" : "Di:",
1152 "");
1154 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1155 switch (opt)
1157 case 0:
1158 /* If getopt returns 0, then it has already processed a
1159 long-named option. We should do nothing. */
1160 break;
1162 case 1:
1163 /* This means that a file name has been seen. Record it. */
1164 argbuffer[current_arg].arg_type = at_filename;
1165 argbuffer[current_arg].what = optarg;
1166 len = strlen (optarg);
1167 if (whatlen_max < len)
1168 whatlen_max = len;
1169 ++current_arg;
1170 ++file_count;
1171 break;
1173 case STDIN:
1174 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1175 argbuffer[current_arg].arg_type = at_stdin;
1176 argbuffer[current_arg].what = optarg;
1177 len = strlen (optarg);
1178 if (whatlen_max < len)
1179 whatlen_max = len;
1180 ++current_arg;
1181 ++file_count;
1182 if (parsing_stdin)
1183 fatal ("cannot parse standard input more than once");
1184 parsing_stdin = true;
1185 break;
1187 /* Common options. */
1188 case 'a': append_to_tagfile = true; break;
1189 case 'C': cplusplus = true; break;
1190 case 'f': /* for compatibility with old makefiles */
1191 case 'o':
1192 if (tagfile)
1194 error ("-o option may only be given once.");
1195 suggest_asking_for_help ();
1197 tagfile = optarg;
1198 break;
1199 case 'I':
1200 case 'S': /* for backward compatibility */
1201 ignoreindent = true;
1202 break;
1203 case 'l':
/* An unknown language name has already been reported by
   get_language_from_langname; the option is then simply not recorded.  */
1205 language *lang = get_language_from_langname (optarg);
1206 if (lang != NULL)
1208 argbuffer[current_arg].lang = lang;
1209 argbuffer[current_arg].arg_type = at_language;
1210 ++current_arg;
1213 break;
1214 case 'c':
1215 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1216 optarg = concat (optarg, "i", ""); /* memory leak here */
1217 FALLTHROUGH;
1218 case 'r':
1219 argbuffer[current_arg].arg_type = at_regexp;
1220 argbuffer[current_arg].what = optarg;
1221 len = strlen (optarg);
1222 if (whatlen_max < len)
1223 whatlen_max = len;
1224 ++current_arg;
1225 break;
1226 case 'R':
/* Recorded as an at_regexp entry with a NULL pattern.  */
1227 argbuffer[current_arg].arg_type = at_regexp;
1228 argbuffer[current_arg].what = NULL;
1229 ++current_arg;
1230 break;
1231 case 'V':
1232 print_version ();
1233 break;
1234 case 'h':
1235 case 'H':
1236 help_asked = true;
1237 break;
1238 case 'Q':
1239 class_qualify = 1;
1240 break;
1242 /* Etags options */
1243 case 'D': constantypedefs = false; break;
1244 case 'i': included_files[nincluded_files++] = optarg; break;
1246 /* Ctags options. */
1247 case 'B': searchar = '?'; break;
1248 case 'd': constantypedefs = true; break;
1249 case 't': typedefs = true; break;
1250 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1251 case 'u': update = true; break;
1252 case 'v': vgrind_style = true; FALLTHROUGH;
1253 case 'x': cxref_style = true; break;
1254 case 'w': no_warnings = true; break;
1255 default:
1256 suggest_asking_for_help ();
1259 /* No more options. Store the rest of arguments. */
1260 for (; optind < argc; optind++)
1262 argbuffer[current_arg].arg_type = at_filename;
1263 argbuffer[current_arg].what = argv[optind];
1264 len = strlen (argv[optind]);
1265 if (whatlen_max < len)
1266 whatlen_max = len;
1267 ++current_arg;
1268 ++file_count;
/* Terminate the recorded argument list; print_help scans up to this
   marker.  */
1271 argbuffer[current_arg].arg_type = at_end;
1273 if (help_asked)
1274 print_help (argbuffer);
1276 if (nincluded_files == 0 && file_count == 0)
1278 error ("no input files specified.");
1279 suggest_asking_for_help ();
1282 if (tagfile == NULL)
1283 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1284 cwd = etags_getcwd (); /* the current working directory */
/* Make sure cwd ends with a slash.  */
1285 if (cwd[strlen (cwd) - 1] != '/')
1287 char *oldcwd = cwd;
1288 cwd = concat (oldcwd, "/", "");
1289 free (oldcwd);
1292 /* Compute base directory for relative file names. */
1293 if (streq (tagfile, "-")
1294 || strneq (tagfile, "/dev/", 5))
1295 tagfiledir = cwd; /* relative file names are relative to cwd */
1296 else
1298 canonicalize_filename (tagfile);
1299 tagfiledir = absolute_dirname (tagfile, cwd);
1302 linebuffer_init (&lb);
1303 linebuffer_init (&filename_lb);
1304 linebuffer_init (&filebuf);
1305 linebuffer_init (&token_name);
/* etags opens its output before processing any file (a tag file named
   "-" means standard output); ctags opens the tags file only after all
   files have been processed, further below.  */
1307 if (!CTAGS)
1309 if (streq (tagfile, "-"))
1311 tagf = stdout;
1312 set_binary_mode (STDOUT_FILENO, O_BINARY);
1314 else
1315 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1316 if (tagf == NULL)
1317 pfatal (tagfile);
1321 * Loop through files finding functions.
1323 for (i = 0; i < current_arg; i++)
1325 static language *lang; /* non-NULL if language is forced */
1326 char *this_file;
1328 switch (argbuffer[i].arg_type)
1330 case at_language:
1331 lang = argbuffer[i].lang;
1332 break;
1333 case at_regexp:
1334 analyze_regex (argbuffer[i].what);
1335 break;
1336 case at_filename:
1337 this_file = argbuffer[i].what;
1338 /* Input file named "-" means read file names from stdin
1339 (one per line) and use them. */
1340 if (streq (this_file, "-"))
1342 if (parsing_stdin)
1343 fatal ("cannot parse standard input "
1344 "AND read file names from it");
1345 while (readline_internal (&filename_lb, stdin, "-", false) > 0)
1346 process_file_name (filename_lb.buffer, lang);
1348 else
1349 process_file_name (this_file, lang);
1350 break;
1351 case at_stdin:
1352 this_file = argbuffer[i].what;
1353 process_file (stdin, this_file, lang);
1354 break;
1355 default:
1356 error ("internal error: arg_type");
1360 free_regexps ();
1361 free (lb.buffer);
1362 free (filebuf.buffer);
1363 free (token_name.buffer);
1365 if (!CTAGS || cxref_style)
1367 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1368 put_entries (nodehead);
1369 free_tree (nodehead);
1370 nodehead = NULL;
1371 if (!CTAGS)
1373 fdesc *fdp;
1375 /* Output file entries that have no tags. */
1376 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1377 if (!fdp->written)
1378 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
/* Then one "include" section for each -i FILE option.  */
1380 while (nincluded_files-- > 0)
1381 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1383 if (fclose (tagf) == EOF)
1384 pfatal (tagfile);
1387 return EXIT_SUCCESS;
1390 /* From here on, we are in (CTAGS && !cxref_style) */
1391 if (update)
/* Drop the existing entries of every file named on the command line,
   then append the new entries to what is left.  */
1393 for (i = 0; i < current_arg; ++i)
1395 switch (argbuffer[i].arg_type)
1397 case at_filename:
1398 case at_stdin:
1399 break;
1400 default:
1401 continue; /* the for loop */
1403 cleanup_tags_file (tagfile, argbuffer[i].what);
1405 append_to_tagfile = true;
1408 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1409 if (tagf == NULL)
1410 pfatal (tagfile);
1411 put_entries (nodehead); /* write all the tags (CTAGS) */
1412 free_tree (nodehead);
1413 nodehead = NULL;
1414 if (fclose (tagf) == EOF)
1415 pfatal (tagfile);
1417 if (CTAGS)
1418 if (append_to_tagfile || update)
1420 /* Maybe these should be used:
1421 setenv ("LC_COLLATE", "C", 1);
1422 setenv ("LC_ALL", "C", 1); */
/* Worst case for the buffer: in the POSIX branch below every
   apostrophe in the tag file name becomes four characters and the
   escaped name is written twice, hence 8 * strlen; the sizeof term
   covers the fixed text and the terminating NUL.  */
1423 char *cmd = xmalloc (8 * strlen (tagfile) + sizeof "sort -u -o '' ''");
1424 #if defined WINDOWSNT || MSDOS
1425 /* Quote "like this". No need to escape the quotes in the file name,
1426 since it is not allowed in file names on these systems. */
1427 char *z = stpcpy (cmd, "sort -u -o \"");
1428 z = stpcpy (z, tagfile);
1429 z = stpcpy (z, "\" \"");
1430 z = stpcpy (z, tagfile);
1431 stpcpy (z, "\"");
1432 #else
1433 /* Quote 'like this', and escape the apostrophe in the file name. */
1434 char *z = stpcpy (cmd, "sort -u -o '");
1435 char *escaped_tagfile = z;
1436 for (; *tagfile; *z++ = *tagfile++)
1437 if (*tagfile == '\'')
1438 z = stpcpy (z, "'\\'");
1439 ptrdiff_t escaped_tagfile_len = z - escaped_tagfile;
1440 z = stpcpy (z, "' '");
1441 z = mempcpy (z, escaped_tagfile, escaped_tagfile_len);
1442 strcpy (z, "'");
1443 #endif
/* The value returned by system becomes the return value of main.  */
1444 return system (cmd);
1446 return EXIT_SUCCESS;
1450 * Equivalent to: mv tags OTAGS;grep -Fv ' filename ' OTAGS >tags;rm OTAGS
1452 static void
1453 cleanup_tags_file (const char* tagfile, const char* match_file_name)
1455 FILE *otags_f = fopen ("OTAGS", "wb");
1456 FILE *tag_f = fopen (tagfile, "rb");
1458 if (otags_f == NULL)
1459 pfatal ("OTAGS");
1461 if (tag_f == NULL)
1462 pfatal (tagfile);
1464 int buf_len = strlen (match_file_name) + sizeof ("\t\t ") + 1;
1465 char *buf = xmalloc (buf_len);
1466 snprintf (buf, buf_len, "\t%s\t", match_file_name);
1468 linebuffer line;
1469 linebuffer_init (&line);
1470 while (readline_internal (&line, tag_f, tagfile, true) > 0)
1472 if (ferror (tag_f))
1473 pfatal (tagfile);
1475 if (strstr (line.buffer, buf) == NULL)
1477 fprintf (otags_f, "%s\n", line.buffer);
1478 if (ferror (tag_f))
1479 pfatal (tagfile);
1482 free (buf);
1483 free (line.buffer);
1485 if (fclose (otags_f) == EOF)
1486 pfatal ("OTAGS");
1488 if (fclose (tag_f) == EOF)
1489 pfatal (tagfile);
1491 do_move_file ("OTAGS", tagfile);
1492 return;
1496 * Return a compressor given the file name. If EXTPTR is non-zero,
1497 * return a pointer into FILE where the compressor-specific
1498 * extension begins. If no compressor is found, NULL is returned
1499 * and EXTPTR is not significant.
1500 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1502 static compressor *
1503 get_compressor_from_suffix (char *file, char **extptr)
1505 compressor *compr;
1506 char *slash, *suffix;
1508 /* File has been processed by canonicalize_filename,
1509 so we don't need to consider backslashes on DOS_NT. */
1510 slash = strrchr (file, '/');
1511 suffix = strrchr (file, '.');
1512 if (suffix == NULL || suffix < slash)
1513 return NULL;
1514 if (extptr != NULL)
1515 *extptr = suffix;
1516 suffix += 1;
1517 /* Let those poor souls who live with DOS 8+3 file name limits get
1518 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1519 Only the first do loop is run if not MSDOS */
1522 for (compr = compressors; compr->suffix != NULL; compr++)
1523 if (streq (compr->suffix, suffix))
1524 return compr;
1525 if (!MSDOS)
1526 break; /* do it only once: not really a loop */
1527 if (extptr != NULL)
1528 *extptr = ++suffix;
1529 } while (*suffix != '\0');
1530 return NULL;
1536 * Return a language given the name.
1538 static language *
1539 get_language_from_langname (const char *name)
1541 language *lang;
1543 if (name == NULL)
1544 error ("empty language name");
1545 else
1547 for (lang = lang_names; lang->name != NULL; lang++)
1548 if (streq (name, lang->name))
1549 return lang;
1550 error ("unknown language \"%s\"", name);
1553 return NULL;
1558 * Return a language given the interpreter name.
1560 static language *
1561 get_language_from_interpreter (char *interpreter)
1563 language *lang;
1564 const char **iname;
1566 if (interpreter == NULL)
1567 return NULL;
1568 for (lang = lang_names; lang->name != NULL; lang++)
1569 if (lang->interpreters != NULL)
1570 for (iname = lang->interpreters; *iname != NULL; iname++)
1571 if (streq (*iname, interpreter))
1572 return lang;
1574 return NULL;
1580 * Return a language given the file name.
1582 static language *
1583 get_language_from_filename (char *file, bool case_sensitive)
1585 language *lang;
1586 const char **name, **ext, *suffix;
1587 char *slash;
1589 /* Try whole file name first. */
1590 slash = strrchr (file, '/');
1591 if (slash != NULL)
1592 file = slash + 1;
1593 #ifdef DOS_NT
1594 else if (file[0] && file[1] == ':')
1595 file += 2;
1596 #endif
1597 for (lang = lang_names; lang->name != NULL; lang++)
1598 if (lang->filenames != NULL)
1599 for (name = lang->filenames; *name != NULL; name++)
1600 if ((case_sensitive)
1601 ? streq (*name, file)
1602 : strcaseeq (*name, file))
1603 return lang;
1605 /* If not found, try suffix after last dot. */
1606 suffix = strrchr (file, '.');
1607 if (suffix == NULL)
1608 return NULL;
1609 suffix += 1;
1610 for (lang = lang_names; lang->name != NULL; lang++)
1611 if (lang->suffixes != NULL)
1612 for (ext = lang->suffixes; *ext != NULL; ext++)
1613 if ((case_sensitive)
1614 ? streq (*ext, suffix)
1615 : strcaseeq (*ext, suffix))
1616 return lang;
1617 return NULL;
/*
 * This routine is called on each file argument.
 */
/* Process the file named FILE, passing LANG on to process_file (a NULL
   LANG leaves the choice of language to find_entries).

   FILE is canonicalized in place.  The tags file itself is skipped with
   a message, and a file whose uncompressed name has already been
   recorded in the fdhead list is skipped silently.  If FILE cannot be
   opened as given, the name without its compression suffix is tried
   when it has one; otherwise FILE with each known compressor suffix
   appended is tried.  When the file that was opened is a compressed
   one, it is expanded into a temporary file by running the compressor's
   command through `system', and the temporary file is read instead.
   On return last_node and curfdp are reset to NULL.  */
1624 static void
1625 process_file_name (char *file, language *lang)
1627 FILE *inf;
1628 fdesc *fdp;
1629 compressor *compr;
1630 char *compressed_name, *uncompressed_name;
1631 char *ext, *real_name UNINIT, *tmp_name UNINIT;
1632 int retval;
1634 canonicalize_filename (file);
1635 if (streq (file, tagfile) && !streq (tagfile, "-"))
1637 error ("skipping inclusion of %s in self.", file);
1638 return;
/* Split FILE into its compressed and uncompressed names.  Whichever of
   the two is not FILE itself is heap-allocated and freed at `cleanup'.  */
1640 compr = get_compressor_from_suffix (file, &ext);
1641 if (compr)
1643 compressed_name = file;
1644 uncompressed_name = savenstr (file, ext - file);
1646 else
1648 compressed_name = NULL;
1649 uncompressed_name = file;
1652 /* If the canonicalized uncompressed name
1653 has already been dealt with, skip it silently. */
1654 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1656 assert (fdp->infname != NULL);
1657 if (streq (uncompressed_name, fdp->infname))
1658 goto cleanup;
1661 inf = fopen (file, "r" FOPEN_BINARY);
1662 if (inf)
1663 real_name = file;
1664 else
/* Remember why the name as given failed, so that this is the error
   reported if every fallback fails too.  */
1666 int file_errno = errno;
1667 if (compressed_name)
1669 /* Try with the given suffix. */
1670 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1671 if (inf)
1672 real_name = uncompressed_name;
1674 else
1676 /* Try all possible suffixes. */
1677 for (compr = compressors; compr->suffix != NULL; compr++)
1679 compressed_name = concat (file, ".", compr->suffix);
1680 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1681 if (inf)
1683 real_name = compressed_name;
1684 break;
1686 if (MSDOS)
/* Also try the names obtained by deleting, one at a time, the leading
   characters of the appended ".suffix".  */
1688 char *suf = compressed_name + strlen (file);
1689 size_t suflen = strlen (compr->suffix) + 1;
1690 for ( ; suf[1]; suf++, suflen--)
1692 memmove (suf, suf + 1, suflen);
1693 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1694 if (inf)
1696 real_name = compressed_name;
1697 break;
1700 if (inf)
1701 break;
1703 free (compressed_name);
1704 compressed_name = NULL;
1707 if (! inf)
1709 errno = file_errno;
1710 perror (file);
1711 goto cleanup;
/* The file that was opened is a compressed one: close it, expand it
   into a temporary file with the compressor's command, and read the
   temporary file instead.  */
1715 if (real_name == compressed_name)
1717 fclose (inf);
1718 tmp_name = etags_mktmp ();
1719 if (!tmp_name)
1720 inf = NULL;
1721 else
1723 #if MSDOS || defined (DOS_NT)
1724 int buf_len =
1725 strlen (compr->command)
1726 + strlen (" \"\" > \"\"") + strlen (real_name)
1727 + strlen (tmp_name) + 1;
1728 char *cmd = xmalloc (buf_len);
1729 snprintf (cmd, buf_len, "%s \"%s\" > \"%s\"",
1730 compr->command, real_name, tmp_name);
1731 #else
1732 char *new_real_name = escape_shell_arg_string (real_name);
1733 char *new_tmp_name = escape_shell_arg_string (tmp_name);
1734 int buf_len =
1735 strlen (compr->command) + strlen (" > ") + strlen (new_real_name)
1736 + strlen (new_tmp_name) + 1;
1737 char *cmd = xmalloc (buf_len);
1738 snprintf (cmd, buf_len, "%s %s > %s",
1739 compr->command, new_real_name, new_tmp_name);
1740 free (new_real_name);
1741 free (new_tmp_name);
1742 #endif
/* Only a return value of -1 from `system' is treated as failure here;
   the command's own exit status is not examined.  */
1743 inf = (system (cmd) == -1
1744 ? NULL
1745 : fopen (tmp_name, "r" FOPEN_BINARY));
1746 free (cmd);
1749 if (!inf)
1751 perror (real_name);
1752 goto cleanup;
/* Tags are always recorded under the uncompressed name.  */
1756 process_file (inf, uncompressed_name, lang);
1758 retval = fclose (inf);
1759 if (real_name == compressed_name)
1761 remove (tmp_name);
1762 free (tmp_name);
1764 if (retval < 0)
1765 pfatal (file);
1767 cleanup:
/* Free whichever names were allocated here rather than aliasing FILE.  */
1768 if (compressed_name != file)
1769 free (compressed_name);
1770 if (uncompressed_name != file)
1771 free (uncompressed_name);
1772 last_node = NULL;
1773 curfdp = NULL;
1774 return;
1777 static void
1778 process_file (FILE *fh, char *fn, language *lang)
1780 static const fdesc emptyfdesc;
1781 fdesc *fdp;
1783 infilename = fn;
1784 /* Create a new input file description entry. */
1785 fdp = xmalloc (sizeof *fdp);
1786 *fdp = emptyfdesc;
1787 fdp->next = fdhead;
1788 fdp->infname = savestr (fn);
1789 fdp->lang = lang;
1790 fdp->infabsname = absolute_filename (fn, cwd);
1791 fdp->infabsdir = absolute_dirname (fn, cwd);
1792 if (filename_is_absolute (fn))
1794 /* An absolute file name. Canonicalize it. */
1795 fdp->taggedfname = absolute_filename (fn, NULL);
1797 else
1799 /* A file name relative to cwd. Make it relative
1800 to the directory of the tags file. */
1801 fdp->taggedfname = relative_filename (fn, tagfiledir);
1803 fdp->usecharno = true; /* use char position when making tags */
1804 fdp->prop = NULL;
1805 fdp->written = false; /* not written on tags file yet */
1807 fdhead = fdp;
1808 curfdp = fdhead; /* the current file description */
1810 find_entries (fh);
1812 /* If not Ctags, and if this is not metasource and if it contained no #line
1813 directives, we can write the tags and free all nodes pointing to
1814 curfdp. */
1815 if (!CTAGS
1816 && curfdp->usecharno /* no #line directives in this file */
1817 && !curfdp->lang->metasource)
1819 node *np, *prev;
1821 /* Look for the head of the sublist relative to this file. See add_node
1822 for the structure of the node tree. */
1823 prev = NULL;
1824 for (np = nodehead; np != NULL; prev = np, np = np->left)
1825 if (np->fdp == curfdp)
1826 break;
1828 /* If we generated tags for this file, write and delete them. */
1829 if (np != NULL)
1831 /* This is the head of the last sublist, if any. The following
1832 instructions depend on this being true. */
1833 assert (np->left == NULL);
1835 assert (fdhead == curfdp);
1836 assert (last_node->fdp == curfdp);
1837 put_entries (np); /* write tags for file curfdp->taggedfname */
1838 free_tree (np); /* remove the written nodes */
1839 if (prev == NULL)
1840 nodehead = NULL; /* no nodes left */
1841 else
1842 prev->left = NULL; /* delete the pointer to the sublist */
1847 static void
1848 reset_input (FILE *inf)
1850 if (fseek (inf, 0, SEEK_SET) != 0)
1851 perror (infilename);
1855 * This routine opens the specified file and calls the function
1856 * which finds the function and type definitions.
1858 static void
1859 find_entries (FILE *inf)
1861 char *cp;
1862 language *lang = curfdp->lang;
1863 Lang_function *parser = NULL;
1865 /* If user specified a language, use it. */
1866 if (lang != NULL && lang->function != NULL)
1868 parser = lang->function;
1871 /* Else try to guess the language given the file name. */
1872 if (parser == NULL)
1874 lang = get_language_from_filename (curfdp->infname, true);
1876 /* Disambiguate file names between Objc and Mercury. */
1877 if (lang != NULL && strcmp (lang->name, "objc") == 0)
1878 test_objc_is_mercury (curfdp->infname, &lang);
1880 if (lang != NULL && lang->function != NULL)
1882 curfdp->lang = lang;
1883 parser = lang->function;
1887 /* Else look for sharp-bang as the first two characters. */
1888 if (parser == NULL
1889 && readline_internal (&lb, inf, infilename, false) > 0
1890 && lb.len >= 2
1891 && lb.buffer[0] == '#'
1892 && lb.buffer[1] == '!')
1894 char *lp;
1896 /* Set lp to point at the first char after the last slash in the
1897 line or, if no slashes, at the first nonblank. Then set cp to
1898 the first successive blank and terminate the string. */
1899 lp = strrchr (lb.buffer+2, '/');
1900 if (lp != NULL)
1901 lp += 1;
1902 else
1903 lp = skip_spaces (lb.buffer + 2);
1904 cp = skip_non_spaces (lp);
1905 /* If the "interpreter" turns out to be "env", the real interpreter is
1906 the next word. */
1907 if (cp > lp && strneq (lp, "env", cp - lp))
1909 lp = skip_spaces (cp);
1910 cp = skip_non_spaces (lp);
1912 *cp = '\0';
1914 if (*lp)
1916 lang = get_language_from_interpreter (lp);
1917 if (lang != NULL && lang->function != NULL)
1919 curfdp->lang = lang;
1920 parser = lang->function;
1925 reset_input (inf);
1927 /* Else try to guess the language given the case insensitive file name. */
1928 if (parser == NULL)
1930 lang = get_language_from_filename (curfdp->infname, false);
1931 if (lang != NULL && lang->function != NULL)
1933 curfdp->lang = lang;
1934 parser = lang->function;
1938 /* Else try Fortran or C. */
1939 if (parser == NULL)
1941 node *old_last_node = last_node;
1943 curfdp->lang = get_language_from_langname ("fortran");
1944 find_entries (inf);
1946 if (old_last_node == last_node)
1947 /* No Fortran entries found. Try C. */
1949 reset_input (inf);
1950 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1951 find_entries (inf);
1953 return;
1956 if (!no_line_directive
1957 && curfdp->lang != NULL && curfdp->lang->metasource)
1958 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1959 file, or anyway we parsed a file that is automatically generated from
1960 this one. If this is the case, the bingo.c file contained #line
1961 directives that generated tags pointing to this file. Let's delete
1962 them all before parsing this file, which is the real source. */
1964 fdesc **fdpp = &fdhead;
1965 while (*fdpp != NULL)
1966 if (*fdpp != curfdp
1967 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1968 /* We found one of those! We must delete both the file description
1969 and all tags referring to it. */
1971 fdesc *badfdp = *fdpp;
1973 /* Delete the tags referring to badfdp->taggedfname
1974 that were obtained from badfdp->infname. */
1975 invalidate_nodes (badfdp, &nodehead);
1977 *fdpp = badfdp->next; /* remove the bad description from the list */
1978 free_fdesc (badfdp);
1980 else
1981 fdpp = &(*fdpp)->next; /* advance the list pointer */
1984 assert (parser != NULL);
1986 /* Generic initializations before reading from file. */
1987 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1989 /* Generic initializations before parsing file with readline. */
1990 lineno = 0; /* reset global line number */
1991 charno = 0; /* reset global char number */
1992 linecharno = 0; /* reset global char number of line start */
1994 parser (inf);
1996 regex_tag_multiline ();
2001 * Check whether an implicitly named tag should be created,
2002 * then call `pfnote'.
2003 * NAME is a string that is internally copied by this function.
2005 * TAGS format specification
2006 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2007 * The following is explained in some more detail in etc/ETAGS.EBNF.
2009 * make_tag creates tags with "implicit tag names" (unnamed tags)
2010 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2011 * 1. NAME does not contain any of the characters in NONAM;
2012 * 2. LINESTART contains name as either a rightmost, or rightmost but
2013 * one character, substring;
2014 * 3. the character, if any, immediately before NAME in LINESTART must
2015 * be a character in NONAM;
2016 * 4. the character, if any, immediately after NAME in LINESTART must
2017 * also be a character in NONAM.
2019 * The implementation uses the notinname() macro, which recognizes the
2020 * characters stored in the string `nonam'.
2021 * etags.el needs to use the same characters that are in NONAM.
2023 static void
2024 make_tag (const char *name, /* tag name, or NULL if unnamed */
2025 ptrdiff_t namelen, /* tag length */
2026 bool is_func, /* tag is a function */
2027 char *linestart, /* start of the line where tag is */
2028 ptrdiff_t linelen, /* length of the line where tag is */
2029 intmax_t lno, /* line number */
2030 intmax_t cno) /* character number */
2032 bool named = (name != NULL && namelen > 0);
2033 char *nname = NULL;
2035 if (debug)
2036 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
2037 named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
2039 if (!CTAGS && named) /* maybe set named to false */
2040 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2041 such that etags.el can guess a name from it. */
2043 ptrdiff_t i;
2044 const char *cp = name;
2046 for (i = 0; i < namelen; i++)
2047 if (notinname (*cp++))
2048 break;
2049 if (i == namelen) /* rule #1 */
2051 cp = linestart + linelen - namelen;
2052 if (notinname (linestart[linelen-1]))
2053 cp -= 1; /* rule #4 */
2054 if (cp >= linestart /* rule #2 */
2055 && (cp == linestart
2056 || notinname (cp[-1])) /* rule #3 */
2057 && strneq (name, cp, namelen)) /* rule #2 */
2058 named = false; /* use implicit tag name */
2062 if (named)
2063 nname = savenstr (name, namelen);
2065 pfnote (nname, is_func, linestart, linelen, lno, cno);
2068 /* Record a tag. */
2069 static void
2070 pfnote (char *name, /* tag name, or NULL if unnamed */
2071 bool is_func, /* tag is a function */
2072 char *linestart, /* start of the line where tag is */
2073 ptrdiff_t linelen, /* length of the line where tag is */
2074 intmax_t lno, /* line number */
2075 intmax_t cno) /* character number */
2078 register node *np;
2080 if ((CTAGS && name == NULL)
2081 /* We used to have an assertion here for the case below, but if we hit
2082 that case, it just means our parser got confused, and there's nothing
2083 to do about such empty "tags". */
2084 || (!CTAGS && name && name[0] == '\0'))
2085 return;
2087 np = xmalloc (sizeof *np);
2089 /* If ctags mode, change name "main" to M<thisfilename>. */
2090 if (CTAGS && !cxref_style && streq (name, "main"))
2092 char *fp = strrchr (curfdp->taggedfname, '/');
2093 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2094 fp = strrchr (np->name, '.');
2095 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2096 fp[0] = '\0';
2098 else
2099 np->name = name;
2100 np->valid = true;
2101 np->been_warned = false;
2102 np->fdp = curfdp;
2103 np->is_func = is_func;
2104 np->lno = lno;
2105 if (np->fdp->usecharno)
2106 /* Our char numbers are 0-base, because of C language tradition?
2107 ctags compatibility? old versions compatibility? I don't know.
2108 Anyway, since emacs's are 1-base we expect etags.el to take care
2109 of the difference. If we wanted to have 1-based numbers, we would
2110 uncomment the +1 below. */
2111 np->cno = cno /* + 1 */ ;
2112 else
2113 np->cno = invalidcharno;
2114 np->left = np->right = NULL;
2115 if (CTAGS && !cxref_style)
2117 if (strnlen (linestart, 50) < 50)
2118 np->regex = concat (linestart, "$", "");
2119 else
2120 np->regex = savenstr (linestart, 50);
2122 else
2123 np->regex = savenstr (linestart, linelen);
2125 add_node (np, &nodehead);
2129 * Utility functions and data to avoid recursion.
2132 typedef struct stack_entry {
2133 node *np;
2134 struct stack_entry *next;
2135 } stkentry;
2137 static void
2138 push_node (node *np, stkentry **stack_top)
2140 if (np)
2142 stkentry *new = xmalloc (sizeof *new);
2144 new->np = np;
2145 new->next = *stack_top;
2146 *stack_top = new;
2150 static node *
2151 pop_node (stkentry **stack_top)
2153 node *ret = NULL;
2155 if (*stack_top)
2157 stkentry *old_start = *stack_top;
2159 ret = (*stack_top)->np;
2160 *stack_top = (*stack_top)->next;
2161 free (old_start);
2163 return ret;
2167 * free_tree ()
2168 * emulate recursion on left children, iterate on right children.
2170 static void
2171 free_tree (register node *np)
2173 stkentry *stack = NULL;
2175 while (np)
2177 /* Descent on left children. */
2178 while (np->left)
2180 push_node (np, &stack);
2181 np = np->left;
2183 /* Free node without left children. */
2184 node *node_right = np->right;
2185 free (np->name);
2186 free (np->regex);
2187 free (np);
2188 if (!node_right)
2190 /* Backtrack to find a node with right children, while freeing nodes
2191 that don't have right children. */
2192 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2194 node_right = np->right;
2195 free (np->name);
2196 free (np->regex);
2197 free (np);
2200 /* Free right children. */
2201 np = node_right;
2206 * free_fdesc ()
2207 * delete a file description
2209 static void
2210 free_fdesc (register fdesc *fdp)
2212 free (fdp->infname);
2213 free (fdp->infabsname);
2214 free (fdp->infabsdir);
2215 free (fdp->taggedfname);
2216 free (fdp->prop);
2217 free (fdp);
2221 * add_node ()
2222 * Adds a node to the tree of nodes. In etags mode, sort by file
2223 * name. In ctags mode, sort by tag name. Make no attempt at
2224 * balancing.
2226 * add_node is the only function allowed to add nodes, so it can
2227 * maintain state.
2229 static void
2230 add_node (node *np, node **cur_node_p)
2232 node *cur_node = *cur_node_p;
2234 /* Make the first node. */
2235 if (cur_node == NULL)
2237 *cur_node_p = np;
2238 last_node = np;
2239 return;
2242 if (!CTAGS)
2243 /* Etags Mode */
2245 /* For each file name, tags are in a linked sublist on the right
2246 pointer. The first tags of different files are a linked list
2247 on the left pointer. last_node points to the end of the last
2248 used sublist. */
2249 if (last_node != NULL && last_node->fdp == np->fdp)
2251 /* Let's use the same sublist as the last added node. */
2252 assert (last_node->right == NULL);
2253 last_node->right = np;
2254 last_node = np;
2256 else
2258 while (cur_node->fdp != np->fdp)
2260 if (cur_node->left == NULL)
2261 break;
2262 /* The head of this sublist is not good for us. Let's try the
2263 next one. */
2264 cur_node = cur_node->left;
2266 if (cur_node->left)
2268 /* Scanning the list we found the head of a sublist which is
2269 good for us. Let's scan this sublist. */
2270 if (cur_node->right)
2272 cur_node = cur_node->right;
2273 while (cur_node->right)
2274 cur_node = cur_node->right;
2276 /* Make a new node in this sublist. */
2277 cur_node->right = np;
2279 else
2281 /* Make a new sublist. */
2282 cur_node->left = np;
2284 last_node = np;
2286 } /* if ETAGS mode */
2287 else
2289 /* Ctags Mode */
2290 node **next_node = &cur_node;
2292 while ((cur_node = *next_node) != NULL)
2294 int dif = strcmp (np->name, cur_node->name);
2296 * If this tag name matches an existing one, then
2297 * do not add the node, but maybe print a warning.
2299 if (!dif && no_duplicates)
2301 if (np->fdp == cur_node->fdp)
2303 if (!no_warnings)
2305 fprintf (stderr,
2306 ("Duplicate entry in file %s, "
2307 "line %"PRIdMAX": %s\n"),
2308 np->fdp->infname, lineno, np->name);
2309 fprintf (stderr, "Second entry ignored\n");
2312 else if (!cur_node->been_warned && !no_warnings)
2314 fprintf
2315 (stderr,
2316 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2317 np->fdp->infname, cur_node->fdp->infname, np->name);
2318 cur_node->been_warned = true;
2320 return;
2322 else
2323 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2325 *next_node = np;
2326 last_node = np;
2327 } /* if CTAGS mode */
2331 * invalidate_nodes ()
2332 * Scan the node tree and invalidate all nodes pointing to the
2333 * given file description (CTAGS case) or free them (ETAGS case).
2335 static void
2336 invalidate_nodes (fdesc *badfdp, node **npp)
2338 node *np = *npp;
2339 stkentry *stack = NULL;
2341 if (CTAGS)
2343 while (np)
2345 /* Push all the left children on the stack. */
2346 while (np->left != NULL)
2348 push_node (np, &stack);
2349 np = np->left;
2351 /* Invalidate this node. */
2352 if (np->fdp == badfdp)
2353 np->valid = false;
2354 if (!np->right)
2356 /* Pop nodes from stack, invalidating them, until we find one
2357 with a right child. */
2358 while ((np = pop_node (&stack)) != NULL)
2360 if (np->fdp == badfdp)
2361 np->valid = false;
2362 if (np->right != NULL)
2363 break;
2366 /* Process the right child, if any. */
2367 if (np)
2368 np = np->right;
2371 else
2373 node super_root, *np_parent = NULL;
2375 super_root.left = np;
2376 super_root.fdp = (fdesc *) -1;
2377 np = &super_root;
2379 while (np)
2381 /* Descent on left children until node with BADFP. */
2382 while (np && np->fdp != badfdp)
2384 assert (np->fdp != NULL);
2385 np_parent = np;
2386 np = np->left;
2388 if (np)
2390 np_parent->left = np->left; /* detach subtree from the tree */
2391 np->left = NULL; /* isolate it */
2392 free_tree (np); /* free it */
2394 /* Continue with rest of tree. */
2395 np = np_parent->left;
2398 *npp = super_root.left;
2403 static ptrdiff_t total_size_of_entries (node *);
2404 static int number_len (intmax_t) ATTRIBUTE_CONST;
/* Length of a non-negative number's decimal representation.
   The result is never smaller than 1, since zero still takes one
   digit.  */
static int
number_len (intmax_t num)
{
  int digits = 0;

  do
    digits++;
  while ((num /= 10) > 0);

  return digits;
}
2417 * Return total number of characters that put_entries will output for
2418 * the nodes in the linked list at the right of the specified node.
2419 * This count is irrelevant with etags.el since emacs 19.34 at least,
2420 * but is still supplied for backward compatibility.
2422 static ptrdiff_t
2423 total_size_of_entries (node *np)
2425 ptrdiff_t total = 0;
2427 for (; np != NULL; np = np->right)
2428 if (np->valid)
2430 total += strlen (np->regex) + 1; /* pat\177 */
2431 if (np->name != NULL)
2432 total += strlen (np->name) + 1; /* name\001 */
2433 total += number_len (np->lno) + 1; /* lno, */
2434 if (np->cno != invalidcharno) /* cno */
2435 total += number_len (np->cno);
2436 total += 1; /* newline */
2439 return total;
2442 static void
2443 put_entry (node *np)
2445 register char *sp;
2446 static fdesc *fdp = NULL;
2448 /* Output this entry */
2449 if (np->valid)
2451 if (!CTAGS)
2453 /* Etags mode */
2454 if (fdp != np->fdp)
2456 fdp = np->fdp;
2457 fprintf (tagf, "\f\n%s,%"PRIdPTR"\n",
2458 fdp->taggedfname, total_size_of_entries (np));
2459 fdp->written = true;
2461 fputs (np->regex, tagf);
2462 fputc ('\177', tagf);
2463 if (np->name != NULL)
2465 fputs (np->name, tagf);
2466 fputc ('\001', tagf);
2468 fprintf (tagf, "%"PRIdMAX",", np->lno);
2469 if (np->cno != invalidcharno)
2470 fprintf (tagf, "%"PRIdMAX, np->cno);
2471 fputs ("\n", tagf);
2473 else
2475 /* Ctags mode */
2476 if (np->name == NULL)
2477 error ("internal error: NULL name in ctags mode.");
2479 if (cxref_style)
2481 if (vgrind_style)
2482 fprintf (stdout, "%s %s %"PRIdMAX"\n",
2483 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2484 else
2485 fprintf (stdout, "%-16s %3"PRIdMAX" %-16s %s\n",
2486 np->name, np->lno, np->fdp->taggedfname, np->regex);
2488 else
2490 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2492 if (np->is_func)
2493 { /* function or #define macro with args */
2494 putc (searchar, tagf);
2495 putc ('^', tagf);
2497 for (sp = np->regex; *sp; sp++)
2499 if (*sp == '\\' || *sp == searchar)
2500 putc ('\\', tagf);
2501 putc (*sp, tagf);
2503 putc (searchar, tagf);
2505 else
2506 { /* anything else; text pattern inadequate */
2507 fprintf (tagf, "%"PRIdMAX, np->lno);
2509 putc ('\n', tagf);
2512 } /* if this node contains a valid tag */
2515 static void
2516 put_entries (node *np)
2518 stkentry *stack = NULL;
2520 if (np == NULL)
2521 return;
2523 if (CTAGS)
2525 while (np)
2527 /* Stack subentries that precede this one. */
2528 while (np->left)
2530 push_node (np, &stack);
2531 np = np->left;
2533 /* Output this subentry. */
2534 put_entry (np);
2535 /* Stack subentries that follow this one. */
2536 while (!np->right)
2538 /* Output subentries that precede the next one. */
2539 np = pop_node (&stack);
2540 if (!np)
2541 break;
2542 put_entry (np);
2544 if (np)
2545 np = np->right;
2548 else
2550 push_node (np, &stack);
2551 while ((np = pop_node (&stack)) != NULL)
2553 /* Output this subentry. */
2554 put_entry (np);
2555 while (np->right)
2557 /* Output subentries that follow this one. */
2558 put_entry (np->right);
2559 /* Stack subentries from the following files. */
2560 push_node (np->left, &stack);
2561 np = np->right;
2563 push_node (np->left, &stack);
/* C extensions.  Language flags passed around as the `c_ext' mask.  */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Kinds of reserved words known to the C-like parsers; the keyword
 * table further down maps each word to one of these.
 */
enum sym_type
{
  st_none,              /* not a word from the keyword table */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, ... */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_enum_bf,         /* ENUM_BF */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2593 /* Feed stuff between (but not including) %[ and %] lines to:
2594 gperf -m 5
2596 %compare-strncmp
2597 %enum
2598 %struct-type
2599 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2601 if, 0, st_C_ignore
2602 for, 0, st_C_ignore
2603 while, 0, st_C_ignore
2604 switch, 0, st_C_ignore
2605 return, 0, st_C_ignore
2606 __attribute__, 0, st_C_attribute
2607 GTY, 0, st_C_attribute
2608 @interface, 0, st_C_objprot
2609 @protocol, 0, st_C_objprot
2610 @implementation,0, st_C_objimpl
2611 @end, 0, st_C_objend
2612 import, (C_JAVA & ~C_PLPL), st_C_ignore
2613 package, (C_JAVA & ~C_PLPL), st_C_ignore
2614 friend, C_PLPL, st_C_ignore
2615 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2616 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2617 interface, (C_JAVA & ~C_PLPL), st_C_struct
2618 class, 0, st_C_class
2619 namespace, C_PLPL, st_C_struct
2620 domain, C_STAR, st_C_struct
2621 union, 0, st_C_struct
2622 struct, 0, st_C_struct
2623 extern, 0, st_C_extern
2624 enum, 0, st_C_enum
2625 typedef, 0, st_C_typedef
2626 define, 0, st_C_define
2627 undef, 0, st_C_define
2628 operator, C_PLPL, st_C_operator
2629 template, 0, st_C_template
2630 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2631 DEFUN, 0, st_C_gnumacro
2632 SYSCALL, 0, st_C_gnumacro
2633 ENTRY, 0, st_C_gnumacro
2634 PSEUDO, 0, st_C_gnumacro
2635 ENUM_BF, 0, st_C_enum_bf
2636 # These are defined inside C functions, so currently they are not met.
2637 # EXFUN used in glibc, DEFVAR_* in emacs.
2638 #EXFUN, 0, st_C_gnumacro
2639 #DEFVAR_, 0, st_C_gnumacro
2641 and replace lines between %< and %> with its output, then:
2642 - remove the #if characterset check
2643 - remove any #line directives
2644 - make in_word_set static and not inline
2645 - remove any 'register' qualifications from variable decls. */
2646 /*%<*/
2647 /* C code produced by gperf version 3.0.1 */
2648 /* Command-line: gperf -m 5 */
2649 /* Computed positions: -k'2-3' */
2651 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 34, duplicates = 0 */

/* Hash function for the keyword table: the length LEN plus the
   association values of the second character of STR and, unless LEN
   is 2, of the third one.  STR must therefore hold at least two
   characters, and at least three when LEN is not 2.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36,  3,
      27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
      36, 36, 36, 25,  0,  0, 36, 36, 36,  0,
      36, 36, 36, 36, 36,  1, 36, 16, 36,  6,
      23,  0,  0, 36, 22,  0, 36, 36,  5,  0,
       0, 15,  1, 36,  6, 36,  8, 19, 36, 16,
       4,  5, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36
    };
  int hval = len;

  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2700 static struct C_stab_entry *
2701 in_word_set (const char *str, ptrdiff_t len)
2703 enum
2705 TOTAL_KEYWORDS = 34,
2706 MIN_WORD_LENGTH = 2,
2707 MAX_WORD_LENGTH = 15,
2708 MIN_HASH_VALUE = 2,
2709 MAX_HASH_VALUE = 35
2712 static struct C_stab_entry wordlist[] =
2714 {""}, {""},
2715 {"if", 0, st_C_ignore},
2716 {"GTY", 0, st_C_attribute},
2717 {"@end", 0, st_C_objend},
2718 {"union", 0, st_C_struct},
2719 {"define", 0, st_C_define},
2720 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2721 {"template", 0, st_C_template},
2722 {"operator", C_PLPL, st_C_operator},
2723 {"@interface", 0, st_C_objprot},
2724 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2725 {"friend", C_PLPL, st_C_ignore},
2726 {"typedef", 0, st_C_typedef},
2727 {"return", 0, st_C_ignore},
2728 {"@implementation",0, st_C_objimpl},
2729 {"@protocol", 0, st_C_objprot},
2730 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2731 {"extern", 0, st_C_extern},
2732 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2733 {"struct", 0, st_C_struct},
2734 {"domain", C_STAR, st_C_struct},
2735 {"switch", 0, st_C_ignore},
2736 {"enum", 0, st_C_enum},
2737 {"for", 0, st_C_ignore},
2738 {"namespace", C_PLPL, st_C_struct},
2739 {"class", 0, st_C_class},
2740 {"while", 0, st_C_ignore},
2741 {"undef", 0, st_C_define},
2742 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2743 {"__attribute__", 0, st_C_attribute},
2744 {"ENTRY", 0, st_C_gnumacro},
2745 {"SYSCALL", 0, st_C_gnumacro},
2746 {"ENUM_BF", 0, st_C_enum_bf},
2747 {"PSEUDO", 0, st_C_gnumacro},
2748 {"DEFUN", 0, st_C_gnumacro}
2751 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2753 int key = hash (str, len);
2755 if (key <= MAX_HASH_VALUE && key >= 0)
2757 const char *s = wordlist[key].name;
2759 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2760 return &wordlist[key];
2763 return 0;
2765 /*%>*/
2767 static enum sym_type
2768 C_symtype (char *str, ptrdiff_t len, int c_ext)
2770 struct C_stab_entry *se = in_word_set (str, len);
2772 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2773 return st_none;
2774 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * Ignoring ENUM_BF (type)
 */
static bool in_enum_bf;         /* inside parentheses following ENUM_BF */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* Where the token is.  */
  char *line;                   /* string containing the token */
  ptrdiff_t offset;             /* where the token starts in LINE */
  ptrdiff_t length;             /* token length */

  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* do not create a tag; the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  intmax_t lineno;              /* source line number of tag */
  intmax_t linepos;             /* source char number of tag */
} token;                        /* latest token read */
2897 * Variables and functions for dealing with nested structures.
2898 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2900 static void pushclass_above (ptrdiff_t, char *, ptrdiff_t);
2901 static void popclass_above (ptrdiff_t);
2902 static void write_classname (linebuffer *, const char *qualifier);
/* Stack for nested declaration tags.  `cname' and `bracelev' are
   parallel arrays: entry I holds the name of the Ith enclosing
   declaration and the brace level recorded for it.  */
static struct
{
  char **cname;                 /* nested class names */
  ptrdiff_t *bracelev;          /* nested class brace level */
  ptrdiff_t nl;                 /* class nesting level (elements used) */
  ptrdiff_t size;               /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum).  */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2916 static void
2917 pushclass_above (ptrdiff_t bracelev, char *str, ptrdiff_t len)
2919 ptrdiff_t nl;
2921 popclass_above (bracelev);
2922 nl = cstack.nl;
2923 if (nl >= cstack.size)
2925 xrnew (cstack.cname, cstack.size, 2);
2926 xrnew (cstack.bracelev, cstack.size, 2);
2927 cstack.size *= 2;
2929 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2930 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2931 cstack.bracelev[nl] = bracelev;
2932 cstack.nl = nl + 1;
2935 static void
2936 popclass_above (ptrdiff_t bracelev)
2938 for (ptrdiff_t nl = cstack.nl - 1;
2939 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2940 nl--)
2942 free (cstack.cname[nl]);
2943 cstack.nl = nl;
2947 static void
2948 write_classname (linebuffer *cn, const char *qualifier)
2950 ptrdiff_t len;
2952 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2954 len = 0;
2955 cn->len = 0;
2956 cn->buffer[0] = '\0';
2958 else
2960 len = strlen (cstack.cname[0]);
2961 linebuffer_setlen (cn, len);
2962 strcpy (cn->buffer, cstack.cname[0]);
2964 for (ptrdiff_t i = 1; i < cstack.nl; i++)
2966 char *s = cstack.cname[i];
2967 if (s == NULL)
2968 continue;
2969 int qlen = strlen (qualifier);
2970 ptrdiff_t slen = strlen (s);
2971 linebuffer_setlen (cn, len + qlen + slen);
2972 memcpyz (stpcpy (cn->buffer + len, qualifier), s, slen);
2973 len += qlen + slen;
2978 static bool consider_token (char *, ptrdiff_t, int, int *,
2979 ptrdiff_t, ptrdiff_t, bool *);
2980 static void make_C_tag (bool);
2983 * consider_token ()
2984 * checks to see if the current token is at the start of a
2985 * function or variable, or corresponds to a typedef, or
2986 * is a struct/union/enum tag, or #define, or an enum constant.
2988 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2989 * with args. C_EXTP points to which language we are looking at.
2991 * Globals
2992 * fvdef IN OUT
2993 * structdef IN OUT
2994 * definedef IN OUT
2995 * typdef IN OUT
2996 * objdef IN OUT
2999 static bool
3000 consider_token (char *str, /* IN: token pointer */
3001 ptrdiff_t len, /* IN: token length */
3002 int c, /* IN: first char after the token */
3003 int *c_extp, /* IN, OUT: C extensions mask */
3004 ptrdiff_t bracelev, /* IN: brace level */
3005 ptrdiff_t parlev, /* IN: parenthesis level */
3006 bool *is_func_or_var) /* OUT: function or variable found */
3008 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
3009 structtype is the type of the preceding struct-like keyword, and
3010 structbracelev is the brace level where it has been seen. */
3011 static enum sym_type structtype;
3012 static ptrdiff_t structbracelev;
3013 static enum sym_type toktype;
3016 toktype = C_symtype (str, len, *c_extp);
3019 * Skip __attribute__
3021 if (toktype == st_C_attribute)
3023 inattribute = true;
3024 return false;
3028 * Skip ENUM_BF
3030 if (toktype == st_C_enum_bf && definedef == dnone)
3032 in_enum_bf = true;
3033 return false;
3037 * Advance the definedef state machine.
3039 switch (definedef)
3041 case dnone:
3042 /* We're not on a preprocessor line. */
3043 if (toktype == st_C_gnumacro)
3045 fvdef = fdefunkey;
3046 return false;
3048 break;
3049 case dsharpseen:
3050 if (toktype == st_C_define)
3052 definedef = ddefineseen;
3054 else
3056 definedef = dignorerest;
3058 return false;
3059 case ddefineseen:
3061 * Make a tag for any macro, unless it is a constant
3062 * and constantypedefs is false.
3064 definedef = dignorerest;
3065 *is_func_or_var = (c == '(');
3066 if (!*is_func_or_var && !constantypedefs)
3067 return false;
3068 else
3069 return true;
3070 case dignorerest:
3071 return false;
3072 default:
3073 error ("internal error: definedef value.");
3077 * Now typedefs
3079 switch (typdef)
3081 case tnone:
3082 if (toktype == st_C_typedef)
3084 if (typedefs)
3085 typdef = tkeyseen;
3086 fvextern = false;
3087 fvdef = fvnone;
3088 return false;
3090 break;
3091 case tkeyseen:
3092 switch (toktype)
3094 case st_none:
3095 case st_C_class:
3096 case st_C_struct:
3097 case st_C_enum:
3098 typdef = ttypeseen;
3099 break;
3100 default:
3101 break;
3103 break;
3104 case ttypeseen:
3105 if (structdef == snone && fvdef == fvnone)
3107 fvdef = fvnameseen;
3108 return true;
3110 break;
3111 case tend:
3112 switch (toktype)
3114 case st_C_class:
3115 case st_C_struct:
3116 case st_C_enum:
3117 return false;
3118 default:
3119 return true;
3121 default:
3122 break;
3125 switch (toktype)
3127 case st_C_javastruct:
3128 if (structdef == stagseen)
3129 structdef = scolonseen;
3130 return false;
3131 case st_C_template:
3132 case st_C_class:
3133 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3134 && bracelev == 0
3135 && definedef == dnone && structdef == snone
3136 && typdef == tnone && fvdef == fvnone)
3137 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3138 if (toktype == st_C_template)
3139 break;
3140 FALLTHROUGH;
3141 case st_C_struct:
3142 case st_C_enum:
3143 if (parlev == 0
3144 && fvdef != vignore
3145 && (typdef == tkeyseen
3146 || (typedefs_or_cplusplus && structdef == snone)))
3148 structdef = skeyseen;
3149 structtype = toktype;
3150 structbracelev = bracelev;
3151 if (fvdef == fvnameseen)
3152 fvdef = fvnone;
3154 return false;
3155 default:
3156 break;
3159 if (structdef == skeyseen)
3161 structdef = stagseen;
3162 return true;
3165 if (typdef != tnone)
3166 definedef = dnone;
3168 /* Detect Objective C constructs. */
3169 switch (objdef)
3171 case onone:
3172 switch (toktype)
3174 case st_C_objprot:
3175 objdef = oprotocol;
3176 return false;
3177 case st_C_objimpl:
3178 objdef = oimplementation;
3179 return false;
3180 default:
3181 break;
3183 break;
3184 case oimplementation:
3185 /* Save the class tag for functions or variables defined inside. */
3186 objtag = savenstr (str, len);
3187 objdef = oinbody;
3188 return false;
3189 case oprotocol:
3190 /* Save the class tag for categories. */
3191 objtag = savenstr (str, len);
3192 objdef = otagseen;
3193 *is_func_or_var = true;
3194 return true;
3195 case oparenseen:
3196 objdef = ocatseen;
3197 *is_func_or_var = true;
3198 return true;
3199 case oinbody:
3200 break;
3201 case omethodsign:
3202 if (parlev == 0)
3204 fvdef = fvnone;
3205 objdef = omethodtag;
3206 linebuffer_setlen (&token_name, len);
3207 memcpyz (token_name.buffer, str, len);
3208 return true;
3210 return false;
3211 case omethodcolon:
3212 if (parlev == 0)
3213 objdef = omethodparm;
3214 return false;
3215 case omethodparm:
3216 if (parlev == 0)
3218 objdef = omethodtag;
3219 if (class_qualify)
3221 ptrdiff_t oldlen = token_name.len;
3222 fvdef = fvnone;
3223 linebuffer_setlen (&token_name, oldlen + len);
3224 memcpyz (token_name.buffer + oldlen, str, len);
3226 return true;
3228 return false;
3229 case oignore:
3230 if (toktype == st_C_objend)
3232 /* Memory leakage here: the string pointed by objtag is
3233 never released, because many tests would be needed to
3234 avoid breaking on incorrect input code. The amount of
3235 memory leaked here is the sum of the lengths of the
3236 class tags.
3237 free (objtag); */
3238 objdef = onone;
3240 return false;
3241 default:
3242 break;
3245 /* A function, variable or enum constant? */
3246 switch (toktype)
3248 case st_C_extern:
3249 fvextern = true;
3250 switch (fvdef)
3252 case finlist:
3253 case flistseen:
3254 case fignore:
3255 case vignore:
3256 break;
3257 default:
3258 fvdef = fvnone;
3260 return false;
3261 case st_C_ignore:
3262 fvextern = false;
3263 fvdef = vignore;
3264 return false;
3265 case st_C_operator:
3266 fvdef = foperator;
3267 *is_func_or_var = true;
3268 return true;
3269 case st_none:
3270 if (constantypedefs
3271 && structdef == snone
3272 && structtype == st_C_enum && bracelev > structbracelev
3273 /* Don't tag tokens in expressions that assign values to enum
3274 constants. */
3275 && fvdef != vignore)
3276 return true; /* enum constant */
3277 switch (fvdef)
3279 case fdefunkey:
3280 if (bracelev > 0)
3281 break;
3282 fvdef = fdefunname; /* GNU macro */
3283 *is_func_or_var = true;
3284 return true;
3285 case fvnone:
3286 switch (typdef)
3288 case ttypeseen:
3289 return false;
3290 case tnone:
3291 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3292 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3294 fvdef = vignore;
3295 return false;
3297 break;
3298 default:
3299 break;
3301 FALLTHROUGH;
3302 case fvnameseen:
3303 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3305 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3306 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3307 fvdef = foperator;
3308 *is_func_or_var = true;
3309 return true;
3311 if (bracelev > 0 && !instruct)
3312 break;
3313 fvdef = fvnameseen; /* function or variable */
3314 *is_func_or_var = true;
3315 return true;
3316 default:
3317 break;
3319 break;
3320 default:
3321 break;
3324 return false;
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
/* The pair of line buffers used while scanning C-like input.  Each entry
   holds one input line (lb) together with the value of charno that was
   recorded just before that line was read (linepos); see
   CNL_SAVE_DEFINEDEF, which fills both fields.  */
3333 static struct
3335 intmax_t linepos;
3336 linebuffer lb;
3337 } lbs[2];
/* Helpers for the two entries of lbs.  They expand to expressions that use
   the variables curndx and newndx, which must be in scope where the macro
   is used.  */
/* True when the current and the new buffer are the same entry.  */
3339 #define current_lb_is_new (newndx == curndx)
/* Make the other entry of lbs the current one (curndx flips between 0 and 1).  */
3340 #define switch_line_buffers() (curndx = 1 - curndx)
/* The line buffer and the recorded line position selected by curndx and
   by newndx respectively.  */
3342 #define curlb (lbs[curndx].lb)
3343 #define newlb (lbs[newndx].lb)
3344 #define curlinepos (lbs[curndx].linepos)
3345 #define newlinepos (lbs[newndx].linepos)
/* Dialect tests on the variable c_ext, which must be in scope where the
   macro is used: plainc compares the C_EXT bits with C_PLAIN, cplpl tests
   the C_PLPL bits, and cjava requires all the C_JAVA bits to be set.  */
3347 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3348 #define cplpl (c_ext & C_PLPL)
3349 #define cjava ((c_ext & C_JAVA) == C_JAVA)
/* Advance to the next input line without touching definedef: record charno
   as the position of the line, read the line from inf into the current
   buffer, point lp at its first character, clear quotednl, and make the
   current buffer the new one as well.  Uses inf, lp, quotednl, curndx and
   newndx from the scope where the macro is expanded.  */
3351 #define CNL_SAVE_DEFINEDEF() \
3352 do { \
3353 curlinepos = charno; \
3354 readline (&curlb, inf); \
3355 lp = curlb.buffer; \
3356 quotednl = false; \
3357 newndx = curndx; \
3358 } while (0)
/* Advance to the next input line as CNL_SAVE_DEFINEDEF does, then end any
   preprocessor-directive state: when savetoken is valid it is copied back
   into token and invalidated, and definedef is reset to dnone.  */
3360 #define CNL() \
3361 do { \
3362 CNL_SAVE_DEFINEDEF (); \
3363 if (savetoken.valid) \
3365 token = savetoken; \
3366 savetoken.valid = false; \
3368 definedef = dnone; \
3369 } while (0)
3372 static void
3373 make_C_tag (bool isfun)
3375 /* This function is never called when token.valid is false, but
3376 we must protect against invalid input or internal errors. */
3377 if (token.valid)
3378 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3379 token.offset+token.length+1, token.lineno, token.linepos);
3380 else if (DEBUG)
3381 { /* this branch is optimized away if !DEBUG */
3382 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3383 token_name.len + 17, isfun, token.line,
3384 token.offset+token.length+1, token.lineno, token.linepos);
3385 error ("INVALID TOKEN");
3388 token.valid = false;
/* Return true while INF has neither reached end of file nor recorded a
   read error, i.e. while another read attempt may still yield data.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  if (ferror (inf))
    return false;
  return true;
}
/*
 * C_entries ()
 *	This routine finds functions, variables, typedefs,
 *	#define's, enum constants and struct/union/enum definitions in
 *	C syntax and adds them to the list.
 */
/* Scan the C-like source read from INF and create tags for it.

   C_EXT holds the dialect bits tested by the plainc, cplpl and cjava
   macros and by the YACC test below.  It is a local copy that may change
   during the scan: when C_AUTO is set, a double colon found inside an
   identifier replaces C_AUTO with C_PLPL, and consider_token receives a
   pointer to it as well.

   The loop handles one character per iteration, taken from lp.  A NUL
   character stands for the end of the line and makes CNL or
   CNL_SAVE_DEFINEDEF read the next line.  Comments, string literals,
   character literals and bracketed text are skipped.  Outside of those,
   identifiers are delimited with begtoken, intoken and endtoken and
   handed to consider_token (inside yacc rules every token is accepted).
   Finally a switch on the character that ended the token updates the
   fvdef, typdef, structdef and objdef states and calls make_C_tag when a
   definition is complete.

   The two line buffers in lbs are initialized on entry and freed before
   returning.  */
3404 static void
3405 C_entries (int c_ext, /* extension of C */
3406 FILE *inf) /* input file */
3408 char c; /* latest char read; '\0' for end of line */
3409 char *lp; /* pointer one beyond the character `c' */
3410 bool curndx, newndx; /* indices for current and new lb */
3411 ptrdiff_t tokoff; /* offset in line of start of current token */
3412 ptrdiff_t toklen; /* length of current token */
3413 const char *qualifier; /* string used to qualify names */
3414 int qlen; /* length of qualifier */
3415 ptrdiff_t bracelev; /* current brace level */
3416 ptrdiff_t bracketlev; /* current bracket level */
3417 ptrdiff_t parlev; /* current parenthesis level */
3418 ptrdiff_t attrparlev; /* __attribute__ parenthesis level */
3419 ptrdiff_t templatelev; /* current template level */
3420 ptrdiff_t typdefbracelev; /* bracelev where a typedef struct body began */
3421 bool incomm, inquote, inchar, quotednl, midtoken;
3422 bool yacc_rules; /* in the rules part of a yacc file */
3423 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Every call starts with two freshly initialized line buffers; they are
   freed at the end of the function.  */
3426 linebuffer_init (&lbs[0].lb);
3427 linebuffer_init (&lbs[1].lb);
/* The class stack is allocated lazily, when its size is found to be zero.  */
3428 if (cstack.size == 0)
3430 cstack.size = (DEBUG) ? 1 : 4;
3431 cstack.nl = 0;
3432 cstack.cname = xnmalloc (cstack.size, sizeof *cstack.cname);
3433 cstack.bracelev = xnmalloc (cstack.size, sizeof *cstack.bracelev);
3436 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3437 curndx = newndx = 0;
3438 lp = curlb.buffer;
3439 *lp = 0;
3441 fvdef = fvnone; fvextern = false; typdef = tnone;
3442 structdef = snone; definedef = dnone; objdef = onone;
3443 yacc_rules = false;
3444 midtoken = inquote = inchar = incomm = quotednl = false;
3445 token.valid = savetoken.valid = false;
3446 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Java joins a class name and a member name with a dot, the other
   dialects with a double colon.  */
3447 if (cjava)
3448 { qualifier = "."; qlen = 1; }
3449 else
3450 { qualifier = "::"; qlen = 2; }
/* Main loop: one input character per iteration.  */
3453 while (perhaps_more_input (inf))
3455 c = *lp++;
3456 if (c == '\\')
3458 /* If we are at the end of the line, the next character is a
3459 '\0'; do not skip it, because it is what tells us
3460 to read the next line. */
3461 if (*lp == '\0')
3463 quotednl = true;
3464 continue;
3466 lp++;
3467 c = ' ';
3469 else if (incomm)
3471 switch (c)
3473 case '*':
3474 if (*lp == '/')
3476 c = *lp++;
3477 incomm = false;
3479 break;
3480 case '\0':
3481 /* Newlines inside comments do not end macro definitions in
3482 traditional cpp. */
3483 CNL_SAVE_DEFINEDEF ();
3484 break;
3486 continue;
3488 else if (inquote)
3490 switch (c)
3492 case '"':
3493 inquote = false;
3494 break;
3495 case '\0':
3496 /* Newlines inside strings do not end macro definitions
3497 in traditional cpp, even though compilers don't
3498 usually accept them. */
3499 CNL_SAVE_DEFINEDEF ();
3500 break;
3502 continue;
3504 else if (inchar)
3506 switch (c)
3508 case '\0':
3509 /* Hmmm, something went wrong. */
3510 CNL ();
3511 FALLTHROUGH;
3512 case '\'':
3513 inchar = false;
3514 break;
3516 continue;
3518 else switch (c)
3520 case '"':
3521 inquote = true;
3522 if (bracketlev > 0)
3523 continue;
3524 if (inattribute)
3525 break;
3526 switch (fvdef)
3528 case fdefunkey:
3529 case fstartlist:
3530 case finlist:
3531 case fignore:
3532 case vignore:
3533 break;
3534 default:
3535 fvextern = false;
3536 fvdef = fvnone;
3538 continue;
3539 case '\'':
3540 inchar = true;
3541 if (bracketlev > 0)
3542 continue;
3543 if (inattribute)
3544 break;
3545 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3547 fvextern = false;
3548 fvdef = fvnone;
3550 continue;
3551 case '/':
3552 if (*lp == '*')
3554 incomm = true;
3555 lp++;
3556 c = ' ';
3557 if (bracketlev > 0)
3558 continue;
/* A double-slash comment: treat it as the end of the line, so that the
   rest of the line is never examined.  */
3560 else if (/* cplpl && */ *lp == '/')
3562 c = '\0';
3564 break;
3565 case '%':
3566 if ((c_ext & YACC) && *lp == '%')
3568 /* Entering or exiting rules section in yacc file. */
3569 lp++;
3570 definedef = dnone; fvdef = fvnone; fvextern = false;
3571 typdef = tnone; structdef = snone;
3572 midtoken = inquote = inchar = incomm = quotednl = false;
3573 bracelev = 0;
3574 yacc_rules = !yacc_rules;
3575 continue;
3577 else
3578 break;
3579 case '#':
3580 if (definedef == dnone)
3582 char *cp;
3583 bool cpptoken = true;
3585 /* Look back on this line. If all blanks, or nonblanks
3586 followed by an end of comment, this is a preprocessor
3587 token. */
3588 for (cp = newlb.buffer; cp < lp-1; cp++)
3589 if (!c_isspace (*cp))
3591 if (*cp == '*' && cp[1] == '/')
3593 cp++;
3594 cpptoken = true;
3596 else
3597 cpptoken = false;
3599 if (cpptoken)
3601 definedef = dsharpseen;
3602 /* This is needed for tagging enum values: when there are
3603 preprocessor conditionals inside the enum, we need to
3604 reset the value of fvdef so that the next enum value is
3605 tagged even though the one before it did not end in a
3606 comma. */
3607 if (fvdef == vignore && instruct && parlev == 0)
3609 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3610 fvdef = fvnone;
3613 } /* if (definedef == dnone) */
3614 continue;
3615 case '[':
3616 bracketlev++;
3617 continue;
3618 default:
3619 if (bracketlev > 0)
3621 if (c == ']')
3622 --bracketlev;
3623 else if (c == '\0')
3624 CNL_SAVE_DEFINEDEF ();
3625 continue;
3627 break;
3628 } /* switch (c) */
3631 /* Consider token only if some involved conditions are satisfied. */
3632 if (typdef != tignore
3633 && definedef != dignorerest
3634 && fvdef != finlist
3635 && templatelev == 0
3636 && (definedef != dnone
3637 || structdef != scolonseen)
3638 && !inattribute
3639 && !in_enum_bf)
3641 if (midtoken)
3643 if (endtoken (c))
3645 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3646 /* This handles :: in the middle,
3647 but not at the beginning of an identifier.
3648 Also, space-separated :: is not recognized. */
3650 if (c_ext & C_AUTO) /* automatic detection of C++ */
3651 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3652 lp += 2;
3653 toklen += 2;
3654 c = lp[-1];
3655 goto still_in_token;
3657 else
3659 bool funorvar = false;
3661 if (yacc_rules
3662 || consider_token (newlb.buffer + tokoff, toklen, c,
3663 &c_ext, bracelev, parlev,
3664 &funorvar))
3666 if (fvdef == foperator)
3668 char *oldlp = lp;
3669 lp = skip_spaces (lp-1);
3670 if (*lp != '\0')
3671 lp += 1;
3672 while (*lp != '\0'
3673 && !c_isspace (*lp) && *lp != '(')
3674 lp += 1;
3675 c = *lp++;
3676 toklen += lp - oldlp;
3678 token.named = false;
3679 if (!plainc
3680 && nestlev > 0 && definedef == dnone)
3681 /* in struct body */
3683 if (class_qualify)
3685 write_classname (&token_name, qualifier);
3686 ptrdiff_t len = token_name.len;
3687 linebuffer_setlen (&token_name,
3688 len + qlen + toklen);
3689 memcpyz (stpcpy (token_name.buffer + len,
3690 qualifier),
3691 newlb.buffer + tokoff, toklen);
3693 else
3695 linebuffer_setlen (&token_name, toklen);
3696 memcpyz (token_name.buffer,
3697 newlb.buffer + tokoff, toklen);
3699 token.named = true;
3701 else if (objdef == ocatseen)
3702 /* Objective C category */
3704 if (class_qualify)
3706 ptrdiff_t len = strlen (objtag) + 2 + toklen;
3707 linebuffer_setlen (&token_name, len);
3708 char *p1 = stpcpy (token_name.buffer, objtag);
3709 char *p2 = stpcpy (p1, "(");
3710 char *p3 = mempcpy (p2, newlb.buffer + tokoff,
3711 toklen);
3712 strcpy (p3, ")");
3714 else
3716 linebuffer_setlen (&token_name, toklen);
3717 memcpyz (token_name.buffer,
3718 newlb.buffer + tokoff, toklen);
3720 token.named = true;
3722 else if (objdef == omethodtag
3723 || objdef == omethodparm)
3724 /* Objective C method */
3726 token.named = true;
3728 else if (fvdef == fdefunname)
3729 /* GNU DEFUN and similar macros */
3731 bool defun = (newlb.buffer[tokoff] == 'F');
3732 ptrdiff_t off = tokoff;
3733 ptrdiff_t len = toklen;
3735 if (defun)
3737 off += 1;
3738 len -= 1;
3740 /* First, tag it as its C name */
3741 linebuffer_setlen (&token_name, toklen);
3742 memcpyz (token_name.buffer,
3743 newlb.buffer + tokoff, toklen);
3744 token.named = true;
3745 token.lineno = lineno;
3746 token.offset = tokoff;
3747 token.length = toklen;
3748 token.line = newlb.buffer;
3749 token.linepos = newlinepos;
3750 token.valid = true;
3751 make_C_tag (funorvar);
3753 /* Rewrite the tag so that emacs lisp DEFUNs
3754 can be found also by their elisp name */
3755 linebuffer_setlen (&token_name, len);
3756 memcpyz (token_name.buffer,
3757 newlb.buffer + off, len);
3758 if (defun)
3759 while (--len >= 0)
3760 if (token_name.buffer[len] == '_')
3761 token_name.buffer[len] = '-';
3762 token.named = defun;
3764 else
3766 linebuffer_setlen (&token_name, toklen);
3767 memcpyz (token_name.buffer,
3768 newlb.buffer + tokoff, toklen);
3769 /* Name macros and members. */
3770 token.named = (structdef == stagseen
3771 || typdef == ttypeseen
3772 || typdef == tend
3773 || (funorvar
3774 && definedef == dignorerest)
3775 || (funorvar
3776 && definedef == dnone
3777 && structdef == snone
3778 && bracelev > 0));
3780 token.lineno = lineno;
3781 token.offset = tokoff;
3782 token.length = toklen;
3783 token.line = newlb.buffer;
3784 token.linepos = newlinepos;
3785 token.valid = true;
3787 if (definedef == dnone
3788 && (fvdef == fvnameseen
3789 || fvdef == foperator
3790 || structdef == stagseen
3791 || typdef == tend
3792 || typdef == ttypeseen
3793 || objdef != onone))
/* The tag cannot be made yet.  Keep the buffer holding this token intact:
   after the switch, the next line is read into the other buffer.  */
3795 if (current_lb_is_new)
3796 switch_line_buffers ();
3798 else if (definedef != dnone
3799 || fvdef == fdefunname
3800 || instruct)
3801 make_C_tag (funorvar);
3803 else /* not yacc and consider_token failed */
3805 if (inattribute && fvdef == fignore)
3807 /* We have just met __attribute__ after a
3808 function parameter list: do not tag the
3809 function again. */
3810 fvdef = fvnone;
3813 midtoken = false;
3815 } /* if (endtoken (c)) */
3816 else if (intoken (c))
3817 still_in_token:
3819 toklen++;
3820 continue;
3822 } /* if (midtoken) */
3823 else if (begtoken (c))
3825 switch (definedef)
3827 case dnone:
3828 switch (fvdef)
3830 case fstartlist:
3831 /* This prevents tagging fb in
3832 void (__attribute__ ((noreturn)) *fb) (void);
3833 Fixing this is not easy and not very important. */
3834 fvdef = finlist;
3835 continue;
3836 case flistseen:
3837 if (plainc || declarations)
3839 make_C_tag (true); /* a function */
3840 fvdef = fignore;
3842 break;
3843 default:
3844 break;
3846 if (structdef == stagseen && !cjava)
3848 popclass_above (bracelev);
3849 structdef = snone;
3851 break;
3852 case dsharpseen:
3853 savetoken = token;
3854 break;
3855 default:
3856 break;
3858 if (!yacc_rules || lp == newlb.buffer + 1)
3860 tokoff = lp - 1 - newlb.buffer;
3861 toklen = 1;
3862 midtoken = true;
3864 continue;
3865 } /* if (begtoken) */
3866 } /* if must look at token */
3869 /* Detect end of line, colon, comma, semicolon and various braces
3870 after having handled a token.*/
3871 switch (c)
3873 case ':':
3874 if (inattribute)
3875 break;
3876 if (yacc_rules && token.offset == 0 && token.valid)
3878 make_C_tag (false); /* a yacc function */
3879 break;
3881 if (definedef != dnone)
3882 break;
3883 switch (objdef)
3885 case otagseen:
3886 objdef = oignore;
3887 make_C_tag (true); /* an Objective C class */
3888 break;
3889 case omethodtag:
3890 case omethodparm:
3891 objdef = omethodcolon;
3892 if (class_qualify)
/* NOTE(review): this toklen shadows the function-level toklen.  */
3894 ptrdiff_t toklen = token_name.len;
3895 linebuffer_setlen (&token_name, toklen + 1);
3896 strcpy (token_name.buffer + toklen, ":");
3898 break;
3899 default:
3900 break;
3902 if (structdef == stagseen)
3904 structdef = scolonseen;
3905 break;
3907 /* Should be useless, but may work as a safety net. */
3908 if (cplpl && fvdef == flistseen)
3910 make_C_tag (true); /* a function */
3911 fvdef = fignore;
3912 break;
3914 break;
3915 case ';':
3916 if (definedef != dnone || inattribute)
3917 break;
3918 switch (typdef)
3920 case tend:
3921 case ttypeseen:
3922 make_C_tag (false); /* a typedef */
3923 typdef = tnone;
3924 fvdef = fvnone;
3925 break;
3926 case tnone:
3927 case tinbody:
3928 case tignore:
3929 switch (fvdef)
3931 case fignore:
3932 if (typdef == tignore || cplpl)
3933 fvdef = fvnone;
3934 break;
3935 case fvnameseen:
3936 if ((globals && bracelev == 0 && (!fvextern || declarations))
3937 || (members && instruct))
3938 make_C_tag (false); /* a variable */
3939 fvextern = false;
3940 fvdef = fvnone;
3941 token.valid = false;
3942 break;
3943 case flistseen:
3944 if ((declarations
3945 && (cplpl || !instruct)
3946 && (typdef == tnone || (typdef != tignore && instruct)))
3947 || (members
3948 && plainc && instruct))
3949 make_C_tag (true); /* a function */
3950 FALLTHROUGH;
3951 default:
3952 fvextern = false;
3953 fvdef = fvnone;
3954 if (declarations
3955 && cplpl && structdef == stagseen)
3956 make_C_tag (false); /* forward declaration */
3957 else
3958 token.valid = false;
3959 } /* switch (fvdef) */
3960 FALLTHROUGH;
3961 default:
3962 if (!instruct)
3963 typdef = tnone;
3965 if (structdef == stagseen)
3966 structdef = snone;
3967 break;
3968 case ',':
3969 if (definedef != dnone || inattribute)
3970 break;
3971 switch (objdef)
3973 case omethodtag:
3974 case omethodparm:
3975 make_C_tag (true); /* an Objective C method */
3976 objdef = oinbody;
3977 break;
3978 default:
3979 break;
3981 switch (fvdef)
3983 case fdefunkey:
3984 case foperator:
3985 case fstartlist:
3986 case finlist:
3987 case fignore:
3988 break;
3989 case vignore:
3990 if (instruct && parlev == 0)
3991 fvdef = fvnone;
3992 break;
3993 case fdefunname:
3994 fvdef = fignore;
3995 break;
3996 case fvnameseen:
3997 if (parlev == 0
3998 && ((globals
3999 && bracelev == 0
4000 && templatelev == 0
4001 && (!fvextern || declarations))
4002 || (members && instruct)))
4003 make_C_tag (false); /* a variable */
4004 break;
4005 case flistseen:
4006 if ((declarations && typdef == tnone && !instruct)
4007 || (members && typdef != tignore && instruct))
4009 make_C_tag (true); /* a function */
4010 fvdef = fvnameseen;
4012 else if (!declarations)
4013 fvdef = fvnone;
4014 token.valid = false;
4015 break;
4016 default:
4017 fvdef = fvnone;
4019 if (structdef == stagseen)
4020 structdef = snone;
4021 break;
4022 case ']':
4023 if (definedef != dnone || inattribute)
4024 break;
4025 if (structdef == stagseen)
4026 structdef = snone;
4027 switch (typdef)
4029 case ttypeseen:
4030 case tend:
4031 typdef = tignore;
4032 make_C_tag (false); /* a typedef */
4033 break;
4034 case tnone:
4035 case tinbody:
4036 switch (fvdef)
4038 case foperator:
4039 case finlist:
4040 case fignore:
4041 case vignore:
4042 break;
4043 case fvnameseen:
4044 if ((members && bracelev == 1)
4045 || (globals && bracelev == 0
4046 && (!fvextern || declarations)))
4047 make_C_tag (false); /* a variable */
4048 FALLTHROUGH;
4049 default:
4050 fvdef = fvnone;
4052 break;
4053 default:
4054 break;
4056 break;
4057 case '(':
4058 if (inattribute)
4060 attrparlev++;
4061 break;
4063 if (definedef != dnone)
4064 break;
4065 if (objdef == otagseen && parlev == 0)
4066 objdef = oparenseen;
4067 switch (fvdef)
4069 case fvnameseen:
4070 if (typdef == ttypeseen
4071 && *lp != '*'
4072 && !instruct)
4074 /* This handles constructs like:
4075 typedef void OperatorFun (int fun); */
4076 make_C_tag (false);
4077 typdef = tignore;
4078 fvdef = fignore;
4079 break;
4081 FALLTHROUGH;
4082 case foperator:
4083 fvdef = fstartlist;
4084 break;
4085 case flistseen:
4086 fvdef = finlist;
4087 break;
4088 default:
4089 break;
4091 parlev++;
4092 break;
4093 case ')':
4094 if (inattribute)
4096 if (--attrparlev == 0)
4097 inattribute = false;
4098 break;
4100 if (in_enum_bf)
4102 if (--parlev == 0)
4103 in_enum_bf = false;
4104 break;
4106 if (definedef != dnone)
4107 break;
4108 if (objdef == ocatseen && parlev == 1)
4110 make_C_tag (true); /* an Objective C category */
4111 objdef = oignore;
4113 if (--parlev == 0)
4115 switch (fvdef)
4117 case fstartlist:
4118 case finlist:
4119 fvdef = flistseen;
4120 break;
4121 default:
4122 break;
4124 if (!instruct
4125 && (typdef == tend
4126 || typdef == ttypeseen))
4128 typdef = tignore;
4129 make_C_tag (false); /* a typedef */
4132 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
4133 parlev = 0;
4134 break;
4135 case '{':
4136 if (definedef != dnone)
4137 break;
4138 if (typdef == ttypeseen)
4140 /* Whenever typdef is set to tinbody (currently only
4141 here), typdefbracelev should be set to bracelev. */
4142 typdef = tinbody;
4143 typdefbracelev = bracelev;
4145 switch (fvdef)
4147 case flistseen:
4148 if (cplpl && !class_qualify)
4150 /* Remove class and namespace qualifiers from the token,
4151 leaving only the method/member name. */
4152 char *cc, *uqname = token_name.buffer;
4153 char *tok_end = token_name.buffer + token_name.len;
4155 for (cc = token_name.buffer; cc < tok_end; cc++)
4157 if (*cc == ':' && cc[1] == ':')
4159 uqname = cc + 2;
4160 cc++;
4163 if (uqname > token_name.buffer)
4165 ptrdiff_t uqlen = strlen (uqname);
4166 linebuffer_setlen (&token_name, uqlen);
4167 memmove (token_name.buffer, uqname, uqlen + 1);
4170 make_C_tag (true); /* a function */
4171 FALLTHROUGH;
4172 case fignore:
4173 fvdef = fvnone;
4174 break;
4175 case fvnone:
4176 switch (objdef)
4178 case otagseen:
4179 make_C_tag (true); /* an Objective C class */
4180 objdef = oignore;
4181 break;
4182 case omethodtag:
4183 case omethodparm:
4184 make_C_tag (true); /* an Objective C method */
4185 objdef = oinbody;
4186 break;
4187 default:
4188 /* Neutralize `extern "C" {' grot. */
4189 if (bracelev == 0 && structdef == snone && nestlev == 0
4190 && typdef == tnone)
4191 bracelev = -1;
4193 break;
4194 default:
4195 break;
4197 switch (structdef)
4199 case skeyseen: /* unnamed struct */
4200 pushclass_above (bracelev, NULL, 0);
4201 structdef = snone;
4202 break;
4203 case stagseen: /* named struct or enum */
4204 case scolonseen: /* a class */
4205 pushclass_above (bracelev,token.line+token.offset, token.length);
4206 structdef = snone;
4207 make_C_tag (false); /* a struct or enum */
4208 break;
4209 default:
4210 break;
4212 bracelev += 1;
4213 break;
4214 case '*':
4215 if (definedef != dnone)
4216 break;
4217 if (fvdef == fstartlist)
4219 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4220 token.valid = false;
4222 break;
4223 case '}':
4224 if (definedef != dnone)
4225 break;
4226 bracelev -= 1;
4227 /* If we see a closing brace in column zero, and we weren't told to
4228 ignore indentation, we assume this is the final brace of a function
4229 or struct definition, and reset bracelev to zero. */
4230 if (!ignoreindent && lp == newlb.buffer + 1)
4232 if (bracelev != 0)
4233 token.valid = false; /* unexpected value, token unreliable */
4234 bracelev = 0; /* reset brace level if first column */
4235 parlev = 0; /* also reset paren level, just in case... */
4237 else if (bracelev < 0)
4239 token.valid = false; /* something gone amiss, token unreliable */
4240 bracelev = 0;
4242 if (bracelev == 0 && fvdef == vignore)
4243 fvdef = fvnone; /* end of function */
4244 popclass_above (bracelev);
4245 structdef = snone;
4246 /* Only if typdef == tinbody is typdefbracelev significant. */
4247 if (typdef == tinbody && bracelev <= typdefbracelev)
4249 assert (bracelev == typdefbracelev);
4250 typdef = tend;
4252 break;
4253 case '=':
4254 if (definedef != dnone)
4255 break;
4256 switch (fvdef)
4258 case foperator:
4259 case finlist:
4260 case fignore:
4261 case vignore:
4262 break;
4263 case fvnameseen:
4264 if ((members && bracelev == 1)
4265 || (globals && bracelev == 0 && (!fvextern || declarations)))
4266 make_C_tag (false); /* a variable */
4267 FALLTHROUGH;
4268 default:
4269 fvdef = vignore;
4271 break;
4272 case '<':
4273 if (cplpl
4274 && (structdef == stagseen || fvdef == fvnameseen))
4276 templatelev++;
4277 break;
4279 goto resetfvdef;
4280 case '>':
4281 if (templatelev > 0)
4283 templatelev--;
4284 break;
4286 goto resetfvdef;
4287 case '+':
4288 case '-':
4289 if (objdef == oinbody && bracelev == 0)
4291 objdef = omethodsign;
4292 break;
4294 FALLTHROUGH;
4295 case '#': case '~': case '&': case '%': case '/':
4296 case '|': case '^': case '!': case '.': case '?':
4297 resetfvdef:
4298 if (definedef != dnone)
4299 break;
4300 /* These surely cannot follow a function tag in C. */
4301 switch (fvdef)
4303 case foperator:
4304 case finlist:
4305 case fignore:
4306 case vignore:
4307 break;
4308 default:
4309 fvdef = fvnone;
4311 break;
4312 case '\0':
4313 if (objdef == otagseen)
4315 make_C_tag (true); /* an Objective C class */
4316 objdef = oignore;
4318 /* If a macro spans multiple lines don't reset its state. */
4319 if (quotednl)
4320 CNL_SAVE_DEFINEDEF ();
4321 else
4322 CNL ();
4323 break;
4324 } /* switch (c) */
4326 } /* while not eof */
/* Release the line buffers initialized at the top of the function.  */
4328 free (lbs[0].lb.buffer);
4329 free (lbs[1].lb.buffer);
4333 * Process either a C++ file or a C file depending on the setting
4334 * of a global flag.
4336 static void
4337 default_C_entries (FILE *inf)
4339 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Scan INF as plain C, whatever the global settings say.  */
static void
plain_C_entries (FILE *inf)
{
  int dialect = 0;

  C_entries (dialect, inf);
}
4349 /* Always do C++. */
4350 static void
4351 Cplusplus_entries (FILE *inf)
4353 C_entries (C_PLPL, inf);
4356 /* Always do Java. */
4357 static void
4358 Cjava_entries (FILE *inf)
4360 C_entries (C_JAVA, inf);
4363 /* Always do C*. */
4364 static void
4365 Cstar_entries (FILE *inf)
4367 C_entries (C_STAR, inf);
4370 /* Always do Yacc. */
4371 static void
4372 Yacc_entries (FILE *inf)
4374 C_entries (YACC, inf);
4378 /* Useful macros. */
/* Loop header that runs the statement following it once per input line.
   Before each iteration it checks perhaps_more_input on file_pointer,
   reads one line into line_buffer with readline, and points char_pointer
   at the start of that line.  The trailing true keeps the condition from
   depending on the value of the assignment.  */
4379 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4380 while (perhaps_more_input (file_pointer) \
4381 && (readline (&(line_buffer), file_pointer), \
4382 (char_pointer) = (line_buffer).buffer, \
4383 true))
/* True when cp points at the keyword kw and the character right after the
   keyword satisfies notinname.  On success cp is moved past the keyword
   and past the spaces that follow it; on failure cp is left unchanged.
   cp is evaluated more than once and is assigned to, so it must be a
   modifiable pointer expression without side effects.  */
4385 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4386 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4387 && strneq (cp, kw, sizeof (kw) - 1) /* cp points at kw */ \
4388 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4389 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
4391 /* Similar to LOOKING_AT, but the comparison uses strncaseeq, the character
   after the keyword is not checked with notinname, and on success cp is
   only moved past the keyword: spaces after it are not skipped. */
4392 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4393 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4394 && strncaseeq (cp, kw, sizeof (kw) - 1) /* cp points at kw */ \
4395 && ((cp) += sizeof (kw) - 1, true)) /* skip kw, not the spaces after it */
4398 * Read a file, but do no processing. This is used to do regexp
4399 * matching on files that have no language defined.
4401 static void
4402 just_read_file (FILE *inf)
4404 while (perhaps_more_input (inf))
4405 readline (&lb, inf);
4409 /* Fortran parsing */
4411 static void F_takeprec (void);
4412 static void F_getit (FILE *);
4414 static void
4415 F_takeprec (void)
4417 dbp = skip_spaces (dbp);
4418 if (*dbp != '*')
4419 return;
4420 dbp++;
4421 dbp = skip_spaces (dbp);
4422 if (strneq (dbp, "(*)", 3))
4424 dbp += 3;
4425 return;
4427 if (!c_isdigit (*dbp))
4429 --dbp; /* force failure */
4430 return;
4433 dbp++;
4434 while (c_isdigit (*dbp));
4437 static void
4438 F_getit (FILE *inf)
4440 register char *cp;
4442 dbp = skip_spaces (dbp);
4443 if (*dbp == '\0')
4445 readline (&lb, inf);
4446 dbp = lb.buffer;
4447 if (dbp[5] != '&')
4448 return;
4449 dbp += 6;
4450 dbp = skip_spaces (dbp);
4452 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4453 return;
4454 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4455 continue;
4456 make_tag (dbp, cp-dbp, true,
4457 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4461 static void
4462 Fortran_functions (FILE *inf)
4464 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4466 if (*dbp == '%')
4467 dbp++; /* Ratfor escape to fortran */
4468 dbp = skip_spaces (dbp);
4469 if (*dbp == '\0')
4470 continue;
4472 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4473 dbp = skip_spaces (dbp);
4475 if (LOOKING_AT_NOCASE (dbp, "pure"))
4476 dbp = skip_spaces (dbp);
4478 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4479 dbp = skip_spaces (dbp);
4481 switch (c_tolower (*dbp))
4483 case 'i':
4484 if (nocase_tail ("integer"))
4485 F_takeprec ();
4486 break;
4487 case 'r':
4488 if (nocase_tail ("real"))
4489 F_takeprec ();
4490 break;
4491 case 'l':
4492 if (nocase_tail ("logical"))
4493 F_takeprec ();
4494 break;
4495 case 'c':
4496 if (nocase_tail ("complex") || nocase_tail ("character"))
4497 F_takeprec ();
4498 break;
4499 case 'd':
4500 if (nocase_tail ("double"))
4502 dbp = skip_spaces (dbp);
4503 if (*dbp == '\0')
4504 continue;
4505 if (nocase_tail ("precision"))
4506 break;
4507 continue;
4509 break;
4511 dbp = skip_spaces (dbp);
4512 if (*dbp == '\0')
4513 continue;
4514 switch (c_tolower (*dbp))
4516 case 'f':
4517 if (nocase_tail ("function"))
4518 F_getit (inf);
4519 continue;
4520 case 's':
4521 if (nocase_tail ("subroutine"))
4522 F_getit (inf);
4523 continue;
4524 case 'e':
4525 if (nocase_tail ("entry"))
4526 F_getit (inf);
4527 continue;
4528 case 'b':
4529 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4531 dbp = skip_spaces (dbp);
4532 if (*dbp == '\0') /* assume un-named */
4533 make_tag ("blockdata", 9, true,
4534 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4535 else
4536 F_getit (inf); /* look for name */
4538 continue;
4545 * Go language support
4546 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4548 static void
4549 Go_functions(FILE *inf)
4551 char *cp, *name;
4553 LOOP_ON_INPUT_LINES(inf, lb, cp)
4555 cp = skip_spaces (cp);
4557 if (LOOKING_AT (cp, "package"))
4559 name = cp;
4560 while (!notinname (*cp) && *cp != '\0')
4561 cp++;
4562 make_tag (name, cp - name, false, lb.buffer,
4563 cp - lb.buffer + 1, lineno, linecharno);
4565 else if (LOOKING_AT (cp, "func"))
4567 /* Go implementation of interface, such as:
4568 func (n *Integer) Add(m Integer) ...
4569 skip `(n *Integer)` part.
4571 if (*cp == '(')
4573 while (*cp != ')')
4574 cp++;
4575 cp = skip_spaces (cp+1);
4578 if (*cp)
4580 name = cp;
4582 while (!notinname (*cp))
4583 cp++;
4585 make_tag (name, cp - name, true, lb.buffer,
4586 cp - lb.buffer + 1, lineno, linecharno);
4589 else if (members && LOOKING_AT (cp, "type"))
4591 name = cp;
4593 /* Ignore the likes of the following:
4594 type (
4598 if (*cp == '(')
4599 return;
4601 while (!notinname (*cp) && *cp != '\0')
4602 cp++;
4604 make_tag (name, cp - name, false, lb.buffer,
4605 cp - lb.buffer + 1, lineno, linecharno);
/*
 * Ada parsing
 * Original code by
 * Philippe Waroquiers (1998)
 */
4617 /* Once we are positioned after an "interesting" keyword, let's get
4618 the real tag value necessary. */
4619 static void
4620 Ada_getit (FILE *inf, const char *name_qualifier)
4622 register char *cp;
4623 char *name;
4624 char c;
4626 while (perhaps_more_input (inf))
4628 dbp = skip_spaces (dbp);
4629 if (*dbp == '\0'
4630 || (dbp[0] == '-' && dbp[1] == '-'))
4632 readline (&lb, inf);
4633 dbp = lb.buffer;
4635 switch (c_tolower (*dbp))
4637 case 'b':
4638 if (nocase_tail ("body"))
4640 /* Skipping body of procedure body or package body or ....
4641 resetting qualifier to body instead of spec. */
4642 name_qualifier = "/b";
4643 continue;
4645 break;
4646 case 't':
4647 /* Skipping type of task type or protected type ... */
4648 if (nocase_tail ("type"))
4649 continue;
4650 break;
4652 if (*dbp == '"')
4654 dbp += 1;
4655 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4656 continue;
4658 else
4660 dbp = skip_spaces (dbp);
4661 for (cp = dbp;
4662 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4663 cp++)
4664 continue;
4665 if (cp == dbp)
4666 return;
4668 c = *cp;
4669 *cp = '\0';
4670 name = concat (dbp, name_qualifier, "");
4671 *cp = c;
4672 make_tag (name, strlen (name), true,
4673 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4674 free (name);
4675 if (c == '"')
4676 dbp = cp + 1;
4677 return;
4681 static void
4682 Ada_funcs (FILE *inf)
4684 bool inquote = false;
4685 bool skip_till_semicolumn = false;
4687 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4689 while (*dbp != '\0')
4691 /* Skip a string i.e. "abcd". */
4692 if (inquote || (*dbp == '"'))
4694 dbp = strchr (dbp + !inquote, '"');
4695 if (dbp != NULL)
4697 inquote = false;
4698 dbp += 1;
4699 continue; /* advance char */
4701 else
4703 inquote = true;
4704 break; /* advance line */
4708 /* Skip comments. */
4709 if (dbp[0] == '-' && dbp[1] == '-')
4710 break; /* advance line */
4712 /* Skip character enclosed in single quote i.e. 'a'
4713 and skip single quote starting an attribute i.e. 'Image. */
4714 if (*dbp == '\'')
4716 dbp++ ;
4717 if (*dbp != '\0')
4718 dbp++;
4719 continue;
4722 if (skip_till_semicolumn)
4724 if (*dbp == ';')
4725 skip_till_semicolumn = false;
4726 dbp++;
4727 continue; /* advance char */
4730 /* Search for beginning of a token. */
4731 if (!begtoken (*dbp))
4733 dbp++;
4734 continue; /* advance char */
4737 /* We are at the beginning of a token. */
4738 switch (c_tolower (*dbp))
4740 case 'f':
4741 if (!packages_only && nocase_tail ("function"))
4742 Ada_getit (inf, "/f");
4743 else
4744 break; /* from switch */
4745 continue; /* advance char */
4746 case 'p':
4747 if (!packages_only && nocase_tail ("procedure"))
4748 Ada_getit (inf, "/p");
4749 else if (nocase_tail ("package"))
4750 Ada_getit (inf, "/s");
4751 else if (nocase_tail ("protected")) /* protected type */
4752 Ada_getit (inf, "/t");
4753 else
4754 break; /* from switch */
4755 continue; /* advance char */
4757 case 'u':
4758 if (typedefs && !packages_only && nocase_tail ("use"))
4760 /* when tagging types, avoid tagging use type Pack.Typename;
4761 for this, we will skip everything till a ; */
4762 skip_till_semicolumn = true;
4763 continue; /* advance char */
4766 case 't':
4767 if (!packages_only && nocase_tail ("task"))
4768 Ada_getit (inf, "/k");
4769 else if (typedefs && !packages_only && nocase_tail ("type"))
4771 Ada_getit (inf, "/t");
4772 while (*dbp != '\0')
4773 dbp += 1;
4775 else
4776 break; /* from switch */
4777 continue; /* advance char */
4780 /* Look for the end of the token. */
4781 while (!endtoken (*dbp))
4782 dbp++;
4784 } /* advance char */
4785 } /* advance line */
4790 * Unix and microcontroller assembly tag handling
4791 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4792 * Idea by Bob Weiner, Motorola Inc. (1994)
4794 static void
4795 Asm_labels (FILE *inf)
4797 register char *cp;
4799 LOOP_ON_INPUT_LINES (inf, lb, cp)
4801 /* If first char is alphabetic or one of [_.$], test for colon
4802 following identifier. */
4803 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4805 /* Read past label. */
4806 cp++;
4807 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4808 cp++;
4809 if (*cp == ':' || c_isspace (*cp))
4810 /* Found end of label, so copy it and add it to the table. */
4811 make_tag (lb.buffer, cp - lb.buffer, true,
4812 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4819 * Perl support
4820 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4821 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4822 * Perl variable names: /^(my|local).../
4823 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4824 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4825 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4827 static void
4828 Perl_functions (FILE *inf)
4830 char *package = savestr ("main"); /* current package name */
4831 register char *cp;
4833 LOOP_ON_INPUT_LINES (inf, lb, cp)
4835 cp = skip_spaces (cp);
4837 if (LOOKING_AT (cp, "package"))
4839 free (package);
4840 get_tag (cp, &package);
4842 else if (LOOKING_AT (cp, "sub"))
4844 char *pos, *sp;
4846 subr:
4847 sp = cp;
4848 while (!notinname (*cp))
4849 cp++;
4850 if (cp == sp)
4851 continue; /* nothing found */
4852 pos = strchr (sp, ':');
4853 if (pos && pos < cp && pos[1] == ':')
4855 /* The name is already qualified. */
4856 if (!class_qualify)
4858 char *q = pos + 2, *qpos;
4859 while ((qpos = strchr (q, ':')) != NULL
4860 && qpos < cp
4861 && qpos[1] == ':')
4862 q = qpos + 2;
4863 sp = q;
4865 make_tag (sp, cp - sp, true,
4866 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4868 else if (class_qualify)
4869 /* Qualify it. */
4871 char savechar, *name;
4873 savechar = *cp;
4874 *cp = '\0';
4875 name = concat (package, "::", sp);
4876 *cp = savechar;
4877 make_tag (name, strlen (name), true,
4878 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4879 free (name);
4881 else
4882 make_tag (sp, cp - sp, true,
4883 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4885 else if (LOOKING_AT (cp, "use constant")
4886 || LOOKING_AT (cp, "use constant::defer"))
4888 /* For hash style multi-constant like
4889 use constant { FOO => 123,
4890 BAR => 456 };
4891 only the first FOO is picked up. Parsing across the value
4892 expressions would be difficult in general, due to possible nested
4893 hashes, here-documents, etc. */
4894 if (*cp == '{')
4895 cp = skip_spaces (cp+1);
4896 goto subr;
4898 else if (globals) /* only if we are tagging global vars */
4900 /* Skip a qualifier, if any. */
4901 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4902 /* After "my" or "local", but before any following paren or space. */
4903 char *varstart = cp;
4905 if (qual /* should this be removed? If yes, how? */
4906 && (*cp == '$' || *cp == '@' || *cp == '%'))
4908 varstart += 1;
4910 cp++;
4911 while (c_isalnum (*cp) || *cp == '_');
4913 else if (qual)
4915 /* Should be examining a variable list at this point;
4916 could insist on seeing an open parenthesis. */
4917 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4918 cp++;
4920 else
4921 continue;
4923 make_tag (varstart, cp - varstart, false,
4924 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4927 free (package);
4932 * Python support
4933 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4934 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4935 * More ideas by seb bacon <seb@jamkit.com> (2002)
4937 static void
4938 Python_functions (FILE *inf)
4940 register char *cp;
4942 LOOP_ON_INPUT_LINES (inf, lb, cp)
4944 cp = skip_spaces (cp);
4945 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4947 char *name = cp;
4948 while (!notinname (*cp) && *cp != ':')
4949 cp++;
4950 make_tag (name, cp - name, true,
4951 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4957 * Ruby support
4958 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4960 static void
4961 Ruby_functions (FILE *inf)
4963 char *cp = NULL;
4964 bool reader = false, writer = false, alias = false, continuation = false;
4966 LOOP_ON_INPUT_LINES (inf, lb, cp)
4968 bool is_class = false;
4969 bool is_method = false;
4970 char *name;
4972 cp = skip_spaces (cp);
4973 if (!continuation
4974 /* Constants. */
4975 && c_isalpha (*cp) && c_isupper (*cp))
4977 char *bp, *colon = NULL;
4979 name = cp;
4981 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4983 if (*cp == ':')
4984 colon = cp;
4986 if (cp > name + 1)
4988 bp = skip_spaces (cp);
4989 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4991 if (colon && !c_isspace (colon[1]))
4992 name = colon + 1;
4993 make_tag (name, cp - name, false,
4994 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4998 else if (!continuation
4999 /* Modules, classes, methods. */
5000 && ((is_method = LOOKING_AT (cp, "def"))
5001 || (is_class = LOOKING_AT (cp, "class"))
5002 || LOOKING_AT (cp, "module")))
5004 const char self_name[] = "self.";
5005 const size_t self_size1 = sizeof (self_name) - 1;
5007 name = cp;
5009 /* Ruby method names can end in a '='. Also, operator overloading can
5010 define operators whose names include '='. */
5011 while (!notinname (*cp) || *cp == '=')
5012 cp++;
5014 /* Remove "self." from the method name. */
5015 if (cp - name > self_size1
5016 && strneq (name, self_name, self_size1))
5017 name += self_size1;
5019 /* Remove the class/module qualifiers from method names. */
5020 if (is_method)
5022 char *q;
5024 for (q = name; q < cp && *q != '.'; q++)
5026 if (q < cp - 1) /* punt if we see just "FOO." */
5027 name = q + 1;
5030 /* Don't tag singleton classes. */
5031 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
5032 continue;
5034 make_tag (name, cp - name, true,
5035 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5037 else
5039 /* Tag accessors and aliases. */
5041 if (!continuation)
5042 reader = writer = alias = false;
5044 while (*cp && *cp != '#')
5046 if (!continuation)
5048 reader = writer = alias = false;
5049 if (LOOKING_AT (cp, "attr_reader"))
5050 reader = true;
5051 else if (LOOKING_AT (cp, "attr_writer"))
5052 writer = true;
5053 else if (LOOKING_AT (cp, "attr_accessor"))
5055 reader = true;
5056 writer = true;
5058 else if (LOOKING_AT (cp, "alias_method"))
5059 alias = true;
5061 if (reader || writer || alias)
5063 do {
5064 char *np;
5066 cp = skip_spaces (cp);
5067 if (*cp == '(')
5068 cp = skip_spaces (cp + 1);
5069 np = cp;
5070 cp = skip_name (cp);
5071 if (*np != ':')
5072 continue;
5073 np++;
5074 if (reader)
5076 make_tag (np, cp - np, true,
5077 lb.buffer, cp - lb.buffer + 1,
5078 lineno, linecharno);
5079 continuation = false;
5081 if (writer)
5083 size_t name_len = cp - np + 1;
5084 char *wr_name = xmalloc (name_len + 1);
5086 strcpy (mempcpy (wr_name, np, name_len - 1), "=");
5087 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
5088 lineno, linecharno);
5089 if (debug)
5090 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n", wr_name,
5091 curfdp->taggedfname, lineno, lb.buffer);
5092 continuation = false;
5094 if (alias)
5096 if (!continuation)
5097 make_tag (np, cp - np, true,
5098 lb.buffer, cp - lb.buffer + 1,
5099 lineno, linecharno);
5100 continuation = false;
5101 while (*cp && *cp != '#' && *cp != ';')
5103 if (*cp == ',')
5104 continuation = true;
5105 else if (!c_isspace (*cp))
5106 continuation = false;
5107 cp++;
5109 if (*cp == ';')
5110 continuation = false;
5112 cp = skip_spaces (cp);
5113 } while ((alias
5114 ? (*cp == ',')
5115 : (continuation = (*cp == ',')))
5116 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
5118 if (*cp != '#')
5119 cp = skip_name (cp);
5120 while (*cp && *cp != '#' && notinname (*cp))
5121 cp++;
5129 * Rust support
5130 * Look for:
5131 * - fn: Function
5132 * - struct: Structure
5133 * - enum: Enumeration
5134 * - macro_rules!: Macro
5136 static void
5137 Rust_entries (FILE *inf)
5139 char *cp, *name;
5140 bool is_func = false;
5142 LOOP_ON_INPUT_LINES(inf, lb, cp)
5144 cp = skip_spaces(cp);
5145 name = cp;
5147 // Skip 'pub' keyworld
5148 (void)LOOKING_AT (cp, "pub");
5150 // Look for define
5151 if ((is_func = LOOKING_AT (cp, "fn"))
5152 || LOOKING_AT (cp, "enum")
5153 || LOOKING_AT (cp, "struct")
5154 || (is_func = LOOKING_AT (cp, "macro_rules!")))
5156 cp = skip_spaces (cp);
5157 name = cp;
5159 while (!notinname (*cp))
5160 cp++;
5162 make_tag (name, cp - name, is_func,
5163 lb.buffer, cp - lb.buffer + 1,
5164 lineno, linecharno);
5165 is_func = false;
5172 * PHP support
5173 * Look for:
5174 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5175 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5176 * - /^[ \t]*define\(\"[^\"]+/
5177 * Only with --members:
5178 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5179 * Idea by Diez B. Roggisch (2001)
5181 static void
5182 PHP_functions (FILE *inf)
5184 char *cp, *name;
5185 bool search_identifier = false;
5187 LOOP_ON_INPUT_LINES (inf, lb, cp)
5189 cp = skip_spaces (cp);
5190 name = cp;
5191 if (search_identifier
5192 && *cp != '\0')
5194 while (!notinname (*cp))
5195 cp++;
5196 make_tag (name, cp - name, true,
5197 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5198 search_identifier = false;
5200 else if (LOOKING_AT (cp, "function"))
5202 if (*cp == '&')
5203 cp = skip_spaces (cp+1);
5204 if (*cp != '\0')
5206 name = cp;
5207 while (!notinname (*cp))
5208 cp++;
5209 make_tag (name, cp - name, true,
5210 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5212 else
5213 search_identifier = true;
5215 else if (LOOKING_AT (cp, "class"))
5217 if (*cp != '\0')
5219 name = cp;
5220 while (*cp != '\0' && !c_isspace (*cp))
5221 cp++;
5222 make_tag (name, cp - name, false,
5223 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5225 else
5226 search_identifier = true;
5228 else if (strneq (cp, "define", 6)
5229 && (cp = skip_spaces (cp+6))
5230 && *cp++ == '('
5231 && (*cp == '"' || *cp == '\''))
5233 char quote = *cp++;
5234 name = cp;
5235 while (*cp != quote && *cp != '\0')
5236 cp++;
5237 make_tag (name, cp - name, false,
5238 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5240 else if (members
5241 && LOOKING_AT (cp, "var")
5242 && *cp == '$')
5244 name = cp;
5245 while (!notinname (*cp))
5246 cp++;
5247 make_tag (name, cp - name, false,
5248 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5255 * Cobol tag functions
5256 * We could look for anything that could be a paragraph name.
5257 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5258 * Idea by Corny de Souza (1993)
5260 static void
5261 Cobol_paragraphs (FILE *inf)
5263 register char *bp, *ep;
5265 LOOP_ON_INPUT_LINES (inf, lb, bp)
5267 if (lb.len < 9)
5268 continue;
5269 bp += 8;
5271 /* If eoln, compiler option or comment ignore whole line. */
5272 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5273 continue;
5275 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5276 continue;
5277 if (*ep++ == '.')
5278 make_tag (bp, ep - bp, true,
5279 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5285 * Makefile support
5286 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5288 static void
5289 Makefile_targets (FILE *inf)
5291 register char *bp;
5293 LOOP_ON_INPUT_LINES (inf, lb, bp)
5295 if (*bp == '\t' || *bp == '#')
5296 continue;
5297 while (*bp != '\0' && *bp != '=' && *bp != ':')
5298 bp++;
5299 if (*bp == ':' || (globals && *bp == '='))
5301 /* We should detect if there is more than one tag, but we do not.
5302 We just skip initial and final spaces. */
5303 char * namestart = skip_spaces (lb.buffer);
5304 while (--bp > namestart)
5305 if (!notinname (*bp))
5306 break;
5307 make_tag (namestart, bp - namestart + 1, true,
5308 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5315 * Pascal parsing
5316 * Original code by Mosur K. Mohan (1989)
5318 * Locates tags for procedures & functions. Doesn't do any type- or
5319 * var-definitions. It does look for the keyword "extern" or
5320 * "forward" immediately following the procedure statement; if found,
5321 * the tag is skipped.
5323 static void
5324 Pascal_functions (FILE *inf)
5326 linebuffer tline; /* mostly copied from C_entries */
5327 intmax_t save_lcno, save_lineno;
5328 ptrdiff_t namelen, taglen;
5329 char c, *name;
5331 bool /* each of these flags is true if: */
5332 incomment, /* point is inside a comment */
5333 inquote, /* point is inside '..' string */
5334 get_tagname, /* point is after PROCEDURE/FUNCTION
5335 keyword, so next item = potential tag */
5336 found_tag, /* point is after a potential tag */
5337 inparms, /* point is within parameter-list */
5338 verify_tag; /* point has passed the parm-list, so the
5339 next token will determine whether this
5340 is a FORWARD/EXTERN to be ignored, or
5341 whether it is a real tag */
5343 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5344 name = NULL; /* keep compiler quiet */
5345 dbp = lb.buffer;
5346 *dbp = '\0';
5347 linebuffer_init (&tline);
5349 incomment = inquote = false;
5350 found_tag = false; /* have a proc name; check if extern */
5351 get_tagname = false; /* found "procedure" keyword */
5352 inparms = false; /* found '(' after "proc" */
5353 verify_tag = false; /* check if "extern" is ahead */
5356 while (perhaps_more_input (inf)) /* long main loop to get next char */
5358 c = *dbp++;
5359 if (c == '\0') /* if end of line */
5361 readline (&lb, inf);
5362 dbp = lb.buffer;
5363 if (*dbp == '\0')
5364 continue;
5365 if (!((found_tag && verify_tag)
5366 || get_tagname))
5367 c = *dbp++; /* only if don't need *dbp pointing
5368 to the beginning of the name of
5369 the procedure or function */
5371 if (incomment)
5373 if (c == '}') /* within { } comments */
5374 incomment = false;
5375 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5377 dbp++;
5378 incomment = false;
5380 continue;
5382 else if (inquote)
5384 if (c == '\'')
5385 inquote = false;
5386 continue;
5388 else
5389 switch (c)
5391 case '\'':
5392 inquote = true; /* found first quote */
5393 continue;
5394 case '{': /* found open { comment */
5395 incomment = true;
5396 continue;
5397 case '(':
5398 if (*dbp == '*') /* found open (* comment */
5400 incomment = true;
5401 dbp++;
5403 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5404 inparms = true;
5405 continue;
5406 case ')': /* end of parms list */
5407 if (inparms)
5408 inparms = false;
5409 continue;
5410 case ';':
5411 if (found_tag && !inparms) /* end of proc or fn stmt */
5413 verify_tag = true;
5414 break;
5416 continue;
5418 if (found_tag && verify_tag && (*dbp != ' '))
5420 /* Check if this is an "extern" declaration. */
5421 if (*dbp == '\0')
5422 continue;
5423 if (c_tolower (*dbp) == 'e')
5425 if (nocase_tail ("extern")) /* superfluous, really! */
5427 found_tag = false;
5428 verify_tag = false;
5431 else if (c_tolower (*dbp) == 'f')
5433 if (nocase_tail ("forward")) /* check for forward reference */
5435 found_tag = false;
5436 verify_tag = false;
5439 if (found_tag && verify_tag) /* not external proc, so make tag */
5441 found_tag = false;
5442 verify_tag = false;
5443 make_tag (name, namelen, true,
5444 tline.buffer, taglen, save_lineno, save_lcno);
5445 continue;
5448 if (get_tagname) /* grab name of proc or fn */
5450 char *cp;
5452 if (*dbp == '\0')
5453 continue;
5455 /* Find block name. */
5456 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5457 continue;
5459 /* Save all values for later tagging. */
5460 linebuffer_setlen (&tline, lb.len);
5461 strcpy (tline.buffer, lb.buffer);
5462 save_lineno = lineno;
5463 save_lcno = linecharno;
5464 name = tline.buffer + (dbp - lb.buffer);
5465 namelen = cp - dbp;
5466 taglen = cp - lb.buffer + 1;
5468 dbp = cp; /* set dbp to e-o-token */
5469 get_tagname = false;
5470 found_tag = true;
5471 continue;
5473 /* And proceed to check for "extern". */
5475 else if (!incomment && !inquote && !found_tag)
5477 /* Check for proc/fn keywords. */
5478 switch (c_tolower (c))
5480 case 'p':
5481 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5482 get_tagname = true;
5483 continue;
5484 case 'f':
5485 if (nocase_tail ("unction"))
5486 get_tagname = true;
5487 continue;
5490 } /* while not eof */
5492 free (tline.buffer);
5497 * Lisp tag functions
5498 * look for (def or (DEF, quote or QUOTE
5501 static void L_getit (void);
5503 static void
5504 L_getit (void)
5506 if (*dbp == '\'') /* Skip prefix quote */
5507 dbp++;
5508 else if (*dbp == '(')
5510 dbp++;
5511 /* Try to skip "(quote " */
5512 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5513 /* Ok, then skip "(" before name in (defstruct (foo)) */
5514 dbp = skip_spaces (dbp);
5516 get_lispy_tag (dbp);
5519 static void
5520 Lisp_functions (FILE *inf)
5522 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5524 if (dbp[0] != '(')
5525 continue;
5527 /* "(defvar foo)" is a declaration rather than a definition. */
5528 if (! declarations)
5530 char *p = dbp + 1;
5531 if (LOOKING_AT (p, "defvar"))
5533 p = skip_name (p); /* past var name */
5534 p = skip_spaces (p);
5535 if (*p == ')')
5536 continue;
5540 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5541 dbp += 3;
5543 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5545 dbp = skip_non_spaces (dbp);
5546 dbp = skip_spaces (dbp);
5547 L_getit ();
5549 else
5551 /* Check for (foo::defmumble name-defined ... */
5553 dbp++;
5554 while (!notinname (*dbp) && *dbp != ':');
5555 if (*dbp == ':')
5558 dbp++;
5559 while (*dbp == ':');
5561 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5563 dbp = skip_non_spaces (dbp);
5564 dbp = skip_spaces (dbp);
5565 L_getit ();
5574 * Lua script language parsing
5575 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5577 * "function" and "local function" are tags if they start at column 1.
5579 static void
5580 Lua_functions (FILE *inf)
5582 register char *bp;
5584 LOOP_ON_INPUT_LINES (inf, lb, bp)
5586 bp = skip_spaces (bp);
5587 if (bp[0] != 'f' && bp[0] != 'l')
5588 continue;
5590 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5592 if (LOOKING_AT (bp, "function"))
5594 char *tag_name, *tp_dot, *tp_colon;
5596 get_tag (bp, &tag_name);
5597 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5598 "foo". */
5599 tp_dot = strrchr (tag_name, '.');
5600 tp_colon = strrchr (tag_name, ':');
5601 if (tp_dot || tp_colon)
5603 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5604 ptrdiff_t len_add = p - tag_name + 1;
5606 get_tag (bp + len_add, NULL);
5614 * PostScript tags
5615 * Just look for lines where the first character is '/'
5616 * Also look at "defineps" for PSWrap
5617 * Ideas by:
5618 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5619 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5621 static void
5622 PS_functions (FILE *inf)
5624 register char *bp, *ep;
5626 LOOP_ON_INPUT_LINES (inf, lb, bp)
5628 if (bp[0] == '/')
5630 for (ep = bp+1;
5631 *ep != '\0' && *ep != ' ' && *ep != '{';
5632 ep++)
5633 continue;
5634 make_tag (bp, ep - bp, true,
5635 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5637 else if (LOOKING_AT (bp, "defineps"))
5638 get_tag (bp, NULL);
5644 * Forth tags
5645 * Ignore anything after \ followed by space or in ( )
5646 * Look for words defined by :
5647 * Look for constant, code, create, defer, value, and variable
5648 * OBP extensions: Look for buffer:, field,
5649 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5651 static void
5652 Forth_words (FILE *inf)
5654 register char *bp;
5656 LOOP_ON_INPUT_LINES (inf, lb, bp)
5657 while ((bp = skip_spaces (bp))[0] != '\0')
5658 if (bp[0] == '\\' && c_isspace (bp[1]))
5659 break; /* read next line */
5660 else if (bp[0] == '(' && c_isspace (bp[1]))
5661 do /* skip to ) or eol */
5662 bp++;
5663 while (*bp != ')' && *bp != '\0');
5664 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5665 || LOOKING_AT_NOCASE (bp, "constant")
5666 || LOOKING_AT_NOCASE (bp, "2constant")
5667 || LOOKING_AT_NOCASE (bp, "fconstant")
5668 || LOOKING_AT_NOCASE (bp, "code")
5669 || LOOKING_AT_NOCASE (bp, "create")
5670 || LOOKING_AT_NOCASE (bp, "defer")
5671 || LOOKING_AT_NOCASE (bp, "value")
5672 || LOOKING_AT_NOCASE (bp, "2value")
5673 || LOOKING_AT_NOCASE (bp, "fvalue")
5674 || LOOKING_AT_NOCASE (bp, "variable")
5675 || LOOKING_AT_NOCASE (bp, "2variable")
5676 || LOOKING_AT_NOCASE (bp, "fvariable")
5677 || LOOKING_AT_NOCASE (bp, "buffer:")
5678 || LOOKING_AT_NOCASE (bp, "field:")
5679 || LOOKING_AT_NOCASE (bp, "+field")
5680 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5681 || LOOKING_AT_NOCASE (bp, "begin-structure")
5682 || LOOKING_AT_NOCASE (bp, "synonym")
5684 && c_isspace (bp[0]))
5686 /* Yay! A definition! */
5687 char* name_start = skip_spaces (bp);
5688 char* name_end = skip_non_spaces (name_start);
5689 if (name_start < name_end)
5690 make_tag (name_start, name_end - name_start,
5691 true, lb.buffer, name_end - lb.buffer,
5692 lineno, linecharno);
5693 bp = name_end;
5695 else
5696 bp = skip_non_spaces (bp);
5701 * Scheme tag functions
5702 * look for (def... xyzzy
5703 * (def... (xyzzy
5704 * (def ... ((...(xyzzy ....
5705 * (set! xyzzy
5706 * Original code by Ken Haase (1985?)
5708 static void
5709 Scheme_functions (FILE *inf)
5711 register char *bp;
5713 LOOP_ON_INPUT_LINES (inf, lb, bp)
5715 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5717 bp = skip_non_spaces (bp+4);
5718 /* Skip over open parens and white space.
5719 Don't continue past '\0' or '='. */
5720 while (*bp && notinname (*bp) && *bp != '=')
5721 bp++;
5722 get_lispy_tag (bp);
5724 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5725 get_lispy_tag (bp);
5730 /* Find tags in TeX and LaTeX input files. */
5732 /* TEX_toktab is a table of TeX control sequences that define tags.
5733 * Each entry records one such control sequence.
5735 * Original code from who knows whom.
5736 * Ideas by:
5737 * Stefan Monnier (2002)
5740 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5742 /* Default set of control sequences to put into TEX_toktab.
5743 The value of environment var TEXTAGS is prepended to this. */
5744 static const char *TEX_defenv = "\
5745 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5746 :part:appendix:entry:index:def\
5747 :newcommand:renewcommand:newenvironment:renewenvironment";
5749 static void TEX_decode_env (const char *, const char *);
5752 * TeX/LaTeX scanning loop.
5754 static void
5755 TeX_commands (FILE *inf)
5757 char *cp;
5758 linebuffer *key;
5760 char TEX_esc = '\0';
5761 char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
5763 /* Initialize token table once from environment. */
5764 if (TEX_toktab == NULL)
5765 TEX_decode_env ("TEXTAGS", TEX_defenv);
5767 LOOP_ON_INPUT_LINES (inf, lb, cp)
5769 /* Look at each TEX keyword in line. */
5770 for (;;)
5772 /* Look for a TEX escape. */
5773 while (true)
5775 char c = *cp++;
5776 if (c == '\0' || c == '%')
5777 goto tex_next_line;
5779 /* Select either \ or ! as escape character, whichever comes
5780 first outside a comment. */
5781 if (!TEX_esc)
5782 switch (c)
5784 case '\\':
5785 TEX_esc = c;
5786 TEX_opgrp = '{';
5787 TEX_clgrp = '}';
5788 break;
5790 case '!':
5791 TEX_esc = c;
5792 TEX_opgrp = '<';
5793 TEX_clgrp = '>';
5794 break;
5797 if (c == TEX_esc)
5798 break;
5801 for (key = TEX_toktab; key->buffer != NULL; key++)
5802 if (strneq (cp, key->buffer, key->len))
5804 char *p;
5805 ptrdiff_t namelen, linelen;
5806 bool opgrp = false;
5808 cp = skip_spaces (cp + key->len);
5809 if (*cp == TEX_opgrp)
5811 opgrp = true;
5812 cp++;
5814 for (p = cp;
5815 (!c_isspace (*p) && *p != '#' &&
5816 *p != TEX_opgrp && *p != TEX_clgrp);
5817 p++)
5818 continue;
5819 namelen = p - cp;
5820 linelen = lb.len;
5821 if (!opgrp || *p == TEX_clgrp)
5823 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5824 p++;
5825 linelen = p - lb.buffer + 1;
5827 make_tag (cp, namelen, true,
5828 lb.buffer, linelen, lineno, linecharno);
5829 goto tex_next_line; /* We only tag a line once */
5832 tex_next_line:
5837 /* Read environment and prepend it to the default string.
5838 Build token table. */
5839 static void
5840 TEX_decode_env (const char *evarname, const char *defenv)
5842 const char *env, *p;
5843 ptrdiff_t len = 1;
5845 /* Append default string to environment. */
5846 env = getenv (evarname);
5847 if (!env)
5848 env = defenv;
5849 else
5850 env = concat (env, defenv, "");
5852 /* If the environment variable doesn't start with a colon, increase
5853 the length of the token table. */
5854 if (*env != ':')
5855 len++;
5857 /* Allocate a token table */
5858 for (p = env; (p = strchr (p, ':')); )
5859 if (*++p)
5860 len++;
5861 TEX_toktab = xnmalloc (len, sizeof *TEX_toktab);
5863 /* Unpack environment string into token table. Be careful about */
5864 /* zero-length strings (leading ':', "::" and trailing ':') */
5865 for (ptrdiff_t i = 0; *env != '\0'; )
5867 p = strchr (env, ':');
5868 if (!p) /* End of environment string. */
5869 p = env + strlen (env);
5870 if (p - env > 0)
5871 { /* Only non-zero strings. */
5872 TEX_toktab[i].buffer = savenstr (env, p - env);
5873 TEX_toktab[i].len = p - env;
5874 i++;
5876 if (*p)
5877 env = p + 1;
5878 else
5880 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5881 TEX_toktab[i].len = 0;
5882 break;
5888 /* Texinfo support. Dave Love, Mar. 2000. */
5889 static void
5890 Texinfo_nodes (FILE *inf)
5892 char *cp, *start;
5893 LOOP_ON_INPUT_LINES (inf, lb, cp)
5894 if (LOOKING_AT (cp, "@node"))
5896 start = cp;
5897 while (*cp != '\0' && *cp != ',')
5898 cp++;
5899 make_tag (start, cp - start, true,
5900 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5906 * HTML support.
5907 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5908 * Contents of <a name=xxx> are tags with name xxx.
5910 * Francesco Potortì, 2002.
5912 static void
5913 HTML_labels (FILE *inf)
5915 bool getnext = false; /* next text outside of HTML tags is a tag */
5916 bool skiptag = false; /* skip to the end of the current HTML tag */
5917 bool intag = false; /* inside an html tag, looking for ID= */
5918 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5919 char *end;
5922 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5924 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5925 for (;;) /* loop on the same line */
5927 if (skiptag) /* skip HTML tag */
5929 while (*dbp != '\0' && *dbp != '>')
5930 dbp++;
5931 if (*dbp == '>')
5933 dbp += 1;
5934 skiptag = false;
5935 continue; /* look on the same line */
5937 break; /* go to next line */
5940 else if (intag) /* look for "name=" or "id=" */
5942 while (*dbp != '\0' && *dbp != '>'
5943 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5944 dbp++;
5945 if (*dbp == '\0')
5946 break; /* go to next line */
5947 if (*dbp == '>')
5949 dbp += 1;
5950 intag = false;
5951 continue; /* look on the same line */
5953 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5954 || LOOKING_AT_NOCASE (dbp, "id="))
5956 bool quoted = (dbp[0] == '"');
5958 if (quoted)
5959 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5960 continue;
5961 else
5962 for (end = dbp; *end != '\0' && intoken (*end); end++)
5963 continue;
5964 linebuffer_setlen (&token_name, end - dbp);
5965 memcpyz (token_name.buffer, dbp, end - dbp);
5967 dbp = end;
5968 intag = false; /* we found what we looked for */
5969 skiptag = true; /* skip to the end of the tag */
5970 getnext = true; /* then grab the text */
5971 continue; /* look on the same line */
5973 dbp += 1;
5976 else if (getnext) /* grab next tokens and tag them */
5978 dbp = skip_spaces (dbp);
5979 if (*dbp == '\0')
5980 break; /* go to next line */
5981 if (*dbp == '<')
5983 intag = true;
5984 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5985 continue; /* look on the same line */
5988 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5989 continue;
5990 make_tag (token_name.buffer, token_name.len, true,
5991 dbp, end - dbp, lineno, linecharno);
5992 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5993 getnext = false;
5994 break; /* go to next line */
5997 else /* look for an interesting HTML tag */
5999 while (*dbp != '\0' && *dbp != '<')
6000 dbp++;
6001 if (*dbp == '\0')
6002 break; /* go to next line */
6003 intag = true;
6004 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
6006 inanchor = true;
6007 continue; /* look on the same line */
6009 else if (LOOKING_AT_NOCASE (dbp, "<title>")
6010 || LOOKING_AT_NOCASE (dbp, "<h1>")
6011 || LOOKING_AT_NOCASE (dbp, "<h2>")
6012 || LOOKING_AT_NOCASE (dbp, "<h3>"))
6014 intag = false;
6015 getnext = true;
6016 continue; /* look on the same line */
6018 dbp += 1;
6025 * Prolog support
6027 * Assumes that the predicate or rule starts at column 0.
6028 * Only the first clause of a predicate or rule is added.
6029 * Original code by Sunichirou Sugou (1989)
6030 * Rewritten by Anders Lindgren (1996)
6032 static ptrdiff_t prolog_pr (char *, char *, ptrdiff_t);
6033 static void prolog_skip_comment (linebuffer *, FILE *);
6034 static size_t prolog_atom (char *, size_t);
6036 static void
6037 Prolog_functions (FILE *inf)
6039 char *cp, *last = NULL;
6040 ptrdiff_t lastlen = 0, allocated = 0;
6042 LOOP_ON_INPUT_LINES (inf, lb, cp)
6044 if (cp[0] == '\0') /* Empty line */
6045 continue;
6046 else if (c_isspace (cp[0])) /* Not a predicate */
6047 continue;
6048 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
6049 prolog_skip_comment (&lb, inf);
6050 else
6052 ptrdiff_t len = prolog_pr (cp, last, lastlen);
6053 if (0 < len)
6055 /* Store the predicate name to avoid generating duplicate
6056 tags later. */
6057 if (allocated <= len)
6059 xrnew (last, len + 1, 1);
6060 allocated = len + 1;
6062 memcpyz (last, cp, len);
6063 lastlen = len;
6067 free (last);
6071 static void
6072 prolog_skip_comment (linebuffer *plb, FILE *inf)
6074 char *cp;
6078 for (cp = plb->buffer; *cp != '\0'; cp++)
6079 if (cp[0] == '*' && cp[1] == '/')
6080 return;
6081 readline (plb, inf);
6083 while (perhaps_more_input (inf));
6087 * A predicate or rule definition is added if it matches:
6088 * <beginning of line><Prolog Atom><whitespace>(
6089 * or <beginning of line><Prolog Atom><whitespace>:-
6091 * It is added to the tags database if it doesn't match the
6092 * name of the previous clause header.
6094 * Return the size of the name of the predicate or rule, or 0 if no
6095 * header was found.
6097 static ptrdiff_t
6098 prolog_pr (char *s, char *last, ptrdiff_t lastlen)
6100 ptrdiff_t len = prolog_atom (s, 0);
6101 if (len == 0)
6102 return 0;
6103 ptrdiff_t pos = skip_spaces (s + len) - s;
6105 /* Save only the first clause. */
6106 if ((s[pos] == '.'
6107 || (s[pos] == '(' && (pos += 1))
6108 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
6109 && ! (lastlen == len && memcmp (s, last, len) == 0))
6111 make_tag (s, len, true, s, pos, lineno, linecharno);
6112 return len;
6115 return 0;
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if no atom starts there.
 *
 * An atom, in this context, is one of:
 *  - an alphanumeric sequence (underscores allowed) starting with a
 *    lower case letter or an underscore;
 *  - a single-quoted string, in which a doubled quote stands for a
 *    quote and a backslash quotes the following character.  A quoted
 *    atom that does not end on the same line is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  p++;                          /* skip the opening quote */
  for (;;)
    {
      switch (*p)
        {
        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return 0;

        case '\\':
          if (p[1] == '\0')
            return 0;
          p += 2;
          break;

        case '\'':
          p++;
          if (*p != '\'')
            return p - start;   /* that was the closing quote */
          p++;                  /* a doubled quote */
          break;

        default:
          p++;
          break;
        }
    }
}
6177 * Support for Mercury
6179 * Assumes that the declarations start at column 0.
6180 * Original code by Sunichirou Sugou (1989) for Prolog.
6181 * Rewritten by Anders Lindgren (1996) for Prolog.
6182 * Adapted by Fabrice Nicol (2021) for Mercury.
6183 * Note: Prolog-support behavior is preserved if
6184 * --declarations is used, corresponding to
6185 * with_mercury_definitions=true.
6188 static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
6189 static void mercury_skip_comment (linebuffer *, FILE *);
/* Parser state shared by Mercury_functions, mercury_pr and mercury_decl.
   is_mercury_type and is_mercury_quantifier are reset at the start of
   every mercury_pr call.  */
/* Set by mercury_decl when the declaration keyword is "type".  */
6190 static bool is_mercury_type = false;
/* Set by mercury_decl after the keywords "some" or "all"; cleared again
   once the following "pred" or "func" keyword has been consumed.  */
6191 static bool is_mercury_quantifier = false;
/* Set by Mercury_functions when the current line starts with ":-".  */
6192 static bool is_mercury_declaration = false;
/* Result of mercury_decl.  An all-zero value means that nothing was
   recognized.  */
6193 typedef struct
6195 size_t pos; /* Position reached in parsing tag name. */
/* Note that mercury_pr passes NAMELENGTH - 1 to make_tag as the name
   length.  */
6196 size_t namelength; /* Length of tag name */
6197 size_t totlength; /* Total length of parsed tag: this field is currently
6198 reserved for control and debugging. */
6199 } mercury_pos_t;
6202 * Objective-C and Mercury have identical file extension .m.
6203 * To disambiguate between Objective C and Mercury, parse file
6204 * with the following heuristics hook:
6205 * - if line starts with :-, choose Mercury unconditionally;
6206 * - if line starts with #, @, choose Objective-C;
6207 * - otherwise compute the following ratio:
6209 * r = (number of lines with :-
6210 * or % in non-commented parts or . at trimmed EOL)
6211 * / (number of lines - number of lines starting by any amount
6212 * of whitespace, optionally followed by comment(s))
6214 * Note: strings are neglected in counts.
6216 * If r > mercury_heuristics_ratio, choose Mercury.
6217 * Experimental tests show that a possibly optimal default value for
6218 * this floor value is around 0.5. This is the default value for
6219 * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
6220 * The closer r is to 0.5, the closer the source code to pure Prolog.
6221 * Idiomatic Mercury is scored either with r = 1.0 or higher.
6222 * Objective-C is scored with r = 0.0. When this fails, the r-score
6223 * never rose above 0.1 in Objective-C tests.
/* Decide whether THIS_FILE is Mercury rather than Objective-C and, if so,
   make *LANG point to a Mercury language entry.  The file is read one
   character at a time.  Does nothing when THIS_FILE is NULL; calls
   pfatal when the file cannot be opened or closed.  */
6226 static void
6227 test_objc_is_mercury (char *this_file, language **lang)
6229 if (this_file == NULL) return;
6230 FILE* fp = fopen (this_file, "r");
6231 if (fp == NULL)
6232 pfatal (this_file);
6234 bool blank_line = false; /* Line starting with any amount of white space
6235 followed by optional comment(s). */
6236 bool commented_line = false;
6237 bool found_dot = false;
6238 bool only_space_before = true;
6239 bool start_of_line = true;
6240 int c;
/* Denominator of the ratio: starts at 1 and grows at each newline that
   ends a line not considered blank.  */
6241 intmax_t lines = 1;
/* The three counters below are summed to form the numerator.  */
6242 intmax_t mercury_dots = 0;
6243 intmax_t percentage_signs = 0;
6244 intmax_t rule_signs = 0;
6245 float ratio = 0;
6247 while ((c = fgetc (fp)) != EOF)
6249 switch (c)
/* End of line: update the counters and reset all per-line state.  */
6251 case '\n':
6252 if (! blank_line) ++lines;
6253 blank_line = true;
6254 commented_line = false;
6255 start_of_line = true;
/* A dot counts only if it was the last significant character seen on
   the line: every other non-blank character clears found_dot.  */
6256 if (found_dot) ++mercury_dots;
6257 found_dot = false;
6258 only_space_before = true;
6259 break;
6260 case '.':
6261 found_dot = ! commented_line;
6262 only_space_before = false;
6263 break;
6264 case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */
6265 if (! commented_line)
6267 ++percentage_signs;
6268 /* Cannot tell if it is a comment or modulo yet for sure.
6269 Yet works for heuristic purposes. */
6270 commented_line = true;
6272 found_dot = false;
6273 start_of_line = false;
6274 only_space_before = false;
6275 break;
6276 case '/':
/* Read one character ahead.  That character is consumed here and is
   not dispatched through the switch again.  */
6278 int d = fgetc (fp);
6279 found_dot = false;
6280 only_space_before = false;
6281 if (! commented_line)
6283 if (d == '*')
6284 commented_line = true;
6285 else
6286 /* If d == '/', cannot tell if it is an Obj.-C comment:
6287 may be Mercury integ. division. */
6288 blank_line = false;
6291 FALLTHROUGH;
6292 case ' ':
6293 case '\t':
6294 start_of_line = false;
6295 break;
6296 case ':':
/* As for '/', the character following the colon is consumed here.  */
6297 c = fgetc (fp);
6298 if (start_of_line)
6300 if (c == '-')
6302 ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */
6303 goto out;
6305 start_of_line = false;
6307 else
6309 /* p :- q. Frequent in Mercury.
6310 Rare or in quoted exprs in Obj.-C. */
6311 if (c == '-' && ! commented_line)
6312 ++rule_signs;
6314 blank_line = false;
6315 found_dot = false;
6316 only_space_before = false;
6317 break;
/* '@' or '#' as the first non-blank character of a line settles the
   question in favor of Objective-C (ratio 0).  */
6318 case '@':
6319 case '#':
6320 if (start_of_line || only_space_before)
6322 ratio = 0.0;
6323 goto out;
6325 FALLTHROUGH;
6326 default:
6327 start_of_line = false;
6328 blank_line = false;
6329 found_dot = false;
6330 only_space_before = false;
6334 /* Fallback heuristic test. Not failsafe but errless in practice. */
6335 ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
/* Reached directly by the two early decisions above, or by falling out
   of the loop at end of file.  */
6337 out:
6338 if (fclose (fp) == EOF)
6339 pfatal (this_file);
6341 if (ratio > mercury_heuristics_ratio)
6343 /* Change the language from Objective-C to Mercury. */
/* lang0 is static so that the pointer stored in *LANG stays valid after
   this function returns.  */
6344 static language lang0 = { "mercury", Mercury_help, Mercury_functions,
6345 Mercury_suffixes };
6346 *lang = &lang0;
6350 static void
6351 Mercury_functions (FILE *inf)
6353 char *cp, *last = NULL;
6354 ptrdiff_t lastlen = 0, allocated = 0;
6355 if (declarations) with_mercury_definitions = true;
6357 LOOP_ON_INPUT_LINES (inf, lb, cp)
6359 if (cp[0] == '\0') /* Empty line. */
6360 continue;
6361 else if (c_isspace (cp[0]) || cp[0] == '%')
6362 /* A Prolog-type comment or anything other than a declaration. */
6363 continue;
6364 else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */
6365 mercury_skip_comment (&lb, inf);
6366 else
6368 is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
6370 if (is_mercury_declaration
6371 || with_mercury_definitions)
6373 ptrdiff_t len = mercury_pr (cp, last, lastlen);
6374 if (0 < len)
6376 /* Store the declaration to avoid generating duplicate
6377 tags later. */
6378 if (allocated <= len)
6380 xrnew (last, len + 1, 1);
6381 allocated = len + 1;
6383 memcpyz (last, cp, len);
6384 lastlen = len;
6389 free (last);
6392 static void
6393 mercury_skip_comment (linebuffer *plb, FILE *inf)
6395 char *cp;
6399 for (cp = plb->buffer; *cp != '\0'; ++cp)
6400 if (cp[0] == '*' && cp[1] == '/')
6401 return;
6402 readline (plb, inf);
6404 while (perhaps_more_input (inf));
6408 * A declaration is added if it matches:
6409 * <beginning of line>:-<whitespace><Mercury Term><whitespace>(
6410 * If with_mercury_definitions == true, we also add:
6411 * <beginning of line><Mercury item><whitespace>(
6412 * or <beginning of line><Mercury item><whitespace>:-
6413 * As for Prolog support, different arities and types are not taken into
6414 * consideration.
6415 * Item is added to the tags database if it doesn't match the
6416 * name of the previous declaration.
6418 * Consume a Mercury declaration.
6419 * Return the number of bytes consumed, or 0 if there was an error.
6421 * A Mercury declaration must be one of:
6422 * :- type
6423 * :- solver type
6424 * :- pred
6425 * :- func
6426 * :- inst
6427 * :- mode
6428 * :- typeclass
6429 * :- instance
6430 * :- pragma
6431 * :- promise
6432 * :- initialise
6433 * :- finalise
6434 * :- mutable
6435 * :- module
6436 * :- interface
6437 * :- implementation
6438 * :- import_module
6439 * :- use_module
6440 * :- include_module
6441 * :- end_module
6442 * followed on the same line by an alphanumeric sequence, starting with a lower
6443 * case letter or by a single-quoted arbitrary string.
6444 * Single quotes can escape themselves. Backslash quotes everything.
6446 * Return the size of the name of the declaration or 0 if no header was found.
6447 * As quantifiers may precede functions or predicates, we must list them too.
6450 static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
6451 "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
6452 "initialise", "finalise", "mutable", "module", "interface", "implementation",
6453 "import_module", "use_module", "include_module", "end_module", "some", "all"};
6455 static mercury_pos_t
6456 mercury_decl (char *s, size_t pos)
6458 mercury_pos_t null_pos = {0, 0, 0};
6460 if (s == NULL) return null_pos;
6462 size_t origpos;
6463 origpos = pos;
6465 while (c_isalnum (s[pos]) || s[pos] == '_')
6466 pos++;
6468 unsigned char decl_type_length = pos - origpos;
6469 char buf[decl_type_length + 1];
6470 memset (buf, 0, decl_type_length + 1);
6472 /* Mercury declaration tags. Consume them, then check the declaration item
6473 following :- is legitimate, then go on as in the prolog case. */
6475 memcpy (buf, &s[origpos], decl_type_length);
6477 bool found_decl_tag = false;
6479 if (is_mercury_quantifier)
6481 if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */
6482 return null_pos;
6484 is_mercury_quantifier = false; /* Reset to base value. */
6485 found_decl_tag = true;
6487 else
6489 for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
6491 if (strcmp (buf, Mercury_decl_tags[j]) == 0)
6493 found_decl_tag = true;
6494 if (strcmp (buf, "type") == 0)
6495 is_mercury_type = true;
6497 if (strcmp (buf, "some") == 0
6498 || strcmp (buf, "all") == 0)
6500 is_mercury_quantifier = true;
6503 break; /* Found declaration tag of rank j. */
6505 else
6506 /* 'solver type' has a blank in the middle,
6507 so this is the hard case. */
6508 if (strcmp (buf, "solver") == 0)
6511 pos++;
6512 while (c_isalnum (s[pos]) || s[pos] == '_');
6514 decl_type_length = pos - origpos;
6515 char buf2[decl_type_length + 1];
6516 memset (buf2, 0, decl_type_length + 1);
6517 memcpy (buf2, &s[origpos], decl_type_length);
6519 if (strcmp (buf2, "solver type") == 0)
6521 found_decl_tag = false;
6522 break; /* Found declaration tag of rank j. */
6528 /* If with_mercury_definitions == false
6529 * this is a Mercury syntax error, ignoring... */
6531 if (with_mercury_definitions)
6533 if (found_decl_tag)
6534 pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
6535 else
6536 /* Prolog-like behavior
6537 * we have parsed the predicate once, yet inappropriately
6538 * so restarting again the parsing step. */
6539 pos = 0;
6541 else
6543 if (found_decl_tag)
6544 pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
6545 else
6546 return null_pos;
6549 /* From now on it is the same as for Prolog except for module dots. */
6551 size_t start_of_name = pos;
6553 if (c_islower (s[pos]) || s[pos] == '_' )
6555 /* The name is unquoted.
6556 Do not confuse module dots with end-of-declaration dots. */
6557 int module_dot_pos = 0;
6559 while (c_isalnum (s[pos])
6560 || s[pos] == '_'
6561 || (s[pos] == '.' /* A module dot. */
6562 && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')
6563 && (module_dot_pos = pos))) /* Record module dot position.
6564 Erase module from name. */
6565 ++pos;
6567 if (module_dot_pos)
6569 start_of_name = module_dot_pos + 2;
6570 ++pos;
6573 mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
6574 return position;
6576 else if (s[pos] == '\'')
6578 ++pos;
6579 for (;;)
6581 if (s[pos] == '\'')
6583 ++pos;
6584 if (s[pos] != '\'')
6585 break;
6586 ++pos; /* A double quote. */
6588 else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */
6589 return null_pos;
6590 else if (s[pos] == '\\')
6592 if (s[pos+1] == '\0')
6593 return null_pos;
6594 pos += 2;
6596 else
6597 ++pos;
6600 mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
6601 return position;
6603 else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */
6605 char *close_bracket = strchr (s + pos + 1, ']');
6606 if (!close_bracket)
6607 return null_pos;
6608 pos = skip_spaces (close_bracket + 1) - s;
6609 mercury_pos_t position = mercury_decl (s, pos);
6610 position.totlength += pos - origpos;
6611 return position;
6613 else if (s[pos] == '.') /* as in ':- interface.' */
6615 mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos};
6616 return position;
6618 else
6619 return null_pos;
/* Try to make a tag for the Mercury line S.  LAST and LASTLEN describe
   the text stored after the previous successful call; they are compared
   with the prefix of S to avoid tagging the same item twice.  Return
   the position reached in S when a tag was made, 0 otherwise.  Resets
   is_mercury_type and is_mercury_quantifier before parsing.  */
6622 static ptrdiff_t
6623 mercury_pr (char *s, char *last, ptrdiff_t lastlen)
6625 size_t len0 = 0;
6626 is_mercury_type = false;
6627 is_mercury_quantifier = false;
6628 bool stop_at_rule = false;
6630 if (is_mercury_declaration)
6632 /* Skip len0 blanks only for declarations. */
/* The "+ 2" steps over the leading ":-".  */
6633 len0 = skip_spaces (s + 2) - s;
6636 mercury_pos_t position = mercury_decl (s, len0);
6637 size_t pos = position.pos;
6638 int offset = 0; /* may be < 0 */
/* mercury_decl reports failure with an all-zero result.  */
6639 if (pos == 0) return 0;
6641 /* Skip white space for:
6642 a. rules in definitions before :-
6643 b. 0-arity predicates with inlined modes.
6644 c. possibly multiline type definitions */
6646 while (c_isspace (s[pos])) { ++pos; ++offset; }
/* The tests below advance POS past '.' or '(' as a side effect; the
   arithmetic in the make_tag call depends on that.  */
6648 if (( ((s[pos] == '.' && (pos += 1)) /* case 1
6649 This is a statement dot,
6650 not a module dot. */
6651 || c_isalnum(s[pos]) /* 0-arity procedures */
6652 || (s[pos] == '(' && (pos += 1)) /* case 2: arity > 0 */
6653 || ((s[pos] == ':') /* case 3: rules */
6654 && s[pos + 1] == '-' && (stop_at_rule = true)))
6655 && (lastlen != pos || memcmp (s, last, pos) != 0)
6657 /* Types are often declared on several lines so keeping just
6658 the first line. */
6660 || is_mercury_type) /* When types are implemented. */
6662 size_t namelength = position.namelength;
6663 if (stop_at_rule && offset) --offset;
6665 /* Left-trim type definitions. */
6667 while (pos > namelength + offset
6668 && c_isspace (s[pos - namelength - offset]))
6669 --offset;
/* The name starts NAMELENGTH + OFFSET bytes before POS and is
   NAMELENGTH - 1 bytes long.  */
6671 make_tag (s + pos - namelength - offset, namelength - 1, true,
6672 s, pos - offset - 1, lineno, linecharno);
6673 return pos;
6676 return 0;
6681 * Support for Erlang
6683 * Generates tags for functions, defines, and records.
6684 * Assumes that Erlang functions start at column 0.
6685 * Original code by Anders Lindgren (1996)
6687 static ptrdiff_t erlang_func (char *, char *, ptrdiff_t, ptrdiff_t *);
6688 static void erlang_attribute (char *);
6689 static ptrdiff_t erlang_atom (char *);
6691 static void
6692 Erlang_functions (FILE *inf)
6694 char *cp, *last = NULL;
6695 ptrdiff_t lastlen = 0, allocated = 0;
6697 LOOP_ON_INPUT_LINES (inf, lb, cp)
6699 if (cp[0] == '\0') /* Empty line */
6700 continue;
6701 else if (c_isspace (cp[0])) /* Not function nor attribute */
6702 continue;
6703 else if (cp[0] == '%') /* comment */
6704 continue;
6705 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6706 continue;
6707 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6709 erlang_attribute (cp);
6710 if (last != NULL)
6712 free (last);
6713 last = NULL;
6714 allocated = lastlen = 0;
6717 else
6719 ptrdiff_t name_offset;
6720 ptrdiff_t len = erlang_func (cp, last, lastlen, &name_offset);
6721 if (0 < len)
6723 /* Store the function name to avoid generating duplicate
6724 tags later. */
6725 if (allocated <= len)
6727 xrnew (last, len + 1, 1);
6728 allocated = len + 1;
6730 memcpyz (last, cp + name_offset, len);
6731 lastlen = len;
6735 free (last);
6740 * A function definition is added if it matches:
6741 * <beginning of line><Erlang Atom><whitespace>(
6743 * It is added to the tags database if it doesn't match the
6744 * name of the previous clause header.
6746 * Return the size of the name of the function, or 0 if no function
6747 * was found.
6749 static ptrdiff_t
6750 erlang_func (char *s, char *last, ptrdiff_t lastlen, ptrdiff_t *name_offset)
6752 char *name = s;
6753 ptrdiff_t len = erlang_atom (s);
6754 if (len == 0)
6755 return 0;
6756 ptrdiff_t pos = skip_spaces (s + len) - s;
6758 /* If the name is quoted, the quotes are not part of the name. */
6759 bool quoted = 2 < len && name[0] == '\'' && name[len - 1] == '\'';
6760 name += quoted;
6761 len -= 2 * quoted;
6763 /* Save only the first clause. */
6764 if (s[pos++] == '('
6765 && ! (lastlen == len && memcmp (name, last, len) == 0))
6767 make_tag (s, len, true, s, pos, lineno, linecharno);
6768 *name_offset = quoted;
6769 return len;
6772 return 0;
6777 * Handle attributes. Currently, tags are generated for defines
6778 * and records.
6780 * They are on the form:
6781 * -define(foo, bar).
6782 * -define(Foo(M, N), M+N).
6783 * -record(graph, {vtab = notable, cyclic = true}).
6785 static void
6786 erlang_attribute (char *s)
6788 char *cp = s;
6790 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6791 && *cp++ == '(')
6793 cp = skip_spaces (cp);
6794 ptrdiff_t len = erlang_atom (cp);
6795 ptrdiff_t pos = cp + len - s;
6796 if (len > 0)
6798 /* If the name is quoted, the quotes are not part of the name. */
6799 if (len > 2 && cp[0] == '\'' && cp[len - 1] == '\'')
6801 cp++;
6802 len -= 2;
6804 make_tag (cp, len, true, s, pos, lineno, linecharno);
6807 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 * For a quoted atom the count includes both quotes.
 */
static ptrdiff_t
erlang_atom (char *s)
{
  const char *p = s;

  if (c_isalpha (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      for (p++; c_isalnum (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          /* A backslash quotes the next character, which must exist.  */
          if (*p == '\\' && *++p == '\0')
            return 0;
          p++;
        }
      p++;                      /* the closing quote */
    }

  return p - s;
}
6840 static char *scan_separators (char *);
6841 static void add_regex (char *, language *);
6842 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", in place.
 * The first character of NAME is the separator; copying stops at the
 * first unquoted occurrence of it.  A backslash quotes the next
 * character: \a, \b, \d, \e, \f, \n, \r, \t and \v become the
 * corresponding control characters, a quoted separator becomes the
 * separator itself, and any other quoted character is kept together
 * with its backslash.  The result is null terminated.
 * Return a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src = name + 1;         /* next input char */
  bool escaped = false;         /* previous char was a backslash */

  while (*src != '\0')
    {
      const char ch = *src;

      if (escaped)
        {
          escaped = false;
          switch (ch)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)             */
            case 'b': *dst++ = '\b';   break; /* BS (back space)        */
            case 'd': *dst++ = 0177;   break; /* DEL (delete)           */
            case 'e': *dst++ = 033;    break; /* ESC (escape)           */
            case 'f': *dst++ = '\f';   break; /* FF (form feed)         */
            case 'n': *dst++ = '\n';   break; /* NL (new line)          */
            case 'r': *dst++ = '\r';   break; /* CR (carriage return)   */
            case 't': *dst++ = '\t';   break; /* TAB (horizontal tab)   */
            case 'v': *dst++ = '\v';   break; /* VT (vertical tab)      */
            default:
              /* A quoted separator loses its backslash; anything else
                 quoted keeps it.  */
              if (ch != sep)
                *dst++ = '\\';
              *dst++ = ch;
              break;
            }
        }
      else if (ch == '\\')
        escaped = true;
      else if (ch == sep)
        break;
      else
        *dst++ = ch;

      src++;
    }

  /* Terminate copied string.  */
  *dst = '\0';

  /* Anything but the separator here means the regexp was unterminated.  */
  return *src == sep ? src : NULL;
}
6903 /* Look at the argument of --regex or --no-regex and do the right
6904 thing. Same for each line of a regexp file. */
6905 static void
6906 analyze_regex (char *regex_arg)
6908 if (regex_arg == NULL)
6910 free_regexps (); /* --no-regex: remove existing regexps */
6911 return;
6914 /* A real --regexp option or a line in a regexp file. */
6915 switch (regex_arg[0])
6917 /* Comments in regexp file or null arg to --regex. */
6918 case '\0':
6919 case ' ':
6920 case '\t':
6921 break;
6923 /* Read a regex file. This is recursive and may result in a
6924 loop, which will stop when the file descriptors are exhausted. */
6925 case '@':
6927 FILE *regexfp;
6928 linebuffer regexbuf;
6929 char *regexfile = regex_arg + 1;
6931 /* regexfile is a file containing regexps, one per line. */
6932 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6933 if (regexfp == NULL)
6934 pfatal (regexfile);
6935 linebuffer_init (&regexbuf);
6936 while (readline_internal (&regexbuf, regexfp, regexfile, false) > 0)
6937 analyze_regex (regexbuf.buffer);
6938 free (regexbuf.buffer);
6939 if (fclose (regexfp) != 0)
6940 pfatal (regexfile);
6942 break;
6944 /* Regexp to be used for a specific language only. */
6945 case '{':
6947 language *lang;
6948 char *lang_name = regex_arg + 1;
6949 char *cp;
6951 for (cp = lang_name; *cp != '}'; cp++)
6952 if (*cp == '\0')
6954 error ("unterminated language name in regex: %s", regex_arg);
6955 return;
6957 *cp++ = '\0';
6958 lang = get_language_from_langname (lang_name);
6959 if (lang == NULL)
6960 return;
6961 add_regex (cp, lang);
6963 break;
6965 /* Regexp to be used for any language. */
6966 default:
6967 add_regex (regex_arg, NULL);
6968 break;
6972 /* Separate the regexp pattern, compile it,
6973 and care for optional name and modifiers. */
6974 static void
6975 add_regex (char *regexp_pattern, language *lang)
6977 static struct re_pattern_buffer zeropattern;
6978 char sep, *pat, *name, *modifiers;
6979 char empty = '\0';
6980 const char *err;
6981 struct re_pattern_buffer *patbuf;
6982 regexp *rp;
6983 bool
6984 ignore_case = false, /* case is significant */
6985 multi_line = false, /* matches are done one line at a time */
6986 single_line = false; /* dot does not match newline */
6989 if (strnlen (regexp_pattern, 3) < 3)
6991 error ("null regexp");
6992 return;
6994 sep = regexp_pattern[0];
6995 name = scan_separators (regexp_pattern);
6996 if (name == NULL)
6998 error ("%s: unterminated regexp", regexp_pattern);
6999 return;
7001 if (name[1] == sep)
7003 error ("null name for regexp \"%s\"", regexp_pattern);
7004 return;
7006 modifiers = scan_separators (name);
7007 if (modifiers == NULL) /* no terminating separator --> no name */
7009 modifiers = name;
7010 name = &empty;
7012 else
7013 modifiers += 1; /* skip separator */
7015 /* Parse regex modifiers. */
7016 for (; modifiers[0] != '\0'; modifiers++)
7017 switch (modifiers[0])
7019 case 'N':
7020 if (modifiers == name)
7021 error ("forcing explicit tag name but no name, ignoring");
7022 /* This option has no effect and is present only for backward
7023 compatibility. */
7024 break;
7025 case 'i':
7026 ignore_case = true;
7027 break;
7028 case 's':
7029 single_line = true;
7030 FALLTHROUGH;
7031 case 'm':
7032 multi_line = true;
7033 need_filebuf = true;
7034 break;
7035 default:
7036 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
7037 break;
7040 patbuf = xmalloc (sizeof *patbuf);
7041 *patbuf = zeropattern;
7042 if (ignore_case)
7044 static unsigned char lc_trans[UCHAR_MAX + 1];
7045 int i;
7046 for (i = 0; i < UCHAR_MAX + 1; i++)
7047 lc_trans[i] = c_tolower (i);
7048 patbuf->translate = lc_trans; /* translation table to fold case */
7051 if (multi_line)
7052 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
7053 else
7054 pat = regexp_pattern;
7056 if (single_line)
7057 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
7058 else
7059 re_set_syntax (RE_SYNTAX_EMACS);
7061 err = re_compile_pattern (pat, strlen (pat), patbuf);
7062 if (multi_line)
7063 free (pat);
7064 if (err != NULL)
7066 error ("%s while compiling pattern", err);
7067 return;
7070 rp = p_head;
7071 p_head = xmalloc (sizeof *p_head);
7072 p_head->pattern = savestr (regexp_pattern);
7073 p_head->p_next = rp;
7074 p_head->lang = lang;
7075 p_head->pat = patbuf;
7076 p_head->name = savestr (name);
7077 p_head->error_signaled = false;
7078 p_head->ignore_case = ignore_case;
7079 p_head->multi_line = multi_line;
7083 * Do the substitutions indicated by the regular expression and
7084 * arguments.
7086 static char *
7087 substitute (char *in, char *out, struct re_registers *regs)
7089 char *result, *t;
7091 result = NULL;
7092 ptrdiff_t size = strlen (out);
7094 /* Pass 1: figure out how much to allocate by finding all \N strings. */
7095 if (out[size - 1] == '\\')
7096 fatal ("pattern error in \"%s\"", out);
7097 for (t = strchr (out, '\\');
7098 t != NULL;
7099 t = strchr (t + 2, '\\'))
7100 if (c_isdigit (t[1]))
7102 int dig = t[1] - '0';
7103 ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
7104 size += diglen - 2;
7106 else
7107 size -= 1;
7109 /* Allocate space and do the substitutions. */
7110 assert (size >= 0);
7111 result = xmalloc (size + 1);
7113 for (t = result; *out != '\0'; out++)
7114 if (*out == '\\' && c_isdigit (*++out))
7116 int dig = *out - '0';
7117 ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
7118 memcpy (t, in + regs->start[dig], diglen);
7119 t += diglen;
7121 else
7122 *t++ = *out;
7123 *t = '\0';
7125 assert (t <= result + size);
7126 assert (t == result + strlen (result));
7128 return result;
7131 /* Deallocate all regexps. */
7132 static void
7133 free_regexps (void)
7135 regexp *rp;
7136 while (p_head != NULL)
7138 rp = p_head->p_next;
7139 free (p_head->pattern);
7140 free (p_head->name);
7141 free (p_head);
7142 p_head = rp;
7144 return;
7148 * Reads the whole file as a single string from `filebuf' and looks for
7149 * multi-line regular expressions, creating tags on matches.
7150 * readline already dealt with normal regexps.
7152 * Idea by Ben Wing <ben@666.com> (2002).
/* For every multi-line regexp in the p_head list that is generic or
   belongs to the language of the current file, search the contents of
   filebuf repeatedly and note one tag per match.  The global counters
   lineno, charno and linecharno are reset for each regexp and advanced
   as matches are found.  */
7154 static void
7155 regex_tag_multiline (void)
7157 char *buffer = filebuf.buffer;
7158 regexp *rp;
7159 char *name;
7161 for (rp = p_head; rp != NULL; rp = rp->p_next)
7163 ptrdiff_t match = 0;
7165 if (!rp->multi_line)
7166 continue; /* skip normal regexps */
7168 /* Generic initializations before parsing file from memory. */
7169 lineno = 1; /* reset global line number */
7170 charno = 0; /* reset global char number */
7171 linecharno = 0; /* reset global char number of line start */
7173 /* Only use generic regexps or those for the current language. */
7174 if (rp->lang != NULL && rp->lang != curfdp->lang)
7175 continue;
/* Keep searching until there is no match, an error occurs, or the
   empty-match case below sets MATCH to -3.  */
7177 while (match >= 0 && match < filebuf.len)
/* Each search starts at CHARNO, which the default case below advances
   to the end of the previous match.  */
7179 match = re_search (rp->pat, buffer, filebuf.len, charno,
7180 filebuf.len - match, &rp->regs);
7181 switch (match)
7183 case -2:
7184 /* Some error. */
7185 if (!rp->error_signaled)
7187 error ("regexp stack overflow while matching \"%s\"",
7188 rp->pattern);
7189 rp->error_signaled = true;
7191 break;
7192 case -1:
7193 /* No match. */
7194 break;
7195 default:
/* A match that ends where it starts would never advance CHARNO, so it
   is reported once per regexp and the search is abandoned.  */
7196 if (match == rp->regs.end[0])
7198 if (!rp->error_signaled)
7200 error ("regexp matches the empty string: \"%s\"",
7201 rp->pattern);
7202 rp->error_signaled = true;
7204 match = -3; /* exit from while loop */
7205 break;
7208 /* Match occurred. Construct a tag. */
/* Advance CHARNO to the end of the match, keeping LINENO and LINECHARNO
   in step with every newline passed.  */
7209 while (charno < rp->regs.end[0])
7210 if (buffer[charno++] == '\n')
7211 lineno++, linecharno = charno;
7212 name = rp->name;
7213 if (name[0] == '\0')
7214 name = NULL;
7215 else /* make a named tag */
7216 name = substitute (buffer, rp->name, &rp->regs);
7218 /* Force explicit tag name, if a name is there. */
/* NOTE(review): NAME comes from substitute and is not freed in this
   function; presumably pfnote keeps the pointer -- confirm.  */
7219 pfnote (name, true, buffer + linecharno,
7220 charno - linecharno + 1, lineno, linecharno);
7222 if (debug)
7223 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
7224 name ? name : "(unnamed)", curfdp->taggedfname,
7225 lineno, buffer + linecharno);
7226 break;
7233 static bool
7234 nocase_tail (const char *cp)
7236 ptrdiff_t len = 0;
7238 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
7239 cp++, len++;
7240 if (*cp == '\0' && !intoken (dbp[len]))
7242 dbp += len;
7243 return true;
7245 return false;
7248 static void
7249 get_tag (register char *bp, char **namepp)
7251 register char *cp = bp;
7253 if (*bp != '\0')
7255 /* Go till you get to white space or a syntactic break */
7256 for (cp = bp + 1; !notinname (*cp); cp++)
7257 continue;
7258 make_tag (bp, cp - bp, true,
7259 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
7262 if (namepp != NULL)
7263 *namepp = savenstr (bp, cp - bp);
7266 /* Similar to get_tag, but include '=' as part of the tag. */
7267 static void
7268 get_lispy_tag (register char *bp)
7270 register char *cp = bp;
7272 if (*bp != '\0')
7274 /* Go till you get to white space or a syntactic break */
7275 for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
7276 continue;
7277 make_tag (bp, cp - bp, true,
7278 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
7283 * Read a line of text from `stream' into `lbp', excluding the
7284 * newline or CR-NL (if `leave_cr` is false), if any. Return the
7285 * number of characters read from `stream', which is the length
7286 * of the line including the newline.
7288 * On DOS or Windows, if `leave_cr` is false, we do not count the
7289 * CR character, if any before the NL, in the returned length;
7290 * this mirrors the behavior of Emacs on those
7291 * platforms (for text files, it translates CR-NL to NL as it reads in the
7292 * file).
7294 * If multi-line regular expressions are requested, each line read is
7295 * appended to `filebuf'.
/* FILENAME is used only as the prefix of the message printed by perror
   when reading STREAM fails.  */
7297 static ptrdiff_t
7298 readline_internal (linebuffer *lbp, FILE *stream, char const *filename,
7299 const bool leave_cr)
7301 char *buffer = lbp->buffer;
7302 char *p = lbp->buffer;
7303 char *pend;
/* Number of characters read from STREAM but not stored in the buffer:
   0 at end of file, 1 for a newline, 2 for a removed CR-NL pair.  */
7304 int chars_deleted;
7306 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
/* Read one character per iteration until a newline or end of file.  */
7308 for (;;)
7310 register int c = getc (stream);
/* Make room before storing anything, so that the terminating NUL
   written below always fits as well.  */
7311 if (p == pend)
7313 /* We're at the end of linebuffer: expand it. */
7314 xrnew (buffer, lbp->size, 2);
7315 p = buffer + lbp->size;
7316 lbp->size *= 2;
7317 pend = buffer + lbp->size;
7318 lbp->buffer = buffer;
7320 if (c == EOF)
7322 if (ferror (stream))
7323 perror (filename);
7324 *p = '\0';
7325 chars_deleted = 0;
7326 break;
7328 if (c == '\n')
/* Drop a carriage return that immediately precedes the newline, unless
   the caller asked to keep it.  */
7330 if (!leave_cr && p > buffer && p[-1] == '\r')
7332 p -= 1;
7333 chars_deleted = 2;
7335 else
7337 chars_deleted = 1;
7339 *p = '\0';
7340 break;
7342 *p++ = c;
/* Length of the stored line, not counting the terminating NUL.  */
7344 lbp->len = p - buffer;
7346 if (need_filebuf /* we need filebuf for multi-line regexps */
7347 && chars_deleted > 0) /* not at EOF */
7349 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
7351 /* Expand filebuf. */
7352 xrnew (filebuf.buffer, filebuf.size, 2);
7353 filebuf.size *= 2;
/* Append the line followed by a newline and a NUL; only the line and
   the newline are counted in filebuf.len.  */
7355 strcpy (mempcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len),
7356 "\n");
7357 filebuf.len += lbp->len + 1;
7360 return lbp->len + chars_deleted;
7364 * Like readline_internal, above, but in addition try to match the
7365 * input line against relevant regular expressions and manage #line
7366 * directives.
7368 static void
7369 readline (linebuffer *lbp, FILE *stream)
7371 linecharno = charno; /* update global char number of line start */
7372 ptrdiff_t result = readline_internal (lbp, stream, infilename, false);
7373 lineno += 1; /* increment global line number */
7374 charno += result; /* increment global char number */
7376 /* Honor #line directives. */
7377 if (!no_line_directive)
7379 static bool discard_until_line_directive;
7381 /* Check whether this is a #line directive. */
7382 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
7384 char *lno_start = lbp->buffer + 6;
7385 char *lno_end;
7386 intmax_t lno = strtoimax (lno_start, &lno_end, 10);
7387 char *quoted_filename
7388 = lno_start < lno_end ? skip_spaces (lno_end) : NULL;
7390 if (quoted_filename && *quoted_filename == '"')
7392 char *endp = quoted_filename;
7393 while (*++endp && *endp != '"')
7394 endp += *endp == '\\' && endp[1];
7396 if (*endp)
7397 /* Ok, this is a real #line directive. Let's deal with it. */
7399 char *taggedabsname; /* absolute name of original file */
7400 char *taggedfname; /* name of original file as given */
7401 char *name = quoted_filename + 1;
7403 discard_until_line_directive = false; /* found it */
7404 *endp = '\0';
7405 canonicalize_filename (name);
7406 taggedabsname = absolute_filename (name, tagfiledir);
7407 if (filename_is_absolute (name)
7408 || filename_is_absolute (curfdp->infname))
7409 taggedfname = savestr (taggedabsname);
7410 else
7411 taggedfname = relative_filename (taggedabsname,tagfiledir);
7413 if (streq (curfdp->taggedfname, taggedfname))
7414 /* The #line directive is only a line number change. We
7415 deal with this afterwards. */
7416 free (taggedfname);
7417 else
7418 /* The tags following this #line directive should be
7419 attributed to taggedfname. In order to do this, set
7420 curfdp accordingly. */
7422 fdesc *fdp; /* file description pointer */
7424 /* Go look for a file description already set up for the
7425 file indicated in the #line directive. If there is
7426 one, use it from now until the next #line
7427 directive. */
7428 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
7429 if (streq (fdp->infname, curfdp->infname)
7430 && streq (fdp->taggedfname, taggedfname))
7431 /* If we remove the second test above (after the &&)
7432 then all entries pertaining to the same file are
7433 coalesced in the tags file. If we use it, then
7434 entries pertaining to the same file but generated
7435 from different files (via #line directives) will
7436 go into separate sections in the tags file. These
7437 alternatives look equivalent. The first one
7438 destroys some apparently useless information. */
7440 curfdp = fdp;
7441 free (taggedfname);
7442 break;
7444 /* Else, if we already tagged the real file, skip all
7445 input lines until the next #line directive. */
7446 if (fdp == NULL) /* not found */
7447 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
7448 if (streq (fdp->infabsname, taggedabsname))
7450 discard_until_line_directive = true;
7451 free (taggedfname);
7452 break;
7454 /* Else create a new file description and use that from
7455 now on, until the next #line directive. */
7456 if (fdp == NULL) /* not found */
7458 fdp = fdhead;
7459 fdhead = xmalloc (sizeof *fdhead);
7460 *fdhead = *curfdp; /* copy curr. file description */
7461 fdhead->next = fdp;
7462 fdhead->infname = savestr (curfdp->infname);
7463 fdhead->infabsname = savestr (curfdp->infabsname);
7464 fdhead->infabsdir = savestr (curfdp->infabsdir);
7465 fdhead->taggedfname = taggedfname;
7466 fdhead->usecharno = false;
7467 fdhead->prop = NULL;
7468 fdhead->written = false;
7469 curfdp = fdhead;
7472 free (taggedabsname);
7473 lineno = lno - 1;
7474 readline (lbp, stream);
7475 return;
7476 } /* if a real #line directive */
7477 } /* if #line is followed by a number */
7478 } /* if line begins with "#line " */
7480 /* If we are here, no #line directive was found. */
7481 if (discard_until_line_directive)
7483 if (result > 0)
7485 /* Do a tail recursion on ourselves, thus discarding the contents
7486 of the line buffer. */
7487 readline (lbp, stream);
7488 return;
7490 /* End of file. */
7491 discard_until_line_directive = false;
7492 return;
7494 } /* if #line directives should be considered */
7497 ptrdiff_t match;
7498 regexp *rp;
7499 char *name;
7501 /* Match against relevant regexps. */
7502 if (lbp->len > 0)
7503 for (rp = p_head; rp != NULL; rp = rp->p_next)
7505 /* Only use generic regexps or those for the current language.
7506 Also do not use multiline regexps, which is the job of
7507 regex_tag_multiline. */
7508 if ((rp->lang != NULL && rp->lang != fdhead->lang)
7509 || rp->multi_line)
7510 continue;
7512 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
7513 switch (match)
7515 case -2:
7516 /* Some error. */
7517 if (!rp->error_signaled)
7519 error ("regexp stack overflow while matching \"%s\"",
7520 rp->pattern);
7521 rp->error_signaled = true;
7523 break;
7524 case -1:
7525 /* No match. */
7526 break;
7527 case 0:
7528 /* Empty string matched. */
7529 if (!rp->error_signaled)
7531 error ("regexp matches the empty string: \"%s\"", rp->pattern);
7532 rp->error_signaled = true;
7534 break;
7535 default:
7536 /* Match occurred. Construct a tag. */
7537 name = rp->name;
7538 if (name[0] == '\0')
7539 name = NULL;
7540 else /* make a named tag */
7541 name = substitute (lbp->buffer, rp->name, &rp->regs);
7543 /* Force explicit tag name, if a name is there. */
7544 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
7546 if (debug)
7547 fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
7548 name ? name : "(unnamed)", curfdp->taggedfname,
7549 lineno, lbp->buffer);
7550 break;
/*
 * Return a newly allocated copy of the NUL-terminated string CP.  The
 * copy is made by savenstr and occupies strlen (CP) + 1 bytes.
 */
static char *
savestr (const char *cp)
{
  ptrdiff_t length = strlen (cp);
  return savenstr (cp, length);
}
/*
 * Return a pointer to LEN+1 bytes allocated with xmalloc, holding a
 * copy of the first LEN bytes of CP followed by a NUL byte.
 */
static char *
savenstr (const char *cp, ptrdiff_t len)
{
  char *copy = xmalloc (len + 1);
  memcpy (copy, cp, len);
  copy[len] = '\0';
  return copy;
}
/* Return a pointer to the first character at or after CP that is not
   white space according to c_isspace.  The end of the string does not
   count as a space, so the scan stops there.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either white space (per c_isspace) or the terminating NUL.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
/* Skip any chars in the "name" class and return a pointer to the first
   character that is not in it.  */
static char *
skip_name (char *cp)
{
  /* '\0' counts as notinname, so the scan also stops at the end of
     the string.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Print a diagnostic built from the printf-style FORMAT and its
   arguments via verror, then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current errno condition with perror, using S1 as the
   message prefix, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
7625 static void
7626 suggest_asking_for_help (void)
7628 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
7629 progname);
7630 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args via verror.
   Unlike fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
7643 static void
7644 verror (char const *format, va_list ap)
7646 fprintf (stderr, "%s: ", progname);
7647 vfprintf (stderr, format, ap);
7648 fprintf (stderr, "\n");
/* Return a newly-allocated string (obtained with xmalloc) whose
   contents concatenate those of S1, S2, S3, in that order.  */
static char *
concat (const char *s1, const char *s2, const char *s3)
{
  ptrdiff_t len1 = strlen (s1);
  ptrdiff_t len2 = strlen (s2);
  ptrdiff_t len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);
  /* Copy S3 together with its terminating NUL.  */
  memcpy (result + len1 + len2, s3, len3 + 1);
  return result;
}
/* Does the same work as the system V getcwd, but does not need to
   guess the buffer size in advance: the buffer starts at 200 bytes and
   is doubled each time getcwd fails with ERANGE.  Any other getcwd
   failure is fatal.  The returned, newly allocated name has been
   passed through canonicalize_filename.  */
static char *
etags_getcwd (void)
{
  ptrdiff_t bufsize = 200;
  char *path = xmalloc (bufsize);

  for (;;)
    {
      if (getcwd (path, bufsize) != NULL)
        break;

      /* Only "buffer too small" is worth retrying.  */
      if (errno != ERANGE)
        pfatal ("getcwd");

      free (path);
      path = xnmalloc (bufsize, 2 * sizeof *path);
      bufsize *= 2;
    }

  canonicalize_filename (path);
  return path;
}
7684 /* Return a newly allocated string containing a name of a temporary file. */
7685 static char *
7686 etags_mktmp (void)
7688 const char *tmpdir = getenv ("TMPDIR");
7689 const char *slash = "/";
7691 #if MSDOS || defined (DOS_NT)
7692 if (!tmpdir)
7693 tmpdir = getenv ("TEMP");
7694 if (!tmpdir)
7695 tmpdir = getenv ("TMP");
7696 if (!tmpdir)
7697 tmpdir = ".";
7698 if (tmpdir[strlen (tmpdir) - 1] == '/'
7699 || tmpdir[strlen (tmpdir) - 1] == '\\')
7700 slash = "";
7701 #else
7702 if (!tmpdir)
7703 tmpdir = "/tmp";
7704 if (tmpdir[strlen (tmpdir) - 1] == '/')
7705 slash = "";
7706 #endif
7708 char *templt = concat (tmpdir, slash, "etXXXXXX");
7709 int fd = mkostemp (templt, O_CLOEXEC);
7710 if (fd < 0 || close (fd) != 0)
7712 free (templt);
7713 templt = NULL;
7715 #if defined (DOS_NT)
7716 else
7718 /* The file name will be used in shell redirection, so it needs to have
7719 DOS-style backslashes, or else the Windows shell will barf. */
7720 char *p;
7721 for (p = templt; *p; p++)
7722 if (*p == '/')
7723 *p = '\\';
7725 #endif
7727 return templt;
#if !MSDOS && !defined (DOS_NT)
/*
 * Add single quotes around a string, and escape any single quotes.
 * Return a newly-allocated string (obtained with xmalloc).
 *
 * For example:
 * escape_shell_arg_string ("test.txt") => "'test.txt'"
 * escape_shell_arg_string ("'test.txt") => "''\''test.txt'"
 *
 * Sizes are computed in ptrdiff_t, like the other string helpers in
 * this file, so that long inputs cannot overflow an int.
 */
static char *
escape_shell_arg_string (char *str)
{
  /* First pass: compute the output length.  Start with 2 for the
     opening and closing quotes; each ' expands to the 4 bytes '\''.  */
  ptrdiff_t need_space = 2;
  for (const char *p = str; *p != '\0'; p++)
    need_space += (*p == '\'') ? 4 : 1;

  char *new_str = xmalloc (need_space + 1);
  char *out = new_str;

  /* Second pass: emit the quoted string.  */
  *out++ = '\'';
  for (const char *p = str; *p != '\0'; p++)
    {
      *out++ = *p;
      if (*p == '\'')
        {
          /* The ' just written closes the quoted run; add \' for a
             literal quote and ' to reopen quoting.  */
          *out++ = '\\';
          *out++ = '\'';
          *out++ = '\'';
        }
    }
  *out++ = '\'';
  *out = '\0';

  return new_str;
}
#endif
/* Move SRC_FILE to DST_FILE.  A plain rename is tried first; if that
   fails, the contents are copied byte by byte and SRC_FILE is then
   unlinked.  Any failure in the copy path is fatal (pfatal).

   The source is opened and checked before the destination is created,
   so a missing source does not truncate an existing DST_FILE.  A read
   error is checked once the copy loop ends, because fgetc reports both
   end of file and errors as EOF; SRC_FILE is therefore never removed
   after an incomplete copy.  */
static void
do_move_file (const char *src_file, const char *dst_file)
{
  if (rename (src_file, dst_file) == 0)
    return;

  FILE *src_f = fopen (src_file, "rb");
  if (src_f == NULL)
    pfatal (src_file);

  FILE *dst_f = fopen (dst_file, "wb");
  if (dst_f == NULL)
    pfatal (dst_file);

  int c;
  while ((c = fgetc (src_f)) != EOF)
    if (fputc (c, dst_f) == EOF)
      pfatal ("cannot write");

  /* EOF from fgetc may mean a read error rather than end of file.  */
  if (ferror (src_f))
    pfatal (src_file);

  if (fclose (src_f) == EOF)
    pfatal (src_file);

  /* fclose flushes buffered output, so this also catches late write
     errors.  */
  if (fclose (dst_f) == EOF)
    pfatal (dst_file);

  if (unlink (src_file) == -1)
    pfatal ("unlink error");
}
7821 /* Return a newly allocated string containing the file name of FILE
7822 relative to the absolute directory DIR (which should end with a slash). */
7823 static char *
7824 relative_filename (char *file, char *dir)
7826 char *fp, *dp, *afn, *res;
7827 ptrdiff_t i;
7828 char *dir_last_slash UNINIT;
7830 /* Find the common root of file and dir (with a trailing slash). */
7831 afn = absolute_filename (file, cwd);
7832 fp = afn;
7833 dp = dir;
7834 while (*fp++ == *dp++)
7835 if (dp[-1] == '/')
7836 dir_last_slash = dp - 1;
7837 #ifdef DOS_NT
7838 if (fp - 1 == afn && afn[0] != '/')
7839 return afn; /* Cannot build a relative name. */
7840 #endif
7841 fp -= dp - dir_last_slash;
7842 dp = dir_last_slash;
7844 /* Build a sequence of "../" strings for the resulting relative file name. */
7845 i = 0;
7846 while ((dp = strchr (dp + 1, '/')) != NULL)
7847 i += 1;
7848 res = xmalloc (3*i + strlen (fp + 1) + 1);
7849 char *z = res;
7850 while (i-- > 0)
7851 z = stpcpy (z, "../");
7853 /* Add the file name relative to the common root of file and dir. */
7854 strcpy (z, fp + 1);
7855 free (afn);
7857 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An already
   absolute FILE is copied; otherwise DIR and FILE are concatenated.
   "/dirname/.." and "/." components are then removed in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SCAN always points
     at a slash; after an edit it is re-examined at the same spot,
     because the text that follows it has changed.  */
  char *scan = strchr (res, '/');
  while (scan != NULL && scan[0] != '\0')
    {
      bool dot = scan[1] == '.';

      if (dot && scan[2] == '.' && (scan[3] == '/' || scan[3] == '\0'))
        {
          /* "/..": back up to the start of the preceding component.  */
          char *parent = scan;
          do
            parent--;
          while (parent >= res && !filename_is_absolute (parent));
          if (parent < res)
            parent = scan;      /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (parent[0] != '/')
            parent = scan;
#endif
          /* The length includes the terminating NUL.  */
          memmove (parent, scan + 3, strlen (scan + 2));
          scan = parent;
        }
      else if (dot && (scan[2] == '/' || scan[2] == '\0'))
        {
          /* "/.": drop it; the length includes the terminating NUL.  */
          memmove (scan, scan + 2, strlen (scan + 1));
        }
      else
        scan = strchr (scan + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* Just a safety net: should never happen.  */
  free (res);
  return savestr ("/");
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE contains no slash, the result is a copy
   of DIR.  FILE is modified temporarily but restored before return.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash while the directory part is
     made absolute, then put the saved character back.  */
  char saved = last_slash[1];
  last_slash[1] = '\0';
  char *res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute if it starts with '/', or, under DOS_NT, with a
   drive letter followed by ":/".  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place: FN is rewritten and can only get shorter.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* read position */
  register char *dst = fn;      /* write position, never ahead of SRC */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash.  */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *dst = '/';
          while (src[1] == '/' || src[1] == '\\')
            src++;
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#else /* !DOS_NT */

  /* Collapse multiple slashes into a single slash.  */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst = '/';
          while (src[1] == '/')
            src++;
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#endif /* !DOS_NT */

  *dst = '\0';
}
7998 /* Initialize a linebuffer for use. */
7999 static void
8000 linebuffer_init (linebuffer *lbp)
8002 lbp->size = (DEBUG) ? 3 : 200;
8003 lbp->buffer = xmalloc (lbp->size);
8004 lbp->buffer[0] = '\0';
8005 lbp->len = 0;
8008 /* Set the minimum size of a string contained in a linebuffer. */
8009 static void
8010 linebuffer_setlen (linebuffer *lbp, ptrdiff_t toksize)
8012 if (lbp->size <= toksize)
8014 ptrdiff_t multiplier = toksize / lbp->size + 1;
8015 xrnew (lbp->buffer, lbp->size, multiplier);
8016 lbp->size *= multiplier;
8018 lbp->len = toksize;
8021 /* Memory allocators with a fatal error if memory is exhausted. */
/* Report memory exhaustion through fatal.  */
static void
memory_full (void)
{
  fatal ("virtual memory exhausted");
}
/* Allocate SIZE bytes with malloc and return the result.  Call
   memory_full if SIZE does not fit in size_t or if malloc returns
   NULL.  */
static void *
xmalloc (ptrdiff_t size)
{
  if (SIZE_MAX < size)
    memory_full ();

  void *block = malloc (size);
  if (!block)
    memory_full ();

  return block;
}
/* Allocate room for NITEMS items of ITEM_SIZE bytes each through
   xmalloc.  NITEMS is assumed nonnegative and ITEM_SIZE positive.
   Call memory_full if the total byte count overflows ptrdiff_t.  */
static void *
xnmalloc (ptrdiff_t nitems, ptrdiff_t item_size)
{
  assume (0 <= nitems);
  assume (0 < item_size);

  ptrdiff_t total;
  bool overflow = ckd_mul (&total, nitems, item_size);
  if (overflow)
    memory_full ();

  return xmalloc (total);
}
/* Resize the block PA with realloc to hold NITEMS items of ITEM_SIZE
   bytes each, and return the result.  NITEMS is assumed nonnegative
   and ITEM_SIZE positive.  Call memory_full if the byte count
   overflows ptrdiff_t, does not fit in size_t, or if realloc fails.  */
static void *
xnrealloc (void *pa, ptrdiff_t nitems, ptrdiff_t item_size)
{
  assume (0 <= nitems);
  assume (0 < item_size);

  ptrdiff_t total;
  if (ckd_mul (&total, nitems, item_size) || SIZE_MAX < total)
    memory_full ();

  void *block = realloc (pa, total);
  if (block == NULL)
    memory_full ();

  return block;
}
8066 * Local Variables:
8067 * indent-tabs-mode: t
8068 * tab-width: 8
8069 * fill-column: 79
8070 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
8071 * c-file-style: "gnu"
8072 * End:
8075 /* etags.c ends here */