Avoid crashes due to invalid error forms from sentinels/filters
[emacs.git] / lib-src / etags.c
blob4000f47a414b2e9e4a1c23dab9ef8406667f0d82
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <https://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
95 #ifdef WIN32_NATIVE
96 # undef MSDOS
97 # undef WINDOWSNT
98 # define WINDOWSNT
99 #endif /* WIN32_NATIVE */
101 #ifdef MSDOS
102 # undef MSDOS
103 # define MSDOS true
104 # include <sys/param.h>
105 #else
106 # define MSDOS false
107 #endif /* MSDOS */
109 #ifdef WINDOWSNT
110 # include <direct.h>
111 # undef HAVE_NTGUI
112 # undef DOS_NT
113 # define DOS_NT
114 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
115 # define O_CLOEXEC O_NOINHERIT
116 #endif /* WINDOWSNT */
118 #include <limits.h>
119 #include <unistd.h>
120 #include <stdarg.h>
121 #include <stdlib.h>
122 #include <string.h>
123 #include <sysstdio.h>
124 #include <errno.h>
125 #include <fcntl.h>
126 #include <binary-io.h>
127 #include <unlocked-io.h>
128 #include <c-ctype.h>
129 #include <c-strcase.h>
131 #include <assert.h>
132 #ifdef NDEBUG
133 # undef assert /* some systems have a buggy assert.h */
134 # define assert(x) ((void) 0)
135 #endif
137 #include <getopt.h>
138 #include <regex.h>
140 /* Define CTAGS to make the program "ctags" compatible with the usual one.
141 Leave it undefined to make the program "etags", which makes emacs-style
142 tag tables and tags typedefs, #defines and struct/union/enum by default. */
143 #ifdef CTAGS
144 # undef CTAGS
145 # define CTAGS true
146 #else
147 # define CTAGS false
148 #endif
/* Return true if the NUL-terminated strings S and T have identical
   contents, as decided by strcmp.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}
/* Return true if c_strcasecmp reports the NUL-terminated strings S and
   T as equal.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}
/* Return true if S and T agree in their first N characters, as decided
   by strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}
/* Return true if c_strncasecmp reports the first N characters of S and
   T as equal.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
/* Return true if C is one of the characters that cannot be part of a
   name: NUL, tab, newline, form feed, carriage return, space, or one
   of ( ) , ; =  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
/* Return true if C can start a token: '$', '@', an ASCII letter, '_'
   or '~'.  */
static bool
begtoken (unsigned char c)
{
  /* The characters are spelled out one by one, so membership does not
     depend on how the execution character set orders them.  */
  static char const starters[] =
    "$@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "abcdefghijklmnopqrstuvwxyz"
    "~";

  /* strchr also finds the terminating NUL, which is not a token
     starter, so rule it out first.  */
  return c != '\0' && strchr (starters, c) != NULL;
}
/* Return true if C can appear in the middle of a token: '$', an ASCII
   digit, an ASCII letter or '_'.  */
static bool
intoken (unsigned char c)
{
  /* The characters are spelled out one by one, so membership does not
     depend on how the execution character set orders them.  */
  static char const members[] =
    "$"
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "abcdefghijklmnopqrstuvwxyz";

  /* strchr also finds the terminating NUL, which is not a token
     character, so rule it out first.  */
  return c != '\0' && strchr (members, c) != NULL;
}
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed below.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
/* Typed front ends to xmalloc and xrealloc.

   xnew (N, Type) evaluates to a `Type *' returned by xmalloc for a
   request of N objects of type Type.

   xrnew (OP, N, Type) hands the pointer lvalue OP to xrealloc with a
   request of N objects of type Type, and stores the result back into
   OP.  */
#define xnew(n, Type) \
  ((Type *) xmalloc (sizeof (Type) * (n)))
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc (op, sizeof (Type) * (n)))
/* Type of a language parse function; it receives the stream to read.  */
typedef void Lang_function (FILE *inf);
/* One compressed-file format.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;

  /* Command that takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;
258 typedef struct
260 const char *name; /* language name */
261 const char *help; /* detailed help for the language */
262 Lang_function *function; /* parse function */
263 const char **suffixes; /* name suffixes of this language's files */
264 const char **filenames; /* names of this language's files */
265 const char **interpreters; /* interpreters for this language */
266 bool metasource; /* source used to generate other sources */
267 } language;
269 typedef struct fdesc
271 struct fdesc *next; /* for the linked list */
272 char *infname; /* uncompressed input file name */
273 char *infabsname; /* absolute uncompressed input file name */
274 char *infabsdir; /* absolute dir of input file */
275 char *taggedfname; /* file name to write in tagfile */
276 language *lang; /* language of file */
277 char *prop; /* file properties to write in tagfile */
278 bool usecharno; /* etags tags shall contain char number */
279 bool written; /* entry written in the tags file */
280 } fdesc;
282 typedef struct node_st
283 { /* sorting structure */
284 struct node_st *left, *right; /* left and right sons */
285 fdesc *fdp; /* description of file to whom tag belongs */
286 char *name; /* tag name */
287 char *regex; /* search regexp */
288 bool valid; /* write this tag on the tag file */
289 bool is_func; /* function tag: use regexp in CTAGS mode */
290 bool been_warned; /* warning already given for duplicated tag */
291 int lno; /* line number tag is on */
292 long cno; /* character number line starts on */
293 } node;
/* A `linebuffer' holds one line of text.  `readline_internal' reads a
   line from a stream into a linebuffer, whatever the length of the
   line.  */
typedef struct
{
  /* Size of BUFFER.  */
  long size;

  /* Length of the string in BUFFER after readline reads it.  */
  int len;

  /* The text itself.  */
  char *buffer;
} linebuffer;
309 /* Used to support mixing of --lang and file names. */
310 typedef struct
312 enum {
313 at_language, /* a language specification */
314 at_regexp, /* a regular expression */
315 at_filename, /* a file name */
316 at_stdin, /* read from stdin here */
317 at_end /* stop parsing the list */
318 } arg_type; /* argument type */
319 language *lang; /* language associated with the argument */
320 char *what; /* the argument itself */
321 } argument;
323 /* Structure defining a regular expression. */
324 typedef struct regexp
326 struct regexp *p_next; /* pointer to next in list */
327 language *lang; /* if set, use only for this language */
328 char *pattern; /* the regexp pattern */
329 char *name; /* tag name */
330 struct re_pattern_buffer *pat; /* the compiled pattern */
331 struct re_registers regs; /* re registers */
332 bool error_signaled; /* already signaled for this regexp */
333 bool force_explicit_name; /* do not allow implicit tag name */
334 bool ignore_case; /* ignore case when matching */
335 bool multi_line; /* do a multi-line match on the whole file */
336 } regexp;
339 /* Many compilers barf on this:
340 Lang_function Ada_funcs;
341 so let's write it this way */
342 static void Ada_funcs (FILE *);
343 static void Asm_labels (FILE *);
344 static void C_entries (int c_ext, FILE *);
345 static void default_C_entries (FILE *);
346 static void plain_C_entries (FILE *);
347 static void Cjava_entries (FILE *);
348 static void Cobol_paragraphs (FILE *);
349 static void Cplusplus_entries (FILE *);
350 static void Cstar_entries (FILE *);
351 static void Erlang_functions (FILE *);
352 static void Forth_words (FILE *);
353 static void Fortran_functions (FILE *);
354 static void Go_functions (FILE *);
355 static void HTML_labels (FILE *);
356 static void Lisp_functions (FILE *);
357 static void Lua_functions (FILE *);
358 static void Makefile_targets (FILE *);
359 static void Pascal_functions (FILE *);
360 static void Perl_functions (FILE *);
361 static void PHP_functions (FILE *);
362 static void PS_functions (FILE *);
363 static void Prolog_functions (FILE *);
364 static void Python_functions (FILE *);
365 static void Ruby_functions (FILE *);
366 static void Scheme_functions (FILE *);
367 static void TeX_commands (FILE *);
368 static void Texinfo_nodes (FILE *);
369 static void Yacc_entries (FILE *);
370 static void just_read_file (FILE *);
372 static language *get_language_from_langname (const char *);
373 static void readline (linebuffer *, FILE *);
374 static long readline_internal (linebuffer *, FILE *, char const *);
375 static bool nocase_tail (const char *);
376 static void get_tag (char *, char **);
377 static void get_lispy_tag (char *);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn void suggest_asking_for_help (void);
385 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn void pfatal (const char *);
387 static void add_node (node *, node **);
389 static void process_file_name (char *, language *);
390 static void process_file (FILE *, char *, language *);
391 static void find_entries (FILE *);
392 static void free_tree (node *);
393 static void free_fdesc (fdesc *);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc *, node **);
396 static void put_entries (node *);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer *);
412 static void linebuffer_setlen (linebuffer *, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar = '/'; /* use /.../ searches */
419 static char *tagfile; /* output file */
420 static char *progname; /* name this program was invoked with */
421 static char *cwd; /* current working directory */
422 static char *tagfiledir; /* directory of tagfile */
423 static FILE *tagf; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
426 static fdesc *fdhead; /* head of file description list */
427 static fdesc *curfdp; /* current file description */
428 static char *infilename; /* current input file name */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
434 static const int invalidcharno = -1;
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
443 static bool append_to_tagfile; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals; /* create tags for global variables */
453 static int members; /* create tags for C member variables */
454 static int declarations; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive; /* ignore #line directives (undocumented) */
456 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
457 static bool update; /* -u: update tags */
458 static bool vgrind_style; /* -v: create vgrind style index output */
459 static bool no_warnings; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style; /* -x: create cxref style output */
461 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent; /* -I: ignore indentation in C */
463 static int packages_only; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
465 static int debug; /* --debug */
467 /* STDIN is defined in LynxOS system headers */
468 #ifdef STDIN
469 # undef STDIN
470 #endif
472 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
473 static bool parsing_stdin; /* --parse-stdin used */
475 static regexp *p_head; /* list of all regexps */
476 static bool need_filebuf; /* some regexes are multi-line */
478 static struct option longopts[] =
480 { "append", no_argument, NULL, 'a' },
481 { "packages-only", no_argument, &packages_only, 1 },
482 { "c++", no_argument, NULL, 'C' },
483 { "debug", no_argument, &debug, 1 },
484 { "declarations", no_argument, &declarations, 1 },
485 { "no-line-directive", no_argument, &no_line_directive, 1 },
486 { "no-duplicates", no_argument, &no_duplicates, 1 },
487 { "help", no_argument, NULL, 'h' },
488 { "help", no_argument, NULL, 'H' },
489 { "ignore-indentation", no_argument, NULL, 'I' },
490 { "language", required_argument, NULL, 'l' },
491 { "members", no_argument, &members, 1 },
492 { "no-members", no_argument, &members, 0 },
493 { "output", required_argument, NULL, 'o' },
494 { "class-qualify", no_argument, &class_qualify, 'Q' },
495 { "regex", required_argument, NULL, 'r' },
496 { "no-regex", no_argument, NULL, 'R' },
497 { "ignore-case-regex", required_argument, NULL, 'c' },
498 { "parse-stdin", required_argument, NULL, STDIN },
499 { "version", no_argument, NULL, 'V' },
501 #if CTAGS /* Ctags options */
502 { "backward-search", no_argument, NULL, 'B' },
503 { "cxref", no_argument, NULL, 'x' },
504 { "defines", no_argument, NULL, 'd' },
505 { "globals", no_argument, &globals, 1 },
506 { "typedefs", no_argument, NULL, 't' },
507 { "typedefs-and-c++", no_argument, NULL, 'T' },
508 { "update", no_argument, NULL, 'u' },
509 { "vgrind", no_argument, NULL, 'v' },
510 { "no-warn", no_argument, NULL, 'w' },
512 #else /* Etags options */
513 { "no-defines", no_argument, NULL, 'D' },
514 { "no-globals", no_argument, &globals, 0 },
515 { "include", required_argument, NULL, 'i' },
516 #endif
517 { NULL }
520 static compressor compressors[] =
522 { "z", "gzip -d -c"},
523 { "Z", "gzip -d -c"},
524 { "gz", "gzip -d -c"},
525 { "GZ", "gzip -d -c"},
526 { "bz2", "bzip2 -d -c" },
527 { "xz", "xz -d -c" },
528 { NULL }
532 * Language stuff.
535 /* Ada code */
536 static const char *Ada_suffixes [] =
537 { "ads", "adb", "ada", NULL };
538 static const char Ada_help [] =
539 "In Ada code, functions, procedures, packages, tasks and types are\n\
540 tags. Use the '--packages-only' option to create tags for\n\
541 packages only.\n\
542 Ada tag names have suffixes indicating the type of entity:\n\
543 Entity type: Qualifier:\n\
544 ------------ ----------\n\
545 function /f\n\
546 procedure /p\n\
547 package spec /s\n\
548 package body /b\n\
549 type /t\n\
550 task /k\n\
551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
553 will just search for any tag 'bidule'.";
555 /* Assembly code */
556 static const char *Asm_suffixes [] =
557 { "a", /* Unix assembler */
558 "asm", /* Microcontroller assembly */
559 "def", /* BSO/Tasking definition includes */
560 "inc", /* Microcontroller include files */
561 "ins", /* Microcontroller include files */
562 "s", "sa", /* Unix assembler */
563 "S", /* cpp-processed Unix assembler */
564 "src", /* BSO/Tasking C compiler output */
565 NULL
567 static const char Asm_help [] =
568 "In assembler code, labels appearing at the beginning of a line,\n\
569 followed by a colon, are tags.";
572 /* Note that .c and .h can be considered C++, if the --c++ flag was
573 given, or if the `class' or `template' keywords are met inside the file.
574 That is why default_C_entries is called for these. */
575 static const char *default_C_suffixes [] =
576 { "c", "h", NULL };
577 #if CTAGS /* C help for Ctags */
578 static const char default_C_help [] =
579 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
582 Use --globals to tag global variables.\n\
583 You can tag function declarations and external variables by\n\
584 using '--declarations', and struct members by using '--members'.";
585 #else /* C help for Etags */
586 static const char default_C_help [] =
587 "In C code, any C function or typedef is a tag, and so are\n\
588 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
589 definitions and 'enum' constants are tags unless you specify\n\
590 '--no-defines'. Global variables are tags unless you specify\n\
591 '--no-globals' and so are struct members unless you specify\n\
592 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
593 '--no-members' can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using '--declarations'.";
596 #endif /* C help for Ctags and Etags */
598 static const char *Cplusplus_suffixes [] =
599 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
600 "M", /* Objective C++ */
601 "pdb", /* PostScript with C syntax */
602 NULL };
603 static const char Cplusplus_help [] =
604 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
605 --help --lang=c --lang=c++ for full help.)\n\
606 In addition to C tags, member functions are also recognized. Member\n\
607 variables are recognized unless you use the '--no-members' option.\n\
608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
609 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
610 'operator+'.";
/* Java code: file name suffixes (NULL-terminated) and help text.
   The help text is const, like the help text of the other languages;
   it is only ever read.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol code: file name suffixes (NULL-terminated) and help text.
   The help text is const, like the help text of the other languages;
   it is only ever read.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
625 static const char *Cstar_suffixes [] =
626 { "cs", "hs", NULL };
628 static const char *Erlang_suffixes [] =
629 { "erl", "hrl", NULL };
630 static const char Erlang_help [] =
631 "In Erlang code, the tags are the functions, records and macros\n\
632 defined in the file.";
/* Forth code: file name suffixes (NULL-terminated) and help text.
   The suffix table is static, like the suffix tables of the other
   languages, so that it does not get external linkage.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by ':',\n\
constant, code, create, defer, value, variable, buffer:, field.";
640 static const char *Fortran_suffixes [] =
641 { "F", "f", "f90", "for", NULL };
642 static const char Fortran_help [] =
643 "In Fortran code, functions, subroutines and block data are tags.";
645 static const char *Go_suffixes [] = {"go", NULL};
646 static const char Go_help [] =
647 "In Go code, functions, interfaces and packages are tags.";
649 static const char *HTML_suffixes [] =
650 { "htm", "html", "shtml", NULL };
651 static const char HTML_help [] =
652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
653 'h3' headers. Also, tags are 'name=' in anchors and all\n\
654 occurrences of 'id='.";
656 static const char *Lisp_suffixes [] =
657 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
658 static const char Lisp_help [] =
659 "In Lisp code, any function defined with 'defun', any variable\n\
660 defined with 'defvar' or 'defconst', and in general the first\n\
661 argument of any expression that starts with '(def' in column zero\n\
662 is a tag.\n\
663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
665 static const char *Lua_suffixes [] =
666 { "lua", "LUA", NULL };
667 static const char Lua_help [] =
668 "In Lua scripts, all functions are tags.";
670 static const char *Makefile_filenames [] =
671 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
672 static const char Makefile_help [] =
673 "In makefiles, targets are tags; additionally, variables are tags\n\
674 unless you specify '--no-globals'.";
676 static const char *Objc_suffixes [] =
677 { "lm", /* Objective lex file */
678 "m", /* Objective C file */
679 NULL };
680 static const char Objc_help [] =
681 "In Objective C code, tags include Objective C definitions for classes,\n\
682 class categories, methods and protocols. Tags for variables and\n\
683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
684 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
686 static const char *Pascal_suffixes [] =
687 { "p", "pas", NULL };
688 static const char Pascal_help [] =
689 "In Pascal code, the tags are the functions and procedures defined\n\
690 in the file.";
691 /* " // this is for working around an Emacs highlighting bug... */
693 static const char *Perl_suffixes [] =
694 { "pl", "pm", NULL };
695 static const char *Perl_interpreters [] =
696 { "perl", "@PERL@", NULL };
697 static const char Perl_help [] =
698 "In Perl code, the tags are the packages, subroutines and variables\n\
699 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
700 '--globals' if you want to tag global variables. Tags for\n\
701 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
702 defined in the default package is 'main::SUB'.";
704 static const char *PHP_suffixes [] =
705 { "php", "php3", "php4", NULL };
706 static const char PHP_help [] =
707 "In PHP code, tags are functions, classes and defines. Unless you use\n\
708 the '--no-members' option, vars are tags too.";
710 static const char *plain_C_suffixes [] =
711 { "pc", /* Pro*C file */
712 NULL };
714 static const char *PS_suffixes [] =
715 { "ps", "psw", NULL }; /* .psw is for PSWrap */
716 static const char PS_help [] =
717 "In PostScript code, the tags are the functions.";
719 static const char *Prolog_suffixes [] =
720 { "prolog", NULL };
721 static const char Prolog_help [] =
722 "In Prolog code, tags are predicates and rules at the beginning of\n\
723 line.";
725 static const char *Python_suffixes [] =
726 { "py", NULL };
727 static const char Python_help [] =
728 "In Python code, 'def' or 'class' at the beginning of a line\n\
729 generate a tag.";
731 static const char *Ruby_suffixes [] =
732 { "rb", "ru", "rbw", NULL };
733 static const char *Ruby_filenames [] =
734 { "Rakefile", "Thorfile", NULL };
735 static const char Ruby_help [] =
736 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
737 a line generate a tag. Constants also generate a tag.";
739 /* Can't do the `SCM' or `scm' prefix with a version number. */
740 static const char *Scheme_suffixes [] =
741 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
742 static const char Scheme_help [] =
743 "In Scheme code, tags include anything defined with 'def' or with a\n\
744 construct whose name starts with 'def'. They also include\n\
745 variables set with 'set!' at top level in the file.";
747 static const char *TeX_suffixes [] =
748 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
749 static const char TeX_help [] =
750 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
751 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
752 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
753 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
754 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
756 Other commands can be specified by setting the environment variable\n\
757 'TEXTAGS' to a colon-separated list like, for example,\n\
758 TEXTAGS=\"mycommand:myothercommand\".";
761 static const char *Texinfo_suffixes [] =
762 { "texi", "texinfo", "txi", NULL };
763 static const char Texinfo_help [] =
764 "for texinfo files, lines starting with @node are tagged.";
766 static const char *Yacc_suffixes [] =
767 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
768 static const char Yacc_help [] =
769 "In Bison or Yacc input files, each rule defines as a tag the\n\
770 nonterminal it constructs. The portions of the file that contain\n\
771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
772 for full help).";
/* Help text for the pseudo-language 'auto'.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
778 static const char none_help [] =
779 "'none' is not a real language, it indicates to only do\n\
780 regexp processing on files.";
782 static const char no_lang_help [] =
783 "No detailed help available for this language.";
787 * Table of languages.
789 * It is ok for a given function to be listed under more than one
790 * name. I just didn't.
/* Table of every language etags knows about, searched linearly by the
   lookup functions in this file.  Each entry starts with the language
   name, its help text, its tag-finding function and its list of file
   name suffixes.  Longer entries add what appear to be, in order, a
   list of whole file names, a list of interpreter names and the
   "metasource" flag -- TODO confirm the field order against the
   declaration of `language'.  The entry whose name is NULL terminates
   the table.  */
793 static language lang_names [] =
795 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
796 { "asm", Asm_help, Asm_labels, Asm_suffixes },
797 { "c", default_C_help, default_C_entries, default_C_suffixes },
798 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
799 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
800 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
801 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
802 { "forth", Forth_help, Forth_words, Forth_suffixes },
803 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
804 { "go", Go_help, Go_functions, Go_suffixes },
805 { "html", HTML_help, HTML_labels, HTML_suffixes },
806 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
807 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
808 { "lua", Lua_help, Lua_functions, Lua_suffixes },
809 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
810 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
811 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
812 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
813 { "php", PHP_help, PHP_functions, PHP_suffixes },
814 { "postscript",PS_help, PS_functions, PS_suffixes },
815 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
816 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
817 { "python", Python_help, Python_functions, Python_suffixes },
818 { "ruby", Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
819 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
820 { "tex", TeX_help, TeX_commands, TeX_suffixes },
821 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
822 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
823 { "auto", auto_help }, /* default guessing scheme */
824 { "none", none_help, just_read_file }, /* regexp matching only */
825 { NULL } /* end of list */
/* Print on standard output one line per entry of lang_names: the
   language name padded to ten columns, then its default file names,
   then its suffixes each preceded by a dot.  The list is followed by
   an explanation of the 'auto' and 'none' pseudo-languages and of
   how the language is guessed.  */
829 static void
830 print_language_names (void)
832 language *lang;
833 const char **name, **ext;
835 puts ("\nThese are the currently supported languages, along with the\n\
836 default file names and dot suffixes:");
/* Walk the table up to its NULL-named terminator.  */
837 for (lang = lang_names; lang->name != NULL; lang++)
839 printf (" %-*s", 10, lang->name);
/* Both lists are optional and, when present, NULL-terminated.  */
840 if (lang->filenames != NULL)
841 for (name = lang->filenames; *name != NULL; name++)
842 printf (" %s", *name);
843 if (lang->suffixes != NULL)
844 for (ext = lang->suffixes; *ext != NULL; ext++)
845 printf (" .%s", *ext);
846 puts ("");
848 puts ("where 'auto' means use default language for files based on file\n\
849 name suffix, and 'none' means only do regexp processing on files.\n\
850 If no language is specified and no matching suffix is found,\n\
851 the first line of the file is read for a sharp-bang (#!) sequence\n\
852 followed by the name of an interpreter. If no such sequence is found,\n\
853 Fortran is tried first; if no tags are found, C is tried next.\n\
854 When parsing any C file, a \"class\" or \"template\" keyword\n\
855 switches to C++.");
856 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
858 For detailed help on a given language use, for example,\n\
859 etags --help --lang=ada.");
862 #ifndef EMACS_NAME
863 # define EMACS_NAME "standalone"
864 #endif
865 #ifndef VERSION
866 # define VERSION "17.38.1.4"
867 #endif
868 static _Noreturn void
869 print_version (void)
871 char emacs_copyright[] = COPYRIGHT;
873 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
874 puts (emacs_copyright);
875 puts ("This program is distributed under the terms in ETAGS.README");
877 exit (EXIT_SUCCESS);
/* Controls whether print_help also describes the -w option; it
   defaults to false when the build does not define it.  */
880 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
881 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
882 #endif
/* Print help on standard output and exit successfully; never returns.
   ARGBUFFER is the list of recorded command-line arguments, ended by
   an entry of type at_end.  If it contains at_language entries, only
   the help text of those languages is printed, separated by blank
   lines.  Otherwise the usage line, the description of every option
   (the set differs between ctags and etags), the list of supported
   languages and the bug-report address are printed.  */
884 static _Noreturn void
885 print_help (argument *argbuffer)
887 bool help_for_lang = false;
/* First pass: per-language help, one block per --language option.  */
889 for (; argbuffer->arg_type != at_end; argbuffer++)
890 if (argbuffer->arg_type == at_language)
892 if (help_for_lang)
893 puts ("");
894 puts (argbuffer->lang->help);
895 help_for_lang = true;
898 if (help_for_lang)
899 exit (EXIT_SUCCESS);
901 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
903 These are the options accepted by %s.\n", progname, progname);
904 puts ("You may use unambiguous abbreviations for the long option names.");
905 puts (" A - as file name means read names from stdin (one per line).\n\
906 Absolute names are stored in the output file as they are.\n\
907 Relative ones are stored relative to the output file's directory.\n");
909 puts ("-a, --append\n\
910 Append tag entries to existing tags file.");
912 puts ("--packages-only\n\
913 For Ada files, only generate tags for packages.");
915 if (CTAGS)
916 puts ("-B, --backward-search\n\
917 Write the search commands for the tag entries using '?', the\n\
918 backward-search command instead of '/', the forward-search command.");
920 /* This option is mostly obsolete, because etags can now automatically
921 detect C++. Retained for backward compatibility and for debugging and
922 experimentation. In principle, we could want to tag as C++ even
923 before any "class" or "template" keyword.
924 puts ("-C, --c++\n\
925 Treat files whose name suffix defaults to C language as C++ files.");
928 puts ("--declarations\n\
929 In C and derived languages, create tags for function declarations,");
930 if (CTAGS)
931 puts ("\tand create tags for extern variables if --globals is used.");
932 else
933 puts
934 ("\tand create tags for extern variables unless --no-globals is used.");
936 if (CTAGS)
937 puts ("-d, --defines\n\
938 Create tag entries for C #define constants and enum constants, too.");
939 else
940 puts ("-D, --no-defines\n\
941 Don't create tag entries for C #define constants and enum constants.\n\
942 This makes the tags file smaller.");
944 if (!CTAGS)
945 puts ("-i FILE, --include=FILE\n\
946 Include a note in tag file indicating that, when searching for\n\
947 a tag, one should also consult the tags file FILE after\n\
948 checking the current file.");
950 puts ("-l LANG, --language=LANG\n\
951 Force the following files to be considered as written in the\n\
952 named language up to the next --language=LANG option.");
954 if (CTAGS)
955 puts ("--globals\n\
956 Create tag entries for global variables in some languages.");
957 else
958 puts ("--no-globals\n\
959 Do not create tag entries for global variables in some\n\
960 languages. This makes the tags file smaller.");
962 puts ("--no-line-directive\n\
963 Ignore #line preprocessor directives in C and derived languages.");
965 if (CTAGS)
966 puts ("--members\n\
967 Create tag entries for members of structures in some languages.");
968 else
969 puts ("--no-members\n\
970 Do not create tag entries for members of structures\n\
971 in some languages.");
973 puts ("-Q, --class-qualify\n\
974 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
975 This produces tag names of the form \"class::member\" for C++,\n\
976 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
977 For Objective C, this also produces class methods qualified with\n\
978 their arguments, as in \"foo:bar:baz:more\".\n\
979 For Perl, this produces \"package::member\".");
980 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
981 Make a tag for each line matching a regular expression pattern\n\
982 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
983 files only. REGEXFILE is a file containing one REGEXP per line.\n\
984 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
985 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
986 puts (" If TAGNAME/ is present, the tags created are named.\n\
987 For example Tcl named tags can be created with:\n\
988 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
989 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
990 'm' means to allow multi-line matches, 's' implies 'm' and\n\
991 causes dot to match any character, including newline.");
993 puts ("-R, --no-regex\n\
994 Don't create tags from regexps for the following files.");
996 puts ("-I, --ignore-indentation\n\
997 In C and C++ do not assume that a closing brace in the first\n\
998 column is the final brace of a function or structure definition.");
1000 puts ("-o FILE, --output=FILE\n\
1001 Write the tags to FILE.");
1003 puts ("--parse-stdin=NAME\n\
1004 Read from standard input and record tags as belonging to file NAME.");
1006 if (CTAGS)
1008 puts ("-t, --typedefs\n\
1009 Generate tag entries for C and Ada typedefs.");
1010 puts ("-T, --typedefs-and-c++\n\
1011 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1012 and C++ member functions.");
1015 if (CTAGS)
1016 puts ("-u, --update\n\
1017 Update the tag entries for the given files, leaving tag\n\
1018 entries for other files in place. Currently, this is\n\
1019 implemented by deleting the existing entries for the given\n\
1020 files and then rewriting the new entries at the end of the\n\
1021 tags file. It is often faster to simply rebuild the entire\n\
1022 tag file than to use this.");
1024 if (CTAGS)
1026 puts ("-v, --vgrind\n\
1027 Print on the standard output an index of items intended for\n\
1028 human consumption, similar to the output of vgrind. The index\n\
1029 is sorted, and gives the page number of each item.");
1031 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1032 puts ("-w, --no-duplicates\n\
1033 Do not create duplicate tag entries, for compatibility with\n\
1034 traditional ctags.");
1036 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1037 puts ("-w, --no-warn\n\
1038 Suppress warning messages about duplicate tag entries.");
1040 puts ("-x, --cxref\n\
1041 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1042 The output uses line numbers instead of page numbers, but\n\
1043 beyond that the differences are cosmetic; try both to see\n\
1044 which you like.");
1047 puts ("-V, --version\n\
1048 Print the version of the program.\n\
1049 -h, --help\n\
1050 Print this help message.\n\
1051 Followed by one or more '--language' options prints detailed\n\
1052 help about tag generation for the specified languages.");
1054 print_language_names ();
1056 puts ("");
1057 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1059 exit (EXIT_SUCCESS);
/* Program entry point.  Parses the command line with getopt_long,
   recording file names, --parse-stdin names, language switches and
   regexps in ARGBUFFER in the order given, so that each language or
   regexp applies to the files that follow it.  After honoring --help
   it works out the tags file name and the directory relative file
   names are based on, processes every recorded argument in order,
   and writes the tags.  In etags or cxref mode the function returns
   right after writing; in plain ctags mode it optionally removes
   stale entries from the existing tags file (--update), writes the
   new entries, and sorts the file when appending or updating.  */
1064 main (int argc, char **argv)
1066 int i;
1067 unsigned int nincluded_files;
1068 char **included_files;
1069 argument *argbuffer;
1070 int current_arg, file_count;
1071 linebuffer filename_lb;
1072 bool help_asked = false;
1073 ptrdiff_t len;
1074 char *optstring;
1075 int opt;
1077 progname = argv[0];
1078 nincluded_files = 0;
1079 included_files = xnew (argc, char *);
1080 current_arg = 0;
1081 file_count = 0;
1083 /* Allocate enough no matter what happens. Overkill, but each one
1084 is small. */
1085 argbuffer = xnew (argc, argument);
1088 * Always find typedefs and structure tags.
1089 * Also default to find macro constants, enum constants, struct
1090 * members and global variables. Do it for both etags and ctags.
1092 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1093 globals = members = true;
1095 /* When the optstring begins with a '-' getopt_long does not rearrange the
1096 non-options arguments to be at the end, but leaves them alone. */
1097 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1098 (CTAGS) ? "BxdtTuvw" : "Di:",
1099 "");
/* whatlen_max tracks the longest recorded argument string; it is used
   further down to size the --update shell command buffer.  */
1101 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1102 switch (opt)
1104 case 0:
1105 /* If getopt returns 0, then it has already processed a
1106 long-named option. We should do nothing. */
1107 break;
1109 case 1:
1110 /* This means that a file name has been seen. Record it. */
1111 argbuffer[current_arg].arg_type = at_filename;
1112 argbuffer[current_arg].what = optarg;
1113 len = strlen (optarg);
1114 if (whatlen_max < len)
1115 whatlen_max = len;
1116 ++current_arg;
1117 ++file_count;
1118 break;
1120 case STDIN:
1121 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1122 argbuffer[current_arg].arg_type = at_stdin;
1123 argbuffer[current_arg].what = optarg;
1124 len = strlen (optarg);
1125 if (whatlen_max < len)
1126 whatlen_max = len;
1127 ++current_arg;
1128 ++file_count;
1129 if (parsing_stdin)
1130 fatal ("cannot parse standard input more than once");
1131 parsing_stdin = true;
1132 break;
1134 /* Common options. */
1135 case 'a': append_to_tagfile = true; break;
1136 case 'C': cplusplus = true; break;
1137 case 'f': /* for compatibility with old makefiles */
1138 case 'o':
1139 if (tagfile)
1141 error ("-o option may only be given once.");
1142 suggest_asking_for_help ();
1143 /* NOTREACHED */
1145 tagfile = optarg;
1146 break;
1147 case 'I':
1148 case 'S': /* for backward compatibility */
1149 ignoreindent = true;
1150 break;
1151 case 'l':
/* An unknown language name is reported by the lookup and the option
   is then simply not recorded.  */
1153 language *lang = get_language_from_langname (optarg);
1154 if (lang != NULL)
1156 argbuffer[current_arg].lang = lang;
1157 argbuffer[current_arg].arg_type = at_language;
1158 ++current_arg;
1161 break;
1162 case 'c':
1163 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1164 optarg = concat (optarg, "i", ""); /* memory leak here */
1165 FALLTHROUGH;
1166 case 'r':
1167 argbuffer[current_arg].arg_type = at_regexp;
1168 argbuffer[current_arg].what = optarg;
1169 len = strlen (optarg);
1170 if (whatlen_max < len)
1171 whatlen_max = len;
1172 ++current_arg;
1173 break;
1174 case 'R':
/* A regexp entry with a NULL string is recorded for --no-regex.  */
1175 argbuffer[current_arg].arg_type = at_regexp;
1176 argbuffer[current_arg].what = NULL;
1177 ++current_arg;
1178 break;
1179 case 'V':
1180 print_version ();
1181 break;
1182 case 'h':
1183 case 'H':
1184 help_asked = true;
1185 break;
1186 case 'Q':
1187 class_qualify = 1;
1188 break;
1190 /* Etags options */
1191 case 'D': constantypedefs = false; break;
1192 case 'i': included_files[nincluded_files++] = optarg; break;
1194 /* Ctags options. */
1195 case 'B': searchar = '?'; break;
1196 case 'd': constantypedefs = true; break;
1197 case 't': typedefs = true; break;
1198 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1199 case 'u': update = true; break;
1200 case 'v': vgrind_style = true; FALLTHROUGH;
1201 case 'x': cxref_style = true; break;
1202 case 'w': no_warnings = true; break;
1203 default:
1204 suggest_asking_for_help ();
1205 /* NOTREACHED */
1208 /* No more options. Store the rest of arguments. */
1209 for (; optind < argc; optind++)
1211 argbuffer[current_arg].arg_type = at_filename;
1212 argbuffer[current_arg].what = argv[optind];
1213 len = strlen (argv[optind]);
1214 if (whatlen_max < len)
1215 whatlen_max = len;
1216 ++current_arg;
1217 ++file_count;
1220 argbuffer[current_arg].arg_type = at_end;
1222 if (help_asked)
1223 print_help (argbuffer);
1224 /* NOTREACHED */
1226 if (nincluded_files == 0 && file_count == 0)
1228 error ("no input files specified.");
1229 suggest_asking_for_help ();
1230 /* NOTREACHED */
1233 if (tagfile == NULL)
1234 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1235 cwd = etags_getcwd (); /* the current working directory */
/* NOTE(review): the index below is out of bounds if etags_getcwd can
   ever return an empty string -- confirm that it cannot.  */
1236 if (cwd[strlen (cwd) - 1] != '/')
1238 char *oldcwd = cwd;
1239 cwd = concat (oldcwd, "/", "");
1240 free (oldcwd);
1243 /* Compute base directory for relative file names. */
1244 if (streq (tagfile, "-")
1245 || strneq (tagfile, "/dev/", 5))
1246 tagfiledir = cwd; /* relative file names are relative to cwd */
1247 else
1249 canonicalize_filename (tagfile);
1250 tagfiledir = absolute_dirname (tagfile, cwd);
1253 linebuffer_init (&lb);
1254 linebuffer_init (&filename_lb);
1255 linebuffer_init (&filebuf);
1256 linebuffer_init (&token_name);
/* In etags mode the output is opened before any file is processed;
   in ctags mode it is opened only after all files have been read.  */
1258 if (!CTAGS)
1260 if (streq (tagfile, "-"))
1262 tagf = stdout;
1263 set_binary_mode (STDOUT_FILENO, O_BINARY);
1265 else
1266 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1267 if (tagf == NULL)
1268 pfatal (tagfile);
1272 * Loop through files finding functions.
1274 for (i = 0; i < current_arg; i++)
1276 static language *lang; /* non-NULL if language is forced */
1277 char *this_file;
1279 switch (argbuffer[i].arg_type)
1281 case at_language:
1282 lang = argbuffer[i].lang;
1283 break;
1284 case at_regexp:
1285 analyze_regex (argbuffer[i].what);
1286 break;
1287 case at_filename:
1288 this_file = argbuffer[i].what;
1289 /* Input file named "-" means read file names from stdin
1290 (one per line) and use them. */
1291 if (streq (this_file, "-"))
1293 if (parsing_stdin)
1294 fatal ("cannot parse standard input "
1295 "AND read file names from it");
1296 while (readline_internal (&filename_lb, stdin, "-") > 0)
1297 process_file_name (filename_lb.buffer, lang);
1299 else
1300 process_file_name (this_file, lang);
1301 break;
1302 case at_stdin:
1303 this_file = argbuffer[i].what;
1304 process_file (stdin, this_file, lang);
1305 break;
1306 default:
1307 error ("internal error: arg_type");
1311 free_regexps ();
1312 free (lb.buffer);
1313 free (filebuf.buffer);
1314 free (token_name.buffer);
1316 if (!CTAGS || cxref_style)
1318 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1319 put_entries (nodehead);
1320 free_tree (nodehead);
1321 nodehead = NULL;
1322 if (!CTAGS)
1324 fdesc *fdp;
1326 /* Output file entries that have no tags. */
1327 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1328 if (!fdp->written)
1329 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
/* Then one "include" entry per -i option, in the order given.  */
1331 while (nincluded_files-- > 0)
1332 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1334 if (fclose (tagf) == EOF)
1335 pfatal (tagfile);
1338 return EXIT_SUCCESS;
1341 /* From here on, we are in (CTAGS && !cxref_style) */
1342 if (update)
/* For each input file, move the tags file aside and copy back only
   the lines that do not mention that file between two tab characters.
   NOTE(review): tagfile and the file names are pasted into the shell
   command without any quoting, so names containing shell
   metacharacters are interpreted by the shell.  */
1344 char *cmd =
1345 xmalloc (strlen (tagfile) + whatlen_max +
1346 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1347 for (i = 0; i < current_arg; ++i)
1349 switch (argbuffer[i].arg_type)
1351 case at_filename:
1352 case at_stdin:
1353 break;
1354 default:
1355 continue; /* the for loop */
1357 char *z = stpcpy (cmd, "mv ");
1358 z = stpcpy (z, tagfile);
1359 z = stpcpy (z, " OTAGS;grep -Fv '\t");
1360 z = stpcpy (z, argbuffer[i].what);
1361 z = stpcpy (z, "\t' OTAGS >");
1362 z = stpcpy (z, tagfile);
1363 strcpy (z, ";rm OTAGS");
1364 if (system (cmd) != EXIT_SUCCESS)
1365 fatal ("failed to execute shell command");
1367 free (cmd);
1368 append_to_tagfile = true;
1371 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1372 if (tagf == NULL)
1373 pfatal (tagfile);
1374 put_entries (nodehead); /* write all the tags (CTAGS) */
1375 free_tree (nodehead);
1376 nodehead = NULL;
1377 if (fclose (tagf) == EOF)
1378 pfatal (tagfile);
1380 if (CTAGS)
1381 if (append_to_tagfile || update)
/* Sort the tags file in place and drop duplicate lines; the value
   returned by system becomes the program's return value.  */
1383 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1384 /* Maybe these should be used:
1385 setenv ("LC_COLLATE", "C", 1);
1386 setenv ("LC_ALL", "C", 1); */
1387 char *z = stpcpy (cmd, "sort -u -o ");
1388 z = stpcpy (z, tagfile);
1389 *z++ = ' ';
1390 strcpy (z, tagfile);
1391 return system (cmd);
1393 return EXIT_SUCCESS;
1398 * Return a compressor given the file name. If EXTPTR is non-zero,
1399 * return a pointer into FILE where the compressor-specific
1400 * extension begins. If no compressor is found, NULL is returned
1401 * and EXTPTR is not significant.
1402 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1404 static compressor *
1405 get_compressor_from_suffix (char *file, char **extptr)
1407 compressor *compr;
1408 char *slash, *suffix;
1410 /* File has been processed by canonicalize_filename,
1411 so we don't need to consider backslashes on DOS_NT. */
1412 slash = strrchr (file, '/');
1413 suffix = strrchr (file, '.');
1414 if (suffix == NULL || suffix < slash)
1415 return NULL;
1416 if (extptr != NULL)
1417 *extptr = suffix;
1418 suffix += 1;
1419 /* Let those poor souls who live with DOS 8+3 file name limits get
1420 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1421 Only the first do loop is run if not MSDOS */
1424 for (compr = compressors; compr->suffix != NULL; compr++)
1425 if (streq (compr->suffix, suffix))
1426 return compr;
1427 if (!MSDOS)
1428 break; /* do it only once: not really a loop */
1429 if (extptr != NULL)
1430 *extptr = ++suffix;
1431 } while (*suffix != '\0');
1432 return NULL;
1438 * Return a language given the name.
1440 static language *
1441 get_language_from_langname (const char *name)
1443 language *lang;
1445 if (name == NULL)
1446 error ("empty language name");
1447 else
1449 for (lang = lang_names; lang->name != NULL; lang++)
1450 if (streq (name, lang->name))
1451 return lang;
1452 error ("unknown language \"%s\"", name);
1455 return NULL;
1460 * Return a language given the interpreter name.
1462 static language *
1463 get_language_from_interpreter (char *interpreter)
1465 language *lang;
1466 const char **iname;
1468 if (interpreter == NULL)
1469 return NULL;
1470 for (lang = lang_names; lang->name != NULL; lang++)
1471 if (lang->interpreters != NULL)
1472 for (iname = lang->interpreters; *iname != NULL; iname++)
1473 if (streq (*iname, interpreter))
1474 return lang;
1476 return NULL;
1482 * Return a language given the file name.
1484 static language *
1485 get_language_from_filename (char *file, int case_sensitive)
1487 language *lang;
1488 const char **name, **ext, *suffix;
1489 char *slash;
1491 /* Try whole file name first. */
1492 slash = strrchr (file, '/');
1493 if (slash != NULL)
1494 file = slash + 1;
1495 #ifdef DOS_NT
1496 else if (file[0] && file[1] == ':')
1497 file += 2;
1498 #endif
1499 for (lang = lang_names; lang->name != NULL; lang++)
1500 if (lang->filenames != NULL)
1501 for (name = lang->filenames; *name != NULL; name++)
1502 if ((case_sensitive)
1503 ? streq (*name, file)
1504 : strcaseeq (*name, file))
1505 return lang;
1507 /* If not found, try suffix after last dot. */
1508 suffix = strrchr (file, '.');
1509 if (suffix == NULL)
1510 return NULL;
1511 suffix += 1;
1512 for (lang = lang_names; lang->name != NULL; lang++)
1513 if (lang->suffixes != NULL)
1514 for (ext = lang->suffixes; *ext != NULL; ext++)
1515 if ((case_sensitive)
1516 ? streq (*ext, suffix)
1517 : strcaseeq (*ext, suffix))
1518 return lang;
1519 return NULL;
1524 * This routine is called on each file argument.
1526 static void
1527 process_file_name (char *file, language *lang)
1529 FILE *inf;
1530 fdesc *fdp;
1531 compressor *compr;
1532 char *compressed_name, *uncompressed_name;
1533 char *ext, *real_name UNINIT, *tmp_name;
1534 int retval;
1536 canonicalize_filename (file);
1537 if (streq (file, tagfile) && !streq (tagfile, "-"))
1539 error ("skipping inclusion of %s in self.", file);
1540 return;
1542 compr = get_compressor_from_suffix (file, &ext);
1543 if (compr)
1545 compressed_name = file;
1546 uncompressed_name = savenstr (file, ext - file);
1548 else
1550 compressed_name = NULL;
1551 uncompressed_name = file;
1554 /* If the canonicalized uncompressed name
1555 has already been dealt with, skip it silently. */
1556 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1558 assert (fdp->infname != NULL);
1559 if (streq (uncompressed_name, fdp->infname))
1560 goto cleanup;
1563 inf = fopen (file, "r" FOPEN_BINARY);
1564 if (inf)
1565 real_name = file;
1566 else
1568 int file_errno = errno;
1569 if (compressed_name)
1571 /* Try with the given suffix. */
1572 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1573 if (inf)
1574 real_name = uncompressed_name;
1576 else
1578 /* Try all possible suffixes. */
1579 for (compr = compressors; compr->suffix != NULL; compr++)
1581 compressed_name = concat (file, ".", compr->suffix);
1582 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1583 if (inf)
1585 real_name = compressed_name;
1586 break;
1588 if (MSDOS)
1590 char *suf = compressed_name + strlen (file);
1591 size_t suflen = strlen (compr->suffix) + 1;
1592 for ( ; suf[1]; suf++, suflen--)
1594 memmove (suf, suf + 1, suflen);
1595 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1596 if (inf)
1598 real_name = compressed_name;
1599 break;
1602 if (inf)
1603 break;
1605 free (compressed_name);
1606 compressed_name = NULL;
1609 if (! inf)
1611 errno = file_errno;
1612 perror (file);
1613 goto cleanup;
1617 if (real_name == compressed_name)
1619 fclose (inf);
1620 tmp_name = etags_mktmp ();
1621 if (!tmp_name)
1622 inf = NULL;
1623 else
1625 #if MSDOS || defined (DOS_NT)
1626 char *cmd1 = concat (compr->command, " \"", real_name);
1627 char *cmd = concat (cmd1, "\" > ", tmp_name);
1628 #else
1629 char *cmd1 = concat (compr->command, " '", real_name);
1630 char *cmd = concat (cmd1, "' > ", tmp_name);
1631 #endif
1632 free (cmd1);
1633 int tmp_errno;
1634 if (system (cmd) == -1)
1636 inf = NULL;
1637 tmp_errno = EINVAL;
1639 else
1641 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1642 tmp_errno = errno;
1644 free (cmd);
1645 errno = tmp_errno;
1648 if (!inf)
1650 perror (real_name);
1651 goto cleanup;
1655 process_file (inf, uncompressed_name, lang);
1657 retval = fclose (inf);
1658 if (real_name == compressed_name)
1660 remove (tmp_name);
1661 free (tmp_name);
1663 if (retval < 0)
1664 pfatal (file);
1666 cleanup:
1667 if (compressed_name != file)
1668 free (compressed_name);
1669 if (uncompressed_name != file)
1670 free (uncompressed_name);
1671 last_node = NULL;
1672 curfdp = NULL;
1673 return;
1676 static void
1677 process_file (FILE *fh, char *fn, language *lang)
1679 static const fdesc emptyfdesc;
1680 fdesc *fdp;
1682 infilename = fn;
1683 /* Create a new input file description entry. */
1684 fdp = xnew (1, fdesc);
1685 *fdp = emptyfdesc;
1686 fdp->next = fdhead;
1687 fdp->infname = savestr (fn);
1688 fdp->lang = lang;
1689 fdp->infabsname = absolute_filename (fn, cwd);
1690 fdp->infabsdir = absolute_dirname (fn, cwd);
1691 if (filename_is_absolute (fn))
1693 /* An absolute file name. Canonicalize it. */
1694 fdp->taggedfname = absolute_filename (fn, NULL);
1696 else
1698 /* A file name relative to cwd. Make it relative
1699 to the directory of the tags file. */
1700 fdp->taggedfname = relative_filename (fn, tagfiledir);
1702 fdp->usecharno = true; /* use char position when making tags */
1703 fdp->prop = NULL;
1704 fdp->written = false; /* not written on tags file yet */
1706 fdhead = fdp;
1707 curfdp = fdhead; /* the current file description */
1709 find_entries (fh);
1711 /* If not Ctags, and if this is not metasource and if it contained no #line
1712 directives, we can write the tags and free all nodes pointing to
1713 curfdp. */
1714 if (!CTAGS
1715 && curfdp->usecharno /* no #line directives in this file */
1716 && !curfdp->lang->metasource)
1718 node *np, *prev;
1720 /* Look for the head of the sublist relative to this file. See add_node
1721 for the structure of the node tree. */
1722 prev = NULL;
1723 for (np = nodehead; np != NULL; prev = np, np = np->left)
1724 if (np->fdp == curfdp)
1725 break;
1727 /* If we generated tags for this file, write and delete them. */
1728 if (np != NULL)
1730 /* This is the head of the last sublist, if any. The following
1731 instructions depend on this being true. */
1732 assert (np->left == NULL);
1734 assert (fdhead == curfdp);
1735 assert (last_node->fdp == curfdp);
1736 put_entries (np); /* write tags for file curfdp->taggedfname */
1737 free_tree (np); /* remove the written nodes */
1738 if (prev == NULL)
1739 nodehead = NULL; /* no nodes left */
1740 else
1741 prev->left = NULL; /* delete the pointer to the sublist */
1746 static void
1747 reset_input (FILE *inf)
1749 if (fseek (inf, 0, SEEK_SET) != 0)
1750 perror (infilename);
1754 * This routine opens the specified file and calls the function
1755 * which finds the function and type definitions.
1757 static void
1758 find_entries (FILE *inf)
1760 char *cp;
1761 language *lang = curfdp->lang;
1762 Lang_function *parser = NULL;
1764 /* If user specified a language, use it. */
1765 if (lang != NULL && lang->function != NULL)
1767 parser = lang->function;
1770 /* Else try to guess the language given the file name. */
1771 if (parser == NULL)
1773 lang = get_language_from_filename (curfdp->infname, true);
1774 if (lang != NULL && lang->function != NULL)
1776 curfdp->lang = lang;
1777 parser = lang->function;
1781 /* Else look for sharp-bang as the first two characters. */
1782 if (parser == NULL
1783 && readline_internal (&lb, inf, infilename) > 0
1784 && lb.len >= 2
1785 && lb.buffer[0] == '#'
1786 && lb.buffer[1] == '!')
1788 char *lp;
1790 /* Set lp to point at the first char after the last slash in the
1791 line or, if no slashes, at the first nonblank. Then set cp to
1792 the first successive blank and terminate the string. */
1793 lp = strrchr (lb.buffer+2, '/');
1794 if (lp != NULL)
1795 lp += 1;
1796 else
1797 lp = skip_spaces (lb.buffer + 2);
1798 cp = skip_non_spaces (lp);
1799 *cp = '\0';
1801 if (strlen (lp) > 0)
1803 lang = get_language_from_interpreter (lp);
1804 if (lang != NULL && lang->function != NULL)
1806 curfdp->lang = lang;
1807 parser = lang->function;
1812 reset_input (inf);
1814 /* Else try to guess the language given the case insensitive file name. */
1815 if (parser == NULL)
1817 lang = get_language_from_filename (curfdp->infname, false);
1818 if (lang != NULL && lang->function != NULL)
1820 curfdp->lang = lang;
1821 parser = lang->function;
1825 /* Else try Fortran or C. */
1826 if (parser == NULL)
1828 node *old_last_node = last_node;
1830 curfdp->lang = get_language_from_langname ("fortran");
1831 find_entries (inf);
1833 if (old_last_node == last_node)
1834 /* No Fortran entries found. Try C. */
1836 reset_input (inf);
1837 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1838 find_entries (inf);
1840 return;
1843 if (!no_line_directive
1844 && curfdp->lang != NULL && curfdp->lang->metasource)
1845 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1846 file, or anyway we parsed a file that is automatically generated from
1847 this one. If this is the case, the bingo.c file contained #line
1848 directives that generated tags pointing to this file. Let's delete
1849 them all before parsing this file, which is the real source. */
1851 fdesc **fdpp = &fdhead;
1852 while (*fdpp != NULL)
1853 if (*fdpp != curfdp
1854 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1855 /* We found one of those! We must delete both the file description
1856 and all tags referring to it. */
1858 fdesc *badfdp = *fdpp;
1860 /* Delete the tags referring to badfdp->taggedfname
1861 that were obtained from badfdp->infname. */
1862 invalidate_nodes (badfdp, &nodehead);
1864 *fdpp = badfdp->next; /* remove the bad description from the list */
1865 free_fdesc (badfdp);
1867 else
1868 fdpp = &(*fdpp)->next; /* advance the list pointer */
1871 assert (parser != NULL);
1873 /* Generic initializations before reading from file. */
1874 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1876 /* Generic initializations before parsing file with readline. */
1877 lineno = 0; /* reset global line number */
1878 charno = 0; /* reset global char number */
1879 linecharno = 0; /* reset global char number of line start */
1881 parser (inf);
1883 regex_tag_multiline ();
1888 * Check whether an implicitly named tag should be created,
1889 * then call `pfnote'.
1890 * NAME is a string that is internally copied by this function.
1892 * TAGS format specification
1893 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1894 * The following is explained in some more detail in etc/ETAGS.EBNF.
1896 * make_tag creates tags with "implicit tag names" (unnamed tags)
1897 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1898 * 1. NAME does not contain any of the characters in NONAM;
1899 * 2. LINESTART contains name as either a rightmost, or rightmost but
1900 * one character, substring;
1901 * 3. the character, if any, immediately before NAME in LINESTART must
1902 * be a character in NONAM;
1903 * 4. the character, if any, immediately after NAME in LINESTART must
1904 * also be a character in NONAM.
1906 * The implementation uses the notinname() macro, which recognizes the
1907 * characters stored in the string `nonam'.
1908 * etags.el needs to use the same characters that are in NONAM.
1910 static void
1911 make_tag (const char *name, /* tag name, or NULL if unnamed */
1912 int namelen, /* tag length */
1913 bool is_func, /* tag is a function */
1914 char *linestart, /* start of the line where tag is */
1915 int linelen, /* length of the line where tag is */
1916 int lno, /* line number */
1917 long int cno) /* character number */
1919 bool named = (name != NULL && namelen > 0);
1920 char *nname = NULL;
1922 if (debug)
1923 fprintf (stderr, "%s on %s:%d: %s\n",
1924 named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
1926 if (!CTAGS && named) /* maybe set named to false */
1927 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1928 such that etags.el can guess a name from it. */
1930 int i;
1931 register const char *cp = name;
1933 for (i = 0; i < namelen; i++)
1934 if (notinname (*cp++))
1935 break;
1936 if (i == namelen) /* rule #1 */
1938 cp = linestart + linelen - namelen;
1939 if (notinname (linestart[linelen-1]))
1940 cp -= 1; /* rule #4 */
1941 if (cp >= linestart /* rule #2 */
1942 && (cp == linestart
1943 || notinname (cp[-1])) /* rule #3 */
1944 && strneq (name, cp, namelen)) /* rule #2 */
1945 named = false; /* use implicit tag name */
1949 if (named)
1950 nname = savenstr (name, namelen);
1952 pfnote (nname, is_func, linestart, linelen, lno, cno);
1955 /* Record a tag. */
1956 static void
1957 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1958 long int cno)
1959 /* tag name, or NULL if unnamed */
1960 /* tag is a function */
1961 /* start of the line where tag is */
1962 /* length of the line where tag is */
1963 /* line number */
1964 /* character number */
1966 register node *np;
1968 assert (name == NULL || name[0] != '\0');
1969 if (CTAGS && name == NULL)
1970 return;
1972 np = xnew (1, node);
1974 /* If ctags mode, change name "main" to M<thisfilename>. */
1975 if (CTAGS && !cxref_style && streq (name, "main"))
1977 char *fp = strrchr (curfdp->taggedfname, '/');
1978 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1979 fp = strrchr (np->name, '.');
1980 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1981 fp[0] = '\0';
1983 else
1984 np->name = name;
1985 np->valid = true;
1986 np->been_warned = false;
1987 np->fdp = curfdp;
1988 np->is_func = is_func;
1989 np->lno = lno;
1990 if (np->fdp->usecharno)
1991 /* Our char numbers are 0-base, because of C language tradition?
1992 ctags compatibility? old versions compatibility? I don't know.
1993 Anyway, since emacs's are 1-base we expect etags.el to take care
1994 of the difference. If we wanted to have 1-based numbers, we would
1995 uncomment the +1 below. */
1996 np->cno = cno /* + 1 */ ;
1997 else
1998 np->cno = invalidcharno;
1999 np->left = np->right = NULL;
2000 if (CTAGS && !cxref_style)
2002 if (strlen (linestart) < 50)
2003 np->regex = concat (linestart, "$", "");
2004 else
2005 np->regex = savenstr (linestart, 50);
2007 else
2008 np->regex = savenstr (linestart, linelen);
2010 add_node (np, &nodehead);
2014 * Utility functions and data to avoid recursion.
2017 typedef struct stack_entry {
2018 node *np;
2019 struct stack_entry *next;
2020 } stkentry;
2022 static void
2023 push_node (node *np, stkentry **stack_top)
2025 if (np)
2027 stkentry *new = xnew (1, stkentry);
2029 new->np = np;
2030 new->next = *stack_top;
2031 *stack_top = new;
2035 static node *
2036 pop_node (stkentry **stack_top)
2038 node *ret = NULL;
2040 if (*stack_top)
2042 stkentry *old_start = *stack_top;
2044 ret = (*stack_top)->np;
2045 *stack_top = (*stack_top)->next;
2046 free (old_start);
2048 return ret;
2052 * free_tree ()
2053 * emulate recursion on left children, iterate on right children.
2055 static void
2056 free_tree (register node *np)
2058 stkentry *stack = NULL;
2060 while (np)
2062 /* Descent on left children. */
2063 while (np->left)
2065 push_node (np, &stack);
2066 np = np->left;
2068 /* Free node without left children. */
2069 node *node_right = np->right;
2070 free (np->name);
2071 free (np->regex);
2072 free (np);
2073 if (!node_right)
2075 /* Backtrack to find a node with right children, while freeing nodes
2076 that don't have right children. */
2077 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2079 node_right = np->right;
2080 free (np->name);
2081 free (np->regex);
2082 free (np);
2085 /* Free right children. */
2086 np = node_right;
2091 * free_fdesc ()
2092 * delete a file description
2094 static void
2095 free_fdesc (register fdesc *fdp)
2097 free (fdp->infname);
2098 free (fdp->infabsname);
2099 free (fdp->infabsdir);
2100 free (fdp->taggedfname);
2101 free (fdp->prop);
2102 free (fdp);
2106 * add_node ()
2107 * Adds a node to the tree of nodes. In etags mode, sort by file
2108 * name. In ctags mode, sort by tag name. Make no attempt at
2109 * balancing.
2111 * add_node is the only function allowed to add nodes, so it can
2112 * maintain state.
2114 static void
2115 add_node (node *np, node **cur_node_p)
2117 node *cur_node = *cur_node_p;
2119 /* Make the first node. */
2120 if (cur_node == NULL)
2122 *cur_node_p = np;
2123 last_node = np;
2124 return;
2127 if (!CTAGS)
2128 /* Etags Mode */
2130 /* For each file name, tags are in a linked sublist on the right
2131 pointer. The first tags of different files are a linked list
2132 on the left pointer. last_node points to the end of the last
2133 used sublist. */
2134 if (last_node != NULL && last_node->fdp == np->fdp)
2136 /* Let's use the same sublist as the last added node. */
2137 assert (last_node->right == NULL);
2138 last_node->right = np;
2139 last_node = np;
2141 else
2143 while (cur_node->fdp != np->fdp)
2145 if (cur_node->left == NULL)
2146 break;
2147 /* The head of this sublist is not good for us. Let's try the
2148 next one. */
2149 cur_node = cur_node->left;
2151 if (cur_node->left)
2153 /* Scanning the list we found the head of a sublist which is
2154 good for us. Let's scan this sublist. */
2155 if (cur_node->right)
2157 cur_node = cur_node->right;
2158 while (cur_node->right)
2159 cur_node = cur_node->right;
2161 /* Make a new node in this sublist. */
2162 cur_node->right = np;
2164 else
2166 /* Make a new sublist. */
2167 cur_node->left = np;
2169 last_node = np;
2171 } /* if ETAGS mode */
2172 else
2174 /* Ctags Mode */
2175 node **next_node = &cur_node;
2177 while ((cur_node = *next_node) != NULL)
2179 int dif = strcmp (np->name, cur_node->name);
2181 * If this tag name matches an existing one, then
2182 * do not add the node, but maybe print a warning.
2184 if (!dif && no_duplicates)
2186 if (np->fdp == cur_node->fdp)
2188 if (!no_warnings)
2190 fprintf (stderr,
2191 "Duplicate entry in file %s, line %d: %s\n",
2192 np->fdp->infname, lineno, np->name);
2193 fprintf (stderr, "Second entry ignored\n");
2196 else if (!cur_node->been_warned && !no_warnings)
2198 fprintf
2199 (stderr,
2200 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2201 np->fdp->infname, cur_node->fdp->infname, np->name);
2202 cur_node->been_warned = true;
2204 return;
2206 else
2207 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2209 *next_node = np;
2210 last_node = np;
2211 } /* if CTAGS mode */
2215 * invalidate_nodes ()
2216 * Scan the node tree and invalidate all nodes pointing to the
2217 * given file description (CTAGS case) or free them (ETAGS case).
2219 static void
2220 invalidate_nodes (fdesc *badfdp, node **npp)
2222 node *np = *npp;
2223 stkentry *stack = NULL;
2225 if (CTAGS)
2227 while (np)
2229 /* Push all the left children on the stack. */
2230 while (np->left != NULL)
2232 push_node (np, &stack);
2233 np = np->left;
2235 /* Invalidate this node. */
2236 if (np->fdp == badfdp)
2237 np->valid = false;
2238 if (!np->right)
2240 /* Pop nodes from stack, invalidating them, until we find one
2241 with a right child. */
2242 while ((np = pop_node (&stack)) != NULL)
2244 if (np->fdp == badfdp)
2245 np->valid = false;
2246 if (np->right != NULL)
2247 break;
2250 /* Process the right child, if any. */
2251 if (np)
2252 np = np->right;
2255 else
2257 node super_root, *np_parent = NULL;
2259 super_root.left = np;
2260 super_root.fdp = (fdesc *) -1;
2261 np = &super_root;
2263 while (np)
2265 /* Descent on left children until node with BADFP. */
2266 while (np && np->fdp != badfdp)
2268 assert (np->fdp != NULL);
2269 np_parent = np;
2270 np = np->left;
2272 if (np)
2274 np_parent->left = np->left; /* detach subtree from the tree */
2275 np->left = NULL; /* isolate it */
2276 free_tree (np); /* free it */
2278 /* Continue with rest of tree. */
2279 np = np_parent->left;
2282 *npp = super_root.left;
2287 static int total_size_of_entries (node *);
2288 static int number_len (long) ATTRIBUTE_CONST;
/* Length of a non-negative number's decimal representation.
   NUM is repeatedly divided by ten; every quotient that is still
   positive accounts for one more digit.  */
static int
number_len (long int num)
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2301 * Return total number of characters that put_entries will output for
2302 * the nodes in the linked list at the right of the specified node.
2303 * This count is irrelevant with etags.el since emacs 19.34 at least,
2304 * but is still supplied for backward compatibility.
2306 static int
2307 total_size_of_entries (register node *np)
2309 register int total = 0;
2311 for (; np != NULL; np = np->right)
2312 if (np->valid)
2314 total += strlen (np->regex) + 1; /* pat\177 */
2315 if (np->name != NULL)
2316 total += strlen (np->name) + 1; /* name\001 */
2317 total += number_len ((long) np->lno) + 1; /* lno, */
2318 if (np->cno != invalidcharno) /* cno */
2319 total += number_len (np->cno);
2320 total += 1; /* newline */
2323 return total;
2326 static void
2327 put_entry (node *np)
2329 register char *sp;
2330 static fdesc *fdp = NULL;
2332 /* Output this entry */
2333 if (np->valid)
2335 if (!CTAGS)
2337 /* Etags mode */
2338 if (fdp != np->fdp)
2340 fdp = np->fdp;
2341 fprintf (tagf, "\f\n%s,%d\n",
2342 fdp->taggedfname, total_size_of_entries (np));
2343 fdp->written = true;
2345 fputs (np->regex, tagf);
2346 fputc ('\177', tagf);
2347 if (np->name != NULL)
2349 fputs (np->name, tagf);
2350 fputc ('\001', tagf);
2352 fprintf (tagf, "%d,", np->lno);
2353 if (np->cno != invalidcharno)
2354 fprintf (tagf, "%ld", np->cno);
2355 fputs ("\n", tagf);
2357 else
2359 /* Ctags mode */
2360 if (np->name == NULL)
2361 error ("internal error: NULL name in ctags mode.");
2363 if (cxref_style)
2365 if (vgrind_style)
2366 fprintf (stdout, "%s %s %d\n",
2367 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2368 else
2369 fprintf (stdout, "%-16s %3d %-16s %s\n",
2370 np->name, np->lno, np->fdp->taggedfname, np->regex);
2372 else
2374 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2376 if (np->is_func)
2377 { /* function or #define macro with args */
2378 putc (searchar, tagf);
2379 putc ('^', tagf);
2381 for (sp = np->regex; *sp; sp++)
2383 if (*sp == '\\' || *sp == searchar)
2384 putc ('\\', tagf);
2385 putc (*sp, tagf);
2387 putc (searchar, tagf);
2389 else
2390 { /* anything else; text pattern inadequate */
2391 fprintf (tagf, "%d", np->lno);
2393 putc ('\n', tagf);
2396 } /* if this node contains a valid tag */
2399 static void
2400 put_entries (node *np)
2402 stkentry *stack = NULL;
2404 if (np == NULL)
2405 return;
2407 if (CTAGS)
2409 while (np)
2411 /* Stack subentries that precede this one. */
2412 while (np->left)
2414 push_node (np, &stack);
2415 np = np->left;
2417 /* Output this subentry. */
2418 put_entry (np);
2419 /* Stack subentries that follow this one. */
2420 while (!np->right)
2422 /* Output subentries that precede the next one. */
2423 np = pop_node (&stack);
2424 if (!np)
2425 break;
2426 put_entry (np);
2428 if (np)
2429 np = np->right;
2432 else
2434 push_node (np, &stack);
2435 while ((np = pop_node (&stack)) != NULL)
2437 /* Output this subentry. */
2438 put_entry (np);
2439 while (np->right)
2441 /* Output subentries that follow this one. */
2442 put_entry (np->right);
2443 /* Stack subentries from the following files. */
2444 push_node (np->left, &stack);
2445 np = np->right;
2447 push_node (np->left, &stack);
/* C extensions.  Bit masks describing which C-like language is being
   parsed.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 */
/* Classification of a token, as stored in the keyword table.  */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute, st_C_enum_bf,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
ENUM_BF,        0,                      st_C_enum_bf
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - remove any #line directives
 - make in_word_set static and not inline
 - remove any 'register' qualifications from variable decls. */
2530 /*%<*/
2531 /* C code produced by gperf version 3.0.1 */
2532 /* Command-line: gperf -m 5 */
2533 /* Computed positions: -k'2-3' */
2535 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2536 /* maximum key range = 34, duplicates = 0 */
2538 static int
2539 hash (const char *str, int len)
2541 static char const asso_values[] =
2543 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2544 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2545 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2546 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2547 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2548 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2549 36, 36, 36, 36, 36, 36, 36, 36, 36, 3,
2550 27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
2551 36, 36, 36, 25, 0, 0, 36, 36, 36, 0,
2552 36, 36, 36, 36, 36, 1, 36, 16, 36, 6,
2553 23, 0, 0, 36, 22, 0, 36, 36, 5, 0,
2554 0, 15, 1, 36, 6, 36, 8, 19, 36, 16,
2555 4, 5, 36, 36, 36, 36, 36, 36, 36, 36,
2556 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2557 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2558 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2559 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2560 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2561 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2562 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2563 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2564 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2565 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2566 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2567 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2568 36, 36, 36, 36, 36, 36
2570 int hval = len;
2572 switch (hval)
2574 default:
2575 hval += asso_values[(unsigned char) str[2]];
2576 FALLTHROUGH;
2577 case 2:
2578 hval += asso_values[(unsigned char) str[1]];
2579 break;
2581 return hval;
2584 static struct C_stab_entry *
2585 in_word_set (register const char *str, register unsigned int len)
2587 enum
2589 TOTAL_KEYWORDS = 34,
2590 MIN_WORD_LENGTH = 2,
2591 MAX_WORD_LENGTH = 15,
2592 MIN_HASH_VALUE = 2,
2593 MAX_HASH_VALUE = 35
2596 static struct C_stab_entry wordlist[] =
2598 {""}, {""},
2599 {"if", 0, st_C_ignore},
2600 {"GTY", 0, st_C_attribute},
2601 {"@end", 0, st_C_objend},
2602 {"union", 0, st_C_struct},
2603 {"define", 0, st_C_define},
2604 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2605 {"template", 0, st_C_template},
2606 {"operator", C_PLPL, st_C_operator},
2607 {"@interface", 0, st_C_objprot},
2608 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2609 {"friend", C_PLPL, st_C_ignore},
2610 {"typedef", 0, st_C_typedef},
2611 {"return", 0, st_C_ignore},
2612 {"@implementation",0, st_C_objimpl},
2613 {"@protocol", 0, st_C_objprot},
2614 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2615 {"extern", 0, st_C_extern},
2616 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2617 {"struct", 0, st_C_struct},
2618 {"domain", C_STAR, st_C_struct},
2619 {"switch", 0, st_C_ignore},
2620 {"enum", 0, st_C_enum},
2621 {"for", 0, st_C_ignore},
2622 {"namespace", C_PLPL, st_C_struct},
2623 {"class", 0, st_C_class},
2624 {"while", 0, st_C_ignore},
2625 {"undef", 0, st_C_define},
2626 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2627 {"__attribute__", 0, st_C_attribute},
2628 {"ENTRY", 0, st_C_gnumacro},
2629 {"SYSCALL", 0, st_C_gnumacro},
2630 {"ENUM_BF", 0, st_C_enum_bf},
2631 {"PSEUDO", 0, st_C_gnumacro},
2632 {"DEFUN", 0, st_C_gnumacro}
2635 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2637 int key = hash (str, len);
2639 if (key <= MAX_HASH_VALUE && key >= 0)
2641 const char *s = wordlist[key].name;
2643 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2644 return &wordlist[key];
2647 return 0;
2649 /*%>*/
2651 static enum sym_type
2652 C_symtype (char *str, int len, int c_ext)
2654 register struct C_stab_entry *se = in_word_set (str, len);
2656 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2657 return st_none;
2658 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/* Ignoring ENUM_BF (type)
 *
 */
static bool in_enum_bf;         /* inside parentheses following ENUM_BF */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* if false, do not create a tag; the
                                   token should be invalidated whenever
                                   a state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2781 * Variables and functions for dealing with nested structures.
2782 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2784 static void pushclass_above (int, char *, int);
2785 static void popclass_above (int);
2786 static void write_classname (linebuffer *, const char *qualifier);
2788 static struct {
2789 char **cname; /* nested class names */
2790 int *bracelev; /* nested class brace level */
2791 int nl; /* class nesting level (elements used) */
2792 int size; /* length of the array */
2793 } cstack; /* stack for nested declaration tags */
2794 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2795 #define nestlev (cstack.nl)
2796 /* After struct keyword or in struct body, not inside a nested function. */
2797 #define instruct (structdef == snone && nestlev > 0 \
2798 && bracelev == cstack.bracelev[nestlev-1] + 1)
2800 static void
2801 pushclass_above (int bracelev, char *str, int len)
2803 int nl;
2805 popclass_above (bracelev);
2806 nl = cstack.nl;
2807 if (nl >= cstack.size)
2809 int size = cstack.size *= 2;
2810 xrnew (cstack.cname, size, char *);
2811 xrnew (cstack.bracelev, size, int);
2813 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2814 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2815 cstack.bracelev[nl] = bracelev;
2816 cstack.nl = nl + 1;
2819 static void
2820 popclass_above (int bracelev)
2822 int nl;
2824 for (nl = cstack.nl - 1;
2825 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2826 nl--)
2828 free (cstack.cname[nl]);
2829 cstack.nl = nl;
2833 static void
2834 write_classname (linebuffer *cn, const char *qualifier)
2836 int i, len;
2837 int qlen = strlen (qualifier);
2839 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2841 len = 0;
2842 cn->len = 0;
2843 cn->buffer[0] = '\0';
2845 else
2847 len = strlen (cstack.cname[0]);
2848 linebuffer_setlen (cn, len);
2849 strcpy (cn->buffer, cstack.cname[0]);
2851 for (i = 1; i < cstack.nl; i++)
2853 char *s = cstack.cname[i];
2854 if (s == NULL)
2855 continue;
2856 linebuffer_setlen (cn, len + qlen + strlen (s));
2857 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
static bool consider_token (char *, int, int, int *, int, int, bool *);
static void make_C_tag (bool);
2866 * consider_token ()
2867 * checks to see if the current token is at the start of a
2868 * function or variable, or corresponds to a typedef, or
2869 * is a struct/union/enum tag, or #define, or an enum constant.
2871 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2872 * with args. C_EXTP points to which language we are looking at.
2874 * Globals
2875 * fvdef IN OUT
2876 * structdef IN OUT
2877 * definedef IN OUT
2878 * typdef IN OUT
2879 * objdef IN OUT
2882 static bool
2883 consider_token (char *str, int len, int c, int *c_extp,
2884 int bracelev, int parlev, bool *is_func_or_var)
2885 /* IN: token pointer */
2886 /* IN: token length */
2887 /* IN: first char after the token */
2888 /* IN, OUT: C extensions mask */
2889 /* IN: brace level */
2890 /* IN: parenthesis level */
2891 /* OUT: function or variable found */
2893 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2894 structtype is the type of the preceding struct-like keyword, and
2895 structbracelev is the brace level where it has been seen. */
2896 static enum sym_type structtype;
2897 static int structbracelev;
2898 static enum sym_type toktype;
2901 toktype = C_symtype (str, len, *c_extp);
2904 * Skip __attribute__
2906 if (toktype == st_C_attribute)
2908 inattribute = true;
2909 return false;
2913 * Skip ENUM_BF
2915 if (toktype == st_C_enum_bf && definedef == dnone)
2917 in_enum_bf = true;
2918 return false;
2922 * Advance the definedef state machine.
2924 switch (definedef)
2926 case dnone:
2927 /* We're not on a preprocessor line. */
2928 if (toktype == st_C_gnumacro)
2930 fvdef = fdefunkey;
2931 return false;
2933 break;
2934 case dsharpseen:
2935 if (toktype == st_C_define)
2937 definedef = ddefineseen;
2939 else
2941 definedef = dignorerest;
2943 return false;
2944 case ddefineseen:
2946 * Make a tag for any macro, unless it is a constant
2947 * and constantypedefs is false.
2949 definedef = dignorerest;
2950 *is_func_or_var = (c == '(');
2951 if (!*is_func_or_var && !constantypedefs)
2952 return false;
2953 else
2954 return true;
2955 case dignorerest:
2956 return false;
2957 default:
2958 error ("internal error: definedef value.");
2962 * Now typedefs
2964 switch (typdef)
2966 case tnone:
2967 if (toktype == st_C_typedef)
2969 if (typedefs)
2970 typdef = tkeyseen;
2971 fvextern = false;
2972 fvdef = fvnone;
2973 return false;
2975 break;
2976 case tkeyseen:
2977 switch (toktype)
2979 case st_none:
2980 case st_C_class:
2981 case st_C_struct:
2982 case st_C_enum:
2983 typdef = ttypeseen;
2984 break;
2985 default:
2986 break;
2988 break;
2989 case ttypeseen:
2990 if (structdef == snone && fvdef == fvnone)
2992 fvdef = fvnameseen;
2993 return true;
2995 break;
2996 case tend:
2997 switch (toktype)
2999 case st_C_class:
3000 case st_C_struct:
3001 case st_C_enum:
3002 return false;
3003 default:
3004 return true;
3006 default:
3007 break;
3010 switch (toktype)
3012 case st_C_javastruct:
3013 if (structdef == stagseen)
3014 structdef = scolonseen;
3015 return false;
3016 case st_C_template:
3017 case st_C_class:
3018 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3019 && bracelev == 0
3020 && definedef == dnone && structdef == snone
3021 && typdef == tnone && fvdef == fvnone)
3022 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3023 if (toktype == st_C_template)
3024 break;
3025 FALLTHROUGH;
3026 case st_C_struct:
3027 case st_C_enum:
3028 if (parlev == 0
3029 && fvdef != vignore
3030 && (typdef == tkeyseen
3031 || (typedefs_or_cplusplus && structdef == snone)))
3033 structdef = skeyseen;
3034 structtype = toktype;
3035 structbracelev = bracelev;
3036 if (fvdef == fvnameseen)
3037 fvdef = fvnone;
3039 return false;
3040 default:
3041 break;
3044 if (structdef == skeyseen)
3046 structdef = stagseen;
3047 return true;
3050 if (typdef != tnone)
3051 definedef = dnone;
3053 /* Detect Objective C constructs. */
3054 switch (objdef)
3056 case onone:
3057 switch (toktype)
3059 case st_C_objprot:
3060 objdef = oprotocol;
3061 return false;
3062 case st_C_objimpl:
3063 objdef = oimplementation;
3064 return false;
3065 default:
3066 break;
3068 break;
3069 case oimplementation:
3070 /* Save the class tag for functions or variables defined inside. */
3071 objtag = savenstr (str, len);
3072 objdef = oinbody;
3073 return false;
3074 case oprotocol:
3075 /* Save the class tag for categories. */
3076 objtag = savenstr (str, len);
3077 objdef = otagseen;
3078 *is_func_or_var = true;
3079 return true;
3080 case oparenseen:
3081 objdef = ocatseen;
3082 *is_func_or_var = true;
3083 return true;
3084 case oinbody:
3085 break;
3086 case omethodsign:
3087 if (parlev == 0)
3089 fvdef = fvnone;
3090 objdef = omethodtag;
3091 linebuffer_setlen (&token_name, len);
3092 memcpy (token_name.buffer, str, len);
3093 token_name.buffer[len] = '\0';
3094 return true;
3096 return false;
3097 case omethodcolon:
3098 if (parlev == 0)
3099 objdef = omethodparm;
3100 return false;
3101 case omethodparm:
3102 if (parlev == 0)
3104 objdef = omethodtag;
3105 if (class_qualify)
3107 int oldlen = token_name.len;
3108 fvdef = fvnone;
3109 linebuffer_setlen (&token_name, oldlen + len);
3110 memcpy (token_name.buffer + oldlen, str, len);
3111 token_name.buffer[oldlen + len] = '\0';
3113 return true;
3115 return false;
3116 case oignore:
3117 if (toktype == st_C_objend)
3119 /* Memory leakage here: the string pointed by objtag is
3120 never released, because many tests would be needed to
3121 avoid breaking on incorrect input code. The amount of
3122 memory leaked here is the sum of the lengths of the
3123 class tags.
3124 free (objtag); */
3125 objdef = onone;
3127 return false;
3128 default:
3129 break;
3132 /* A function, variable or enum constant? */
3133 switch (toktype)
3135 case st_C_extern:
3136 fvextern = true;
3137 switch (fvdef)
3139 case finlist:
3140 case flistseen:
3141 case fignore:
3142 case vignore:
3143 break;
3144 default:
3145 fvdef = fvnone;
3147 return false;
3148 case st_C_ignore:
3149 fvextern = false;
3150 fvdef = vignore;
3151 return false;
3152 case st_C_operator:
3153 fvdef = foperator;
3154 *is_func_or_var = true;
3155 return true;
3156 case st_none:
3157 if (constantypedefs
3158 && structdef == snone
3159 && structtype == st_C_enum && bracelev > structbracelev
3160 /* Don't tag tokens in expressions that assign values to enum
3161 constants. */
3162 && fvdef != vignore)
3163 return true; /* enum constant */
3164 switch (fvdef)
3166 case fdefunkey:
3167 if (bracelev > 0)
3168 break;
3169 fvdef = fdefunname; /* GNU macro */
3170 *is_func_or_var = true;
3171 return true;
3172 case fvnone:
3173 switch (typdef)
3175 case ttypeseen:
3176 return false;
3177 case tnone:
3178 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3179 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3181 fvdef = vignore;
3182 return false;
3184 break;
3185 default:
3186 break;
3188 FALLTHROUGH;
3189 case fvnameseen:
3190 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3192 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3193 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3194 fvdef = foperator;
3195 *is_func_or_var = true;
3196 return true;
3198 if (bracelev > 0 && !instruct)
3199 break;
3200 fvdef = fvnameseen; /* function or variable */
3201 *is_func_or_var = true;
3202 return true;
3203 default:
3204 break;
3206 break;
3207 default:
3208 break;
3211 return false;
3216 * C_entries often keeps pointers to tokens or lines which are older than
3217 * the line currently read. By keeping two line buffers, and switching
3218 * them at end of line, it is possible to use those pointers.
3220 static struct
3222 long linepos;
3223 linebuffer lb;
3224 } lbs[2];
3226 #define current_lb_is_new (newndx == curndx)
3227 #define switch_line_buffers() (curndx = 1 - curndx)
3229 #define curlb (lbs[curndx].lb)
3230 #define newlb (lbs[newndx].lb)
3231 #define curlinepos (lbs[curndx].linepos)
3232 #define newlinepos (lbs[newndx].linepos)
3234 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3235 #define cplpl (c_ext & C_PLPL)
3236 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3238 #define CNL_SAVE_DEFINEDEF() \
3239 do { \
3240 curlinepos = charno; \
3241 readline (&curlb, inf); \
3242 lp = curlb.buffer; \
3243 quotednl = false; \
3244 newndx = curndx; \
3245 } while (0)
3247 #define CNL() \
3248 do { \
3249 CNL_SAVE_DEFINEDEF (); \
3250 if (savetoken.valid) \
3252 token = savetoken; \
3253 savetoken.valid = false; \
3255 definedef = dnone; \
3256 } while (0)
3259 static void
3260 make_C_tag (bool isfun)
3262 /* This function is never called when token.valid is false, but
3263 we must protect against invalid input or internal errors. */
3264 if (token.valid)
3265 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3266 token.offset+token.length+1, token.lineno, token.linepos);
3267 else if (DEBUG)
3268 { /* this branch is optimized away if !DEBUG */
3269 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3270 token_name.len + 17, isfun, token.line,
3271 token.offset+token.length+1, token.lineno, token.linepos);
3272 error ("INVALID TOKEN");
3275 token.valid = false;
/* Return true unless the end-of-file or error indicator is set on INF,
   that is, while reading from INF may still yield data.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !feof (inf) && !ferror (inf);
}
3286 * C_entries ()
3287 * This routine finds functions, variables, typedefs,
3288 * #define's, enum constants and struct/union/enum definitions in
3289 * C syntax and adds them to the list.
/* Scan the C-family source read from INF one character at a time and
   emit tags through make_C_tag.  C_EXT is a bit mask naming the
   dialect: it is examined through the plainc, cplpl and cjava macros
   and against YACC, and its C_AUTO bit is replaced by C_PLPL the first
   time a `::' is met inside an identifier.  Lines are read with the
   CNL and CNL_SAVE_DEFINEDEF macros into the two-element lbs[] array;
   the buffers are switched when a token that may still be tagged is
   pending, and both are freed before returning.  */
3291 static void
3292 C_entries (int c_ext, FILE *inf)
3293 /* extension of C */
3294 /* input file */
3296 register char c; /* latest char read; '\0' for end of line */
3297 register char *lp; /* pointer one beyond the character `c' */
3298 int curndx, newndx; /* indices for current and new lb */
3299 register int tokoff; /* offset in line of start of current token */
3300 register int toklen; /* length of current token */
3301 const char *qualifier; /* string used to qualify names */
3302 int qlen; /* length of qualifier */
3303 int bracelev; /* current brace level */
3304 int bracketlev; /* current bracket level */
3305 int parlev; /* current parenthesis level */
3306 int attrparlev; /* __attribute__ parenthesis level */
3307 int templatelev; /* current template level */
3308 int typdefbracelev; /* bracelev where a typedef struct body begun */
3309 bool incomm, inquote, inchar, quotednl, midtoken;
3310 bool yacc_rules; /* in the rules part of a yacc file */
3311 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Prepare both line buffers and, if cstack is still empty, give it its
   initial capacity. */
3314 linebuffer_init (&lbs[0].lb);
3315 linebuffer_init (&lbs[1].lb);
3316 if (cstack.size == 0)
3318 cstack.size = (DEBUG) ? 1 : 4;
3319 cstack.nl = 0;
3320 cstack.cname = xnew (cstack.size, char *);
3321 cstack.bracelev = xnew (cstack.size, int);
3324 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3325 curndx = newndx = 0;
3326 lp = curlb.buffer;
3327 *lp = 0;
3329 fvdef = fvnone; fvextern = false; typdef = tnone;
3330 structdef = snone; definedef = dnone; objdef = onone;
3331 yacc_rules = false;
3332 midtoken = inquote = inchar = incomm = quotednl = false;
3333 token.valid = savetoken.valid = false;
3334 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3335 if (cjava)
3336 { qualifier = "."; qlen = 1; }
3337 else
3338 { qualifier = "::"; qlen = 2; }
/* Main loop: each iteration consumes one character of the current
   line; the terminating '\0' of a line is what triggers reading the
   next one. */
3341 while (perhaps_more_input (inf))
3343 c = *lp++;
3344 if (c == '\\')
3346 /* If we are at the end of the line, the next character is a
3347 '\0'; do not skip it, because it is what tells us
3348 to read the next line. */
3349 if (*lp == '\0')
3351 quotednl = true;
3352 continue;
3354 lp++;
3355 c = ' ';
3357 else if (incomm)
3359 switch (c)
3361 case '*':
3362 if (*lp == '/')
3364 c = *lp++;
3365 incomm = false;
3367 break;
3368 case '\0':
3369 /* Newlines inside comments do not end macro definitions in
3370 traditional cpp. */
3371 CNL_SAVE_DEFINEDEF ();
3372 break;
3374 continue;
3376 else if (inquote)
3378 switch (c)
3380 case '"':
3381 inquote = false;
3382 break;
3383 case '\0':
3384 /* Newlines inside strings do not end macro definitions
3385 in traditional cpp, even though compilers don't
3386 usually accept them. */
3387 CNL_SAVE_DEFINEDEF ();
3388 break;
3390 continue;
3392 else if (inchar)
3394 switch (c)
3396 case '\0':
3397 /* Hmmm, something went wrong. */
3398 CNL ();
3399 FALLTHROUGH;
3400 case '\'':
3401 inchar = false;
3402 break;
3404 continue;
/* Not inside a comment, string or character constant: look for the
   characters that open one of those, for yacc `%%', for preprocessor
   lines and for square brackets. */
3406 else switch (c)
3408 case '"':
3409 inquote = true;
3410 if (bracketlev > 0)
3411 continue;
3412 if (inattribute)
3413 break;
3414 switch (fvdef)
3416 case fdefunkey:
3417 case fstartlist:
3418 case finlist:
3419 case fignore:
3420 case vignore:
3421 break;
3422 default:
3423 fvextern = false;
3424 fvdef = fvnone;
3426 continue;
3427 case '\'':
3428 inchar = true;
3429 if (bracketlev > 0)
3430 continue;
3431 if (inattribute)
3432 break;
3433 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3435 fvextern = false;
3436 fvdef = fvnone;
3438 continue;
3439 case '/':
3440 if (*lp == '*')
3442 incomm = true;
3443 lp++;
3444 c = ' ';
3445 if (bracketlev > 0)
3446 continue;
3448 else if (/* cplpl && */ *lp == '/')
3450 c = '\0';
3452 break;
3453 case '%':
3454 if ((c_ext & YACC) && *lp == '%')
3456 /* Entering or exiting rules section in yacc file. */
3457 lp++;
3458 definedef = dnone; fvdef = fvnone; fvextern = false;
3459 typdef = tnone; structdef = snone;
3460 midtoken = inquote = inchar = incomm = quotednl = false;
3461 bracelev = 0;
3462 yacc_rules = !yacc_rules;
3463 continue;
3465 else
3466 break;
3467 case '#':
3468 if (definedef == dnone)
3470 char *cp;
3471 bool cpptoken = true;
3473 /* Look back on this line. If all blanks, or nonblanks
3474 followed by an end of comment, this is a preprocessor
3475 token. */
3476 for (cp = newlb.buffer; cp < lp-1; cp++)
3477 if (!c_isspace (*cp))
3479 if (*cp == '*' && cp[1] == '/')
3481 cp++;
3482 cpptoken = true;
3484 else
3485 cpptoken = false;
3487 if (cpptoken)
3489 definedef = dsharpseen;
3490 /* This is needed for tagging enum values: when there are
3491 preprocessor conditionals inside the enum, we need to
3492 reset the value of fvdef so that the next enum value is
3493 tagged even though the one before it did not end in a
3494 comma. */
3495 if (fvdef == vignore && instruct && parlev == 0)
3497 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3498 fvdef = fvnone;
3501 } /* if (definedef == dnone) */
3502 continue;
3503 case '[':
3504 bracketlev++;
3505 continue;
3506 default:
3507 if (bracketlev > 0)
3509 if (c == ']')
3510 --bracketlev;
3511 else if (c == '\0')
3512 CNL_SAVE_DEFINEDEF ();
3513 continue;
3515 break;
3516 } /* switch (c) */
3519 /* Consider token only if some involved conditions are satisfied. */
3520 if (typdef != tignore
3521 && definedef != dignorerest
3522 && fvdef != finlist
3523 && templatelev == 0
3524 && (definedef != dnone
3525 || structdef != scolonseen)
3526 && !inattribute
3527 && !in_enum_bf)
3529 if (midtoken)
3531 if (endtoken (c))
3533 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3534 /* This handles :: in the middle,
3535 but not at the beginning of an identifier.
3536 Also, space-separated :: is not recognized. */
3538 if (c_ext & C_AUTO) /* automatic detection of C++ */
3539 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3540 lp += 2;
3541 toklen += 2;
3542 c = lp[-1];
3543 goto still_in_token;
3545 else
3547 bool funorvar = false;
3549 if (yacc_rules
3550 || consider_token (newlb.buffer + tokoff, toklen, c,
3551 &c_ext, bracelev, parlev,
3552 &funorvar))
3554 if (fvdef == foperator)
3556 char *oldlp = lp;
3557 lp = skip_spaces (lp-1);
3558 if (*lp != '\0')
3559 lp += 1;
3560 while (*lp != '\0'
3561 && !c_isspace (*lp) && *lp != '(')
3562 lp += 1;
3563 c = *lp++;
3564 toklen += lp - oldlp;
3566 token.named = false;
3567 if (!plainc
3568 && nestlev > 0 && definedef == dnone)
3569 /* in struct body */
3571 if (class_qualify)
3573 int len;
3574 write_classname (&token_name, qualifier);
3575 len = token_name.len;
3576 linebuffer_setlen (&token_name,
3577 len + qlen + toklen);
3578 sprintf (token_name.buffer + len, "%s%.*s",
3579 qualifier, toklen,
3580 newlb.buffer + tokoff);
3582 else
3584 linebuffer_setlen (&token_name, toklen);
3585 sprintf (token_name.buffer, "%.*s",
3586 toklen, newlb.buffer + tokoff);
3588 token.named = true;
3590 else if (objdef == ocatseen)
3591 /* Objective C category */
3593 if (class_qualify)
3595 int len = strlen (objtag) + 2 + toklen;
3596 linebuffer_setlen (&token_name, len);
3597 sprintf (token_name.buffer, "%s(%.*s)",
3598 objtag, toklen,
3599 newlb.buffer + tokoff);
3601 else
3603 linebuffer_setlen (&token_name, toklen);
3604 sprintf (token_name.buffer, "%.*s",
3605 toklen, newlb.buffer + tokoff);
3607 token.named = true;
3609 else if (objdef == omethodtag
3610 || objdef == omethodparm)
3611 /* Objective C method */
3613 token.named = true;
3615 else if (fvdef == fdefunname)
3616 /* GNU DEFUN and similar macros */
3618 bool defun = (newlb.buffer[tokoff] == 'F');
3619 int off = tokoff;
3620 int len = toklen;
3622 if (defun)
3624 off += 1;
3625 len -= 1;
3627 /* First, tag it as its C name */
3628 linebuffer_setlen (&token_name, toklen);
3629 memcpy (token_name.buffer,
3630 newlb.buffer + tokoff, toklen);
3631 token_name.buffer[toklen] = '\0';
3632 token.named = true;
3633 token.lineno = lineno;
3634 token.offset = tokoff;
3635 token.length = toklen;
3636 token.line = newlb.buffer;
3637 token.linepos = newlinepos;
3638 token.valid = true;
3639 make_C_tag (funorvar);
3641 /* Rewrite the tag so that emacs lisp DEFUNs
3642 can be found also by their elisp name */
3643 linebuffer_setlen (&token_name, len);
3644 memcpy (token_name.buffer,
3645 newlb.buffer + off, len);
3646 token_name.buffer[len] = '\0';
3647 if (defun)
3648 while (--len >= 0)
3649 if (token_name.buffer[len] == '_')
3650 token_name.buffer[len] = '-';
3651 token.named = defun;
3653 else
3655 linebuffer_setlen (&token_name, toklen);
3656 memcpy (token_name.buffer,
3657 newlb.buffer + tokoff, toklen);
3658 token_name.buffer[toklen] = '\0';
3659 /* Name macros and members. */
3660 token.named = (structdef == stagseen
3661 || typdef == ttypeseen
3662 || typdef == tend
3663 || (funorvar
3664 && definedef == dignorerest)
3665 || (funorvar
3666 && definedef == dnone
3667 && structdef == snone
3668 && bracelev > 0));
3670 token.lineno = lineno;
3671 token.offset = tokoff;
3672 token.length = toklen;
3673 token.line = newlb.buffer;
3674 token.linepos = newlinepos;
3675 token.valid = true;
3677 if (definedef == dnone
3678 && (fvdef == fvnameseen
3679 || fvdef == foperator
3680 || structdef == stagseen
3681 || typdef == tend
3682 || typdef == ttypeseen
3683 || objdef != onone))
3685 if (current_lb_is_new)
3686 switch_line_buffers ();
3688 else if (definedef != dnone
3689 || fvdef == fdefunname
3690 || instruct)
3691 make_C_tag (funorvar);
3693 else /* not yacc and consider_token failed */
3695 if (inattribute && fvdef == fignore)
3697 /* We have just met __attribute__ after a
3698 function parameter list: do not tag the
3699 function again. */
3700 fvdef = fvnone;
3703 midtoken = false;
3705 } /* if (endtoken (c)) */
3706 else if (intoken (c))
3707 still_in_token:
3709 toklen++;
3710 continue;
3712 } /* if (midtoken) */
3713 else if (begtoken (c))
3715 switch (definedef)
3717 case dnone:
3718 switch (fvdef)
3720 case fstartlist:
3721 /* This prevents tagging fb in
3722 void (__attribute__((noreturn)) *fb) (void);
3723 Fixing this is not easy and not very important. */
3724 fvdef = finlist;
3725 continue;
3726 case flistseen:
3727 if (plainc || declarations)
3729 make_C_tag (true); /* a function */
3730 fvdef = fignore;
3732 break;
3733 default:
3734 break;
3736 if (structdef == stagseen && !cjava)
3738 popclass_above (bracelev);
3739 structdef = snone;
3741 break;
3742 case dsharpseen:
3743 savetoken = token;
3744 break;
3745 default:
3746 break;
3748 if (!yacc_rules || lp == newlb.buffer + 1)
3750 tokoff = lp - 1 - newlb.buffer;
3751 toklen = 1;
3752 midtoken = true;
3754 continue;
3755 } /* if (begtoken) */
3756 } /* if must look at token */
3759 /* Detect end of line, colon, comma, semicolon and various braces
3760 after having handled a token.*/
3761 switch (c)
3763 case ':':
3764 if (inattribute)
3765 break;
3766 if (yacc_rules && token.offset == 0 && token.valid)
3768 make_C_tag (false); /* a yacc function */
3769 break;
3771 if (definedef != dnone)
3772 break;
3773 switch (objdef)
3775 case otagseen:
3776 objdef = oignore;
3777 make_C_tag (true); /* an Objective C class */
3778 break;
3779 case omethodtag:
3780 case omethodparm:
3781 objdef = omethodcolon;
3782 if (class_qualify)
3784 int toklen = token_name.len;
3785 linebuffer_setlen (&token_name, toklen + 1);
3786 strcpy (token_name.buffer + toklen, ":");
3788 break;
3789 default:
3790 break;
3792 if (structdef == stagseen)
3794 structdef = scolonseen;
3795 break;
3797 /* Should be useless, but may be work as a safety net. */
3798 if (cplpl && fvdef == flistseen)
3800 make_C_tag (true); /* a function */
3801 fvdef = fignore;
3802 break;
3804 break;
3805 case ';':
3806 if (definedef != dnone || inattribute)
3807 break;
3808 switch (typdef)
3810 case tend:
3811 case ttypeseen:
3812 make_C_tag (false); /* a typedef */
3813 typdef = tnone;
3814 fvdef = fvnone;
3815 break;
3816 case tnone:
3817 case tinbody:
3818 case tignore:
3819 switch (fvdef)
3821 case fignore:
3822 if (typdef == tignore || cplpl)
3823 fvdef = fvnone;
3824 break;
3825 case fvnameseen:
3826 if ((globals && bracelev == 0 && (!fvextern || declarations))
3827 || (members && instruct))
3828 make_C_tag (false); /* a variable */
3829 fvextern = false;
3830 fvdef = fvnone;
3831 token.valid = false;
3832 break;
3833 case flistseen:
3834 if ((declarations
3835 && (cplpl || !instruct)
3836 && (typdef == tnone || (typdef != tignore && instruct)))
3837 || (members
3838 && plainc && instruct))
3839 make_C_tag (true); /* a function */
3840 FALLTHROUGH;
3841 default:
3842 fvextern = false;
3843 fvdef = fvnone;
3844 if (declarations
3845 && cplpl && structdef == stagseen)
3846 make_C_tag (false); /* forward declaration */
3847 else
3848 token.valid = false;
3849 } /* switch (fvdef) */
3850 FALLTHROUGH;
3851 default:
3852 if (!instruct)
3853 typdef = tnone;
3855 if (structdef == stagseen)
3856 structdef = snone;
3857 break;
3858 case ',':
3859 if (definedef != dnone || inattribute)
3860 break;
3861 switch (objdef)
3863 case omethodtag:
3864 case omethodparm:
3865 make_C_tag (true); /* an Objective C method */
3866 objdef = oinbody;
3867 break;
3868 default:
3869 break;
3871 switch (fvdef)
3873 case fdefunkey:
3874 case foperator:
3875 case fstartlist:
3876 case finlist:
3877 case fignore:
3878 break;
3879 case vignore:
3880 if (instruct && parlev == 0)
3881 fvdef = fvnone;
3882 break;
3883 case fdefunname:
3884 fvdef = fignore;
3885 break;
3886 case fvnameseen:
3887 if (parlev == 0
3888 && ((globals
3889 && bracelev == 0
3890 && templatelev == 0
3891 && (!fvextern || declarations))
3892 || (members && instruct)))
3893 make_C_tag (false); /* a variable */
3894 break;
3895 case flistseen:
3896 if ((declarations && typdef == tnone && !instruct)
3897 || (members && typdef != tignore && instruct))
3899 make_C_tag (true); /* a function */
3900 fvdef = fvnameseen;
3902 else if (!declarations)
3903 fvdef = fvnone;
3904 token.valid = false;
3905 break;
3906 default:
3907 fvdef = fvnone;
3909 if (structdef == stagseen)
3910 structdef = snone;
3911 break;
3912 case ']':
3913 if (definedef != dnone || inattribute)
3914 break;
3915 if (structdef == stagseen)
3916 structdef = snone;
3917 switch (typdef)
3919 case ttypeseen:
3920 case tend:
3921 typdef = tignore;
3922 make_C_tag (false); /* a typedef */
3923 break;
3924 case tnone:
3925 case tinbody:
3926 switch (fvdef)
3928 case foperator:
3929 case finlist:
3930 case fignore:
3931 case vignore:
3932 break;
3933 case fvnameseen:
3934 if ((members && bracelev == 1)
3935 || (globals && bracelev == 0
3936 && (!fvextern || declarations)))
3937 make_C_tag (false); /* a variable */
3938 FALLTHROUGH;
3939 default:
3940 fvdef = fvnone;
3942 break;
3943 default:
3944 break;
3946 break;
3947 case '(':
3948 if (inattribute)
3950 attrparlev++;
3951 break;
3953 if (definedef != dnone)
3954 break;
3955 if (objdef == otagseen && parlev == 0)
3956 objdef = oparenseen;
3957 switch (fvdef)
3959 case fvnameseen:
3960 if (typdef == ttypeseen
3961 && *lp != '*'
3962 && !instruct)
3964 /* This handles constructs like:
3965 typedef void OperatorFun (int fun); */
3966 make_C_tag (false);
3967 typdef = tignore;
3968 fvdef = fignore;
3969 break;
3971 FALLTHROUGH;
3972 case foperator:
3973 fvdef = fstartlist;
3974 break;
3975 case flistseen:
3976 fvdef = finlist;
3977 break;
3978 default:
3979 break;
3981 parlev++;
3982 break;
3983 case ')':
3984 if (inattribute)
3986 if (--attrparlev == 0)
3987 inattribute = false;
3988 break;
3990 if (in_enum_bf)
3992 if (--parlev == 0)
3993 in_enum_bf = false;
3994 break;
3996 if (definedef != dnone)
3997 break;
3998 if (objdef == ocatseen && parlev == 1)
4000 make_C_tag (true); /* an Objective C category */
4001 objdef = oignore;
4003 if (--parlev == 0)
4005 switch (fvdef)
4007 case fstartlist:
4008 case finlist:
4009 fvdef = flistseen;
4010 break;
4011 default:
4012 break;
4014 if (!instruct
4015 && (typdef == tend
4016 || typdef == ttypeseen))
4018 typdef = tignore;
4019 make_C_tag (false); /* a typedef */
4022 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
4023 parlev = 0;
4024 break;
4025 case '{':
4026 if (definedef != dnone)
4027 break;
4028 if (typdef == ttypeseen)
4030 /* Whenever typdef is set to tinbody (currently only
4031 here), typdefbracelev should be set to bracelev. */
4032 typdef = tinbody;
4033 typdefbracelev = bracelev;
4035 switch (fvdef)
4037 case flistseen:
4038 if (cplpl && !class_qualify)
4040 /* Remove class and namespace qualifiers from the token,
4041 leaving only the method/member name. */
4042 char *cc, *uqname = token_name.buffer;
4043 char *tok_end = token_name.buffer + token_name.len;
4045 for (cc = token_name.buffer; cc < tok_end; cc++)
4047 if (*cc == ':' && cc[1] == ':')
4049 uqname = cc + 2;
4050 cc++;
4053 if (uqname > token_name.buffer)
4055 int uqlen = strlen (uqname);
4056 linebuffer_setlen (&token_name, uqlen);
4057 memmove (token_name.buffer, uqname, uqlen + 1);
4060 make_C_tag (true); /* a function */
4061 FALLTHROUGH;
4062 case fignore:
4063 fvdef = fvnone;
4064 break;
4065 case fvnone:
4066 switch (objdef)
4068 case otagseen:
4069 make_C_tag (true); /* an Objective C class */
4070 objdef = oignore;
4071 break;
4072 case omethodtag:
4073 case omethodparm:
4074 make_C_tag (true); /* an Objective C method */
4075 objdef = oinbody;
4076 break;
4077 default:
4078 /* Neutralize `extern "C" {' grot. */
4079 if (bracelev == 0 && structdef == snone && nestlev == 0
4080 && typdef == tnone)
4081 bracelev = -1;
4083 break;
4084 default:
4085 break;
4087 switch (structdef)
4089 case skeyseen: /* unnamed struct */
4090 pushclass_above (bracelev, NULL, 0);
4091 structdef = snone;
4092 break;
4093 case stagseen: /* named struct or enum */
4094 case scolonseen: /* a class */
4095 pushclass_above (bracelev,token.line+token.offset, token.length);
4096 structdef = snone;
4097 make_C_tag (false); /* a struct or enum */
4098 break;
4099 default:
4100 break;
4102 bracelev += 1;
4103 break;
4104 case '*':
4105 if (definedef != dnone)
4106 break;
4107 if (fvdef == fstartlist)
4109 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4110 token.valid = false;
4112 break;
4113 case '}':
4114 if (definedef != dnone)
4115 break;
4116 bracelev -= 1;
4117 if (!ignoreindent && lp == newlb.buffer + 1)
4119 if (bracelev != 0)
4120 token.valid = false; /* unexpected value, token unreliable */
4121 bracelev = 0; /* reset brace level if first column */
4122 parlev = 0; /* also reset paren level, just in case... */
4124 else if (bracelev < 0)
4126 token.valid = false; /* something gone amiss, token unreliable */
4127 bracelev = 0;
4129 if (bracelev == 0 && fvdef == vignore)
4130 fvdef = fvnone; /* end of function */
4131 popclass_above (bracelev);
4132 structdef = snone;
4133 /* Only if typdef == tinbody is typdefbracelev significant. */
4134 if (typdef == tinbody && bracelev <= typdefbracelev)
4136 assert (bracelev == typdefbracelev);
4137 typdef = tend;
4139 break;
4140 case '=':
4141 if (definedef != dnone)
4142 break;
4143 switch (fvdef)
4145 case foperator:
4146 case finlist:
4147 case fignore:
4148 case vignore:
4149 break;
4150 case fvnameseen:
4151 if ((members && bracelev == 1)
4152 || (globals && bracelev == 0 && (!fvextern || declarations)))
4153 make_C_tag (false); /* a variable */
4154 FALLTHROUGH;
4155 default:
4156 fvdef = vignore;
4158 break;
4159 case '<':
4160 if (cplpl
4161 && (structdef == stagseen || fvdef == fvnameseen))
4163 templatelev++;
4164 break;
4166 goto resetfvdef;
4167 case '>':
4168 if (templatelev > 0)
4170 templatelev--;
4171 break;
4173 goto resetfvdef;
4174 case '+':
4175 case '-':
4176 if (objdef == oinbody && bracelev == 0)
4178 objdef = omethodsign;
4179 break;
4181 FALLTHROUGH;
4182 resetfvdef:
4183 case '#': case '~': case '&': case '%': case '/':
4184 case '|': case '^': case '!': case '.': case '?':
4185 if (definedef != dnone)
4186 break;
4187 /* These surely cannot follow a function tag in C. */
4188 switch (fvdef)
4190 case foperator:
4191 case finlist:
4192 case fignore:
4193 case vignore:
4194 break;
4195 default:
4196 fvdef = fvnone;
4198 break;
4199 case '\0':
4200 if (objdef == otagseen)
4202 make_C_tag (true); /* an Objective C class */
4203 objdef = oignore;
4205 /* If a macro spans multiple lines don't reset its state. */
4206 if (quotednl)
4207 CNL_SAVE_DEFINEDEF ();
4208 else
4209 CNL ();
4210 break;
4211 } /* switch (c) */
4213 } /* while not eof */
/* Give back the storage held by the two line buffers. */
4215 free (lbs[0].lb.buffer);
4216 free (lbs[1].lb.buffer);
4220 * Process either a C++ file or a C file depending on the setting
4221 * of a global flag.
4223 static void
4224 default_C_entries (FILE *inf)
4226 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: no dialect bit is handed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extension = 0;

  C_entries (no_extension, inf);
}
4236 /* Always do C++. */
4237 static void
4238 Cplusplus_entries (FILE *inf)
4240 C_entries (C_PLPL, inf);
4243 /* Always do Java. */
4244 static void
4245 Cjava_entries (FILE *inf)
4247 C_entries (C_JAVA, inf);
4250 /* Always do C*. */
4251 static void
4252 Cstar_entries (FILE *inf)
4254 C_entries (C_STAR, inf);
4257 /* Always do Yacc. */
4258 static void
4259 Yacc_entries (FILE *inf)
4261 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header to be followed by the loop body: as long as FILE_POINTER
   may still yield input, read one line into LINE_BUFFER and make
   CHAR_POINTER point at the start of that line.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  while (perhaps_more_input (file_pointer)				\
	 && (readline (&(line_buffer), file_pointer),			\
	     (char_pointer) = (line_buffer).buffer,			\
	     true))

/* True if CP points at the keyword KW followed by a character for
   which notinname holds; in that case CP is moved past KW and past
   the spaces that follow it.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert ("" kw), true)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof (kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof (kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */

/* Similar to LOOKING_AT but ignores case, does not use notinname and
   does not skip the spaces after the keyword.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert ("" kw), true) /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof (kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof (kw) - 1, true))		/* step over kw only */
4285 * Read a file, but do no processing. This is used to do regexp
4286 * matching on files that have no language defined.
4288 static void
4289 just_read_file (FILE *inf)
4291 while (perhaps_more_input (inf))
4292 readline (&lb, inf);
4296 /* Fortran parsing */
4298 static void F_takeprec (void);
4299 static void F_getit (FILE *);
4301 static void
4302 F_takeprec (void)
4304 dbp = skip_spaces (dbp);
4305 if (*dbp != '*')
4306 return;
4307 dbp++;
4308 dbp = skip_spaces (dbp);
4309 if (strneq (dbp, "(*)", 3))
4311 dbp += 3;
4312 return;
4314 if (!c_isdigit (*dbp))
4316 --dbp; /* force failure */
4317 return;
4320 dbp++;
4321 while (c_isdigit (*dbp));
4324 static void
4325 F_getit (FILE *inf)
4327 register char *cp;
4329 dbp = skip_spaces (dbp);
4330 if (*dbp == '\0')
4332 readline (&lb, inf);
4333 dbp = lb.buffer;
4334 if (dbp[5] != '&')
4335 return;
4336 dbp += 6;
4337 dbp = skip_spaces (dbp);
4339 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4340 return;
4341 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4342 continue;
4343 make_tag (dbp, cp-dbp, true,
4344 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348 static void
4349 Fortran_functions (FILE *inf)
4351 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4353 if (*dbp == '%')
4354 dbp++; /* Ratfor escape to fortran */
4355 dbp = skip_spaces (dbp);
4356 if (*dbp == '\0')
4357 continue;
4359 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4360 dbp = skip_spaces (dbp);
4362 if (LOOKING_AT_NOCASE (dbp, "pure"))
4363 dbp = skip_spaces (dbp);
4365 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4366 dbp = skip_spaces (dbp);
4368 switch (c_tolower (*dbp))
4370 case 'i':
4371 if (nocase_tail ("integer"))
4372 F_takeprec ();
4373 break;
4374 case 'r':
4375 if (nocase_tail ("real"))
4376 F_takeprec ();
4377 break;
4378 case 'l':
4379 if (nocase_tail ("logical"))
4380 F_takeprec ();
4381 break;
4382 case 'c':
4383 if (nocase_tail ("complex") || nocase_tail ("character"))
4384 F_takeprec ();
4385 break;
4386 case 'd':
4387 if (nocase_tail ("double"))
4389 dbp = skip_spaces (dbp);
4390 if (*dbp == '\0')
4391 continue;
4392 if (nocase_tail ("precision"))
4393 break;
4394 continue;
4396 break;
4398 dbp = skip_spaces (dbp);
4399 if (*dbp == '\0')
4400 continue;
4401 switch (c_tolower (*dbp))
4403 case 'f':
4404 if (nocase_tail ("function"))
4405 F_getit (inf);
4406 continue;
4407 case 's':
4408 if (nocase_tail ("subroutine"))
4409 F_getit (inf);
4410 continue;
4411 case 'e':
4412 if (nocase_tail ("entry"))
4413 F_getit (inf);
4414 continue;
4415 case 'b':
4416 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4418 dbp = skip_spaces (dbp);
4419 if (*dbp == '\0') /* assume un-named */
4420 make_tag ("blockdata", 9, true,
4421 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4422 else
4423 F_getit (inf); /* look for name */
4425 continue;
4432 * Go language support
4433 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4435 static void
4436 Go_functions(FILE *inf)
4438 char *cp, *name;
4440 LOOP_ON_INPUT_LINES(inf, lb, cp)
4442 cp = skip_spaces (cp);
4444 if (LOOKING_AT (cp, "package"))
4446 name = cp;
4447 while (!notinname (*cp) && *cp != '\0')
4448 cp++;
4449 make_tag (name, cp - name, false, lb.buffer,
4450 cp - lb.buffer + 1, lineno, linecharno);
4452 else if (LOOKING_AT (cp, "func"))
4454 /* Go implementation of interface, such as:
4455 func (n *Integer) Add(m Integer) ...
4456 skip `(n *Integer)` part.
4458 if (*cp == '(')
4460 while (*cp != ')')
4461 cp++;
4462 cp = skip_spaces (cp+1);
4465 if (*cp)
4467 name = cp;
4469 while (!notinname (*cp))
4470 cp++;
4472 make_tag (name, cp - name, true, lb.buffer,
4473 cp - lb.buffer + 1, lineno, linecharno);
4476 else if (members && LOOKING_AT (cp, "type"))
4478 name = cp;
4480 /* Ignore the likes of the following:
4481 type (
4485 if (*cp == '(')
4486 return;
4488 while (!notinname (*cp) && *cp != '\0')
4489 cp++;
4491 make_tag (name, cp - name, false, lb.buffer,
4492 cp - lb.buffer + 1, lineno, linecharno);
4499 * Ada parsing
4500 * Original code by
4501 * Philippe Waroquiers (1998)
4504 /* Once we are positioned after an "interesting" keyword, let's get
4505 the real tag value necessary. */
4506 static void
4507 Ada_getit (FILE *inf, const char *name_qualifier)
4509 register char *cp;
4510 char *name;
4511 char c;
4513 while (perhaps_more_input (inf))
4515 dbp = skip_spaces (dbp);
4516 if (*dbp == '\0'
4517 || (dbp[0] == '-' && dbp[1] == '-'))
4519 readline (&lb, inf);
4520 dbp = lb.buffer;
4522 switch (c_tolower (*dbp))
4524 case 'b':
4525 if (nocase_tail ("body"))
4527 /* Skipping body of procedure body or package body or ....
4528 resetting qualifier to body instead of spec. */
4529 name_qualifier = "/b";
4530 continue;
4532 break;
4533 case 't':
4534 /* Skipping type of task type or protected type ... */
4535 if (nocase_tail ("type"))
4536 continue;
4537 break;
4539 if (*dbp == '"')
4541 dbp += 1;
4542 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4543 continue;
4545 else
4547 dbp = skip_spaces (dbp);
4548 for (cp = dbp;
4549 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4550 cp++)
4551 continue;
4552 if (cp == dbp)
4553 return;
4555 c = *cp;
4556 *cp = '\0';
4557 name = concat (dbp, name_qualifier, "");
4558 *cp = c;
4559 make_tag (name, strlen (name), true,
4560 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4561 free (name);
4562 if (c == '"')
4563 dbp = cp + 1;
4564 return;
4568 static void
4569 Ada_funcs (FILE *inf)
4571 bool inquote = false;
4572 bool skip_till_semicolumn = false;
4574 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4576 while (*dbp != '\0')
4578 /* Skip a string i.e. "abcd". */
4579 if (inquote || (*dbp == '"'))
4581 dbp = strchr (dbp + !inquote, '"');
4582 if (dbp != NULL)
4584 inquote = false;
4585 dbp += 1;
4586 continue; /* advance char */
4588 else
4590 inquote = true;
4591 break; /* advance line */
4595 /* Skip comments. */
4596 if (dbp[0] == '-' && dbp[1] == '-')
4597 break; /* advance line */
4599 /* Skip character enclosed in single quote i.e. 'a'
4600 and skip single quote starting an attribute i.e. 'Image. */
4601 if (*dbp == '\'')
4603 dbp++ ;
4604 if (*dbp != '\0')
4605 dbp++;
4606 continue;
4609 if (skip_till_semicolumn)
4611 if (*dbp == ';')
4612 skip_till_semicolumn = false;
4613 dbp++;
4614 continue; /* advance char */
4617 /* Search for beginning of a token. */
4618 if (!begtoken (*dbp))
4620 dbp++;
4621 continue; /* advance char */
4624 /* We are at the beginning of a token. */
4625 switch (c_tolower (*dbp))
4627 case 'f':
4628 if (!packages_only && nocase_tail ("function"))
4629 Ada_getit (inf, "/f");
4630 else
4631 break; /* from switch */
4632 continue; /* advance char */
4633 case 'p':
4634 if (!packages_only && nocase_tail ("procedure"))
4635 Ada_getit (inf, "/p");
4636 else if (nocase_tail ("package"))
4637 Ada_getit (inf, "/s");
4638 else if (nocase_tail ("protected")) /* protected type */
4639 Ada_getit (inf, "/t");
4640 else
4641 break; /* from switch */
4642 continue; /* advance char */
4644 case 'u':
4645 if (typedefs && !packages_only && nocase_tail ("use"))
4647 /* when tagging types, avoid tagging use type Pack.Typename;
4648 for this, we will skip everything till a ; */
4649 skip_till_semicolumn = true;
4650 continue; /* advance char */
4653 case 't':
4654 if (!packages_only && nocase_tail ("task"))
4655 Ada_getit (inf, "/k");
4656 else if (typedefs && !packages_only && nocase_tail ("type"))
4658 Ada_getit (inf, "/t");
4659 while (*dbp != '\0')
4660 dbp += 1;
4662 else
4663 break; /* from switch */
4664 continue; /* advance char */
4667 /* Look for the end of the token. */
4668 while (!endtoken (*dbp))
4669 dbp++;
4671 } /* advance char */
4672 } /* advance line */
4677 * Unix and microcontroller assembly tag handling
4678 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4679 * Idea by Bob Weiner, Motorola Inc. (1994)
4681 static void
4682 Asm_labels (FILE *inf)
4684 register char *cp;
4686 LOOP_ON_INPUT_LINES (inf, lb, cp)
4688 /* If first char is alphabetic or one of [_.$], test for colon
4689 following identifier. */
4690 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4692 /* Read past label. */
4693 cp++;
4694 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4695 cp++;
4696 if (*cp == ':' || c_isspace (*cp))
4697 /* Found end of label, so copy it and add it to the table. */
4698 make_tag (lb.buffer, cp - lb.buffer, true,
4699 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4706 * Perl support
4707 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4708 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4709 * Perl variable names: /^(my|local).../
4710 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4711 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4712 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
/* Scan the Perl source on INF one line at a time and make tags for
   `sub' names, for `use constant' names and, when GLOBALS is set, for
   variables introduced by `my' or `local'.  The name given by the most
   recent `package' statement is remembered (initially "main") so that
   unqualified sub names can be prefixed with it when CLASS_QUALIFY is
   set.  */
4714 static void
4715 Perl_functions (FILE *inf)
4717 char *package = savestr ("main"); /* current package name */
4718 register char *cp;
4720 LOOP_ON_INPUT_LINES (inf, lb, cp)
4722 cp = skip_spaces (cp);
4724 if (LOOKING_AT (cp, "package"))
/* Replace the remembered package name; the previous string is released
   first and get_tag stores the new one through its second argument.  */
4726 free (package);
4727 get_tag (cp, &package);
4729 else if (LOOKING_AT (cp, "sub"))
4731 char *pos, *sp;
/* This label is also the target of the goto in the `use constant'
   branch below, so both kinds of name share the tagging code.  */
4733 subr:
4734 sp = cp;
4735 while (!notinname (*cp))
4736 cp++;
4737 if (cp == sp)
4738 continue; /* nothing found */
4739 pos = strchr (sp, ':');
4740 if (pos && pos < cp && pos[1] == ':')
4742 /* The name is already qualified. */
4743 if (!class_qualify)
/* Qualification was not requested: advance SP past every "xxx::"
   prefix so that only the last component is tagged.  */
4745 char *q = pos + 2, *qpos;
4746 while ((qpos = strchr (q, ':')) != NULL
4747 && qpos < cp
4748 && qpos[1] == ':')
4749 q = qpos + 2;
4750 sp = q;
4752 make_tag (sp, cp - sp, true,
4753 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4755 else if (class_qualify)
4756 /* Qualify it. */
4758 char savechar, *name;
/* Temporarily NUL-terminate the name inside the line buffer so it can
   be concatenated, then restore the overwritten character.  */
4760 savechar = *cp;
4761 *cp = '\0';
4762 name = concat (package, "::", sp);
4763 *cp = savechar;
4764 make_tag (name, strlen (name), true,
4765 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4766 free (name);
4768 else
4769 make_tag (sp, cp - sp, true,
4770 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4772 else if (LOOKING_AT (cp, "use constant")
4773 || LOOKING_AT (cp, "use constant::defer"))
4775 /* For hash style multi-constant like
4776 use constant { FOO => 123,
4777 BAR => 456 };
4778 only the first FOO is picked up. Parsing across the value
4779 expressions would be difficult in general, due to possible nested
4780 hashes, here-documents, etc. */
4781 if (*cp == '{')
4782 cp = skip_spaces (cp+1);
4783 goto subr;
4785 else if (globals) /* only if we are tagging global vars */
4787 /* Skip a qualifier, if any. */
4788 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4789 /* After "my" or "local", but before any following paren or space. */
4790 char *varstart = cp;
4792 if (qual /* should this be removed? If yes, how? */
4793 && (*cp == '$' || *cp == '@' || *cp == '%'))
/* Single variable: the tag name excludes the sigil and runs over the
   following alphanumerics and underscores.  */
4795 varstart += 1;
4797 cp++;
4798 while (c_isalnum (*cp) || *cp == '_');
4800 else if (qual)
4802 /* Should be examining a variable list at this point;
4803 could insist on seeing an open parenthesis. */
4804 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4805 cp++;
4807 else
4808 continue;
4810 make_tag (varstart, cp - varstart, false,
4811 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4814 free (package);
4819 * Python support
4820 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4821 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4822 * More ideas by seb bacon <seb@jamkit.com> (2002)
4824 static void
4825 Python_functions (FILE *inf)
4827 register char *cp;
4829 LOOP_ON_INPUT_LINES (inf, lb, cp)
4831 cp = skip_spaces (cp);
4832 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4834 char *name = cp;
4835 while (!notinname (*cp) && *cp != ':')
4836 cp++;
4837 make_tag (name, cp - name, true,
4838 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4844 * Ruby support
4845 * Original code by Xi Lu <lx@shellcodes.org> (2015)
/* Scan the Ruby source on INF line by line.  Three kinds of line are
   recognized: constant assignments (a name starting with an upper-case
   letter followed by `='), `def'/`class'/`module' definitions, and
   lines containing attr_reader/attr_writer/attr_accessor/alias_method.
   The flags READER, WRITER, ALIAS and CONTINUATION live across lines so
   that an accessor list ending in a comma can be continued on the next
   line.  */
4847 static void
4848 Ruby_functions (FILE *inf)
4850 char *cp = NULL;
4851 bool reader = false, writer = false, alias = false, continuation = false;
4853 LOOP_ON_INPUT_LINES (inf, lb, cp)
4855 bool is_class = false;
4856 bool is_method = false;
4857 char *name;
4859 cp = skip_spaces (cp);
4860 if (!continuation
4861 /* Constants. */
4862 && c_isalpha (*cp) && c_isupper (*cp))
4864 char *bp, *colon = NULL;
4866 name = cp;
/* Scan the (possibly `::'-qualified) name, remembering the position of
   the last colon seen.  */
4868 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4870 if (*cp == ':')
4871 colon = cp;
4873 if (cp > name + 1)
4875 bp = skip_spaces (cp);
/* Only a plain `=' counts as an assignment; `==' and `=>' do not.  */
4876 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4878 if (colon && !c_isspace (colon[1]))
4879 name = colon + 1;
4880 make_tag (name, cp - name, false,
4881 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4885 else if (!continuation
4886 /* Modules, classes, methods. */
4887 && ((is_method = LOOKING_AT (cp, "def"))
4888 || (is_class = LOOKING_AT (cp, "class"))
4889 || LOOKING_AT (cp, "module")))
4891 const char self_name[] = "self.";
4892 const size_t self_size1 = sizeof (self_name) - 1;
4894 name = cp;
4896 /* Ruby method names can end in a '='. Also, operator overloading can
4897 define operators whose names include '='. */
4898 while (!notinname (*cp) || *cp == '=')
4899 cp++;
4901 /* Remove "self." from the method name. */
4902 if (cp - name > self_size1
4903 && strneq (name, self_name, self_size1))
4904 name += self_size1;
4906 /* Remove the class/module qualifiers from method names. */
4907 if (is_method)
4909 char *q;
4911 for (q = name; q < cp && *q != '.'; q++)
4913 if (q < cp - 1) /* punt if we see just "FOO." */
4914 name = q + 1;
4917 /* Don't tag singleton classes. */
4918 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4919 continue;
4921 make_tag (name, cp - name, true,
4922 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4924 else
4926 /* Tag accessors and aliases. */
4928 if (!continuation)
4929 reader = writer = alias = false;
/* Walk the rest of the line up to a `#' comment, looking for an
   accessor keyword (or resuming a list continued from the previous
   line).  */
4931 while (*cp && *cp != '#')
4933 if (!continuation)
4935 reader = writer = alias = false;
4936 if (LOOKING_AT (cp, "attr_reader"))
4937 reader = true;
4938 else if (LOOKING_AT (cp, "attr_writer"))
4939 writer = true;
4940 else if (LOOKING_AT (cp, "attr_accessor"))
4942 reader = true;
4943 writer = true;
4945 else if (LOOKING_AT (cp, "alias_method"))
4946 alias = true;
4948 if (reader || writer || alias)
4950 do {
4951 char *np;
4953 cp = skip_spaces (cp);
4954 if (*cp == '(')
4955 cp = skip_spaces (cp + 1);
4956 np = cp;
4957 cp = skip_name (cp);
/* Only symbol arguments (those starting with `:') are tagged; the
   colon itself is not part of the tag name.  */
4958 if (*np != ':')
4959 continue;
4960 np++;
4961 if (reader)
4963 make_tag (np, cp - np, true,
4964 lb.buffer, cp - lb.buffer + 1,
4965 lineno, linecharno);
4966 continuation = false;
4968 if (writer)
/* Build "NAME=" in a fresh buffer (NAME_LEN counts the appended `=')
   and hand it to pfnote.  */
4970 size_t name_len = cp - np + 1;
4971 char *wr_name = xnew (name_len + 1, char);
4973 memcpy (wr_name, np, name_len - 1);
4974 memcpy (wr_name + name_len - 1, "=", 2);
4975 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4976 lineno, linecharno);
4977 if (debug)
4978 fprintf (stderr, "%s on %s:%d: %s\n", wr_name,
4979 curfdp->taggedfname, lineno, lb.buffer);
4980 continuation = false;
4982 if (alias)
4984 if (!continuation)
4985 make_tag (np, cp - np, true,
4986 lb.buffer, cp - lb.buffer + 1,
4987 lineno, linecharno);
4988 continuation = false;
/* Skip the remaining alias arguments; a trailing comma (followed only
   by white space) marks the statement as continued.  */
4989 while (*cp && *cp != '#' && *cp != ';')
4991 if (*cp == ',')
4992 continuation = true;
4993 else if (!c_isspace (*cp))
4994 continuation = false;
4995 cp++;
4997 if (*cp == ';')
4998 continuation = false;
5000 cp = skip_spaces (cp);
/* Keep going while a comma follows; for accessors the comma also sets
   CONTINUATION, which stays set if the line ends right after it.  */
5001 } while ((alias
5002 ? (*cp == ',')
5003 : (continuation = (*cp == ',')))
5004 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
5006 if (*cp != '#')
5007 cp = skip_name (cp);
5008 while (*cp && *cp != '#' && notinname (*cp))
5009 cp++;
5017 * PHP support
5018 * Look for:
5019 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5020 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5021 * - /^[ \t]*define\(\"[^\"]+/
5022 * Only with --members:
5023 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5024 * Idea by Diez B. Roggisch (2001)
5026 static void
5027 PHP_functions (FILE *inf)
5029 char *cp, *name;
5030 bool search_identifier = false;
5032 LOOP_ON_INPUT_LINES (inf, lb, cp)
5034 cp = skip_spaces (cp);
5035 name = cp;
5036 if (search_identifier
5037 && *cp != '\0')
5039 while (!notinname (*cp))
5040 cp++;
5041 make_tag (name, cp - name, true,
5042 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5043 search_identifier = false;
5045 else if (LOOKING_AT (cp, "function"))
5047 if (*cp == '&')
5048 cp = skip_spaces (cp+1);
5049 if (*cp != '\0')
5051 name = cp;
5052 while (!notinname (*cp))
5053 cp++;
5054 make_tag (name, cp - name, true,
5055 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5057 else
5058 search_identifier = true;
5060 else if (LOOKING_AT (cp, "class"))
5062 if (*cp != '\0')
5064 name = cp;
5065 while (*cp != '\0' && !c_isspace (*cp))
5066 cp++;
5067 make_tag (name, cp - name, false,
5068 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5070 else
5071 search_identifier = true;
5073 else if (strneq (cp, "define", 6)
5074 && (cp = skip_spaces (cp+6))
5075 && *cp++ == '('
5076 && (*cp == '"' || *cp == '\''))
5078 char quote = *cp++;
5079 name = cp;
5080 while (*cp != quote && *cp != '\0')
5081 cp++;
5082 make_tag (name, cp - name, false,
5083 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5085 else if (members
5086 && LOOKING_AT (cp, "var")
5087 && *cp == '$')
5089 name = cp;
5090 while (!notinname (*cp))
5091 cp++;
5092 make_tag (name, cp - name, false,
5093 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5100 * Cobol tag functions
5101 * We could look for anything that could be a paragraph name.
5102 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5103 * Idea by Corny de Souza (1993)
5105 static void
5106 Cobol_paragraphs (FILE *inf)
5108 register char *bp, *ep;
5110 LOOP_ON_INPUT_LINES (inf, lb, bp)
5112 if (lb.len < 9)
5113 continue;
5114 bp += 8;
5116 /* If eoln, compiler option or comment ignore whole line. */
5117 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5118 continue;
5120 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5121 continue;
5122 if (*ep++ == '.')
5123 make_tag (bp, ep - bp, true,
5124 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5130 * Makefile support
5131 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5133 static void
5134 Makefile_targets (FILE *inf)
5136 register char *bp;
5138 LOOP_ON_INPUT_LINES (inf, lb, bp)
5140 if (*bp == '\t' || *bp == '#')
5141 continue;
5142 while (*bp != '\0' && *bp != '=' && *bp != ':')
5143 bp++;
5144 if (*bp == ':' || (globals && *bp == '='))
5146 /* We should detect if there is more than one tag, but we do not.
5147 We just skip initial and final spaces. */
5148 char * namestart = skip_spaces (lb.buffer);
5149 while (--bp > namestart)
5150 if (!notinname (*bp))
5151 break;
5152 make_tag (namestart, bp - namestart + 1, true,
5153 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5160 * Pascal parsing
5161 * Original code by Mosur K. Mohan (1989)
5163 * Locates tags for procedures & functions. Doesn't do any type- or
5164 * var-definitions. It does look for the keyword "extern" or
5165 * "forward" immediately following the procedure statement; if found,
5166 * the tag is skipped.
/* Scan the Pascal source on INF character by character through the
   global pointer DBP, refilling LB with readline at each end of line.
   After a "procedure" or "function" keyword the following token is
   saved, together with a copy of its line in TLINE, as a candidate tag.
   The candidate is only emitted once the `;' ending the heading has
   been seen and the next token turns out not to be "extern" or
   "forward".  Text inside { } or (* *) comments and '...' strings is
   skipped.  */
5168 static void
5169 Pascal_functions (FILE *inf)
5171 linebuffer tline; /* mostly copied from C_entries */
5172 long save_lcno;
5173 int save_lineno, namelen, taglen;
5174 char c, *name;
5176 bool /* each of these flags is true if: */
5177 incomment, /* point is inside a comment */
5178 inquote, /* point is inside '..' string */
5179 get_tagname, /* point is after PROCEDURE/FUNCTION
5180 keyword, so next item = potential tag */
5181 found_tag, /* point is after a potential tag */
5182 inparms, /* point is within parameter-list */
5183 verify_tag; /* point has passed the parm-list, so the
5184 next token will determine whether this
5185 is a FORWARD/EXTERN to be ignored, or
5186 whether it is a real tag */
5188 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5189 name = NULL; /* keep compiler quiet */
/* Start with an empty line in LB so the first iteration reads one.  */
5190 dbp = lb.buffer;
5191 *dbp = '\0';
5192 linebuffer_init (&tline);
5194 incomment = inquote = false;
5195 found_tag = false; /* have a proc name; check if extern */
5196 get_tagname = false; /* found "procedure" keyword */
5197 inparms = false; /* found '(' after "proc" */
5198 verify_tag = false; /* check if "extern" is ahead */
5201 while (perhaps_more_input (inf)) /* long main loop to get next char */
5203 c = *dbp++;
5204 if (c == '\0') /* if end of line */
5206 readline (&lb, inf);
5207 dbp = lb.buffer;
5208 if (*dbp == '\0')
5209 continue;
5210 if (!((found_tag && verify_tag)
5211 || get_tagname))
5212 c = *dbp++; /* only if don't need *dbp pointing
5213 to the beginning of the name of
5214 the procedure or function */
5216 if (incomment)
5218 if (c == '}') /* within { } comments */
5219 incomment = false;
5220 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5222 dbp++;
5223 incomment = false;
5225 continue;
5227 else if (inquote)
5229 if (c == '\'')
5230 inquote = false;
5231 continue;
5233 else
5234 switch (c)
5236 case '\'':
5237 inquote = true; /* found first quote */
5238 continue;
5239 case '{': /* found open { comment */
5240 incomment = true;
5241 continue;
5242 case '(':
5243 if (*dbp == '*') /* found open (* comment */
5245 incomment = true;
5246 dbp++;
5248 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5249 inparms = true;
5250 continue;
5251 case ')': /* end of parms list */
5252 if (inparms)
5253 inparms = false;
5254 continue;
5255 case ';':
5256 if (found_tag && !inparms) /* end of proc or fn stmt */
5258 verify_tag = true;
5259 break;
5261 continue;
/* A candidate is waiting for verification and DBP is on a non-blank
   character: decide whether to emit or drop it.  */
5263 if (found_tag && verify_tag && (*dbp != ' '))
5265 /* Check if this is an "extern" declaration. */
5266 if (*dbp == '\0')
5267 continue;
5268 if (c_tolower (*dbp) == 'e')
5270 if (nocase_tail ("extern")) /* superfluous, really! */
5272 found_tag = false;
5273 verify_tag = false;
5276 else if (c_tolower (*dbp) == 'f')
5278 if (nocase_tail ("forward")) /* check for forward reference */
5280 found_tag = false;
5281 verify_tag = false;
5284 if (found_tag && verify_tag) /* not external proc, so make tag */
5286 found_tag = false;
5287 verify_tag = false;
/* NAME points into TLINE, the saved copy of the heading line, because
   LB may have been overwritten by later lines since then.  */
5288 make_tag (name, namelen, true,
5289 tline.buffer, taglen, save_lineno, save_lcno);
5290 continue;
5293 if (get_tagname) /* grab name of proc or fn */
5295 char *cp;
5297 if (*dbp == '\0')
5298 continue;
5300 /* Find block name. */
5301 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5302 continue;
5304 /* Save all values for later tagging. */
5305 linebuffer_setlen (&tline, lb.len);
5306 strcpy (tline.buffer, lb.buffer);
5307 save_lineno = lineno;
5308 save_lcno = linecharno;
5309 name = tline.buffer + (dbp - lb.buffer);
5310 namelen = cp - dbp;
5311 taglen = cp - lb.buffer + 1;
5313 dbp = cp; /* set dbp to e-o-token */
5314 get_tagname = false;
5315 found_tag = true;
5316 continue;
5318 /* And proceed to check for "extern". */
5320 else if (!incomment && !inquote && !found_tag)
5322 /* Check for proc/fn keywords. */
5323 switch (c_tolower (c))
5325 case 'p':
5326 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5327 get_tagname = true;
5328 continue;
5329 case 'f':
5330 if (nocase_tail ("unction"))
5331 get_tagname = true;
5332 continue;
5335 } /* while not eof */
5337 free (tline.buffer);
5342 * Lisp tag functions
5343 * look for (def or (DEF, quote or QUOTE
5346 static void L_getit (void);
5348 static void
5349 L_getit (void)
5351 if (*dbp == '\'') /* Skip prefix quote */
5352 dbp++;
5353 else if (*dbp == '(')
5355 dbp++;
5356 /* Try to skip "(quote " */
5357 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5358 /* Ok, then skip "(" before name in (defstruct (foo)) */
5359 dbp = skip_spaces (dbp);
5361 get_lispy_tag (dbp);
5364 static void
5365 Lisp_functions (FILE *inf)
5367 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5369 if (dbp[0] != '(')
5370 continue;
5372 /* "(defvar foo)" is a declaration rather than a definition. */
5373 if (! declarations)
5375 char *p = dbp + 1;
5376 if (LOOKING_AT (p, "defvar"))
5378 p = skip_name (p); /* past var name */
5379 p = skip_spaces (p);
5380 if (*p == ')')
5381 continue;
5385 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5386 dbp += 3;
5388 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5390 dbp = skip_non_spaces (dbp);
5391 dbp = skip_spaces (dbp);
5392 L_getit ();
5394 else
5396 /* Check for (foo::defmumble name-defined ... */
5398 dbp++;
5399 while (!notinname (*dbp) && *dbp != ':');
5400 if (*dbp == ':')
5403 dbp++;
5404 while (*dbp == ':');
5406 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5408 dbp = skip_non_spaces (dbp);
5409 dbp = skip_spaces (dbp);
5410 L_getit ();
5419 * Lua script language parsing
5420 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5422 * "function" and "local function" are tags if they start at column 1.
5424 static void
5425 Lua_functions (FILE *inf)
5427 register char *bp;
5429 LOOP_ON_INPUT_LINES (inf, lb, bp)
5431 bp = skip_spaces (bp);
5432 if (bp[0] != 'f' && bp[0] != 'l')
5433 continue;
5435 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5437 if (LOOKING_AT (bp, "function"))
5439 char *tag_name, *tp_dot, *tp_colon;
5441 get_tag (bp, &tag_name);
5442 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5443 "foo". */
5444 tp_dot = strrchr (tag_name, '.');
5445 tp_colon = strrchr (tag_name, ':');
5446 if (tp_dot || tp_colon)
5448 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5449 int len_add = p - tag_name + 1;
5451 get_tag (bp + len_add, NULL);
5459 * PostScript tags
5460 * Just look for lines where the first character is '/'
5461 * Also look at "defineps" for PSWrap
5462 * Ideas by:
5463 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5464 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5466 static void
5467 PS_functions (FILE *inf)
5469 register char *bp, *ep;
5471 LOOP_ON_INPUT_LINES (inf, lb, bp)
5473 if (bp[0] == '/')
5475 for (ep = bp+1;
5476 *ep != '\0' && *ep != ' ' && *ep != '{';
5477 ep++)
5478 continue;
5479 make_tag (bp, ep - bp, true,
5480 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5482 else if (LOOKING_AT (bp, "defineps"))
5483 get_tag (bp, NULL);
5489 * Forth tags
5490 * Ignore anything after \ followed by space or in ( )
5491 * Look for words defined by :
5492 * Look for constant, code, create, defer, value, and variable
5493 * OBP extensions: Look for buffer:, field,
5494 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
/* Scan the Forth source on INF.  Each line is walked one
   blank-separated word at a time: a `\' followed by white space ends
   processing of the line, a `(' followed by white space skips to the
   next `)' or end of line, and any of the defining words listed below
   (matched without regard to case, plus a lone `:') causes the word
   that follows it to be tagged.  */
5496 static void
5497 Forth_words (FILE *inf)
5499 register char *bp;
5501 LOOP_ON_INPUT_LINES (inf, lb, bp)
5502 while ((bp = skip_spaces (bp))[0] != '\0')
5503 if (bp[0] == '\\' && c_isspace (bp[1]))
5504 break; /* read next line */
5505 else if (bp[0] == '(' && c_isspace (bp[1]))
5506 do /* skip to ) or eol */
5507 bp++;
5508 while (*bp != ')' && *bp != '\0');
5509 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5510 || LOOKING_AT_NOCASE (bp, "constant")
5511 || LOOKING_AT_NOCASE (bp, "2constant")
5512 || LOOKING_AT_NOCASE (bp, "fconstant")
5513 || LOOKING_AT_NOCASE (bp, "code")
5514 || LOOKING_AT_NOCASE (bp, "create")
5515 || LOOKING_AT_NOCASE (bp, "defer")
5516 || LOOKING_AT_NOCASE (bp, "value")
5517 || LOOKING_AT_NOCASE (bp, "2value")
5518 || LOOKING_AT_NOCASE (bp, "fvalue")
5519 || LOOKING_AT_NOCASE (bp, "variable")
5520 || LOOKING_AT_NOCASE (bp, "2variable")
5521 || LOOKING_AT_NOCASE (bp, "fvariable")
5522 || LOOKING_AT_NOCASE (bp, "buffer:")
5523 || LOOKING_AT_NOCASE (bp, "field:")
5524 || LOOKING_AT_NOCASE (bp, "+field")
5525 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5526 || LOOKING_AT_NOCASE (bp, "begin-structure")
5527 || LOOKING_AT_NOCASE (bp, "synonym")
5529 && c_isspace (bp[0]))
5531 /* Yay! A definition! */
5532 char* name_start = skip_spaces (bp);
5533 char* name_end = skip_non_spaces (name_start);
/* An empty name (defining word at end of line) produces no tag.  */
5534 if (name_start < name_end)
5535 make_tag (name_start, name_end - name_start,
5536 true, lb.buffer, name_end - lb.buffer,
5537 lineno, linecharno);
5538 bp = name_end;
5540 else
5541 bp = skip_non_spaces (bp);
5546 * Scheme tag functions
5547 * look for (def... xyzzy
5548 * (def... (xyzzy
5549 * (def ... ((...(xyzzy ....
5550 * (set! xyzzy
5551 * Original code by Ken Haase (1985?)
5553 static void
5554 Scheme_functions (FILE *inf)
5556 register char *bp;
5558 LOOP_ON_INPUT_LINES (inf, lb, bp)
5560 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5562 bp = skip_non_spaces (bp+4);
5563 /* Skip over open parens and white space.
5564 Don't continue past '\0' or '='. */
5565 while (*bp && notinname (*bp) && *bp != '=')
5566 bp++;
5567 get_lispy_tag (bp);
5569 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5570 get_lispy_tag (bp);
5575 /* Find tags in TeX and LaTeX input files. */
5577 /* TEX_toktab is a table of TeX control sequences that define tags.
5578 * Each entry records one such control sequence.
5580 * Original code from who knows whom.
5581 * Ideas by:
5582 * Stefan Monnier (2002)
/* TEX_toktab stays NULL until TeX_commands first runs, at which point
   TEX_decode_env fills it in.  The table is scanned up to the first
   entry whose `buffer' member is NULL.  */
5585 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5587 /* Default set of control sequences to put into TEX_toktab.
5588 The value of environment var TEXTAGS is prepended to this. */
5589 static const char *TEX_defenv = "\
5590 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5591 :part:appendix:entry:index:def\
5592 :newcommand:renewcommand:newenvironment:renewenvironment";
5594 static void TEX_decode_env (const char *, const char *);
5597 * TeX/LaTeX scanning loop.
/* Scan the TeX/LaTeX source on INF.  On each line, look for the escape
   character followed by one of the control sequences in TEX_toktab and
   tag the argument that follows it.  At most one tag is made per line,
   and scanning of a line stops at a `%'.  The escape character is not
   fixed in advance: it becomes whichever of `\' and `!' is met first,
   and that choice (with its matching group delimiters) is kept for the
   rest of the file.  */
5599 static void
5600 TeX_commands (FILE *inf)
5602 char *cp;
5603 linebuffer *key;
5605 char TEX_esc = '\0';
5606 char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
5608 /* Initialize token table once from environment. */
5609 if (TEX_toktab == NULL)
5610 TEX_decode_env ("TEXTAGS", TEX_defenv);
5612 LOOP_ON_INPUT_LINES (inf, lb, cp)
5614 /* Look at each TEX keyword in line. */
5615 for (;;)
5617 /* Look for a TEX escape. */
5618 while (true)
5620 char c = *cp++;
5621 if (c == '\0' || c == '%')
5622 goto tex_next_line;
5624 /* Select either \ or ! as escape character, whichever comes
5625 first outside a comment. */
5626 if (!TEX_esc)
5627 switch (c)
5629 case '\\':
5630 TEX_esc = c;
5631 TEX_opgrp = '{';
5632 TEX_clgrp = '}';
5633 break;
5635 case '!':
5636 TEX_esc = c;
5637 TEX_opgrp = '<';
5638 TEX_clgrp = '>';
5639 break;
5642 if (c == TEX_esc)
5643 break;
/* CP is now just past an escape character: compare what follows with
   every token in the table.  */
5646 for (key = TEX_toktab; key->buffer != NULL; key++)
5647 if (strneq (cp, key->buffer, key->len))
5649 char *p;
5650 int namelen, linelen;
5651 bool opgrp = false;
5653 cp = skip_spaces (cp + key->len);
5654 if (*cp == TEX_opgrp)
5656 opgrp = true;
5657 cp++;
/* The tag name runs up to white space, `#' or a group delimiter.  */
5659 for (p = cp;
5660 (!c_isspace (*p) && *p != '#' &&
5661 *p != TEX_opgrp && *p != TEX_clgrp);
5662 p++)
5663 continue;
5664 namelen = p - cp;
/* By default the whole line is recorded with the tag; in the case
   tested below it is cut at the next group delimiter instead.  */
5665 linelen = lb.len;
5666 if (!opgrp || *p == TEX_clgrp)
5668 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5669 p++;
5670 linelen = p - lb.buffer + 1;
5672 make_tag (cp, namelen, true,
5673 lb.buffer, linelen, lineno, linecharno);
5674 goto tex_next_line; /* We only tag a line once */
5677 tex_next_line:
5682 /* Read environment and prepend it to the default string.
5683 Build token table. */
5684 static void
5685 TEX_decode_env (const char *evarname, const char *defenv)
5687 register const char *env, *p;
5688 int i, len;
5690 /* Append default string to environment. */
5691 env = getenv (evarname);
5692 if (!env)
5693 env = defenv;
5694 else
5695 env = concat (env, defenv, "");
5697 /* Allocate a token table */
5698 for (len = 1, p = env; (p = strchr (p, ':')); )
5699 if (*++p)
5700 len++;
5701 TEX_toktab = xnew (len, linebuffer);
5703 /* Unpack environment string into token table. Be careful about */
5704 /* zero-length strings (leading ':', "::" and trailing ':') */
5705 for (i = 0; *env != '\0';)
5707 p = strchr (env, ':');
5708 if (!p) /* End of environment string. */
5709 p = env + strlen (env);
5710 if (p - env > 0)
5711 { /* Only non-zero strings. */
5712 TEX_toktab[i].buffer = savenstr (env, p - env);
5713 TEX_toktab[i].len = p - env;
5714 i++;
5716 if (*p)
5717 env = p + 1;
5718 else
5720 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5721 TEX_toktab[i].len = 0;
5722 break;
5728 /* Texinfo support. Dave Love, Mar. 2000. */
5729 static void
5730 Texinfo_nodes (FILE *inf)
5732 char *cp, *start;
5733 LOOP_ON_INPUT_LINES (inf, lb, cp)
5734 if (LOOKING_AT (cp, "@node"))
5736 start = cp;
5737 while (*cp != '\0' && *cp != ',')
5738 cp++;
5739 make_tag (start, cp - start, true,
5740 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5746 * HTML support.
5747 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5748 * Contents of <a name=xxx> are tags with name xxx.
5750 * Francesco Potortì, 2002.
/* Scan the HTML source on INF through the global pointer DBP.  The
   four flags below form a small state machine whose state carries over
   from one line to the next.  With no flag set, the scan looks for `<';
   INTAG looks inside a tag for "id=" (and "name=" in an anchor);
   SKIPTAG runs to the closing `>'; GETNEXT tags the next run of text
   outside markup, using the name collected in TOKEN_NAME if any.  */
5752 static void
5753 HTML_labels (FILE *inf)
5755 bool getnext = false; /* next text outside of HTML tags is a tag */
5756 bool skiptag = false; /* skip to the end of the current HTML tag */
5757 bool intag = false; /* inside an html tag, looking for ID= */
5758 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5759 char *end;
5762 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5764 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5765 for (;;) /* loop on the same line */
5767 if (skiptag) /* skip HTML tag */
5769 while (*dbp != '\0' && *dbp != '>')
5770 dbp++;
5771 if (*dbp == '>')
5773 dbp += 1;
5774 skiptag = false;
5775 continue; /* look on the same line */
5777 break; /* go to next line */
5780 else if (intag) /* look for "name=" or "id=" */
/* Stop at any `n' or `i' (either case) as a possible start of the
   attribute name, or at the end of the tag or of the line.  */
5782 while (*dbp != '\0' && *dbp != '>'
5783 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5784 dbp++;
5785 if (*dbp == '\0')
5786 break; /* go to next line */
5787 if (*dbp == '>')
5789 dbp += 1;
5790 intag = false;
5791 continue; /* look on the same line */
5793 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5794 || LOOKING_AT_NOCASE (dbp, "id="))
5796 bool quoted = (dbp[0] == '"');
/* Copy the attribute value, with the surrounding double quotes
   removed if present, into TOKEN_NAME.  */
5798 if (quoted)
5799 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5800 continue;
5801 else
5802 for (end = dbp; *end != '\0' && intoken (*end); end++)
5803 continue;
5804 linebuffer_setlen (&token_name, end - dbp);
5805 memcpy (token_name.buffer, dbp, end - dbp);
5806 token_name.buffer[end - dbp] = '\0';
5808 dbp = end;
5809 intag = false; /* we found what we looked for */
5810 skiptag = true; /* skip to the end of the tag */
5811 getnext = true; /* then grab the text */
5812 continue; /* look on the same line */
5814 dbp += 1;
5817 else if (getnext) /* grab next tokens and tag them */
5819 dbp = skip_spaces (dbp);
5820 if (*dbp == '\0')
5821 break; /* go to next line */
5822 if (*dbp == '<')
5824 intag = true;
5825 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5826 continue; /* look on the same line */
/* Plain text: it runs to the next `<' or the end of the line and is
   recorded with the tag.  */
5829 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5830 continue;
5831 make_tag (token_name.buffer, token_name.len, true,
5832 dbp, end - dbp, lineno, linecharno);
5833 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5834 getnext = false;
5835 break; /* go to next line */
5838 else /* look for an interesting HTML tag */
5840 while (*dbp != '\0' && *dbp != '<')
5841 dbp++;
5842 if (*dbp == '\0')
5843 break; /* go to next line */
5844 intag = true;
/* NOTE(review): INANCHOR is only ever set in this branch, never
   cleared, so it keeps its previous value when the tag found here is
   not an anchor -- confirm whether that is intended.  */
5845 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5847 inanchor = true;
5848 continue; /* look on the same line */
5850 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5851 || LOOKING_AT_NOCASE (dbp, "<h1>")
5852 || LOOKING_AT_NOCASE (dbp, "<h2>")
5853 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5855 intag = false;
5856 getnext = true;
5857 continue; /* look on the same line */
5859 dbp += 1;
5866 * Prolog support
5868 * Assumes that the predicate or rule starts at column 0.
5869 * Only the first clause of a predicate or rule is added.
5870 * Original code by Sunichirou Sugou (1989)
5871 * Rewritten by Anders Lindgren (1996)
5873 static size_t prolog_pr (char *, char *);
5874 static void prolog_skip_comment (linebuffer *, FILE *);
5875 static size_t prolog_atom (char *, size_t);
5877 static void
5878 Prolog_functions (FILE *inf)
5880 char *cp, *last;
5881 size_t len;
5882 size_t allocated;
5884 allocated = 0;
5885 len = 0;
5886 last = NULL;
5888 LOOP_ON_INPUT_LINES (inf, lb, cp)
5890 if (cp[0] == '\0') /* Empty line */
5891 continue;
5892 else if (c_isspace (cp[0])) /* Not a predicate */
5893 continue;
5894 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5895 prolog_skip_comment (&lb, inf);
5896 else if ((len = prolog_pr (cp, last)) > 0)
5898 /* Predicate or rule. Store the function name so that we
5899 only generate a tag for the first clause. */
5900 if (last == NULL)
5901 last = xnew (len + 1, char);
5902 else if (len + 1 > allocated)
5903 xrnew (last, len + 1, char);
5904 allocated = len + 1;
5905 memcpy (last, cp, len);
5906 last[len] = '\0';
5909 free (last);
5913 static void
5914 prolog_skip_comment (linebuffer *plb, FILE *inf)
5916 char *cp;
5920 for (cp = plb->buffer; *cp != '\0'; cp++)
5921 if (cp[0] == '*' && cp[1] == '/')
5922 return;
5923 readline (plb, inf);
5925 while (perhaps_more_input (inf));
5929 * A predicate or rule definition is added if it matches:
5930 * <beginning of line><Prolog Atom><whitespace>(
5931 * or <beginning of line><Prolog Atom><whitespace>:-
5933 * It is added to the tags database if it doesn't match the
5934 * name of the previous clause header.
5936 * Return the size of the name of the predicate or rule, or 0 if no
5937 * header was found.
5939 static size_t
5940 prolog_pr (char *s, char *last)
5942 /* Name of last clause. */
5944 size_t pos;
5945 size_t len;
5947 pos = prolog_atom (s, 0);
5948 if (! pos)
5949 return 0;
5951 len = pos;
5952 pos = skip_spaces (s + pos) - s;
5954 if ((s[pos] == '.'
5955 || (s[pos] == '(' && (pos += 1))
5956 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5957 && (last == NULL /* save only the first clause */
5958 || len != strlen (last)
5959 || !strneq (s, last, len)))
5961 make_tag (s, len, true, s, pos, lineno, linecharno);
5962 return len;
5964 else
5965 return 0;
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: scan for the closing quote.  */
      pos++;
      for (;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored.  */
            return 0;

          case '\\':
            if (s[pos + 1] == '\0')
              return 0;
            pos += 2;
            break;

          case '\'':
            /* A lone quote ends the atom; a doubled one stands for a
               quote character inside it.  */
            if (s[pos + 1] != '\'')
              return pos + 1 - start;
            pos += 2;
            break;

          default:
            pos++;
            break;
          }
    }

  if (!(c_islower (s[pos]) || s[pos] == '_'))
    return 0;

  /* The atom is unquoted.  */
  for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
    continue;

  return pos - start;
}
6027 * Support for Erlang
6029 * Generates tags for functions, defines, and records.
6030 * Assumes that Erlang functions start at column 0.
6031 * Original code by Anders Lindgren (1996)
6033 static int erlang_func (char *, char *);
6034 static void erlang_attribute (char *);
6035 static int erlang_atom (char *);
6037 static void
6038 Erlang_functions (FILE *inf)
6040 char *cp, *last;
6041 int len;
6042 int allocated;
6044 allocated = 0;
6045 len = 0;
6046 last = NULL;
6048 LOOP_ON_INPUT_LINES (inf, lb, cp)
6050 if (cp[0] == '\0') /* Empty line */
6051 continue;
6052 else if (c_isspace (cp[0])) /* Not function nor attribute */
6053 continue;
6054 else if (cp[0] == '%') /* comment */
6055 continue;
6056 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6057 continue;
6058 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6060 erlang_attribute (cp);
6061 if (last != NULL)
6063 free (last);
6064 last = NULL;
6067 else if ((len = erlang_func (cp, last)) > 0)
6070 * Function. Store the function name so that we only
6071 * generates a tag for the first clause.
6073 if (last == NULL)
6074 last = xnew (len + 1, char);
6075 else if (len + 1 > allocated)
6076 xrnew (last, len + 1, char);
6077 allocated = len + 1;
6078 memcpy (last, cp, len);
6079 last[len] = '\0';
6082 free (last);
6087 * A function definition is added if it matches:
6088 * <beginning of line><Erlang Atom><whitespace>(
6090 * It is added to the tags database if it doesn't match the
6091 * name of the previous clause header.
6093 * Return the size of the name of the function, or 0 if no function
6094 * was found.
6096 static int
6097 erlang_func (char *s, char *last)
6099 /* Name of last clause. */
6101 int pos;
6102 int len;
6104 pos = erlang_atom (s);
6105 if (pos < 1)
6106 return 0;
6108 len = pos;
6109 pos = skip_spaces (s + pos) - s;
6111 /* Save only the first clause. */
6112 if (s[pos++] == '('
6113 && (last == NULL
6114 || len != (int)strlen (last)
6115 || !strneq (s, last, len)))
6117 make_tag (s, len, true, s, pos, lineno, linecharno);
6118 return len;
6121 return 0;
6126 * Handle attributes. Currently, tags are generated for defines
6127 * and records.
6129 * They are on the form:
6130 * -define(foo, bar).
6131 * -define(Foo(M, N), M+N).
6132 * -record(graph, {vtab = notable, cyclic = true}).
6134 static void
6135 erlang_attribute (char *s)
6137 char *cp = s;
6139 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6140 && *cp++ == '(')
6142 int len = erlang_atom (skip_spaces (cp));
6143 if (len > 0)
6144 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
6146 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, or when a quoted atom is not closed before
 * the end of the string.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: everything up to the closing quote, where a
         backslash protects the character after it.  */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')	/* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\' && s[++n] == '\0')
            return 0;
          n++;
        }
      return n + 1;		/* count the closing quote */
    }

  /* The atom is unquoted.  */
  if (c_isalpha (s[0]) || s[0] == '_')
    for (n = 1; c_isalnum (s[n]) || s[n] == '_'; n++)
      continue;

  return n;
}
6179 static char *scan_separators (char *);
6180 static void add_regex (char *, language *);
6181 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of it.  A backslash quotes the next character:
 * \a, \b, \d, \e, \f, \n, \r, \t and \v become the matching control
 * character, a quoted separator becomes the separator itself, and
 * any other quoted character keeps its backslash.
 *
 * Works in place and null-terminates the resulting string, which
 * starts at NAME.  Returns a pointer to the terminating separator, or
 * NULL for unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* input cursor, always ahead of DST */
  bool escaped = false;         /* previous char was an unquoted '\' */

  for (src = name + 1; *src != '\0'; src++)
    {
      const char c = *src;

      if (escaped)
        {
          escaped = false;
          switch (c)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b';   break; /* BS (back space) */
            case 'd': *dst++ = 0177;   break; /* DEL (delete) */
            case 'e': *dst++ = 033;    break; /* ESC (escape) */
            case 'f': *dst++ = '\f';   break; /* FF (form feed) */
            case 'n': *dst++ = '\n';   break; /* NL (new line) */
            case 'r': *dst++ = '\r';   break; /* CR (carriage return) */
            case 't': *dst++ = '\t';   break; /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v';   break; /* VT (vertical tab) */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (c != sep)
                *dst++ = '\\';
              *dst++ = c;
              break;
            }
          continue;
        }

      if (c == '\\')
        {
          escaped = true;
          continue;
        }
      if (c == sep)
        break;
      *dst++ = c;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string.  */
  *dst = '\0';
  return src;
}
6242 /* Look at the argument of --regex or --no-regex and do the right
6243 thing. Same for each line of a regexp file. */
6244 static void
6245 analyze_regex (char *regex_arg)
6247 if (regex_arg == NULL)
6249 free_regexps (); /* --no-regex: remove existing regexps */
6250 return;
6253 /* A real --regexp option or a line in a regexp file. */
6254 switch (regex_arg[0])
6256 /* Comments in regexp file or null arg to --regex. */
6257 case '\0':
6258 case ' ':
6259 case '\t':
6260 break;
6262 /* Read a regex file. This is recursive and may result in a
6263 loop, which will stop when the file descriptors are exhausted. */
6264 case '@':
6266 FILE *regexfp;
6267 linebuffer regexbuf;
6268 char *regexfile = regex_arg + 1;
6270 /* regexfile is a file containing regexps, one per line. */
6271 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6272 if (regexfp == NULL)
6273 pfatal (regexfile);
6274 linebuffer_init (&regexbuf);
6275 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6276 analyze_regex (regexbuf.buffer);
6277 free (regexbuf.buffer);
6278 if (fclose (regexfp) != 0)
6279 pfatal (regexfile);
6281 break;
6283 /* Regexp to be used for a specific language only. */
6284 case '{':
6286 language *lang;
6287 char *lang_name = regex_arg + 1;
6288 char *cp;
6290 for (cp = lang_name; *cp != '}'; cp++)
6291 if (*cp == '\0')
6293 error ("unterminated language name in regex: %s", regex_arg);
6294 return;
6296 *cp++ = '\0';
6297 lang = get_language_from_langname (lang_name);
6298 if (lang == NULL)
6299 return;
6300 add_regex (cp, lang);
6302 break;
6304 /* Regexp to be used for any language. */
6305 default:
6306 add_regex (regex_arg, NULL);
6307 break;
6311 /* Separate the regexp pattern, compile it,
6312 and care for optional name and modifiers. */
6313 static void
6314 add_regex (char *regexp_pattern, language *lang)
6316 static struct re_pattern_buffer zeropattern;
6317 char sep, *pat, *name, *modifiers;
6318 char empty = '\0';
6319 const char *err;
6320 struct re_pattern_buffer *patbuf;
6321 regexp *rp;
6322 bool
6323 force_explicit_name = true, /* do not use implicit tag names */
6324 ignore_case = false, /* case is significant */
6325 multi_line = false, /* matches are done one line at a time */
6326 single_line = false; /* dot does not match newline */
6329 if (strlen (regexp_pattern) < 3)
6331 error ("null regexp");
6332 return;
6334 sep = regexp_pattern[0];
6335 name = scan_separators (regexp_pattern);
6336 if (name == NULL)
6338 error ("%s: unterminated regexp", regexp_pattern);
6339 return;
6341 if (name[1] == sep)
6343 error ("null name for regexp \"%s\"", regexp_pattern);
6344 return;
6346 modifiers = scan_separators (name);
6347 if (modifiers == NULL) /* no terminating separator --> no name */
6349 modifiers = name;
6350 name = &empty;
6352 else
6353 modifiers += 1; /* skip separator */
6355 /* Parse regex modifiers. */
6356 for (; modifiers[0] != '\0'; modifiers++)
6357 switch (modifiers[0])
6359 case 'N':
6360 if (modifiers == name)
6361 error ("forcing explicit tag name but no name, ignoring");
6362 force_explicit_name = true;
6363 break;
6364 case 'i':
6365 ignore_case = true;
6366 break;
6367 case 's':
6368 single_line = true;
6369 FALLTHROUGH;
6370 case 'm':
6371 multi_line = true;
6372 need_filebuf = true;
6373 break;
6374 default:
6375 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6376 break;
6379 patbuf = xnew (1, struct re_pattern_buffer);
6380 *patbuf = zeropattern;
6381 if (ignore_case)
6383 static char lc_trans[UCHAR_MAX + 1];
6384 int i;
6385 for (i = 0; i < UCHAR_MAX + 1; i++)
6386 lc_trans[i] = c_tolower (i);
6387 patbuf->translate = lc_trans; /* translation table to fold case */
6390 if (multi_line)
6391 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6392 else
6393 pat = regexp_pattern;
6395 if (single_line)
6396 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6397 else
6398 re_set_syntax (RE_SYNTAX_EMACS);
6400 err = re_compile_pattern (pat, strlen (pat), patbuf);
6401 if (multi_line)
6402 free (pat);
6403 if (err != NULL)
6405 error ("%s while compiling pattern", err);
6406 return;
6409 rp = p_head;
6410 p_head = xnew (1, regexp);
6411 p_head->pattern = savestr (regexp_pattern);
6412 p_head->p_next = rp;
6413 p_head->lang = lang;
6414 p_head->pat = patbuf;
6415 p_head->name = savestr (name);
6416 p_head->error_signaled = false;
6417 p_head->force_explicit_name = force_explicit_name;
6418 p_head->ignore_case = ignore_case;
6419 p_head->multi_line = multi_line;
6423 * Do the substitutions indicated by the regular expression and
6424 * arguments.
6426 static char *
6427 substitute (char *in, char *out, struct re_registers *regs)
6429 char *result, *t;
6430 int size, dig, diglen;
6432 result = NULL;
6433 size = strlen (out);
6435 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6436 if (out[size - 1] == '\\')
6437 fatal ("pattern error in \"%s\"", out);
6438 for (t = strchr (out, '\\');
6439 t != NULL;
6440 t = strchr (t + 2, '\\'))
6441 if (c_isdigit (t[1]))
6443 dig = t[1] - '0';
6444 diglen = regs->end[dig] - regs->start[dig];
6445 size += diglen - 2;
6447 else
6448 size -= 1;
6450 /* Allocate space and do the substitutions. */
6451 assert (size >= 0);
6452 result = xnew (size + 1, char);
6454 for (t = result; *out != '\0'; out++)
6455 if (*out == '\\' && c_isdigit (*++out))
6457 dig = *out - '0';
6458 diglen = regs->end[dig] - regs->start[dig];
6459 memcpy (t, in + regs->start[dig], diglen);
6460 t += diglen;
6462 else
6463 *t++ = *out;
6464 *t = '\0';
6466 assert (t <= result + size);
6467 assert (t - result == (int)strlen (result));
6469 return result;
6472 /* Deallocate all regexps. */
6473 static void
6474 free_regexps (void)
6476 regexp *rp;
6477 while (p_head != NULL)
6479 rp = p_head->p_next;
6480 free (p_head->pattern);
6481 free (p_head->name);
6482 free (p_head);
6483 p_head = rp;
6485 return;
6489 * Reads the whole file as a single string from `filebuf' and looks for
6490 * multi-line regular expressions, creating tags on matches.
6491 * readline already dealt with normal regexps.
6493 * Idea by Ben Wing <ben@666.com> (2002).
6495 static void
6496 regex_tag_multiline (void)
6498 char *buffer = filebuf.buffer;
6499 regexp *rp;
6500 char *name;
6502 for (rp = p_head; rp != NULL; rp = rp->p_next)
6504 int match = 0;
6506 if (!rp->multi_line)
6507 continue; /* skip normal regexps */
6509 /* Generic initializations before parsing file from memory. */
6510 lineno = 1; /* reset global line number */
6511 charno = 0; /* reset global char number */
6512 linecharno = 0; /* reset global char number of line start */
6514 /* Only use generic regexps or those for the current language. */
6515 if (rp->lang != NULL && rp->lang != curfdp->lang)
6516 continue;
6518 while (match >= 0 && match < filebuf.len)
6520 match = re_search (rp->pat, buffer, filebuf.len, charno,
6521 filebuf.len - match, &rp->regs);
6522 switch (match)
6524 case -2:
6525 /* Some error. */
6526 if (!rp->error_signaled)
6528 error ("regexp stack overflow while matching \"%s\"",
6529 rp->pattern);
6530 rp->error_signaled = true;
6532 break;
6533 case -1:
6534 /* No match. */
6535 break;
6536 default:
6537 if (match == rp->regs.end[0])
6539 if (!rp->error_signaled)
6541 error ("regexp matches the empty string: \"%s\"",
6542 rp->pattern);
6543 rp->error_signaled = true;
6545 match = -3; /* exit from while loop */
6546 break;
6549 /* Match occurred. Construct a tag. */
6550 while (charno < rp->regs.end[0])
6551 if (buffer[charno++] == '\n')
6552 lineno++, linecharno = charno;
6553 name = rp->name;
6554 if (name[0] == '\0')
6555 name = NULL;
6556 else /* make a named tag */
6557 name = substitute (buffer, rp->name, &rp->regs);
6558 if (rp->force_explicit_name)
6560 /* Force explicit tag name, if a name is there. */
6561 pfnote (name, true, buffer + linecharno,
6562 charno - linecharno + 1, lineno, linecharno);
6564 if (debug)
6565 fprintf (stderr, "%s on %s:%d: %s\n",
6566 name ? name : "(unnamed)", curfdp->taggedfname,
6567 lineno, buffer + linecharno);
6569 else
6570 make_tag (name, strlen (name), true, buffer + linecharno,
6571 charno - linecharno + 1, lineno, linecharno);
6572 break;
6579 static bool
6580 nocase_tail (const char *cp)
6582 int len = 0;
6584 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6585 cp++, len++;
6586 if (*cp == '\0' && !intoken (dbp[len]))
6588 dbp += len;
6589 return true;
6591 return false;
6594 static void
6595 get_tag (register char *bp, char **namepp)
6597 register char *cp = bp;
6599 if (*bp != '\0')
6601 /* Go till you get to white space or a syntactic break */
6602 for (cp = bp + 1; !notinname (*cp); cp++)
6603 continue;
6604 make_tag (bp, cp - bp, true,
6605 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6608 if (namepp != NULL)
6609 *namepp = savenstr (bp, cp - bp);
6612 /* Similar to get_tag, but include '=' as part of the tag. */
6613 static void
6614 get_lispy_tag (register char *bp)
6616 register char *cp = bp;
6618 if (*bp != '\0')
6620 /* Go till you get to white space or a syntactic break */
6621 for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
6622 continue;
6623 make_tag (bp, cp - bp, true,
6624 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6629 * Read a line of text from `stream' into `lbp', excluding the
6630 * newline or CR-NL, if any. Return the number of characters read from
6631 * `stream', which is the length of the line including the newline.
6633 * On DOS or Windows we do not count the CR character, if any before the
6634 * NL, in the returned length; this mirrors the behavior of Emacs on those
6635 * platforms (for text files, it translates CR-NL to NL as it reads in the
6636 * file).
6638 * If multi-line regular expressions are requested, each line read is
6639 * appended to `filebuf'.
6641 static long
6642 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6644 char *buffer = lbp->buffer;
6645 char *p = lbp->buffer;
6646 char *pend;
6647 int chars_deleted;
6649 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6651 for (;;)
6653 register int c = getc (stream);
6654 if (p == pend)
6656 /* We're at the end of linebuffer: expand it. */
6657 lbp->size *= 2;
6658 xrnew (buffer, lbp->size, char);
6659 p += buffer - lbp->buffer;
6660 pend = buffer + lbp->size;
6661 lbp->buffer = buffer;
6663 if (c == EOF)
6665 if (ferror (stream))
6666 perror (filename);
6667 *p = '\0';
6668 chars_deleted = 0;
6669 break;
6671 if (c == '\n')
6673 if (p > buffer && p[-1] == '\r')
6675 p -= 1;
6676 chars_deleted = 2;
6678 else
6680 chars_deleted = 1;
6682 *p = '\0';
6683 break;
6685 *p++ = c;
6687 lbp->len = p - buffer;
6689 if (need_filebuf /* we need filebuf for multi-line regexps */
6690 && chars_deleted > 0) /* not at EOF */
6692 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6694 /* Expand filebuf. */
6695 filebuf.size *= 2;
6696 xrnew (filebuf.buffer, filebuf.size, char);
6698 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6699 filebuf.len += lbp->len;
6700 filebuf.buffer[filebuf.len++] = '\n';
6701 filebuf.buffer[filebuf.len] = '\0';
6704 return lbp->len + chars_deleted;
6708 * Like readline_internal, above, but in addition try to match the
6709 * input line against relevant regular expressions and manage #line
6710 * directives.
6712 static void
6713 readline (linebuffer *lbp, FILE *stream)
6715 long result;
6717 linecharno = charno; /* update global char number of line start */
6718 result = readline_internal (lbp, stream, infilename); /* read line */
6719 lineno += 1; /* increment global line number */
6720 charno += result; /* increment global char number */
6722 /* Honor #line directives. */
6723 if (!no_line_directive)
6725 static bool discard_until_line_directive;
6727 /* Check whether this is a #line directive. */
6728 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6730 unsigned int lno;
6731 int start = 0;
6733 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6734 && start > 0) /* double quote character found */
6736 char *endp = lbp->buffer + start;
6738 while ((endp = strchr (endp, '"')) != NULL
6739 && endp[-1] == '\\')
6740 endp++;
6741 if (endp != NULL)
6742 /* Ok, this is a real #line directive. Let's deal with it. */
6744 char *taggedabsname; /* absolute name of original file */
6745 char *taggedfname; /* name of original file as given */
6746 char *name; /* temp var */
6748 discard_until_line_directive = false; /* found it */
6749 name = lbp->buffer + start;
6750 *endp = '\0';
6751 canonicalize_filename (name);
6752 taggedabsname = absolute_filename (name, tagfiledir);
6753 if (filename_is_absolute (name)
6754 || filename_is_absolute (curfdp->infname))
6755 taggedfname = savestr (taggedabsname);
6756 else
6757 taggedfname = relative_filename (taggedabsname,tagfiledir);
6759 if (streq (curfdp->taggedfname, taggedfname))
6760 /* The #line directive is only a line number change. We
6761 deal with this afterwards. */
6762 free (taggedfname);
6763 else
6764 /* The tags following this #line directive should be
6765 attributed to taggedfname. In order to do this, set
6766 curfdp accordingly. */
6768 fdesc *fdp; /* file description pointer */
6770 /* Go look for a file description already set up for the
6771 file indicated in the #line directive. If there is
6772 one, use it from now until the next #line
6773 directive. */
6774 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6775 if (streq (fdp->infname, curfdp->infname)
6776 && streq (fdp->taggedfname, taggedfname))
6777 /* If we remove the second test above (after the &&)
6778 then all entries pertaining to the same file are
6779 coalesced in the tags file. If we use it, then
6780 entries pertaining to the same file but generated
6781 from different files (via #line directives) will
6782 go into separate sections in the tags file. These
6783 alternatives look equivalent. The first one
6784 destroys some apparently useless information. */
6786 curfdp = fdp;
6787 free (taggedfname);
6788 break;
6790 /* Else, if we already tagged the real file, skip all
6791 input lines until the next #line directive. */
6792 if (fdp == NULL) /* not found */
6793 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6794 if (streq (fdp->infabsname, taggedabsname))
6796 discard_until_line_directive = true;
6797 free (taggedfname);
6798 break;
6800 /* Else create a new file description and use that from
6801 now on, until the next #line directive. */
6802 if (fdp == NULL) /* not found */
6804 fdp = fdhead;
6805 fdhead = xnew (1, fdesc);
6806 *fdhead = *curfdp; /* copy curr. file description */
6807 fdhead->next = fdp;
6808 fdhead->infname = savestr (curfdp->infname);
6809 fdhead->infabsname = savestr (curfdp->infabsname);
6810 fdhead->infabsdir = savestr (curfdp->infabsdir);
6811 fdhead->taggedfname = taggedfname;
6812 fdhead->usecharno = false;
6813 fdhead->prop = NULL;
6814 fdhead->written = false;
6815 curfdp = fdhead;
6818 free (taggedabsname);
6819 lineno = lno - 1;
6820 readline (lbp, stream);
6821 return;
6822 } /* if a real #line directive */
6823 } /* if #line is followed by a number */
6824 } /* if line begins with "#line " */
6826 /* If we are here, no #line directive was found. */
6827 if (discard_until_line_directive)
6829 if (result > 0)
6831 /* Do a tail recursion on ourselves, thus discarding the contents
6832 of the line buffer. */
6833 readline (lbp, stream);
6834 return;
6836 /* End of file. */
6837 discard_until_line_directive = false;
6838 return;
6840 } /* if #line directives should be considered */
6843 int match;
6844 regexp *rp;
6845 char *name;
6847 /* Match against relevant regexps. */
6848 if (lbp->len > 0)
6849 for (rp = p_head; rp != NULL; rp = rp->p_next)
6851 /* Only use generic regexps or those for the current language.
6852 Also do not use multiline regexps, which is the job of
6853 regex_tag_multiline. */
6854 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6855 || rp->multi_line)
6856 continue;
6858 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6859 switch (match)
6861 case -2:
6862 /* Some error. */
6863 if (!rp->error_signaled)
6865 error ("regexp stack overflow while matching \"%s\"",
6866 rp->pattern);
6867 rp->error_signaled = true;
6869 break;
6870 case -1:
6871 /* No match. */
6872 break;
6873 case 0:
6874 /* Empty string matched. */
6875 if (!rp->error_signaled)
6877 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6878 rp->error_signaled = true;
6880 break;
6881 default:
6882 /* Match occurred. Construct a tag. */
6883 name = rp->name;
6884 if (name[0] == '\0')
6885 name = NULL;
6886 else /* make a named tag */
6887 name = substitute (lbp->buffer, rp->name, &rp->regs);
6888 if (rp->force_explicit_name)
6890 /* Force explicit tag name, if a name is there. */
6891 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6892 if (debug)
6893 fprintf (stderr, "%s on %s:%d: %s\n",
6894 name ? name : "(unnamed)", curfdp->taggedfname,
6895 lineno, lbp->buffer);
6897 else
6898 make_tag (name, strlen (name), true,
6899 lbp->buffer, match, lineno, linecharno);
6900 break;
/*
 * Return a newly allocated copy of the string CP, made with savenstr
 * using the full length of CP.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6918 * Return a pointer to a space of size LEN+1 allocated with xnew where
6919 * the string CP has been copied for at most the first LEN characters.
6921 static char *
6922 savenstr (const char *cp, int len)
6924 char *dp = xnew (len + 1, char);
6925 dp[len] = '\0';
6926 return memcpy (dp, cp, len);
/* Skip spaces (end of string is not space), return new pointer.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Print an error message built from printf-style FORMAT and args
   (through verror), then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current errno with perror, using S1 as the prefix, then
   exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6975 static void
6976 suggest_asking_for_help (void)
6978 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6979 progname);
6980 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args.  Unlike
   fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6993 static void
6994 verror (char const *format, va_list ap)
6996 fprintf (stderr, "%s: ", progname);
6997 vfprintf (stderr, format, ap);
6998 fprintf (stderr, "\n");
7001 /* Return a newly-allocated string whose contents
7002 concatenate those of s1, s2, s3. */
7003 static char *
7004 concat (const char *s1, const char *s2, const char *s3)
7006 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
7007 char *result = xnew (len1 + len2 + len3 + 1, char);
7009 strcpy (result, s1);
7010 strcpy (result + len1, s2);
7011 strcpy (result + len1 + len2, s3);
7013 return result;
7017 /* Does the same work as the system V getcwd, but does not need to
7018 guess the buffer size in advance. */
7019 static char *
7020 etags_getcwd (void)
7022 int bufsize = 200;
7023 char *path = xnew (bufsize, char);
7025 while (getcwd (path, bufsize) == NULL)
7027 if (errno != ERANGE)
7028 pfatal ("getcwd");
7029 bufsize *= 2;
7030 free (path);
7031 path = xnew (bufsize, char);
7034 canonicalize_filename (path);
7035 return path;
7038 /* Return a newly allocated string containing a name of a temporary file. */
7039 static char *
7040 etags_mktmp (void)
7042 const char *tmpdir = getenv ("TMPDIR");
7043 const char *slash = "/";
7045 #if MSDOS || defined (DOS_NT)
7046 if (!tmpdir)
7047 tmpdir = getenv ("TEMP");
7048 if (!tmpdir)
7049 tmpdir = getenv ("TMP");
7050 if (!tmpdir)
7051 tmpdir = ".";
7052 if (tmpdir[strlen (tmpdir) - 1] == '/'
7053 || tmpdir[strlen (tmpdir) - 1] == '\\')
7054 slash = "";
7055 #else
7056 if (!tmpdir)
7057 tmpdir = "/tmp";
7058 if (tmpdir[strlen (tmpdir) - 1] == '/')
7059 slash = "";
7060 #endif
7062 char *templt = concat (tmpdir, slash, "etXXXXXX");
7063 int fd = mkostemp (templt, O_CLOEXEC);
7064 if (fd < 0 || close (fd) != 0)
7066 int temp_errno = errno;
7067 free (templt);
7068 errno = temp_errno;
7069 templt = NULL;
7072 #if defined (DOS_NT)
7073 /* The file name will be used in shell redirection, so it needs to have
7074 DOS-style backslashes, or else the Windows shell will barf. */
7075 char *p;
7076 for (p = templt; *p; p++)
7077 if (*p == '/')
7078 *p = '\\';
7079 #endif
7081 return templt;
7084 /* Return a newly allocated string containing the file name of FILE
7085 relative to the absolute directory DIR (which should end with a slash). */
7086 static char *
7087 relative_filename (char *file, char *dir)
7089 char *fp, *dp, *afn, *res;
7090 int i;
7092 /* Find the common root of file and dir (with a trailing slash). */
7093 afn = absolute_filename (file, cwd);
7094 fp = afn;
7095 dp = dir;
7096 while (*fp++ == *dp++)
7097 continue;
7098 fp--, dp--; /* back to the first differing char */
7099 #ifdef DOS_NT
7100 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
7101 return afn;
7102 #endif
7103 do /* look at the equal chars until '/' */
7104 fp--, dp--;
7105 while (*fp != '/');
7107 /* Build a sequence of "../" strings for the resulting relative file name. */
7108 i = 0;
7109 while ((dp = strchr (dp + 1, '/')) != NULL)
7110 i += 1;
7111 res = xnew (3*i + strlen (fp + 1) + 1, char);
7112 char *z = res;
7113 while (i-- > 0)
7114 z = stpcpy (z, "../");
7116 /* Add the file name relative to the common root of file and dir. */
7117 strcpy (z, fp + 1);
7118 free (afn);
7120 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *target, *abs;

  if (filename_is_absolute (file))
    abs = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    abs = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  */
  for (slash = strchr (abs, '/'); slash != NULL && slash[0] != '\0'; )
    {
      if (slash[1] == '.')
        {
          if (slash[2] == '.'
              && (slash[3] == '/' || slash[3] == '\0'))
            {
              /* "/..": walk back to where the preceding component
                 starts.  */
              target = slash;
              do
                target--;
              while (target >= abs && !filename_is_absolute (target));
              if (target < abs)
                target = slash; /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (target[0] != '/')
                target = slash;
#endif
              memmove (target, slash + 3, strlen (slash + 2));
              slash = target;
              continue;
            }
          else if (slash[2] == '/' || slash[2] == '\0')
            {
              /* "/.": just drop it and look at the same place again.  */
              memmove (slash, slash + 2, strlen (slash + 1));
              continue;
            }
        }

      slash = strchr (slash + 1, '/');
    }

  if (abs[0] != '\0')
    return abs;

  /* Just a safety net: should never happen.  */
  free (abs);
  return savestr ("/");
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash, the result is a
   copy of DIR.  FILE is cut temporarily after its last slash and
   restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char saved;
  char *result;

  if (last_slash == NULL)
    return savestr (dir);

  saved = last_slash[1];
  last_slash[1] = '\0';
  result = absolute_filename (file, dir);
  last_slash[1] = saved;

  return result;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with '/'; with DOS_NT defined, a
   leading "X:/" (X a letter) also counts.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The drive letter handling and the treatment of
   backslashes as separators apply only when DOS_NT is defined.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next char to read */
  register char *dst = fn;      /* next position to write, never past SRC */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash.  */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          *dst++ = '/';
          while (src[1] == '/' || src[1] == '\\')
            src++;
          src++;
        }
      else
        *dst++ = *src++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash.  */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst++ = '/';
          while (src[1] == '/')
            src++;
          src++;
        }
      else
        *dst++ = *src++;
    }

#endif /* !DOS_NT */

  *dst = '\0';
}
7261 /* Initialize a linebuffer for use. */
7262 static void
7263 linebuffer_init (linebuffer *lbp)
7265 lbp->size = (DEBUG) ? 3 : 200;
7266 lbp->buffer = xnew (lbp->size, char);
7267 lbp->buffer[0] = '\0';
7268 lbp->len = 0;
7271 /* Set the minimum size of a string contained in a linebuffer. */
7272 static void
7273 linebuffer_setlen (linebuffer *lbp, int toksize)
7275 while (lbp->size <= toksize)
7277 lbp->size *= 2;
7278 xrnew (lbp->buffer, lbp->size, char);
7280 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
/* Like realloc but get fatal error if memory is exhausted.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */