* lisp/net/tramp-archive.el (tramp-archive-run-real-handler): New defun.
[emacs.git] / lib-src / etags.c
blob588921bc70071d67bcf54f32ebb7d385731463af
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2018 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <https://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
95 #ifdef WIN32_NATIVE
96 # undef MSDOS
97 # undef WINDOWSNT
98 # define WINDOWSNT
99 #endif /* WIN32_NATIVE */
101 #ifdef MSDOS
102 # undef MSDOS
103 # define MSDOS true
104 # include <sys/param.h>
105 #else
106 # define MSDOS false
107 #endif /* MSDOS */
109 #ifdef WINDOWSNT
110 # include <direct.h>
111 # undef HAVE_NTGUI
112 # undef DOS_NT
113 # define DOS_NT
114 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
115 # define O_CLOEXEC O_NOINHERIT
116 #endif /* WINDOWSNT */
118 #include <limits.h>
119 #include <unistd.h>
120 #include <stdarg.h>
121 #include <stdlib.h>
122 #include <string.h>
123 #include <sysstdio.h>
124 #include <errno.h>
125 #include <fcntl.h>
126 #include <binary-io.h>
127 #include <unlocked-io.h>
128 #include <c-ctype.h>
129 #include <c-strcase.h>
131 #include <assert.h>
132 #ifdef NDEBUG
133 # undef assert /* some systems have a buggy assert.h */
134 # define assert(x) ((void) 0)
135 #endif
137 #include <getopt.h>
138 #include <regex.h>
140 /* Define CTAGS to make the program "ctags" compatible with the usual one.
141 Leave it undefined to make the program "etags", which makes emacs-style
142 tag tables and tags typedefs, #defines and struct/union/enum by default. */
143 #ifdef CTAGS
144 # undef CTAGS
145 # define CTAGS true
146 #else
147 # define CTAGS false
148 #endif
/* Return true if the NUL-terminated strings S and T compare equal
   under strcmp, i.e. byte for byte and case-sensitively.  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if the strings S and T compare equal under
   c_strcasecmp (the case-insensitive counterpart of streq).  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if at most the first N bytes of the strings S and T
   compare equal under strncmp (case-sensitive).  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if at most the first N bytes of the strings S and T
   compare equal under c_strncasecmp (the case-insensitive counterpart
   of strneq).  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* C is not in a name.
   Return true for NUL, tab, newline, form feed, carriage return,
   space and the punctuation characters ( ) , ; =.  Every other byte
   value yields false.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };
  return table[c];
}
/* C can start a token.
   Return true for '$', '@', '_', '~' and the ASCII letters A-Z and
   a-z; false for every other byte value (digits included).  */
static bool
begtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1, ['@']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1,
    ['~']=1
  };
  return table[c];
}
/* C can be in the middle of a token.
   Return true for '$', '_', the digits 0-9 and the ASCII letters A-Z
   and a-z; false for every other byte value.  Unlike begtoken, '@'
   and '~' are rejected here while digits are accepted.  */
static bool
intoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1,
    ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
    ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1
  };
  return table[c];
}
/* C can end a token.
   Return true for NUL, tab, newline, carriage return, space and the
   punctuation characters listed in the table below; false for every
   other byte value (letters, digits, '_', '$', '@', form feed, ...).  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };
  return table[c];
}
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew expands to an xmalloc call for N objects of type Type, cast to
 * Type *.  xrnew resizes the storage OP points to via xrealloc and
 * assigns the result back to OP; OP therefore appears twice in the
 * expansion and must be an lvalue without side effects.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc ((op), (n) * sizeof (Type)))
/* Type of a language parse function: it receives the input stream
   and returns nothing.  */
typedef void Lang_function (FILE *);
/* Association between a compressed-file suffix and the command that
   decompresses such a file.  */
typedef struct
{
  const char *suffix;		/* file name suffix for this compressor */
  const char *command;		/* takes one arg and decompresses to stdout */
} compressor;
258 typedef struct
260 const char *name; /* language name */
261 const char *help; /* detailed help for the language */
262 Lang_function *function; /* parse function */
263 const char **suffixes; /* name suffixes of this language's files */
264 const char **filenames; /* names of this language's files */
265 const char **interpreters; /* interpreters for this language */
266 bool metasource; /* source used to generate other sources */
267 } language;
269 typedef struct fdesc
271 struct fdesc *next; /* for the linked list */
272 char *infname; /* uncompressed input file name */
273 char *infabsname; /* absolute uncompressed input file name */
274 char *infabsdir; /* absolute dir of input file */
275 char *taggedfname; /* file name to write in tagfile */
276 language *lang; /* language of file */
277 char *prop; /* file properties to write in tagfile */
278 bool usecharno; /* etags tags shall contain char number */
279 bool written; /* entry written in the tags file */
280 } fdesc;
282 typedef struct node_st
283 { /* sorting structure */
284 struct node_st *left, *right; /* left and right sons */
285 fdesc *fdp; /* description of file to whom tag belongs */
286 char *name; /* tag name */
287 char *regex; /* search regexp */
288 bool valid; /* write this tag on the tag file */
289 bool is_func; /* function tag: use regexp in CTAGS mode */
290 bool been_warned; /* warning already given for duplicated tag */
291 int lno; /* line number tag is on */
292 long cno; /* character number line starts on */
293 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
309 /* Used to support mixing of --lang and file names. */
310 typedef struct
312 enum {
313 at_language, /* a language specification */
314 at_regexp, /* a regular expression */
315 at_filename, /* a file name */
316 at_stdin, /* read from stdin here */
317 at_end /* stop parsing the list */
318 } arg_type; /* argument type */
319 language *lang; /* language associated with the argument */
320 char *what; /* the argument itself */
321 } argument;
323 /* Structure defining a regular expression. */
324 typedef struct regexp
326 struct regexp *p_next; /* pointer to next in list */
327 language *lang; /* if set, use only for this language */
328 char *pattern; /* the regexp pattern */
329 char *name; /* tag name */
330 struct re_pattern_buffer *pat; /* the compiled pattern */
331 struct re_registers regs; /* re registers */
332 bool error_signaled; /* already signaled for this regexp */
333 bool force_explicit_name; /* do not allow implicit tag name */
334 bool ignore_case; /* ignore case when matching */
335 bool multi_line; /* do a multi-line match on the whole file */
336 } regexp;
339 /* Many compilers barf on this:
340 Lang_function Ada_funcs;
341 so let's write it this way */
342 static void Ada_funcs (FILE *);
343 static void Asm_labels (FILE *);
344 static void C_entries (int c_ext, FILE *);
345 static void default_C_entries (FILE *);
346 static void plain_C_entries (FILE *);
347 static void Cjava_entries (FILE *);
348 static void Cobol_paragraphs (FILE *);
349 static void Cplusplus_entries (FILE *);
350 static void Cstar_entries (FILE *);
351 static void Erlang_functions (FILE *);
352 static void Forth_words (FILE *);
353 static void Fortran_functions (FILE *);
354 static void Go_functions (FILE *);
355 static void HTML_labels (FILE *);
356 static void Lisp_functions (FILE *);
357 static void Lua_functions (FILE *);
358 static void Makefile_targets (FILE *);
359 static void Pascal_functions (FILE *);
360 static void Perl_functions (FILE *);
361 static void PHP_functions (FILE *);
362 static void PS_functions (FILE *);
363 static void Prolog_functions (FILE *);
364 static void Python_functions (FILE *);
365 static void Ruby_functions (FILE *);
366 static void Scheme_functions (FILE *);
367 static void TeX_commands (FILE *);
368 static void Texinfo_nodes (FILE *);
369 static void Yacc_entries (FILE *);
370 static void just_read_file (FILE *);
372 static language *get_language_from_langname (const char *);
373 static void readline (linebuffer *, FILE *);
374 static long readline_internal (linebuffer *, FILE *, char const *);
375 static bool nocase_tail (const char *);
376 static void get_tag (char *, char **);
377 static void get_lispy_tag (char *);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn void suggest_asking_for_help (void);
385 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn void pfatal (const char *);
387 static void add_node (node *, node **);
389 static void process_file_name (char *, language *);
390 static void process_file (FILE *, char *, language *);
391 static void find_entries (FILE *);
392 static void free_tree (node *);
393 static void free_fdesc (fdesc *);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc *, node **);
396 static void put_entries (node *);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer *);
412 static void linebuffer_setlen (linebuffer *, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar = '/'; /* use /.../ searches */
419 static char *tagfile; /* output file */
420 static char *progname; /* name this program was invoked with */
421 static char *cwd; /* current working directory */
422 static char *tagfiledir; /* directory of tagfile */
423 static FILE *tagf; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
426 static fdesc *fdhead; /* head of file description list */
427 static fdesc *curfdp; /* current file description */
428 static char *infilename; /* current input file name */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
434 static const int invalidcharno = -1;
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
443 static bool append_to_tagfile; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals; /* create tags for global variables */
453 static int members; /* create tags for C member variables */
454 static int declarations; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive; /* ignore #line directives (undocumented) */
456 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
457 static bool update; /* -u: update tags */
458 static bool vgrind_style; /* -v: create vgrind style index output */
459 static bool no_warnings; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style; /* -x: create cxref style output */
461 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent; /* -I: ignore indentation in C */
463 static int packages_only; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
465 static int debug; /* --debug */
467 /* STDIN is defined in LynxOS system headers */
468 #ifdef STDIN
469 # undef STDIN
470 #endif
472 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
473 static bool parsing_stdin; /* --parse-stdin used */
475 static regexp *p_head; /* list of all regexps */
476 static bool need_filebuf; /* some regexes are multi-line */
478 static struct option longopts[] =
480 { "append", no_argument, NULL, 'a' },
481 { "packages-only", no_argument, &packages_only, 1 },
482 { "c++", no_argument, NULL, 'C' },
483 { "debug", no_argument, &debug, 1 },
484 { "declarations", no_argument, &declarations, 1 },
485 { "no-line-directive", no_argument, &no_line_directive, 1 },
486 { "no-duplicates", no_argument, &no_duplicates, 1 },
487 { "help", no_argument, NULL, 'h' },
488 { "help", no_argument, NULL, 'H' },
489 { "ignore-indentation", no_argument, NULL, 'I' },
490 { "language", required_argument, NULL, 'l' },
491 { "members", no_argument, &members, 1 },
492 { "no-members", no_argument, &members, 0 },
493 { "output", required_argument, NULL, 'o' },
494 { "class-qualify", no_argument, &class_qualify, 'Q' },
495 { "regex", required_argument, NULL, 'r' },
496 { "no-regex", no_argument, NULL, 'R' },
497 { "ignore-case-regex", required_argument, NULL, 'c' },
498 { "parse-stdin", required_argument, NULL, STDIN },
499 { "version", no_argument, NULL, 'V' },
501 #if CTAGS /* Ctags options */
502 { "backward-search", no_argument, NULL, 'B' },
503 { "cxref", no_argument, NULL, 'x' },
504 { "defines", no_argument, NULL, 'd' },
505 { "globals", no_argument, &globals, 1 },
506 { "typedefs", no_argument, NULL, 't' },
507 { "typedefs-and-c++", no_argument, NULL, 'T' },
508 { "update", no_argument, NULL, 'u' },
509 { "vgrind", no_argument, NULL, 'v' },
510 { "no-warn", no_argument, NULL, 'w' },
512 #else /* Etags options */
513 { "no-defines", no_argument, NULL, 'D' },
514 { "no-globals", no_argument, &globals, 0 },
515 { "include", required_argument, NULL, 'i' },
516 #endif
517 { NULL }
520 static compressor compressors[] =
522 { "z", "gzip -d -c"},
523 { "Z", "gzip -d -c"},
524 { "gz", "gzip -d -c"},
525 { "GZ", "gzip -d -c"},
526 { "bz2", "bzip2 -d -c" },
527 { "xz", "xz -d -c" },
528 { NULL }
532 * Language stuff.
535 /* Ada code */
536 static const char *Ada_suffixes [] =
537 { "ads", "adb", "ada", NULL };
538 static const char Ada_help [] =
539 "In Ada code, functions, procedures, packages, tasks and types are\n\
540 tags. Use the '--packages-only' option to create tags for\n\
541 packages only.\n\
542 Ada tag names have suffixes indicating the type of entity:\n\
543 Entity type: Qualifier:\n\
544 ------------ ----------\n\
545 function /f\n\
546 procedure /p\n\
547 package spec /s\n\
548 package body /b\n\
549 type /t\n\
550 task /k\n\
551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
553 will just search for any tag 'bidule'.";
555 /* Assembly code */
556 static const char *Asm_suffixes [] =
557 { "a", /* Unix assembler */
558 "asm", /* Microcontroller assembly */
559 "def", /* BSO/Tasking definition includes */
560 "inc", /* Microcontroller include files */
561 "ins", /* Microcontroller include files */
562 "s", "sa", /* Unix assembler */
563 "S", /* cpp-processed Unix assembler */
564 "src", /* BSO/Tasking C compiler output */
565 NULL
567 static const char Asm_help [] =
568 "In assembler code, labels appearing at the beginning of a line,\n\
569 followed by a colon, are tags.";
572 /* Note that .c and .h can be considered C++, if the --c++ flag was
573 given, or if the `class' or `template' keywords are met inside the file.
574 That is why default_C_entries is called for these. */
575 static const char *default_C_suffixes [] =
576 { "c", "h", NULL };
577 #if CTAGS /* C help for Ctags */
578 static const char default_C_help [] =
579 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
582 Use --globals to tag global variables.\n\
583 You can tag function declarations and external variables by\n\
584 using '--declarations', and struct members by using '--members'.";
585 #else /* C help for Etags */
586 static const char default_C_help [] =
587 "In C code, any C function or typedef is a tag, and so are\n\
588 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
589 definitions and 'enum' constants are tags unless you specify\n\
590 '--no-defines'. Global variables are tags unless you specify\n\
591 '--no-globals' and so are struct members unless you specify\n\
592 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
593 '--no-members' can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using '--declarations'.";
596 #endif /* C help for Ctags and Etags */
598 static const char *Cplusplus_suffixes [] =
599 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
600 "M", /* Objective C++ */
601 "pdb", /* PostScript with C syntax */
602 NULL };
603 static const char Cplusplus_help [] =
604 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
605 --help --lang=c --lang=c++ for full help.)\n\
606 In addition to C tags, member functions are also recognized. Member\n\
607 variables are recognized unless you use the '--no-members' option.\n\
608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
609 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
610 'operator+'.";
/* Java: file name suffixes and help text.  The help text is never
   written to, so it is const like the other languages' help strings.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol: file name suffixes and help text.  The help text is never
   written to, so it is const like the other languages' help strings.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
625 static const char *Cstar_suffixes [] =
626 { "cs", "hs", NULL };
628 static const char *Erlang_suffixes [] =
629 { "erl", "hrl", NULL };
630 static const char Erlang_help [] =
631 "In Erlang code, the tags are the functions, records and macros\n\
632 defined in the file.";
633 static const char *Erlang_interpreters [] =
634 { "escript", NULL };
/* Forth: file name suffixes and help text.  The suffix table has
   internal linkage like every other language's suffix table.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by ':',\n\
constant, code, create, defer, value, variable, buffer:, field.";
642 static const char *Fortran_suffixes [] =
643 { "F", "f", "f90", "for", NULL };
644 static const char Fortran_help [] =
645 "In Fortran code, functions, subroutines and block data are tags.";
647 static const char *Go_suffixes [] = {"go", NULL};
648 static const char Go_help [] =
649 "In Go code, functions, interfaces and packages are tags.";
651 static const char *HTML_suffixes [] =
652 { "htm", "html", "shtml", NULL };
653 static const char HTML_help [] =
654 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
655 'h3' headers. Also, tags are 'name=' in anchors and all\n\
656 occurrences of 'id='.";
658 static const char *Lisp_suffixes [] =
659 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
660 static const char Lisp_help [] =
661 "In Lisp code, any function defined with 'defun', any variable\n\
662 defined with 'defvar' or 'defconst', and in general the first\n\
663 argument of any expression that starts with '(def' in column zero\n\
664 is a tag.\n\
665 The '--declarations' option tags \"(defvar foo)\" constructs too.";
667 static const char *Lua_suffixes [] =
668 { "lua", "LUA", NULL };
669 static const char Lua_help [] =
670 "In Lua scripts, all functions are tags.";
671 static const char *Lua_interpreters [] =
672 { "lua", NULL };
674 static const char *Makefile_filenames [] =
675 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
676 static const char Makefile_help [] =
677 "In makefiles, targets are tags; additionally, variables are tags\n\
678 unless you specify '--no-globals'.";
680 static const char *Objc_suffixes [] =
681 { "lm", /* Objective lex file */
682 "m", /* Objective C file */
683 NULL };
684 static const char Objc_help [] =
685 "In Objective C code, tags include Objective C definitions for classes,\n\
686 class categories, methods and protocols. Tags for variables and\n\
687 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
688 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
690 static const char *Pascal_suffixes [] =
691 { "p", "pas", NULL };
692 static const char Pascal_help [] =
693 "In Pascal code, the tags are the functions and procedures defined\n\
694 in the file.";
695 /* " // this is for working around an Emacs highlighting bug... */
697 static const char *Perl_suffixes [] =
698 { "pl", "pm", NULL };
699 static const char *Perl_interpreters [] =
700 { "perl", "@PERL@", NULL };
701 static const char Perl_help [] =
702 "In Perl code, the tags are the packages, subroutines and variables\n\
703 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
704 '--globals' if you want to tag global variables. Tags for\n\
705 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
706 defined in the default package is 'main::SUB'.";
708 static const char *PHP_suffixes [] =
709 { "php", "php3", "php4", NULL };
710 static const char PHP_help [] =
711 "In PHP code, tags are functions, classes and defines. Unless you use\n\
712 the '--no-members' option, vars are tags too.";
714 static const char *plain_C_suffixes [] =
715 { "pc", /* Pro*C file */
716 NULL };
718 static const char *PS_suffixes [] =
719 { "ps", "psw", NULL }; /* .psw is for PSWrap */
720 static const char PS_help [] =
721 "In PostScript code, the tags are the functions.";
723 static const char *Prolog_suffixes [] =
724 { "prolog", NULL };
725 static const char Prolog_help [] =
726 "In Prolog code, tags are predicates and rules at the beginning of\n\
727 line.";
728 static const char *Prolog_interpreters [] =
729 { "gprolog", "pl", "yap", "swipl", "prolog", NULL };
731 static const char *Python_suffixes [] =
732 { "py", NULL };
733 static const char Python_help [] =
734 "In Python code, 'def' or 'class' at the beginning of a line\n\
735 generate a tag.";
736 static const char *Python_interpreters [] =
737 { "python", NULL };
739 static const char *Ruby_suffixes [] =
740 { "rb", "ru", "rbw", NULL };
741 static const char *Ruby_filenames [] =
742 { "Rakefile", "Thorfile", NULL };
743 static const char Ruby_help [] =
744 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
745 a line generate a tag. Constants also generate a tag.";
746 static const char *Ruby_interpreters [] =
747 { "ruby", NULL };
749 /* Can't do the `SCM' or `scm' prefix with a version number. */
750 static const char *Scheme_suffixes [] =
751 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
752 static const char Scheme_help [] =
753 "In Scheme code, tags include anything defined with 'def' or with a\n\
754 construct whose name starts with 'def'. They also include\n\
755 variables set with 'set!' at top level in the file.";
757 static const char *TeX_suffixes [] =
758 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
759 static const char TeX_help [] =
760 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
761 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
762 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
763 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
764 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
766 Other commands can be specified by setting the environment variable\n\
767 'TEXTAGS' to a colon-separated list like, for example,\n\
768 TEXTAGS=\"mycommand:myothercommand\".";
771 static const char *Texinfo_suffixes [] =
772 { "texi", "texinfo", "txi", NULL };
773 static const char Texinfo_help [] =
774 "for texinfo files, lines starting with @node are tagged.";
776 static const char *Yacc_suffixes [] =
777 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
778 static const char Yacc_help [] =
779 "In Bison or Yacc input files, each rule defines as a tag the\n\
780 nonterminal it constructs. The portions of the file that contain\n\
781 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
782 for full help).";
/* Help text for the pseudo-language "auto".  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
788 static const char none_help [] =
789 "'none' is not a real language, it indicates to only do\n\
790 regexp processing on files.";
792 static const char no_lang_help [] =
793 "No detailed help available for this language.";
797 * Table of languages.
799 * It is ok for a given function to be listed under more than one
800 * name. I just didn't.
803 static language lang_names [] =
805 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
806 { "asm", Asm_help, Asm_labels, Asm_suffixes },
807 { "c", default_C_help, default_C_entries, default_C_suffixes },
808 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
809 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
810 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
811 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes,
812 NULL, Erlang_interpreters },
813 { "forth", Forth_help, Forth_words, Forth_suffixes },
814 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
815 { "go", Go_help, Go_functions, Go_suffixes },
816 { "html", HTML_help, HTML_labels, HTML_suffixes },
817 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
818 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
819 { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
820 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
821 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
822 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
823 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
824 { "php", PHP_help, PHP_functions, PHP_suffixes },
825 { "postscript",PS_help, PS_functions, PS_suffixes },
826 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
827 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes,
828 NULL, Prolog_interpreters },
829 { "python", Python_help, Python_functions, Python_suffixes,
830 NULL, Python_interpreters },
831 { "ruby", Ruby_help, Ruby_functions, Ruby_suffixes,
832 Ruby_filenames, Ruby_interpreters },
833 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
834 { "tex", TeX_help, TeX_commands, TeX_suffixes },
835 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
836 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
837 { "auto", auto_help }, /* default guessing scheme */
838 { "none", none_help, just_read_file }, /* regexp matching only */
839 { NULL } /* end of list */
/* Print to standard output one line per entry of lang_names: the
   language name followed by its default file names and its
   dot-prefixed suffixes, when the entry has any.  A fixed explanatory
   note about 'auto', 'none', language guessing and compressed files
   is printed after the list.  */
843 static void
844 print_language_names (void)
846 language *lang;
847 const char **name, **ext;
/* Heading printed before the per-language lines.  */
849 puts ("\nThese are the currently supported languages, along with the\n\
850 default file names and dot suffixes:");
/* lang_names is terminated by an entry whose name is NULL.  */
851 for (lang = lang_names; lang->name != NULL; lang++)
/* The language name is left-justified in a field of width 10.  */
853 printf (" %-*s", 10, lang->name);
/* Both lists are optional and, when present, NULL-terminated.  */
854 if (lang->filenames != NULL)
855 for (name = lang->filenames; *name != NULL; name++)
856 printf (" %s", *name);
857 if (lang->suffixes != NULL)
858 for (ext = lang->suffixes; *ext != NULL; ext++)
859 printf (" .%s", *ext);
/* Terminate the line for this language.  */
860 puts ("");
862 puts ("where 'auto' means use default language for files based on file\n\
863 name suffix, and 'none' means only do regexp processing on files.\n\
864 If no language is specified and no matching suffix is found,\n\
865 the first line of the file is read for a sharp-bang (#!) sequence\n\
866 followed by the name of an interpreter. If no such sequence is found,\n\
867 Fortran is tried first; if no tags are found, C is tried next.\n\
868 When parsing any C file, a \"class\" or \"template\" keyword\n\
869 switches to C++.");
870 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
872 For detailed help on a given language use, for example,\n\
873 etags --help --lang=ada.");
876 #ifndef EMACS_NAME
877 # define EMACS_NAME "standalone"
878 #endif
879 #ifndef VERSION
880 # define VERSION "17.38.1.4"
881 #endif
882 static _Noreturn void
883 print_version (void)
885 char emacs_copyright[] = COPYRIGHT;
887 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
888 puts (emacs_copyright);
889 puts ("This program is distributed under the terms in ETAGS.README");
891 exit (EXIT_SUCCESS);
/* When true, print_help also describes the undocumented -w option
   variants.  Defaults to false unless the build overrides it.  */
894 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
895 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
896 #endif
/* Print help and exit with EXIT_SUCCESS; never returns.

   ARGBUFFER is the argument list built by main, terminated by an
   entry of type at_end.  If it contains at_language entries, the help
   text of each such language is printed, separated by blank lines, and
   nothing else.  Otherwise the general usage message, the option
   descriptions (which differ depending on CTAGS), the list of
   supported languages and the bug-report address are printed.  */
898 static _Noreturn void
899 print_help (argument *argbuffer)
/* Set once at least one language-specific help text has been printed.  */
901 bool help_for_lang = false;
/* First pass: print detailed help for every language given on the
   command line.  */
903 for (; argbuffer->arg_type != at_end; argbuffer++)
904 if (argbuffer->arg_type == at_language)
/* Separate consecutive language help texts with an empty line.  */
906 if (help_for_lang)
907 puts ("");
908 puts (argbuffer->lang->help);
909 help_for_lang = true;
/* Language-specific help was printed: skip the general usage text.  */
912 if (help_for_lang)
913 exit (EXIT_SUCCESS);
/* General usage text; progname is substituted twice.  */
915 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
917 These are the options accepted by %s.\n", progname, progname);
918 puts ("You may use unambiguous abbreviations for the long option names.");
919 puts (" A - as file name means read names from stdin (one per line).\n\
920 Absolute names are stored in the output file as they are.\n\
921 Relative ones are stored relative to the output file's directory.\n");
923 puts ("-a, --append\n\
924 Append tag entries to existing tags file.");
926 puts ("--packages-only\n\
927 For Ada files, only generate tags for packages.");
/* -B is only described when CTAGS is true.  */
929 if (CTAGS)
930 puts ("-B, --backward-search\n\
931 Write the search commands for the tag entries using '?', the\n\
932 backward-search command instead of '/', the forward-search command.");
934 /* This option is mostly obsolete, because etags can now automatically
935 detect C++. Retained for backward compatibility and for debugging and
936 experimentation. In principle, we could want to tag as C++ even
937 before any "class" or "template" keyword.
938 puts ("-C, --c++\n\
939 Treat files whose name suffix defaults to C language as C++ files.");
942 puts ("--declarations\n\
943 In C and derived languages, create tags for function declarations,");
944 if (CTAGS)
945 puts ("\tand create tags for extern variables if --globals is used.");
946 else
947 puts
948 ("\tand create tags for extern variables unless --no-globals is used.");
950 if (CTAGS)
951 puts ("-d, --defines\n\
952 Create tag entries for C #define constants and enum constants, too.");
953 else
954 puts ("-D, --no-defines\n\
955 Don't create tag entries for C #define constants and enum constants.\n\
956 This makes the tags file smaller.");
958 if (!CTAGS)
959 puts ("-i FILE, --include=FILE\n\
960 Include a note in tag file indicating that, when searching for\n\
961 a tag, one should also consult the tags file FILE after\n\
962 checking the current file.");
964 puts ("-l LANG, --language=LANG\n\
965 Force the following files to be considered as written in the\n\
966 named language up to the next --language=LANG option.");
968 if (CTAGS)
969 puts ("--globals\n\
970 Create tag entries for global variables in some languages.");
971 else
972 puts ("--no-globals\n\
973 Do not create tag entries for global variables in some\n\
974 languages. This makes the tags file smaller.");
976 puts ("--no-line-directive\n\
977 Ignore #line preprocessor directives in C and derived languages.");
979 if (CTAGS)
980 puts ("--members\n\
981 Create tag entries for members of structures in some languages.");
982 else
983 puts ("--no-members\n\
984 Do not create tag entries for members of structures\n\
985 in some languages.");
987 puts ("-Q, --class-qualify\n\
988 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
989 This produces tag names of the form \"class::member\" for C++,\n\
990 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
991 For Objective C, this also produces class methods qualified with\n\
992 their arguments, as in \"foo:bar:baz:more\".\n\
993 For Perl, this produces \"package::member\".");
994 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
995 Make a tag for each line matching a regular expression pattern\n\
996 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
997 files only. REGEXFILE is a file containing one REGEXP per line.\n\
998 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
999 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1000 puts (" If TAGNAME/ is present, the tags created are named.\n\
1001 For example Tcl named tags can be created with:\n\
1002 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1003 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
1004 'm' means to allow multi-line matches, 's' implies 'm' and\n\
1005 causes dot to match any character, including newline.");
1007 puts ("-R, --no-regex\n\
1008 Don't create tags from regexps for the following files.");
1010 puts ("-I, --ignore-indentation\n\
1011 In C and C++ do not assume that a closing brace in the first\n\
1012 column is the final brace of a function or structure definition.");
1014 puts ("-o FILE, --output=FILE\n\
1015 Write the tags to FILE.");
1017 puts ("--parse-stdin=NAME\n\
1018 Read from standard input and record tags as belonging to file NAME.");
1020 if (CTAGS)
1022 puts ("-t, --typedefs\n\
1023 Generate tag entries for C and Ada typedefs.");
1024 puts ("-T, --typedefs-and-c++\n\
1025 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1026 and C++ member functions.");
1029 if (CTAGS)
1030 puts ("-u, --update\n\
1031 Update the tag entries for the given files, leaving tag\n\
1032 entries for other files in place. Currently, this is\n\
1033 implemented by deleting the existing entries for the given\n\
1034 files and then rewriting the new entries at the end of the\n\
1035 tags file. It is often faster to simply rebuild the entire\n\
1036 tag file than to use this.");
1038 if (CTAGS)
1040 puts ("-v, --vgrind\n\
1041 Print on the standard output an index of items intended for\n\
1042 human consumption, similar to the output of vgrind. The index\n\
1043 is sorted, and gives the page number of each item.");
1045 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1046 puts ("-w, --no-duplicates\n\
1047 Do not create duplicate tag entries, for compatibility with\n\
1048 traditional ctags.");
1050 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1051 puts ("-w, --no-warn\n\
1052 Suppress warning messages about duplicate tag entries.");
1054 puts ("-x, --cxref\n\
1055 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1056 The output uses line numbers instead of page numbers, but\n\
1057 beyond that the differences are cosmetic; try both to see\n\
1058 which you like.");
1061 puts ("-V, --version\n\
1062 Print the version of the program.\n\
1063 -h, --help\n\
1064 Print this help message.\n\
1065 Followed by one or more '--language' options prints detailed\n\
1066 help about tag generation for the specified languages.");
1068 print_language_names ();
1070 puts ("");
1071 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1073 exit (EXIT_SUCCESS);
1078 main (int argc, char **argv)
1080 int i;
1081 unsigned int nincluded_files;
1082 char **included_files;
1083 argument *argbuffer;
1084 int current_arg, file_count;
1085 linebuffer filename_lb;
1086 bool help_asked = false;
1087 ptrdiff_t len;
1088 char *optstring;
1089 int opt;
1091 progname = argv[0];
1092 nincluded_files = 0;
1093 included_files = xnew (argc, char *);
1094 current_arg = 0;
1095 file_count = 0;
1097 /* Allocate enough no matter what happens. Overkill, but each one
1098 is small. */
1099 argbuffer = xnew (argc, argument);
1102 * Always find typedefs and structure tags.
1103 * Also default to find macro constants, enum constants, struct
1104 * members and global variables. Do it for both etags and ctags.
1106 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1107 globals = members = true;
1109 /* When the optstring begins with a '-' getopt_long does not rearrange the
1110 non-options arguments to be at the end, but leaves them alone. */
1111 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1112 (CTAGS) ? "BxdtTuvw" : "Di:",
1113 "");
1115 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1116 switch (opt)
1118 case 0:
1119 /* If getopt returns 0, then it has already processed a
1120 long-named option. We should do nothing. */
1121 break;
1123 case 1:
1124 /* This means that a file name has been seen. Record it. */
1125 argbuffer[current_arg].arg_type = at_filename;
1126 argbuffer[current_arg].what = optarg;
1127 len = strlen (optarg);
1128 if (whatlen_max < len)
1129 whatlen_max = len;
1130 ++current_arg;
1131 ++file_count;
1132 break;
1134 case STDIN:
1135 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1136 argbuffer[current_arg].arg_type = at_stdin;
1137 argbuffer[current_arg].what = optarg;
1138 len = strlen (optarg);
1139 if (whatlen_max < len)
1140 whatlen_max = len;
1141 ++current_arg;
1142 ++file_count;
1143 if (parsing_stdin)
1144 fatal ("cannot parse standard input more than once");
1145 parsing_stdin = true;
1146 break;
1148 /* Common options. */
1149 case 'a': append_to_tagfile = true; break;
1150 case 'C': cplusplus = true; break;
1151 case 'f': /* for compatibility with old makefiles */
1152 case 'o':
1153 if (tagfile)
1155 error ("-o option may only be given once.");
1156 suggest_asking_for_help ();
1157 /* NOTREACHED */
1159 tagfile = optarg;
1160 break;
1161 case 'I':
1162 case 'S': /* for backward compatibility */
1163 ignoreindent = true;
1164 break;
1165 case 'l':
1167 language *lang = get_language_from_langname (optarg);
1168 if (lang != NULL)
1170 argbuffer[current_arg].lang = lang;
1171 argbuffer[current_arg].arg_type = at_language;
1172 ++current_arg;
1175 break;
1176 case 'c':
1177 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1178 optarg = concat (optarg, "i", ""); /* memory leak here */
1179 FALLTHROUGH;
1180 case 'r':
1181 argbuffer[current_arg].arg_type = at_regexp;
1182 argbuffer[current_arg].what = optarg;
1183 len = strlen (optarg);
1184 if (whatlen_max < len)
1185 whatlen_max = len;
1186 ++current_arg;
1187 break;
1188 case 'R':
1189 argbuffer[current_arg].arg_type = at_regexp;
1190 argbuffer[current_arg].what = NULL;
1191 ++current_arg;
1192 break;
1193 case 'V':
1194 print_version ();
1195 break;
1196 case 'h':
1197 case 'H':
1198 help_asked = true;
1199 break;
1200 case 'Q':
1201 class_qualify = 1;
1202 break;
1204 /* Etags options */
1205 case 'D': constantypedefs = false; break;
1206 case 'i': included_files[nincluded_files++] = optarg; break;
1208 /* Ctags options. */
1209 case 'B': searchar = '?'; break;
1210 case 'd': constantypedefs = true; break;
1211 case 't': typedefs = true; break;
1212 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1213 case 'u': update = true; break;
1214 case 'v': vgrind_style = true; FALLTHROUGH;
1215 case 'x': cxref_style = true; break;
1216 case 'w': no_warnings = true; break;
1217 default:
1218 suggest_asking_for_help ();
1219 /* NOTREACHED */
1222 /* No more options. Store the rest of arguments. */
1223 for (; optind < argc; optind++)
1225 argbuffer[current_arg].arg_type = at_filename;
1226 argbuffer[current_arg].what = argv[optind];
1227 len = strlen (argv[optind]);
1228 if (whatlen_max < len)
1229 whatlen_max = len;
1230 ++current_arg;
1231 ++file_count;
1234 argbuffer[current_arg].arg_type = at_end;
1236 if (help_asked)
1237 print_help (argbuffer);
1238 /* NOTREACHED */
1240 if (nincluded_files == 0 && file_count == 0)
1242 error ("no input files specified.");
1243 suggest_asking_for_help ();
1244 /* NOTREACHED */
1247 if (tagfile == NULL)
1248 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1249 cwd = etags_getcwd (); /* the current working directory */
1250 if (cwd[strlen (cwd) - 1] != '/')
1252 char *oldcwd = cwd;
1253 cwd = concat (oldcwd, "/", "");
1254 free (oldcwd);
1257 /* Compute base directory for relative file names. */
1258 if (streq (tagfile, "-")
1259 || strneq (tagfile, "/dev/", 5))
1260 tagfiledir = cwd; /* relative file names are relative to cwd */
1261 else
1263 canonicalize_filename (tagfile);
1264 tagfiledir = absolute_dirname (tagfile, cwd);
1267 linebuffer_init (&lb);
1268 linebuffer_init (&filename_lb);
1269 linebuffer_init (&filebuf);
1270 linebuffer_init (&token_name);
1272 if (!CTAGS)
1274 if (streq (tagfile, "-"))
1276 tagf = stdout;
1277 set_binary_mode (STDOUT_FILENO, O_BINARY);
1279 else
1280 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1281 if (tagf == NULL)
1282 pfatal (tagfile);
1286 * Loop through files finding functions.
1288 for (i = 0; i < current_arg; i++)
1290 static language *lang; /* non-NULL if language is forced */
1291 char *this_file;
1293 switch (argbuffer[i].arg_type)
1295 case at_language:
1296 lang = argbuffer[i].lang;
1297 break;
1298 case at_regexp:
1299 analyze_regex (argbuffer[i].what);
1300 break;
1301 case at_filename:
1302 this_file = argbuffer[i].what;
1303 /* Input file named "-" means read file names from stdin
1304 (one per line) and use them. */
1305 if (streq (this_file, "-"))
1307 if (parsing_stdin)
1308 fatal ("cannot parse standard input "
1309 "AND read file names from it");
1310 while (readline_internal (&filename_lb, stdin, "-") > 0)
1311 process_file_name (filename_lb.buffer, lang);
1313 else
1314 process_file_name (this_file, lang);
1315 break;
1316 case at_stdin:
1317 this_file = argbuffer[i].what;
1318 process_file (stdin, this_file, lang);
1319 break;
1320 default:
1321 error ("internal error: arg_type");
1325 free_regexps ();
1326 free (lb.buffer);
1327 free (filebuf.buffer);
1328 free (token_name.buffer);
1330 if (!CTAGS || cxref_style)
1332 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1333 put_entries (nodehead);
1334 free_tree (nodehead);
1335 nodehead = NULL;
1336 if (!CTAGS)
1338 fdesc *fdp;
1340 /* Output file entries that have no tags. */
1341 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1342 if (!fdp->written)
1343 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1345 while (nincluded_files-- > 0)
1346 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1348 if (fclose (tagf) == EOF)
1349 pfatal (tagfile);
1352 return EXIT_SUCCESS;
1355 /* From here on, we are in (CTAGS && !cxref_style) */
1356 if (update)
1358 char *cmd =
1359 xmalloc (strlen (tagfile) + whatlen_max +
1360 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1361 for (i = 0; i < current_arg; ++i)
1363 switch (argbuffer[i].arg_type)
1365 case at_filename:
1366 case at_stdin:
1367 break;
1368 default:
1369 continue; /* the for loop */
1371 char *z = stpcpy (cmd, "mv ");
1372 z = stpcpy (z, tagfile);
1373 z = stpcpy (z, " OTAGS;grep -Fv '\t");
1374 z = stpcpy (z, argbuffer[i].what);
1375 z = stpcpy (z, "\t' OTAGS >");
1376 z = stpcpy (z, tagfile);
1377 strcpy (z, ";rm OTAGS");
1378 if (system (cmd) != EXIT_SUCCESS)
1379 fatal ("failed to execute shell command");
1381 free (cmd);
1382 append_to_tagfile = true;
1385 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1386 if (tagf == NULL)
1387 pfatal (tagfile);
1388 put_entries (nodehead); /* write all the tags (CTAGS) */
1389 free_tree (nodehead);
1390 nodehead = NULL;
1391 if (fclose (tagf) == EOF)
1392 pfatal (tagfile);
1394 if (CTAGS)
1395 if (append_to_tagfile || update)
1397 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1398 /* Maybe these should be used:
1399 setenv ("LC_COLLATE", "C", 1);
1400 setenv ("LC_ALL", "C", 1); */
1401 char *z = stpcpy (cmd, "sort -u -o ");
1402 z = stpcpy (z, tagfile);
1403 *z++ = ' ';
1404 strcpy (z, tagfile);
1405 return system (cmd);
1407 return EXIT_SUCCESS;
1412 * Return a compressor given the file name. If EXTPTR is non-zero,
1413 * return a pointer into FILE where the compressor-specific
1414 * extension begins. If no compressor is found, NULL is returned
1415 * and EXTPTR is not significant.
1416 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1418 static compressor *
1419 get_compressor_from_suffix (char *file, char **extptr)
1421 compressor *compr;
1422 char *slash, *suffix;
1424 /* File has been processed by canonicalize_filename,
1425 so we don't need to consider backslashes on DOS_NT. */
1426 slash = strrchr (file, '/');
1427 suffix = strrchr (file, '.');
1428 if (suffix == NULL || suffix < slash)
1429 return NULL;
1430 if (extptr != NULL)
1431 *extptr = suffix;
1432 suffix += 1;
1433 /* Let those poor souls who live with DOS 8+3 file name limits get
1434 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1435 Only the first do loop is run if not MSDOS */
1438 for (compr = compressors; compr->suffix != NULL; compr++)
1439 if (streq (compr->suffix, suffix))
1440 return compr;
1441 if (!MSDOS)
1442 break; /* do it only once: not really a loop */
1443 if (extptr != NULL)
1444 *extptr = ++suffix;
1445 } while (*suffix != '\0');
1446 return NULL;
1452 * Return a language given the name.
1454 static language *
1455 get_language_from_langname (const char *name)
1457 language *lang;
1459 if (name == NULL)
1460 error ("empty language name");
1461 else
1463 for (lang = lang_names; lang->name != NULL; lang++)
1464 if (streq (name, lang->name))
1465 return lang;
1466 error ("unknown language \"%s\"", name);
1469 return NULL;
1474 * Return a language given the interpreter name.
1476 static language *
1477 get_language_from_interpreter (char *interpreter)
1479 language *lang;
1480 const char **iname;
1482 if (interpreter == NULL)
1483 return NULL;
1484 for (lang = lang_names; lang->name != NULL; lang++)
1485 if (lang->interpreters != NULL)
1486 for (iname = lang->interpreters; *iname != NULL; iname++)
1487 if (streq (*iname, interpreter))
1488 return lang;
1490 return NULL;
1496 * Return a language given the file name.
1498 static language *
1499 get_language_from_filename (char *file, int case_sensitive)
1501 language *lang;
1502 const char **name, **ext, *suffix;
1503 char *slash;
1505 /* Try whole file name first. */
1506 slash = strrchr (file, '/');
1507 if (slash != NULL)
1508 file = slash + 1;
1509 #ifdef DOS_NT
1510 else if (file[0] && file[1] == ':')
1511 file += 2;
1512 #endif
1513 for (lang = lang_names; lang->name != NULL; lang++)
1514 if (lang->filenames != NULL)
1515 for (name = lang->filenames; *name != NULL; name++)
1516 if ((case_sensitive)
1517 ? streq (*name, file)
1518 : strcaseeq (*name, file))
1519 return lang;
1521 /* If not found, try suffix after last dot. */
1522 suffix = strrchr (file, '.');
1523 if (suffix == NULL)
1524 return NULL;
1525 suffix += 1;
1526 for (lang = lang_names; lang->name != NULL; lang++)
1527 if (lang->suffixes != NULL)
1528 for (ext = lang->suffixes; *ext != NULL; ext++)
1529 if ((case_sensitive)
1530 ? streq (*ext, suffix)
1531 : strcaseeq (*ext, suffix))
1532 return lang;
1533 return NULL;
1538 * This routine is called on each file argument.
1540 static void
1541 process_file_name (char *file, language *lang)
1543 FILE *inf;
1544 fdesc *fdp;
1545 compressor *compr;
1546 char *compressed_name, *uncompressed_name;
1547 char *ext, *real_name UNINIT, *tmp_name UNINIT;
1548 int retval;
1550 canonicalize_filename (file);
1551 if (streq (file, tagfile) && !streq (tagfile, "-"))
1553 error ("skipping inclusion of %s in self.", file);
1554 return;
1556 compr = get_compressor_from_suffix (file, &ext);
1557 if (compr)
1559 compressed_name = file;
1560 uncompressed_name = savenstr (file, ext - file);
1562 else
1564 compressed_name = NULL;
1565 uncompressed_name = file;
1568 /* If the canonicalized uncompressed name
1569 has already been dealt with, skip it silently. */
1570 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1572 assert (fdp->infname != NULL);
1573 if (streq (uncompressed_name, fdp->infname))
1574 goto cleanup;
1577 inf = fopen (file, "r" FOPEN_BINARY);
1578 if (inf)
1579 real_name = file;
1580 else
1582 int file_errno = errno;
1583 if (compressed_name)
1585 /* Try with the given suffix. */
1586 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1587 if (inf)
1588 real_name = uncompressed_name;
1590 else
1592 /* Try all possible suffixes. */
1593 for (compr = compressors; compr->suffix != NULL; compr++)
1595 compressed_name = concat (file, ".", compr->suffix);
1596 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1597 if (inf)
1599 real_name = compressed_name;
1600 break;
1602 if (MSDOS)
1604 char *suf = compressed_name + strlen (file);
1605 size_t suflen = strlen (compr->suffix) + 1;
1606 for ( ; suf[1]; suf++, suflen--)
1608 memmove (suf, suf + 1, suflen);
1609 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1610 if (inf)
1612 real_name = compressed_name;
1613 break;
1616 if (inf)
1617 break;
1619 free (compressed_name);
1620 compressed_name = NULL;
1623 if (! inf)
1625 errno = file_errno;
1626 perror (file);
1627 goto cleanup;
1631 if (real_name == compressed_name)
1633 fclose (inf);
1634 tmp_name = etags_mktmp ();
1635 if (!tmp_name)
1636 inf = NULL;
1637 else
1639 #if MSDOS || defined (DOS_NT)
1640 char *cmd1 = concat (compr->command, " \"", real_name);
1641 char *cmd = concat (cmd1, "\" > ", tmp_name);
1642 #else
1643 char *cmd1 = concat (compr->command, " '", real_name);
1644 char *cmd = concat (cmd1, "' > ", tmp_name);
1645 #endif
1646 free (cmd1);
1647 int tmp_errno;
1648 if (system (cmd) == -1)
1650 inf = NULL;
1651 tmp_errno = EINVAL;
1653 else
1655 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1656 tmp_errno = errno;
1658 free (cmd);
1659 errno = tmp_errno;
1662 if (!inf)
1664 perror (real_name);
1665 goto cleanup;
1669 process_file (inf, uncompressed_name, lang);
1671 retval = fclose (inf);
1672 if (real_name == compressed_name)
1674 remove (tmp_name);
1675 free (tmp_name);
1677 if (retval < 0)
1678 pfatal (file);
1680 cleanup:
1681 if (compressed_name != file)
1682 free (compressed_name);
1683 if (uncompressed_name != file)
1684 free (uncompressed_name);
1685 last_node = NULL;
1686 curfdp = NULL;
1687 return;
1690 static void
1691 process_file (FILE *fh, char *fn, language *lang)
1693 static const fdesc emptyfdesc;
1694 fdesc *fdp;
1696 infilename = fn;
1697 /* Create a new input file description entry. */
1698 fdp = xnew (1, fdesc);
1699 *fdp = emptyfdesc;
1700 fdp->next = fdhead;
1701 fdp->infname = savestr (fn);
1702 fdp->lang = lang;
1703 fdp->infabsname = absolute_filename (fn, cwd);
1704 fdp->infabsdir = absolute_dirname (fn, cwd);
1705 if (filename_is_absolute (fn))
1707 /* An absolute file name. Canonicalize it. */
1708 fdp->taggedfname = absolute_filename (fn, NULL);
1710 else
1712 /* A file name relative to cwd. Make it relative
1713 to the directory of the tags file. */
1714 fdp->taggedfname = relative_filename (fn, tagfiledir);
1716 fdp->usecharno = true; /* use char position when making tags */
1717 fdp->prop = NULL;
1718 fdp->written = false; /* not written on tags file yet */
1720 fdhead = fdp;
1721 curfdp = fdhead; /* the current file description */
1723 find_entries (fh);
1725 /* If not Ctags, and if this is not metasource and if it contained no #line
1726 directives, we can write the tags and free all nodes pointing to
1727 curfdp. */
1728 if (!CTAGS
1729 && curfdp->usecharno /* no #line directives in this file */
1730 && !curfdp->lang->metasource)
1732 node *np, *prev;
1734 /* Look for the head of the sublist relative to this file. See add_node
1735 for the structure of the node tree. */
1736 prev = NULL;
1737 for (np = nodehead; np != NULL; prev = np, np = np->left)
1738 if (np->fdp == curfdp)
1739 break;
1741 /* If we generated tags for this file, write and delete them. */
1742 if (np != NULL)
1744 /* This is the head of the last sublist, if any. The following
1745 instructions depend on this being true. */
1746 assert (np->left == NULL);
1748 assert (fdhead == curfdp);
1749 assert (last_node->fdp == curfdp);
1750 put_entries (np); /* write tags for file curfdp->taggedfname */
1751 free_tree (np); /* remove the written nodes */
1752 if (prev == NULL)
1753 nodehead = NULL; /* no nodes left */
1754 else
1755 prev->left = NULL; /* delete the pointer to the sublist */
1760 static void
1761 reset_input (FILE *inf)
1763 if (fseek (inf, 0, SEEK_SET) != 0)
1764 perror (infilename);
1768 * This routine opens the specified file and calls the function
1769 * which finds the function and type definitions.
1771 static void
1772 find_entries (FILE *inf)
1774 char *cp;
1775 language *lang = curfdp->lang;
1776 Lang_function *parser = NULL;
1778 /* If user specified a language, use it. */
1779 if (lang != NULL && lang->function != NULL)
1781 parser = lang->function;
1784 /* Else try to guess the language given the file name. */
1785 if (parser == NULL)
1787 lang = get_language_from_filename (curfdp->infname, true);
1788 if (lang != NULL && lang->function != NULL)
1790 curfdp->lang = lang;
1791 parser = lang->function;
1795 /* Else look for sharp-bang as the first two characters. */
1796 if (parser == NULL
1797 && readline_internal (&lb, inf, infilename) > 0
1798 && lb.len >= 2
1799 && lb.buffer[0] == '#'
1800 && lb.buffer[1] == '!')
1802 char *lp;
1804 /* Set lp to point at the first char after the last slash in the
1805 line or, if no slashes, at the first nonblank. Then set cp to
1806 the first successive blank and terminate the string. */
1807 lp = strrchr (lb.buffer+2, '/');
1808 if (lp != NULL)
1809 lp += 1;
1810 else
1811 lp = skip_spaces (lb.buffer + 2);
1812 cp = skip_non_spaces (lp);
1813 /* If the "interpreter" turns out to be "env", the real interpreter is
1814 the next word. */
1815 if (cp > lp && strneq (lp, "env", cp - lp))
1817 lp = skip_spaces (cp);
1818 cp = skip_non_spaces (lp);
1820 *cp = '\0';
1822 if (strlen (lp) > 0)
1824 lang = get_language_from_interpreter (lp);
1825 if (lang != NULL && lang->function != NULL)
1827 curfdp->lang = lang;
1828 parser = lang->function;
1833 reset_input (inf);
1835 /* Else try to guess the language given the case insensitive file name. */
1836 if (parser == NULL)
1838 lang = get_language_from_filename (curfdp->infname, false);
1839 if (lang != NULL && lang->function != NULL)
1841 curfdp->lang = lang;
1842 parser = lang->function;
1846 /* Else try Fortran or C. */
1847 if (parser == NULL)
1849 node *old_last_node = last_node;
1851 curfdp->lang = get_language_from_langname ("fortran");
1852 find_entries (inf);
1854 if (old_last_node == last_node)
1855 /* No Fortran entries found. Try C. */
1857 reset_input (inf);
1858 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1859 find_entries (inf);
1861 return;
1864 if (!no_line_directive
1865 && curfdp->lang != NULL && curfdp->lang->metasource)
1866 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1867 file, or anyway we parsed a file that is automatically generated from
1868 this one. If this is the case, the bingo.c file contained #line
1869 directives that generated tags pointing to this file. Let's delete
1870 them all before parsing this file, which is the real source. */
1872 fdesc **fdpp = &fdhead;
1873 while (*fdpp != NULL)
1874 if (*fdpp != curfdp
1875 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1876 /* We found one of those! We must delete both the file description
1877 and all tags referring to it. */
1879 fdesc *badfdp = *fdpp;
1881 /* Delete the tags referring to badfdp->taggedfname
1882 that were obtained from badfdp->infname. */
1883 invalidate_nodes (badfdp, &nodehead);
1885 *fdpp = badfdp->next; /* remove the bad description from the list */
1886 free_fdesc (badfdp);
1888 else
1889 fdpp = &(*fdpp)->next; /* advance the list pointer */
1892 assert (parser != NULL);
1894 /* Generic initializations before reading from file. */
1895 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1897 /* Generic initializations before parsing file with readline. */
1898 lineno = 0; /* reset global line number */
1899 charno = 0; /* reset global char number */
1900 linecharno = 0; /* reset global char number of line start */
1902 parser (inf);
1904 regex_tag_multiline ();
1909 * Check whether an implicitly named tag should be created,
1910 * then call `pfnote'.
1911 * NAME is a string that is internally copied by this function.
1913 * TAGS format specification
1914 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1915 * The following is explained in some more detail in etc/ETAGS.EBNF.
1917 * make_tag creates tags with "implicit tag names" (unnamed tags)
1918 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1919 * 1. NAME does not contain any of the characters in NONAM;
1920 * 2. LINESTART contains name as either a rightmost, or rightmost but
1921 * one character, substring;
1922 * 3. the character, if any, immediately before NAME in LINESTART must
1923 * be a character in NONAM;
1924 * 4. the character, if any, immediately after NAME in LINESTART must
1925 * also be a character in NONAM.
1927 * The implementation uses the notinname() macro, which recognizes the
1928 * characters stored in the string `nonam'.
1929 * etags.el needs to use the same characters that are in NONAM.
1931 static void
1932 make_tag (const char *name, /* tag name, or NULL if unnamed */
1933 int namelen, /* tag length */
1934 bool is_func, /* tag is a function */
1935 char *linestart, /* start of the line where tag is */
1936 int linelen, /* length of the line where tag is */
1937 int lno, /* line number */
1938 long int cno) /* character number */
1940 bool named = (name != NULL && namelen > 0);
1941 char *nname = NULL;
1943 if (debug)
1944 fprintf (stderr, "%s on %s:%d: %s\n",
1945 named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
1947 if (!CTAGS && named) /* maybe set named to false */
1948 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1949 such that etags.el can guess a name from it. */
1951 int i;
1952 register const char *cp = name;
1954 for (i = 0; i < namelen; i++)
1955 if (notinname (*cp++))
1956 break;
1957 if (i == namelen) /* rule #1 */
1959 cp = linestart + linelen - namelen;
1960 if (notinname (linestart[linelen-1]))
1961 cp -= 1; /* rule #4 */
1962 if (cp >= linestart /* rule #2 */
1963 && (cp == linestart
1964 || notinname (cp[-1])) /* rule #3 */
1965 && strneq (name, cp, namelen)) /* rule #2 */
1966 named = false; /* use implicit tag name */
1970 if (named)
1971 nname = savenstr (name, namelen);
1973 pfnote (nname, is_func, linestart, linelen, lno, cno);
1976 /* Record a tag. */
1977 static void
1978 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1979 long int cno)
1980 /* tag name, or NULL if unnamed */
1981 /* tag is a function */
1982 /* start of the line where tag is */
1983 /* length of the line where tag is */
1984 /* line number */
1985 /* character number */
1987 register node *np;
1989 assert (name == NULL || name[0] != '\0');
1990 if (CTAGS && name == NULL)
1991 return;
1993 np = xnew (1, node);
1995 /* If ctags mode, change name "main" to M<thisfilename>. */
1996 if (CTAGS && !cxref_style && streq (name, "main"))
1998 char *fp = strrchr (curfdp->taggedfname, '/');
1999 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2000 fp = strrchr (np->name, '.');
2001 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2002 fp[0] = '\0';
2004 else
2005 np->name = name;
2006 np->valid = true;
2007 np->been_warned = false;
2008 np->fdp = curfdp;
2009 np->is_func = is_func;
2010 np->lno = lno;
2011 if (np->fdp->usecharno)
2012 /* Our char numbers are 0-base, because of C language tradition?
2013 ctags compatibility? old versions compatibility? I don't know.
2014 Anyway, since emacs's are 1-base we expect etags.el to take care
2015 of the difference. If we wanted to have 1-based numbers, we would
2016 uncomment the +1 below. */
2017 np->cno = cno /* + 1 */ ;
2018 else
2019 np->cno = invalidcharno;
2020 np->left = np->right = NULL;
2021 if (CTAGS && !cxref_style)
2023 if (strlen (linestart) < 50)
2024 np->regex = concat (linestart, "$", "");
2025 else
2026 np->regex = savenstr (linestart, 50);
2028 else
2029 np->regex = savenstr (linestart, linelen);
2031 add_node (np, &nodehead);
2035 * Utility functions and data to avoid recursion.
2038 typedef struct stack_entry {
2039 node *np;
2040 struct stack_entry *next;
2041 } stkentry;
2043 static void
2044 push_node (node *np, stkentry **stack_top)
2046 if (np)
2048 stkentry *new = xnew (1, stkentry);
2050 new->np = np;
2051 new->next = *stack_top;
2052 *stack_top = new;
2056 static node *
2057 pop_node (stkentry **stack_top)
2059 node *ret = NULL;
2061 if (*stack_top)
2063 stkentry *old_start = *stack_top;
2065 ret = (*stack_top)->np;
2066 *stack_top = (*stack_top)->next;
2067 free (old_start);
2069 return ret;
2073 * free_tree ()
2074 * emulate recursion on left children, iterate on right children.
2076 static void
2077 free_tree (register node *np)
2079 stkentry *stack = NULL;
2081 while (np)
2083 /* Descent on left children. */
2084 while (np->left)
2086 push_node (np, &stack);
2087 np = np->left;
2089 /* Free node without left children. */
2090 node *node_right = np->right;
2091 free (np->name);
2092 free (np->regex);
2093 free (np);
2094 if (!node_right)
2096 /* Backtrack to find a node with right children, while freeing nodes
2097 that don't have right children. */
2098 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2100 node_right = np->right;
2101 free (np->name);
2102 free (np->regex);
2103 free (np);
2106 /* Free right children. */
2107 np = node_right;
2112 * free_fdesc ()
2113 * delete a file description
2115 static void
2116 free_fdesc (register fdesc *fdp)
2118 free (fdp->infname);
2119 free (fdp->infabsname);
2120 free (fdp->infabsdir);
2121 free (fdp->taggedfname);
2122 free (fdp->prop);
2123 free (fdp);
2127 * add_node ()
2128 * Adds a node to the tree of nodes. In etags mode, sort by file
2129 * name. In ctags mode, sort by tag name. Make no attempt at
2130 * balancing.
2132 * add_node is the only function allowed to add nodes, so it can
2133 * maintain state.
2135 static void
2136 add_node (node *np, node **cur_node_p)
2138 node *cur_node = *cur_node_p;
2140 /* Make the first node. */
2141 if (cur_node == NULL)
2143 *cur_node_p = np;
2144 last_node = np;
2145 return;
2148 if (!CTAGS)
2149 /* Etags Mode */
2151 /* For each file name, tags are in a linked sublist on the right
2152 pointer. The first tags of different files are a linked list
2153 on the left pointer. last_node points to the end of the last
2154 used sublist. */
2155 if (last_node != NULL && last_node->fdp == np->fdp)
2157 /* Let's use the same sublist as the last added node. */
2158 assert (last_node->right == NULL);
2159 last_node->right = np;
2160 last_node = np;
2162 else
2164 while (cur_node->fdp != np->fdp)
2166 if (cur_node->left == NULL)
2167 break;
2168 /* The head of this sublist is not good for us. Let's try the
2169 next one. */
2170 cur_node = cur_node->left;
2172 if (cur_node->left)
2174 /* Scanning the list we found the head of a sublist which is
2175 good for us. Let's scan this sublist. */
2176 if (cur_node->right)
2178 cur_node = cur_node->right;
2179 while (cur_node->right)
2180 cur_node = cur_node->right;
2182 /* Make a new node in this sublist. */
2183 cur_node->right = np;
2185 else
2187 /* Make a new sublist. */
2188 cur_node->left = np;
2190 last_node = np;
2192 } /* if ETAGS mode */
2193 else
2195 /* Ctags Mode */
2196 node **next_node = &cur_node;
2198 while ((cur_node = *next_node) != NULL)
2200 int dif = strcmp (np->name, cur_node->name);
2202 * If this tag name matches an existing one, then
2203 * do not add the node, but maybe print a warning.
2205 if (!dif && no_duplicates)
2207 if (np->fdp == cur_node->fdp)
2209 if (!no_warnings)
2211 fprintf (stderr,
2212 "Duplicate entry in file %s, line %d: %s\n",
2213 np->fdp->infname, lineno, np->name);
2214 fprintf (stderr, "Second entry ignored\n");
2217 else if (!cur_node->been_warned && !no_warnings)
2219 fprintf
2220 (stderr,
2221 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2222 np->fdp->infname, cur_node->fdp->infname, np->name);
2223 cur_node->been_warned = true;
2225 return;
2227 else
2228 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2230 *next_node = np;
2231 last_node = np;
2232 } /* if CTAGS mode */
2236 * invalidate_nodes ()
2237 * Scan the node tree and invalidate all nodes pointing to the
2238 * given file description (CTAGS case) or free them (ETAGS case).
2240 static void
2241 invalidate_nodes (fdesc *badfdp, node **npp)
2243 node *np = *npp;
2244 stkentry *stack = NULL;
2246 if (CTAGS)
2248 while (np)
2250 /* Push all the left children on the stack. */
2251 while (np->left != NULL)
2253 push_node (np, &stack);
2254 np = np->left;
2256 /* Invalidate this node. */
2257 if (np->fdp == badfdp)
2258 np->valid = false;
2259 if (!np->right)
2261 /* Pop nodes from stack, invalidating them, until we find one
2262 with a right child. */
2263 while ((np = pop_node (&stack)) != NULL)
2265 if (np->fdp == badfdp)
2266 np->valid = false;
2267 if (np->right != NULL)
2268 break;
2271 /* Process the right child, if any. */
2272 if (np)
2273 np = np->right;
2276 else
2278 node super_root, *np_parent = NULL;
2280 super_root.left = np;
2281 super_root.fdp = (fdesc *) -1;
2282 np = &super_root;
2284 while (np)
2286 /* Descent on left children until node with BADFP. */
2287 while (np && np->fdp != badfdp)
2289 assert (np->fdp != NULL);
2290 np_parent = np;
2291 np = np->left;
2293 if (np)
2295 np_parent->left = np->left; /* detach subtree from the tree */
2296 np->left = NULL; /* isolate it */
2297 free_tree (np); /* free it */
2299 /* Continue with rest of tree. */
2300 np = np_parent->left;
2303 *npp = super_root.left;
2308 static int total_size_of_entries (node *);
2309 static int number_len (long) ATTRIBUTE_CONST;
/* Length of a non-negative number's decimal representation.
   Returns 1 for 0 through 9, 2 for 10 through 99, and so on.  */
static int
number_len (long int num)
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2322 * Return total number of characters that put_entries will output for
2323 * the nodes in the linked list at the right of the specified node.
2324 * This count is irrelevant with etags.el since emacs 19.34 at least,
2325 * but is still supplied for backward compatibility.
2327 static int
2328 total_size_of_entries (register node *np)
2330 register int total = 0;
2332 for (; np != NULL; np = np->right)
2333 if (np->valid)
2335 total += strlen (np->regex) + 1; /* pat\177 */
2336 if (np->name != NULL)
2337 total += strlen (np->name) + 1; /* name\001 */
2338 total += number_len ((long) np->lno) + 1; /* lno, */
2339 if (np->cno != invalidcharno) /* cno */
2340 total += number_len (np->cno);
2341 total += 1; /* newline */
2344 return total;
2347 static void
2348 put_entry (node *np)
2350 register char *sp;
2351 static fdesc *fdp = NULL;
2353 /* Output this entry */
2354 if (np->valid)
2356 if (!CTAGS)
2358 /* Etags mode */
2359 if (fdp != np->fdp)
2361 fdp = np->fdp;
2362 fprintf (tagf, "\f\n%s,%d\n",
2363 fdp->taggedfname, total_size_of_entries (np));
2364 fdp->written = true;
2366 fputs (np->regex, tagf);
2367 fputc ('\177', tagf);
2368 if (np->name != NULL)
2370 fputs (np->name, tagf);
2371 fputc ('\001', tagf);
2373 fprintf (tagf, "%d,", np->lno);
2374 if (np->cno != invalidcharno)
2375 fprintf (tagf, "%ld", np->cno);
2376 fputs ("\n", tagf);
2378 else
2380 /* Ctags mode */
2381 if (np->name == NULL)
2382 error ("internal error: NULL name in ctags mode.");
2384 if (cxref_style)
2386 if (vgrind_style)
2387 fprintf (stdout, "%s %s %d\n",
2388 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2389 else
2390 fprintf (stdout, "%-16s %3d %-16s %s\n",
2391 np->name, np->lno, np->fdp->taggedfname, np->regex);
2393 else
2395 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2397 if (np->is_func)
2398 { /* function or #define macro with args */
2399 putc (searchar, tagf);
2400 putc ('^', tagf);
2402 for (sp = np->regex; *sp; sp++)
2404 if (*sp == '\\' || *sp == searchar)
2405 putc ('\\', tagf);
2406 putc (*sp, tagf);
2408 putc (searchar, tagf);
2410 else
2411 { /* anything else; text pattern inadequate */
2412 fprintf (tagf, "%d", np->lno);
2414 putc ('\n', tagf);
2417 } /* if this node contains a valid tag */
2420 static void
2421 put_entries (node *np)
2423 stkentry *stack = NULL;
2425 if (np == NULL)
2426 return;
2428 if (CTAGS)
2430 while (np)
2432 /* Stack subentries that precede this one. */
2433 while (np->left)
2435 push_node (np, &stack);
2436 np = np->left;
2438 /* Output this subentry. */
2439 put_entry (np);
2440 /* Stack subentries that follow this one. */
2441 while (!np->right)
2443 /* Output subentries that precede the next one. */
2444 np = pop_node (&stack);
2445 if (!np)
2446 break;
2447 put_entry (np);
2449 if (np)
2450 np = np->right;
2453 else
2455 push_node (np, &stack);
2456 while ((np = pop_node (&stack)) != NULL)
2458 /* Output this subentry. */
2459 put_entry (np);
2460 while (np->right)
2462 /* Output subentries that follow this one. */
2463 put_entry (np->right);
2464 /* Stack subentries from the following files. */
2465 push_node (np->left, &stack);
2466 np = np->right;
2468 push_node (np->left, &stack);
/* C extensions.  These values are combined into the `c_ext' language
   mask that the C-family tagging code passes around.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classification of the keywords recognized by in_word_set; st_none
 * is what C_symtype returns for any other token.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute, st_C_enum_bf,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
ENUM_BF,        0,                      st_C_enum_bf
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - remove any #line directives
 - make in_word_set static and not inline
 - remove any 'register' qualifications from variable decls. */
2551 /*%<*/
2552 /* C code produced by gperf version 3.0.1 */
2553 /* Command-line: gperf -m 5 */
2554 /* Computed positions: -k'2-3' */
2556 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2557 /* maximum key range = 34, duplicates = 0 */
2559 static int
2560 hash (const char *str, int len)
2562 static char const asso_values[] =
2564 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2565 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2566 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2567 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2568 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2569 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2570 36, 36, 36, 36, 36, 36, 36, 36, 36, 3,
2571 27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
2572 36, 36, 36, 25, 0, 0, 36, 36, 36, 0,
2573 36, 36, 36, 36, 36, 1, 36, 16, 36, 6,
2574 23, 0, 0, 36, 22, 0, 36, 36, 5, 0,
2575 0, 15, 1, 36, 6, 36, 8, 19, 36, 16,
2576 4, 5, 36, 36, 36, 36, 36, 36, 36, 36,
2577 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2578 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2579 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2580 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2581 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2582 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2583 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2584 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2585 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2586 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2587 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2588 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2589 36, 36, 36, 36, 36, 36
2591 int hval = len;
2593 switch (hval)
2595 default:
2596 hval += asso_values[(unsigned char) str[2]];
2597 FALLTHROUGH;
2598 case 2:
2599 hval += asso_values[(unsigned char) str[1]];
2600 break;
2602 return hval;
2605 static struct C_stab_entry *
2606 in_word_set (register const char *str, register unsigned int len)
2608 enum
2610 TOTAL_KEYWORDS = 34,
2611 MIN_WORD_LENGTH = 2,
2612 MAX_WORD_LENGTH = 15,
2613 MIN_HASH_VALUE = 2,
2614 MAX_HASH_VALUE = 35
2617 static struct C_stab_entry wordlist[] =
2619 {""}, {""},
2620 {"if", 0, st_C_ignore},
2621 {"GTY", 0, st_C_attribute},
2622 {"@end", 0, st_C_objend},
2623 {"union", 0, st_C_struct},
2624 {"define", 0, st_C_define},
2625 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2626 {"template", 0, st_C_template},
2627 {"operator", C_PLPL, st_C_operator},
2628 {"@interface", 0, st_C_objprot},
2629 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2630 {"friend", C_PLPL, st_C_ignore},
2631 {"typedef", 0, st_C_typedef},
2632 {"return", 0, st_C_ignore},
2633 {"@implementation",0, st_C_objimpl},
2634 {"@protocol", 0, st_C_objprot},
2635 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2636 {"extern", 0, st_C_extern},
2637 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2638 {"struct", 0, st_C_struct},
2639 {"domain", C_STAR, st_C_struct},
2640 {"switch", 0, st_C_ignore},
2641 {"enum", 0, st_C_enum},
2642 {"for", 0, st_C_ignore},
2643 {"namespace", C_PLPL, st_C_struct},
2644 {"class", 0, st_C_class},
2645 {"while", 0, st_C_ignore},
2646 {"undef", 0, st_C_define},
2647 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2648 {"__attribute__", 0, st_C_attribute},
2649 {"ENTRY", 0, st_C_gnumacro},
2650 {"SYSCALL", 0, st_C_gnumacro},
2651 {"ENUM_BF", 0, st_C_enum_bf},
2652 {"PSEUDO", 0, st_C_gnumacro},
2653 {"DEFUN", 0, st_C_gnumacro}
2656 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2658 int key = hash (str, len);
2660 if (key <= MAX_HASH_VALUE && key >= 0)
2662 const char *s = wordlist[key].name;
2664 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2665 return &wordlist[key];
2668 return 0;
2670 /*%>*/
2672 static enum sym_type
2673 C_symtype (char *str, int len, int c_ext)
2675 register struct C_stab_entry *se = in_word_set (str, len);
2677 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2678 return st_none;
2679 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/* Ignoring ENUM_BF (type)
 *
 */
static bool in_enum_bf;         /* inside parentheses following ENUM_BF */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementations seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* false means do not create a tag; the
                                   token should be invalidated whenever a
                                   state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2802 * Variables and functions for dealing with nested structures.
2803 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2805 static void pushclass_above (int, char *, int);
2806 static void popclass_above (int);
2807 static void write_classname (linebuffer *, const char *qualifier);
2809 static struct {
2810 char **cname; /* nested class names */
2811 int *bracelev; /* nested class brace level */
2812 int nl; /* class nesting level (elements used) */
2813 int size; /* length of the array */
2814 } cstack; /* stack for nested declaration tags */
2815 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2816 #define nestlev (cstack.nl)
2817 /* After struct keyword or in struct body, not inside a nested function. */
2818 #define instruct (structdef == snone && nestlev > 0 \
2819 && bracelev == cstack.bracelev[nestlev-1] + 1)
2821 static void
2822 pushclass_above (int bracelev, char *str, int len)
2824 int nl;
2826 popclass_above (bracelev);
2827 nl = cstack.nl;
2828 if (nl >= cstack.size)
2830 int size = cstack.size *= 2;
2831 xrnew (cstack.cname, size, char *);
2832 xrnew (cstack.bracelev, size, int);
2834 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2835 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2836 cstack.bracelev[nl] = bracelev;
2837 cstack.nl = nl + 1;
2840 static void
2841 popclass_above (int bracelev)
2843 int nl;
2845 for (nl = cstack.nl - 1;
2846 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2847 nl--)
2849 free (cstack.cname[nl]);
2850 cstack.nl = nl;
2854 static void
2855 write_classname (linebuffer *cn, const char *qualifier)
2857 int i, len;
2858 int qlen = strlen (qualifier);
2860 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2862 len = 0;
2863 cn->len = 0;
2864 cn->buffer[0] = '\0';
2866 else
2868 len = strlen (cstack.cname[0]);
2869 linebuffer_setlen (cn, len);
2870 strcpy (cn->buffer, cstack.cname[0]);
2872 for (i = 1; i < cstack.nl; i++)
2874 char *s = cstack.cname[i];
2875 if (s == NULL)
2876 continue;
2877 linebuffer_setlen (cn, len + qlen + strlen (s));
2878 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2883 static bool consider_token (char *, int, int, int *, int, int, bool *);
2884 static void make_C_tag (bool);
2887 * consider_token ()
2888 * checks to see if the current token is at the start of a
2889 * function or variable, or corresponds to a typedef, or
2890 * is a struct/union/enum tag, or #define, or an enum constant.
2892 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2893 * with args. C_EXTP points to which language we are looking at.
2895 * Globals
2896 * fvdef IN OUT
2897 * structdef IN OUT
2898 * definedef IN OUT
2899 * typdef IN OUT
2900 * objdef IN OUT
2903 static bool
2904 consider_token (char *str, int len, int c, int *c_extp,
2905 int bracelev, int parlev, bool *is_func_or_var)
2906 /* IN: token pointer */
2907 /* IN: token length */
2908 /* IN: first char after the token */
2909 /* IN, OUT: C extensions mask */
2910 /* IN: brace level */
2911 /* IN: parenthesis level */
2912 /* OUT: function or variable found */
2914 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2915 structtype is the type of the preceding struct-like keyword, and
2916 structbracelev is the brace level where it has been seen. */
2917 static enum sym_type structtype;
2918 static int structbracelev;
2919 static enum sym_type toktype;
2922 toktype = C_symtype (str, len, *c_extp);
2925 * Skip __attribute__
2927 if (toktype == st_C_attribute)
2929 inattribute = true;
2930 return false;
2934 * Skip ENUM_BF
2936 if (toktype == st_C_enum_bf && definedef == dnone)
2938 in_enum_bf = true;
2939 return false;
2943 * Advance the definedef state machine.
2945 switch (definedef)
2947 case dnone:
2948 /* We're not on a preprocessor line. */
2949 if (toktype == st_C_gnumacro)
2951 fvdef = fdefunkey;
2952 return false;
2954 break;
2955 case dsharpseen:
2956 if (toktype == st_C_define)
2958 definedef = ddefineseen;
2960 else
2962 definedef = dignorerest;
2964 return false;
2965 case ddefineseen:
2967 * Make a tag for any macro, unless it is a constant
2968 * and constantypedefs is false.
2970 definedef = dignorerest;
2971 *is_func_or_var = (c == '(');
2972 if (!*is_func_or_var && !constantypedefs)
2973 return false;
2974 else
2975 return true;
2976 case dignorerest:
2977 return false;
2978 default:
2979 error ("internal error: definedef value.");
2983 * Now typedefs
2985 switch (typdef)
2987 case tnone:
2988 if (toktype == st_C_typedef)
2990 if (typedefs)
2991 typdef = tkeyseen;
2992 fvextern = false;
2993 fvdef = fvnone;
2994 return false;
2996 break;
2997 case tkeyseen:
2998 switch (toktype)
3000 case st_none:
3001 case st_C_class:
3002 case st_C_struct:
3003 case st_C_enum:
3004 typdef = ttypeseen;
3005 break;
3006 default:
3007 break;
3009 break;
3010 case ttypeseen:
3011 if (structdef == snone && fvdef == fvnone)
3013 fvdef = fvnameseen;
3014 return true;
3016 break;
3017 case tend:
3018 switch (toktype)
3020 case st_C_class:
3021 case st_C_struct:
3022 case st_C_enum:
3023 return false;
3024 default:
3025 return true;
3027 default:
3028 break;
3031 switch (toktype)
3033 case st_C_javastruct:
3034 if (structdef == stagseen)
3035 structdef = scolonseen;
3036 return false;
3037 case st_C_template:
3038 case st_C_class:
3039 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3040 && bracelev == 0
3041 && definedef == dnone && structdef == snone
3042 && typdef == tnone && fvdef == fvnone)
3043 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3044 if (toktype == st_C_template)
3045 break;
3046 FALLTHROUGH;
3047 case st_C_struct:
3048 case st_C_enum:
3049 if (parlev == 0
3050 && fvdef != vignore
3051 && (typdef == tkeyseen
3052 || (typedefs_or_cplusplus && structdef == snone)))
3054 structdef = skeyseen;
3055 structtype = toktype;
3056 structbracelev = bracelev;
3057 if (fvdef == fvnameseen)
3058 fvdef = fvnone;
3060 return false;
3061 default:
3062 break;
3065 if (structdef == skeyseen)
3067 structdef = stagseen;
3068 return true;
3071 if (typdef != tnone)
3072 definedef = dnone;
3074 /* Detect Objective C constructs. */
3075 switch (objdef)
3077 case onone:
3078 switch (toktype)
3080 case st_C_objprot:
3081 objdef = oprotocol;
3082 return false;
3083 case st_C_objimpl:
3084 objdef = oimplementation;
3085 return false;
3086 default:
3087 break;
3089 break;
3090 case oimplementation:
3091 /* Save the class tag for functions or variables defined inside. */
3092 objtag = savenstr (str, len);
3093 objdef = oinbody;
3094 return false;
3095 case oprotocol:
3096 /* Save the class tag for categories. */
3097 objtag = savenstr (str, len);
3098 objdef = otagseen;
3099 *is_func_or_var = true;
3100 return true;
3101 case oparenseen:
3102 objdef = ocatseen;
3103 *is_func_or_var = true;
3104 return true;
3105 case oinbody:
3106 break;
3107 case omethodsign:
3108 if (parlev == 0)
3110 fvdef = fvnone;
3111 objdef = omethodtag;
3112 linebuffer_setlen (&token_name, len);
3113 memcpy (token_name.buffer, str, len);
3114 token_name.buffer[len] = '\0';
3115 return true;
3117 return false;
3118 case omethodcolon:
3119 if (parlev == 0)
3120 objdef = omethodparm;
3121 return false;
3122 case omethodparm:
3123 if (parlev == 0)
3125 objdef = omethodtag;
3126 if (class_qualify)
3128 int oldlen = token_name.len;
3129 fvdef = fvnone;
3130 linebuffer_setlen (&token_name, oldlen + len);
3131 memcpy (token_name.buffer + oldlen, str, len);
3132 token_name.buffer[oldlen + len] = '\0';
3134 return true;
3136 return false;
3137 case oignore:
3138 if (toktype == st_C_objend)
3140 /* Memory leakage here: the string pointed by objtag is
3141 never released, because many tests would be needed to
3142 avoid breaking on incorrect input code. The amount of
3143 memory leaked here is the sum of the lengths of the
3144 class tags.
3145 free (objtag); */
3146 objdef = onone;
3148 return false;
3149 default:
3150 break;
3153 /* A function, variable or enum constant? */
3154 switch (toktype)
3156 case st_C_extern:
3157 fvextern = true;
3158 switch (fvdef)
3160 case finlist:
3161 case flistseen:
3162 case fignore:
3163 case vignore:
3164 break;
3165 default:
3166 fvdef = fvnone;
3168 return false;
3169 case st_C_ignore:
3170 fvextern = false;
3171 fvdef = vignore;
3172 return false;
3173 case st_C_operator:
3174 fvdef = foperator;
3175 *is_func_or_var = true;
3176 return true;
3177 case st_none:
3178 if (constantypedefs
3179 && structdef == snone
3180 && structtype == st_C_enum && bracelev > structbracelev
3181 /* Don't tag tokens in expressions that assign values to enum
3182 constants. */
3183 && fvdef != vignore)
3184 return true; /* enum constant */
3185 switch (fvdef)
3187 case fdefunkey:
3188 if (bracelev > 0)
3189 break;
3190 fvdef = fdefunname; /* GNU macro */
3191 *is_func_or_var = true;
3192 return true;
3193 case fvnone:
3194 switch (typdef)
3196 case ttypeseen:
3197 return false;
3198 case tnone:
3199 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3200 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3202 fvdef = vignore;
3203 return false;
3205 break;
3206 default:
3207 break;
3209 FALLTHROUGH;
3210 case fvnameseen:
3211 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3213 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3214 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3215 fvdef = foperator;
3216 *is_func_or_var = true;
3217 return true;
3219 if (bracelev > 0 && !instruct)
3220 break;
3221 fvdef = fvnameseen; /* function or variable */
3222 *is_func_or_var = true;
3223 return true;
3224 default:
3225 break;
3227 break;
3228 default:
3229 break;
3232 return false;
3237 * C_entries often keeps pointers to tokens or lines which are older than
3238 * the line currently read. By keeping two line buffers, and switching
3239 * them at end of line, it is possible to use those pointers.
3241 static struct
3243 long linepos;
3244 linebuffer lb;
3245 } lbs[2];
3247 #define current_lb_is_new (newndx == curndx)
3248 #define switch_line_buffers() (curndx = 1 - curndx)
3250 #define curlb (lbs[curndx].lb)
3251 #define newlb (lbs[newndx].lb)
3252 #define curlinepos (lbs[curndx].linepos)
3253 #define newlinepos (lbs[newndx].linepos)
3255 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3256 #define cplpl (c_ext & C_PLPL)
3257 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3259 #define CNL_SAVE_DEFINEDEF() \
3260 do { \
3261 curlinepos = charno; \
3262 readline (&curlb, inf); \
3263 lp = curlb.buffer; \
3264 quotednl = false; \
3265 newndx = curndx; \
3266 } while (0)
3268 #define CNL() \
3269 do { \
3270 CNL_SAVE_DEFINEDEF (); \
3271 if (savetoken.valid) \
3273 token = savetoken; \
3274 savetoken.valid = false; \
3276 definedef = dnone; \
3277 } while (0)
3280 static void
3281 make_C_tag (bool isfun)
3283 /* This function is never called when token.valid is false, but
3284 we must protect against invalid input or internal errors. */
3285 if (token.valid)
3286 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3287 token.offset+token.length+1, token.lineno, token.linepos);
3288 else if (DEBUG)
3289 { /* this branch is optimized away if !DEBUG */
3290 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3291 token_name.len + 17, isfun, token.line,
3292 token.offset+token.length+1, token.lineno, token.linepos);
3293 error ("INVALID TOKEN");
3296 token.valid = false;
/* Return true while INF has neither reached end of file nor recorded
   a read error.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3307 * C_entries ()
3308 * This routine finds functions, variables, typedefs,
3309 * #define's, enum constants and struct/union/enum definitions in
3310 * C syntax and adds them to the list.
/* Scan the C-like source read from INF one character at a time and
   make tags for what it finds.  C_EXT is a bit mask selecting the
   dialect; the code below tests it for C_AUTO, C_PLPL, C_JAVA and
   YACC.  Lines are read alternately into the two elements of lbs[]
   so that pointers into the previous line stay usable.  */
3312 static void
3313 C_entries (int c_ext, FILE *inf)
3314 /* extension of C */
3315 /* input file */
3317 register char c; /* latest char read; '\0' for end of line */
3318 register char *lp; /* pointer one beyond the character `c' */
3319 int curndx, newndx; /* indices for current and new lb */
3320 register int tokoff; /* offset in line of start of current token */
3321 register int toklen; /* length of current token */
3322 const char *qualifier; /* string used to qualify names */
3323 int qlen; /* length of qualifier */
3324 int bracelev; /* current brace level */
3325 int bracketlev; /* current bracket level */
3326 int parlev; /* current parenthesis level */
3327 int attrparlev; /* __attribute__ parenthesis level */
3328 int templatelev; /* current template level */
3329 int typdefbracelev; /* bracelev where a typedef struct body begun */
3330 bool incomm, inquote, inchar, quotednl, midtoken;
3331 bool yacc_rules; /* in the rules part of a yacc file */
3332 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Start with two freshly initialized line buffers; the class stack is
   allocated only while its size is still zero.  */
3335 linebuffer_init (&lbs[0].lb);
3336 linebuffer_init (&lbs[1].lb);
3337 if (cstack.size == 0)
3339 cstack.size = (DEBUG) ? 1 : 4;
3340 cstack.nl = 0;
3341 cstack.cname = xnew (cstack.size, char *);
3342 cstack.bracelev = xnew (cstack.size, int);
3345 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3346 curndx = newndx = 0;
3347 lp = curlb.buffer;
3348 *lp = 0;
3350 fvdef = fvnone; fvextern = false; typdef = tnone;
3351 structdef = snone; definedef = dnone; objdef = onone;
3352 yacc_rules = false;
3353 midtoken = inquote = inchar = incomm = quotednl = false;
3354 token.valid = savetoken.valid = false;
3355 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Java qualifies names with "." where the other dialects use "::".  */
3356 if (cjava)
3357 { qualifier = "."; qlen = 1; }
3358 else
3359 { qualifier = "::"; qlen = 2; }
/* Main loop: each iteration takes one character from the current
   line; a '\0' stands for the end of the line and triggers reading
   the next one.  */
3362 while (perhaps_more_input (inf))
3364 c = *lp++;
3365 if (c == '\\')
3367 /* If we are at the end of the line, the next character is a
3368 '\0'; do not skip it, because it is what tells us
3369 to read the next line. */
3370 if (*lp == '\0')
3372 quotednl = true;
3373 continue;
3375 lp++;
3376 c = ' ';
3378 else if (incomm)
3380 switch (c)
3382 case '*':
3383 if (*lp == '/')
3385 c = *lp++;
3386 incomm = false;
3388 break;
3389 case '\0':
3390 /* Newlines inside comments do not end macro definitions in
3391 traditional cpp. */
3392 CNL_SAVE_DEFINEDEF ();
3393 break;
3395 continue;
3397 else if (inquote)
3399 switch (c)
3401 case '"':
3402 inquote = false;
3403 break;
3404 case '\0':
3405 /* Newlines inside strings do not end macro definitions
3406 in traditional cpp, even though compilers don't
3407 usually accept them. */
3408 CNL_SAVE_DEFINEDEF ();
3409 break;
3411 continue;
3413 else if (inchar)
3415 switch (c)
3417 case '\0':
3418 /* Hmmm, something went wrong. */
3419 CNL ();
3420 FALLTHROUGH;
3421 case '\'':
3422 inchar = false;
3423 break;
3425 continue;
3427 else switch (c)
3429 case '"':
3430 inquote = true;
3431 if (bracketlev > 0)
3432 continue;
3433 if (inattribute)
3434 break;
3435 switch (fvdef)
3437 case fdefunkey:
3438 case fstartlist:
3439 case finlist:
3440 case fignore:
3441 case vignore:
3442 break;
3443 default:
3444 fvextern = false;
3445 fvdef = fvnone;
3447 continue;
3448 case '\'':
3449 inchar = true;
3450 if (bracketlev > 0)
3451 continue;
3452 if (inattribute)
3453 break;
3454 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3456 fvextern = false;
3457 fvdef = fvnone;
3459 continue;
3460 case '/':
3461 if (*lp == '*')
3463 incomm = true;
3464 lp++;
3465 c = ' ';
3466 if (bracketlev > 0)
3467 continue;
/* A "//" comment (accepted in every dialect, see the disabled cplpl
   test): treat the rest of the line as if the line ended here.  */
3469 else if (/* cplpl && */ *lp == '/')
3471 c = '\0';
3473 break;
3474 case '%':
3475 if ((c_ext & YACC) && *lp == '%')
3477 /* Entering or exiting rules section in yacc file. */
3478 lp++;
3479 definedef = dnone; fvdef = fvnone; fvextern = false;
3480 typdef = tnone; structdef = snone;
3481 midtoken = inquote = inchar = incomm = quotednl = false;
3482 bracelev = 0;
3483 yacc_rules = !yacc_rules;
3484 continue;
3486 else
3487 break;
3488 case '#':
3489 if (definedef == dnone)
3491 char *cp;
3492 bool cpptoken = true;
3494 /* Look back on this line. If all blanks, or nonblanks
3495 followed by an end of comment, this is a preprocessor
3496 token. */
3497 for (cp = newlb.buffer; cp < lp-1; cp++)
3498 if (!c_isspace (*cp))
3500 if (*cp == '*' && cp[1] == '/')
3502 cp++;
3503 cpptoken = true;
3505 else
3506 cpptoken = false;
3508 if (cpptoken)
3510 definedef = dsharpseen;
3511 /* This is needed for tagging enum values: when there are
3512 preprocessor conditionals inside the enum, we need to
3513 reset the value of fvdef so that the next enum value is
3514 tagged even though the one before it did not end in a
3515 comma. */
3516 if (fvdef == vignore && instruct && parlev == 0)
3518 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3519 fvdef = fvnone;
3522 } /* if (definedef == dnone) */
3523 continue;
3524 case '[':
3525 bracketlev++;
3526 continue;
3527 default:
3528 if (bracketlev > 0)
3530 if (c == ']')
3531 --bracketlev;
3532 else if (c == '\0')
3533 CNL_SAVE_DEFINEDEF ();
3534 continue;
3536 break;
3537 } /* switch (c) */
3540 /* Consider token only if some involved conditions are satisfied. */
3541 if (typdef != tignore
3542 && definedef != dignorerest
3543 && fvdef != finlist
3544 && templatelev == 0
3545 && (definedef != dnone
3546 || structdef != scolonseen)
3547 && !inattribute
3548 && !in_enum_bf)
3550 if (midtoken)
3552 if (endtoken (c))
3554 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3555 /* This handles :: in the middle,
3556 but not at the beginning of an identifier.
3557 Also, space-separated :: is not recognized. */
3559 if (c_ext & C_AUTO) /* automatic detection of C++ */
3560 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3561 lp += 2;
3562 toklen += 2;
3563 c = lp[-1];
3564 goto still_in_token;
3566 else
3568 bool funorvar = false;
3570 if (yacc_rules
3571 || consider_token (newlb.buffer + tokoff, toklen, c,
3572 &c_ext, bracelev, parlev,
3573 &funorvar))
3575 if (fvdef == foperator)
3577 char *oldlp = lp;
3578 lp = skip_spaces (lp-1);
3579 if (*lp != '\0')
3580 lp += 1;
3581 while (*lp != '\0'
3582 && !c_isspace (*lp) && *lp != '(')
3583 lp += 1;
3584 c = *lp++;
3585 toklen += lp - oldlp;
3587 token.named = false;
3588 if (!plainc
3589 && nestlev > 0 && definedef == dnone)
3590 /* in struct body */
3592 if (class_qualify)
3594 int len;
3595 write_classname (&token_name, qualifier);
3596 len = token_name.len;
3597 linebuffer_setlen (&token_name,
3598 len + qlen + toklen);
3599 sprintf (token_name.buffer + len, "%s%.*s",
3600 qualifier, toklen,
3601 newlb.buffer + tokoff);
3603 else
3605 linebuffer_setlen (&token_name, toklen);
3606 sprintf (token_name.buffer, "%.*s",
3607 toklen, newlb.buffer + tokoff);
3609 token.named = true;
3611 else if (objdef == ocatseen)
3612 /* Objective C category */
3614 if (class_qualify)
3616 int len = strlen (objtag) + 2 + toklen;
3617 linebuffer_setlen (&token_name, len);
3618 sprintf (token_name.buffer, "%s(%.*s)",
3619 objtag, toklen,
3620 newlb.buffer + tokoff);
3622 else
3624 linebuffer_setlen (&token_name, toklen);
3625 sprintf (token_name.buffer, "%.*s",
3626 toklen, newlb.buffer + tokoff);
3628 token.named = true;
3630 else if (objdef == omethodtag
3631 || objdef == omethodparm)
3632 /* Objective C method */
3634 token.named = true;
3636 else if (fvdef == fdefunname)
3637 /* GNU DEFUN and similar macros */
3639 bool defun = (newlb.buffer[tokoff] == 'F');
3640 int off = tokoff;
3641 int len = toklen;
3643 if (defun)
3645 off += 1;
3646 len -= 1;
3648 /* First, tag it as its C name */
3649 linebuffer_setlen (&token_name, toklen);
3650 memcpy (token_name.buffer,
3651 newlb.buffer + tokoff, toklen);
3652 token_name.buffer[toklen] = '\0';
3653 token.named = true;
3654 token.lineno = lineno;
3655 token.offset = tokoff;
3656 token.length = toklen;
3657 token.line = newlb.buffer;
3658 token.linepos = newlinepos;
3659 token.valid = true;
3660 make_C_tag (funorvar);
3662 /* Rewrite the tag so that emacs lisp DEFUNs
3663 can be found also by their elisp name */
3664 linebuffer_setlen (&token_name, len);
3665 memcpy (token_name.buffer,
3666 newlb.buffer + off, len);
3667 token_name.buffer[len] = '\0';
3668 if (defun)
3669 while (--len >= 0)
3670 if (token_name.buffer[len] == '_')
3671 token_name.buffer[len] = '-';
3672 token.named = defun;
3674 else
3676 linebuffer_setlen (&token_name, toklen);
3677 memcpy (token_name.buffer,
3678 newlb.buffer + tokoff, toklen);
3679 token_name.buffer[toklen] = '\0';
3680 /* Name macros and members. */
3681 token.named = (structdef == stagseen
3682 || typdef == ttypeseen
3683 || typdef == tend
3684 || (funorvar
3685 && definedef == dignorerest)
3686 || (funorvar
3687 && definedef == dnone
3688 && structdef == snone
3689 && bracelev > 0));
3691 token.lineno = lineno;
3692 token.offset = tokoff;
3693 token.length = toklen;
3694 token.line = newlb.buffer;
3695 token.linepos = newlinepos;
3696 token.valid = true;
3698 if (definedef == dnone
3699 && (fvdef == fvnameseen
3700 || fvdef == foperator
3701 || structdef == stagseen
3702 || typdef == tend
3703 || typdef == ttypeseen
3704 || objdef != onone))
3706 if (current_lb_is_new)
3707 switch_line_buffers ();
3709 else if (definedef != dnone
3710 || fvdef == fdefunname
3711 || instruct)
3712 make_C_tag (funorvar);
3714 else /* not yacc and consider_token failed */
3716 if (inattribute && fvdef == fignore)
3718 /* We have just met __attribute__ after a
3719 function parameter list: do not tag the
3720 function again. */
3721 fvdef = fvnone;
3724 midtoken = false;
3726 } /* if (endtoken (c)) */
3727 else if (intoken (c))
3728 still_in_token:
3730 toklen++;
3731 continue;
3733 } /* if (midtoken) */
3734 else if (begtoken (c))
3736 switch (definedef)
3738 case dnone:
3739 switch (fvdef)
3741 case fstartlist:
3742 /* This prevents tagging fb in
3743 void (__attribute__((noreturn)) *fb) (void);
3744 Fixing this is not easy and not very important. */
3745 fvdef = finlist;
3746 continue;
3747 case flistseen:
3748 if (plainc || declarations)
3750 make_C_tag (true); /* a function */
3751 fvdef = fignore;
3753 break;
3754 default:
3755 break;
3757 if (structdef == stagseen && !cjava)
3759 popclass_above (bracelev);
3760 structdef = snone;
3762 break;
3763 case dsharpseen:
3764 savetoken = token;
3765 break;
3766 default:
3767 break;
3769 if (!yacc_rules || lp == newlb.buffer + 1)
3771 tokoff = lp - 1 - newlb.buffer;
3772 toklen = 1;
3773 midtoken = true;
3775 continue;
3776 } /* if (begtoken) */
3777 } /* if must look at token */
3780 /* Detect end of line, colon, comma, semicolon and various braces
3781 after having handled a token.*/
3782 switch (c)
3784 case ':':
3785 if (inattribute)
3786 break;
3787 if (yacc_rules && token.offset == 0 && token.valid)
3789 make_C_tag (false); /* a yacc function */
3790 break;
3792 if (definedef != dnone)
3793 break;
3794 switch (objdef)
3796 case otagseen:
3797 objdef = oignore;
3798 make_C_tag (true); /* an Objective C class */
3799 break;
3800 case omethodtag:
3801 case omethodparm:
3802 objdef = omethodcolon;
3803 if (class_qualify)
3805 int toklen = token_name.len;
3806 linebuffer_setlen (&token_name, toklen + 1);
3807 strcpy (token_name.buffer + toklen, ":");
3809 break;
3810 default:
3811 break;
3813 if (structdef == stagseen)
3815 structdef = scolonseen;
3816 break;
3818 /* Should be useless, but may work as a safety net. */
3819 if (cplpl && fvdef == flistseen)
3821 make_C_tag (true); /* a function */
3822 fvdef = fignore;
3823 break;
3825 break;
3826 case ';':
3827 if (definedef != dnone || inattribute)
3828 break;
3829 switch (typdef)
3831 case tend:
3832 case ttypeseen:
3833 make_C_tag (false); /* a typedef */
3834 typdef = tnone;
3835 fvdef = fvnone;
3836 break;
3837 case tnone:
3838 case tinbody:
3839 case tignore:
3840 switch (fvdef)
3842 case fignore:
3843 if (typdef == tignore || cplpl)
3844 fvdef = fvnone;
3845 break;
3846 case fvnameseen:
3847 if ((globals && bracelev == 0 && (!fvextern || declarations))
3848 || (members && instruct))
3849 make_C_tag (false); /* a variable */
3850 fvextern = false;
3851 fvdef = fvnone;
3852 token.valid = false;
3853 break;
3854 case flistseen:
3855 if ((declarations
3856 && (cplpl || !instruct)
3857 && (typdef == tnone || (typdef != tignore && instruct)))
3858 || (members
3859 && plainc && instruct))
3860 make_C_tag (true); /* a function */
3861 FALLTHROUGH;
3862 default:
3863 fvextern = false;
3864 fvdef = fvnone;
3865 if (declarations
3866 && cplpl && structdef == stagseen)
3867 make_C_tag (false); /* forward declaration */
3868 else
3869 token.valid = false;
3870 } /* switch (fvdef) */
3871 FALLTHROUGH;
3872 default:
3873 if (!instruct)
3874 typdef = tnone;
3876 if (structdef == stagseen)
3877 structdef = snone;
3878 break;
3879 case ',':
3880 if (definedef != dnone || inattribute)
3881 break;
3882 switch (objdef)
3884 case omethodtag:
3885 case omethodparm:
3886 make_C_tag (true); /* an Objective C method */
3887 objdef = oinbody;
3888 break;
3889 default:
3890 break;
3892 switch (fvdef)
3894 case fdefunkey:
3895 case foperator:
3896 case fstartlist:
3897 case finlist:
3898 case fignore:
3899 break;
3900 case vignore:
3901 if (instruct && parlev == 0)
3902 fvdef = fvnone;
3903 break;
3904 case fdefunname:
3905 fvdef = fignore;
3906 break;
3907 case fvnameseen:
3908 if (parlev == 0
3909 && ((globals
3910 && bracelev == 0
3911 && templatelev == 0
3912 && (!fvextern || declarations))
3913 || (members && instruct)))
3914 make_C_tag (false); /* a variable */
3915 break;
3916 case flistseen:
3917 if ((declarations && typdef == tnone && !instruct)
3918 || (members && typdef != tignore && instruct))
3920 make_C_tag (true); /* a function */
3921 fvdef = fvnameseen;
3923 else if (!declarations)
3924 fvdef = fvnone;
3925 token.valid = false;
3926 break;
3927 default:
3928 fvdef = fvnone;
3930 if (structdef == stagseen)
3931 structdef = snone;
3932 break;
3933 case ']':
3934 if (definedef != dnone || inattribute)
3935 break;
3936 if (structdef == stagseen)
3937 structdef = snone;
3938 switch (typdef)
3940 case ttypeseen:
3941 case tend:
3942 typdef = tignore;
3943 make_C_tag (false); /* a typedef */
3944 break;
3945 case tnone:
3946 case tinbody:
3947 switch (fvdef)
3949 case foperator:
3950 case finlist:
3951 case fignore:
3952 case vignore:
3953 break;
3954 case fvnameseen:
3955 if ((members && bracelev == 1)
3956 || (globals && bracelev == 0
3957 && (!fvextern || declarations)))
3958 make_C_tag (false); /* a variable */
3959 FALLTHROUGH;
3960 default:
3961 fvdef = fvnone;
3963 break;
3964 default:
3965 break;
3967 break;
3968 case '(':
3969 if (inattribute)
3971 attrparlev++;
3972 break;
3974 if (definedef != dnone)
3975 break;
3976 if (objdef == otagseen && parlev == 0)
3977 objdef = oparenseen;
3978 switch (fvdef)
3980 case fvnameseen:
3981 if (typdef == ttypeseen
3982 && *lp != '*'
3983 && !instruct)
3985 /* This handles constructs like:
3986 typedef void OperatorFun (int fun); */
3987 make_C_tag (false);
3988 typdef = tignore;
3989 fvdef = fignore;
3990 break;
3992 FALLTHROUGH;
3993 case foperator:
3994 fvdef = fstartlist;
3995 break;
3996 case flistseen:
3997 fvdef = finlist;
3998 break;
3999 default:
4000 break;
4002 parlev++;
4003 break;
4004 case ')':
4005 if (inattribute)
4007 if (--attrparlev == 0)
4008 inattribute = false;
4009 break;
4011 if (in_enum_bf)
4013 if (--parlev == 0)
4014 in_enum_bf = false;
4015 break;
4017 if (definedef != dnone)
4018 break;
4019 if (objdef == ocatseen && parlev == 1)
4021 make_C_tag (true); /* an Objective C category */
4022 objdef = oignore;
4024 if (--parlev == 0)
4026 switch (fvdef)
4028 case fstartlist:
4029 case finlist:
4030 fvdef = flistseen;
4031 break;
4032 default:
4033 break;
4035 if (!instruct
4036 && (typdef == tend
4037 || typdef == ttypeseen))
4039 typdef = tignore;
4040 make_C_tag (false); /* a typedef */
4043 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
4044 parlev = 0;
4045 break;
4046 case '{':
4047 if (definedef != dnone)
4048 break;
4049 if (typdef == ttypeseen)
4051 /* Whenever typdef is set to tinbody (currently only
4052 here), typdefbracelev should be set to bracelev. */
4053 typdef = tinbody;
4054 typdefbracelev = bracelev;
4056 switch (fvdef)
4058 case flistseen:
4059 if (cplpl && !class_qualify)
4061 /* Remove class and namespace qualifiers from the token,
4062 leaving only the method/member name. */
4063 char *cc, *uqname = token_name.buffer;
4064 char *tok_end = token_name.buffer + token_name.len;
4066 for (cc = token_name.buffer; cc < tok_end; cc++)
4068 if (*cc == ':' && cc[1] == ':')
4070 uqname = cc + 2;
4071 cc++;
4074 if (uqname > token_name.buffer)
4076 int uqlen = strlen (uqname);
4077 linebuffer_setlen (&token_name, uqlen);
4078 memmove (token_name.buffer, uqname, uqlen + 1);
4081 make_C_tag (true); /* a function */
4082 FALLTHROUGH;
4083 case fignore:
4084 fvdef = fvnone;
4085 break;
4086 case fvnone:
4087 switch (objdef)
4089 case otagseen:
4090 make_C_tag (true); /* an Objective C class */
4091 objdef = oignore;
4092 break;
4093 case omethodtag:
4094 case omethodparm:
4095 make_C_tag (true); /* an Objective C method */
4096 objdef = oinbody;
4097 break;
4098 default:
4099 /* Neutralize `extern "C" {' grot. */
4100 if (bracelev == 0 && structdef == snone && nestlev == 0
4101 && typdef == tnone)
4102 bracelev = -1;
4104 break;
4105 default:
4106 break;
4108 switch (structdef)
4110 case skeyseen: /* unnamed struct */
4111 pushclass_above (bracelev, NULL, 0);
4112 structdef = snone;
4113 break;
4114 case stagseen: /* named struct or enum */
4115 case scolonseen: /* a class */
4116 pushclass_above (bracelev,token.line+token.offset, token.length);
4117 structdef = snone;
4118 make_C_tag (false); /* a struct or enum */
4119 break;
4120 default:
4121 break;
4123 bracelev += 1;
4124 break;
4125 case '*':
4126 if (definedef != dnone)
4127 break;
4128 if (fvdef == fstartlist)
4130 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4131 token.valid = false;
4133 break;
4134 case '}':
4135 if (definedef != dnone)
4136 break;
4137 bracelev -= 1;
4138 if (!ignoreindent && lp == newlb.buffer + 1)
4140 if (bracelev != 0)
4141 token.valid = false; /* unexpected value, token unreliable */
4142 bracelev = 0; /* reset brace level if first column */
4143 parlev = 0; /* also reset paren level, just in case... */
4145 else if (bracelev < 0)
4147 token.valid = false; /* something gone amiss, token unreliable */
4148 bracelev = 0;
4150 if (bracelev == 0 && fvdef == vignore)
4151 fvdef = fvnone; /* end of function */
4152 popclass_above (bracelev);
4153 structdef = snone;
4154 /* Only if typdef == tinbody is typdefbracelev significant. */
4155 if (typdef == tinbody && bracelev <= typdefbracelev)
4157 assert (bracelev == typdefbracelev);
4158 typdef = tend;
4160 break;
4161 case '=':
4162 if (definedef != dnone)
4163 break;
4164 switch (fvdef)
4166 case foperator:
4167 case finlist:
4168 case fignore:
4169 case vignore:
4170 break;
4171 case fvnameseen:
4172 if ((members && bracelev == 1)
4173 || (globals && bracelev == 0 && (!fvextern || declarations)))
4174 make_C_tag (false); /* a variable */
4175 FALLTHROUGH;
4176 default:
4177 fvdef = vignore;
4179 break;
4180 case '<':
4181 if (cplpl
4182 && (structdef == stagseen || fvdef == fvnameseen))
4184 templatelev++;
4185 break;
4187 goto resetfvdef;
4188 case '>':
4189 if (templatelev > 0)
4191 templatelev--;
4192 break;
4194 goto resetfvdef;
4195 case '+':
4196 case '-':
4197 if (objdef == oinbody && bracelev == 0)
4199 objdef = omethodsign;
4200 break;
4202 FALLTHROUGH;
4203 resetfvdef:
4204 case '#': case '~': case '&': case '%': case '/':
4205 case '|': case '^': case '!': case '.': case '?':
4206 if (definedef != dnone)
4207 break;
4208 /* These surely cannot follow a function tag in C. */
4209 switch (fvdef)
4211 case foperator:
4212 case finlist:
4213 case fignore:
4214 case vignore:
4215 break;
4216 default:
4217 fvdef = fvnone;
4219 break;
4220 case '\0':
4221 if (objdef == otagseen)
4223 make_C_tag (true); /* an Objective C class */
4224 objdef = oignore;
4226 /* If a macro spans multiple lines don't reset its state. */
4227 if (quotednl)
4228 CNL_SAVE_DEFINEDEF ();
4229 else
4230 CNL ();
4231 break;
4232 } /* switch (c) */
4234 } /* while not eof */
/* Release the storage held by both line buffers.  */
4236 free (lbs[0].lb.buffer);
4237 free (lbs[1].lb.buffer);
4241 * Process either a C++ file or a C file depending on the setting
4242 * of a global flag.
4244 static void
4245 default_C_entries (FILE *inf)
4247 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Scan INF as plain C: no dialect bit is passed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4257 /* Always do C++. */
4258 static void
4259 Cplusplus_entries (FILE *inf)
4261 C_entries (C_PLPL, inf);
4264 /* Always do Java. */
4265 static void
4266 Cjava_entries (FILE *inf)
4268 C_entries (C_JAVA, inf);
4271 /* Always do C*. */
4272 static void
4273 Cstar_entries (FILE *inf)
4275 C_entries (C_STAR, inf);
4278 /* Always do Yacc. */
4279 static void
4280 Yacc_entries (FILE *inf)
4282 C_entries (YACC, inf);
4286 /* Useful macros. */
/* Loop header: while FILE_POINTER may still have input, read one line
   into LINE_BUFFER and set CHAR_POINTER to the start of that line.
   The statement that follows the macro is the loop body.  */
4287 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4288 while (perhaps_more_input (file_pointer) \
4289 && (readline (&(line_buffer), file_pointer), \
4290 (char_pointer) = (line_buffer).buffer, \
4291 true)) \

/* True if CP points at the keyword KW followed by a character for which
   notinname holds; in that case CP is advanced past KW and any spaces
   after it.  CP is evaluated more than once.  */
4293 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4294 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4295 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4296 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4297 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
4299 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use notinname, and does not skip spaces: on a match CP is left just past KW. */
4300 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4301 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4302 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4303 && ((cp) += sizeof (kw) - 1, true)) /* advance past kw */
4306 * Read a file, but do no processing. This is used to do regexp
4307 * matching on files that have no language defined.
4309 static void
4310 just_read_file (FILE *inf)
4312 while (perhaps_more_input (inf))
4313 readline (&lb, inf);
4317 /* Fortran parsing */
4319 static void F_takeprec (void);
4320 static void F_getit (FILE *);
4322 static void
4323 F_takeprec (void)
4325 dbp = skip_spaces (dbp);
4326 if (*dbp != '*')
4327 return;
4328 dbp++;
4329 dbp = skip_spaces (dbp);
4330 if (strneq (dbp, "(*)", 3))
4332 dbp += 3;
4333 return;
4335 if (!c_isdigit (*dbp))
4337 --dbp; /* force failure */
4338 return;
4341 dbp++;
4342 while (c_isdigit (*dbp));
4345 static void
4346 F_getit (FILE *inf)
4348 register char *cp;
4350 dbp = skip_spaces (dbp);
4351 if (*dbp == '\0')
4353 readline (&lb, inf);
4354 dbp = lb.buffer;
4355 if (dbp[5] != '&')
4356 return;
4357 dbp += 6;
4358 dbp = skip_spaces (dbp);
4360 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4361 return;
4362 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4363 continue;
4364 make_tag (dbp, cp-dbp, true,
4365 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4369 static void
4370 Fortran_functions (FILE *inf)
4372 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4374 if (*dbp == '%')
4375 dbp++; /* Ratfor escape to fortran */
4376 dbp = skip_spaces (dbp);
4377 if (*dbp == '\0')
4378 continue;
4380 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4381 dbp = skip_spaces (dbp);
4383 if (LOOKING_AT_NOCASE (dbp, "pure"))
4384 dbp = skip_spaces (dbp);
4386 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4387 dbp = skip_spaces (dbp);
4389 switch (c_tolower (*dbp))
4391 case 'i':
4392 if (nocase_tail ("integer"))
4393 F_takeprec ();
4394 break;
4395 case 'r':
4396 if (nocase_tail ("real"))
4397 F_takeprec ();
4398 break;
4399 case 'l':
4400 if (nocase_tail ("logical"))
4401 F_takeprec ();
4402 break;
4403 case 'c':
4404 if (nocase_tail ("complex") || nocase_tail ("character"))
4405 F_takeprec ();
4406 break;
4407 case 'd':
4408 if (nocase_tail ("double"))
4410 dbp = skip_spaces (dbp);
4411 if (*dbp == '\0')
4412 continue;
4413 if (nocase_tail ("precision"))
4414 break;
4415 continue;
4417 break;
4419 dbp = skip_spaces (dbp);
4420 if (*dbp == '\0')
4421 continue;
4422 switch (c_tolower (*dbp))
4424 case 'f':
4425 if (nocase_tail ("function"))
4426 F_getit (inf);
4427 continue;
4428 case 's':
4429 if (nocase_tail ("subroutine"))
4430 F_getit (inf);
4431 continue;
4432 case 'e':
4433 if (nocase_tail ("entry"))
4434 F_getit (inf);
4435 continue;
4436 case 'b':
4437 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4439 dbp = skip_spaces (dbp);
4440 if (*dbp == '\0') /* assume un-named */
4441 make_tag ("blockdata", 9, true,
4442 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4443 else
4444 F_getit (inf); /* look for name */
4446 continue;
4453 * Go language support
4454 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4456 static void
4457 Go_functions(FILE *inf)
4459 char *cp, *name;
4461 LOOP_ON_INPUT_LINES(inf, lb, cp)
4463 cp = skip_spaces (cp);
4465 if (LOOKING_AT (cp, "package"))
4467 name = cp;
4468 while (!notinname (*cp) && *cp != '\0')
4469 cp++;
4470 make_tag (name, cp - name, false, lb.buffer,
4471 cp - lb.buffer + 1, lineno, linecharno);
4473 else if (LOOKING_AT (cp, "func"))
4475 /* Go implementation of interface, such as:
4476 func (n *Integer) Add(m Integer) ...
4477 skip `(n *Integer)` part.
4479 if (*cp == '(')
4481 while (*cp != ')')
4482 cp++;
4483 cp = skip_spaces (cp+1);
4486 if (*cp)
4488 name = cp;
4490 while (!notinname (*cp))
4491 cp++;
4493 make_tag (name, cp - name, true, lb.buffer,
4494 cp - lb.buffer + 1, lineno, linecharno);
4497 else if (members && LOOKING_AT (cp, "type"))
4499 name = cp;
4501 /* Ignore the likes of the following:
4502 type (
4506 if (*cp == '(')
4507 return;
4509 while (!notinname (*cp) && *cp != '\0')
4510 cp++;
4512 make_tag (name, cp - name, false, lb.buffer,
4513 cp - lb.buffer + 1, lineno, linecharno);
4520 * Ada parsing
4521 * Original code by
4522 * Philippe Waroquiers (1998)
4525 /* Once we are positioned after an "interesting" keyword, let's get
4526 the real tag value necessary. */
4527 static void
4528 Ada_getit (FILE *inf, const char *name_qualifier)
4530 register char *cp;
4531 char *name;
4532 char c;
4534 while (perhaps_more_input (inf))
4536 dbp = skip_spaces (dbp);
4537 if (*dbp == '\0'
4538 || (dbp[0] == '-' && dbp[1] == '-'))
4540 readline (&lb, inf);
4541 dbp = lb.buffer;
4543 switch (c_tolower (*dbp))
4545 case 'b':
4546 if (nocase_tail ("body"))
4548 /* Skipping body of procedure body or package body or ....
4549 resetting qualifier to body instead of spec. */
4550 name_qualifier = "/b";
4551 continue;
4553 break;
4554 case 't':
4555 /* Skipping type of task type or protected type ... */
4556 if (nocase_tail ("type"))
4557 continue;
4558 break;
4560 if (*dbp == '"')
4562 dbp += 1;
4563 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4564 continue;
4566 else
4568 dbp = skip_spaces (dbp);
4569 for (cp = dbp;
4570 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4571 cp++)
4572 continue;
4573 if (cp == dbp)
4574 return;
4576 c = *cp;
4577 *cp = '\0';
4578 name = concat (dbp, name_qualifier, "");
4579 *cp = c;
4580 make_tag (name, strlen (name), true,
4581 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4582 free (name);
4583 if (c == '"')
4584 dbp = cp + 1;
4585 return;
/* Scan the Ada source in INF one line at a time.  Strings, comments
   and quoted characters are skipped; at the start of each token the
   keywords function, procedure, package, protected, task and (when
   `typedefs' is set) type are recognized and handed to Ada_getit.
   With `packages_only' set, only package and protected are handled.  */
4589 static void
4590 Ada_funcs (FILE *inf)
/* inquote stays set across lines while a string is still open.  */
4592 bool inquote = false;
/* Set after `use' when tagging types; cleared at the next ';'.  */
4593 bool skip_till_semicolumn = false;
4595 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4597 while (*dbp != '\0')
4599 /* Skip a string i.e. "abcd". */
4600 if (inquote || (*dbp == '"'))
/* When not yet inside a string, start searching after the opening
   quote; otherwise search from the start of the continued text.  */
4602 dbp = strchr (dbp + !inquote, '"');
4603 if (dbp != NULL)
4605 inquote = false;
4606 dbp += 1;
4607 continue; /* advance char */
4609 else
4611 inquote = true;
4612 break; /* advance line */
4616 /* Skip comments. */
4617 if (dbp[0] == '-' && dbp[1] == '-')
4618 break; /* advance line */
4620 /* Skip character enclosed in single quote i.e. 'a'
4621 and skip single quote starting an attribute i.e. 'Image. */
4622 if (*dbp == '\'')
4624 dbp++ ;
4625 if (*dbp != '\0')
4626 dbp++;
4627 continue;
4630 if (skip_till_semicolumn)
4632 if (*dbp == ';')
4633 skip_till_semicolumn = false;
4634 dbp++;
4635 continue; /* advance char */
4638 /* Search for beginning of a token. */
4639 if (!begtoken (*dbp))
4641 dbp++;
4642 continue; /* advance char */
4645 /* We are at the beginning of a token. */
4646 switch (c_tolower (*dbp))
4648 case 'f':
4649 if (!packages_only && nocase_tail ("function"))
4650 Ada_getit (inf, "/f");
4651 else
4652 break; /* from switch */
4653 continue; /* advance char */
4654 case 'p':
4655 if (!packages_only && nocase_tail ("procedure"))
4656 Ada_getit (inf, "/p");
4657 else if (nocase_tail ("package"))
4658 Ada_getit (inf, "/s");
4659 else if (nocase_tail ("protected")) /* protected type */
4660 Ada_getit (inf, "/t");
4661 else
4662 break; /* from switch */
4663 continue; /* advance char */
4665 case 'u':
4666 if (typedefs && !packages_only && nocase_tail ("use"))
4668 /* when tagging types, avoid tagging use type Pack.Typename;
4669 for this, we will skip everything till a ; */
4670 skip_till_semicolumn = true;
4671 continue; /* advance char */
4674 case 't':
4675 if (!packages_only && nocase_tail ("task"))
4676 Ada_getit (inf, "/k");
4677 else if (typedefs && !packages_only && nocase_tail ("type"))
4679 Ada_getit (inf, "/t");
/* The rest of the line after a tagged type is not examined.  */
4680 while (*dbp != '\0')
4681 dbp += 1;
4683 else
4684 break; /* from switch */
4685 continue; /* advance char */
4688 /* Look for the end of the token. */
4689 while (!endtoken (*dbp))
4690 dbp++;
4692 } /* advance char */
4693 } /* advance line */
4698 * Unix and microcontroller assembly tag handling
4699 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4700 * Idea by Bob Weiner, Motorola Inc. (1994)
4702 static void
4703 Asm_labels (FILE *inf)
4705 register char *cp;
4707 LOOP_ON_INPUT_LINES (inf, lb, cp)
4709 /* If first char is alphabetic or one of [_.$], test for colon
4710 following identifier. */
4711 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4713 /* Read past label. */
4714 cp++;
4715 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4716 cp++;
4717 if (*cp == ':' || c_isspace (*cp))
4718 /* Found end of label, so copy it and add it to the table. */
4719 make_tag (lb.buffer, cp - lb.buffer, true,
4720 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4727 * Perl support
4728 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4729 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4730 * Perl variable names: /^(my|local).../
4731 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4732 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4733 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4735 static void
4736 Perl_functions (FILE *inf)
4738 char *package = savestr ("main"); /* current package name */
4739 register char *cp;
4741 LOOP_ON_INPUT_LINES (inf, lb, cp)
4743 cp = skip_spaces (cp);
4745 if (LOOKING_AT (cp, "package"))
4747 free (package);
4748 get_tag (cp, &package);
4750 else if (LOOKING_AT (cp, "sub"))
4752 char *pos, *sp;
4754 subr:
4755 sp = cp;
4756 while (!notinname (*cp))
4757 cp++;
4758 if (cp == sp)
4759 continue; /* nothing found */
4760 pos = strchr (sp, ':');
4761 if (pos && pos < cp && pos[1] == ':')
4763 /* The name is already qualified. */
4764 if (!class_qualify)
4766 char *q = pos + 2, *qpos;
4767 while ((qpos = strchr (q, ':')) != NULL
4768 && qpos < cp
4769 && qpos[1] == ':')
4770 q = qpos + 2;
4771 sp = q;
4773 make_tag (sp, cp - sp, true,
4774 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4776 else if (class_qualify)
4777 /* Qualify it. */
4779 char savechar, *name;
4781 savechar = *cp;
4782 *cp = '\0';
4783 name = concat (package, "::", sp);
4784 *cp = savechar;
4785 make_tag (name, strlen (name), true,
4786 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4787 free (name);
4789 else
4790 make_tag (sp, cp - sp, true,
4791 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4793 else if (LOOKING_AT (cp, "use constant")
4794 || LOOKING_AT (cp, "use constant::defer"))
4796 /* For hash style multi-constant like
4797 use constant { FOO => 123,
4798 BAR => 456 };
4799 only the first FOO is picked up. Parsing across the value
4800 expressions would be difficult in general, due to possible nested
4801 hashes, here-documents, etc. */
4802 if (*cp == '{')
4803 cp = skip_spaces (cp+1);
4804 goto subr;
4806 else if (globals) /* only if we are tagging global vars */
4808 /* Skip a qualifier, if any. */
4809 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4810 /* After "my" or "local", but before any following paren or space. */
4811 char *varstart = cp;
4813 if (qual /* should this be removed? If yes, how? */
4814 && (*cp == '$' || *cp == '@' || *cp == '%'))
4816 varstart += 1;
4818 cp++;
4819 while (c_isalnum (*cp) || *cp == '_');
4821 else if (qual)
4823 /* Should be examining a variable list at this point;
4824 could insist on seeing an open parenthesis. */
4825 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4826 cp++;
4828 else
4829 continue;
4831 make_tag (varstart, cp - varstart, false,
4832 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4835 free (package);
4840 * Python support
4841 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4842 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4843 * More ideas by seb bacon <seb@jamkit.com> (2002)
4845 static void
4846 Python_functions (FILE *inf)
4848 register char *cp;
4850 LOOP_ON_INPUT_LINES (inf, lb, cp)
4852 cp = skip_spaces (cp);
4853 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4855 char *name = cp;
4856 while (!notinname (*cp) && *cp != ':')
4857 cp++;
4858 make_tag (name, cp - name, true,
4859 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4865 * Ruby support
4866 * Original code by Xi Lu <lx@shellcodes.org> (2015)
/* Scan the Ruby source INF one line at a time and emit tags for:
   - constants: an identifier starting with an upper-case letter that
     is followed by '=' (but not by "==" or "=>");
   - the name following "def", "class" or "module";
   - the symbols listed after attr_reader, attr_writer, attr_accessor
     and alias_method.
   The flags reader, writer, alias and continuation live outside the
   line loop, so an accessor list that ends a line with a comma is
   carried on into the next line.  */
4868 static void
4869 Ruby_functions (FILE *inf)
4871 char *cp = NULL;
4872 bool reader = false, writer = false, alias = false, continuation = false;
4874 LOOP_ON_INPUT_LINES (inf, lb, cp)
4876 bool is_class = false;
4877 bool is_method = false;
4878 char *name;
4880 cp = skip_spaces (cp);
4881 if (!continuation
4882 /* Constants. */
4883 && c_isalpha (*cp) && c_isupper (*cp))
4885 char *bp, *colon = NULL;
4887 name = cp;
/* Scan over the identifier; colon remembers the last ':' seen so a
   qualified name such as A::B can be reduced to its final part.  */
4889 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4891 if (*cp == ':')
4892 colon = cp;
/* Names of a single character are not tagged.  */
4894 if (cp > name + 1)
4896 bp = skip_spaces (cp);
/* Require an assignment; "==" and "=>" are not assignments.  */
4897 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4899 if (colon && !c_isspace (colon[1]))
4900 name = colon + 1;
4901 make_tag (name, cp - name, false,
4902 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4906 else if (!continuation
4907 /* Modules, classes, methods. */
4908 && ((is_method = LOOKING_AT (cp, "def"))
4909 || (is_class = LOOKING_AT (cp, "class"))
4910 || LOOKING_AT (cp, "module")))
4912 const char self_name[] = "self.";
/* Length of "self." without its terminating NUL.  */
4913 const size_t self_size1 = sizeof (self_name) - 1;
4915 name = cp;
4917 /* Ruby method names can end in a '='. Also, operator overloading can
4918 define operators whose names include '='. */
4919 while (!notinname (*cp) || *cp == '=')
4920 cp++;
4922 /* Remove "self." from the method name. */
4923 if (cp - name > self_size1
4924 && strneq (name, self_name, self_size1))
4925 name += self_size1;
4927 /* Remove the class/module qualifiers from method names. */
4928 if (is_method)
4930 char *q;
/* Locate the first '.' inside the name, if there is one.  */
4932 for (q = name; q < cp && *q != '.'; q++)
4934 if (q < cp - 1) /* punt if we see just "FOO." */
4935 name = q + 1;
4938 /* Don't tag singleton classes. */
4939 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4940 continue;
4942 make_tag (name, cp - name, true,
4943 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4945 else
4947 /* Tag accessors and aliases. */
/* A fresh statement forgets which accessor keyword was active.  */
4949 if (!continuation)
4950 reader = writer = alias = false;
/* Walk the rest of the line, stopping at its end or at a '#'.  */
4952 while (*cp && *cp != '#')
4954 if (!continuation)
4956 reader = writer = alias = false;
4957 if (LOOKING_AT (cp, "attr_reader"))
4958 reader = true;
4959 else if (LOOKING_AT (cp, "attr_writer"))
4960 writer = true;
4961 else if (LOOKING_AT (cp, "attr_accessor"))
4963 reader = true;
4964 writer = true;
4966 else if (LOOKING_AT (cp, "alias_method"))
4967 alias = true;
4969 if (reader || writer || alias)
/* Handle one symbol argument per iteration of this loop.  */
4971 do {
4972 char *np;
4974 cp = skip_spaces (cp);
/* An opening parenthesis around the argument list is optional.  */
4975 if (*cp == '(')
4976 cp = skip_spaces (cp + 1);
4977 np = cp;
4978 cp = skip_name (cp);
/* Only arguments written as symbols (":name") are tagged.  */
4979 if (*np != ':')
4980 continue;
/* Step over the leading ':' of the symbol.  */
4981 np++;
4982 if (reader)
4984 make_tag (np, cp - np, true,
4985 lb.buffer, cp - lb.buffer + 1,
4986 lineno, linecharno);
4987 continuation = false;
4989 if (writer)
/* The writer tag is the symbol name with '=' appended; name_len
   counts the name plus that '=', and the "=" copy below also copies
   the terminating NUL.  */
4991 size_t name_len = cp - np + 1;
4992 char *wr_name = xnew (name_len + 1, char);
4994 memcpy (wr_name, np, name_len - 1);
4995 memcpy (wr_name + name_len - 1, "=", 2);
4996 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4997 lineno, linecharno);
4998 if (debug)
4999 fprintf (stderr, "%s on %s:%d: %s\n", wr_name,
5000 curfdp->taggedfname, lineno, lb.buffer);
5001 continuation = false;
5003 if (alias)
/* Only the first argument of alias_method is tagged; on a
   continuation line that argument was already seen.  */
5005 if (!continuation)
5006 make_tag (np, cp - np, true,
5007 lb.buffer, cp - lb.buffer + 1,
5008 lineno, linecharno);
5009 continuation = false;
/* Skip the rest of the statement.  A ',' that is the last
   non-blank character seen marks the statement as continued.  */
5010 while (*cp && *cp != '#' && *cp != ';')
5012 if (*cp == ',')
5013 continuation = true;
5014 else if (!c_isspace (*cp))
5015 continuation = false;
5016 cp++;
5018 if (*cp == ';')
5019 continuation = false;
5021 cp = skip_spaces (cp);
/* Keep going while a ',' follows.  For non-alias lists this test
   also records in continuation whether the list is still open; the
   loop stops once nothing but a comment or the line end remains.  */
5022 } while ((alias
5023 ? (*cp == ',')
5024 : (continuation = (*cp == ',')))
5025 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
/* Advance past the current word and any separators after it.  */
5027 if (*cp != '#')
5028 cp = skip_name (cp);
5029 while (*cp && *cp != '#' && notinname (*cp))
5030 cp++;
5038 * PHP support
5039 * Look for:
5040 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5041 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5042 * - /^[ \t]*define\(\"[^\"]+/
5043 * Only with --members:
5044 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5045 * Idea by Diez B. Roggisch (2001)
5047 static void
5048 PHP_functions (FILE *inf)
5050 char *cp, *name;
5051 bool search_identifier = false;
5053 LOOP_ON_INPUT_LINES (inf, lb, cp)
5055 cp = skip_spaces (cp);
5056 name = cp;
5057 if (search_identifier
5058 && *cp != '\0')
5060 while (!notinname (*cp))
5061 cp++;
5062 make_tag (name, cp - name, true,
5063 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5064 search_identifier = false;
5066 else if (LOOKING_AT (cp, "function"))
5068 if (*cp == '&')
5069 cp = skip_spaces (cp+1);
5070 if (*cp != '\0')
5072 name = cp;
5073 while (!notinname (*cp))
5074 cp++;
5075 make_tag (name, cp - name, true,
5076 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5078 else
5079 search_identifier = true;
5081 else if (LOOKING_AT (cp, "class"))
5083 if (*cp != '\0')
5085 name = cp;
5086 while (*cp != '\0' && !c_isspace (*cp))
5087 cp++;
5088 make_tag (name, cp - name, false,
5089 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5091 else
5092 search_identifier = true;
5094 else if (strneq (cp, "define", 6)
5095 && (cp = skip_spaces (cp+6))
5096 && *cp++ == '('
5097 && (*cp == '"' || *cp == '\''))
5099 char quote = *cp++;
5100 name = cp;
5101 while (*cp != quote && *cp != '\0')
5102 cp++;
5103 make_tag (name, cp - name, false,
5104 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5106 else if (members
5107 && LOOKING_AT (cp, "var")
5108 && *cp == '$')
5110 name = cp;
5111 while (!notinname (*cp))
5112 cp++;
5113 make_tag (name, cp - name, false,
5114 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5121 * Cobol tag functions
5122 * We could look for anything that could be a paragraph name.
5123 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5124 * Idea by Corny de Souza (1993)
5126 static void
5127 Cobol_paragraphs (FILE *inf)
5129 register char *bp, *ep;
5131 LOOP_ON_INPUT_LINES (inf, lb, bp)
5133 if (lb.len < 9)
5134 continue;
5135 bp += 8;
5137 /* If eoln, compiler option or comment ignore whole line. */
5138 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5139 continue;
5141 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5142 continue;
5143 if (*ep++ == '.')
5144 make_tag (bp, ep - bp, true,
5145 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5151 * Makefile support
5152 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5154 static void
5155 Makefile_targets (FILE *inf)
5157 register char *bp;
5159 LOOP_ON_INPUT_LINES (inf, lb, bp)
5161 if (*bp == '\t' || *bp == '#')
5162 continue;
5163 while (*bp != '\0' && *bp != '=' && *bp != ':')
5164 bp++;
5165 if (*bp == ':' || (globals && *bp == '='))
5167 /* We should detect if there is more than one tag, but we do not.
5168 We just skip initial and final spaces. */
5169 char * namestart = skip_spaces (lb.buffer);
5170 while (--bp > namestart)
5171 if (!notinname (*bp))
5172 break;
5173 make_tag (namestart, bp - namestart + 1, true,
5174 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5181 * Pascal parsing
5182 * Original code by Mosur K. Mohan (1989)
5184 * Locates tags for procedures & functions. Doesn't do any type- or
5185 * var-definitions. It does look for the keyword "extern" or
5186 * "forward" immediately following the procedure statement; if found,
5187 * the tag is skipped.
/* Scan the Pascal source INF one character at a time and tag the name
   that follows each "procedure" or "function" keyword.  The scanner
   tracks whether it is inside a { } or (* *) comment, inside a quoted
   string, or inside a parameter list.  A candidate name is only tagged
   once the ';' ending the heading has been passed and the text that
   follows is neither "extern" nor "forward".  The line holding the
   name is copied into tline, because lb may have been refilled by the
   time the tag is made.  Uses the file-level cursor dbp.  */
5189 static void
5190 Pascal_functions (FILE *inf)
5192 linebuffer tline; /* mostly copied from C_entries */
5193 long save_lcno;
5194 int save_lineno, namelen, taglen;
5195 char c, *name;
5197 bool /* each of these flags is true if: */
5198 incomment, /* point is inside a comment */
5199 inquote, /* point is inside '..' string */
5200 get_tagname, /* point is after PROCEDURE/FUNCTION
5201 keyword, so next item = potential tag */
5202 found_tag, /* point is after a potential tag */
5203 inparms, /* point is within parameter-list */
5204 verify_tag; /* point has passed the parm-list, so the
5205 next token will determine whether this
5206 is a FORWARD/EXTERN to be ignored, or
5207 whether it is a real tag */
5209 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5210 name = NULL; /* keep compiler quiet */
/* Start with an empty line so the first loop pass reads real input.  */
5211 dbp = lb.buffer;
5212 *dbp = '\0';
5213 linebuffer_init (&tline);
5215 incomment = inquote = false;
5216 found_tag = false; /* have a proc name; check if extern */
5217 get_tagname = false; /* found "procedure" keyword */
5218 inparms = false; /* found '(' after "proc" */
5219 verify_tag = false; /* check if "extern" is ahead */
5222 while (perhaps_more_input (inf)) /* long main loop to get next char */
5224 c = *dbp++;
5225 if (c == '\0') /* if end of line */
5227 readline (&lb, inf);
5228 dbp = lb.buffer;
/* Empty line: nothing to examine, fetch the next one.  */
5229 if (*dbp == '\0')
5230 continue;
5231 if (!((found_tag && verify_tag)
5232 || get_tagname))
5233 c = *dbp++; /* only if don't need *dbp pointing
5234 to the beginning of the name of
5235 the procedure or function */
5237 if (incomment)
5239 if (c == '}') /* within { } comments */
5240 incomment = false;
5241 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5243 dbp++;
5244 incomment = false;
5246 continue;
5248 else if (inquote)
5250 if (c == '\'')
5251 inquote = false;
5252 continue;
5254 else
5255 switch (c)
5257 case '\'':
5258 inquote = true; /* found first quote */
5259 continue;
5260 case '{': /* found open { comment */
5261 incomment = true;
5262 continue;
5263 case '(':
5264 if (*dbp == '*') /* found open (* comment */
5266 incomment = true;
5267 dbp++;
5269 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5270 inparms = true;
5271 continue;
5272 case ')': /* end of parms list */
5273 if (inparms)
5274 inparms = false;
5275 continue;
5276 case ';':
5277 if (found_tag && !inparms) /* end of proc or fn stmt */
5279 verify_tag = true;
5280 break;
5282 continue;
/* Past the heading's ';': decide on the pending tag as soon as the
   next character is not a blank.  */
5284 if (found_tag && verify_tag && (*dbp != ' '))
5286 /* Check if this is an "extern" declaration. */
5287 if (*dbp == '\0')
5288 continue;
5289 if (c_tolower (*dbp) == 'e')
5291 if (nocase_tail ("extern")) /* superfluous, really! */
5293 found_tag = false;
5294 verify_tag = false;
5297 else if (c_tolower (*dbp) == 'f')
5299 if (nocase_tail ("forward")) /* check for forward reference */
5301 found_tag = false;
5302 verify_tag = false;
5305 if (found_tag && verify_tag) /* not external proc, so make tag */
5307 found_tag = false;
5308 verify_tag = false;
/* Use the values saved when the name was found; they refer to
   tline, not to the current contents of lb.  */
5309 make_tag (name, namelen, true,
5310 tline.buffer, taglen, save_lineno, save_lcno);
5311 continue;
5314 if (get_tagname) /* grab name of proc or fn */
5316 char *cp;
5318 if (*dbp == '\0')
5319 continue;
5321 /* Find block name. */
5322 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5323 continue;
5325 /* Save all values for later tagging. */
5326 linebuffer_setlen (&tline, lb.len);
5327 strcpy (tline.buffer, lb.buffer);
5328 save_lineno = lineno;
5329 save_lcno = linecharno;
/* name points into the saved copy, at the same offset as dbp in lb.  */
5330 name = tline.buffer + (dbp - lb.buffer);
5331 namelen = cp - dbp;
5332 taglen = cp - lb.buffer + 1;
5334 dbp = cp; /* set dbp to e-o-token */
5335 get_tagname = false;
5336 found_tag = true;
5337 continue;
5339 /* And proceed to check for "extern". */
5341 else if (!incomment && !inquote && !found_tag)
5343 /* Check for proc/fn keywords. */
5344 switch (c_tolower (c))
5346 case 'p':
5347 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5348 get_tagname = true;
5349 continue;
5350 case 'f':
5351 if (nocase_tail ("unction"))
5352 get_tagname = true;
5353 continue;
5356 } /* while not eof */
5358 free (tline.buffer);
5363 * Lisp tag functions
5364 * look for (def or (DEF, quote or QUOTE
5367 static void L_getit (void);
5369 static void
5370 L_getit (void)
5372 if (*dbp == '\'') /* Skip prefix quote */
5373 dbp++;
5374 else if (*dbp == '(')
5376 dbp++;
5377 /* Try to skip "(quote " */
5378 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5379 /* Ok, then skip "(" before name in (defstruct (foo)) */
5380 dbp = skip_spaces (dbp);
5382 get_lispy_tag (dbp);
5385 static void
5386 Lisp_functions (FILE *inf)
5388 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5390 if (dbp[0] != '(')
5391 continue;
5393 /* "(defvar foo)" is a declaration rather than a definition. */
5394 if (! declarations)
5396 char *p = dbp + 1;
5397 if (LOOKING_AT (p, "defvar"))
5399 p = skip_name (p); /* past var name */
5400 p = skip_spaces (p);
5401 if (*p == ')')
5402 continue;
5406 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5407 dbp += 3;
5409 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5411 dbp = skip_non_spaces (dbp);
5412 dbp = skip_spaces (dbp);
5413 L_getit ();
5415 else
5417 /* Check for (foo::defmumble name-defined ... */
5419 dbp++;
5420 while (!notinname (*dbp) && *dbp != ':');
5421 if (*dbp == ':')
5424 dbp++;
5425 while (*dbp == ':');
5427 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5429 dbp = skip_non_spaces (dbp);
5430 dbp = skip_spaces (dbp);
5431 L_getit ();
5440 * Lua script language parsing
5441 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5443 * "function" and "local function" are tags if they start at column 1.
5445 static void
5446 Lua_functions (FILE *inf)
5448 register char *bp;
5450 LOOP_ON_INPUT_LINES (inf, lb, bp)
5452 bp = skip_spaces (bp);
5453 if (bp[0] != 'f' && bp[0] != 'l')
5454 continue;
5456 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5458 if (LOOKING_AT (bp, "function"))
5460 char *tag_name, *tp_dot, *tp_colon;
5462 get_tag (bp, &tag_name);
5463 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5464 "foo". */
5465 tp_dot = strrchr (tag_name, '.');
5466 tp_colon = strrchr (tag_name, ':');
5467 if (tp_dot || tp_colon)
5469 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5470 int len_add = p - tag_name + 1;
5472 get_tag (bp + len_add, NULL);
5480 * PostScript tags
5481 * Just look for lines where the first character is '/'
5482 * Also look at "defineps" for PSWrap
5483 * Ideas by:
5484 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5485 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5487 static void
5488 PS_functions (FILE *inf)
5490 register char *bp, *ep;
5492 LOOP_ON_INPUT_LINES (inf, lb, bp)
5494 if (bp[0] == '/')
5496 for (ep = bp+1;
5497 *ep != '\0' && *ep != ' ' && *ep != '{';
5498 ep++)
5499 continue;
5500 make_tag (bp, ep - bp, true,
5501 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5503 else if (LOOKING_AT (bp, "defineps"))
5504 get_tag (bp, NULL);
5510 * Forth tags
5511 * Ignore anything after \ followed by space or in ( )
5512 * Look for words defined by :
5513 * Look for constant, code, create, defer, value, and variable
5514 * OBP extensions: Look for buffer:, field,
5515 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5517 static void
5518 Forth_words (FILE *inf)
5520 register char *bp;
5522 LOOP_ON_INPUT_LINES (inf, lb, bp)
5523 while ((bp = skip_spaces (bp))[0] != '\0')
5524 if (bp[0] == '\\' && c_isspace (bp[1]))
5525 break; /* read next line */
5526 else if (bp[0] == '(' && c_isspace (bp[1]))
5527 do /* skip to ) or eol */
5528 bp++;
5529 while (*bp != ')' && *bp != '\0');
5530 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5531 || LOOKING_AT_NOCASE (bp, "constant")
5532 || LOOKING_AT_NOCASE (bp, "2constant")
5533 || LOOKING_AT_NOCASE (bp, "fconstant")
5534 || LOOKING_AT_NOCASE (bp, "code")
5535 || LOOKING_AT_NOCASE (bp, "create")
5536 || LOOKING_AT_NOCASE (bp, "defer")
5537 || LOOKING_AT_NOCASE (bp, "value")
5538 || LOOKING_AT_NOCASE (bp, "2value")
5539 || LOOKING_AT_NOCASE (bp, "fvalue")
5540 || LOOKING_AT_NOCASE (bp, "variable")
5541 || LOOKING_AT_NOCASE (bp, "2variable")
5542 || LOOKING_AT_NOCASE (bp, "fvariable")
5543 || LOOKING_AT_NOCASE (bp, "buffer:")
5544 || LOOKING_AT_NOCASE (bp, "field:")
5545 || LOOKING_AT_NOCASE (bp, "+field")
5546 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5547 || LOOKING_AT_NOCASE (bp, "begin-structure")
5548 || LOOKING_AT_NOCASE (bp, "synonym")
5550 && c_isspace (bp[0]))
5552 /* Yay! A definition! */
5553 char* name_start = skip_spaces (bp);
5554 char* name_end = skip_non_spaces (name_start);
5555 if (name_start < name_end)
5556 make_tag (name_start, name_end - name_start,
5557 true, lb.buffer, name_end - lb.buffer,
5558 lineno, linecharno);
5559 bp = name_end;
5561 else
5562 bp = skip_non_spaces (bp);
5567 * Scheme tag functions
5568 * look for (def... xyzzy
5569 * (def... (xyzzy
5570 * (def ... ((...(xyzzy ....
5571 * (set! xyzzy
5572 * Original code by Ken Haase (1985?)
5574 static void
5575 Scheme_functions (FILE *inf)
5577 register char *bp;
5579 LOOP_ON_INPUT_LINES (inf, lb, bp)
5581 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5583 bp = skip_non_spaces (bp+4);
5584 /* Skip over open parens and white space.
5585 Don't continue past '\0' or '='. */
5586 while (*bp && notinname (*bp) && *bp != '=')
5587 bp++;
5588 get_lispy_tag (bp);
5590 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5591 get_lispy_tag (bp);
5596 /* Find tags in TeX and LaTeX input files. */
5598 /* TEX_toktab is a table of TeX control sequences that define tags.
5599 * Each entry records one such control sequence.
5601 * Original code from who knows whom.
5602 * Ideas by:
5603 * Stefan Monnier (2002)
/* NULL until TeX_commands first runs and has TEX_decode_env build the
   table.  TeX_commands walks the entries until it meets one whose
   buffer is NULL.  */
5606 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5608 /* Default set of control sequences to put into TEX_toktab.
5609 The value of environment var TEXTAGS is prepended to this. */
5610 static const char *TEX_defenv = "\
5611 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5612 :part:appendix:entry:index:def\
5613 :newcommand:renewcommand:newenvironment:renewenvironment";
/* Arguments: name of the environment variable, then the default
   colon-separated list.  */
5615 static void TEX_decode_env (const char *, const char *);
5618 * TeX/LaTeX scanning loop.
/* Scan the TeX/LaTeX source INF.  On each line, look for an escape
   character followed by one of the control sequences in TEX_toktab
   and tag the word that follows it; at most one tag is made per line.
   The escape character is chosen within this call: it is the first
   '\\' or '!' met outside a '%' comment, and the group delimiters
   become {} or <> accordingly.  */
5620 static void
5621 TeX_commands (FILE *inf)
5623 char *cp;
5624 linebuffer *key;
/* '\0' means that no escape character has been chosen yet.  */
5626 char TEX_esc = '\0';
5627 char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
5629 /* Initialize token table once from environment. */
5630 if (TEX_toktab == NULL)
5631 TEX_decode_env ("TEXTAGS", TEX_defenv);
5633 LOOP_ON_INPUT_LINES (inf, lb, cp)
5635 /* Look at each TEX keyword in line. */
5636 for (;;)
5638 /* Look for a TEX escape. */
5639 while (true)
5641 char c = *cp++;
/* End of line, or start of a comment: nothing more on this line.  */
5642 if (c == '\0' || c == '%')
5643 goto tex_next_line;
5645 /* Select either \ or ! as escape character, whichever comes
5646 first outside a comment. */
5647 if (!TEX_esc)
5648 switch (c)
5650 case '\\':
5651 TEX_esc = c;
5652 TEX_opgrp = '{';
5653 TEX_clgrp = '}';
5654 break;
5656 case '!':
5657 TEX_esc = c;
5658 TEX_opgrp = '<';
5659 TEX_clgrp = '>';
5660 break;
5663 if (c == TEX_esc)
5664 break;
/* cp is now just past an escape character; compare what follows
   with every known control sequence.  */
5667 for (key = TEX_toktab; key->buffer != NULL; key++)
5668 if (strneq (cp, key->buffer, key->len))
5670 char *p;
5671 int namelen, linelen;
5672 bool opgrp = false;
5674 cp = skip_spaces (cp + key->len);
/* Step inside an opening group delimiter, if there is one.  */
5675 if (*cp == TEX_opgrp)
5677 opgrp = true;
5678 cp++;
/* The name ends at whitespace, '#', or either group delimiter.  */
5680 for (p = cp;
5681 (!c_isspace (*p) && *p != '#' &&
5682 *p != TEX_opgrp && *p != TEX_clgrp);
5683 p++)
5684 continue;
5685 namelen = p - cp;
/* By default the whole line is the tag text; it is shortened below
   unless a group was opened and the name did not end at its closing
   delimiter.  */
5686 linelen = lb.len;
5687 if (!opgrp || *p == TEX_clgrp)
5689 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5690 p++;
5691 linelen = p - lb.buffer + 1;
5693 make_tag (cp, namelen, true,
5694 lb.buffer, linelen, lineno, linecharno);
5695 goto tex_next_line; /* We only tag a line once */
5698 tex_next_line:
5703 /* Read environment and prepend it to the default string.
5704 Build token table. */
5705 static void
5706 TEX_decode_env (const char *evarname, const char *defenv)
5708 register const char *env, *p;
5709 int i, len;
5711 /* Append default string to environment. */
5712 env = getenv (evarname);
5713 if (!env)
5714 env = defenv;
5715 else
5716 env = concat (env, defenv, "");
5718 /* Allocate a token table */
5719 for (len = 1, p = env; (p = strchr (p, ':')); )
5720 if (*++p)
5721 len++;
5722 TEX_toktab = xnew (len, linebuffer);
5724 /* Unpack environment string into token table. Be careful about */
5725 /* zero-length strings (leading ':', "::" and trailing ':') */
5726 for (i = 0; *env != '\0';)
5728 p = strchr (env, ':');
5729 if (!p) /* End of environment string. */
5730 p = env + strlen (env);
5731 if (p - env > 0)
5732 { /* Only non-zero strings. */
5733 TEX_toktab[i].buffer = savenstr (env, p - env);
5734 TEX_toktab[i].len = p - env;
5735 i++;
5737 if (*p)
5738 env = p + 1;
5739 else
5741 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5742 TEX_toktab[i].len = 0;
5743 break;
5749 /* Texinfo support. Dave Love, Mar. 2000. */
5750 static void
5751 Texinfo_nodes (FILE *inf)
5753 char *cp, *start;
5754 LOOP_ON_INPUT_LINES (inf, lb, cp)
5755 if (LOOKING_AT (cp, "@node"))
5757 start = cp;
5758 while (*cp != '\0' && *cp != ',')
5759 cp++;
5760 make_tag (start, cp - start, true,
5761 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5767 * HTML support.
5768 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5769 * Contents of <a name=xxx> are tags with name xxx.
5771 * Francesco Potortì, 2002.
/* Scan the HTML source INF and make tags from the text that follows
   <title>, <h1>, <h2> and <h3>, and from the values of id= attributes
   and of name= attributes of anchors.  The four flags below form a
   small state machine that is kept from one line to the next, so an
   HTML tag may span several lines.  Attribute values are collected in
   the file-level buffer token_name and used as the tag name when the
   following text is tagged.  Uses the file-level cursor dbp.  */
5773 static void
5774 HTML_labels (FILE *inf)
5776 bool getnext = false; /* next text outside of HTML tags is a tag */
5777 bool skiptag = false; /* skip to the end of the current HTML tag */
5778 bool intag = false; /* inside an html tag, looking for ID= */
5779 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5780 char *end;
5783 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5785 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5786 for (;;) /* loop on the same line */
5788 if (skiptag) /* skip HTML tag */
5790 while (*dbp != '\0' && *dbp != '>')
5791 dbp++;
5792 if (*dbp == '>')
5794 dbp += 1;
5795 skiptag = false;
5796 continue; /* look on the same line */
5798 break; /* go to next line */
5801 else if (intag) /* look for "name=" or "id=" */
/* Stop at the end of the tag or at a letter that could start
   "name=" or "id=".  */
5803 while (*dbp != '\0' && *dbp != '>'
5804 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5805 dbp++;
5806 if (*dbp == '\0')
5807 break; /* go to next line */
5808 if (*dbp == '>')
5810 dbp += 1;
5811 intag = false;
5812 continue; /* look on the same line */
5814 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5815 || LOOKING_AT_NOCASE (dbp, "id="))
5817 bool quoted = (dbp[0] == '"');
/* A quoted value runs to the closing quote; an unquoted one runs
   while intoken accepts the characters.  */
5819 if (quoted)
5820 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5821 continue;
5822 else
5823 for (end = dbp; *end != '\0' && intoken (*end); end++)
5824 continue;
/* Keep the attribute value as the name of the upcoming tag.  */
5825 linebuffer_setlen (&token_name, end - dbp);
5826 memcpy (token_name.buffer, dbp, end - dbp);
5827 token_name.buffer[end - dbp] = '\0';
5829 dbp = end;
5830 intag = false; /* we found what we looked for */
5831 skiptag = true; /* skip to the end of the tag */
5832 getnext = true; /* then grab the text */
5833 continue; /* look on the same line */
5835 dbp += 1;
5838 else if (getnext) /* grab next tokens and tag them */
5840 dbp = skip_spaces (dbp);
5841 if (*dbp == '\0')
5842 break; /* go to next line */
/* Another HTML tag comes before any text: examine it first.  */
5843 if (*dbp == '<')
5845 intag = true;
5846 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5847 continue; /* look on the same line */
/* The text up to the next '<' or the end of the line is tagged.  */
5850 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5851 continue;
5852 make_tag (token_name.buffer, token_name.len, true,
5853 dbp, end - dbp, lineno, linecharno);
5854 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5855 getnext = false;
5856 break; /* go to next line */
5859 else /* look for an interesting HTML tag */
5861 while (*dbp != '\0' && *dbp != '<')
5862 dbp++;
5863 if (*dbp == '\0')
5864 break; /* go to next line */
5865 intag = true;
/* An anchor is "<a" followed by a character that intoken rejects.  */
5866 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5868 inanchor = true;
5869 continue; /* look on the same line */
5871 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5872 || LOOKING_AT_NOCASE (dbp, "<h1>")
5873 || LOOKING_AT_NOCASE (dbp, "<h2>")
5874 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5876 intag = false;
5877 getnext = true;
5878 continue; /* look on the same line */
5880 dbp += 1;
5887 * Prolog support
5889 * Assumes that the predicate or rule starts at column 0.
5890 * Only the first clause of a predicate or rule is added.
5891 * Original code by Sunichirou Sugou (1989)
5892 * Rewritten by Anders Lindgren (1996)
5894 static size_t prolog_pr (char *, char *);
5895 static void prolog_skip_comment (linebuffer *, FILE *);
5896 static size_t prolog_atom (char *, size_t);
5898 static void
5899 Prolog_functions (FILE *inf)
5901 char *cp, *last;
5902 size_t len;
5903 size_t allocated;
5905 allocated = 0;
5906 len = 0;
5907 last = NULL;
5909 LOOP_ON_INPUT_LINES (inf, lb, cp)
5911 if (cp[0] == '\0') /* Empty line */
5912 continue;
5913 else if (c_isspace (cp[0])) /* Not a predicate */
5914 continue;
5915 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5916 prolog_skip_comment (&lb, inf);
5917 else if ((len = prolog_pr (cp, last)) > 0)
5919 /* Predicate or rule. Store the function name so that we
5920 only generate a tag for the first clause. */
5921 if (last == NULL)
5922 last = xnew (len + 1, char);
5923 else if (len + 1 > allocated)
5924 xrnew (last, len + 1, char);
5925 allocated = len + 1;
5926 memcpy (last, cp, len);
5927 last[len] = '\0';
5930 free (last);
5934 static void
5935 prolog_skip_comment (linebuffer *plb, FILE *inf)
5937 char *cp;
5941 for (cp = plb->buffer; *cp != '\0'; cp++)
5942 if (cp[0] == '*' && cp[1] == '/')
5943 return;
5944 readline (plb, inf);
5946 while (perhaps_more_input (inf));
5950 * A predicate or rule definition is added if it matches:
5951 * <beginning of line><Prolog Atom><whitespace>(
5952 * or <beginning of line><Prolog Atom><whitespace>:-
5954 * It is added to the tags database if it doesn't match the
5955 * name of the previous clause header.
5957 * Return the size of the name of the predicate or rule, or 0 if no
5958 * header was found.
5960 static size_t
5961 prolog_pr (char *s, char *last)
5963 /* Name of last clause. */
5965 size_t pos;
5966 size_t len;
5968 pos = prolog_atom (s, 0);
5969 if (! pos)
5970 return 0;
5972 len = pos;
5973 pos = skip_spaces (s + pos) - s;
5975 if ((s[pos] == '.'
5976 || (s[pos] == '(' && (pos += 1))
5977 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5978 && (last == NULL /* save only the first clause */
5979 || len != strlen (last)
5980 || !strneq (s, last, len)))
5982 make_tag (s, len, true, s, pos, lineno, linecharno);
5983 return len;
5985 else
5986 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in S at which the atom starts.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (*p == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      p++;
      for (;;)
        switch (*p)
          {
          case '\0':
            /* Multiline quoted atoms are ignored.  */
            return 0;

          case '\\':
            /* A backslash takes the next character along with it.  */
            if (p[1] == '\0')
              return 0;
            p += 2;
            break;

          case '\'':
            /* A lone quote closes the atom; a doubled one is data.  */
            if (p[1] != '\'')
              return (p + 1) - start;
            p += 2;
            break;

          default:
            p++;
            break;
          }
    }

  /* Otherwise the atom must be unquoted.  */
  if (!c_islower (*p) && *p != '_')
    return 0;

  do
    p++;
  while (c_isalnum (*p) || *p == '_');

  return p - start;
}
6048 * Support for Erlang
6050 * Generates tags for functions, defines, and records.
6051 * Assumes that Erlang functions start at column 0.
6052 * Original code by Anders Lindgren (1996)
6054 static int erlang_func (char *, char *);
6055 static void erlang_attribute (char *);
6056 static int erlang_atom (char *);
6058 static void
6059 Erlang_functions (FILE *inf)
6061 char *cp, *last;
6062 int len;
6063 int allocated;
6065 allocated = 0;
6066 len = 0;
6067 last = NULL;
6069 LOOP_ON_INPUT_LINES (inf, lb, cp)
6071 if (cp[0] == '\0') /* Empty line */
6072 continue;
6073 else if (c_isspace (cp[0])) /* Not function nor attribute */
6074 continue;
6075 else if (cp[0] == '%') /* comment */
6076 continue;
6077 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6078 continue;
6079 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6081 erlang_attribute (cp);
6082 if (last != NULL)
6084 free (last);
6085 last = NULL;
6088 else if ((len = erlang_func (cp, last)) > 0)
6091 * Function. Store the function name so that we only
6092 * generates a tag for the first clause.
6094 if (last == NULL)
6095 last = xnew (len + 1, char);
6096 else if (len + 1 > allocated)
6097 xrnew (last, len + 1, char);
6098 allocated = len + 1;
6099 memcpy (last, cp, len);
6100 last[len] = '\0';
6103 free (last);
6108 * A function definition is added if it matches:
6109 * <beginning of line><Erlang Atom><whitespace>(
6111 * It is added to the tags database if it doesn't match the
6112 * name of the previous clause header.
6114 * Return the size of the name of the function, or 0 if no function
6115 * was found.
6117 static int
6118 erlang_func (char *s, char *last)
6120 /* Name of last clause. */
6122 int pos;
6123 int len;
6125 pos = erlang_atom (s);
6126 if (pos < 1)
6127 return 0;
6129 len = pos;
6130 pos = skip_spaces (s + pos) - s;
6132 /* Save only the first clause. */
6133 if (s[pos++] == '('
6134 && (last == NULL
6135 || len != (int)strlen (last)
6136 || !strneq (s, last, len)))
6138 make_tag (s, len, true, s, pos, lineno, linecharno);
6139 return len;
6142 return 0;
6147 * Handle attributes. Currently, tags are generated for defines
6148 * and records.
6150 * They are on the form:
6151 * -define(foo, bar).
6152 * -define(Foo(M, N), M+N).
6153 * -record(graph, {vtab = notable, cyclic = true}).
6155 static void
6156 erlang_attribute (char *s)
6158 char *cp = s;
6160 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6161 && *cp++ == '(')
6163 int len = erlang_atom (skip_spaces (cp));
6164 if (len > 0)
6165 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
6167 return;
/*
 * Consume an Erlang atom (or variable) starting at S.
 *
 * An unquoted atom is a letter or '_' followed by letters, digits and
 * '_'.  A quoted atom is delimited by single quotes and may contain
 * backslash escapes.
 *
 * Return the number of bytes consumed, or 0 if S does not start with an
 * atom or a quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (c_isalpha (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
    }
  else if (s[pos] == '\'')
    {
      for (pos++; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      pos++;                    /* include the closing quote */
    }

  return pos;
}
6200 static char *scan_separators (char *);
6201 static void add_regex (char *, language *);
6202 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
6263 /* Look at the argument of --regex or --no-regex and do the right
6264 thing. Same for each line of a regexp file. */
6265 static void
6266 analyze_regex (char *regex_arg)
6268 if (regex_arg == NULL)
6270 free_regexps (); /* --no-regex: remove existing regexps */
6271 return;
6274 /* A real --regexp option or a line in a regexp file. */
6275 switch (regex_arg[0])
6277 /* Comments in regexp file or null arg to --regex. */
6278 case '\0':
6279 case ' ':
6280 case '\t':
6281 break;
6283 /* Read a regex file. This is recursive and may result in a
6284 loop, which will stop when the file descriptors are exhausted. */
6285 case '@':
6287 FILE *regexfp;
6288 linebuffer regexbuf;
6289 char *regexfile = regex_arg + 1;
6291 /* regexfile is a file containing regexps, one per line. */
6292 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6293 if (regexfp == NULL)
6294 pfatal (regexfile);
6295 linebuffer_init (&regexbuf);
6296 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6297 analyze_regex (regexbuf.buffer);
6298 free (regexbuf.buffer);
6299 if (fclose (regexfp) != 0)
6300 pfatal (regexfile);
6302 break;
6304 /* Regexp to be used for a specific language only. */
6305 case '{':
6307 language *lang;
6308 char *lang_name = regex_arg + 1;
6309 char *cp;
6311 for (cp = lang_name; *cp != '}'; cp++)
6312 if (*cp == '\0')
6314 error ("unterminated language name in regex: %s", regex_arg);
6315 return;
6317 *cp++ = '\0';
6318 lang = get_language_from_langname (lang_name);
6319 if (lang == NULL)
6320 return;
6321 add_regex (cp, lang);
6323 break;
6325 /* Regexp to be used for any language. */
6326 default:
6327 add_regex (regex_arg, NULL);
6328 break;
6332 /* Separate the regexp pattern, compile it,
6333 and care for optional name and modifiers. */
6334 static void
6335 add_regex (char *regexp_pattern, language *lang)
6337 static struct re_pattern_buffer zeropattern;
6338 char sep, *pat, *name, *modifiers;
6339 char empty = '\0';
6340 const char *err;
6341 struct re_pattern_buffer *patbuf;
6342 regexp *rp;
6343 bool
6344 force_explicit_name = true, /* do not use implicit tag names */
6345 ignore_case = false, /* case is significant */
6346 multi_line = false, /* matches are done one line at a time */
6347 single_line = false; /* dot does not match newline */
6350 if (strlen (regexp_pattern) < 3)
6352 error ("null regexp");
6353 return;
6355 sep = regexp_pattern[0];
6356 name = scan_separators (regexp_pattern);
6357 if (name == NULL)
6359 error ("%s: unterminated regexp", regexp_pattern);
6360 return;
6362 if (name[1] == sep)
6364 error ("null name for regexp \"%s\"", regexp_pattern);
6365 return;
6367 modifiers = scan_separators (name);
6368 if (modifiers == NULL) /* no terminating separator --> no name */
6370 modifiers = name;
6371 name = &empty;
6373 else
6374 modifiers += 1; /* skip separator */
6376 /* Parse regex modifiers. */
6377 for (; modifiers[0] != '\0'; modifiers++)
6378 switch (modifiers[0])
6380 case 'N':
6381 if (modifiers == name)
6382 error ("forcing explicit tag name but no name, ignoring");
6383 force_explicit_name = true;
6384 break;
6385 case 'i':
6386 ignore_case = true;
6387 break;
6388 case 's':
6389 single_line = true;
6390 FALLTHROUGH;
6391 case 'm':
6392 multi_line = true;
6393 need_filebuf = true;
6394 break;
6395 default:
6396 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6397 break;
6400 patbuf = xnew (1, struct re_pattern_buffer);
6401 *patbuf = zeropattern;
6402 if (ignore_case)
6404 static char lc_trans[UCHAR_MAX + 1];
6405 int i;
6406 for (i = 0; i < UCHAR_MAX + 1; i++)
6407 lc_trans[i] = c_tolower (i);
6408 patbuf->translate = lc_trans; /* translation table to fold case */
6411 if (multi_line)
6412 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6413 else
6414 pat = regexp_pattern;
6416 if (single_line)
6417 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6418 else
6419 re_set_syntax (RE_SYNTAX_EMACS);
6421 err = re_compile_pattern (pat, strlen (pat), patbuf);
6422 if (multi_line)
6423 free (pat);
6424 if (err != NULL)
6426 error ("%s while compiling pattern", err);
6427 return;
6430 rp = p_head;
6431 p_head = xnew (1, regexp);
6432 p_head->pattern = savestr (regexp_pattern);
6433 p_head->p_next = rp;
6434 p_head->lang = lang;
6435 p_head->pat = patbuf;
6436 p_head->name = savestr (name);
6437 p_head->error_signaled = false;
6438 p_head->force_explicit_name = force_explicit_name;
6439 p_head->ignore_case = ignore_case;
6440 p_head->multi_line = multi_line;
6444 * Do the substitutions indicated by the regular expression and
6445 * arguments.
6447 static char *
6448 substitute (char *in, char *out, struct re_registers *regs)
6450 char *result, *t;
6451 int size, dig, diglen;
6453 result = NULL;
6454 size = strlen (out);
6456 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6457 if (out[size - 1] == '\\')
6458 fatal ("pattern error in \"%s\"", out);
6459 for (t = strchr (out, '\\');
6460 t != NULL;
6461 t = strchr (t + 2, '\\'))
6462 if (c_isdigit (t[1]))
6464 dig = t[1] - '0';
6465 diglen = regs->end[dig] - regs->start[dig];
6466 size += diglen - 2;
6468 else
6469 size -= 1;
6471 /* Allocate space and do the substitutions. */
6472 assert (size >= 0);
6473 result = xnew (size + 1, char);
6475 for (t = result; *out != '\0'; out++)
6476 if (*out == '\\' && c_isdigit (*++out))
6478 dig = *out - '0';
6479 diglen = regs->end[dig] - regs->start[dig];
6480 memcpy (t, in + regs->start[dig], diglen);
6481 t += diglen;
6483 else
6484 *t++ = *out;
6485 *t = '\0';
6487 assert (t <= result + size);
6488 assert (t - result == (int)strlen (result));
6490 return result;
6493 /* Deallocate all regexps. */
6494 static void
6495 free_regexps (void)
6497 regexp *rp;
6498 while (p_head != NULL)
6500 rp = p_head->p_next;
6501 free (p_head->pattern);
6502 free (p_head->name);
6503 free (p_head);
6504 p_head = rp;
6506 return;
6510 * Reads the whole file as a single string from `filebuf' and looks for
6511 * multi-line regular expressions, creating tags on matches.
6512 * readline already dealt with normal regexps.
6514 * Idea by Ben Wing <ben@666.com> (2002).
6516 static void
6517 regex_tag_multiline (void)
6519 char *buffer = filebuf.buffer;
6520 regexp *rp;
6521 char *name;
6523 for (rp = p_head; rp != NULL; rp = rp->p_next)
6525 int match = 0;
6527 if (!rp->multi_line)
6528 continue; /* skip normal regexps */
6530 /* Generic initializations before parsing file from memory. */
6531 lineno = 1; /* reset global line number */
6532 charno = 0; /* reset global char number */
6533 linecharno = 0; /* reset global char number of line start */
6535 /* Only use generic regexps or those for the current language. */
6536 if (rp->lang != NULL && rp->lang != curfdp->lang)
6537 continue;
6539 while (match >= 0 && match < filebuf.len)
6541 match = re_search (rp->pat, buffer, filebuf.len, charno,
6542 filebuf.len - match, &rp->regs);
6543 switch (match)
6545 case -2:
6546 /* Some error. */
6547 if (!rp->error_signaled)
6549 error ("regexp stack overflow while matching \"%s\"",
6550 rp->pattern);
6551 rp->error_signaled = true;
6553 break;
6554 case -1:
6555 /* No match. */
6556 break;
6557 default:
6558 if (match == rp->regs.end[0])
6560 if (!rp->error_signaled)
6562 error ("regexp matches the empty string: \"%s\"",
6563 rp->pattern);
6564 rp->error_signaled = true;
6566 match = -3; /* exit from while loop */
6567 break;
6570 /* Match occurred. Construct a tag. */
6571 while (charno < rp->regs.end[0])
6572 if (buffer[charno++] == '\n')
6573 lineno++, linecharno = charno;
6574 name = rp->name;
6575 if (name[0] == '\0')
6576 name = NULL;
6577 else /* make a named tag */
6578 name = substitute (buffer, rp->name, &rp->regs);
6579 if (rp->force_explicit_name)
6581 /* Force explicit tag name, if a name is there. */
6582 pfnote (name, true, buffer + linecharno,
6583 charno - linecharno + 1, lineno, linecharno);
6585 if (debug)
6586 fprintf (stderr, "%s on %s:%d: %s\n",
6587 name ? name : "(unnamed)", curfdp->taggedfname,
6588 lineno, buffer + linecharno);
6590 else
6591 make_tag (name, strlen (name), true, buffer + linecharno,
6592 charno - linecharno + 1, lineno, linecharno);
6593 break;
6600 static bool
6601 nocase_tail (const char *cp)
6603 int len = 0;
6605 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6606 cp++, len++;
6607 if (*cp == '\0' && !intoken (dbp[len]))
6609 dbp += len;
6610 return true;
6612 return false;
6615 static void
6616 get_tag (register char *bp, char **namepp)
6618 register char *cp = bp;
6620 if (*bp != '\0')
6622 /* Go till you get to white space or a syntactic break */
6623 for (cp = bp + 1; !notinname (*cp); cp++)
6624 continue;
6625 make_tag (bp, cp - bp, true,
6626 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6629 if (namepp != NULL)
6630 *namepp = savenstr (bp, cp - bp);
6633 /* Similar to get_tag, but include '=' as part of the tag. */
6634 static void
6635 get_lispy_tag (register char *bp)
6637 register char *cp = bp;
6639 if (*bp != '\0')
6641 /* Go till you get to white space or a syntactic break */
6642 for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
6643 continue;
6644 make_tag (bp, cp - bp, true,
6645 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6650 * Read a line of text from `stream' into `lbp', excluding the
6651 * newline or CR-NL, if any. Return the number of characters read from
6652 * `stream', which is the length of the line including the newline.
6654 * On DOS or Windows we do not count the CR character, if any before the
6655 * NL, in the returned length; this mirrors the behavior of Emacs on those
6656 * platforms (for text files, it translates CR-NL to NL as it reads in the
6657 * file).
6659 * If multi-line regular expressions are requested, each line read is
6660 * appended to `filebuf'.
6662 static long
6663 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6665 char *buffer = lbp->buffer;
6666 char *p = lbp->buffer;
6667 char *pend;
6668 int chars_deleted;
6670 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6672 for (;;)
6674 register int c = getc (stream);
6675 if (p == pend)
6677 /* We're at the end of linebuffer: expand it. */
6678 lbp->size *= 2;
6679 xrnew (buffer, lbp->size, char);
6680 p += buffer - lbp->buffer;
6681 pend = buffer + lbp->size;
6682 lbp->buffer = buffer;
6684 if (c == EOF)
6686 if (ferror (stream))
6687 perror (filename);
6688 *p = '\0';
6689 chars_deleted = 0;
6690 break;
6692 if (c == '\n')
6694 if (p > buffer && p[-1] == '\r')
6696 p -= 1;
6697 chars_deleted = 2;
6699 else
6701 chars_deleted = 1;
6703 *p = '\0';
6704 break;
6706 *p++ = c;
6708 lbp->len = p - buffer;
6710 if (need_filebuf /* we need filebuf for multi-line regexps */
6711 && chars_deleted > 0) /* not at EOF */
6713 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6715 /* Expand filebuf. */
6716 filebuf.size *= 2;
6717 xrnew (filebuf.buffer, filebuf.size, char);
6719 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6720 filebuf.len += lbp->len;
6721 filebuf.buffer[filebuf.len++] = '\n';
6722 filebuf.buffer[filebuf.len] = '\0';
6725 return lbp->len + chars_deleted;
6729 * Like readline_internal, above, but in addition try to match the
6730 * input line against relevant regular expressions and manage #line
6731 * directives.
6733 static void
6734 readline (linebuffer *lbp, FILE *stream)
6736 long result;
6738 linecharno = charno; /* update global char number of line start */
6739 result = readline_internal (lbp, stream, infilename); /* read line */
6740 lineno += 1; /* increment global line number */
6741 charno += result; /* increment global char number */
6743 /* Honor #line directives. */
6744 if (!no_line_directive)
6746 static bool discard_until_line_directive;
6748 /* Check whether this is a #line directive. */
6749 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6751 unsigned int lno;
6752 int start = 0;
6754 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6755 && start > 0) /* double quote character found */
6757 char *endp = lbp->buffer + start;
6759 while ((endp = strchr (endp, '"')) != NULL
6760 && endp[-1] == '\\')
6761 endp++;
6762 if (endp != NULL)
6763 /* Ok, this is a real #line directive. Let's deal with it. */
6765 char *taggedabsname; /* absolute name of original file */
6766 char *taggedfname; /* name of original file as given */
6767 char *name; /* temp var */
6769 discard_until_line_directive = false; /* found it */
6770 name = lbp->buffer + start;
6771 *endp = '\0';
6772 canonicalize_filename (name);
6773 taggedabsname = absolute_filename (name, tagfiledir);
6774 if (filename_is_absolute (name)
6775 || filename_is_absolute (curfdp->infname))
6776 taggedfname = savestr (taggedabsname);
6777 else
6778 taggedfname = relative_filename (taggedabsname,tagfiledir);
6780 if (streq (curfdp->taggedfname, taggedfname))
6781 /* The #line directive is only a line number change. We
6782 deal with this afterwards. */
6783 free (taggedfname);
6784 else
6785 /* The tags following this #line directive should be
6786 attributed to taggedfname. In order to do this, set
6787 curfdp accordingly. */
6789 fdesc *fdp; /* file description pointer */
6791 /* Go look for a file description already set up for the
6792 file indicated in the #line directive. If there is
6793 one, use it from now until the next #line
6794 directive. */
6795 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6796 if (streq (fdp->infname, curfdp->infname)
6797 && streq (fdp->taggedfname, taggedfname))
6798 /* If we remove the second test above (after the &&)
6799 then all entries pertaining to the same file are
6800 coalesced in the tags file. If we use it, then
6801 entries pertaining to the same file but generated
6802 from different files (via #line directives) will
6803 go into separate sections in the tags file. These
6804 alternatives look equivalent. The first one
6805 destroys some apparently useless information. */
6807 curfdp = fdp;
6808 free (taggedfname);
6809 break;
6811 /* Else, if we already tagged the real file, skip all
6812 input lines until the next #line directive. */
6813 if (fdp == NULL) /* not found */
6814 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6815 if (streq (fdp->infabsname, taggedabsname))
6817 discard_until_line_directive = true;
6818 free (taggedfname);
6819 break;
6821 /* Else create a new file description and use that from
6822 now on, until the next #line directive. */
6823 if (fdp == NULL) /* not found */
6825 fdp = fdhead;
6826 fdhead = xnew (1, fdesc);
6827 *fdhead = *curfdp; /* copy curr. file description */
6828 fdhead->next = fdp;
6829 fdhead->infname = savestr (curfdp->infname);
6830 fdhead->infabsname = savestr (curfdp->infabsname);
6831 fdhead->infabsdir = savestr (curfdp->infabsdir);
6832 fdhead->taggedfname = taggedfname;
6833 fdhead->usecharno = false;
6834 fdhead->prop = NULL;
6835 fdhead->written = false;
6836 curfdp = fdhead;
6839 free (taggedabsname);
6840 lineno = lno - 1;
6841 readline (lbp, stream);
6842 return;
6843 } /* if a real #line directive */
6844 } /* if #line is followed by a number */
6845 } /* if line begins with "#line " */
6847 /* If we are here, no #line directive was found. */
6848 if (discard_until_line_directive)
6850 if (result > 0)
6852 /* Do a tail recursion on ourselves, thus discarding the contents
6853 of the line buffer. */
6854 readline (lbp, stream);
6855 return;
6857 /* End of file. */
6858 discard_until_line_directive = false;
6859 return;
6861 } /* if #line directives should be considered */
6864 int match;
6865 regexp *rp;
6866 char *name;
6868 /* Match against relevant regexps. */
6869 if (lbp->len > 0)
6870 for (rp = p_head; rp != NULL; rp = rp->p_next)
6872 /* Only use generic regexps or those for the current language.
6873 Also do not use multiline regexps, which is the job of
6874 regex_tag_multiline. */
6875 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6876 || rp->multi_line)
6877 continue;
6879 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6880 switch (match)
6882 case -2:
6883 /* Some error. */
6884 if (!rp->error_signaled)
6886 error ("regexp stack overflow while matching \"%s\"",
6887 rp->pattern);
6888 rp->error_signaled = true;
6890 break;
6891 case -1:
6892 /* No match. */
6893 break;
6894 case 0:
6895 /* Empty string matched. */
6896 if (!rp->error_signaled)
6898 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6899 rp->error_signaled = true;
6901 break;
6902 default:
6903 /* Match occurred. Construct a tag. */
6904 name = rp->name;
6905 if (name[0] == '\0')
6906 name = NULL;
6907 else /* make a named tag */
6908 name = substitute (lbp->buffer, rp->name, &rp->regs);
6909 if (rp->force_explicit_name)
6911 /* Force explicit tag name, if a name is there. */
6912 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6913 if (debug)
6914 fprintf (stderr, "%s on %s:%d: %s\n",
6915 name ? name : "(unnamed)", curfdp->taggedfname,
6916 lineno, lbp->buffer);
6918 else
6919 make_tag (name, strlen (name), true,
6920 lbp->buffer, match, lineno, linecharno);
6921 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6939 * Return a pointer to a space of size LEN+1 allocated with xnew where
6940 * the string CP has been copied for at most the first LEN characters.
6942 static char *
6943 savenstr (const char *cp, int len)
6945 char *dp = xnew (len + 1, char);
6946 dp[len] = '\0';
6947 return memcpy (dp, cp, len);
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (char *cp)
{
  while (c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  while (! notinname (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  FORMAT and the following arguments
   are printf-style; the message is printed through verror and the
   process then exits with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list ap;
  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
  exit (EXIT_FAILURE);
}
/* Print S1 followed by the message for the current errno (via perror)
   and exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6996 static void
6997 suggest_asking_for_help (void)
6999 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
7000 progname);
7001 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args.  The message
   is printed through verror; the function then returns normally.  */
static void
error (const char *format, ...)
{
  va_list ap;
  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
}
7014 static void
7015 verror (char const *format, va_list ap)
7017 fprintf (stderr, "%s: ", progname);
7018 vfprintf (stderr, format, ap);
7019 fprintf (stderr, "\n");
7022 /* Return a newly-allocated string whose contents
7023 concatenate those of s1, s2, s3. */
7024 static char *
7025 concat (const char *s1, const char *s2, const char *s3)
7027 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
7028 char *result = xnew (len1 + len2 + len3 + 1, char);
7030 strcpy (result, s1);
7031 strcpy (result + len1, s2);
7032 strcpy (result + len1 + len2, s3);
7034 return result;
7038 /* Does the same work as the system V getcwd, but does not need to
7039 guess the buffer size in advance. */
7040 static char *
7041 etags_getcwd (void)
7043 int bufsize = 200;
7044 char *path = xnew (bufsize, char);
7046 while (getcwd (path, bufsize) == NULL)
7048 if (errno != ERANGE)
7049 pfatal ("getcwd");
7050 bufsize *= 2;
7051 free (path);
7052 path = xnew (bufsize, char);
7055 canonicalize_filename (path);
7056 return path;
7059 /* Return a newly allocated string containing a name of a temporary file. */
7060 static char *
7061 etags_mktmp (void)
7063 const char *tmpdir = getenv ("TMPDIR");
7064 const char *slash = "/";
7066 #if MSDOS || defined (DOS_NT)
7067 if (!tmpdir)
7068 tmpdir = getenv ("TEMP");
7069 if (!tmpdir)
7070 tmpdir = getenv ("TMP");
7071 if (!tmpdir)
7072 tmpdir = ".";
7073 if (tmpdir[strlen (tmpdir) - 1] == '/'
7074 || tmpdir[strlen (tmpdir) - 1] == '\\')
7075 slash = "";
7076 #else
7077 if (!tmpdir)
7078 tmpdir = "/tmp";
7079 if (tmpdir[strlen (tmpdir) - 1] == '/')
7080 slash = "";
7081 #endif
7083 char *templt = concat (tmpdir, slash, "etXXXXXX");
7084 int fd = mkostemp (templt, O_CLOEXEC);
7085 if (fd < 0 || close (fd) != 0)
7087 int temp_errno = errno;
7088 free (templt);
7089 errno = temp_errno;
7090 templt = NULL;
7092 #if defined (DOS_NT)
7093 else
7095 /* The file name will be used in shell redirection, so it needs to have
7096 DOS-style backslashes, or else the Windows shell will barf. */
7097 char *p;
7098 for (p = templt; *p; p++)
7099 if (*p == '/')
7100 *p = '\\';
7102 #endif
7104 return templt;
7107 /* Return a newly allocated string containing the file name of FILE
7108 relative to the absolute directory DIR (which should end with a slash). */
7109 static char *
7110 relative_filename (char *file, char *dir)
7112 char *fp, *dp, *afn, *res;
7113 int i;
7115 /* Find the common root of file and dir (with a trailing slash). */
7116 afn = absolute_filename (file, cwd);
7117 fp = afn;
7118 dp = dir;
7119 while (*fp++ == *dp++)
7120 continue;
7121 fp--, dp--; /* back to the first differing char */
7122 #ifdef DOS_NT
7123 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
7124 return afn;
7125 #endif
7126 do /* look at the equal chars until '/' */
7127 fp--, dp--;
7128 while (*fp != '/');
7130 /* Build a sequence of "../" strings for the resulting relative file name. */
7131 i = 0;
7132 while ((dp = strchr (dp + 1, '/')) != NULL)
7133 i += 1;
7134 res = xnew (3*i + strlen (fp + 1) + 1, char);
7135 char *z = res;
7136 while (i-- > 0)
7137 z = stpcpy (z, "../");
7139 /* Add the file name relative to the common root of file and dir. */
7140 strcpy (z, fp + 1);
7141 free (afn);
7143 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.." and
   "/." components are removed from the result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) counts the tail after "/.." plus
                 its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  /* Cut FILE just after its last slash while the name is resolved.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute if it starts with '/'; under DOS_NT a name that
   starts with a letter, a colon and '/' is absolute as well.  */
static bool
filename_is_absolute (char *fn)
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  */
static void
canonicalize_filename (register char *fn)
{
  register char* cp;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == '\\')
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == '\\')
          cp++;
      }
    else
      *fn = *cp;

#else /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/')
      {
        *fn = '/';
        while (cp[1] == '/')
          cp++;
      }
    else
      *fn = *cp;

#endif /* !DOS_NT */

  /* FN is the write position and never runs ahead of CP, the read
     position; terminate the shortened name.  */
  *fn = '\0';
}
7284 /* Initialize a linebuffer for use. */
7285 static void
7286 linebuffer_init (linebuffer *lbp)
7288 lbp->size = (DEBUG) ? 3 : 200;
7289 lbp->buffer = xnew (lbp->size, char);
7290 lbp->buffer[0] = '\0';
7291 lbp->len = 0;
7294 /* Set the minimum size of a string contained in a linebuffer. */
7295 static void
7296 linebuffer_setlen (linebuffer *lbp, int toksize)
7298 while (lbp->size <= toksize)
7300 lbp->size *= 2;
7301 xrnew (lbp->buffer, lbp->size, char);
7303 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);
  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *result = realloc (ptr, size);
  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */