Align stack bottom properly.
[emacs.git] / lib-src / etags.c
blob5e05c19c62448b8ceb045ccea5a2920d300c21d8
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
/*
 * Authors:
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganized C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 * Francesco Potortì maintained and improved it for many years
 * starting in 1993.
 */
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest. Alternatively, consider distributing etags
 * together with a configuration file containing regexp definitions for etags.
 */
/* Revision identifier of this etags source.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";

/* Normalize DEBUG to a boolean constant.  When DEBUG was not requested,
   NDEBUG is defined as well so that assertions are disabled.  */
#ifdef DEBUG
# undef DEBUG
# define DEBUG true
#else
# define DEBUG false
# define NDEBUG /* disable assert */
#endif
91 #include <config.h>
/* WIN32_NATIVE is for XEmacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* Normalize MSDOS to a boolean constant usable in ordinary expressions.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */

#ifdef WINDOWSNT
# include <direct.h>
# undef HAVE_NTGUI
# undef DOS_NT
# define DOS_NT
/* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
# define O_CLOEXEC O_NOINHERIT
#endif /* WINDOWSNT */
118 #include <limits.h>
119 #include <unistd.h>
120 #include <stdarg.h>
121 #include <stdlib.h>
122 #include <string.h>
123 #include <sysstdio.h>
124 #include <errno.h>
125 #include <fcntl.h>
126 #include <binary-io.h>
127 #include <unlocked-io.h>
128 #include <c-ctype.h>
129 #include <c-strcase.h>
#include <assert.h>
/* With NDEBUG defined, force assert to expand to a no-op regardless of
   what the system header provided.  */
#ifdef NDEBUG
# undef assert /* some systems have a buggy assert.h */
# define assert(x) ((void) 0)
#endif
137 #include <getopt.h>
138 #include <regex.h>
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default.
   Either way CTAGS ends up defined as a boolean constant.  */
#ifdef CTAGS
# undef CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
/* Return true if the strings S and T are identical.  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if S and T compare equal under c_strcasecmp.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if the first N characters of S and T are identical
   (as determined by strncmp).  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if the first N characters of S and T compare equal
   under c_strncasecmp.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* C is not in a name.
   Implemented as a lookup in a table indexed by every unsigned char
   value; entries not listed are false.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };
  return table[c];
}
/* C can start a token.
   True for '$', '@', '_', '~' and the ASCII letters; false for every
   other unsigned char value.  */
static bool
begtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1, ['@']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1,
    ['~']=1
  };
  return table[c];
}
/* C can be in the middle of a token.
   True for '$', '_', the ASCII digits and the ASCII letters; false for
   every other unsigned char value.  */
static bool
intoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1,
    ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
    ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1
  };
  return table[c];
}
/* C can end a token.
   True for NUL, the listed whitespace characters and the listed
   punctuation characters; false for every other unsigned char value.  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };
  return table[c];
}
/*
 * xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS: Type *xnew (int n, Type);
 * void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type obtained from xmalloc.
 * xrnew resizes OP to N objects of type Type through xrealloc and stores
 * the result back into OP; OP is evaluated more than once.
 */
#define xnew(n, Type) ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc ((op), (n) * sizeof (Type)))
/* Type of a language parse function: it takes the input stream.  */
typedef void Lang_function (FILE *);
/* Association between a file name suffix and the command that
   decompresses such a file.  */
typedef struct
{
  const char *suffix;		/* file name suffix for this compressor */
  const char *command;		/* takes one arg and decompresses to stdout */
} compressor;
258 typedef struct
260 const char *name; /* language name */
261 const char *help; /* detailed help for the language */
262 Lang_function *function; /* parse function */
263 const char **suffixes; /* name suffixes of this language's files */
264 const char **filenames; /* names of this language's files */
265 const char **interpreters; /* interpreters for this language */
266 bool metasource; /* source used to generate other sources */
267 } language;
269 typedef struct fdesc
271 struct fdesc *next; /* for the linked list */
272 char *infname; /* uncompressed input file name */
273 char *infabsname; /* absolute uncompressed input file name */
274 char *infabsdir; /* absolute dir of input file */
275 char *taggedfname; /* file name to write in tagfile */
276 language *lang; /* language of file */
277 char *prop; /* file properties to write in tagfile */
278 bool usecharno; /* etags tags shall contain char number */
279 bool written; /* entry written in the tags file */
280 } fdesc;
282 typedef struct node_st
283 { /* sorting structure */
284 struct node_st *left, *right; /* left and right sons */
285 fdesc *fdp; /* description of file to whom tag belongs */
286 char *name; /* tag name */
287 char *regex; /* search regexp */
288 bool valid; /* write this tag on the tag file */
289 bool is_func; /* function tag: use regexp in CTAGS mode */
290 bool been_warned; /* warning already given for duplicated tag */
291 int lno; /* line number tag is on */
292 long cno; /* character number line starts on */
293 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
309 /* Used to support mixing of --lang and file names. */
310 typedef struct
312 enum {
313 at_language, /* a language specification */
314 at_regexp, /* a regular expression */
315 at_filename, /* a file name */
316 at_stdin, /* read from stdin here */
317 at_end /* stop parsing the list */
318 } arg_type; /* argument type */
319 language *lang; /* language associated with the argument */
320 char *what; /* the argument itself */
321 } argument;
323 /* Structure defining a regular expression. */
324 typedef struct regexp
326 struct regexp *p_next; /* pointer to next in list */
327 language *lang; /* if set, use only for this language */
328 char *pattern; /* the regexp pattern */
329 char *name; /* tag name */
330 struct re_pattern_buffer *pat; /* the compiled pattern */
331 struct re_registers regs; /* re registers */
332 bool error_signaled; /* already signaled for this regexp */
333 bool force_explicit_name; /* do not allow implicit tag name */
334 bool ignore_case; /* ignore case when matching */
335 bool multi_line; /* do a multi-line match on the whole file */
336 } regexp;
/* Forward declarations of the per-language parse functions.
   Many compilers barf on this:
	Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void Go_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Ruby_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
372 static language *get_language_from_langname (const char *);
373 static void readline (linebuffer *, FILE *);
374 static long readline_internal (linebuffer *, FILE *, char const *);
375 static bool nocase_tail (const char *);
376 static void get_tag (char *, char **);
377 static void get_lispy_tag (char *);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn void suggest_asking_for_help (void);
385 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn void pfatal (const char *);
387 static void add_node (node *, node **);
389 static void process_file_name (char *, language *);
390 static void process_file (FILE *, char *, language *);
391 static void find_entries (FILE *);
392 static void free_tree (node *);
393 static void free_fdesc (fdesc *);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc *, node **);
396 static void put_entries (node *);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer *);
412 static void linebuffer_setlen (linebuffer *, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar = '/'; /* use /.../ searches */
419 static char *tagfile; /* output file */
420 static char *progname; /* name this program was invoked with */
421 static char *cwd; /* current working directory */
422 static char *tagfiledir; /* directory of tagfile */
423 static FILE *tagf; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
426 static fdesc *fdhead; /* head of file description list */
427 static fdesc *curfdp; /* current file description */
428 static char *infilename; /* current input file name */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
434 static const int invalidcharno = -1;
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
443 static bool append_to_tagfile; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals; /* create tags for global variables */
453 static int members; /* create tags for C member variables */
454 static int declarations; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive; /* ignore #line directives (undocumented) */
456 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
457 static bool update; /* -u: update tags */
458 static bool vgrind_style; /* -v: create vgrind style index output */
459 static bool no_warnings; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style; /* -x: create cxref style output */
461 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent; /* -I: ignore indentation in C */
463 static int packages_only; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */
474 static regexp *p_head; /* list of all regexps */
475 static bool need_filebuf; /* some regexes are multi-line */
477 static struct option longopts[] =
479 { "append", no_argument, NULL, 'a' },
480 { "packages-only", no_argument, &packages_only, 1 },
481 { "c++", no_argument, NULL, 'C' },
482 { "declarations", no_argument, &declarations, 1 },
483 { "no-line-directive", no_argument, &no_line_directive, 1 },
484 { "no-duplicates", no_argument, &no_duplicates, 1 },
485 { "help", no_argument, NULL, 'h' },
486 { "help", no_argument, NULL, 'H' },
487 { "ignore-indentation", no_argument, NULL, 'I' },
488 { "language", required_argument, NULL, 'l' },
489 { "members", no_argument, &members, 1 },
490 { "no-members", no_argument, &members, 0 },
491 { "output", required_argument, NULL, 'o' },
492 { "class-qualify", no_argument, &class_qualify, 'Q' },
493 { "regex", required_argument, NULL, 'r' },
494 { "no-regex", no_argument, NULL, 'R' },
495 { "ignore-case-regex", required_argument, NULL, 'c' },
496 { "parse-stdin", required_argument, NULL, STDIN },
497 { "version", no_argument, NULL, 'V' },
499 #if CTAGS /* Ctags options */
500 { "backward-search", no_argument, NULL, 'B' },
501 { "cxref", no_argument, NULL, 'x' },
502 { "defines", no_argument, NULL, 'd' },
503 { "globals", no_argument, &globals, 1 },
504 { "typedefs", no_argument, NULL, 't' },
505 { "typedefs-and-c++", no_argument, NULL, 'T' },
506 { "update", no_argument, NULL, 'u' },
507 { "vgrind", no_argument, NULL, 'v' },
508 { "no-warn", no_argument, NULL, 'w' },
510 #else /* Etags options */
511 { "no-defines", no_argument, NULL, 'D' },
512 { "no-globals", no_argument, &globals, 0 },
513 { "include", required_argument, NULL, 'i' },
514 #endif
515 { NULL }
518 static compressor compressors[] =
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { "xz", "xz -d -c" },
526 { NULL }
/*
 * Language stuff.
 */

/* Ada code */
static const char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static const char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the '--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag 'bidule'.";
/* Assembly code */
static const char *Asm_suffixes [] =
  { "a",	/* Unix assembler */
    "asm",	/* Microcontroller assembly */
    "def",	/* BSO/Tasking definition includes */
    "inc",	/* Microcontroller include files */
    "ins",	/* Microcontroller include files */
    "s", "sa",	/* Unix assembler */
    "S",	/* cpp-processed Unix assembler */
    "src",	/* BSO/Tasking C compiler output */
    NULL
  };
static const char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static const char *default_C_suffixes [] =
  { "c", "h", NULL };
#if CTAGS				/* C help for Ctags */
static const char default_C_help [] =
"In C code, any C function is a tag. Use -t to tag typedefs.\n\
Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
Use -d to tag '#define' macro definitions and 'enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using '--declarations', and struct members by using '--members'.";
#else					/* C help for Etags */
static const char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
definitions and 'enum' constants are tags unless you specify\n\
'--no-defines'. Global variables are tags unless you specify\n\
'--no-globals' and so are struct members unless you specify\n\
'--no-members'. Use of '--no-globals', '--no-defines' and\n\
'--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using '--declarations'.";
#endif	/* C help for Ctags and Etags */
/* C++ code */
static const char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* PostScript with C syntax */
    NULL };
static const char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the '--no-members' option.\n\
Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
'operator+'.";
/* Java code.  The help text is const like every other help string.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol code.  The help text is const like every other help string.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
/* C* code (no dedicated help text is defined here) */
static const char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

/* Erlang code */
static const char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static const char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
/* Forth code.  The suffix table has internal linkage like all the other
   per-language tables in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by ':',\n\
constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran code */
static const char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";

/* Go code */
static const char *Go_suffixes [] = {"go", NULL};
static const char Go_help [] =
  "In Go code, functions, interfaces and packages are tags.";
/* HTML input */
static const char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static const char HTML_help [] =
"In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
'h3' headers. Also, tags are 'name=' in anchors and all\n\
occurrences of 'id='.";

/* Lisp code */
static const char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static const char Lisp_help [] =
"In Lisp code, any function defined with 'defun', any variable\n\
defined with 'defvar' or 'defconst', and in general the first\n\
argument of any expression that starts with '(def' in column zero\n\
is a tag.\n\
The '--declarations' option tags \"(defvar foo)\" constructs too.";
/* Lua scripts */
static const char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static const char Lua_help [] =
"In Lua scripts, all functions are tags.";

/* Makefiles are recognized by file name rather than by suffix.  */
static const char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static const char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify '--no-globals'.";
/* Objective C code */
static const char *Objc_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
static const char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
\n(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code */
static const char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static const char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl code; also selected through the listed interpreter names.  */
static const char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static const char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static const char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
'--globals' if you want to tag global variables. Tags for\n\
subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is 'main::SUB'.";

/* PHP code */
static const char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static const char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the '--no-members' option, vars are tags too.";
/* Files parsed as plain C (no dedicated help text is defined here) */
static const char *plain_C_suffixes [] =
  { "pc",			/* Pro*C file */
     NULL };

/* PostScript code */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog code */
static const char *Prolog_suffixes [] =
  { "prolog", NULL };
static const char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

/* Python code */
static const char *Python_suffixes [] =
  { "py", NULL };
static const char Python_help [] =
"In Python code, 'def' or 'class' at the beginning of a line\n\
generate a tag.";
/* Ruby code, recognized by suffix or by the listed file names.  */
static const char *Ruby_suffixes [] =
  { "rb", "ru", "rbw", NULL };
static const char *Ruby_filenames [] =
  { "Rakefile", "Thorfile", NULL };
static const char Ruby_help [] =
  "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
a line generate a tag. Constants also generate a tag.";

/* Can't do the `SCM' or `scm' prefix with a version number. */
static const char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with 'def' or with a\n\
construct whose name starts with 'def'. They also include\n\
variables set with 'set!' at top level in the file.";
/* TeX/LaTeX text */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands '\\chapter',\n\
'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
'\\newenvironment' or '\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
'TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";
/* Texinfo files */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Bison or Yacc input */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
/* Help texts for the two pseudo-languages and the fallback used for
   languages without detailed help.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";

static const char none_help [] =
"'none' is not a real language, it indicates to only do\n\
regexp processing on files.";

static const char no_lang_help [] =
"No detailed help available for this language.";
785 * Table of languages.
787 * It is ok for a given function to be listed under more than one
788 * name. I just didn't.
791 static language lang_names [] =
793 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
794 { "asm", Asm_help, Asm_labels, Asm_suffixes },
795 { "c", default_C_help, default_C_entries, default_C_suffixes },
796 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
797 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
798 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
799 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
800 { "forth", Forth_help, Forth_words, Forth_suffixes },
801 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
802 { "go", Go_help, Go_functions, Go_suffixes },
803 { "html", HTML_help, HTML_labels, HTML_suffixes },
804 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
805 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
806 { "lua", Lua_help, Lua_functions, Lua_suffixes },
807 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
808 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
809 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
810 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
811 { "php", PHP_help, PHP_functions, PHP_suffixes },
812 { "postscript",PS_help, PS_functions, PS_suffixes },
813 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
814 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
815 { "python", Python_help, Python_functions, Python_suffixes },
816 { "ruby", Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
817 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
818 { "tex", TeX_help, TeX_commands, TeX_suffixes },
819 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
820 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
821 { "auto", auto_help }, /* default guessing scheme */
822 { "none", none_help, just_read_file }, /* regexp matching only */
823 { NULL } /* end of list */
/* Print on standard output one line per entry of lang_names: the
   language name padded to 10 columns, then its default file names, then
   its suffixes, each suffix preceded by a dot.  An explanation of how the
   language of a file is guessed follows the list.  */
827 static void
828 print_language_names (void)
830 language *lang;
831 const char **name, **ext;
833 puts ("\nThese are the currently supported languages, along with the\n\
834 default file names and dot suffixes:");
/* Walk the table up to its NULL-named terminator.  */
835 for (lang = lang_names; lang->name != NULL; lang++)
837 printf (" %-*s", 10, lang->name);
/* Both lists are optional and, when present, NULL-terminated.  */
838 if (lang->filenames != NULL)
839 for (name = lang->filenames; *name != NULL; name++)
840 printf (" %s", *name);
841 if (lang->suffixes != NULL)
842 for (ext = lang->suffixes; *ext != NULL; ext++)
843 printf (" .%s", *ext);
/* End the line for this language.  */
844 puts ("");
846 puts ("where 'auto' means use default language for files based on file\n\
847 name suffix, and 'none' means only do regexp processing on files.\n\
848 If no language is specified and no matching suffix is found,\n\
849 the first line of the file is read for a sharp-bang (#!) sequence\n\
850 followed by the name of an interpreter. If no such sequence is found,\n\
851 Fortran is tried first; if no tags are found, C is tried next.\n\
852 When parsing any C file, a \"class\" or \"template\" keyword\n\
853 switches to C++.");
854 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
856 For detailed help on a given language use, for example,\n\
857 etags --help --lang=ada.");
860 #ifndef EMACS_NAME
861 # define EMACS_NAME "standalone"
862 #endif
863 #ifndef VERSION
864 # define VERSION "17.38.1.4"
865 #endif
866 static _Noreturn void
867 print_version (void)
869 char emacs_copyright[] = COPYRIGHT;
871 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
872 puts (emacs_copyright);
873 puts ("This program is distributed under the terms in ETAGS.README");
875 exit (EXIT_SUCCESS);
/* Set PRINT_UNDOCUMENTED_OPTIONS_HELP to true at build time to have
   print_help also describe the -w options.  */
878 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
879 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
880 #endif
/* Print help on standard output and exit successfully.  ARGBUFFER is
   the parsed argument list, terminated by an at_end entry.  If it holds
   any at_language entries, only the help text of each such language is
   printed, separated by blank lines.  Otherwise the usage line, the
   description of every option, the list of supported languages and the
   bug-report address are printed.  Which option descriptions appear
   depends on whether the program was built as ctags or etags (CTAGS).  */
882 static _Noreturn void
883 print_help (argument *argbuffer)
885 bool help_for_lang = false;
/* Language-specific help, one text per --language option seen.  */
887 for (; argbuffer->arg_type != at_end; argbuffer++)
888 if (argbuffer->arg_type == at_language)
890 if (help_for_lang)
891 puts ("");
892 puts (argbuffer->lang->help);
893 help_for_lang = true;
896 if (help_for_lang)
897 exit (EXIT_SUCCESS);
/* No language-specific help was requested: print the general help.  */
899 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
901 These are the options accepted by %s.\n", progname, progname);
902 puts ("You may use unambiguous abbreviations for the long option names.");
903 puts (" A - as file name means read names from stdin (one per line).\n\
904 Absolute names are stored in the output file as they are.\n\
905 Relative ones are stored relative to the output file's directory.\n");
907 puts ("-a, --append\n\
908 Append tag entries to existing tags file.");
910 puts ("--packages-only\n\
911 For Ada files, only generate tags for packages.");
913 if (CTAGS)
914 puts ("-B, --backward-search\n\
915 Write the search commands for the tag entries using '?', the\n\
916 backward-search command instead of '/', the forward-search command.");
918 /* This option is mostly obsolete, because etags can now automatically
919 detect C++. Retained for backward compatibility and for debugging and
920 experimentation. In principle, we could want to tag as C++ even
921 before any "class" or "template" keyword.
922 puts ("-C, --c++\n\
923 Treat files whose name suffix defaults to C language as C++ files.");
926 puts ("--declarations\n\
927 In C and derived languages, create tags for function declarations,");
928 if (CTAGS)
929 puts ("\tand create tags for extern variables if --globals is used.");
930 else
931 puts
932 ("\tand create tags for extern variables unless --no-globals is used.");
934 if (CTAGS)
935 puts ("-d, --defines\n\
936 Create tag entries for C #define constants and enum constants, too.");
937 else
938 puts ("-D, --no-defines\n\
939 Don't create tag entries for C #define constants and enum constants.\n\
940 This makes the tags file smaller.");
942 if (!CTAGS)
943 puts ("-i FILE, --include=FILE\n\
944 Include a note in tag file indicating that, when searching for\n\
945 a tag, one should also consult the tags file FILE after\n\
946 checking the current file.");
948 puts ("-l LANG, --language=LANG\n\
949 Force the following files to be considered as written in the\n\
950 named language up to the next --language=LANG option.");
952 if (CTAGS)
953 puts ("--globals\n\
954 Create tag entries for global variables in some languages.");
955 else
956 puts ("--no-globals\n\
957 Do not create tag entries for global variables in some\n\
958 languages. This makes the tags file smaller.");
960 puts ("--no-line-directive\n\
961 Ignore #line preprocessor directives in C and derived languages.");
963 if (CTAGS)
964 puts ("--members\n\
965 Create tag entries for members of structures in some languages.");
966 else
967 puts ("--no-members\n\
968 Do not create tag entries for members of structures\n\
969 in some languages.");
971 puts ("-Q, --class-qualify\n\
972 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
973 This produces tag names of the form \"class::member\" for C++,\n\
974 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
975 For Objective C, this also produces class methods qualified with\n\
976 their arguments, as in \"foo:bar:baz:more\".\n\
977 For Perl, this produces \"package::member\".");
978 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
979 Make a tag for each line matching a regular expression pattern\n\
980 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
981 files only. REGEXFILE is a file containing one REGEXP per line.\n\
982 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
983 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
984 puts (" If TAGNAME/ is present, the tags created are named.\n\
985 For example Tcl named tags can be created with:\n\
986 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
987 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
988 'm' means to allow multi-line matches, 's' implies 'm' and\n\
989 causes dot to match any character, including newline.");
991 puts ("-R, --no-regex\n\
992 Don't create tags from regexps for the following files.");
994 puts ("-I, --ignore-indentation\n\
995 In C and C++ do not assume that a closing brace in the first\n\
996 column is the final brace of a function or structure definition.");
998 puts ("-o FILE, --output=FILE\n\
999 Write the tags to FILE.");
1001 puts ("--parse-stdin=NAME\n\
1002 Read from standard input and record tags as belonging to file NAME.");
1004 if (CTAGS)
1006 puts ("-t, --typedefs\n\
1007 Generate tag entries for C and Ada typedefs.");
1008 puts ("-T, --typedefs-and-c++\n\
1009 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1010 and C++ member functions.");
1013 if (CTAGS)
1014 puts ("-u, --update\n\
1015 Update the tag entries for the given files, leaving tag\n\
1016 entries for other files in place. Currently, this is\n\
1017 implemented by deleting the existing entries for the given\n\
1018 files and then rewriting the new entries at the end of the\n\
1019 tags file. It is often faster to simply rebuild the entire\n\
1020 tag file than to use this.");
1022 if (CTAGS)
1024 puts ("-v, --vgrind\n\
1025 Print on the standard output an index of items intended for\n\
1026 human consumption, similar to the output of vgrind. The index\n\
1027 is sorted, and gives the page number of each item.");
1029 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1030 puts ("-w, --no-duplicates\n\
1031 Do not create duplicate tag entries, for compatibility with\n\
1032 traditional ctags.");
1034 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1035 puts ("-w, --no-warn\n\
1036 Suppress warning messages about duplicate tag entries.");
1038 puts ("-x, --cxref\n\
1039 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040 The output uses line numbers instead of page numbers, but\n\
1041 beyond that the differences are cosmetic; try both to see\n\
1042 which you like.");
1045 puts ("-V, --version\n\
1046 Print the version of the program.\n\
1047 -h, --help\n\
1048 Print this help message.\n\
1049 Followed by one or more '--language' options prints detailed\n\
1050 help about tag generation for the specified languages.");
/* Finish with the table of supported languages and the bug address.  */
1052 print_language_names ();
1054 puts ("");
1055 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1057 exit (EXIT_SUCCESS);
1062 main (int argc, char **argv)
1064 int i;
1065 unsigned int nincluded_files;
1066 char **included_files;
1067 argument *argbuffer;
1068 int current_arg, file_count;
1069 linebuffer filename_lb;
1070 bool help_asked = false;
1071 ptrdiff_t len;
1072 char *optstring;
1073 int opt;
1075 progname = argv[0];
1076 nincluded_files = 0;
1077 included_files = xnew (argc, char *);
1078 current_arg = 0;
1079 file_count = 0;
1081 /* Allocate enough no matter what happens. Overkill, but each one
1082 is small. */
1083 argbuffer = xnew (argc, argument);
1086 * Always find typedefs and structure tags.
1087 * Also default to find macro constants, enum constants, struct
1088 * members and global variables. Do it for both etags and ctags.
1090 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1091 globals = members = true;
1093 /* When the optstring begins with a '-' getopt_long does not rearrange the
1094 non-options arguments to be at the end, but leaves them alone. */
1095 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1096 (CTAGS) ? "BxdtTuvw" : "Di:",
1097 "");
1099 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1100 switch (opt)
1102 case 0:
1103 /* If getopt returns 0, then it has already processed a
1104 long-named option. We should do nothing. */
1105 break;
1107 case 1:
1108 /* This means that a file name has been seen. Record it. */
1109 argbuffer[current_arg].arg_type = at_filename;
1110 argbuffer[current_arg].what = optarg;
1111 len = strlen (optarg);
1112 if (whatlen_max < len)
1113 whatlen_max = len;
1114 ++current_arg;
1115 ++file_count;
1116 break;
1118 case STDIN:
1119 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1120 argbuffer[current_arg].arg_type = at_stdin;
1121 argbuffer[current_arg].what = optarg;
1122 len = strlen (optarg);
1123 if (whatlen_max < len)
1124 whatlen_max = len;
1125 ++current_arg;
1126 ++file_count;
1127 if (parsing_stdin)
1128 fatal ("cannot parse standard input more than once");
1129 parsing_stdin = true;
1130 break;
1132 /* Common options. */
1133 case 'a': append_to_tagfile = true; break;
1134 case 'C': cplusplus = true; break;
1135 case 'f': /* for compatibility with old makefiles */
1136 case 'o':
1137 if (tagfile)
1139 error ("-o option may only be given once.");
1140 suggest_asking_for_help ();
1141 /* NOTREACHED */
1143 tagfile = optarg;
1144 break;
1145 case 'I':
1146 case 'S': /* for backward compatibility */
1147 ignoreindent = true;
1148 break;
1149 case 'l':
1151 language *lang = get_language_from_langname (optarg);
1152 if (lang != NULL)
1154 argbuffer[current_arg].lang = lang;
1155 argbuffer[current_arg].arg_type = at_language;
1156 ++current_arg;
1159 break;
1160 case 'c':
1161 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1162 optarg = concat (optarg, "i", ""); /* memory leak here */
1163 FALLTHROUGH;
1164 case 'r':
1165 argbuffer[current_arg].arg_type = at_regexp;
1166 argbuffer[current_arg].what = optarg;
1167 len = strlen (optarg);
1168 if (whatlen_max < len)
1169 whatlen_max = len;
1170 ++current_arg;
1171 break;
1172 case 'R':
1173 argbuffer[current_arg].arg_type = at_regexp;
1174 argbuffer[current_arg].what = NULL;
1175 ++current_arg;
1176 break;
1177 case 'V':
1178 print_version ();
1179 break;
1180 case 'h':
1181 case 'H':
1182 help_asked = true;
1183 break;
1184 case 'Q':
1185 class_qualify = 1;
1186 break;
1188 /* Etags options */
1189 case 'D': constantypedefs = false; break;
1190 case 'i': included_files[nincluded_files++] = optarg; break;
1192 /* Ctags options. */
1193 case 'B': searchar = '?'; break;
1194 case 'd': constantypedefs = true; break;
1195 case 't': typedefs = true; break;
1196 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1197 case 'u': update = true; break;
1198 case 'v': vgrind_style = true; FALLTHROUGH;
1199 case 'x': cxref_style = true; break;
1200 case 'w': no_warnings = true; break;
1201 default:
1202 suggest_asking_for_help ();
1203 /* NOTREACHED */
1206 /* No more options. Store the rest of arguments. */
1207 for (; optind < argc; optind++)
1209 argbuffer[current_arg].arg_type = at_filename;
1210 argbuffer[current_arg].what = argv[optind];
1211 len = strlen (argv[optind]);
1212 if (whatlen_max < len)
1213 whatlen_max = len;
1214 ++current_arg;
1215 ++file_count;
1218 argbuffer[current_arg].arg_type = at_end;
1220 if (help_asked)
1221 print_help (argbuffer);
1222 /* NOTREACHED */
1224 if (nincluded_files == 0 && file_count == 0)
1226 error ("no input files specified.");
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1231 if (tagfile == NULL)
1232 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1233 cwd = etags_getcwd (); /* the current working directory */
1234 if (cwd[strlen (cwd) - 1] != '/')
1236 char *oldcwd = cwd;
1237 cwd = concat (oldcwd, "/", "");
1238 free (oldcwd);
1241 /* Compute base directory for relative file names. */
1242 if (streq (tagfile, "-")
1243 || strneq (tagfile, "/dev/", 5))
1244 tagfiledir = cwd; /* relative file names are relative to cwd */
1245 else
1247 canonicalize_filename (tagfile);
1248 tagfiledir = absolute_dirname (tagfile, cwd);
1251 linebuffer_init (&lb);
1252 linebuffer_init (&filename_lb);
1253 linebuffer_init (&filebuf);
1254 linebuffer_init (&token_name);
1256 if (!CTAGS)
1258 if (streq (tagfile, "-"))
1260 tagf = stdout;
1261 set_binary_mode (STDOUT_FILENO, O_BINARY);
1263 else
1264 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1265 if (tagf == NULL)
1266 pfatal (tagfile);
1270 * Loop through files finding functions.
1272 for (i = 0; i < current_arg; i++)
1274 static language *lang; /* non-NULL if language is forced */
1275 char *this_file;
1277 switch (argbuffer[i].arg_type)
1279 case at_language:
1280 lang = argbuffer[i].lang;
1281 break;
1282 case at_regexp:
1283 analyze_regex (argbuffer[i].what);
1284 break;
1285 case at_filename:
1286 this_file = argbuffer[i].what;
1287 /* Input file named "-" means read file names from stdin
1288 (one per line) and use them. */
1289 if (streq (this_file, "-"))
1291 if (parsing_stdin)
1292 fatal ("cannot parse standard input "
1293 "AND read file names from it");
1294 while (readline_internal (&filename_lb, stdin, "-") > 0)
1295 process_file_name (filename_lb.buffer, lang);
1297 else
1298 process_file_name (this_file, lang);
1299 break;
1300 case at_stdin:
1301 this_file = argbuffer[i].what;
1302 process_file (stdin, this_file, lang);
1303 break;
1304 default:
1305 error ("internal error: arg_type");
1309 free_regexps ();
1310 free (lb.buffer);
1311 free (filebuf.buffer);
1312 free (token_name.buffer);
1314 if (!CTAGS || cxref_style)
1316 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1317 put_entries (nodehead);
1318 free_tree (nodehead);
1319 nodehead = NULL;
1320 if (!CTAGS)
1322 fdesc *fdp;
1324 /* Output file entries that have no tags. */
1325 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1326 if (!fdp->written)
1327 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1329 while (nincluded_files-- > 0)
1330 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1332 if (fclose (tagf) == EOF)
1333 pfatal (tagfile);
1336 return EXIT_SUCCESS;
1339 /* From here on, we are in (CTAGS && !cxref_style) */
1340 if (update)
1342 char *cmd =
1343 xmalloc (strlen (tagfile) + whatlen_max +
1344 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1345 for (i = 0; i < current_arg; ++i)
1347 switch (argbuffer[i].arg_type)
1349 case at_filename:
1350 case at_stdin:
1351 break;
1352 default:
1353 continue; /* the for loop */
1355 char *z = stpcpy (cmd, "mv ");
1356 z = stpcpy (z, tagfile);
1357 z = stpcpy (z, " OTAGS;grep -Fv '\t");
1358 z = stpcpy (z, argbuffer[i].what);
1359 z = stpcpy (z, "\t' OTAGS >");
1360 z = stpcpy (z, tagfile);
1361 strcpy (z, ";rm OTAGS");
1362 if (system (cmd) != EXIT_SUCCESS)
1363 fatal ("failed to execute shell command");
1365 free (cmd);
1366 append_to_tagfile = true;
1369 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1370 if (tagf == NULL)
1371 pfatal (tagfile);
1372 put_entries (nodehead); /* write all the tags (CTAGS) */
1373 free_tree (nodehead);
1374 nodehead = NULL;
1375 if (fclose (tagf) == EOF)
1376 pfatal (tagfile);
1378 if (CTAGS)
1379 if (append_to_tagfile || update)
1381 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1382 /* Maybe these should be used:
1383 setenv ("LC_COLLATE", "C", 1);
1384 setenv ("LC_ALL", "C", 1); */
1385 char *z = stpcpy (cmd, "sort -u -o ");
1386 z = stpcpy (z, tagfile);
1387 *z++ = ' ';
1388 strcpy (z, tagfile);
1389 return system (cmd);
1391 return EXIT_SUCCESS;
1396 * Return a compressor given the file name. If EXTPTR is non-zero,
1397 * return a pointer into FILE where the compressor-specific
1398 * extension begins. If no compressor is found, NULL is returned
1399 * and EXTPTR is not significant.
1400 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1402 static compressor *
1403 get_compressor_from_suffix (char *file, char **extptr)
1405 compressor *compr;
1406 char *slash, *suffix;
1408 /* File has been processed by canonicalize_filename,
1409 so we don't need to consider backslashes on DOS_NT. */
1410 slash = strrchr (file, '/');
1411 suffix = strrchr (file, '.');
1412 if (suffix == NULL || suffix < slash)
1413 return NULL;
1414 if (extptr != NULL)
1415 *extptr = suffix;
1416 suffix += 1;
1417 /* Let those poor souls who live with DOS 8+3 file name limits get
1418 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419 Only the first do loop is run if not MSDOS */
1422 for (compr = compressors; compr->suffix != NULL; compr++)
1423 if (streq (compr->suffix, suffix))
1424 return compr;
1425 if (!MSDOS)
1426 break; /* do it only once: not really a loop */
1427 if (extptr != NULL)
1428 *extptr = ++suffix;
1429 } while (*suffix != '\0');
1430 return NULL;
1436 * Return a language given the name.
1438 static language *
1439 get_language_from_langname (const char *name)
1441 language *lang;
1443 if (name == NULL)
1444 error ("empty language name");
1445 else
1447 for (lang = lang_names; lang->name != NULL; lang++)
1448 if (streq (name, lang->name))
1449 return lang;
1450 error ("unknown language \"%s\"", name);
1453 return NULL;
1458 * Return a language given the interpreter name.
1460 static language *
1461 get_language_from_interpreter (char *interpreter)
1463 language *lang;
1464 const char **iname;
1466 if (interpreter == NULL)
1467 return NULL;
1468 for (lang = lang_names; lang->name != NULL; lang++)
1469 if (lang->interpreters != NULL)
1470 for (iname = lang->interpreters; *iname != NULL; iname++)
1471 if (streq (*iname, interpreter))
1472 return lang;
1474 return NULL;
1480 * Return a language given the file name.
1482 static language *
1483 get_language_from_filename (char *file, int case_sensitive)
1485 language *lang;
1486 const char **name, **ext, *suffix;
1487 char *slash;
1489 /* Try whole file name first. */
1490 slash = strrchr (file, '/');
1491 if (slash != NULL)
1492 file = slash + 1;
1493 #ifdef DOS_NT
1494 else if (file[0] && file[1] == ':')
1495 file += 2;
1496 #endif
1497 for (lang = lang_names; lang->name != NULL; lang++)
1498 if (lang->filenames != NULL)
1499 for (name = lang->filenames; *name != NULL; name++)
1500 if ((case_sensitive)
1501 ? streq (*name, file)
1502 : strcaseeq (*name, file))
1503 return lang;
1505 /* If not found, try suffix after last dot. */
1506 suffix = strrchr (file, '.');
1507 if (suffix == NULL)
1508 return NULL;
1509 suffix += 1;
1510 for (lang = lang_names; lang->name != NULL; lang++)
1511 if (lang->suffixes != NULL)
1512 for (ext = lang->suffixes; *ext != NULL; ext++)
1513 if ((case_sensitive)
1514 ? streq (*ext, suffix)
1515 : strcaseeq (*ext, suffix))
1516 return lang;
1517 return NULL;
1522 * This routine is called on each file argument.
1524 static void
1525 process_file_name (char *file, language *lang)
1527 FILE *inf;
1528 fdesc *fdp;
1529 compressor *compr;
1530 char *compressed_name, *uncompressed_name;
1531 char *ext, *real_name, *tmp_name;
1532 int retval;
1534 canonicalize_filename (file);
1535 if (streq (file, tagfile) && !streq (tagfile, "-"))
1537 error ("skipping inclusion of %s in self.", file);
1538 return;
1540 compr = get_compressor_from_suffix (file, &ext);
1541 if (compr)
1543 compressed_name = file;
1544 uncompressed_name = savenstr (file, ext - file);
1546 else
1548 compressed_name = NULL;
1549 uncompressed_name = file;
1552 /* If the canonicalized uncompressed name
1553 has already been dealt with, skip it silently. */
1554 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1556 assert (fdp->infname != NULL);
1557 if (streq (uncompressed_name, fdp->infname))
1558 goto cleanup;
1561 inf = fopen (file, "r" FOPEN_BINARY);
1562 if (inf)
1563 real_name = file;
1564 else
1566 int file_errno = errno;
1567 if (compressed_name)
1569 /* Try with the given suffix. */
1570 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1571 if (inf)
1572 real_name = uncompressed_name;
1574 else
1576 /* Try all possible suffixes. */
1577 for (compr = compressors; compr->suffix != NULL; compr++)
1579 compressed_name = concat (file, ".", compr->suffix);
1580 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1581 if (inf)
1583 real_name = compressed_name;
1584 break;
1586 if (MSDOS)
1588 char *suf = compressed_name + strlen (file);
1589 size_t suflen = strlen (compr->suffix) + 1;
1590 for ( ; suf[1]; suf++, suflen--)
1592 memmove (suf, suf + 1, suflen);
1593 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1594 if (inf)
1596 real_name = compressed_name;
1597 break;
1600 if (inf)
1601 break;
1603 free (compressed_name);
1604 compressed_name = NULL;
1607 if (! inf)
1609 errno = file_errno;
1610 perror (file);
1611 goto cleanup;
1615 if (real_name == compressed_name)
1617 fclose (inf);
1618 tmp_name = etags_mktmp ();
1619 if (!tmp_name)
1620 inf = NULL;
1621 else
1623 #if MSDOS || defined (DOS_NT)
1624 char *cmd1 = concat (compr->command, " \"", real_name);
1625 char *cmd = concat (cmd1, "\" > ", tmp_name);
1626 #else
1627 char *cmd1 = concat (compr->command, " '", real_name);
1628 char *cmd = concat (cmd1, "' > ", tmp_name);
1629 #endif
1630 free (cmd1);
1631 int tmp_errno;
1632 if (system (cmd) == -1)
1634 inf = NULL;
1635 tmp_errno = EINVAL;
1637 else
1639 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1640 tmp_errno = errno;
1642 free (cmd);
1643 errno = tmp_errno;
1646 if (!inf)
1648 perror (real_name);
1649 goto cleanup;
1653 process_file (inf, uncompressed_name, lang);
1655 retval = fclose (inf);
1656 if (real_name == compressed_name)
1658 remove (tmp_name);
1659 free (tmp_name);
1661 if (retval < 0)
1662 pfatal (file);
1664 cleanup:
1665 if (compressed_name != file)
1666 free (compressed_name);
1667 if (uncompressed_name != file)
1668 free (uncompressed_name);
1669 last_node = NULL;
1670 curfdp = NULL;
1671 return;
1674 static void
1675 process_file (FILE *fh, char *fn, language *lang)
1677 static const fdesc emptyfdesc;
1678 fdesc *fdp;
1680 infilename = fn;
1681 /* Create a new input file description entry. */
1682 fdp = xnew (1, fdesc);
1683 *fdp = emptyfdesc;
1684 fdp->next = fdhead;
1685 fdp->infname = savestr (fn);
1686 fdp->lang = lang;
1687 fdp->infabsname = absolute_filename (fn, cwd);
1688 fdp->infabsdir = absolute_dirname (fn, cwd);
1689 if (filename_is_absolute (fn))
1691 /* An absolute file name. Canonicalize it. */
1692 fdp->taggedfname = absolute_filename (fn, NULL);
1694 else
1696 /* A file name relative to cwd. Make it relative
1697 to the directory of the tags file. */
1698 fdp->taggedfname = relative_filename (fn, tagfiledir);
1700 fdp->usecharno = true; /* use char position when making tags */
1701 fdp->prop = NULL;
1702 fdp->written = false; /* not written on tags file yet */
1704 fdhead = fdp;
1705 curfdp = fdhead; /* the current file description */
1707 find_entries (fh);
1709 /* If not Ctags, and if this is not metasource and if it contained no #line
1710 directives, we can write the tags and free all nodes pointing to
1711 curfdp. */
1712 if (!CTAGS
1713 && curfdp->usecharno /* no #line directives in this file */
1714 && !curfdp->lang->metasource)
1716 node *np, *prev;
1718 /* Look for the head of the sublist relative to this file. See add_node
1719 for the structure of the node tree. */
1720 prev = NULL;
1721 for (np = nodehead; np != NULL; prev = np, np = np->left)
1722 if (np->fdp == curfdp)
1723 break;
1725 /* If we generated tags for this file, write and delete them. */
1726 if (np != NULL)
1728 /* This is the head of the last sublist, if any. The following
1729 instructions depend on this being true. */
1730 assert (np->left == NULL);
1732 assert (fdhead == curfdp);
1733 assert (last_node->fdp == curfdp);
1734 put_entries (np); /* write tags for file curfdp->taggedfname */
1735 free_tree (np); /* remove the written nodes */
1736 if (prev == NULL)
1737 nodehead = NULL; /* no nodes left */
1738 else
1739 prev->left = NULL; /* delete the pointer to the sublist */
1744 static void
1745 reset_input (FILE *inf)
1747 if (fseek (inf, 0, SEEK_SET) != 0)
1748 perror (infilename);
1752 * This routine opens the specified file and calls the function
1753 * which finds the function and type definitions.
1755 static void
1756 find_entries (FILE *inf)
1758 char *cp;
1759 language *lang = curfdp->lang;
1760 Lang_function *parser = NULL;
1762 /* If user specified a language, use it. */
1763 if (lang != NULL && lang->function != NULL)
1765 parser = lang->function;
1768 /* Else try to guess the language given the file name. */
1769 if (parser == NULL)
1771 lang = get_language_from_filename (curfdp->infname, true);
1772 if (lang != NULL && lang->function != NULL)
1774 curfdp->lang = lang;
1775 parser = lang->function;
1779 /* Else look for sharp-bang as the first two characters. */
1780 if (parser == NULL
1781 && readline_internal (&lb, inf, infilename) > 0
1782 && lb.len >= 2
1783 && lb.buffer[0] == '#'
1784 && lb.buffer[1] == '!')
1786 char *lp;
1788 /* Set lp to point at the first char after the last slash in the
1789 line or, if no slashes, at the first nonblank. Then set cp to
1790 the first successive blank and terminate the string. */
1791 lp = strrchr (lb.buffer+2, '/');
1792 if (lp != NULL)
1793 lp += 1;
1794 else
1795 lp = skip_spaces (lb.buffer + 2);
1796 cp = skip_non_spaces (lp);
1797 *cp = '\0';
1799 if (strlen (lp) > 0)
1801 lang = get_language_from_interpreter (lp);
1802 if (lang != NULL && lang->function != NULL)
1804 curfdp->lang = lang;
1805 parser = lang->function;
1810 reset_input (inf);
1812 /* Else try to guess the language given the case insensitive file name. */
1813 if (parser == NULL)
1815 lang = get_language_from_filename (curfdp->infname, false);
1816 if (lang != NULL && lang->function != NULL)
1818 curfdp->lang = lang;
1819 parser = lang->function;
1823 /* Else try Fortran or C. */
1824 if (parser == NULL)
1826 node *old_last_node = last_node;
1828 curfdp->lang = get_language_from_langname ("fortran");
1829 find_entries (inf);
1831 if (old_last_node == last_node)
1832 /* No Fortran entries found. Try C. */
1834 reset_input (inf);
1835 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1836 find_entries (inf);
1838 return;
1841 if (!no_line_directive
1842 && curfdp->lang != NULL && curfdp->lang->metasource)
1843 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1844 file, or anyway we parsed a file that is automatically generated from
1845 this one. If this is the case, the bingo.c file contained #line
1846 directives that generated tags pointing to this file. Let's delete
1847 them all before parsing this file, which is the real source. */
1849 fdesc **fdpp = &fdhead;
1850 while (*fdpp != NULL)
1851 if (*fdpp != curfdp
1852 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1853 /* We found one of those! We must delete both the file description
1854 and all tags referring to it. */
1856 fdesc *badfdp = *fdpp;
1858 /* Delete the tags referring to badfdp->taggedfname
1859 that were obtained from badfdp->infname. */
1860 invalidate_nodes (badfdp, &nodehead);
1862 *fdpp = badfdp->next; /* remove the bad description from the list */
1863 free_fdesc (badfdp);
1865 else
1866 fdpp = &(*fdpp)->next; /* advance the list pointer */
1869 assert (parser != NULL);
1871 /* Generic initializations before reading from file. */
1872 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1874 /* Generic initializations before parsing file with readline. */
1875 lineno = 0; /* reset global line number */
1876 charno = 0; /* reset global char number */
1877 linecharno = 0; /* reset global char number of line start */
1879 parser (inf);
1881 regex_tag_multiline ();
1886 * Check whether an implicitly named tag should be created,
1887 * then call `pfnote'.
1888 * NAME is a string that is internally copied by this function.
1890 * TAGS format specification
1891 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1892 * The following is explained in some more detail in etc/ETAGS.EBNF.
1894 * make_tag creates tags with "implicit tag names" (unnamed tags)
1895 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1896 * 1. NAME does not contain any of the characters in NONAM;
1897 * 2. LINESTART contains name as either a rightmost, or rightmost but
1898 * one character, substring;
1899 * 3. the character, if any, immediately before NAME in LINESTART must
1900 * be a character in NONAM;
1901 * 4. the character, if any, immediately after NAME in LINESTART must
1902 * also be a character in NONAM.
1904 * The implementation uses the notinname() macro, which recognizes the
1905 * characters stored in the string `nonam'.
1906 * etags.el needs to use the same characters that are in NONAM.
1908 static void
1909 make_tag (const char *name, /* tag name, or NULL if unnamed */
1910 int namelen, /* tag length */
1911 bool is_func, /* tag is a function */
1912 char *linestart, /* start of the line where tag is */
1913 int linelen, /* length of the line where tag is */
1914 int lno, /* line number */
1915 long int cno) /* character number */
1917 bool named = (name != NULL && namelen > 0);
1918 char *nname = NULL;
1920 if (!CTAGS && named) /* maybe set named to false */
1921 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1922 such that etags.el can guess a name from it. */
1924 int i;
1925 register const char *cp = name;
1927 for (i = 0; i < namelen; i++)
1928 if (notinname (*cp++))
1929 break;
1930 if (i == namelen) /* rule #1 */
1932 cp = linestart + linelen - namelen;
1933 if (notinname (linestart[linelen-1]))
1934 cp -= 1; /* rule #4 */
1935 if (cp >= linestart /* rule #2 */
1936 && (cp == linestart
1937 || notinname (cp[-1])) /* rule #3 */
1938 && strneq (name, cp, namelen)) /* rule #2 */
1939 named = false; /* use implicit tag name */
1943 if (named)
1944 nname = savenstr (name, namelen);
1946 pfnote (nname, is_func, linestart, linelen, lno, cno);
1949 /* Record a tag. */
1950 static void
1951 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1952 long int cno)
1953 /* tag name, or NULL if unnamed */
1954 /* tag is a function */
1955 /* start of the line where tag is */
1956 /* length of the line where tag is */
1957 /* line number */
1958 /* character number */
1960 register node *np;
1962 assert (name == NULL || name[0] != '\0');
1963 if (CTAGS && name == NULL)
1964 return;
1966 np = xnew (1, node);
1968 /* If ctags mode, change name "main" to M<thisfilename>. */
1969 if (CTAGS && !cxref_style && streq (name, "main"))
1971 char *fp = strrchr (curfdp->taggedfname, '/');
1972 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1973 fp = strrchr (np->name, '.');
1974 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1975 fp[0] = '\0';
1977 else
1978 np->name = name;
1979 np->valid = true;
1980 np->been_warned = false;
1981 np->fdp = curfdp;
1982 np->is_func = is_func;
1983 np->lno = lno;
1984 if (np->fdp->usecharno)
1985 /* Our char numbers are 0-base, because of C language tradition?
1986 ctags compatibility? old versions compatibility? I don't know.
1987 Anyway, since emacs's are 1-base we expect etags.el to take care
1988 of the difference. If we wanted to have 1-based numbers, we would
1989 uncomment the +1 below. */
1990 np->cno = cno /* + 1 */ ;
1991 else
1992 np->cno = invalidcharno;
1993 np->left = np->right = NULL;
1994 if (CTAGS && !cxref_style)
1996 if (strlen (linestart) < 50)
1997 np->regex = concat (linestart, "$", "");
1998 else
1999 np->regex = savenstr (linestart, 50);
2001 else
2002 np->regex = savenstr (linestart, linelen);
2004 add_node (np, &nodehead);
/*
 * Utility functions and data to avoid recursion.
 */

/* One element of an explicit stack of tree nodes.  The stack is a
   singly linked list: push_node links a new entry in front of the
   current top, pop_node unlinks and frees the top entry.  */
typedef struct stack_entry {
  node *np;			/* the node held by this entry */
  struct stack_entry *next;	/* entry that was on top before this one */
} stkentry;
2016 static void
2017 push_node (node *np, stkentry **stack_top)
2019 if (np)
2021 stkentry *new = xnew (1, stkentry);
2023 new->np = np;
2024 new->next = *stack_top;
2025 *stack_top = new;
2029 static node *
2030 pop_node (stkentry **stack_top)
2032 node *ret = NULL;
2034 if (*stack_top)
2036 stkentry *old_start = *stack_top;
2038 ret = (*stack_top)->np;
2039 *stack_top = (*stack_top)->next;
2040 free (old_start);
2042 return ret;
2046 * free_tree ()
2047 * emulate recursion on left children, iterate on right children.
2049 static void
2050 free_tree (register node *np)
2052 stkentry *stack = NULL;
2054 while (np)
2056 /* Descent on left children. */
2057 while (np->left)
2059 push_node (np, &stack);
2060 np = np->left;
2062 /* Free node without left children. */
2063 node *node_right = np->right;
2064 free (np->name);
2065 free (np->regex);
2066 free (np);
2067 if (!node_right)
2069 /* Backtrack to find a node with right children, while freeing nodes
2070 that don't have right children. */
2071 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2073 node_right = np->right;
2074 free (np->name);
2075 free (np->regex);
2076 free (np);
2079 /* Free right children. */
2080 np = node_right;
2085 * free_fdesc ()
2086 * delete a file description
2088 static void
2089 free_fdesc (register fdesc *fdp)
2091 free (fdp->infname);
2092 free (fdp->infabsname);
2093 free (fdp->infabsdir);
2094 free (fdp->taggedfname);
2095 free (fdp->prop);
2096 free (fdp);
2100 * add_node ()
2101 * Adds a node to the tree of nodes. In etags mode, sort by file
2102 * name. In ctags mode, sort by tag name. Make no attempt at
2103 * balancing.
2105 * add_node is the only function allowed to add nodes, so it can
2106 * maintain state.
2108 static void
2109 add_node (node *np, node **cur_node_p)
2111 node *cur_node = *cur_node_p;
2113 /* Make the first node. */
2114 if (cur_node == NULL)
2116 *cur_node_p = np;
2117 last_node = np;
2118 return;
2121 if (!CTAGS)
2122 /* Etags Mode */
2124 /* For each file name, tags are in a linked sublist on the right
2125 pointer. The first tags of different files are a linked list
2126 on the left pointer. last_node points to the end of the last
2127 used sublist. */
2128 if (last_node != NULL && last_node->fdp == np->fdp)
2130 /* Let's use the same sublist as the last added node. */
2131 assert (last_node->right == NULL);
2132 last_node->right = np;
2133 last_node = np;
2135 else
2137 while (cur_node->fdp != np->fdp)
2139 if (cur_node->left == NULL)
2140 break;
2141 /* The head of this sublist is not good for us. Let's try the
2142 next one. */
2143 cur_node = cur_node->left;
2145 if (cur_node->left)
2147 /* Scanning the list we found the head of a sublist which is
2148 good for us. Let's scan this sublist. */
2149 if (cur_node->right)
2151 cur_node = cur_node->right;
2152 while (cur_node->right)
2153 cur_node = cur_node->right;
2155 /* Make a new node in this sublist. */
2156 cur_node->right = np;
2158 else
2160 /* Make a new sublist. */
2161 cur_node->left = np;
2163 last_node = np;
2165 } /* if ETAGS mode */
2166 else
2168 /* Ctags Mode */
2169 node **next_node = &cur_node;
2171 while ((cur_node = *next_node) != NULL)
2173 int dif = strcmp (np->name, cur_node->name);
2175 * If this tag name matches an existing one, then
2176 * do not add the node, but maybe print a warning.
2178 if (!dif && no_duplicates)
2180 if (np->fdp == cur_node->fdp)
2182 if (!no_warnings)
2184 fprintf (stderr,
2185 "Duplicate entry in file %s, line %d: %s\n",
2186 np->fdp->infname, lineno, np->name);
2187 fprintf (stderr, "Second entry ignored\n");
2190 else if (!cur_node->been_warned && !no_warnings)
2192 fprintf
2193 (stderr,
2194 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2195 np->fdp->infname, cur_node->fdp->infname, np->name);
2196 cur_node->been_warned = true;
2198 return;
2200 else
2201 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2203 *next_node = np;
2204 last_node = np;
2205 } /* if CTAGS mode */
2209 * invalidate_nodes ()
2210 * Scan the node tree and invalidate all nodes pointing to the
2211 * given file description (CTAGS case) or free them (ETAGS case).
2213 static void
2214 invalidate_nodes (fdesc *badfdp, node **npp)
2216 node *np = *npp;
2217 stkentry *stack = NULL;
2219 if (CTAGS)
2221 while (np)
2223 /* Push all the left children on the stack. */
2224 while (np->left != NULL)
2226 push_node (np, &stack);
2227 np = np->left;
2229 /* Invalidate this node. */
2230 if (np->fdp == badfdp)
2231 np->valid = false;
2232 if (!np->right)
2234 /* Pop nodes from stack, invalidating them, until we find one
2235 with a right child. */
2236 while ((np = pop_node (&stack)) != NULL)
2238 if (np->fdp == badfdp)
2239 np->valid = false;
2240 if (np->right != NULL)
2241 break;
2244 /* Process the right child, if any. */
2245 if (np)
2246 np = np->right;
2249 else
2251 node super_root, *np_parent = NULL;
2253 super_root.left = np;
2254 super_root.fdp = (fdesc *) -1;
2255 np = &super_root;
2257 while (np)
2259 /* Descent on left children until node with BADFP. */
2260 while (np && np->fdp != badfdp)
2262 assert (np->fdp != NULL);
2263 np_parent = np;
2264 np = np->left;
2266 if (np)
2268 np_parent->left = np->left; /* detach subtree from the tree */
2269 np->left = NULL; /* isolate it */
2270 free_tree (np); /* free it */
2272 /* Continue with rest of tree. */
2273 np = np_parent->left;
2276 *npp = super_root.left;
2281 static int total_size_of_entries (node *);
2282 static int number_len (long) ATTRIBUTE_CONST;
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero takes one digit.  */
static int
number_len (long int num)
{
  int digits = 1;

  /* Every division by ten that leaves something positive accounts
     for one more digit.  */
  for (num /= 10; num > 0; num /= 10)
    digits++;

  return digits;
}
2295 * Return total number of characters that put_entries will output for
2296 * the nodes in the linked list at the right of the specified node.
2297 * This count is irrelevant with etags.el since emacs 19.34 at least,
2298 * but is still supplied for backward compatibility.
2300 static int
2301 total_size_of_entries (register node *np)
2303 register int total = 0;
2305 for (; np != NULL; np = np->right)
2306 if (np->valid)
2308 total += strlen (np->regex) + 1; /* pat\177 */
2309 if (np->name != NULL)
2310 total += strlen (np->name) + 1; /* name\001 */
2311 total += number_len ((long) np->lno) + 1; /* lno, */
2312 if (np->cno != invalidcharno) /* cno */
2313 total += number_len (np->cno);
2314 total += 1; /* newline */
2317 return total;
2320 static void
2321 put_entry (node *np)
2323 register char *sp;
2324 static fdesc *fdp = NULL;
2326 /* Output this entry */
2327 if (np->valid)
2329 if (!CTAGS)
2331 /* Etags mode */
2332 if (fdp != np->fdp)
2334 fdp = np->fdp;
2335 fprintf (tagf, "\f\n%s,%d\n",
2336 fdp->taggedfname, total_size_of_entries (np));
2337 fdp->written = true;
2339 fputs (np->regex, tagf);
2340 fputc ('\177', tagf);
2341 if (np->name != NULL)
2343 fputs (np->name, tagf);
2344 fputc ('\001', tagf);
2346 fprintf (tagf, "%d,", np->lno);
2347 if (np->cno != invalidcharno)
2348 fprintf (tagf, "%ld", np->cno);
2349 fputs ("\n", tagf);
2351 else
2353 /* Ctags mode */
2354 if (np->name == NULL)
2355 error ("internal error: NULL name in ctags mode.");
2357 if (cxref_style)
2359 if (vgrind_style)
2360 fprintf (stdout, "%s %s %d\n",
2361 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2362 else
2363 fprintf (stdout, "%-16s %3d %-16s %s\n",
2364 np->name, np->lno, np->fdp->taggedfname, np->regex);
2366 else
2368 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2370 if (np->is_func)
2371 { /* function or #define macro with args */
2372 putc (searchar, tagf);
2373 putc ('^', tagf);
2375 for (sp = np->regex; *sp; sp++)
2377 if (*sp == '\\' || *sp == searchar)
2378 putc ('\\', tagf);
2379 putc (*sp, tagf);
2381 putc (searchar, tagf);
2383 else
2384 { /* anything else; text pattern inadequate */
2385 fprintf (tagf, "%d", np->lno);
2387 putc ('\n', tagf);
2390 } /* if this node contains a valid tag */
2393 static void
2394 put_entries (node *np)
2396 stkentry *stack = NULL;
2398 if (np == NULL)
2399 return;
2401 if (CTAGS)
2403 while (np)
2405 /* Stack subentries that precede this one. */
2406 while (np->left)
2408 push_node (np, &stack);
2409 np = np->left;
2411 /* Output this subentry. */
2412 put_entry (np);
2413 /* Stack subentries that follow this one. */
2414 while (!np->right)
2416 /* Output subentries that precede the next one. */
2417 np = pop_node (&stack);
2418 if (!np)
2419 break;
2420 put_entry (np);
2422 if (np)
2423 np = np->right;
2426 else
2428 push_node (np, &stack);
2429 while ((np = pop_node (&stack)) != NULL)
2431 /* Output this subentry. */
2432 put_entry (np);
2433 while (np->right)
2435 /* Output subentries that follow this one. */
2436 put_entry (np->right);
2437 /* Stack subentries from the following files. */
2438 push_node (np->left, &stack);
2439 np = np->right;
2441 push_node (np->left, &stack);
/* C extensions. */
/* Language selector bits.  C_STAR and C_JAVA both include the C_PLPL
   bit in their value.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */

/*
 * The C symbol tables.
 */
/* Classification of a token; st_none is what C_symtype returns for a
   token that is not a keyword in the language being parsed.  */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute, st_C_enum_bf,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2471 /* Feed stuff between (but not including) %[ and %] lines to:
2472 gperf -m 5
2474 %compare-strncmp
2475 %enum
2476 %struct-type
2477 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2479 if, 0, st_C_ignore
2480 for, 0, st_C_ignore
2481 while, 0, st_C_ignore
2482 switch, 0, st_C_ignore
2483 return, 0, st_C_ignore
2484 __attribute__, 0, st_C_attribute
2485 GTY, 0, st_C_attribute
2486 @interface, 0, st_C_objprot
2487 @protocol, 0, st_C_objprot
2488 @implementation,0, st_C_objimpl
2489 @end, 0, st_C_objend
2490 import, (C_JAVA & ~C_PLPL), st_C_ignore
2491 package, (C_JAVA & ~C_PLPL), st_C_ignore
2492 friend, C_PLPL, st_C_ignore
2493 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2494 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2495 interface, (C_JAVA & ~C_PLPL), st_C_struct
2496 class, 0, st_C_class
2497 namespace, C_PLPL, st_C_struct
2498 domain, C_STAR, st_C_struct
2499 union, 0, st_C_struct
2500 struct, 0, st_C_struct
2501 extern, 0, st_C_extern
2502 enum, 0, st_C_enum
2503 typedef, 0, st_C_typedef
2504 define, 0, st_C_define
2505 undef, 0, st_C_define
2506 operator, C_PLPL, st_C_operator
2507 template, 0, st_C_template
2508 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2509 DEFUN, 0, st_C_gnumacro
2510 SYSCALL, 0, st_C_gnumacro
2511 ENTRY, 0, st_C_gnumacro
2512 PSEUDO, 0, st_C_gnumacro
2513 ENUM_BF, 0, st_C_enum_bf
2514 # These are defined inside C functions, so currently they are not met.
2515 # EXFUN used in glibc, DEFVAR_* in emacs.
2516 #EXFUN, 0, st_C_gnumacro
2517 #DEFVAR_, 0, st_C_gnumacro
2519 and replace lines between %< and %> with its output, then:
2520 - remove the #if characterset check
2521 - remove any #line directives
2522 - make in_word_set static and not inline
2523 - remove any 'register' qualifications from variable decls. */
2524 /*%<*/
2525 /* C code produced by gperf version 3.0.1 */
2526 /* Command-line: gperf -m 5 */
2527 /* Computed positions: -k'2-3' */
/* One keyword of the table: its spelling, the C_* language bits it is
   restricted to (0 means no restriction, see C_symtype) and the kind
   of token it stands for.  */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 34, duplicates = 0 */
/* Hash function of the gperf-generated keyword table.  The value is
   LEN plus the association values of the second character of STR and,
   unless LEN is 2, of the third one as well.  STR must therefore be
   long enough for those characters to be read.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36,  3,
      27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
      36, 36, 36, 25,  0,  0, 36, 36, 36,  0,
      36, 36, 36, 36, 36,  1, 36, 16, 36,  6,
      23,  0,  0, 36, 22,  0, 36, 36,  5,  0,
       0, 15,  1, 36,  6, 36,  8, 19, 36, 16,
       4,  5, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36
    };
  /* Index the table with unsigned character values.  */
  const unsigned char *key = (const unsigned char *) str;
  int hval = len;

  /* A two-character key has no third character to add.  */
  if (len != 2)
    hval += asso_values[key[2]];
  hval += asso_values[key[1]];

  return hval;
}
2578 static struct C_stab_entry *
2579 in_word_set (register const char *str, register unsigned int len)
2581 enum
2583 TOTAL_KEYWORDS = 34,
2584 MIN_WORD_LENGTH = 2,
2585 MAX_WORD_LENGTH = 15,
2586 MIN_HASH_VALUE = 2,
2587 MAX_HASH_VALUE = 35
2590 static struct C_stab_entry wordlist[] =
2592 {""}, {""},
2593 {"if", 0, st_C_ignore},
2594 {"GTY", 0, st_C_attribute},
2595 {"@end", 0, st_C_objend},
2596 {"union", 0, st_C_struct},
2597 {"define", 0, st_C_define},
2598 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2599 {"template", 0, st_C_template},
2600 {"operator", C_PLPL, st_C_operator},
2601 {"@interface", 0, st_C_objprot},
2602 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2603 {"friend", C_PLPL, st_C_ignore},
2604 {"typedef", 0, st_C_typedef},
2605 {"return", 0, st_C_ignore},
2606 {"@implementation",0, st_C_objimpl},
2607 {"@protocol", 0, st_C_objprot},
2608 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2609 {"extern", 0, st_C_extern},
2610 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2611 {"struct", 0, st_C_struct},
2612 {"domain", C_STAR, st_C_struct},
2613 {"switch", 0, st_C_ignore},
2614 {"enum", 0, st_C_enum},
2615 {"for", 0, st_C_ignore},
2616 {"namespace", C_PLPL, st_C_struct},
2617 {"class", 0, st_C_class},
2618 {"while", 0, st_C_ignore},
2619 {"undef", 0, st_C_define},
2620 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2621 {"__attribute__", 0, st_C_attribute},
2622 {"ENTRY", 0, st_C_gnumacro},
2623 {"SYSCALL", 0, st_C_gnumacro},
2624 {"ENUM_BF", 0, st_C_enum_bf},
2625 {"PSEUDO", 0, st_C_gnumacro},
2626 {"DEFUN", 0, st_C_gnumacro}
2629 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2631 int key = hash (str, len);
2633 if (key <= MAX_HASH_VALUE && key >= 0)
2635 const char *s = wordlist[key].name;
2637 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2638 return &wordlist[key];
2641 return 0;
2643 /*%>*/
2645 static enum sym_type
2646 C_symtype (char *str, int len, int c_ext)
2648 register struct C_stab_entry *se = in_word_set (str, len);
2650 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2651 return st_none;
2652 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
/* Set by consider_token when it meets an st_C_attribute token.  */
static bool inattribute;	/* looking at an __attribute__ construct */

/* Ignoring ENUM_BF (type)
 *
 */
/* Set by consider_token when it meets an st_C_enum_bf token outside
   of a preprocessor line.  */
static bool in_enum_bf;		/* inside parentheses following ENUM_BF */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
/* consider_token stores a fresh copy of the class name here in the
   oprotocol and oimplementation states.  */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* when false, do not create a tag (make_C_tag
				   only tags a valid token and clears this
				   afterwards); the token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above (int, char *, int);
static void popclass_above (int);
static void write_classname (linebuffer *, const char *qualifier);

/* cname and bracelev are parallel arrays of SIZE elements, of which
   the first NL are in use.  An element of cname may be NULL (see
   pushclass_above).  */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function. */
/* This expands to an expression that uses a variable named bracelev,
   which must be in scope where the macro is used.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2794 static void
2795 pushclass_above (int bracelev, char *str, int len)
2797 int nl;
2799 popclass_above (bracelev);
2800 nl = cstack.nl;
2801 if (nl >= cstack.size)
2803 int size = cstack.size *= 2;
2804 xrnew (cstack.cname, size, char *);
2805 xrnew (cstack.bracelev, size, int);
2807 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2808 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2809 cstack.bracelev[nl] = bracelev;
2810 cstack.nl = nl + 1;
2813 static void
2814 popclass_above (int bracelev)
2816 int nl;
2818 for (nl = cstack.nl - 1;
2819 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2820 nl--)
2822 free (cstack.cname[nl]);
2823 cstack.nl = nl;
2827 static void
2828 write_classname (linebuffer *cn, const char *qualifier)
2830 int i, len;
2831 int qlen = strlen (qualifier);
2833 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2835 len = 0;
2836 cn->len = 0;
2837 cn->buffer[0] = '\0';
2839 else
2841 len = strlen (cstack.cname[0]);
2842 linebuffer_setlen (cn, len);
2843 strcpy (cn->buffer, cstack.cname[0]);
2845 for (i = 1; i < cstack.nl; i++)
2847 char *s = cstack.cname[i];
2848 if (s == NULL)
2849 continue;
2850 linebuffer_setlen (cn, len + qlen + strlen (s));
2851 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2856 static bool consider_token (char *, int, int, int *, int, int, bool *);
2857 static void make_C_tag (bool);
/*
 * consider_token ()
 *	checks to see if the current token is at the start of a
 *	function or variable, or corresponds to a typedef, or
 *	is a struct/union/enum tag, or #define, or an enum constant.
 *
 *	*IS_FUNC_OR_VAR gets true if the token is a function or #define macro
 *	with args.  C_EXTP points to which language we are looking at.
 *
 *	The return value is true when the token is one of the above,
 *	false otherwise.  The token is run, in this order, through the
 *	preprocessor, typedef, struct, Objective C and function/variable
 *	state machines; the first one that recognizes it returns.
 *
 * Globals
 *	fvdef			IN OUT
 *	structdef		IN OUT
 *	definedef		IN OUT
 *	typdef			IN OUT
 *	objdef			IN OUT
 */

static bool
consider_token (char *str, int len, int c, int *c_extp,
		int bracelev, int parlev, bool *is_func_or_var)
	/* IN: token pointer */
	/* IN: token length */
	/* IN: first char after the token */
	/* IN, OUT: C extensions mask */
	/* IN: brace level */
	/* IN: parenthesis level */
	/* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = true;
      return false;
     }

  /*
   * Skip ENUM_BF
   */
  if (toktype == st_C_enum_bf && definedef == dnone)
    {
      in_enum_bf = true;
      return false;
    }

  /*
   * Advance the definedef state machine.
   */
  switch (definedef)
    {
    case dnone:
      /* We're not on a preprocessor line. */
      if (toktype == st_C_gnumacro)
	{
	  fvdef = fdefunkey;
	  return false;
	}
      break;
    case dsharpseen:
      /* First token after '#': only define and undef are of interest. */
      if (toktype == st_C_define)
	{
	  definedef = ddefineseen;
	}
      else
	{
	  definedef = dignorerest;
	}
      return false;
    case ddefineseen:
      /*
       * Make a tag for any macro, unless it is a constant
       * and constantypedefs is false.
       */
      definedef = dignorerest;
      *is_func_or_var = (c == '(');
      if (!*is_func_or_var && !constantypedefs)
	return false;
      else
	return true;
    case dignorerest:
      return false;
    default:
      error ("internal error: definedef value.");
    }

  /*
   * Now typedefs
   */
  switch (typdef)
    {
    case tnone:
      if (toktype == st_C_typedef)
	{
	  if (typedefs)
	    typdef = tkeyseen;
	  fvextern = false;
	  fvdef = fvnone;
	  return false;
	}
      break;
    case tkeyseen:
      switch (toktype)
	{
	case st_none:
	case st_C_class:
	case st_C_struct:
	case st_C_enum:
	  typdef = ttypeseen;
	  break;
	default:
	  break;
	}
      break;
    case ttypeseen:
      if (structdef == snone && fvdef == fvnone)
	{
	  fvdef = fvnameseen;
	  return true;
	}
      break;
    case tend:
      /* Any token but a struct-like keyword is accepted here. */
      switch (toktype)
	{
	case st_C_class:
	case st_C_struct:
	case st_C_enum:
	  return false;
	default:
	  return true;
	}
    default:
      break;
    }

  switch (toktype)
    {
    case st_C_javastruct:
      if (structdef == stagseen)
        structdef = scolonseen;
      return false;
    case st_C_template:
    case st_C_class:
      if ((*c_extp & C_AUTO)	/* automatic detection of C++ language */
	  && bracelev == 0
	  && definedef == dnone && structdef == snone
	  && typdef == tnone && fvdef == fvnone)
	*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
      if (toktype == st_C_template)
	break;
      FALLTHROUGH;
    case st_C_struct:
    case st_C_enum:
      if (parlev == 0
	  && fvdef != vignore
	  && (typdef == tkeyseen
	      || (typedefs_or_cplusplus && structdef == snone)))
	{
	  structdef = skeyseen;
	  structtype = toktype;
	  structbracelev = bracelev;
	  if (fvdef == fvnameseen)
	    fvdef = fvnone;
	}
      return false;
    default:
      break;
    }

  /* The token right after a struct-like keyword is the tag. */
  if (structdef == skeyseen)
    {
      structdef = stagseen;
      return true;
    }

  if (typdef != tnone)
    definedef = dnone;

  /* Detect Objective C constructs. */
  switch (objdef)
    {
    case onone:
      switch (toktype)
	{
	case st_C_objprot:
	  objdef = oprotocol;
	  return false;
	case st_C_objimpl:
	  objdef = oimplementation;
	  return false;
	default:
	  break;
	}
      break;
    case oimplementation:
      /* Save the class tag for functions or variables defined inside. */
      objtag = savenstr (str, len);
      objdef = oinbody;
      return false;
    case oprotocol:
      /* Save the class tag for categories. */
      objtag = savenstr (str, len);
      objdef = otagseen;
      *is_func_or_var = true;
      return true;
    case oparenseen:
      objdef = ocatseen;
      *is_func_or_var = true;
      return true;
    case oinbody:
      break;
    case omethodsign:
      if (parlev == 0)
	{
	  /* Start the method name in token_name with this token. */
	  fvdef = fvnone;
	  objdef = omethodtag;
	  linebuffer_setlen (&token_name, len);
	  memcpy (token_name.buffer, str, len);
	  token_name.buffer[len] = '\0';
	  return true;
	}
      return false;
    case omethodcolon:
      if (parlev == 0)
	objdef = omethodparm;
      return false;
    case omethodparm:
      if (parlev == 0)
	{
	  objdef = omethodtag;
	  if (class_qualify)
	    {
	      /* Append this token to the name collected so far. */
	      int oldlen = token_name.len;
	      fvdef = fvnone;
	      linebuffer_setlen (&token_name, oldlen + len);
	      memcpy (token_name.buffer + oldlen, str, len);
	      token_name.buffer[oldlen + len] = '\0';
	    }
	  return true;
	}
      return false;
    case oignore:
      if (toktype == st_C_objend)
	{
	  /* Memory leakage here: the string pointed by objtag is
	     never released, because many tests would be needed to
	     avoid breaking on incorrect input code.  The amount of
	     memory leaked here is the sum of the lengths of the
	     class tags.
	  free (objtag); */
	  objdef = onone;
	}
      return false;
    default:
      break;
    }

  /* A function, variable or enum constant? */
  switch (toktype)
    {
    case st_C_extern:
      fvextern = true;
      switch (fvdef)
	{
	case finlist:
	case flistseen:
	case fignore:
	case vignore:
	  break;
	default:
	  fvdef = fvnone;
	}
      return false;
    case st_C_ignore:
      fvextern = false;
      fvdef = vignore;
      return false;
    case st_C_operator:
      fvdef = foperator;
      *is_func_or_var = true;
      return true;
    case st_none:
      if (constantypedefs
	  && structdef == snone
	  && structtype == st_C_enum && bracelev > structbracelev
	  /* Don't tag tokens in expressions that assign values to enum
	     constants.  */
	  && fvdef != vignore)
	return true;		/* enum constant */
      switch (fvdef)
	{
	case fdefunkey:
	  if (bracelev > 0)
	    break;
	  fvdef = fdefunname;	/* GNU macro */
	  *is_func_or_var = true;
	  return true;
	case fvnone:
	  switch (typdef)
	    {
	    case ttypeseen:
	      return false;
	    case tnone:
	      /* asm and __asm__ start something to be ignored. */
	      if ((strneq (str, "asm", 3) && endtoken (str[3]))
		  || (strneq (str, "__asm__", 7) && endtoken (str[7])))
		{
		  fvdef = vignore;
		  return false;
		}
	      break;
	    default:
	      break;
	    }
	  FALLTHROUGH;
	case fvnameseen:
	  /* A token ending in "::operator" is handled as the operator
	     keyword, and marks the source as C++. */
	  if (len >= 10 && strneq (str+len-10, "::operator", 10))
	    {
	      if (*c_extp & C_AUTO) /* automatic detection of C++ */
		*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
	      fvdef = foperator;
	      *is_func_or_var = true;
	      return true;
	    }
	  if (bracelev > 0 && !instruct)
	    break;
	  fvdef = fvnameseen;	/* function or variable */
	  *is_func_or_var = true;
	  return true;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;			/* set from charno when the line is read */
  linebuffer lb;		/* the line itself */
} lbs[2];

/* The macros below are not self-contained: they expand to code using
   variables of the function they appear in (curndx, newndx, c_ext,
   lp, quotednl, inf, savetoken) besides the file-level ones.  */

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb       (lbs[curndx].lb)
#define newlb       (lbs[newndx].lb)
#define curlinepos  (lbs[curndx].linepos)
#define newlinepos  (lbs[newndx].linepos)

#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next line into the current buffer and restart scanning at
   its beginning.  As the name says, definedef is left alone.  */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = false;							\
  newndx = curndx;							\
} while (0)

/* Same as CNL_SAVE_DEFINEDEF, but also bring back the token saved in
   savetoken, if that is valid, and reset definedef to dnone.  */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF ();						\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = false;						\
    }									\
  definedef = dnone;							\
} while (0)
3253 static void
3254 make_C_tag (bool isfun)
3256 /* This function is never called when token.valid is false, but
3257 we must protect against invalid input or internal errors. */
3258 if (token.valid)
3259 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3260 token.offset+token.length+1, token.lineno, token.linepos);
3261 else if (DEBUG)
3262 { /* this branch is optimized away if !DEBUG */
3263 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3264 token_name.len + 17, isfun, token.line,
3265 token.offset+token.length+1, token.lineno, token.linepos);
3266 error ("INVALID TOKEN");
3269 token.valid = false;
/* Return true as long as neither the end-of-file nor the error
   indicator of INF is set, that is, while reading more from INF may
   still succeed.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3280 * C_entries ()
3281 * This routine finds functions, variables, typedefs,
3282 * #define's, enum constants and struct/union/enum definitions in
3283 * C syntax and adds them to the list.
/* Scan the C-like source read from INF and make tags for it.
   C_EXT is a word of dialect flags (tested here through C_PLPL,
   C_JAVA, C_AUTO and YACC); the local copy is switched from C_AUTO to
   C_PLPL when a `::' is met inside an identifier.
   Input is read one line at a time through the CNL macros and walked
   character by character while comment, string, preprocessor,
   bracket, parenthesis, brace and template state is tracked.  Tags
   are emitted through make_C_tag.  The two line buffers in `lbs' are
   initialized on entry and freed before returning.  */
3285 static void
3286 C_entries (int c_ext, FILE *inf)
3287 /* extension of C */
3288 /* input file */
3290 register char c; /* latest char read; '\0' for end of line */
3291 register char *lp; /* pointer one beyond the character `c' */
3292 int curndx, newndx; /* indices for current and new lb */
3293 register int tokoff; /* offset in line of start of current token */
3294 register int toklen; /* length of current token */
3295 const char *qualifier; /* string used to qualify names */
3296 int qlen; /* length of qualifier */
3297 int bracelev; /* current brace level */
3298 int bracketlev; /* current bracket level */
3299 int parlev; /* current parenthesis level */
3300 int attrparlev; /* __attribute__ parenthesis level */
3301 int templatelev; /* current template level */
3302 int typdefbracelev; /* bracelev where a typedef struct body begun */
3303 bool incomm, inquote, inchar, quotednl, midtoken;
3304 bool yacc_rules; /* in the rules part of a yacc file */
3305 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3308 linebuffer_init (&lbs[0].lb);
3309 linebuffer_init (&lbs[1].lb);
/* Allocate the class stack the first time through; its initial size
   is 1 when DEBUG is set and 4 otherwise.  */
3310 if (cstack.size == 0)
3312 cstack.size = (DEBUG) ? 1 : 4;
3313 cstack.nl = 0;
3314 cstack.cname = xnew (cstack.size, char *);
3315 cstack.bracelev = xnew (cstack.size, int);
3318 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3319 curndx = newndx = 0;
3320 lp = curlb.buffer;
3321 *lp = 0;
3323 fvdef = fvnone; fvextern = false; typdef = tnone;
3324 structdef = snone; definedef = dnone; objdef = onone;
3325 yacc_rules = false;
3326 midtoken = inquote = inchar = incomm = quotednl = false;
3327 token.valid = savetoken.valid = false;
3328 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3329 if (cjava)
3330 { qualifier = "."; qlen = 1; }
3331 else
3332 { qualifier = "::"; qlen = 2; }
/* Main loop: take the next character of the current line.  The
   terminating '\0' of a line is what triggers reading the next one
   (see the CNL uses below).  */
3335 while (perhaps_more_input (inf))
3337 c = *lp++;
3338 if (c == '\\')
3340 /* If we are at the end of the line, the next character is a
3341 '\0'; do not skip it, because it is what tells us
3342 to read the next line. */
3343 if (*lp == '\0')
3345 quotednl = true;
3346 continue;
3348 lp++;
3349 c = ' ';
3351 else if (incomm)
3353 switch (c)
3355 case '*':
3356 if (*lp == '/')
3358 c = *lp++;
3359 incomm = false;
3361 break;
3362 case '\0':
3363 /* Newlines inside comments do not end macro definitions in
3364 traditional cpp. */
3365 CNL_SAVE_DEFINEDEF ();
3366 break;
3368 continue;
3370 else if (inquote)
3372 switch (c)
3374 case '"':
3375 inquote = false;
3376 break;
3377 case '\0':
3378 /* Newlines inside strings do not end macro definitions
3379 in traditional cpp, even though compilers don't
3380 usually accept them. */
3381 CNL_SAVE_DEFINEDEF ();
3382 break;
3384 continue;
3386 else if (inchar)
3388 switch (c)
3390 case '\0':
3391 /* Hmmm, something went wrong. */
3392 CNL ();
3393 FALLTHROUGH;
3394 case '\'':
3395 inchar = false;
3396 break;
3398 continue;
3400 else switch (c)
3402 case '"':
3403 inquote = true;
3404 if (bracketlev > 0)
3405 continue;
3406 if (inattribute)
3407 break;
3408 switch (fvdef)
3410 case fdefunkey:
3411 case fstartlist:
3412 case finlist:
3413 case fignore:
3414 case vignore:
3415 break;
3416 default:
3417 fvextern = false;
3418 fvdef = fvnone;
3420 continue;
3421 case '\'':
3422 inchar = true;
3423 if (bracketlev > 0)
3424 continue;
3425 if (inattribute)
3426 break;
3427 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3429 fvextern = false;
3430 fvdef = fvnone;
3432 continue;
3433 case '/':
3434 if (*lp == '*')
3436 incomm = true;
3437 lp++;
3438 c = ' ';
3439 if (bracketlev > 0)
3440 continue;
3442 else if (/* cplpl && */ *lp == '/')
3444 c = '\0';
3446 break;
3447 case '%':
3448 if ((c_ext & YACC) && *lp == '%')
3450 /* Entering or exiting rules section in yacc file. */
3451 lp++;
3452 definedef = dnone; fvdef = fvnone; fvextern = false;
3453 typdef = tnone; structdef = snone;
3454 midtoken = inquote = inchar = incomm = quotednl = false;
3455 bracelev = 0;
3456 yacc_rules = !yacc_rules;
3457 continue;
3459 else
3460 break;
3461 case '#':
3462 if (definedef == dnone)
3464 char *cp;
3465 bool cpptoken = true;
3467 /* Look back on this line. If all blanks, or nonblanks
3468 followed by an end of comment, this is a preprocessor
3469 token. */
3470 for (cp = newlb.buffer; cp < lp-1; cp++)
3471 if (!c_isspace (*cp))
3473 if (*cp == '*' && cp[1] == '/')
3475 cp++;
3476 cpptoken = true;
3478 else
3479 cpptoken = false;
3481 if (cpptoken)
3483 definedef = dsharpseen;
3484 /* This is needed for tagging enum values: when there are
3485 preprocessor conditionals inside the enum, we need to
3486 reset the value of fvdef so that the next enum value is
3487 tagged even though the one before it did not end in a
3488 comma. */
3489 if (fvdef == vignore && instruct && parlev == 0)
3491 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3492 fvdef = fvnone;
3495 } /* if (definedef == dnone) */
3496 continue;
3497 case '[':
3498 bracketlev++;
3499 continue;
3500 default:
3501 if (bracketlev > 0)
3503 if (c == ']')
3504 --bracketlev;
3505 else if (c == '\0')
3506 CNL_SAVE_DEFINEDEF ();
3507 continue;
3509 break;
3510 } /* switch (c) */
3513 /* Consider token only if some involved conditions are satisfied. */
3514 if (typdef != tignore
3515 && definedef != dignorerest
3516 && fvdef != finlist
3517 && templatelev == 0
3518 && (definedef != dnone
3519 || structdef != scolonseen)
3520 && !inattribute
3521 && !in_enum_bf)
3523 if (midtoken)
3525 if (endtoken (c))
3527 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3528 /* This handles :: in the middle,
3529 but not at the beginning of an identifier.
3530 Also, space-separated :: is not recognized. */
3532 if (c_ext & C_AUTO) /* automatic detection of C++ */
3533 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3534 lp += 2;
3535 toklen += 2;
3536 c = lp[-1];
3537 goto still_in_token;
3539 else
3541 bool funorvar = false;
3543 if (yacc_rules
3544 || consider_token (newlb.buffer + tokoff, toklen, c,
3545 &c_ext, bracelev, parlev,
3546 &funorvar))
3548 if (fvdef == foperator)
3550 char *oldlp = lp;
3551 lp = skip_spaces (lp-1);
3552 if (*lp != '\0')
3553 lp += 1;
3554 while (*lp != '\0'
3555 && !c_isspace (*lp) && *lp != '(')
3556 lp += 1;
3557 c = *lp++;
3558 toklen += lp - oldlp;
3560 token.named = false;
3561 if (!plainc
3562 && nestlev > 0 && definedef == dnone)
3563 /* in struct body */
3565 if (class_qualify)
3567 int len;
3568 write_classname (&token_name, qualifier);
3569 len = token_name.len;
3570 linebuffer_setlen (&token_name,
3571 len + qlen + toklen);
3572 sprintf (token_name.buffer + len, "%s%.*s",
3573 qualifier, toklen,
3574 newlb.buffer + tokoff);
3576 else
3578 linebuffer_setlen (&token_name, toklen);
3579 sprintf (token_name.buffer, "%.*s",
3580 toklen, newlb.buffer + tokoff);
3582 token.named = true;
3584 else if (objdef == ocatseen)
3585 /* Objective C category */
3587 if (class_qualify)
3589 int len = strlen (objtag) + 2 + toklen;
3590 linebuffer_setlen (&token_name, len);
3591 sprintf (token_name.buffer, "%s(%.*s)",
3592 objtag, toklen,
3593 newlb.buffer + tokoff);
3595 else
3597 linebuffer_setlen (&token_name, toklen);
3598 sprintf (token_name.buffer, "%.*s",
3599 toklen, newlb.buffer + tokoff);
3601 token.named = true;
3603 else if (objdef == omethodtag
3604 || objdef == omethodparm)
3605 /* Objective C method */
3607 token.named = true;
3609 else if (fvdef == fdefunname)
3610 /* GNU DEFUN and similar macros */
3612 bool defun = (newlb.buffer[tokoff] == 'F');
3613 int off = tokoff;
3614 int len = toklen;
3616 if (defun)
3618 off += 1;
3619 len -= 1;
3621 /* First, tag it as its C name */
3622 linebuffer_setlen (&token_name, toklen);
3623 memcpy (token_name.buffer,
3624 newlb.buffer + tokoff, toklen);
3625 token_name.buffer[toklen] = '\0';
3626 token.named = true;
3627 token.lineno = lineno;
3628 token.offset = tokoff;
3629 token.length = toklen;
3630 token.line = newlb.buffer;
3631 token.linepos = newlinepos;
3632 token.valid = true;
3633 make_C_tag (funorvar);
3635 /* Rewrite the tag so that emacs lisp DEFUNs
3636 can be found also by their elisp name */
3637 linebuffer_setlen (&token_name, len);
3638 memcpy (token_name.buffer,
3639 newlb.buffer + off, len);
3640 token_name.buffer[len] = '\0';
3641 if (defun)
3642 while (--len >= 0)
3643 if (token_name.buffer[len] == '_')
3644 token_name.buffer[len] = '-';
3645 token.named = defun;
3647 else
3649 linebuffer_setlen (&token_name, toklen);
3650 memcpy (token_name.buffer,
3651 newlb.buffer + tokoff, toklen);
3652 token_name.buffer[toklen] = '\0';
3653 /* Name macros and members. */
3654 token.named = (structdef == stagseen
3655 || typdef == ttypeseen
3656 || typdef == tend
3657 || (funorvar
3658 && definedef == dignorerest)
3659 || (funorvar
3660 && definedef == dnone
3661 && structdef == snone
3662 && bracelev > 0));
3664 token.lineno = lineno;
3665 token.offset = tokoff;
3666 token.length = toklen;
3667 token.line = newlb.buffer;
3668 token.linepos = newlinepos;
3669 token.valid = true;
3671 if (definedef == dnone
3672 && (fvdef == fvnameseen
3673 || fvdef == foperator
3674 || structdef == stagseen
3675 || typdef == tend
3676 || typdef == ttypeseen
3677 || objdef != onone))
3679 if (current_lb_is_new)
3680 switch_line_buffers ();
3682 else if (definedef != dnone
3683 || fvdef == fdefunname
3684 || instruct)
3685 make_C_tag (funorvar);
3687 else /* not yacc and consider_token failed */
3689 if (inattribute && fvdef == fignore)
3691 /* We have just met __attribute__ after a
3692 function parameter list: do not tag the
3693 function again. */
3694 fvdef = fvnone;
3697 midtoken = false;
3699 } /* if (endtoken (c)) */
3700 else if (intoken (c))
3701 still_in_token:
3703 toklen++;
3704 continue;
3706 } /* if (midtoken) */
3707 else if (begtoken (c))
3709 switch (definedef)
3711 case dnone:
3712 switch (fvdef)
3714 case fstartlist:
3715 /* This prevents tagging fb in
3716 void (__attribute__((noreturn)) *fb) (void);
3717 Fixing this is not easy and not very important. */
3718 fvdef = finlist;
3719 continue;
3720 case flistseen:
3721 if (plainc || declarations)
3723 make_C_tag (true); /* a function */
3724 fvdef = fignore;
3726 break;
3727 default:
3728 break;
3730 if (structdef == stagseen && !cjava)
3732 popclass_above (bracelev);
3733 structdef = snone;
3735 break;
3736 case dsharpseen:
3737 savetoken = token;
3738 break;
3739 default:
3740 break;
3742 if (!yacc_rules || lp == newlb.buffer + 1)
3744 tokoff = lp - 1 - newlb.buffer;
3745 toklen = 1;
3746 midtoken = true;
3748 continue;
3749 } /* if (begtoken) */
3750 } /* if must look at token */
3753 /* Detect end of line, colon, comma, semicolon and various braces
3754 after having handled a token.*/
3755 switch (c)
3757 case ':':
3758 if (inattribute)
3759 break;
3760 if (yacc_rules && token.offset == 0 && token.valid)
3762 make_C_tag (false); /* a yacc function */
3763 break;
3765 if (definedef != dnone)
3766 break;
3767 switch (objdef)
3769 case otagseen:
3770 objdef = oignore;
3771 make_C_tag (true); /* an Objective C class */
3772 break;
3773 case omethodtag:
3774 case omethodparm:
3775 objdef = omethodcolon;
3776 if (class_qualify)
3778 int toklen = token_name.len;
3779 linebuffer_setlen (&token_name, toklen + 1);
3780 strcpy (token_name.buffer + toklen, ":");
3782 break;
3783 default:
3784 break;
3786 if (structdef == stagseen)
3788 structdef = scolonseen;
3789 break;
3791 /* Should be useless, but may be work as a safety net. */
3792 if (cplpl && fvdef == flistseen)
3794 make_C_tag (true); /* a function */
3795 fvdef = fignore;
3796 break;
3798 break;
3799 case ';':
3800 if (definedef != dnone || inattribute)
3801 break;
3802 switch (typdef)
3804 case tend:
3805 case ttypeseen:
3806 make_C_tag (false); /* a typedef */
3807 typdef = tnone;
3808 fvdef = fvnone;
3809 break;
3810 case tnone:
3811 case tinbody:
3812 case tignore:
3813 switch (fvdef)
3815 case fignore:
3816 if (typdef == tignore || cplpl)
3817 fvdef = fvnone;
3818 break;
3819 case fvnameseen:
3820 if ((globals && bracelev == 0 && (!fvextern || declarations))
3821 || (members && instruct))
3822 make_C_tag (false); /* a variable */
3823 fvextern = false;
3824 fvdef = fvnone;
3825 token.valid = false;
3826 break;
3827 case flistseen:
3828 if ((declarations
3829 && (cplpl || !instruct)
3830 && (typdef == tnone || (typdef != tignore && instruct)))
3831 || (members
3832 && plainc && instruct))
3833 make_C_tag (true); /* a function */
3834 FALLTHROUGH;
3835 default:
3836 fvextern = false;
3837 fvdef = fvnone;
3838 if (declarations
3839 && cplpl && structdef == stagseen)
3840 make_C_tag (false); /* forward declaration */
3841 else
3842 token.valid = false;
3843 } /* switch (fvdef) */
3844 FALLTHROUGH;
3845 default:
3846 if (!instruct)
3847 typdef = tnone;
3849 if (structdef == stagseen)
3850 structdef = snone;
3851 break;
3852 case ',':
3853 if (definedef != dnone || inattribute)
3854 break;
3855 switch (objdef)
3857 case omethodtag:
3858 case omethodparm:
3859 make_C_tag (true); /* an Objective C method */
3860 objdef = oinbody;
3861 break;
3862 default:
3863 break;
3865 switch (fvdef)
3867 case fdefunkey:
3868 case foperator:
3869 case fstartlist:
3870 case finlist:
3871 case fignore:
3872 break;
3873 case vignore:
3874 if (instruct && parlev == 0)
3875 fvdef = fvnone;
3876 break;
3877 case fdefunname:
3878 fvdef = fignore;
3879 break;
3880 case fvnameseen:
3881 if (parlev == 0
3882 && ((globals
3883 && bracelev == 0
3884 && templatelev == 0
3885 && (!fvextern || declarations))
3886 || (members && instruct)))
3887 make_C_tag (false); /* a variable */
3888 break;
3889 case flistseen:
3890 if ((declarations && typdef == tnone && !instruct)
3891 || (members && typdef != tignore && instruct))
3893 make_C_tag (true); /* a function */
3894 fvdef = fvnameseen;
3896 else if (!declarations)
3897 fvdef = fvnone;
3898 token.valid = false;
3899 break;
3900 default:
3901 fvdef = fvnone;
3903 if (structdef == stagseen)
3904 structdef = snone;
3905 break;
3906 case ']':
3907 if (definedef != dnone || inattribute)
3908 break;
3909 if (structdef == stagseen)
3910 structdef = snone;
3911 switch (typdef)
3913 case ttypeseen:
3914 case tend:
3915 typdef = tignore;
3916 make_C_tag (false); /* a typedef */
3917 break;
3918 case tnone:
3919 case tinbody:
3920 switch (fvdef)
3922 case foperator:
3923 case finlist:
3924 case fignore:
3925 case vignore:
3926 break;
3927 case fvnameseen:
3928 if ((members && bracelev == 1)
3929 || (globals && bracelev == 0
3930 && (!fvextern || declarations)))
3931 make_C_tag (false); /* a variable */
3932 FALLTHROUGH;
3933 default:
3934 fvdef = fvnone;
3936 break;
3937 default:
3938 break;
3940 break;
3941 case '(':
3942 if (inattribute)
3944 attrparlev++;
3945 break;
3947 if (definedef != dnone)
3948 break;
3949 if (objdef == otagseen && parlev == 0)
3950 objdef = oparenseen;
3951 switch (fvdef)
3953 case fvnameseen:
3954 if (typdef == ttypeseen
3955 && *lp != '*'
3956 && !instruct)
3958 /* This handles constructs like:
3959 typedef void OperatorFun (int fun); */
3960 make_C_tag (false);
3961 typdef = tignore;
3962 fvdef = fignore;
3963 break;
3965 FALLTHROUGH;
3966 case foperator:
3967 fvdef = fstartlist;
3968 break;
3969 case flistseen:
3970 fvdef = finlist;
3971 break;
3972 default:
3973 break;
3975 parlev++;
3976 break;
3977 case ')':
3978 if (inattribute)
3980 if (--attrparlev == 0)
3981 inattribute = false;
3982 break;
3984 if (in_enum_bf)
3986 if (--parlev == 0)
3987 in_enum_bf = false;
3988 break;
3990 if (definedef != dnone)
3991 break;
3992 if (objdef == ocatseen && parlev == 1)
3994 make_C_tag (true); /* an Objective C category */
3995 objdef = oignore;
3997 if (--parlev == 0)
3999 switch (fvdef)
4001 case fstartlist:
4002 case finlist:
4003 fvdef = flistseen;
4004 break;
4005 default:
4006 break;
4008 if (!instruct
4009 && (typdef == tend
4010 || typdef == ttypeseen))
4012 typdef = tignore;
4013 make_C_tag (false); /* a typedef */
4016 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
4017 parlev = 0;
4018 break;
4019 case '{':
4020 if (definedef != dnone)
4021 break;
4022 if (typdef == ttypeseen)
4024 /* Whenever typdef is set to tinbody (currently only
4025 here), typdefbracelev should be set to bracelev. */
4026 typdef = tinbody;
4027 typdefbracelev = bracelev;
4029 switch (fvdef)
4031 case flistseen:
4032 if (cplpl && !class_qualify)
4034 /* Remove class and namespace qualifiers from the token,
4035 leaving only the method/member name. */
4036 char *cc, *uqname = token_name.buffer;
4037 char *tok_end = token_name.buffer + token_name.len;
4039 for (cc = token_name.buffer; cc < tok_end; cc++)
4041 if (*cc == ':' && cc[1] == ':')
4043 uqname = cc + 2;
4044 cc++;
4047 if (uqname > token_name.buffer)
4049 int uqlen = strlen (uqname);
4050 linebuffer_setlen (&token_name, uqlen);
4051 memmove (token_name.buffer, uqname, uqlen + 1);
4054 make_C_tag (true); /* a function */
4055 FALLTHROUGH;
4056 case fignore:
4057 fvdef = fvnone;
4058 break;
4059 case fvnone:
4060 switch (objdef)
4062 case otagseen:
4063 make_C_tag (true); /* an Objective C class */
4064 objdef = oignore;
4065 break;
4066 case omethodtag:
4067 case omethodparm:
4068 make_C_tag (true); /* an Objective C method */
4069 objdef = oinbody;
4070 break;
4071 default:
4072 /* Neutralize `extern "C" {' grot. */
4073 if (bracelev == 0 && structdef == snone && nestlev == 0
4074 && typdef == tnone)
4075 bracelev = -1;
4077 break;
4078 default:
4079 break;
4081 switch (structdef)
4083 case skeyseen: /* unnamed struct */
4084 pushclass_above (bracelev, NULL, 0);
4085 structdef = snone;
4086 break;
4087 case stagseen: /* named struct or enum */
4088 case scolonseen: /* a class */
4089 pushclass_above (bracelev,token.line+token.offset, token.length);
4090 structdef = snone;
4091 make_C_tag (false); /* a struct or enum */
4092 break;
4093 default:
4094 break;
4096 bracelev += 1;
4097 break;
4098 case '*':
4099 if (definedef != dnone)
4100 break;
4101 if (fvdef == fstartlist)
4103 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4104 token.valid = false;
4106 break;
4107 case '}':
4108 if (definedef != dnone)
4109 break;
4110 bracelev -= 1;
4111 if (!ignoreindent && lp == newlb.buffer + 1)
4113 if (bracelev != 0)
4114 token.valid = false; /* unexpected value, token unreliable */
4115 bracelev = 0; /* reset brace level if first column */
4116 parlev = 0; /* also reset paren level, just in case... */
4118 else if (bracelev < 0)
4120 token.valid = false; /* something gone amiss, token unreliable */
4121 bracelev = 0;
4123 if (bracelev == 0 && fvdef == vignore)
4124 fvdef = fvnone; /* end of function */
4125 popclass_above (bracelev);
4126 structdef = snone;
4127 /* Only if typdef == tinbody is typdefbracelev significant. */
4128 if (typdef == tinbody && bracelev <= typdefbracelev)
4130 assert (bracelev == typdefbracelev);
4131 typdef = tend;
4133 break;
4134 case '=':
4135 if (definedef != dnone)
4136 break;
4137 switch (fvdef)
4139 case foperator:
4140 case finlist:
4141 case fignore:
4142 case vignore:
4143 break;
4144 case fvnameseen:
4145 if ((members && bracelev == 1)
4146 || (globals && bracelev == 0 && (!fvextern || declarations)))
4147 make_C_tag (false); /* a variable */
4148 FALLTHROUGH;
4149 default:
4150 fvdef = vignore;
4152 break;
4153 case '<':
4154 if (cplpl
4155 && (structdef == stagseen || fvdef == fvnameseen))
4157 templatelev++;
4158 break;
4160 goto resetfvdef;
4161 case '>':
4162 if (templatelev > 0)
4164 templatelev--;
4165 break;
4167 goto resetfvdef;
4168 case '+':
4169 case '-':
4170 if (objdef == oinbody && bracelev == 0)
4172 objdef = omethodsign;
4173 break;
4175 FALLTHROUGH;
4176 resetfvdef:
4177 case '#': case '~': case '&': case '%': case '/':
4178 case '|': case '^': case '!': case '.': case '?':
4179 if (definedef != dnone)
4180 break;
4181 /* These surely cannot follow a function tag in C. */
4182 switch (fvdef)
4184 case foperator:
4185 case finlist:
4186 case fignore:
4187 case vignore:
4188 break;
4189 default:
4190 fvdef = fvnone;
4192 break;
4193 case '\0':
4194 if (objdef == otagseen)
4196 make_C_tag (true); /* an Objective C class */
4197 objdef = oignore;
4199 /* If a macro spans multiple lines don't reset its state. */
4200 if (quotednl)
4201 CNL_SAVE_DEFINEDEF ();
4202 else
4203 CNL ();
4204 break;
4205 } /* switch (c) */
4207 } /* while not eof */
4209 free (lbs[0].lb.buffer);
4210 free (lbs[1].lb.buffer);
4214 * Process either a C++ file or a C file depending on the setting
4215 * of a global flag.
4217 static void
4218 default_C_entries (FILE *inf)
4220 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: C_entries is called with no dialect flag set.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_dialect_flags = 0;

  C_entries (no_dialect_flags, inf);
}
4230 /* Always do C++. */
4231 static void
4232 Cplusplus_entries (FILE *inf)
4234 C_entries (C_PLPL, inf);
4237 /* Always do Java. */
4238 static void
4239 Cjava_entries (FILE *inf)
4241 C_entries (C_JAVA, inf);
4244 /* Always do C*. */
4245 static void
4246 Cstar_entries (FILE *inf)
4248 C_entries (C_STAR, inf);
4251 /* Always do Yacc. */
4252 static void
4253 Yacc_entries (FILE *inf)
4255 C_entries (YACC, inf);
/* Useful macros.  */

/* Loop header: while STREAM may still have input, read its next line
   into LINEBUF and point CURSOR at the start of that line.  The
   statement written after the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(stream, linebuf, cursor)                    \
  while (perhaps_more_input (stream)                                    \
         && (readline (&(linebuf), stream),                             \
             (cursor) = (linebuf).buffer,                               \
             true))
/* True when PTR points at the keyword WORD and the character after it
   cannot be part of a name; PTR is then moved past WORD and the
   spaces that follow.  WORD must be a string literal.  */
#define LOOKING_AT(ptr, word)                                           \
  ((assert ("" word), true)     /* fails to compile unless a literal */ \
   && strneq ((ptr), word, sizeof (word) - 1)   /* ptr is at word */    \
   && notinname ((ptr)[sizeof (word) - 1])      /* word ends here */    \
   && ((ptr) = skip_spaces ((ptr) + sizeof (word) - 1), true)) /* skip spaces */
/* Similar to LOOKING_AT, but compares without regard to case, does not
   use notinname and does not skip spaces: on a match PTR is only moved
   past WORD.  WORD must be a string literal.  */
#define LOOKING_AT_NOCASE(ptr, word)                                    \
  ((assert ("" word), true)     /* fails to compile unless a literal */ \
   && strncaseeq ((ptr), word, sizeof (word) - 1) /* ptr is at word */  \
   && ((ptr) += sizeof (word) - 1, true))       /* step over word */
4279 * Read a file, but do no processing. This is used to do regexp
4280 * matching on files that have no language defined.
4282 static void
4283 just_read_file (FILE *inf)
4285 while (perhaps_more_input (inf))
4286 readline (&lb, inf);
4290 /* Fortran parsing */
/* Forward declarations of the two helpers defined just below.  */
4292 static void F_takeprec (void);
4293 static void F_getit (FILE *);
4295 static void
4296 F_takeprec (void)
4298 dbp = skip_spaces (dbp);
4299 if (*dbp != '*')
4300 return;
4301 dbp++;
4302 dbp = skip_spaces (dbp);
4303 if (strneq (dbp, "(*)", 3))
4305 dbp += 3;
4306 return;
4308 if (!c_isdigit (*dbp))
4310 --dbp; /* force failure */
4311 return;
4314 dbp++;
4315 while (c_isdigit (*dbp));
4318 static void
4319 F_getit (FILE *inf)
4321 register char *cp;
4323 dbp = skip_spaces (dbp);
4324 if (*dbp == '\0')
4326 readline (&lb, inf);
4327 dbp = lb.buffer;
4328 if (dbp[5] != '&')
4329 return;
4330 dbp += 6;
4331 dbp = skip_spaces (dbp);
4333 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4334 return;
4335 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4336 continue;
4337 make_tag (dbp, cp-dbp, true,
4338 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4342 static void
4343 Fortran_functions (FILE *inf)
4345 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4347 if (*dbp == '%')
4348 dbp++; /* Ratfor escape to fortran */
4349 dbp = skip_spaces (dbp);
4350 if (*dbp == '\0')
4351 continue;
4353 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4354 dbp = skip_spaces (dbp);
4356 if (LOOKING_AT_NOCASE (dbp, "pure"))
4357 dbp = skip_spaces (dbp);
4359 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4360 dbp = skip_spaces (dbp);
4362 switch (c_tolower (*dbp))
4364 case 'i':
4365 if (nocase_tail ("integer"))
4366 F_takeprec ();
4367 break;
4368 case 'r':
4369 if (nocase_tail ("real"))
4370 F_takeprec ();
4371 break;
4372 case 'l':
4373 if (nocase_tail ("logical"))
4374 F_takeprec ();
4375 break;
4376 case 'c':
4377 if (nocase_tail ("complex") || nocase_tail ("character"))
4378 F_takeprec ();
4379 break;
4380 case 'd':
4381 if (nocase_tail ("double"))
4383 dbp = skip_spaces (dbp);
4384 if (*dbp == '\0')
4385 continue;
4386 if (nocase_tail ("precision"))
4387 break;
4388 continue;
4390 break;
4392 dbp = skip_spaces (dbp);
4393 if (*dbp == '\0')
4394 continue;
4395 switch (c_tolower (*dbp))
4397 case 'f':
4398 if (nocase_tail ("function"))
4399 F_getit (inf);
4400 continue;
4401 case 's':
4402 if (nocase_tail ("subroutine"))
4403 F_getit (inf);
4404 continue;
4405 case 'e':
4406 if (nocase_tail ("entry"))
4407 F_getit (inf);
4408 continue;
4409 case 'b':
4410 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4412 dbp = skip_spaces (dbp);
4413 if (*dbp == '\0') /* assume un-named */
4414 make_tag ("blockdata", 9, true,
4415 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4416 else
4417 F_getit (inf); /* look for name */
4419 continue;
4426 * Go language support
4427 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4429 static void
4430 Go_functions(FILE *inf)
4432 char *cp, *name;
4434 LOOP_ON_INPUT_LINES(inf, lb, cp)
4436 cp = skip_spaces (cp);
4438 if (LOOKING_AT (cp, "package"))
4440 name = cp;
4441 while (!notinname (*cp) && *cp != '\0')
4442 cp++;
4443 make_tag (name, cp - name, false, lb.buffer,
4444 cp - lb.buffer + 1, lineno, linecharno);
4446 else if (LOOKING_AT (cp, "func"))
4448 /* Go implementation of interface, such as:
4449 func (n *Integer) Add(m Integer) ...
4450 skip `(n *Integer)` part.
4452 if (*cp == '(')
4454 while (*cp != ')')
4455 cp++;
4456 cp = skip_spaces (cp+1);
4459 if (*cp)
4461 name = cp;
4463 while (!notinname (*cp))
4464 cp++;
4466 make_tag (name, cp - name, true, lb.buffer,
4467 cp - lb.buffer + 1, lineno, linecharno);
4470 else if (members && LOOKING_AT (cp, "type"))
4472 name = cp;
4474 /* Ignore the likes of the following:
4475 type (
4479 if (*cp == '(')
4480 return;
4482 while (!notinname (*cp) && *cp != '\0')
4483 cp++;
4485 make_tag (name, cp - name, false, lb.buffer,
4486 cp - lb.buffer + 1, lineno, linecharno);
4493 * Ada parsing
4494 * Original code by
4495 * Philippe Waroquiers (1998)
4498 /* Once we are positioned after an "interesting" keyword, let's get
4499 the real tag value necessary. */
4500 static void
4501 Ada_getit (FILE *inf, const char *name_qualifier)
4503 register char *cp;
4504 char *name;
4505 char c;
4507 while (perhaps_more_input (inf))
4509 dbp = skip_spaces (dbp);
4510 if (*dbp == '\0'
4511 || (dbp[0] == '-' && dbp[1] == '-'))
4513 readline (&lb, inf);
4514 dbp = lb.buffer;
4516 switch (c_tolower (*dbp))
4518 case 'b':
4519 if (nocase_tail ("body"))
4521 /* Skipping body of procedure body or package body or ....
4522 resetting qualifier to body instead of spec. */
4523 name_qualifier = "/b";
4524 continue;
4526 break;
4527 case 't':
4528 /* Skipping type of task type or protected type ... */
4529 if (nocase_tail ("type"))
4530 continue;
4531 break;
4533 if (*dbp == '"')
4535 dbp += 1;
4536 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4537 continue;
4539 else
4541 dbp = skip_spaces (dbp);
4542 for (cp = dbp;
4543 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4544 cp++)
4545 continue;
4546 if (cp == dbp)
4547 return;
4549 c = *cp;
4550 *cp = '\0';
4551 name = concat (dbp, name_qualifier, "");
4552 *cp = c;
4553 make_tag (name, strlen (name), true,
4554 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4555 free (name);
4556 if (c == '"')
4557 dbp = cp + 1;
4558 return;
4562 static void
4563 Ada_funcs (FILE *inf)
4565 bool inquote = false;
4566 bool skip_till_semicolumn = false;
4568 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4570 while (*dbp != '\0')
4572 /* Skip a string i.e. "abcd". */
4573 if (inquote || (*dbp == '"'))
4575 dbp = strchr (dbp + !inquote, '"');
4576 if (dbp != NULL)
4578 inquote = false;
4579 dbp += 1;
4580 continue; /* advance char */
4582 else
4584 inquote = true;
4585 break; /* advance line */
4589 /* Skip comments. */
4590 if (dbp[0] == '-' && dbp[1] == '-')
4591 break; /* advance line */
4593 /* Skip character enclosed in single quote i.e. 'a'
4594 and skip single quote starting an attribute i.e. 'Image. */
4595 if (*dbp == '\'')
4597 dbp++ ;
4598 if (*dbp != '\0')
4599 dbp++;
4600 continue;
4603 if (skip_till_semicolumn)
4605 if (*dbp == ';')
4606 skip_till_semicolumn = false;
4607 dbp++;
4608 continue; /* advance char */
4611 /* Search for beginning of a token. */
4612 if (!begtoken (*dbp))
4614 dbp++;
4615 continue; /* advance char */
4618 /* We are at the beginning of a token. */
4619 switch (c_tolower (*dbp))
4621 case 'f':
4622 if (!packages_only && nocase_tail ("function"))
4623 Ada_getit (inf, "/f");
4624 else
4625 break; /* from switch */
4626 continue; /* advance char */
4627 case 'p':
4628 if (!packages_only && nocase_tail ("procedure"))
4629 Ada_getit (inf, "/p");
4630 else if (nocase_tail ("package"))
4631 Ada_getit (inf, "/s");
4632 else if (nocase_tail ("protected")) /* protected type */
4633 Ada_getit (inf, "/t");
4634 else
4635 break; /* from switch */
4636 continue; /* advance char */
4638 case 'u':
4639 if (typedefs && !packages_only && nocase_tail ("use"))
4641 /* when tagging types, avoid tagging use type Pack.Typename;
4642 for this, we will skip everything till a ; */
4643 skip_till_semicolumn = true;
4644 continue; /* advance char */
4647 case 't':
4648 if (!packages_only && nocase_tail ("task"))
4649 Ada_getit (inf, "/k");
4650 else if (typedefs && !packages_only && nocase_tail ("type"))
4652 Ada_getit (inf, "/t");
4653 while (*dbp != '\0')
4654 dbp += 1;
4656 else
4657 break; /* from switch */
4658 continue; /* advance char */
4661 /* Look for the end of the token. */
4662 while (!endtoken (*dbp))
4663 dbp++;
4665 } /* advance char */
4666 } /* advance line */
4671 * Unix and microcontroller assembly tag handling
4672 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4673 * Idea by Bob Weiner, Motorola Inc. (1994)
4675 static void
4676 Asm_labels (FILE *inf)
4678 register char *cp;
4680 LOOP_ON_INPUT_LINES (inf, lb, cp)
4682 /* If first char is alphabetic or one of [_.$], test for colon
4683 following identifier. */
4684 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4686 /* Read past label. */
4687 cp++;
4688 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4689 cp++;
4690 if (*cp == ':' || c_isspace (*cp))
4691 /* Found end of label, so copy it and add it to the table. */
4692 make_tag (lb.buffer, cp - lb.buffer, true,
4693 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4700 * Perl support
4701 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4702 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4703 * Perl variable names: /^(my|local).../
4704 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4705 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4706 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4708 static void
4709 Perl_functions (FILE *inf)
4711 char *package = savestr ("main"); /* current package name */
4712 register char *cp;
4714 LOOP_ON_INPUT_LINES (inf, lb, cp)
4716 cp = skip_spaces (cp);
4718 if (LOOKING_AT (cp, "package"))
4720 free (package);
4721 get_tag (cp, &package);
4723 else if (LOOKING_AT (cp, "sub"))
4725 char *pos, *sp;
4727 subr:
4728 sp = cp;
4729 while (!notinname (*cp))
4730 cp++;
4731 if (cp == sp)
4732 continue; /* nothing found */
4733 pos = strchr (sp, ':');
4734 if (pos && pos < cp && pos[1] == ':')
4736 /* The name is already qualified. */
4737 if (!class_qualify)
4739 char *q = pos + 2, *qpos;
4740 while ((qpos = strchr (q, ':')) != NULL
4741 && qpos < cp
4742 && qpos[1] == ':')
4743 q = qpos + 2;
4744 sp = q;
4746 make_tag (sp, cp - sp, true,
4747 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4749 else if (class_qualify)
4750 /* Qualify it. */
4752 char savechar, *name;
4754 savechar = *cp;
4755 *cp = '\0';
4756 name = concat (package, "::", sp);
4757 *cp = savechar;
4758 make_tag (name, strlen (name), true,
4759 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4760 free (name);
4762 else
4763 make_tag (sp, cp - sp, true,
4764 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4766 else if (LOOKING_AT (cp, "use constant")
4767 || LOOKING_AT (cp, "use constant::defer"))
4769 /* For hash style multi-constant like
4770 use constant { FOO => 123,
4771 BAR => 456 };
4772 only the first FOO is picked up. Parsing across the value
4773 expressions would be difficult in general, due to possible nested
4774 hashes, here-documents, etc. */
4775 if (*cp == '{')
4776 cp = skip_spaces (cp+1);
4777 goto subr;
4779 else if (globals) /* only if we are tagging global vars */
4781 /* Skip a qualifier, if any. */
4782 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4783 /* After "my" or "local", but before any following paren or space. */
4784 char *varstart = cp;
4786 if (qual /* should this be removed? If yes, how? */
4787 && (*cp == '$' || *cp == '@' || *cp == '%'))
4789 varstart += 1;
4791 cp++;
4792 while (c_isalnum (*cp) || *cp == '_');
4794 else if (qual)
4796 /* Should be examining a variable list at this point;
4797 could insist on seeing an open parenthesis. */
4798 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4799 cp++;
4801 else
4802 continue;
4804 make_tag (varstart, cp - varstart, false,
4805 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4808 free (package);
4813 * Python support
4814 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4815 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4816 * More ideas by seb bacon <seb@jamkit.com> (2002)
4818 static void
4819 Python_functions (FILE *inf)
4821 register char *cp;
4823 LOOP_ON_INPUT_LINES (inf, lb, cp)
4825 cp = skip_spaces (cp);
4826 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4828 char *name = cp;
4829 while (!notinname (*cp) && *cp != ':')
4830 cp++;
4831 make_tag (name, cp - name, true,
4832 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4838 * Ruby support
4839 * Original code by Xi Lu <lx@shellcodes.org> (2015)
/* Scan the Ruby source read from INF and make tags for:
     - constants: an identifier starting with an upper-case letter that
       is followed by '=' (but not "==" or "=>");
     - the names following "def", "class" and "module";
     - the symbols listed after attr_reader, attr_writer, attr_accessor
       and alias_method.
   READER, WRITER, ALIAS and CONTINUATION live outside the line loop
   because an accessor list that ends in a comma continues on the
   following line.  */
4841 static void
4842 Ruby_functions (FILE *inf)
4844 char *cp = NULL;
4845 bool reader = false, writer = false, alias = false, continuation = false;
4847 LOOP_ON_INPUT_LINES (inf, lb, cp)
4849 bool is_class = false;
4850 bool is_method = false;
4851 char *name;
4853 cp = skip_spaces (cp);
4854 if (!continuation
4855 /* Constants. */
4856 && c_isalpha (*cp) && c_isupper (*cp))
4858 char *bp, *colon = NULL;
4860 name = cp;
/* Scan the identifier, remembering the last ':' seen in it.  */
4862 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4864 if (*cp == ':')
4865 colon = cp;
4867 if (cp > name + 1)
4869 bp = skip_spaces (cp);
/* Only an assignment makes a constant; "==" and "=>" do not.  */
4870 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
/* For a qualified name, tag only the part after the last ':'.  */
4872 if (colon && !c_isspace (colon[1]))
4873 name = colon + 1;
4874 make_tag (name, cp - name, false,
4875 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4879 else if (!continuation
4880 /* Modules, classes, methods. */
4881 && ((is_method = LOOKING_AT (cp, "def"))
4882 || (is_class = LOOKING_AT (cp, "class"))
4883 || LOOKING_AT (cp, "module")))
4885 const char self_name[] = "self.";
4886 const size_t self_size1 = sizeof (self_name) - 1;
4888 name = cp;
4890 /* Ruby method names can end in a '='. Also, operator overloading can
4891 define operators whose names include '='. */
4892 while (!notinname (*cp) || *cp == '=')
4893 cp++;
4895 /* Remove "self." from the method name. */
4896 if (cp - name > self_size1
4897 && strneq (name, self_name, self_size1))
4898 name += self_size1;
4900 /* Remove the class/module qualifiers from method names. */
4901 if (is_method)
4903 char *q;
/* Find the first '.' inside the name, if any.  */
4905 for (q = name; q < cp && *q != '.'; q++)
4907 if (q < cp - 1) /* punt if we see just "FOO." */
4908 name = q + 1;
4911 /* Don't tag singleton classes. */
4912 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4913 continue;
4915 make_tag (name, cp - name, true,
4916 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4918 else
4920 /* Tag accessors and aliases. */
4922 if (!continuation)
4923 reader = writer = alias = false;
/* Walk the line up to its end or to a '#' comment.  */
4925 while (*cp && *cp != '#')
4927 if (!continuation)
4929 reader = writer = alias = false;
4930 if (LOOKING_AT (cp, "attr_reader"))
4931 reader = true;
4932 else if (LOOKING_AT (cp, "attr_writer"))
4933 writer = true;
4934 else if (LOOKING_AT (cp, "attr_accessor"))
4936 reader = true;
4937 writer = true;
4939 else if (LOOKING_AT (cp, "alias_method"))
4940 alias = true;
4942 if (reader || writer || alias)
/* One iteration per comma-separated argument.  */
4944 do {
4945 char *np;
4947 cp = skip_spaces (cp);
4948 if (*cp == '(')
4949 cp = skip_spaces (cp + 1);
4950 np = cp;
4951 cp = skip_name (cp);
/* Only ":symbol" arguments are tagged; this `continue' jumps to the
   loop condition at the bottom of the do-while.  */
4952 if (*np != ':')
4953 continue;
4954 np++;
4955 if (reader)
4957 make_tag (np, cp - np, true,
4958 lb.buffer, cp - lb.buffer + 1,
4959 lineno, linecharno);
4960 continuation = false;
4962 if (writer)
/* The writer tag is the symbol name with '=' appended.  The second
   memcpy copies "=" together with its terminating NUL.  */
4964 size_t name_len = cp - np + 1;
4965 char *wr_name = xnew (name_len + 1, char);
4967 memcpy (wr_name, np, name_len - 1);
4968 memcpy (wr_name + name_len - 1, "=", 2);
4969 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4970 lineno, linecharno);
4971 continuation = false;
4973 if (alias)
/* Tag only the first argument of alias_method, then skip the rest of
   the statement, noting whether it ends in a comma.  */
4975 if (!continuation)
4976 make_tag (np, cp - np, true,
4977 lb.buffer, cp - lb.buffer + 1,
4978 lineno, linecharno);
4979 continuation = false;
4980 while (*cp && *cp != '#' && *cp != ';')
4982 if (*cp == ',')
4983 continuation = true;
4984 else if (!c_isspace (*cp))
4985 continuation = false;
4986 cp++;
4988 if (*cp == ';')
4989 continuation = false;
4991 cp = skip_spaces (cp);
/* Go on while a comma follows; for accessors the comma also sets
   CONTINUATION, so a trailing comma carries over to the next line.  */
4992 } while ((alias
4993 ? (*cp == ',')
4994 : (continuation = (*cp == ',')))
4995 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
/* Step over the next word and any separators after it.  */
4997 if (*cp != '#')
4998 cp = skip_name (cp);
4999 while (*cp && *cp != '#' && notinname (*cp))
5000 cp++;
5008 * PHP support
5009 * Look for:
5010 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5011 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5012 * - /^[ \t]*define\(\"[^\"]+/
5013 * Only with --members:
5014 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5015 * Idea by Diez B. Roggisch (2001)
5017 static void
5018 PHP_functions (FILE *inf)
5020 char *cp, *name;
5021 bool search_identifier = false;
5023 LOOP_ON_INPUT_LINES (inf, lb, cp)
5025 cp = skip_spaces (cp);
5026 name = cp;
5027 if (search_identifier
5028 && *cp != '\0')
5030 while (!notinname (*cp))
5031 cp++;
5032 make_tag (name, cp - name, true,
5033 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5034 search_identifier = false;
5036 else if (LOOKING_AT (cp, "function"))
5038 if (*cp == '&')
5039 cp = skip_spaces (cp+1);
5040 if (*cp != '\0')
5042 name = cp;
5043 while (!notinname (*cp))
5044 cp++;
5045 make_tag (name, cp - name, true,
5046 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5048 else
5049 search_identifier = true;
5051 else if (LOOKING_AT (cp, "class"))
5053 if (*cp != '\0')
5055 name = cp;
5056 while (*cp != '\0' && !c_isspace (*cp))
5057 cp++;
5058 make_tag (name, cp - name, false,
5059 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5061 else
5062 search_identifier = true;
5064 else if (strneq (cp, "define", 6)
5065 && (cp = skip_spaces (cp+6))
5066 && *cp++ == '('
5067 && (*cp == '"' || *cp == '\''))
5069 char quote = *cp++;
5070 name = cp;
5071 while (*cp != quote && *cp != '\0')
5072 cp++;
5073 make_tag (name, cp - name, false,
5074 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5076 else if (members
5077 && LOOKING_AT (cp, "var")
5078 && *cp == '$')
5080 name = cp;
5081 while (!notinname (*cp))
5082 cp++;
5083 make_tag (name, cp - name, false,
5084 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5091 * Cobol tag functions
5092 * We could look for anything that could be a paragraph name.
5093 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5094 * Idea by Corny de Souza (1993)
5096 static void
5097 Cobol_paragraphs (FILE *inf)
5099 register char *bp, *ep;
5101 LOOP_ON_INPUT_LINES (inf, lb, bp)
5103 if (lb.len < 9)
5104 continue;
5105 bp += 8;
5107 /* If eoln, compiler option or comment ignore whole line. */
5108 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5109 continue;
5111 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5112 continue;
5113 if (*ep++ == '.')
5114 make_tag (bp, ep - bp, true,
5115 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5121 * Makefile support
5122 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5124 static void
5125 Makefile_targets (FILE *inf)
5127 register char *bp;
5129 LOOP_ON_INPUT_LINES (inf, lb, bp)
5131 if (*bp == '\t' || *bp == '#')
5132 continue;
5133 while (*bp != '\0' && *bp != '=' && *bp != ':')
5134 bp++;
5135 if (*bp == ':' || (globals && *bp == '='))
5137 /* We should detect if there is more than one tag, but we do not.
5138 We just skip initial and final spaces. */
5139 char * namestart = skip_spaces (lb.buffer);
5140 while (--bp > namestart)
5141 if (!notinname (*bp))
5142 break;
5143 make_tag (namestart, bp - namestart + 1, true,
5144 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5151 * Pascal parsing
5152 * Original code by Mosur K. Mohan (1989)
5154 * Locates tags for procedures & functions. Doesn't do any type- or
5155 * var-definitions. It does look for the keyword "extern" or
5156 * "forward" immediately following the procedure statement; if found,
5157 * the tag is skipped.
/* Scan the Pascal source read from INF one character at a time and
   tag the name that follows each "procedure" or "function" keyword.
   The tag is not made as soon as the name is seen: the line and name
   are saved in TLINE, and the tag is emitted only after the ';' that
   ends the heading, provided the next token is not "extern" or
   "forward".  Text inside { }, (* *) comments and '...' strings is
   ignored.  */
5159 static void
5160 Pascal_functions (FILE *inf)
5162 linebuffer tline; /* mostly copied from C_entries */
5163 long save_lcno;
5164 int save_lineno, namelen, taglen;
5165 char c, *name;
5167 bool /* each of these flags is true if: */
5168 incomment, /* point is inside a comment */
5169 inquote, /* point is inside '..' string */
5170 get_tagname, /* point is after PROCEDURE/FUNCTION
5171 keyword, so next item = potential tag */
5172 found_tag, /* point is after a potential tag */
5173 inparms, /* point is within parameter-list */
5174 verify_tag; /* point has passed the parm-list, so the
5175 next token will determine whether this
5176 is a FORWARD/EXTERN to be ignored, or
5177 whether it is a real tag */
5179 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5180 name = NULL; /* keep compiler quiet */
/* Start with an empty line so the first pass through the loop reads
   a line from INF.  */
5181 dbp = lb.buffer;
5182 *dbp = '\0';
5183 linebuffer_init (&tline);
5185 incomment = inquote = false;
5186 found_tag = false; /* have a proc name; check if extern */
5187 get_tagname = false; /* found "procedure" keyword */
5188 inparms = false; /* found '(' after "proc" */
5189 verify_tag = false; /* check if "extern" is ahead */
5192 while (perhaps_more_input (inf)) /* long main loop to get next char */
5194 c = *dbp++;
5195 if (c == '\0') /* if end of line */
5197 readline (&lb, inf);
5198 dbp = lb.buffer;
5199 if (*dbp == '\0')
5200 continue;
5201 if (!((found_tag && verify_tag)
5202 || get_tagname))
5203 c = *dbp++; /* only if don't need *dbp pointing
5204 to the beginning of the name of
5205 the procedure or function */
5207 if (incomment)
5209 if (c == '}') /* within { } comments */
5210 incomment = false;
5211 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5213 dbp++;
5214 incomment = false;
5216 continue;
5218 else if (inquote)
5220 if (c == '\'')
5221 inquote = false;
5222 continue;
5224 else
5225 switch (c)
5227 case '\'':
5228 inquote = true; /* found first quote */
5229 continue;
5230 case '{': /* found open { comment */
5231 incomment = true;
5232 continue;
5233 case '(':
5234 if (*dbp == '*') /* found open (* comment */
5236 incomment = true;
5237 dbp++;
5239 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5240 inparms = true;
5241 continue;
5242 case ')': /* end of parms list */
5243 if (inparms)
5244 inparms = false;
5245 continue;
5246 case ';':
5247 if (found_tag && !inparms) /* end of proc or fn stmt */
5249 verify_tag = true;
/* This `break' leaves the switch only, so the verification code
   below runs for this character.  */
5250 break;
5252 continue;
/* A heading has ended: decide from the next token whether the saved
   name is a real definition.  */
5254 if (found_tag && verify_tag && (*dbp != ' '))
5256 /* Check if this is an "extern" declaration. */
5257 if (*dbp == '\0')
5258 continue;
5259 if (c_tolower (*dbp) == 'e')
5261 if (nocase_tail ("extern")) /* superfluous, really! */
5263 found_tag = false;
5264 verify_tag = false;
5267 else if (c_tolower (*dbp) == 'f')
5269 if (nocase_tail ("forward")) /* check for forward reference */
5271 found_tag = false;
5272 verify_tag = false;
5275 if (found_tag && verify_tag) /* not external proc, so make tag */
5277 found_tag = false;
5278 verify_tag = false;
5279 make_tag (name, namelen, true,
5280 tline.buffer, taglen, save_lineno, save_lcno);
5281 continue;
5284 if (get_tagname) /* grab name of proc or fn */
5286 char *cp;
5288 if (*dbp == '\0')
5289 continue;
5291 /* Find block name. */
5292 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5293 continue;
5295 /* Save all values for later tagging. */
/* LB is overwritten by later reads, so the line is copied into TLINE
   and NAME is made to point into that copy.  */
5296 linebuffer_setlen (&tline, lb.len);
5297 strcpy (tline.buffer, lb.buffer);
5298 save_lineno = lineno;
5299 save_lcno = linecharno;
5300 name = tline.buffer + (dbp - lb.buffer);
5301 namelen = cp - dbp;
5302 taglen = cp - lb.buffer + 1;
5304 dbp = cp; /* set dbp to e-o-token */
5305 get_tagname = false;
5306 found_tag = true;
5307 continue;
5309 /* And proceed to check for "extern". */
5311 else if (!incomment && !inquote && !found_tag)
5313 /* Check for proc/fn keywords. */
5314 switch (c_tolower (c))
5316 case 'p':
5317 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5318 get_tagname = true;
5319 continue;
5320 case 'f':
5321 if (nocase_tail ("unction"))
5322 get_tagname = true;
5323 continue;
5326 } /* while not eof */
5328 free (tline.buffer);
5333 * Lisp tag functions
5334 * look for (def or (DEF, quote or QUOTE
5337 static void L_getit (void);
5339 static void
5340 L_getit (void)
5342 if (*dbp == '\'') /* Skip prefix quote */
5343 dbp++;
5344 else if (*dbp == '(')
5346 dbp++;
5347 /* Try to skip "(quote " */
5348 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5349 /* Ok, then skip "(" before name in (defstruct (foo)) */
5350 dbp = skip_spaces (dbp);
5352 get_lispy_tag (dbp);
5355 static void
5356 Lisp_functions (FILE *inf)
5358 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5360 if (dbp[0] != '(')
5361 continue;
5363 /* "(defvar foo)" is a declaration rather than a definition. */
5364 if (! declarations)
5366 char *p = dbp + 1;
5367 if (LOOKING_AT (p, "defvar"))
5369 p = skip_name (p); /* past var name */
5370 p = skip_spaces (p);
5371 if (*p == ')')
5372 continue;
5376 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5377 dbp += 3;
5379 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5381 dbp = skip_non_spaces (dbp);
5382 dbp = skip_spaces (dbp);
5383 L_getit ();
5385 else
5387 /* Check for (foo::defmumble name-defined ... */
5389 dbp++;
5390 while (!notinname (*dbp) && *dbp != ':');
5391 if (*dbp == ':')
5394 dbp++;
5395 while (*dbp == ':');
5397 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5399 dbp = skip_non_spaces (dbp);
5400 dbp = skip_spaces (dbp);
5401 L_getit ();
5410 * Lua script language parsing
5411 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5413 * "function" and "local function" are tags; leading whitespace is skipped.
5415 static void
5416 Lua_functions (FILE *inf)
5418 register char *bp;
5420 LOOP_ON_INPUT_LINES (inf, lb, bp)
5422 bp = skip_spaces (bp);
5423 if (bp[0] != 'f' && bp[0] != 'l')
5424 continue;
5426 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5428 if (LOOKING_AT (bp, "function"))
5430 char *tag_name, *tp_dot, *tp_colon;
5432 get_tag (bp, &tag_name);
5433 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5434 "foo". */
5435 tp_dot = strrchr (tag_name, '.');
5436 tp_colon = strrchr (tag_name, ':');
5437 if (tp_dot || tp_colon)
5439 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5440 int len_add = p - tag_name + 1;
5442 get_tag (bp + len_add, NULL);
5450 * PostScript tags
5451 * Just look for lines where the first character is '/'
5452 * Also look at "defineps" for PSWrap
5453 * Ideas by:
5454 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5455 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5457 static void
5458 PS_functions (FILE *inf)
5460 register char *bp, *ep;
5462 LOOP_ON_INPUT_LINES (inf, lb, bp)
5464 if (bp[0] == '/')
5466 for (ep = bp+1;
5467 *ep != '\0' && *ep != ' ' && *ep != '{';
5468 ep++)
5469 continue;
5470 make_tag (bp, ep - bp, true,
5471 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5473 else if (LOOKING_AT (bp, "defineps"))
5474 get_tag (bp, NULL);
5480 * Forth tags
5481 * Ignore anything after \ followed by space or in ( )
5482 * Look for words defined by :
5483 * Look for constant, code, create, defer, value, and variable
5484 * OBP extensions: Look for buffer:, field,
5485 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5487 static void
5488 Forth_words (FILE *inf)
5490 register char *bp;
5492 LOOP_ON_INPUT_LINES (inf, lb, bp)
5493 while ((bp = skip_spaces (bp))[0] != '\0')
5494 if (bp[0] == '\\' && c_isspace (bp[1]))
5495 break; /* read next line */
5496 else if (bp[0] == '(' && c_isspace (bp[1]))
5497 do /* skip to ) or eol */
5498 bp++;
5499 while (*bp != ')' && *bp != '\0');
5500 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5501 || LOOKING_AT_NOCASE (bp, "constant")
5502 || LOOKING_AT_NOCASE (bp, "2constant")
5503 || LOOKING_AT_NOCASE (bp, "fconstant")
5504 || LOOKING_AT_NOCASE (bp, "code")
5505 || LOOKING_AT_NOCASE (bp, "create")
5506 || LOOKING_AT_NOCASE (bp, "defer")
5507 || LOOKING_AT_NOCASE (bp, "value")
5508 || LOOKING_AT_NOCASE (bp, "2value")
5509 || LOOKING_AT_NOCASE (bp, "fvalue")
5510 || LOOKING_AT_NOCASE (bp, "variable")
5511 || LOOKING_AT_NOCASE (bp, "2variable")
5512 || LOOKING_AT_NOCASE (bp, "fvariable")
5513 || LOOKING_AT_NOCASE (bp, "buffer:")
5514 || LOOKING_AT_NOCASE (bp, "field:")
5515 || LOOKING_AT_NOCASE (bp, "+field")
5516 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5517 || LOOKING_AT_NOCASE (bp, "begin-structure")
5518 || LOOKING_AT_NOCASE (bp, "synonym")
5520 && c_isspace (bp[0]))
5522 /* Yay! A definition! */
5523 char* name_start = skip_spaces (bp);
5524 char* name_end = skip_non_spaces (name_start);
5525 if (name_start < name_end)
5526 make_tag (name_start, name_end - name_start,
5527 true, lb.buffer, name_end - lb.buffer,
5528 lineno, linecharno);
5529 bp = name_end;
5531 else
5532 bp = skip_non_spaces (bp);
5537 * Scheme tag functions
5538 * look for (def... xyzzy
5539 * (def... (xyzzy
5540 * (def ... ((...(xyzzy ....
5541 * (set! xyzzy
5542 * Original code by Ken Haase (1985?)
5544 static void
5545 Scheme_functions (FILE *inf)
5547 register char *bp;
5549 LOOP_ON_INPUT_LINES (inf, lb, bp)
5551 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5553 bp = skip_non_spaces (bp+4);
5554 /* Skip over open parens and white space.
5555 Don't continue past '\0' or '='. */
5556 while (*bp && notinname (*bp) && *bp != '=')
5557 bp++;
5558 get_lispy_tag (bp);
5560 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5561 get_lispy_tag (bp);
5566 /* Find tags in TeX and LaTeX input files. */
5568 /* TEX_toktab is a table of TeX control sequences that define tags.
5569 * Each entry records one such control sequence.
5571 * Original code from who knows whom.
5572 * Ideas by:
5573 * Stefan Monnier (2002)
5576 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5578 /* Default set of control sequences to put into TEX_toktab.
5579 The value of environment var TEXTAGS is prepended to this. */
5580 static const char *TEX_defenv = "\
5581 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5582 :part:appendix:entry:index:def\
5583 :newcommand:renewcommand:newenvironment:renewenvironment";
5585 static void TEX_decode_env (const char *, const char *);
5588 * TeX/LaTeX scanning loop.
5590 static void
5591 TeX_commands (FILE *inf)
5593 char *cp;
5594 linebuffer *key;
5596 char TEX_esc = '\0';
5597 char TEX_opgrp, TEX_clgrp;
5599 /* Initialize token table once from environment. */
5600 if (TEX_toktab == NULL)
5601 TEX_decode_env ("TEXTAGS", TEX_defenv);
5603 LOOP_ON_INPUT_LINES (inf, lb, cp)
5605 /* Look at each TEX keyword in line. */
5606 for (;;)
5608 /* Look for a TEX escape. */
5609 while (true)
5611 char c = *cp++;
5612 if (c == '\0' || c == '%')
5613 goto tex_next_line;
5615 /* Select either \ or ! as escape character, whichever comes
5616 first outside a comment. */
5617 if (!TEX_esc)
5618 switch (c)
5620 case '\\':
5621 TEX_esc = c;
5622 TEX_opgrp = '{';
5623 TEX_clgrp = '}';
5624 break;
5626 case '!':
5627 TEX_esc = c;
5628 TEX_opgrp = '<';
5629 TEX_clgrp = '>';
5630 break;
5633 if (c == TEX_esc)
5634 break;
5637 for (key = TEX_toktab; key->buffer != NULL; key++)
5638 if (strneq (cp, key->buffer, key->len))
5640 char *p;
5641 int namelen, linelen;
5642 bool opgrp = false;
5644 cp = skip_spaces (cp + key->len);
5645 if (*cp == TEX_opgrp)
5647 opgrp = true;
5648 cp++;
5650 for (p = cp;
5651 (!c_isspace (*p) && *p != '#' &&
5652 *p != TEX_opgrp && *p != TEX_clgrp);
5653 p++)
5654 continue;
5655 namelen = p - cp;
5656 linelen = lb.len;
5657 if (!opgrp || *p == TEX_clgrp)
5659 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5660 p++;
5661 linelen = p - lb.buffer + 1;
5663 make_tag (cp, namelen, true,
5664 lb.buffer, linelen, lineno, linecharno);
5665 goto tex_next_line; /* We only tag a line once */
5668 tex_next_line:
5673 /* Read environment and prepend it to the default string.
5674 Build token table. */
5675 static void
5676 TEX_decode_env (const char *evarname, const char *defenv)
5678 register const char *env, *p;
5679 int i, len;
5681 /* Append default string to environment. */
5682 env = getenv (evarname);
5683 if (!env)
5684 env = defenv;
5685 else
5686 env = concat (env, defenv, "");
5688 /* Allocate a token table */
5689 for (len = 1, p = env; (p = strchr (p, ':')); )
5690 if (*++p)
5691 len++;
5692 TEX_toktab = xnew (len, linebuffer);
5694 /* Unpack environment string into token table. Be careful about */
5695 /* zero-length strings (leading ':', "::" and trailing ':') */
5696 for (i = 0; *env != '\0';)
5698 p = strchr (env, ':');
5699 if (!p) /* End of environment string. */
5700 p = env + strlen (env);
5701 if (p - env > 0)
5702 { /* Only non-zero strings. */
5703 TEX_toktab[i].buffer = savenstr (env, p - env);
5704 TEX_toktab[i].len = p - env;
5705 i++;
5707 if (*p)
5708 env = p + 1;
5709 else
5711 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5712 TEX_toktab[i].len = 0;
5713 break;
5719 /* Texinfo support. Dave Love, Mar. 2000. */
5720 static void
5721 Texinfo_nodes (FILE *inf)
5723 char *cp, *start;
5724 LOOP_ON_INPUT_LINES (inf, lb, cp)
5725 if (LOOKING_AT (cp, "@node"))
5727 start = cp;
5728 while (*cp != '\0' && *cp != ',')
5729 cp++;
5730 make_tag (start, cp - start, true,
5731 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5737 * HTML support.
5738 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5739 * Contents of <a name=xxx> are tags with name xxx.
5741 * Francesco Potortì, 2002.
/* Scan the HTML source read from INF.  The text that follows
   <title>, <h1>, <h2> or <h3> is tagged without a name; the text that
   follows an element carrying id=xxx (or name=xxx in an <a> element)
   is tagged with the name xxx.  The four flags below form a small
   state machine whose state persists from one line to the next, so
   an HTML tag may span several lines.  */
5743 static void
5744 HTML_labels (FILE *inf)
5746 bool getnext = false; /* next text outside of HTML tags is a tag */
5747 bool skiptag = false; /* skip to the end of the current HTML tag */
5748 bool intag = false; /* inside an html tag, looking for ID= */
5749 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5750 char *end;
5753 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5755 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5756 for (;;) /* loop on the same line */
5758 if (skiptag) /* skip HTML tag */
5760 while (*dbp != '\0' && *dbp != '>')
5761 dbp++;
5762 if (*dbp == '>')
5764 dbp += 1;
5765 skiptag = false;
5766 continue; /* look on the same line */
5768 break; /* go to next line */
5771 else if (intag) /* look for "name=" or "id=" */
/* Stop at the end of the tag or at any letter that could begin
   "name=" or "id=".  */
5773 while (*dbp != '\0' && *dbp != '>'
5774 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5775 dbp++;
5776 if (*dbp == '\0')
5777 break; /* go to next line */
5778 if (*dbp == '>')
5780 dbp += 1;
5781 intag = false;
5782 continue; /* look on the same line */
5784 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5785 || LOOKING_AT_NOCASE (dbp, "id="))
/* The attribute value is either double-quoted or a bare token.  */
5787 bool quoted = (dbp[0] == '"');
5789 if (quoted)
5790 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5791 continue;
5792 else
5793 for (end = dbp; *end != '\0' && intoken (*end); end++)
5794 continue;
/* Keep the value in TOKEN_NAME until the text to tag is found.  */
5795 linebuffer_setlen (&token_name, end - dbp);
5796 memcpy (token_name.buffer, dbp, end - dbp);
5797 token_name.buffer[end - dbp] = '\0';
5799 dbp = end;
5800 intag = false; /* we found what we looked for */
5801 skiptag = true; /* skip to the end of the tag */
5802 getnext = true; /* then grab the text */
5803 continue; /* look on the same line */
/* Neither attribute matched at this letter: step over it.  */
5805 dbp += 1;
5808 else if (getnext) /* grab next tokens and tag them */
5810 dbp = skip_spaces (dbp);
5811 if (*dbp == '\0')
5812 break; /* go to next line */
5813 if (*dbp == '<')
/* Another HTML tag comes before any text: examine it first.  */
5815 intag = true;
5816 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5817 continue; /* look on the same line */
/* Tag the text up to the next '<' or the end of the line.  */
5820 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5821 continue;
5822 make_tag (token_name.buffer, token_name.len, true,
5823 dbp, end - dbp, lineno, linecharno);
5824 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5825 getnext = false;
5826 break; /* go to next line */
5829 else /* look for an interesting HTML tag */
5831 while (*dbp != '\0' && *dbp != '<')
5832 dbp++;
5833 if (*dbp == '\0')
5834 break; /* go to next line */
5835 intag = true;
5836 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5838 inanchor = true;
5839 continue; /* look on the same line */
5841 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5842 || LOOKING_AT_NOCASE (dbp, "<h1>")
5843 || LOOKING_AT_NOCASE (dbp, "<h2>")
5844 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5846 intag = false;
5847 getnext = true;
5848 continue; /* look on the same line */
/* Some other element: step over '<' and look inside it for id=.  */
5850 dbp += 1;
5857 * Prolog support
5859 * Assumes that the predicate or rule starts at column 0.
5860 * Only the first clause of a predicate or rule is added.
5861 * Original code by Sunichirou Sugou (1989)
5862 * Rewritten by Anders Lindgren (1996)
5864 static size_t prolog_pr (char *, char *);
5865 static void prolog_skip_comment (linebuffer *, FILE *);
5866 static size_t prolog_atom (char *, size_t);
5868 static void
5869 Prolog_functions (FILE *inf)
5871 char *cp, *last;
5872 size_t len;
5873 size_t allocated;
5875 allocated = 0;
5876 len = 0;
5877 last = NULL;
5879 LOOP_ON_INPUT_LINES (inf, lb, cp)
5881 if (cp[0] == '\0') /* Empty line */
5882 continue;
5883 else if (c_isspace (cp[0])) /* Not a predicate */
5884 continue;
5885 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5886 prolog_skip_comment (&lb, inf);
5887 else if ((len = prolog_pr (cp, last)) > 0)
5889 /* Predicate or rule. Store the function name so that we
5890 only generate a tag for the first clause. */
5891 if (last == NULL)
5892 last = xnew (len + 1, char);
5893 else if (len + 1 > allocated)
5894 xrnew (last, len + 1, char);
5895 allocated = len + 1;
5896 memcpy (last, cp, len);
5897 last[len] = '\0';
5900 free (last);
5904 static void
5905 prolog_skip_comment (linebuffer *plb, FILE *inf)
5907 char *cp;
5911 for (cp = plb->buffer; *cp != '\0'; cp++)
5912 if (cp[0] == '*' && cp[1] == '/')
5913 return;
5914 readline (plb, inf);
5916 while (perhaps_more_input (inf));
5920 * A predicate or rule definition is added if it matches:
5921 * <beginning of line><Prolog Atom><whitespace>(
5922 * or <beginning of line><Prolog Atom><whitespace>:-
5922 * or <beginning of line><Prolog Atom><whitespace>.  (a fact)
5924 * It is added to the tags database if it doesn't match the
5925 * name of the previous clause header.
5927 * Return the size of the name of the predicate or rule, or 0 if no
5928 * header was found.
5930 static size_t
5931 prolog_pr (char *s, char *last)
5933 /* Name of last clause. */
5935 size_t pos;
5936 size_t len;
5938 pos = prolog_atom (s, 0);
5939 if (! pos)
5940 return 0;
5942 len = pos;
5943 pos = skip_spaces (s + pos) - s;
5945 if ((s[pos] == '.'
5946 || (s[pos] == '(' && (pos += 1))
5947 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5948 && (last == NULL /* save only the first clause */
5949 || len != strlen (last)
5950 || !strneq (s, last, len)))
5952 make_tag (s, len, true, s, pos, lineno, linecharno);
5953 return len;
5955 else
5956 return 0;
5960 * Consume a Prolog atom.
5961 * Return the number of bytes consumed, or 0 if there was an error.
5963 * A prolog atom, in this context, could be one of:
5964 * - An alphanumeric sequence, starting with a lower case letter.
5965 * - A quoted arbitrary string. Single quotes can escape themselves.
5966 * Backslash quotes everything.
/* Consume the Prolog atom that starts at S[POS] and return the number
   of bytes it occupies, or 0 if there is no atom there.  An atom is
   either a run of alphanumerics and '_' beginning with a lower-case
   letter or '_', or a single-quoted string in which '' stands for a
   quote and a backslash quotes the following character.  A quoted
   atom that is not closed on the line counts as no atom.  */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      pos++;
      for (;;)
        {
          switch (s[pos])
            {
            case '\'':
              pos++;
              if (s[pos] != '\'')
                return pos - start;	/* closing quote */
              pos++;			/* a doubled quote */
              break;

            case '\0':
              /* Multiline quoted atoms are ignored.  */
              return 0;

            case '\\':
              if (s[pos + 1] == '\0')
                return 0;
              pos += 2;
              break;

            default:
              pos++;
              break;
            }
        }
    }

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  return 0;
}
6018 * Support for Erlang
6020 * Generates tags for functions, defines, and records.
6021 * Assumes that Erlang functions start at column 0.
6022 * Original code by Anders Lindgren (1996)
6024 static int erlang_func (char *, char *);
6025 static void erlang_attribute (char *);
6026 static int erlang_atom (char *);
6028 static void
6029 Erlang_functions (FILE *inf)
6031 char *cp, *last;
6032 int len;
6033 int allocated;
6035 allocated = 0;
6036 len = 0;
6037 last = NULL;
6039 LOOP_ON_INPUT_LINES (inf, lb, cp)
6041 if (cp[0] == '\0') /* Empty line */
6042 continue;
6043 else if (c_isspace (cp[0])) /* Not function nor attribute */
6044 continue;
6045 else if (cp[0] == '%') /* comment */
6046 continue;
6047 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6048 continue;
6049 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6051 erlang_attribute (cp);
6052 if (last != NULL)
6054 free (last);
6055 last = NULL;
6058 else if ((len = erlang_func (cp, last)) > 0)
6061 * Function. Store the function name so that we only
6062 * generates a tag for the first clause.
6064 if (last == NULL)
6065 last = xnew (len + 1, char);
6066 else if (len + 1 > allocated)
6067 xrnew (last, len + 1, char);
6068 allocated = len + 1;
6069 memcpy (last, cp, len);
6070 last[len] = '\0';
6073 free (last);
6078 * A function definition is added if it matches:
6079 * <beginning of line><Erlang Atom><whitespace>(
6081 * It is added to the tags database if it doesn't match the
6082 * name of the previous clause header.
6084 * Return the size of the name of the function, or 0 if no function
6085 * was found.
6087 static int
6088 erlang_func (char *s, char *last)
6090 /* Name of last clause. */
6092 int pos;
6093 int len;
6095 pos = erlang_atom (s);
6096 if (pos < 1)
6097 return 0;
6099 len = pos;
6100 pos = skip_spaces (s + pos) - s;
6102 /* Save only the first clause. */
6103 if (s[pos++] == '('
6104 && (last == NULL
6105 || len != (int)strlen (last)
6106 || !strneq (s, last, len)))
6108 make_tag (s, len, true, s, pos, lineno, linecharno);
6109 return len;
6112 return 0;
6117 * Handle attributes. Currently, tags are generated for defines
6118 * and records.
6120 * They are on the form:
6121 * -define(foo, bar).
6122 * -define(Foo(M, N), M+N).
6123 * -record(graph, {vtab = notable, cyclic = true}).
6125 static void
6126 erlang_attribute (char *s)
6128 char *cp = s;
6130 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6131 && *cp++ == '(')
6133 int len = erlang_atom (skip_spaces (cp));
6134 if (len > 0)
6135 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
6137 return;
6142 * Consume an Erlang atom (or variable).
6143 * Return the number of bytes consumed, or 0 if there was an error.
/* Consume the Erlang atom (or variable) at the start of S and return
   the number of bytes it occupies.  Return 0 if S does not start with
   one, or if it starts a quoted atom that is not closed on the
   line.  */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote; a backslash quotes
         the character after it.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')	/* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;		/* include the closing quote */
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      pos = 1;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
6170 static char *scan_separators (char *);
6171 static void add_regex (char *, language *);
6172 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty NAME has no separator at all and is
 * reported as unterminated.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan: stepping over the missing separator would read
     past the string terminator.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote.  */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
6233 /* Look at the argument of --regex or --no-regex and do the right
6234 thing. Same for each line of a regexp file. */
6235 static void
6236 analyze_regex (char *regex_arg)
6238 if (regex_arg == NULL)
6240 free_regexps (); /* --no-regex: remove existing regexps */
6241 return;
6244 /* A real --regexp option or a line in a regexp file. */
6245 switch (regex_arg[0])
6247 /* Comments in regexp file or null arg to --regex. */
6248 case '\0':
6249 case ' ':
6250 case '\t':
6251 break;
6253 /* Read a regex file. This is recursive and may result in a
6254 loop, which will stop when the file descriptors are exhausted. */
6255 case '@':
6257 FILE *regexfp;
6258 linebuffer regexbuf;
6259 char *regexfile = regex_arg + 1;
6261 /* regexfile is a file containing regexps, one per line. */
6262 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6263 if (regexfp == NULL)
6264 pfatal (regexfile);
6265 linebuffer_init (&regexbuf);
6266 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6267 analyze_regex (regexbuf.buffer);
6268 free (regexbuf.buffer);
6269 if (fclose (regexfp) != 0)
6270 pfatal (regexfile);
6272 break;
6274 /* Regexp to be used for a specific language only. */
6275 case '{':
6277 language *lang;
6278 char *lang_name = regex_arg + 1;
6279 char *cp;
6281 for (cp = lang_name; *cp != '}'; cp++)
6282 if (*cp == '\0')
6284 error ("unterminated language name in regex: %s", regex_arg);
6285 return;
6287 *cp++ = '\0';
6288 lang = get_language_from_langname (lang_name);
6289 if (lang == NULL)
6290 return;
6291 add_regex (cp, lang);
6293 break;
6295 /* Regexp to be used for any language. */
6296 default:
6297 add_regex (regex_arg, NULL);
6298 break;
6302 /* Separate the regexp pattern, compile it,
6303 and care for optional name and modifiers. */
6304 static void
6305 add_regex (char *regexp_pattern, language *lang)
6307 static struct re_pattern_buffer zeropattern;
6308 char sep, *pat, *name, *modifiers;
6309 char empty = '\0';
6310 const char *err;
6311 struct re_pattern_buffer *patbuf;
6312 regexp *rp;
6313 bool
6314 force_explicit_name = true, /* do not use implicit tag names */
6315 ignore_case = false, /* case is significant */
6316 multi_line = false, /* matches are done one line at a time */
6317 single_line = false; /* dot does not match newline */
6320 if (strlen (regexp_pattern) < 3)
6322 error ("null regexp");
6323 return;
6325 sep = regexp_pattern[0];
6326 name = scan_separators (regexp_pattern);
6327 if (name == NULL)
6329 error ("%s: unterminated regexp", regexp_pattern);
6330 return;
6332 if (name[1] == sep)
6334 error ("null name for regexp \"%s\"", regexp_pattern);
6335 return;
6337 modifiers = scan_separators (name);
6338 if (modifiers == NULL) /* no terminating separator --> no name */
6340 modifiers = name;
6341 name = &empty;
6343 else
6344 modifiers += 1; /* skip separator */
6346 /* Parse regex modifiers. */
6347 for (; modifiers[0] != '\0'; modifiers++)
6348 switch (modifiers[0])
6350 case 'N':
6351 if (modifiers == name)
6352 error ("forcing explicit tag name but no name, ignoring");
6353 force_explicit_name = true;
6354 break;
6355 case 'i':
6356 ignore_case = true;
6357 break;
6358 case 's':
6359 single_line = true;
6360 FALLTHROUGH;
6361 case 'm':
6362 multi_line = true;
6363 need_filebuf = true;
6364 break;
6365 default:
6366 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6367 break;
6370 patbuf = xnew (1, struct re_pattern_buffer);
6371 *patbuf = zeropattern;
6372 if (ignore_case)
6374 static char lc_trans[UCHAR_MAX + 1];
6375 int i;
6376 for (i = 0; i < UCHAR_MAX + 1; i++)
6377 lc_trans[i] = c_tolower (i);
6378 patbuf->translate = lc_trans; /* translation table to fold case */
6381 if (multi_line)
6382 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6383 else
6384 pat = regexp_pattern;
6386 if (single_line)
6387 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6388 else
6389 re_set_syntax (RE_SYNTAX_EMACS);
6391 err = re_compile_pattern (pat, strlen (pat), patbuf);
6392 if (multi_line)
6393 free (pat);
6394 if (err != NULL)
6396 error ("%s while compiling pattern", err);
6397 return;
6400 rp = p_head;
6401 p_head = xnew (1, regexp);
6402 p_head->pattern = savestr (regexp_pattern);
6403 p_head->p_next = rp;
6404 p_head->lang = lang;
6405 p_head->pat = patbuf;
6406 p_head->name = savestr (name);
6407 p_head->error_signaled = false;
6408 p_head->force_explicit_name = force_explicit_name;
6409 p_head->ignore_case = ignore_case;
6410 p_head->multi_line = multi_line;
6414 * Do the substitutions indicated by the regular expression and
6415 * arguments.
6417 static char *
6418 substitute (char *in, char *out, struct re_registers *regs)
6420 char *result, *t;
6421 int size, dig, diglen;
6423 result = NULL;
6424 size = strlen (out);
6426 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6427 if (out[size - 1] == '\\')
6428 fatal ("pattern error in \"%s\"", out);
6429 for (t = strchr (out, '\\');
6430 t != NULL;
6431 t = strchr (t + 2, '\\'))
6432 if (c_isdigit (t[1]))
6434 dig = t[1] - '0';
6435 diglen = regs->end[dig] - regs->start[dig];
6436 size += diglen - 2;
6438 else
6439 size -= 1;
6441 /* Allocate space and do the substitutions. */
6442 assert (size >= 0);
6443 result = xnew (size + 1, char);
6445 for (t = result; *out != '\0'; out++)
6446 if (*out == '\\' && c_isdigit (*++out))
6448 dig = *out - '0';
6449 diglen = regs->end[dig] - regs->start[dig];
6450 memcpy (t, in + regs->start[dig], diglen);
6451 t += diglen;
6453 else
6454 *t++ = *out;
6455 *t = '\0';
6457 assert (t <= result + size);
6458 assert (t - result == (int)strlen (result));
6460 return result;
6463 /* Deallocate all regexps. */
6464 static void
6465 free_regexps (void)
6467 regexp *rp;
6468 while (p_head != NULL)
6470 rp = p_head->p_next;
6471 free (p_head->pattern);
6472 free (p_head->name);
6473 free (p_head);
6474 p_head = rp;
6476 return;
6480 * Reads the whole file as a single string from `filebuf' and looks for
6481 * multi-line regular expressions, creating tags on matches.
6482 * readline already dealt with normal regexps.
6484 * Idea by Ben Wing <ben@666.com> (2002).
6486 static void
6487 regex_tag_multiline (void)
6489 char *buffer = filebuf.buffer;
6490 regexp *rp;
6491 char *name;
6493 for (rp = p_head; rp != NULL; rp = rp->p_next)
6495 int match = 0;
6497 if (!rp->multi_line)
6498 continue; /* skip normal regexps */
6500 /* Generic initializations before parsing file from memory. */
6501 lineno = 1; /* reset global line number */
6502 charno = 0; /* reset global char number */
6503 linecharno = 0; /* reset global char number of line start */
6505 /* Only use generic regexps or those for the current language. */
6506 if (rp->lang != NULL && rp->lang != curfdp->lang)
6507 continue;
6509 while (match >= 0 && match < filebuf.len)
6511 match = re_search (rp->pat, buffer, filebuf.len, charno,
6512 filebuf.len - match, &rp->regs);
6513 switch (match)
6515 case -2:
6516 /* Some error. */
6517 if (!rp->error_signaled)
6519 error ("regexp stack overflow while matching \"%s\"",
6520 rp->pattern);
6521 rp->error_signaled = true;
6523 break;
6524 case -1:
6525 /* No match. */
6526 break;
6527 default:
6528 if (match == rp->regs.end[0])
6530 if (!rp->error_signaled)
6532 error ("regexp matches the empty string: \"%s\"",
6533 rp->pattern);
6534 rp->error_signaled = true;
6536 match = -3; /* exit from while loop */
6537 break;
6540 /* Match occurred. Construct a tag. */
6541 while (charno < rp->regs.end[0])
6542 if (buffer[charno++] == '\n')
6543 lineno++, linecharno = charno;
6544 name = rp->name;
6545 if (name[0] == '\0')
6546 name = NULL;
6547 else /* make a named tag */
6548 name = substitute (buffer, rp->name, &rp->regs);
6549 if (rp->force_explicit_name)
6550 /* Force explicit tag name, if a name is there. */
6551 pfnote (name, true, buffer + linecharno,
6552 charno - linecharno + 1, lineno, linecharno);
6553 else
6554 make_tag (name, strlen (name), true, buffer + linecharno,
6555 charno - linecharno + 1, lineno, linecharno);
6556 break;
6563 static bool
6564 nocase_tail (const char *cp)
6566 int len = 0;
6568 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6569 cp++, len++;
6570 if (*cp == '\0' && !intoken (dbp[len]))
6572 dbp += len;
6573 return true;
6575 return false;
6578 static void
6579 get_tag (register char *bp, char **namepp)
6581 register char *cp = bp;
6583 if (*bp != '\0')
6585 /* Go till you get to white space or a syntactic break */
6586 for (cp = bp + 1; !notinname (*cp); cp++)
6587 continue;
6588 make_tag (bp, cp - bp, true,
6589 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6592 if (namepp != NULL)
6593 *namepp = savenstr (bp, cp - bp);
6596 /* Similar to get_tag, but include '=' as part of the tag. */
6597 static void
6598 get_lispy_tag (register char *bp)
6600 register char *cp = bp;
6602 if (*bp != '\0')
6604 /* Go till you get to white space or a syntactic break */
6605 for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
6606 continue;
6607 make_tag (bp, cp - bp, true,
6608 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6613 * Read a line of text from `stream' into `lbp', excluding the
6614 * newline or CR-NL, if any. Return the number of characters read from
6615 * `stream', which is the length of the line including the newline.
6617 * On DOS or Windows we do not count the CR character, if any before the
6618 * NL, in the returned length; this mirrors the behavior of Emacs on those
6619 * platforms (for text files, it translates CR-NL to NL as it reads in the
6620 * file).
6622 * If multi-line regular expressions are requested, each line read is
6623 * appended to `filebuf'.
6625 static long
6626 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6628 char *buffer = lbp->buffer;
6629 char *p = lbp->buffer;
6630 char *pend;
6631 int chars_deleted;
6633 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6635 for (;;)
6637 register int c = getc (stream);
6638 if (p == pend)
6640 /* We're at the end of linebuffer: expand it. */
6641 lbp->size *= 2;
6642 xrnew (buffer, lbp->size, char);
6643 p += buffer - lbp->buffer;
6644 pend = buffer + lbp->size;
6645 lbp->buffer = buffer;
6647 if (c == EOF)
6649 if (ferror (stream))
6650 perror (filename);
6651 *p = '\0';
6652 chars_deleted = 0;
6653 break;
6655 if (c == '\n')
6657 if (p > buffer && p[-1] == '\r')
6659 p -= 1;
6660 chars_deleted = 2;
6662 else
6664 chars_deleted = 1;
6666 *p = '\0';
6667 break;
6669 *p++ = c;
6671 lbp->len = p - buffer;
6673 if (need_filebuf /* we need filebuf for multi-line regexps */
6674 && chars_deleted > 0) /* not at EOF */
6676 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6678 /* Expand filebuf. */
6679 filebuf.size *= 2;
6680 xrnew (filebuf.buffer, filebuf.size, char);
6682 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6683 filebuf.len += lbp->len;
6684 filebuf.buffer[filebuf.len++] = '\n';
6685 filebuf.buffer[filebuf.len] = '\0';
6688 return lbp->len + chars_deleted;
6692 * Like readline_internal, above, but in addition try to match the
6693 * input line against relevant regular expressions and manage #line
6694 * directives.
6696 static void
6697 readline (linebuffer *lbp, FILE *stream)
6699 long result;
6701 linecharno = charno; /* update global char number of line start */
6702 result = readline_internal (lbp, stream, infilename); /* read line */
6703 lineno += 1; /* increment global line number */
6704 charno += result; /* increment global char number */
6706 /* Honor #line directives. */
6707 if (!no_line_directive)
6709 static bool discard_until_line_directive;
6711 /* Check whether this is a #line directive. */
6712 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6714 unsigned int lno;
6715 int start = 0;
6717 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6718 && start > 0) /* double quote character found */
6720 char *endp = lbp->buffer + start;
6722 while ((endp = strchr (endp, '"')) != NULL
6723 && endp[-1] == '\\')
6724 endp++;
6725 if (endp != NULL)
6726 /* Ok, this is a real #line directive. Let's deal with it. */
6728 char *taggedabsname; /* absolute name of original file */
6729 char *taggedfname; /* name of original file as given */
6730 char *name; /* temp var */
6732 discard_until_line_directive = false; /* found it */
6733 name = lbp->buffer + start;
6734 *endp = '\0';
6735 canonicalize_filename (name);
6736 taggedabsname = absolute_filename (name, tagfiledir);
6737 if (filename_is_absolute (name)
6738 || filename_is_absolute (curfdp->infname))
6739 taggedfname = savestr (taggedabsname);
6740 else
6741 taggedfname = relative_filename (taggedabsname,tagfiledir);
6743 if (streq (curfdp->taggedfname, taggedfname))
6744 /* The #line directive is only a line number change. We
6745 deal with this afterwards. */
6746 free (taggedfname);
6747 else
6748 /* The tags following this #line directive should be
6749 attributed to taggedfname. In order to do this, set
6750 curfdp accordingly. */
6752 fdesc *fdp; /* file description pointer */
6754 /* Go look for a file description already set up for the
6755 file indicated in the #line directive. If there is
6756 one, use it from now until the next #line
6757 directive. */
6758 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6759 if (streq (fdp->infname, curfdp->infname)
6760 && streq (fdp->taggedfname, taggedfname))
6761 /* If we remove the second test above (after the &&)
6762 then all entries pertaining to the same file are
6763 coalesced in the tags file. If we use it, then
6764 entries pertaining to the same file but generated
6765 from different files (via #line directives) will
6766 go into separate sections in the tags file. These
6767 alternatives look equivalent. The first one
6768 destroys some apparently useless information. */
6770 curfdp = fdp;
6771 free (taggedfname);
6772 break;
6774 /* Else, if we already tagged the real file, skip all
6775 input lines until the next #line directive. */
6776 if (fdp == NULL) /* not found */
6777 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6778 if (streq (fdp->infabsname, taggedabsname))
6780 discard_until_line_directive = true;
6781 free (taggedfname);
6782 break;
6784 /* Else create a new file description and use that from
6785 now on, until the next #line directive. */
6786 if (fdp == NULL) /* not found */
6788 fdp = fdhead;
6789 fdhead = xnew (1, fdesc);
6790 *fdhead = *curfdp; /* copy curr. file description */
6791 fdhead->next = fdp;
6792 fdhead->infname = savestr (curfdp->infname);
6793 fdhead->infabsname = savestr (curfdp->infabsname);
6794 fdhead->infabsdir = savestr (curfdp->infabsdir);
6795 fdhead->taggedfname = taggedfname;
6796 fdhead->usecharno = false;
6797 fdhead->prop = NULL;
6798 fdhead->written = false;
6799 curfdp = fdhead;
6802 free (taggedabsname);
6803 lineno = lno - 1;
6804 readline (lbp, stream);
6805 return;
6806 } /* if a real #line directive */
6807 } /* if #line is followed by a number */
6808 } /* if line begins with "#line " */
6810 /* If we are here, no #line directive was found. */
6811 if (discard_until_line_directive)
6813 if (result > 0)
6815 /* Do a tail recursion on ourselves, thus discarding the contents
6816 of the line buffer. */
6817 readline (lbp, stream);
6818 return;
6820 /* End of file. */
6821 discard_until_line_directive = false;
6822 return;
6824 } /* if #line directives should be considered */
6827 int match;
6828 regexp *rp;
6829 char *name;
6831 /* Match against relevant regexps. */
6832 if (lbp->len > 0)
6833 for (rp = p_head; rp != NULL; rp = rp->p_next)
6835 /* Only use generic regexps or those for the current language.
6836 Also do not use multiline regexps, which is the job of
6837 regex_tag_multiline. */
6838 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6839 || rp->multi_line)
6840 continue;
6842 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6843 switch (match)
6845 case -2:
6846 /* Some error. */
6847 if (!rp->error_signaled)
6849 error ("regexp stack overflow while matching \"%s\"",
6850 rp->pattern);
6851 rp->error_signaled = true;
6853 break;
6854 case -1:
6855 /* No match. */
6856 break;
6857 case 0:
6858 /* Empty string matched. */
6859 if (!rp->error_signaled)
6861 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6862 rp->error_signaled = true;
6864 break;
6865 default:
6866 /* Match occurred. Construct a tag. */
6867 name = rp->name;
6868 if (name[0] == '\0')
6869 name = NULL;
6870 else /* make a named tag */
6871 name = substitute (lbp->buffer, rp->name, &rp->regs);
6872 if (rp->force_explicit_name)
6873 /* Force explicit tag name, if a name is there. */
6874 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6875 else
6876 make_tag (name, strlen (name), true,
6877 lbp->buffer, match, lineno, linecharno);
6878 break;
/*
 * Return a newly allocated copy of the string CP, obtained through
 * savenstr with the full length of CP.
 */
static char *
savestr (const char *cp)
{
  size_t len = strlen (cp);
  return savenstr (cp, len);
}
6896 * Return a pointer to a space of size LEN+1 allocated with xnew where
6897 * the string CP has been copied for at most the first LEN characters.
6899 static char *
6900 savenstr (const char *cp, int len)
6902 char *dp = xnew (len + 1, char);
6903 dp[len] = '\0';
6904 return memcpy (dp, cp, len);
/* Return a pointer to the first character at or after CP that is not
   a space.  The end of the string is not a space, so the scan stops
   there.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first space at or after CP, or to the end of
   the string if there is none.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
/* Skip any chars in the "name" class: return a pointer to the first
   notinname character at or after CP.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Print a diagnostic built from the printf-style FORMAT and its
   arguments, then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current errno through perror, prefixed with S1, then exit
   with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6953 static void
6954 suggest_asking_for_help (void)
6956 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6957 progname);
6958 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args.  Unlike
   fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6971 static void
6972 verror (char const *format, va_list ap)
6974 fprintf (stderr, "%s: ", progname);
6975 vfprintf (stderr, format, ap);
6976 fprintf (stderr, "\n");
6979 /* Return a newly-allocated string whose contents
6980 concatenate those of s1, s2, s3. */
6981 static char *
6982 concat (const char *s1, const char *s2, const char *s3)
6984 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6985 char *result = xnew (len1 + len2 + len3 + 1, char);
6987 strcpy (result, s1);
6988 strcpy (result + len1, s2);
6989 strcpy (result + len1 + len2, s3);
6991 return result;
6995 /* Does the same work as the system V getcwd, but does not need to
6996 guess the buffer size in advance. */
6997 static char *
6998 etags_getcwd (void)
7000 int bufsize = 200;
7001 char *path = xnew (bufsize, char);
7003 while (getcwd (path, bufsize) == NULL)
7005 if (errno != ERANGE)
7006 pfatal ("getcwd");
7007 bufsize *= 2;
7008 free (path);
7009 path = xnew (bufsize, char);
7012 canonicalize_filename (path);
7013 return path;
7016 /* Return a newly allocated string containing a name of a temporary file. */
7017 static char *
7018 etags_mktmp (void)
7020 const char *tmpdir = getenv ("TMPDIR");
7021 const char *slash = "/";
7023 #if MSDOS || defined (DOS_NT)
7024 if (!tmpdir)
7025 tmpdir = getenv ("TEMP");
7026 if (!tmpdir)
7027 tmpdir = getenv ("TMP");
7028 if (!tmpdir)
7029 tmpdir = ".";
7030 if (tmpdir[strlen (tmpdir) - 1] == '/'
7031 || tmpdir[strlen (tmpdir) - 1] == '\\')
7032 slash = "";
7033 #else
7034 if (!tmpdir)
7035 tmpdir = "/tmp";
7036 if (tmpdir[strlen (tmpdir) - 1] == '/')
7037 slash = "";
7038 #endif
7040 char *templt = concat (tmpdir, slash, "etXXXXXX");
7041 int fd = mkostemp (templt, O_CLOEXEC);
7042 if (fd < 0 || close (fd) != 0)
7044 int temp_errno = errno;
7045 free (templt);
7046 errno = temp_errno;
7047 templt = NULL;
7050 #if defined (DOS_NT)
7051 /* The file name will be used in shell redirection, so it needs to have
7052 DOS-style backslashes, or else the Windows shell will barf. */
7053 char *p;
7054 for (p = templt; *p; p++)
7055 if (*p == '/')
7056 *p = '\\';
7057 #endif
7059 return templt;
7062 /* Return a newly allocated string containing the file name of FILE
7063 relative to the absolute directory DIR (which should end with a slash). */
7064 static char *
7065 relative_filename (char *file, char *dir)
7067 char *fp, *dp, *afn, *res;
7068 int i;
7070 /* Find the common root of file and dir (with a trailing slash). */
7071 afn = absolute_filename (file, cwd);
7072 fp = afn;
7073 dp = dir;
7074 while (*fp++ == *dp++)
7075 continue;
7076 fp--, dp--; /* back to the first differing char */
7077 #ifdef DOS_NT
7078 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
7079 return afn;
7080 #endif
7081 do /* look at the equal chars until '/' */
7082 fp--, dp--;
7083 while (*fp != '/');
7085 /* Build a sequence of "../" strings for the resulting relative file name. */
7086 i = 0;
7087 while ((dp = strchr (dp + 1, '/')) != NULL)
7088 i += 1;
7089 res = xnew (3*i + strlen (fp + 1) + 1, char);
7090 char *z = res;
7091 while (i-- > 0)
7092 z = stpcpy (z, "../");
7094 /* Add the file name relative to the common root of file and dir. */
7095 strcpy (z, fp + 1);
7096 free (afn);
7098 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  */
  for (slashp = strchr (res, '/'); slashp != NULL && slashp[0] != '\0'; )
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": back up to where the previous component starts.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* The length covers the rest of the string and its
                 terminator.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": drop it and look at the same place again.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }

  return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE has no slash, the result is a copy of
   DIR.  FILE is modified while this runs but restored on return.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE just after its last slash, resolve that, then undo the cut.  */
  char saved = last_slash[1];
  last_slash[1] = '\0';
  char *res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute if it starts with a slash or, under DOS_NT, with
   a drive letter, a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return fn[0] == '/';
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  Backslashes count as separators only under DOS_NT.  */
static void
canonicalize_filename (register char *fn)
{
  register char *cp = fn;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash.  */
  while (*cp != '\0')
    {
      if (*cp == '/' || *cp == '\\')
        {
          *fn++ = '/';
          do
            cp++;
          while (*cp == '/' || *cp == '\\');
        }
      else
        *fn++ = *cp++;
    }

#else /* !DOS_NT */

  /* Collapse multiple slashes into a single slash.  */
  while (*cp != '\0')
    {
      if (*cp == '/')
        {
          *fn++ = '/';
          do
            cp++;
          while (*cp == '/');
        }
      else
        *fn++ = *cp++;
    }

#endif /* !DOS_NT */

  *fn = '\0';
}
7239 /* Initialize a linebuffer for use. */
7240 static void
7241 linebuffer_init (linebuffer *lbp)
7243 lbp->size = (DEBUG) ? 3 : 200;
7244 lbp->buffer = xnew (lbp->size, char);
7245 lbp->buffer[0] = '\0';
7246 lbp->len = 0;
7249 /* Set the minimum size of a string contained in a linebuffer. */
7250 static void
7251 linebuffer_setlen (linebuffer *lbp, int toksize)
7253 while (lbp->size <= toksize)
7255 lbp->size *= 2;
7256 xrnew (lbp->buffer, lbp->size, char);
7258 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);
  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
/* Like realloc but get fatal error if memory is exhausted.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);
  if (!mem)
    fatal ("virtual memory exhausted");
  return mem;
}
7281 * Local Variables:
7282 * indent-tabs-mode: t
7283 * tab-width: 8
7284 * fill-column: 79
7285 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7286 * c-file-style: "gnu"
7287 * End:
7290 /* etags.c ends here */