Improve of file-local-name use in vc-git-checkin
[emacs.git] / lib-src / etags.c
blob6f280d8ab40bc03d04598babef5c34cd6a06f675
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
/* Revision string compiled into the program.  Unlike most file-scope
   objects in this file it is not declared `static'.
   NOTE(review): the "@(#)" prefix looks like the marker that what(1)-style
   tools search for -- confirm.  */
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
/* Normalize DEBUG so that it always has a boolean value: `true' when the
   builder defined it (with any value), `false' otherwise.  In the
   non-debug case NDEBUG is defined as well, which disables assert.  */
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* A WIN32_NATIVE build is treated as WINDOWSNT and never as MSDOS.  */
95 #ifdef WIN32_NATIVE
96 # undef MSDOS
97 # undef WINDOWSNT
98 # define WINDOWSNT
99 #endif /* WIN32_NATIVE */
/* Normalize MSDOS to a boolean constant so that it always has a value;
   <sys/param.h> is pulled in only when MSDOS was defined.  */
101 #ifdef MSDOS
102 # undef MSDOS
103 # define MSDOS true
104 # include <sys/param.h>
105 #else
106 # define MSDOS false
107 #endif /* MSDOS */
/* On WINDOWSNT: include <direct.h>, drop HAVE_NTGUI, force DOS_NT to be
   defined, and spell O_CLOEXEC as O_NOINHERIT.  */
109 #ifdef WINDOWSNT
110 # include <direct.h>
111 # undef HAVE_NTGUI
112 # undef DOS_NT
113 # define DOS_NT
114 # define O_CLOEXEC O_NOINHERIT
115 #endif /* WINDOWSNT */
117 #include <limits.h>
118 #include <unistd.h>
119 #include <stdarg.h>
120 #include <stdlib.h>
121 #include <string.h>
122 #include <sysstdio.h>
123 #include <errno.h>
124 #include <fcntl.h>
125 #include <binary-io.h>
126 #include <c-ctype.h>
127 #include <c-strcase.h>
129 #include <assert.h>
130 #ifdef NDEBUG
131 # undef assert /* some systems have a buggy assert.h */
132 # define assert(x) ((void) 0)
133 #endif
135 #include <getopt.h>
136 #include <regex.h>
138 /* Define CTAGS to make the program "ctags" compatible with the usual one.
139 Leave it undefined to make the program "etags", which makes emacs-style
140 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* Whatever the builder did, CTAGS ends up as a boolean constant: it is
   tested both by `#if CTAGS' and inside ordinary expressions later on.  */
141 #ifdef CTAGS
142 # undef CTAGS
143 # define CTAGS true
144 #else
145 # define CTAGS false
146 #endif
/* Return true iff the NUL-terminated strings S and T compare equal
   under strcmp.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}
/* Return true iff S and T compare equal under c_strcasecmp.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}
/* Return true iff the first N characters of S and T compare equal
   under strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}
/* Return true iff the first N characters of S and T compare equal
   under c_strncasecmp.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
/* Return true if C is not in a name, i.e. C is NUL, tab, newline, form
   feed, carriage return, space, or one of the characters ( ) , ; =.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
/* Return true if C can start a token: an ASCII letter or one of the
   characters $ @ _ ~.  */
static bool
begtoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '@': case '_': case '~':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
/* Return true if C can be in the middle of a token: an ASCII letter or
   digit, a dollar sign, or an underscore.  */
static bool
intoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '_':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;

    default:
      return false;
    }
}
/* Return true if C can end a token: NUL, tab, newline, carriage return,
   space, or one of the punctuation characters listed below.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
240 * xnew, xrnew -- allocate, reallocate storage
242 * SYNOPSIS: Type *xnew (int n, Type);
243 * void xrnew (OldPointer, int n, Type);
/* xnew (n, Type) expands to a call of xmalloc for N * sizeof (Type) bytes,
   cast to `Type *'.  xrnew (op, n, Type) passes OP to xrealloc and assigns
   the cast result back to OP, so OP must be an lvalue and is evaluated
   twice.  Neither macro checks the size multiplication for overflow.  */
245 #define xnew(n, Type) ((Type *) xmalloc ((n) * sizeof (Type)))
246 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
/* Function type of a language parser: takes a `FILE *' and returns
   nothing.  The `function' member of `language' points to one of these.  */
248 typedef void Lang_function (FILE *);
/* Pairs a compressed-file suffix with the command that expands it.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;

  /* Command that takes one argument and decompresses to stdout.  */
  const char *command;
} compressor;
256 typedef struct
258 const char *name; /* language name */
259 const char *help; /* detailed help for the language */
260 Lang_function *function; /* parse function */
261 const char **suffixes; /* name suffixes of this language's files */
262 const char **filenames; /* names of this language's files */
263 const char **interpreters; /* interpreters for this language */
264 bool metasource; /* source used to generate other sources */
265 } language;
267 typedef struct fdesc
269 struct fdesc *next; /* for the linked list */
270 char *infname; /* uncompressed input file name */
271 char *infabsname; /* absolute uncompressed input file name */
272 char *infabsdir; /* absolute dir of input file */
273 char *taggedfname; /* file name to write in tagfile */
274 language *lang; /* language of file */
275 char *prop; /* file properties to write in tagfile */
276 bool usecharno; /* etags tags shall contain char number */
277 bool written; /* entry written in the tags file */
278 } fdesc;
280 typedef struct node_st
281 { /* sorting structure */
282 struct node_st *left, *right; /* left and right sons */
283 fdesc *fdp; /* description of file to whom tag belongs */
284 char *name; /* tag name */
285 char *regex; /* search regexp */
286 bool valid; /* write this tag on the tag file */
287 bool is_func; /* function tag: use regexp in CTAGS mode */
288 bool been_warned; /* warning already given for duplicated tag */
289 int lno; /* line number tag is on */
290 long cno; /* character number line starts on */
291 } node;
/* A `linebuffer' holds one line of text.  `readline_internal' reads a
   line from a stream into a linebuffer and works regardless of the
   length of the line.  */
typedef struct
{
  /* Size of BUFFER.  */
  long size;

  /* Length of the string in BUFFER after readline reads it.  */
  int len;

  /* The text itself.  */
  char *buffer;
} linebuffer;
307 /* Used to support mixing of --lang and file names. */
308 typedef struct
310 enum {
311 at_language, /* a language specification */
312 at_regexp, /* a regular expression */
313 at_filename, /* a file name */
314 at_stdin, /* read from stdin here */
315 at_end /* stop parsing the list */
316 } arg_type; /* argument type */
317 language *lang; /* language associated with the argument */
318 char *what; /* the argument itself */
319 } argument;
321 /* Structure defining a regular expression. */
322 typedef struct regexp
324 struct regexp *p_next; /* pointer to next in list */
325 language *lang; /* if set, use only for this language */
326 char *pattern; /* the regexp pattern */
327 char *name; /* tag name */
328 struct re_pattern_buffer *pat; /* the compiled pattern */
329 struct re_registers regs; /* re registers */
330 bool error_signaled; /* already signaled for this regexp */
331 bool force_explicit_name; /* do not allow implicit tag name */
332 bool ignore_case; /* ignore case when matching */
333 bool multi_line; /* do a multi-line match on the whole file */
334 } regexp;
337 /* Many compilers barf on this:
338 Lang_function Ada_funcs;
339 so let's write it this way */
/* Forward declarations of the parse functions.  All of them except
   C_entries have the Lang_function signature (a single `FILE *'
   argument); C_entries takes an additional leading `int c_ext' and is
   not itself listed in lang_names.  */
340 static void Ada_funcs (FILE *);
341 static void Asm_labels (FILE *);
342 static void C_entries (int c_ext, FILE *);
343 static void default_C_entries (FILE *);
344 static void plain_C_entries (FILE *);
345 static void Cjava_entries (FILE *);
346 static void Cobol_paragraphs (FILE *);
347 static void Cplusplus_entries (FILE *);
348 static void Cstar_entries (FILE *);
349 static void Erlang_functions (FILE *);
350 static void Forth_words (FILE *);
351 static void Fortran_functions (FILE *);
352 static void Go_functions (FILE *);
353 static void HTML_labels (FILE *);
354 static void Lisp_functions (FILE *);
355 static void Lua_functions (FILE *);
356 static void Makefile_targets (FILE *);
357 static void Pascal_functions (FILE *);
358 static void Perl_functions (FILE *);
359 static void PHP_functions (FILE *);
360 static void PS_functions (FILE *);
361 static void Prolog_functions (FILE *);
362 static void Python_functions (FILE *);
363 static void Ruby_functions (FILE *);
364 static void Scheme_functions (FILE *);
365 static void TeX_commands (FILE *);
366 static void Texinfo_nodes (FILE *);
367 static void Yacc_entries (FILE *);
368 static void just_read_file (FILE *);
/* Forward declarations of the file-local helpers; their definitions are
   not part of this portion of the file.  error, verror and fatal carry
   ATTRIBUTE_FORMAT_PRINTF annotations (presumably enabling compiler
   checking of their printf-style format arguments -- confirm), and
   suggest_asking_for_help, fatal and pfatal are declared _Noreturn.  */
370 static language *get_language_from_langname (const char *);
371 static void readline (linebuffer *, FILE *);
372 static long readline_internal (linebuffer *, FILE *, char const *);
373 static bool nocase_tail (const char *);
374 static void get_tag (char *, char **);
376 static void analyze_regex (char *);
377 static void free_regexps (void);
378 static void regex_tag_multiline (void);
379 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
380 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
381 static _Noreturn void suggest_asking_for_help (void);
382 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static _Noreturn void pfatal (const char *);
384 static void add_node (node *, node **);
386 static void process_file_name (char *, language *);
387 static void process_file (FILE *, char *, language *);
388 static void find_entries (FILE *);
389 static void free_tree (node *);
390 static void free_fdesc (fdesc *);
391 static void pfnote (char *, bool, char *, int, int, long);
392 static void invalidate_nodes (fdesc *, node **);
393 static void put_entries (node *);
395 static char *concat (const char *, const char *, const char *);
396 static char *skip_spaces (char *);
397 static char *skip_non_spaces (char *);
398 static char *skip_name (char *);
399 static char *savenstr (const char *, int);
400 static char *savestr (const char *);
401 static char *etags_getcwd (void);
402 static char *relative_filename (char *, char *);
403 static char *absolute_filename (char *, char *);
404 static char *absolute_dirname (char *, char *);
405 static bool filename_is_absolute (char *f);
406 static void canonicalize_filename (char *);
407 static char *etags_mktmp (void);
408 static void linebuffer_init (linebuffer *);
409 static void linebuffer_setlen (linebuffer *, int);
410 static void *xmalloc (size_t);
411 static void *xrealloc (void *, size_t);
/* File-scope program state and option flags.  The option flags declared
   `int' instead of `bool' (globals, members, declarations,
   no_line_directive, no_duplicates, packages_only, class_qualify) are
   exactly the ones whose address appears in the `flag' member of a
   longopts entry.  */
414 static char searchar = '/'; /* use /.../ searches */
416 static char *tagfile; /* output file */
417 static char *progname; /* name this program was invoked with */
418 static char *cwd; /* current working directory */
419 static char *tagfiledir; /* directory of tagfile */
420 static FILE *tagf; /* ioptr for tags file */
421 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
423 static fdesc *fdhead; /* head of file description list */
424 static fdesc *curfdp; /* current file description */
425 static char *infilename; /* current input file name */
426 static int lineno; /* line number of current line */
427 static long charno; /* current character number */
428 static long linecharno; /* charno of start of current line */
429 static char *dbp; /* pointer to start of current tag */
/* NOTE(review): presumably a sentinel meaning "no valid character
   number"; its uses are outside this portion of the file -- confirm.  */
431 static const int invalidcharno = -1;
433 static node *nodehead; /* the head of the binary tree of tags */
434 static node *last_node; /* the last node created */
436 static linebuffer lb; /* the current line */
437 static linebuffer filebuf; /* a buffer containing the whole file */
438 static linebuffer token_name; /* a buffer containing a tag name */
440 static bool append_to_tagfile; /* -a: append to tags */
441 /* The next five default to true in C and derived languages. */
442 static bool typedefs; /* -t: create tags for C and Ada typedefs */
443 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
444 /* 0 struct/enum/union decls, and C++ */
445 /* member functions. */
446 static bool constantypedefs; /* -d: create tags for C #define, enum */
447 /* constants and variables. */
448 /* -D: opposite of -d. Default under ctags. */
449 static int globals; /* create tags for global variables */
450 static int members; /* create tags for C member variables */
451 static int declarations; /* --declarations: tag them and extern in C&Co*/
452 static int no_line_directive; /* ignore #line directives (undocumented) */
453 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
454 static bool update; /* -u: update tags */
455 static bool vgrind_style; /* -v: create vgrind style index output */
456 static bool no_warnings; /* -w: suppress warnings (undocumented) */
457 static bool cxref_style; /* -x: create cxref style output */
458 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
459 static bool ignoreindent; /* -I: ignore indentation in C */
460 static int packages_only; /* --packages-only: in Ada, only tag packages*/
461 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
463 /* STDIN is defined in LynxOS system headers */
464 #ifdef STDIN
465 # undef STDIN
466 #endif
/* 0x1001 lies outside the range of the single-character option codes
   used elsewhere in longopts, so it cannot be confused with one.  */
468 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
469 static bool parsing_stdin; /* --parse-stdin used */
471 static regexp *p_head; /* list of all regexps */
472 static bool need_filebuf; /* some regexes are multi-line */
/* Long option table handed to getopt_long.  Each row is
   { name, has_arg, flag, val }.  Per the getopt_long documentation, a
   row whose FLAG is NULL makes getopt_long return VAL, while a row with
   a non-NULL FLAG makes it store VAL into *FLAG and return 0.  Hence
   e.g. --members and --no-members set `members' to 1 and 0, and
   --class-qualify stores the (nonzero) character code 'Q' into
   `class_qualify'.  The `#if CTAGS' section selects the rows specific to
   the ctags or the etags flavor of the program; the { NULL } row ends
   the table.
   NOTE(review): "help" is listed twice, with 'h' and with 'H'; which row
   a --help lookup selects depends on getopt_long's matching -- confirm
   that the second row is reachable.  */
474 static struct option longopts[] =
476 { "append", no_argument, NULL, 'a' },
477 { "packages-only", no_argument, &packages_only, 1 },
478 { "c++", no_argument, NULL, 'C' },
479 { "declarations", no_argument, &declarations, 1 },
480 { "no-line-directive", no_argument, &no_line_directive, 1 },
481 { "no-duplicates", no_argument, &no_duplicates, 1 },
482 { "help", no_argument, NULL, 'h' },
483 { "help", no_argument, NULL, 'H' },
484 { "ignore-indentation", no_argument, NULL, 'I' },
485 { "language", required_argument, NULL, 'l' },
486 { "members", no_argument, &members, 1 },
487 { "no-members", no_argument, &members, 0 },
488 { "output", required_argument, NULL, 'o' },
489 { "class-qualify", no_argument, &class_qualify, 'Q' },
490 { "regex", required_argument, NULL, 'r' },
491 { "no-regex", no_argument, NULL, 'R' },
492 { "ignore-case-regex", required_argument, NULL, 'c' },
493 { "parse-stdin", required_argument, NULL, STDIN },
494 { "version", no_argument, NULL, 'V' },
496 #if CTAGS /* Ctags options */
497 { "backward-search", no_argument, NULL, 'B' },
498 { "cxref", no_argument, NULL, 'x' },
499 { "defines", no_argument, NULL, 'd' },
500 { "globals", no_argument, &globals, 1 },
501 { "typedefs", no_argument, NULL, 't' },
502 { "typedefs-and-c++", no_argument, NULL, 'T' },
503 { "update", no_argument, NULL, 'u' },
504 { "vgrind", no_argument, NULL, 'v' },
505 { "no-warn", no_argument, NULL, 'w' },
507 #else /* Etags options */
508 { "no-defines", no_argument, NULL, 'D' },
509 { "no-globals", no_argument, &globals, 0 },
510 { "include", required_argument, NULL, 'i' },
511 #endif
512 { NULL }
515 static compressor compressors[] =
517 { "z", "gzip -d -c"},
518 { "Z", "gzip -d -c"},
519 { "gz", "gzip -d -c"},
520 { "GZ", "gzip -d -c"},
521 { "bz2", "bzip2 -d -c" },
522 { "xz", "xz -d -c" },
523 { NULL }
527 * Language stuff.
530 /* Ada code */
/* NULL-terminated suffix list and help text referenced by the "ada"
   entry of lang_names.  */
531 static const char *Ada_suffixes [] =
532 { "ads", "adb", "ada", NULL };
533 static const char Ada_help [] =
534 "In Ada code, functions, procedures, packages, tasks and types are\n\
535 tags. Use the '--packages-only' option to create tags for\n\
536 packages only.\n\
537 Ada tag names have suffixes indicating the type of entity:\n\
538 Entity type: Qualifier:\n\
539 ------------ ----------\n\
540 function /f\n\
541 procedure /p\n\
542 package spec /s\n\
543 package body /b\n\
544 type /t\n\
545 task /k\n\
546 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
547 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
548 will just search for any tag 'bidule'.";
550 /* Assembly code */
/* NULL-terminated suffix list and help text referenced by the "asm"
   entry of lang_names.  Both "s" and "S" are listed; they are distinct
   entries.  */
551 static const char *Asm_suffixes [] =
552 { "a", /* Unix assembler */
553 "asm", /* Microcontroller assembly */
554 "def", /* BSO/Tasking definition includes */
555 "inc", /* Microcontroller include files */
556 "ins", /* Microcontroller include files */
557 "s", "sa", /* Unix assembler */
558 "S", /* cpp-processed Unix assembler */
559 "src", /* BSO/Tasking C compiler output */
560 NULL
562 static const char Asm_help [] =
563 "In assembler code, labels appearing at the beginning of a line,\n\
564 followed by a colon, are tags.";
567 /* Note that .c and .h can be considered C++, if the --c++ flag was
568 given, or if the `class' or `template' keywords are met inside the file.
569 That is why default_C_entries is called for these. */
570 static const char *default_C_suffixes [] =
571 { "c", "h", NULL };
/* Help text referenced by the "c" entry of lang_names.  Exactly one of
   the two definitions below is compiled, depending on CTAGS, because the
   ctags and etags flavors of the program accept different options.  */
572 #if CTAGS /* C help for Ctags */
573 static const char default_C_help [] =
574 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
575 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
576 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
577 Use --globals to tag global variables.\n\
578 You can tag function declarations and external variables by\n\
579 using '--declarations', and struct members by using '--members'.";
580 #else /* C help for Etags */
581 static const char default_C_help [] =
582 "In C code, any C function or typedef is a tag, and so are\n\
583 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
584 definitions and 'enum' constants are tags unless you specify\n\
585 '--no-defines'. Global variables are tags unless you specify\n\
586 '--no-globals' and so are struct members unless you specify\n\
587 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
588 '--no-members' can make the tags table file much smaller.\n\
589 You can tag function declarations and external variables by\n\
590 using '--declarations'.";
591 #endif /* C help for Ctags and Etags */
/* NULL-terminated suffix list and help text referenced by the "c++"
   entry of lang_names.  Upper-case "C" and "H" are listed here, whereas
   lower-case "c" and "h" belong to default_C_suffixes.  */
593 static const char *Cplusplus_suffixes [] =
594 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
595 "M", /* Objective C++ */
596 "pdb", /* PostScript with C syntax */
597 NULL };
598 static const char Cplusplus_help [] =
599 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
600 --help --lang=c --lang=c++ for full help.)\n\
601 In addition to C tags, member functions are also recognized. Member\n\
602 variables are recognized unless you use the '--no-members' option.\n\
603 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
604 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
605 'operator+'.";
/* NULL-terminated suffix list referenced by the "java" entry of
   lang_names.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
/* Help text for the "java" entry of lang_names.  It is only ever stored
   in the `const char *help' member of `language', so the array itself
   is declared const.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* NULL-terminated suffix list referenced by the "cobol" entry of
   lang_names.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
/* Help text for the "cobol" entry of lang_names.  It is only ever stored
   in the `const char *help' member of `language', so the array itself
   is declared const.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
/* Suffix list referenced by the "c*" entry of lang_names; that entry
   has no help text of its own and uses no_lang_help.  */
620 static const char *Cstar_suffixes [] =
621 { "cs", "hs", NULL };
/* NULL-terminated suffix list and help text referenced by the "erlang"
   entry of lang_names.  */
623 static const char *Erlang_suffixes [] =
624 { "erl", "hrl", NULL };
625 static const char Erlang_help [] =
626 "In Erlang code, the tags are the functions, records and macros\n\
627 defined in the file.";
/* NULL-terminated suffix list referenced by the "forth" entry of
   lang_names.  Declared static: it is a file-local table like the other
   languages' suffix lists, so it should not have external linkage.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
/* Help text referenced by the "forth" entry of lang_names.  */
631 static const char Forth_help [] =
632 "In Forth code, tags are words defined by ':',\n\
633 constant, code, create, defer, value, variable, buffer:, field.";
/* NULL-terminated suffix list and help text referenced by the "fortran"
   entry of lang_names.  */
635 static const char *Fortran_suffixes [] =
636 { "F", "f", "f90", "for", NULL };
637 static const char Fortran_help [] =
638 "In Fortran code, functions, subroutines and block data are tags.";
/* Likewise for the "go" entry.  */
640 static const char *Go_suffixes [] = {"go", NULL};
641 static const char Go_help [] =
642 "In Go code, functions, interfaces and packages are tags.";
/* Likewise for the "html" entry.  */
644 static const char *HTML_suffixes [] =
645 { "htm", "html", "shtml", NULL };
646 static const char HTML_help [] =
647 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
648 'h3' headers. Also, tags are 'name=' in anchors and all\n\
649 occurrences of 'id='.";
/* NULL-terminated suffix list and help text referenced by the "lisp"
   entry of lang_names.  */
651 static const char *Lisp_suffixes [] =
652 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
653 static const char Lisp_help [] =
654 "In Lisp code, any function defined with 'defun', any variable\n\
655 defined with 'defvar' or 'defconst', and in general the first\n\
656 argument of any expression that starts with '(def' in column zero\n\
657 is a tag.\n\
658 The '--declarations' option tags \"(defvar foo)\" constructs too.";
/* Likewise for the "lua" entry.  */
660 static const char *Lua_suffixes [] =
661 { "lua", "LUA", NULL };
662 static const char Lua_help [] =
663 "In Lua scripts, all functions are tags.";
/* The "makefile" entry of lang_names has no suffix list (NULL); it is
   given this NULL-terminated list of whole file names instead.  */
665 static const char *Makefile_filenames [] =
666 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
667 static const char Makefile_help [] =
668 "In makefiles, targets are tags; additionally, variables are tags\n\
669 unless you specify '--no-globals'.";
/* NULL-terminated suffix list and help text referenced by the "objc"
   entry of lang_names.  In Objc_help the third source line ends with a
   bare backslash and the following line begins with the newline escape,
   so the text still contains a line break at that point.  */
671 static const char *Objc_suffixes [] =
672 { "lm", /* Objective lex file */
673 "m", /* Objective C file */
674 NULL };
675 static const char Objc_help [] =
676 "In Objective C code, tags include Objective C definitions for classes,\n\
677 class categories, methods and protocols. Tags for variables and\n\
678 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
679 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
/* Likewise for the "pascal" entry.  */
681 static const char *Pascal_suffixes [] =
682 { "p", "pas", NULL };
683 static const char Pascal_help [] =
684 "In Pascal code, the tags are the functions and procedures defined\n\
685 in the file.";
686 /* " // this is for working around an Emacs highlighting bug... */
/* NULL-terminated suffix list, interpreter list and help text referenced
   by the "perl" entry of lang_names -- the only entry in that table that
   supplies an interpreter list.
   NOTE(review): "@PERL@" looks like a build-time substitution
   placeholder; confirm whether it is meant to be replaced.  */
688 static const char *Perl_suffixes [] =
689 { "pl", "pm", NULL };
690 static const char *Perl_interpreters [] =
691 { "perl", "@PERL@", NULL };
692 static const char Perl_help [] =
693 "In Perl code, the tags are the packages, subroutines and variables\n\
694 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
695 '--globals' if you want to tag global variables. Tags for\n\
696 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
697 defined in the default package is 'main::SUB'.";
/* NULL-terminated suffix list and help text referenced by the "php"
   entry of lang_names.  */
699 static const char *PHP_suffixes [] =
700 { "php", "php3", "php4", NULL };
701 static const char PHP_help [] =
702 "In PHP code, tags are functions, classes and defines. Unless you use\n\
703 the '--no-members' option, vars are tags too.";
/* Suffix list referenced by the "proc" entry of lang_names; that entry
   has no help text of its own and uses no_lang_help.  */
705 static const char *plain_C_suffixes [] =
706 { "pc", /* Pro*C file */
707 NULL };
/* NULL-terminated suffix list and help text referenced by the
   "postscript" entry of lang_names.  */
709 static const char *PS_suffixes [] =
710 { "ps", "psw", NULL }; /* .psw is for PSWrap */
711 static const char PS_help [] =
712 "In PostScript code, the tags are the functions.";
/* Likewise for the "prolog" entry.  */
714 static const char *Prolog_suffixes [] =
715 { "prolog", NULL };
716 static const char Prolog_help [] =
717 "In Prolog code, tags are predicates and rules at the beginning of\n\
718 line.";
/* Likewise for the "python" entry.  */
720 static const char *Python_suffixes [] =
721 { "py", NULL };
722 static const char Python_help [] =
723 "In Python code, 'def' or 'class' at the beginning of a line\n\
724 generate a tag.";
/* NULL-terminated suffix list, whole-file-name list and help text
   referenced by the "ruby" entry of lang_names.  */
726 static const char *Ruby_suffixes [] =
727 { "rb", "ru", "rbw", NULL };
728 static const char *Ruby_filenames [] =
729 { "Rakefile", "Thorfile", NULL };
730 static const char Ruby_help [] =
731 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
732 a line generate a tag. Constants also generate a tag.";
/* NULL-terminated suffix list and help text referenced by the "scheme"
   entry of lang_names.  */
734 /* Can't do the `SCM' or `scm' prefix with a version number. */
735 static const char *Scheme_suffixes [] =
736 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
737 static const char Scheme_help [] =
738 "In Scheme code, tags include anything defined with 'def' or with a\n\
739 construct whose name starts with 'def'. They also include\n\
740 variables set with 'set!' at top level in the file.";
/* NULL-terminated suffix list and help text referenced by the "tex"
   entry of lang_names.  Backslashes in the command names are doubled
   because they appear inside a C string literal.  */
742 static const char *TeX_suffixes [] =
743 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
744 static const char TeX_help [] =
745 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
746 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
747 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
748 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
749 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
751 Other commands can be specified by setting the environment variable\n\
752 'TEXTAGS' to a colon-separated list like, for example,\n\
753 TEXTAGS=\"mycommand:myothercommand\".";
/* Likewise for the "texinfo" entry.  */
756 static const char *Texinfo_suffixes [] =
757 { "texi", "texinfo", "txi", NULL };
758 static const char Texinfo_help [] =
759 "for texinfo files, lines starting with @node are tagged.";
/* NULL-terminated suffix list and help text referenced by the "yacc"
   entry of lang_names, the only entry there whose `metasource' member is
   set to true.  */
761 static const char *Yacc_suffixes [] =
762 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
763 static const char Yacc_help [] =
764 "In Bison or Yacc input files, each rule defines as a tag the\n\
765 nonterminal it constructs. The portions of the file that contain\n\
766 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
767 for full help).";
/* Help text for the "auto" pseudo-language entry of lang_names.  The
   wording was corrected from "files base on" to "files based on".  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
/* Help text referenced by the "none" pseudo-language entry of
   lang_names.  */
773 static const char none_help [] =
774 "'none' is not a real language, it indicates to only do\n\
775 regexp processing on files.";
/* Help text shared by the lang_names entries that have no specific
   description ("c*" and "proc").  */
777 static const char no_lang_help [] =
778 "No detailed help available for this language.";
782 * Table of languages.
784 * It is ok for a given function to be listed under more than one
785 * name. I just didn't.
/* Each row is a positional initializer for `language', in member order:
   name, help, function, suffixes, filenames, interpreters, metasource.
   Members omitted at the end of a row are zero-initialized, i.e. NULL or
   false; thus "auto" has no parse function, and most rows have neither
   file names nor interpreters.  The row whose name is NULL marks the end
   of the table.  */
788 static language lang_names [] =
790 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
791 { "asm", Asm_help, Asm_labels, Asm_suffixes },
792 { "c", default_C_help, default_C_entries, default_C_suffixes },
793 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
794 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
795 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
796 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
797 { "forth", Forth_help, Forth_words, Forth_suffixes },
798 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
799 { "go", Go_help, Go_functions, Go_suffixes },
800 { "html", HTML_help, HTML_labels, HTML_suffixes },
801 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
802 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
803 { "lua", Lua_help, Lua_functions, Lua_suffixes },
804 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
805 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
806 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
807 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
808 { "php", PHP_help, PHP_functions, PHP_suffixes },
809 { "postscript",PS_help, PS_functions, PS_suffixes },
810 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
811 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
812 { "python", Python_help, Python_functions, Python_suffixes },
813 { "ruby", Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
814 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
815 { "tex", TeX_help, TeX_commands, TeX_suffixes },
816 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
817 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
818 { "auto", auto_help }, /* default guessing scheme */
819 { "none", none_help, just_read_file }, /* regexp matching only */
820 { NULL } /* end of list */
/* Print the list of supported languages on standard output.
   Every entry of lang_names produces one line: the language name,
   then each of its default file names (when the entry has any), then
   each of its suffixes preceded by a dot (when the entry has any).
   A fixed explanatory text about 'auto', 'none', language guessing and
   compressed files is printed afterwards. */
824 static void
825 print_language_names (void)
827 language *lang;
828 const char **name, **ext;
830 puts ("\nThese are the currently supported languages, along with the\n\
831 default file names and dot suffixes:");
/* The table is terminated by an entry whose name is NULL. */
832 for (lang = lang_names; lang->name != NULL; lang++)
/* The name is left-justified in a field whose width is passed as 10. */
834 printf (" %-*s", 10, lang->name);
/* Both lists are optional and, when present, NULL-terminated. */
835 if (lang->filenames != NULL)
836 for (name = lang->filenames; *name != NULL; name++)
837 printf (" %s", *name);
838 if (lang->suffixes != NULL)
839 for (ext = lang->suffixes; *ext != NULL; ext++)
840 printf (" .%s", *ext);
/* End the line for this language. */
841 puts ("");
843 puts ("where 'auto' means use default language for files based on file\n\
844 name suffix, and 'none' means only do regexp processing on files.\n\
845 If no language is specified and no matching suffix is found,\n\
846 the first line of the file is read for a sharp-bang (#!) sequence\n\
847 followed by the name of an interpreter. If no such sequence is found,\n\
848 Fortran is tried first; if no tags are found, C is tried next.\n\
849 When parsing any C file, a \"class\" or \"template\" keyword\n\
850 switches to C++.");
851 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
853 For detailed help on a given language use, for example,\n\
854 etags --help --lang=ada.");
857 #ifndef EMACS_NAME
858 # define EMACS_NAME "standalone"
859 #endif
860 #ifndef VERSION
861 # define VERSION "17.38.1.4"
862 #endif
863 static _Noreturn void
864 print_version (void)
866 char emacs_copyright[] = COPYRIGHT;
868 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
869 puts (emacs_copyright);
870 puts ("This program is distributed under the terms in ETAGS.README");
872 exit (EXIT_SUCCESS);
/* Compile-time switch: when true, print_help also describes the -w
   options that are otherwise left out of the help text. */
875 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
876 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
877 #endif
/* Print help on standard output and exit with EXIT_SUCCESS.
   ARGBUFFER is the argument list collected by the caller, terminated
   by an entry of type at_end.  If it contains at_language entries, only
   the help strings of those languages are printed (separated by empty
   lines).  Otherwise the general usage text is printed; several options
   are described only when CTAGS is set, others only when it is not.
   The list of languages and the bug-report address come last. */
879 static _Noreturn void
880 print_help (argument *argbuffer)
882 bool help_for_lang = false;
/* Per-language help: one help string for each language argument. */
884 for (; argbuffer->arg_type != at_end; argbuffer++)
885 if (argbuffer->arg_type == at_language)
887 if (help_for_lang)
888 puts ("");
889 puts (argbuffer->lang->help);
890 help_for_lang = true;
/* Language help replaces the general help entirely. */
893 if (help_for_lang)
894 exit (EXIT_SUCCESS);
/* General help starts here. */
896 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
898 These are the options accepted by %s.\n", progname, progname);
899 puts ("You may use unambiguous abbreviations for the long option names.");
900 puts (" A - as file name means read names from stdin (one per line).\n\
901 Absolute names are stored in the output file as they are.\n\
902 Relative ones are stored relative to the output file's directory.\n");
904 puts ("-a, --append\n\
905 Append tag entries to existing tags file.");
907 puts ("--packages-only\n\
908 For Ada files, only generate tags for packages.");
910 if (CTAGS)
911 puts ("-B, --backward-search\n\
912 Write the search commands for the tag entries using '?', the\n\
913 backward-search command instead of '/', the forward-search command.");
915 /* This option is mostly obsolete, because etags can now automatically
916 detect C++. Retained for backward compatibility and for debugging and
917 experimentation. In principle, we could want to tag as C++ even
918 before any "class" or "template" keyword.
919 puts ("-C, --c++\n\
920 Treat files whose name suffix defaults to C language as C++ files.");
923 puts ("--declarations\n\
924 In C and derived languages, create tags for function declarations,");
925 if (CTAGS)
926 puts ("\tand create tags for extern variables if --globals is used.");
927 else
928 puts
929 ("\tand create tags for extern variables unless --no-globals is used.");
931 if (CTAGS)
932 puts ("-d, --defines\n\
933 Create tag entries for C #define constants and enum constants, too.");
934 else
935 puts ("-D, --no-defines\n\
936 Don't create tag entries for C #define constants and enum constants.\n\
937 This makes the tags file smaller.");
939 if (!CTAGS)
940 puts ("-i FILE, --include=FILE\n\
941 Include a note in tag file indicating that, when searching for\n\
942 a tag, one should also consult the tags file FILE after\n\
943 checking the current file.");
945 puts ("-l LANG, --language=LANG\n\
946 Force the following files to be considered as written in the\n\
947 named language up to the next --language=LANG option.");
949 if (CTAGS)
950 puts ("--globals\n\
951 Create tag entries for global variables in some languages.");
952 else
953 puts ("--no-globals\n\
954 Do not create tag entries for global variables in some\n\
955 languages. This makes the tags file smaller.");
957 puts ("--no-line-directive\n\
958 Ignore #line preprocessor directives in C and derived languages.");
960 if (CTAGS)
961 puts ("--members\n\
962 Create tag entries for members of structures in some languages.");
963 else
964 puts ("--no-members\n\
965 Do not create tag entries for members of structures\n\
966 in some languages.");
968 puts ("-Q, --class-qualify\n\
969 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
970 This produces tag names of the form \"class::member\" for C++,\n\
971 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
972 For Objective C, this also produces class methods qualified with\n\
973 their arguments, as in \"foo:bar:baz:more\".\n\
974 For Perl, this produces \"package::member\".");
975 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
976 Make a tag for each line matching a regular expression pattern\n\
977 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
978 files only. REGEXFILE is a file containing one REGEXP per line.\n\
979 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
980 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
981 puts (" If TAGNAME/ is present, the tags created are named.\n\
982 For example Tcl named tags can be created with:\n\
983 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
984 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
985 'm' means to allow multi-line matches, 's' implies 'm' and\n\
986 causes dot to match any character, including newline.");
988 puts ("-R, --no-regex\n\
989 Don't create tags from regexps for the following files.");
991 puts ("-I, --ignore-indentation\n\
992 In C and C++ do not assume that a closing brace in the first\n\
993 column is the final brace of a function or structure definition.");
995 puts ("-o FILE, --output=FILE\n\
996 Write the tags to FILE.");
998 puts ("--parse-stdin=NAME\n\
999 Read from standard input and record tags as belonging to file NAME.");
1001 if (CTAGS)
1003 puts ("-t, --typedefs\n\
1004 Generate tag entries for C and Ada typedefs.");
1005 puts ("-T, --typedefs-and-c++\n\
1006 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1007 and C++ member functions.");
1010 if (CTAGS)
1011 puts ("-u, --update\n\
1012 Update the tag entries for the given files, leaving tag\n\
1013 entries for other files in place. Currently, this is\n\
1014 implemented by deleting the existing entries for the given\n\
1015 files and then rewriting the new entries at the end of the\n\
1016 tags file. It is often faster to simply rebuild the entire\n\
1017 tag file than to use this.");
1019 if (CTAGS)
1021 puts ("-v, --vgrind\n\
1022 Print on the standard output an index of items intended for\n\
1023 human consumption, similar to the output of vgrind. The index\n\
1024 is sorted, and gives the page number of each item.");
1026 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1027 puts ("-w, --no-duplicates\n\
1028 Do not create duplicate tag entries, for compatibility with\n\
1029 traditional ctags.");
1031 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1032 puts ("-w, --no-warn\n\
1033 Suppress warning messages about duplicate tag entries.");
1035 puts ("-x, --cxref\n\
1036 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1037 The output uses line numbers instead of page numbers, but\n\
1038 beyond that the differences are cosmetic; try both to see\n\
1039 which you like.");
1042 puts ("-V, --version\n\
1043 Print the version of the program.\n\
1044 -h, --help\n\
1045 Print this help message.\n\
1046 Followed by one or more '--language' options prints detailed\n\
1047 help about tag generation for the specified languages.");
1049 print_language_names ();
1051 puts ("");
1052 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1054 exit (EXIT_SUCCESS);
/* Program entry point.
   Options are parsed with getopt_long; file names, language switches
   and regexps are recorded in argbuffer in command-line order so that
   each --language or --regex applies to the files that follow it.
   The recorded arguments are then processed in order.  Unless CTAGS is
   set without cxref_style, the tags are written as files are processed
   and the function returns right after the final entries are emitted.
   In plain ctags mode the whole tree is written at the end, optionally
   after filtering old entries (update) and followed by a sort. */
1059 main (int argc, char **argv)
1061 int i;
1062 unsigned int nincluded_files;
1063 char **included_files;
1064 argument *argbuffer;
1065 int current_arg, file_count;
1066 linebuffer filename_lb;
1067 bool help_asked = false;
1068 ptrdiff_t len;
1069 char *optstring;
1070 int opt;
1072 progname = argv[0];
1073 nincluded_files = 0;
/* At most argc entries can ever be stored in included_files. */
1074 included_files = xnew (argc, char *);
1075 current_arg = 0;
1076 file_count = 0;
1078 /* Allocate enough no matter what happens. Overkill, but each one
1079 is small. */
1080 argbuffer = xnew (argc, argument);
1083 * Always find typedefs and structure tags.
1084 * Also default to find macro constants, enum constants, struct
1085 * members and global variables. Do it for both etags and ctags.
1087 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1088 globals = members = true;
1090 /* When the optstring begins with a '-' getopt_long does not rearrange the
1091 non-options arguments to be at the end, but leaves them alone. */
1092 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1093 (CTAGS) ? "BxdtTuvw" : "Di:",
1094 "");
1096 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1097 switch (opt)
1099 case 0:
1100 /* If getopt returns 0, then it has already processed a
1101 long-named option. We should do nothing. */
1102 break;
1104 case 1:
1105 /* This means that a file name has been seen. Record it. */
1106 argbuffer[current_arg].arg_type = at_filename;
1107 argbuffer[current_arg].what = optarg;
/* whatlen_max tracks the longest recorded argument text; it is used
   further down to size the shell command buffer for --update. */
1108 len = strlen (optarg);
1109 if (whatlen_max < len)
1110 whatlen_max = len;
1111 ++current_arg;
1112 ++file_count;
1113 break;
1115 case STDIN:
1116 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1117 argbuffer[current_arg].arg_type = at_stdin;
1118 argbuffer[current_arg].what = optarg;
1119 len = strlen (optarg);
1120 if (whatlen_max < len)
1121 whatlen_max = len;
1122 ++current_arg;
1123 ++file_count;
1124 if (parsing_stdin)
1125 fatal ("cannot parse standard input more than once");
1126 parsing_stdin = true;
1127 break;
1129 /* Common options. */
1130 case 'a': append_to_tagfile = true; break;
1131 case 'C': cplusplus = true; break;
1132 case 'f': /* for compatibility with old makefiles */
1133 case 'o':
1134 if (tagfile)
1136 error ("-o option may only be given once.");
1137 suggest_asking_for_help ();
1138 /* NOTREACHED */
1140 tagfile = optarg;
1141 break;
1142 case 'I':
1143 case 'S': /* for backward compatibility */
1144 ignoreindent = true;
1145 break;
1146 case 'l':
/* An unknown language name records nothing; the lookup function
   reports the error itself. */
1148 language *lang = get_language_from_langname (optarg);
1149 if (lang != NULL)
1151 argbuffer[current_arg].lang = lang;
1152 argbuffer[current_arg].arg_type = at_language;
1153 ++current_arg;
1156 break;
1157 case 'c':
1158 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1159 optarg = concat (optarg, "i", ""); /* memory leak here */
1160 FALLTHROUGH;
1161 case 'r':
1162 argbuffer[current_arg].arg_type = at_regexp;
1163 argbuffer[current_arg].what = optarg;
1164 len = strlen (optarg);
1165 if (whatlen_max < len)
1166 whatlen_max = len;
1167 ++current_arg;
1168 break;
1169 case 'R':
/* -R is recorded as a regexp argument with a NULL pattern. */
1170 argbuffer[current_arg].arg_type = at_regexp;
1171 argbuffer[current_arg].what = NULL;
1172 ++current_arg;
1173 break;
1174 case 'V':
1175 print_version ();
1176 break;
1177 case 'h':
1178 case 'H':
/* Help is deferred until all arguments are seen, so that language
   options given after --help are still collected. */
1179 help_asked = true;
1180 break;
1181 case 'Q':
1182 class_qualify = 1;
1183 break;
1185 /* Etags options */
1186 case 'D': constantypedefs = false; break;
1187 case 'i': included_files[nincluded_files++] = optarg; break;
1189 /* Ctags options. */
1190 case 'B': searchar = '?'; break;
1191 case 'd': constantypedefs = true; break;
1192 case 't': typedefs = true; break;
1193 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1194 case 'u': update = true; break;
1195 case 'v': vgrind_style = true; FALLTHROUGH;
1196 case 'x': cxref_style = true; break;
1197 case 'w': no_warnings = true; break;
1198 default:
1199 suggest_asking_for_help ();
1200 /* NOTREACHED */
1203 /* No more options. Store the rest of arguments. */
1204 for (; optind < argc; optind++)
1206 argbuffer[current_arg].arg_type = at_filename;
1207 argbuffer[current_arg].what = argv[optind];
1208 len = strlen (argv[optind]);
1209 if (whatlen_max < len)
1210 whatlen_max = len;
1211 ++current_arg;
1212 ++file_count;
/* Terminate the list; print_help relies on this at_end marker. */
1215 argbuffer[current_arg].arg_type = at_end;
1217 if (help_asked)
1218 print_help (argbuffer);
1219 /* NOTREACHED */
1221 if (nincluded_files == 0 && file_count == 0)
1223 error ("no input files specified.");
1224 suggest_asking_for_help ();
1225 /* NOTREACHED */
1228 if (tagfile == NULL)
1229 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1230 cwd = etags_getcwd (); /* the current working directory */
/* Make sure cwd ends with a slash. */
1231 if (cwd[strlen (cwd) - 1] != '/')
1233 char *oldcwd = cwd;
1234 cwd = concat (oldcwd, "/", "");
1235 free (oldcwd);
1238 /* Compute base directory for relative file names. */
1239 if (streq (tagfile, "-")
1240 || strneq (tagfile, "/dev/", 5))
1241 tagfiledir = cwd; /* relative file names are relative to cwd */
1242 else
1244 canonicalize_filename (tagfile);
1245 tagfiledir = absolute_dirname (tagfile, cwd);
1248 linebuffer_init (&lb);
1249 linebuffer_init (&filename_lb);
1250 linebuffer_init (&filebuf);
1251 linebuffer_init (&token_name);
/* In etags mode the output is opened before any file is processed;
   a tag file named "-" means standard output. */
1253 if (!CTAGS)
1255 if (streq (tagfile, "-"))
1257 tagf = stdout;
1258 set_binary_mode (STDOUT_FILENO, O_BINARY);
1260 else
1261 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1262 if (tagf == NULL)
1263 pfatal (tagfile);
1267 * Loop through files finding functions.
1269 for (i = 0; i < current_arg; i++)
1271 static language *lang; /* non-NULL if language is forced */
1272 char *this_file;
1274 switch (argbuffer[i].arg_type)
1276 case at_language:
1277 lang = argbuffer[i].lang;
1278 break;
1279 case at_regexp:
1280 analyze_regex (argbuffer[i].what);
1281 break;
1282 case at_filename:
1283 this_file = argbuffer[i].what;
1284 /* Input file named "-" means read file names from stdin
1285 (one per line) and use them. */
1286 if (streq (this_file, "-"))
1288 if (parsing_stdin)
1289 fatal ("cannot parse standard input "
1290 "AND read file names from it");
1291 while (readline_internal (&filename_lb, stdin, "-") > 0)
1292 process_file_name (filename_lb.buffer, lang);
1294 else
1295 process_file_name (this_file, lang);
1296 break;
1297 case at_stdin:
1298 this_file = argbuffer[i].what;
1299 process_file (stdin, this_file, lang);
1300 break;
1301 default:
1302 error ("internal error: arg_type");
1306 free_regexps ();
1307 free (lb.buffer);
1308 free (filebuf.buffer);
1309 free (token_name.buffer);
1311 if (!CTAGS || cxref_style)
1313 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1314 put_entries (nodehead);
1315 free_tree (nodehead);
1316 nodehead = NULL;
1317 if (!CTAGS)
1319 fdesc *fdp;
1321 /* Output file entries that have no tags. */
1322 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1323 if (!fdp->written)
1324 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
/* One "include" section per -i option, in the order given. */
1326 while (nincluded_files-- > 0)
1327 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1329 if (fclose (tagf) == EOF)
1330 pfatal (tagfile);
1333 return EXIT_SUCCESS;
1336 /* From here on, we are in (CTAGS && !cxref_style) */
1337 if (update)
/* NOTE(review): the tag file name and each input file name are copied
   into the shell command below without quoting or escaping, so names
   containing shell metacharacters or quotes alter the command that
   system runs.  Consider escaping them or avoiding the shell. */
1339 char *cmd =
1340 xmalloc (strlen (tagfile) + whatlen_max +
1341 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1342 for (i = 0; i < current_arg; ++i)
1344 switch (argbuffer[i].arg_type)
1346 case at_filename:
1347 case at_stdin:
1348 break;
1349 default:
1350 continue; /* the for loop */
1352 char *z = stpcpy (cmd, "mv ");
1353 z = stpcpy (z, tagfile);
1354 z = stpcpy (z, " OTAGS;grep -Fv '\t");
1355 z = stpcpy (z, argbuffer[i].what);
1356 z = stpcpy (z, "\t' OTAGS >");
1357 z = stpcpy (z, tagfile);
1358 strcpy (z, ";rm OTAGS");
1359 if (system (cmd) != EXIT_SUCCESS)
1360 fatal ("failed to execute shell command");
1362 free (cmd);
/* The filtered old entries stay in place; new ones are appended. */
1363 append_to_tagfile = true;
1366 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1367 if (tagf == NULL)
1368 pfatal (tagfile);
1369 put_entries (nodehead); /* write all the tags (CTAGS) */
1370 free_tree (nodehead);
1371 nodehead = NULL;
1372 if (fclose (tagf) == EOF)
1373 pfatal (tagfile);
1375 if (CTAGS)
1376 if (append_to_tagfile || update)
/* After appending, the tag file is sorted in place with duplicate
   lines removed; the exit status is whatever system returns.
   NOTE(review): tagfile is again passed to the shell unquoted. */
1378 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1379 /* Maybe these should be used:
1380 setenv ("LC_COLLATE", "C", 1);
1381 setenv ("LC_ALL", "C", 1); */
1382 char *z = stpcpy (cmd, "sort -u -o ");
1383 z = stpcpy (z, tagfile);
1384 *z++ = ' ';
1385 strcpy (z, tagfile);
1386 return system (cmd);
1388 return EXIT_SUCCESS;
1393 * Return a compressor given the file name. If EXTPTR is non-zero,
1394 * return a pointer into FILE where the compressor-specific
1395 * extension begins. If no compressor is found, NULL is returned
1396 * and EXTPTR is not significant.
1397 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1399 static compressor *
1400 get_compressor_from_suffix (char *file, char **extptr)
1402 compressor *compr;
1403 char *slash, *suffix;
1405 /* File has been processed by canonicalize_filename,
1406 so we don't need to consider backslashes on DOS_NT. */
1407 slash = strrchr (file, '/');
1408 suffix = strrchr (file, '.');
1409 if (suffix == NULL || suffix < slash)
1410 return NULL;
1411 if (extptr != NULL)
1412 *extptr = suffix;
1413 suffix += 1;
1414 /* Let those poor souls who live with DOS 8+3 file name limits get
1415 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1416 Only the first do loop is run if not MSDOS */
1419 for (compr = compressors; compr->suffix != NULL; compr++)
1420 if (streq (compr->suffix, suffix))
1421 return compr;
1422 if (!MSDOS)
1423 break; /* do it only once: not really a loop */
1424 if (extptr != NULL)
1425 *extptr = ++suffix;
1426 } while (*suffix != '\0');
1427 return NULL;
1433 * Return a language given the name.
1435 static language *
1436 get_language_from_langname (const char *name)
1438 language *lang;
1440 if (name == NULL)
1441 error ("empty language name");
1442 else
1444 for (lang = lang_names; lang->name != NULL; lang++)
1445 if (streq (name, lang->name))
1446 return lang;
1447 error ("unknown language \"%s\"", name);
1450 return NULL;
1455 * Return a language given the interpreter name.
1457 static language *
1458 get_language_from_interpreter (char *interpreter)
1460 language *lang;
1461 const char **iname;
1463 if (interpreter == NULL)
1464 return NULL;
1465 for (lang = lang_names; lang->name != NULL; lang++)
1466 if (lang->interpreters != NULL)
1467 for (iname = lang->interpreters; *iname != NULL; iname++)
1468 if (streq (*iname, interpreter))
1469 return lang;
1471 return NULL;
1477 * Return a language given the file name.
1479 static language *
1480 get_language_from_filename (char *file, int case_sensitive)
1482 language *lang;
1483 const char **name, **ext, *suffix;
1484 char *slash;
1486 /* Try whole file name first. */
1487 slash = strrchr (file, '/');
1488 if (slash != NULL)
1489 file = slash + 1;
1490 #ifdef DOS_NT
1491 else if (file[0] && file[1] == ':')
1492 file += 2;
1493 #endif
1494 for (lang = lang_names; lang->name != NULL; lang++)
1495 if (lang->filenames != NULL)
1496 for (name = lang->filenames; *name != NULL; name++)
1497 if ((case_sensitive)
1498 ? streq (*name, file)
1499 : strcaseeq (*name, file))
1500 return lang;
1502 /* If not found, try suffix after last dot. */
1503 suffix = strrchr (file, '.');
1504 if (suffix == NULL)
1505 return NULL;
1506 suffix += 1;
1507 for (lang = lang_names; lang->name != NULL; lang++)
1508 if (lang->suffixes != NULL)
1509 for (ext = lang->suffixes; *ext != NULL; ext++)
1510 if ((case_sensitive)
1511 ? streq (*ext, suffix)
1512 : strcaseeq (*ext, suffix))
1513 return lang;
1514 return NULL;
1519 * This routine is called on each file argument.
1521 static void
1522 process_file_name (char *file, language *lang)
1524 FILE *inf;
1525 fdesc *fdp;
1526 compressor *compr;
1527 char *compressed_name, *uncompressed_name;
1528 char *ext, *real_name, *tmp_name;
1529 int retval;
1531 canonicalize_filename (file);
1532 if (streq (file, tagfile) && !streq (tagfile, "-"))
1534 error ("skipping inclusion of %s in self.", file);
1535 return;
1537 compr = get_compressor_from_suffix (file, &ext);
1538 if (compr)
1540 compressed_name = file;
1541 uncompressed_name = savenstr (file, ext - file);
1543 else
1545 compressed_name = NULL;
1546 uncompressed_name = file;
1549 /* If the canonicalized uncompressed name
1550 has already been dealt with, skip it silently. */
1551 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553 assert (fdp->infname != NULL);
1554 if (streq (uncompressed_name, fdp->infname))
1555 goto cleanup;
1558 inf = fopen (file, "r" FOPEN_BINARY);
1559 if (inf)
1560 real_name = file;
1561 else
1563 int file_errno = errno;
1564 if (compressed_name)
1566 /* Try with the given suffix. */
1567 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1568 if (inf)
1569 real_name = uncompressed_name;
1571 else
1573 /* Try all possible suffixes. */
1574 for (compr = compressors; compr->suffix != NULL; compr++)
1576 compressed_name = concat (file, ".", compr->suffix);
1577 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1578 if (inf)
1580 real_name = compressed_name;
1581 break;
1583 if (MSDOS)
1585 char *suf = compressed_name + strlen (file);
1586 size_t suflen = strlen (compr->suffix) + 1;
1587 for ( ; suf[1]; suf++, suflen--)
1589 memmove (suf, suf + 1, suflen);
1590 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1591 if (inf)
1593 real_name = compressed_name;
1594 break;
1597 if (inf)
1598 break;
1600 free (compressed_name);
1601 compressed_name = NULL;
1604 if (! inf)
1606 errno = file_errno;
1607 perror (file);
1608 goto cleanup;
1612 if (real_name == compressed_name)
1614 fclose (inf);
1615 tmp_name = etags_mktmp ();
1616 if (!tmp_name)
1617 inf = NULL;
1618 else
1620 #if MSDOS || defined (DOS_NT)
1621 char *cmd1 = concat (compr->command, " \"", real_name);
1622 char *cmd = concat (cmd1, "\" > ", tmp_name);
1623 #else
1624 char *cmd1 = concat (compr->command, " '", real_name);
1625 char *cmd = concat (cmd1, "' > ", tmp_name);
1626 #endif
1627 free (cmd1);
1628 int tmp_errno;
1629 if (system (cmd) == -1)
1631 inf = NULL;
1632 tmp_errno = EINVAL;
1634 else
1636 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1637 tmp_errno = errno;
1639 free (cmd);
1640 errno = tmp_errno;
1643 if (!inf)
1645 perror (real_name);
1646 goto cleanup;
1650 process_file (inf, uncompressed_name, lang);
1652 retval = fclose (inf);
1653 if (real_name == compressed_name)
1655 remove (tmp_name);
1656 free (tmp_name);
1658 if (retval < 0)
1659 pfatal (file);
1661 cleanup:
1662 if (compressed_name != file)
1663 free (compressed_name);
1664 if (uncompressed_name != file)
1665 free (uncompressed_name);
1666 last_node = NULL;
1667 curfdp = NULL;
1668 return;
1671 static void
1672 process_file (FILE *fh, char *fn, language *lang)
1674 static const fdesc emptyfdesc;
1675 fdesc *fdp;
1677 infilename = fn;
1678 /* Create a new input file description entry. */
1679 fdp = xnew (1, fdesc);
1680 *fdp = emptyfdesc;
1681 fdp->next = fdhead;
1682 fdp->infname = savestr (fn);
1683 fdp->lang = lang;
1684 fdp->infabsname = absolute_filename (fn, cwd);
1685 fdp->infabsdir = absolute_dirname (fn, cwd);
1686 if (filename_is_absolute (fn))
1688 /* An absolute file name. Canonicalize it. */
1689 fdp->taggedfname = absolute_filename (fn, NULL);
1691 else
1693 /* A file name relative to cwd. Make it relative
1694 to the directory of the tags file. */
1695 fdp->taggedfname = relative_filename (fn, tagfiledir);
1697 fdp->usecharno = true; /* use char position when making tags */
1698 fdp->prop = NULL;
1699 fdp->written = false; /* not written on tags file yet */
1701 fdhead = fdp;
1702 curfdp = fdhead; /* the current file description */
1704 find_entries (fh);
1706 /* If not Ctags, and if this is not metasource and if it contained no #line
1707 directives, we can write the tags and free all nodes pointing to
1708 curfdp. */
1709 if (!CTAGS
1710 && curfdp->usecharno /* no #line directives in this file */
1711 && !curfdp->lang->metasource)
1713 node *np, *prev;
1715 /* Look for the head of the sublist relative to this file. See add_node
1716 for the structure of the node tree. */
1717 prev = NULL;
1718 for (np = nodehead; np != NULL; prev = np, np = np->left)
1719 if (np->fdp == curfdp)
1720 break;
1722 /* If we generated tags for this file, write and delete them. */
1723 if (np != NULL)
1725 /* This is the head of the last sublist, if any. The following
1726 instructions depend on this being true. */
1727 assert (np->left == NULL);
1729 assert (fdhead == curfdp);
1730 assert (last_node->fdp == curfdp);
1731 put_entries (np); /* write tags for file curfdp->taggedfname */
1732 free_tree (np); /* remove the written nodes */
1733 if (prev == NULL)
1734 nodehead = NULL; /* no nodes left */
1735 else
1736 prev->left = NULL; /* delete the pointer to the sublist */
1741 static void
1742 reset_input (FILE *inf)
1744 if (fseek (inf, 0, SEEK_SET) != 0)
1745 perror (infilename);
1749 * This routine opens the specified file and calls the function
1750 * which finds the function and type definitions.
1752 static void
1753 find_entries (FILE *inf)
1755 char *cp;
1756 language *lang = curfdp->lang;
1757 Lang_function *parser = NULL;
1759 /* If user specified a language, use it. */
1760 if (lang != NULL && lang->function != NULL)
1762 parser = lang->function;
1765 /* Else try to guess the language given the file name. */
1766 if (parser == NULL)
1768 lang = get_language_from_filename (curfdp->infname, true);
1769 if (lang != NULL && lang->function != NULL)
1771 curfdp->lang = lang;
1772 parser = lang->function;
1776 /* Else look for sharp-bang as the first two characters. */
1777 if (parser == NULL
1778 && readline_internal (&lb, inf, infilename) > 0
1779 && lb.len >= 2
1780 && lb.buffer[0] == '#'
1781 && lb.buffer[1] == '!')
1783 char *lp;
1785 /* Set lp to point at the first char after the last slash in the
1786 line or, if no slashes, at the first nonblank. Then set cp to
1787 the first successive blank and terminate the string. */
1788 lp = strrchr (lb.buffer+2, '/');
1789 if (lp != NULL)
1790 lp += 1;
1791 else
1792 lp = skip_spaces (lb.buffer + 2);
1793 cp = skip_non_spaces (lp);
1794 *cp = '\0';
1796 if (strlen (lp) > 0)
1798 lang = get_language_from_interpreter (lp);
1799 if (lang != NULL && lang->function != NULL)
1801 curfdp->lang = lang;
1802 parser = lang->function;
1807 reset_input (inf);
1809 /* Else try to guess the language given the case insensitive file name. */
1810 if (parser == NULL)
1812 lang = get_language_from_filename (curfdp->infname, false);
1813 if (lang != NULL && lang->function != NULL)
1815 curfdp->lang = lang;
1816 parser = lang->function;
1820 /* Else try Fortran or C. */
1821 if (parser == NULL)
1823 node *old_last_node = last_node;
1825 curfdp->lang = get_language_from_langname ("fortran");
1826 find_entries (inf);
1828 if (old_last_node == last_node)
1829 /* No Fortran entries found. Try C. */
1831 reset_input (inf);
1832 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1833 find_entries (inf);
1835 return;
1838 if (!no_line_directive
1839 && curfdp->lang != NULL && curfdp->lang->metasource)
1840 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1841 file, or anyway we parsed a file that is automatically generated from
1842 this one. If this is the case, the bingo.c file contained #line
1843 directives that generated tags pointing to this file. Let's delete
1844 them all before parsing this file, which is the real source. */
1846 fdesc **fdpp = &fdhead;
1847 while (*fdpp != NULL)
1848 if (*fdpp != curfdp
1849 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1850 /* We found one of those! We must delete both the file description
1851 and all tags referring to it. */
1853 fdesc *badfdp = *fdpp;
1855 /* Delete the tags referring to badfdp->taggedfname
1856 that were obtained from badfdp->infname. */
1857 invalidate_nodes (badfdp, &nodehead);
1859 *fdpp = badfdp->next; /* remove the bad description from the list */
1860 free_fdesc (badfdp);
1862 else
1863 fdpp = &(*fdpp)->next; /* advance the list pointer */
1866 assert (parser != NULL);
1868 /* Generic initializations before reading from file. */
1869 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1871 /* Generic initializations before parsing file with readline. */
1872 lineno = 0; /* reset global line number */
1873 charno = 0; /* reset global char number */
1874 linecharno = 0; /* reset global char number of line start */
1876 parser (inf);
1878 regex_tag_multiline ();
1883 * Check whether an implicitly named tag should be created,
1884 * then call `pfnote'.
1885 * NAME is a string that is internally copied by this function.
1887 * TAGS format specification
1888 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1889 * The following is explained in some more detail in etc/ETAGS.EBNF.
1891 * make_tag creates tags with "implicit tag names" (unnamed tags)
1892 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1893 * 1. NAME does not contain any of the characters in NONAM;
1894 * 2. LINESTART contains name as either a rightmost, or rightmost but
1895 * one character, substring;
1896 * 3. the character, if any, immediately before NAME in LINESTART must
1897 * be a character in NONAM;
1898 * 4. the character, if any, immediately after NAME in LINESTART must
1899 * also be a character in NONAM.
1901 * The implementation uses the notinname() macro, which recognizes the
1902 * characters stored in the string `nonam'.
1903 * etags.el needs to use the same characters that are in NONAM.
1905 static void
1906 make_tag (const char *name, /* tag name, or NULL if unnamed */
1907 int namelen, /* tag length */
1908 bool is_func, /* tag is a function */
1909 char *linestart, /* start of the line where tag is */
1910 int linelen, /* length of the line where tag is */
1911 int lno, /* line number */
1912 long int cno) /* character number */
1914 bool named = (name != NULL && namelen > 0);
1915 char *nname = NULL;
1917 if (!CTAGS && named) /* maybe set named to false */
1918 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1919 such that etags.el can guess a name from it. */
1921 int i;
1922 register const char *cp = name;
1924 for (i = 0; i < namelen; i++)
1925 if (notinname (*cp++))
1926 break;
1927 if (i == namelen) /* rule #1 */
1929 cp = linestart + linelen - namelen;
1930 if (notinname (linestart[linelen-1]))
1931 cp -= 1; /* rule #4 */
1932 if (cp >= linestart /* rule #2 */
1933 && (cp == linestart
1934 || notinname (cp[-1])) /* rule #3 */
1935 && strneq (name, cp, namelen)) /* rule #2 */
1936 named = false; /* use implicit tag name */
1940 if (named)
1941 nname = savenstr (name, namelen);
1943 pfnote (nname, is_func, linestart, linelen, lno, cno);
1946 /* Record a tag. */
1947 static void
1948 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1949 long int cno)
1950 /* tag name, or NULL if unnamed */
1951 /* tag is a function */
1952 /* start of the line where tag is */
1953 /* length of the line where tag is */
1954 /* line number */
1955 /* character number */
1957 register node *np;
1959 assert (name == NULL || name[0] != '\0');
1960 if (CTAGS && name == NULL)
1961 return;
1963 np = xnew (1, node);
1965 /* If ctags mode, change name "main" to M<thisfilename>. */
1966 if (CTAGS && !cxref_style && streq (name, "main"))
1968 char *fp = strrchr (curfdp->taggedfname, '/');
1969 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1970 fp = strrchr (np->name, '.');
1971 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1972 fp[0] = '\0';
1974 else
1975 np->name = name;
1976 np->valid = true;
1977 np->been_warned = false;
1978 np->fdp = curfdp;
1979 np->is_func = is_func;
1980 np->lno = lno;
1981 if (np->fdp->usecharno)
1982 /* Our char numbers are 0-base, because of C language tradition?
1983 ctags compatibility? old versions compatibility? I don't know.
1984 Anyway, since emacs's are 1-base we expect etags.el to take care
1985 of the difference. If we wanted to have 1-based numbers, we would
1986 uncomment the +1 below. */
1987 np->cno = cno /* + 1 */ ;
1988 else
1989 np->cno = invalidcharno;
1990 np->left = np->right = NULL;
1991 if (CTAGS && !cxref_style)
1993 if (strlen (linestart) < 50)
1994 np->regex = concat (linestart, "$", "");
1995 else
1996 np->regex = savenstr (linestart, 50);
1998 else
1999 np->regex = savenstr (linestart, linelen);
2001 add_node (np, &nodehead);
2005 * Utility functions and data to avoid recursion.
2008 typedef struct stack_entry {
2009 node *np;
2010 struct stack_entry *next;
2011 } stkentry;
2013 static void
2014 push_node (node *np, stkentry **stack_top)
2016 if (np)
2018 stkentry *new = xnew (1, stkentry);
2020 new->np = np;
2021 new->next = *stack_top;
2022 *stack_top = new;
2026 static node *
2027 pop_node (stkentry **stack_top)
2029 node *ret = NULL;
2031 if (*stack_top)
2033 stkentry *old_start = *stack_top;
2035 ret = (*stack_top)->np;
2036 *stack_top = (*stack_top)->next;
2037 free (old_start);
2039 return ret;
2043 * free_tree ()
2044 * emulate recursion on left children, iterate on right children.
2046 static void
2047 free_tree (register node *np)
2049 stkentry *stack = NULL;
2051 while (np)
2053 /* Descent on left children. */
2054 while (np->left)
2056 push_node (np, &stack);
2057 np = np->left;
2059 /* Free node without left children. */
2060 node *node_right = np->right;
2061 free (np->name);
2062 free (np->regex);
2063 free (np);
2064 if (!node_right)
2066 /* Backtrack to find a node with right children, while freeing nodes
2067 that don't have right children. */
2068 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2070 node_right = np->right;
2071 free (np->name);
2072 free (np->regex);
2073 free (np);
2076 /* Free right children. */
2077 np = node_right;
2082 * free_fdesc ()
2083 * delete a file description
2085 static void
2086 free_fdesc (register fdesc *fdp)
2088 free (fdp->infname);
2089 free (fdp->infabsname);
2090 free (fdp->infabsdir);
2091 free (fdp->taggedfname);
2092 free (fdp->prop);
2093 free (fdp);
2097 * add_node ()
2098 * Adds a node to the tree of nodes. In etags mode, sort by file
2099 * name. In ctags mode, sort by tag name. Make no attempt at
2100 * balancing.
2102 * add_node is the only function allowed to add nodes, so it can
2103 * maintain state.
2105 static void
2106 add_node (node *np, node **cur_node_p)
2108 node *cur_node = *cur_node_p;
2110 /* Make the first node. */
2111 if (cur_node == NULL)
2113 *cur_node_p = np;
2114 last_node = np;
2115 return;
2118 if (!CTAGS)
2119 /* Etags Mode */
2121 /* For each file name, tags are in a linked sublist on the right
2122 pointer. The first tags of different files are a linked list
2123 on the left pointer. last_node points to the end of the last
2124 used sublist. */
2125 if (last_node != NULL && last_node->fdp == np->fdp)
2127 /* Let's use the same sublist as the last added node. */
2128 assert (last_node->right == NULL);
2129 last_node->right = np;
2130 last_node = np;
2132 else
2134 while (cur_node->fdp != np->fdp)
2136 if (cur_node->left == NULL)
2137 break;
2138 /* The head of this sublist is not good for us. Let's try the
2139 next one. */
2140 cur_node = cur_node->left;
2142 if (cur_node->left)
2144 /* Scanning the list we found the head of a sublist which is
2145 good for us. Let's scan this sublist. */
2146 if (cur_node->right)
2148 cur_node = cur_node->right;
2149 while (cur_node->right)
2150 cur_node = cur_node->right;
2152 /* Make a new node in this sublist. */
2153 cur_node->right = np;
2155 else
2157 /* Make a new sublist. */
2158 cur_node->left = np;
2160 last_node = np;
2162 } /* if ETAGS mode */
2163 else
2165 /* Ctags Mode */
2166 node **next_node = &cur_node;
2168 while ((cur_node = *next_node) != NULL)
2170 int dif = strcmp (np->name, cur_node->name);
2172 * If this tag name matches an existing one, then
2173 * do not add the node, but maybe print a warning.
2175 if (!dif && no_duplicates)
2177 if (np->fdp == cur_node->fdp)
2179 if (!no_warnings)
2181 fprintf (stderr,
2182 "Duplicate entry in file %s, line %d: %s\n",
2183 np->fdp->infname, lineno, np->name);
2184 fprintf (stderr, "Second entry ignored\n");
2187 else if (!cur_node->been_warned && !no_warnings)
2189 fprintf
2190 (stderr,
2191 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2192 np->fdp->infname, cur_node->fdp->infname, np->name);
2193 cur_node->been_warned = true;
2195 return;
2197 else
2198 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2200 *next_node = np;
2201 last_node = np;
2202 } /* if CTAGS mode */
2206 * invalidate_nodes ()
2207 * Scan the node tree and invalidate all nodes pointing to the
2208 * given file description (CTAGS case) or free them (ETAGS case).
2210 static void
2211 invalidate_nodes (fdesc *badfdp, node **npp)
2213 node *np = *npp;
2214 stkentry *stack = NULL;
2216 if (CTAGS)
2218 while (np)
2220 /* Push all the left children on the stack. */
2221 while (np->left != NULL)
2223 push_node (np, &stack);
2224 np = np->left;
2226 /* Invalidate this node. */
2227 if (np->fdp == badfdp)
2228 np->valid = false;
2229 if (!np->right)
2231 /* Pop nodes from stack, invalidating them, until we find one
2232 with a right child. */
2233 while ((np = pop_node (&stack)) != NULL)
2235 if (np->fdp == badfdp)
2236 np->valid = false;
2237 if (np->right != NULL)
2238 break;
2241 /* Process the right child, if any. */
2242 if (np)
2243 np = np->right;
2246 else
2248 node super_root, *np_parent = NULL;
2250 super_root.left = np;
2251 super_root.fdp = (fdesc *) -1;
2252 np = &super_root;
2254 while (np)
2256 /* Descent on left children until node with BADFP. */
2257 while (np && np->fdp != badfdp)
2259 assert (np->fdp != NULL);
2260 np_parent = np;
2261 np = np->left;
2263 if (np)
2265 np_parent->left = np->left; /* detach subtree from the tree */
2266 np->left = NULL; /* isolate it */
2267 free_tree (np); /* free it */
2269 /* Continue with rest of tree. */
2270 np = np_parent->left;
2273 *npp = super_root.left;
2278 static int total_size_of_entries (node *);
2279 static int number_len (long) ATTRIBUTE_CONST;
/* Length of a non-negative number's decimal representation.  */
static int
number_len (long int num)
{
  int len;

  /* One digit to start with, plus one for each further power of ten.  */
  for (len = 1, num /= 10; num > 0; num /= 10)
    len++;
  return len;
}
2292 * Return total number of characters that put_entries will output for
2293 * the nodes in the linked list at the right of the specified node.
2294 * This count is irrelevant with etags.el since emacs 19.34 at least,
2295 * but is still supplied for backward compatibility.
2297 static int
2298 total_size_of_entries (register node *np)
2300 register int total = 0;
2302 for (; np != NULL; np = np->right)
2303 if (np->valid)
2305 total += strlen (np->regex) + 1; /* pat\177 */
2306 if (np->name != NULL)
2307 total += strlen (np->name) + 1; /* name\001 */
2308 total += number_len ((long) np->lno) + 1; /* lno, */
2309 if (np->cno != invalidcharno) /* cno */
2310 total += number_len (np->cno);
2311 total += 1; /* newline */
2314 return total;
2317 static void
2318 put_entry (node *np)
2320 register char *sp;
2321 static fdesc *fdp = NULL;
2323 /* Output this entry */
2324 if (np->valid)
2326 if (!CTAGS)
2328 /* Etags mode */
2329 if (fdp != np->fdp)
2331 fdp = np->fdp;
2332 fprintf (tagf, "\f\n%s,%d\n",
2333 fdp->taggedfname, total_size_of_entries (np));
2334 fdp->written = true;
2336 fputs (np->regex, tagf);
2337 fputc ('\177', tagf);
2338 if (np->name != NULL)
2340 fputs (np->name, tagf);
2341 fputc ('\001', tagf);
2343 fprintf (tagf, "%d,", np->lno);
2344 if (np->cno != invalidcharno)
2345 fprintf (tagf, "%ld", np->cno);
2346 fputs ("\n", tagf);
2348 else
2350 /* Ctags mode */
2351 if (np->name == NULL)
2352 error ("internal error: NULL name in ctags mode.");
2354 if (cxref_style)
2356 if (vgrind_style)
2357 fprintf (stdout, "%s %s %d\n",
2358 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2359 else
2360 fprintf (stdout, "%-16s %3d %-16s %s\n",
2361 np->name, np->lno, np->fdp->taggedfname, np->regex);
2363 else
2365 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2367 if (np->is_func)
2368 { /* function or #define macro with args */
2369 putc (searchar, tagf);
2370 putc ('^', tagf);
2372 for (sp = np->regex; *sp; sp++)
2374 if (*sp == '\\' || *sp == searchar)
2375 putc ('\\', tagf);
2376 putc (*sp, tagf);
2378 putc (searchar, tagf);
2380 else
2381 { /* anything else; text pattern inadequate */
2382 fprintf (tagf, "%d", np->lno);
2384 putc ('\n', tagf);
2387 } /* if this node contains a valid tag */
2390 static void
2391 put_entries (node *np)
2393 stkentry *stack = NULL;
2395 if (np == NULL)
2396 return;
2398 if (CTAGS)
2400 while (np)
2402 /* Stack subentries that precede this one. */
2403 while (np->left)
2405 push_node (np, &stack);
2406 np = np->left;
2408 /* Output this subentry. */
2409 put_entry (np);
2410 /* Stack subentries that follow this one. */
2411 while (!np->right)
2413 /* Output subentries that precede the next one. */
2414 np = pop_node (&stack);
2415 if (!np)
2416 break;
2417 put_entry (np);
2419 if (np)
2420 np = np->right;
2423 else
2425 push_node (np, &stack);
2426 while ((np = pop_node (&stack)) != NULL)
2428 /* Output this subentry. */
2429 put_entry (np);
2430 while (np->right)
2432 /* Output subentries that follow this one. */
2433 put_entry (np->right);
2434 /* Stack subentries from the following files. */
2435 push_node (np->left, &stack);
2436 np = np->right;
2438 push_node (np->left, &stack);
/* C extensions.  These are bit masks describing the C-like language
   being parsed.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 */

/* Kinds of keyword found in the keyword table; st_none is the value
   used for tokens that are not in the table.  */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_enum_bf,                 /* ENUM_BF */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2468 /* Feed stuff between (but not including) %[ and %] lines to:
2469 gperf -m 5
2471 %compare-strncmp
2472 %enum
2473 %struct-type
2474 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2476 if, 0, st_C_ignore
2477 for, 0, st_C_ignore
2478 while, 0, st_C_ignore
2479 switch, 0, st_C_ignore
2480 return, 0, st_C_ignore
2481 __attribute__, 0, st_C_attribute
2482 GTY, 0, st_C_attribute
2483 @interface, 0, st_C_objprot
2484 @protocol, 0, st_C_objprot
2485 @implementation,0, st_C_objimpl
2486 @end, 0, st_C_objend
2487 import, (C_JAVA & ~C_PLPL), st_C_ignore
2488 package, (C_JAVA & ~C_PLPL), st_C_ignore
2489 friend, C_PLPL, st_C_ignore
2490 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2491 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2492 interface, (C_JAVA & ~C_PLPL), st_C_struct
2493 class, 0, st_C_class
2494 namespace, C_PLPL, st_C_struct
2495 domain, C_STAR, st_C_struct
2496 union, 0, st_C_struct
2497 struct, 0, st_C_struct
2498 extern, 0, st_C_extern
2499 enum, 0, st_C_enum
2500 typedef, 0, st_C_typedef
2501 define, 0, st_C_define
2502 undef, 0, st_C_define
2503 operator, C_PLPL, st_C_operator
2504 template, 0, st_C_template
2505 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2506 DEFUN, 0, st_C_gnumacro
2507 SYSCALL, 0, st_C_gnumacro
2508 ENTRY, 0, st_C_gnumacro
2509 PSEUDO, 0, st_C_gnumacro
2510 ENUM_BF, 0, st_C_enum_bf
2511 # These are defined inside C functions, so currently they are not met.
2512 # EXFUN used in glibc, DEFVAR_* in emacs.
2513 #EXFUN, 0, st_C_gnumacro
2514 #DEFVAR_, 0, st_C_gnumacro
2516 and replace lines between %< and %> with its output, then:
2517 - remove the #if characterset check
2518 - remove any #line directives
2519 - make in_word_set static and not inline
2520 - remove any 'register' qualifications from variable decls. */
2521 /*%<*/
2522 /* C code produced by gperf version 3.0.1 */
2523 /* Command-line: gperf -m 5 */
2524 /* Computed positions: -k'2-3' */
2526 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 34, duplicates = 0 */

/* Hash of the keyword candidate STR of length LEN: LEN plus the
   association values of the second character and, unless LEN is 2, of
   the third one.  The caller guarantees that those characters exist.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36,  3,
      27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
      36, 36, 36, 25,  0,  0, 36, 36, 36,  0,
      36, 36, 36, 36, 36,  1, 36, 16, 36,  6,
      23,  0,  0, 36, 22,  0, 36, 36,  5,  0,
       0, 15,  1, 36,  6, 36,  8, 19, 36, 16,
       4,  5, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
      36, 36, 36, 36, 36, 36
    };
  int hval = len;

  /* The third character takes part for every length but 2...  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  /* ...and the second character always does.  */
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2575 static struct C_stab_entry *
2576 in_word_set (register const char *str, register unsigned int len)
2578 enum
2580 TOTAL_KEYWORDS = 34,
2581 MIN_WORD_LENGTH = 2,
2582 MAX_WORD_LENGTH = 15,
2583 MIN_HASH_VALUE = 2,
2584 MAX_HASH_VALUE = 35
2587 static struct C_stab_entry wordlist[] =
2589 {""}, {""},
2590 {"if", 0, st_C_ignore},
2591 {"GTY", 0, st_C_attribute},
2592 {"@end", 0, st_C_objend},
2593 {"union", 0, st_C_struct},
2594 {"define", 0, st_C_define},
2595 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2596 {"template", 0, st_C_template},
2597 {"operator", C_PLPL, st_C_operator},
2598 {"@interface", 0, st_C_objprot},
2599 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2600 {"friend", C_PLPL, st_C_ignore},
2601 {"typedef", 0, st_C_typedef},
2602 {"return", 0, st_C_ignore},
2603 {"@implementation",0, st_C_objimpl},
2604 {"@protocol", 0, st_C_objprot},
2605 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2606 {"extern", 0, st_C_extern},
2607 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2608 {"struct", 0, st_C_struct},
2609 {"domain", C_STAR, st_C_struct},
2610 {"switch", 0, st_C_ignore},
2611 {"enum", 0, st_C_enum},
2612 {"for", 0, st_C_ignore},
2613 {"namespace", C_PLPL, st_C_struct},
2614 {"class", 0, st_C_class},
2615 {"while", 0, st_C_ignore},
2616 {"undef", 0, st_C_define},
2617 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2618 {"__attribute__", 0, st_C_attribute},
2619 {"ENTRY", 0, st_C_gnumacro},
2620 {"SYSCALL", 0, st_C_gnumacro},
2621 {"ENUM_BF", 0, st_C_enum_bf},
2622 {"PSEUDO", 0, st_C_gnumacro},
2623 {"DEFUN", 0, st_C_gnumacro}
2626 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2628 int key = hash (str, len);
2630 if (key <= MAX_HASH_VALUE && key >= 0)
2632 const char *s = wordlist[key].name;
2634 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2635 return &wordlist[key];
2638 return 0;
2640 /*%>*/
2642 static enum sym_type
2643 C_symtype (char *str, int len, int c_ext)
2645 register struct C_stab_entry *se = in_word_set (str, len);
2647 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2648 return st_none;
2649 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/* Ignoring ENUM_BF (type)
 *
 */
static bool in_enum_bf;         /* inside parentheses following ENUM_BF */
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* a tag may be created from this token;
                                   the token should be invalidated
                                   (set to false) whenever a state
                                   machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2772 * Variables and functions for dealing with nested structures.
2773 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2775 static void pushclass_above (int, char *, int);
2776 static void popclass_above (int);
2777 static void write_classname (linebuffer *, const char *qualifier);
/* Stack of the struct-like declarations enclosing the current point:
   two parallel arrays indexed by nesting depth.  */
static struct
{
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */

/* Current struct nesting depth (namespace, class, struct, union, enum).  */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.  */
#define instruct        (structdef == snone && nestlev > 0                  \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2791 static void
2792 pushclass_above (int bracelev, char *str, int len)
2794 int nl;
2796 popclass_above (bracelev);
2797 nl = cstack.nl;
2798 if (nl >= cstack.size)
2800 int size = cstack.size *= 2;
2801 xrnew (cstack.cname, size, char *);
2802 xrnew (cstack.bracelev, size, int);
2804 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2805 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2806 cstack.bracelev[nl] = bracelev;
2807 cstack.nl = nl + 1;
2810 static void
2811 popclass_above (int bracelev)
2813 int nl;
2815 for (nl = cstack.nl - 1;
2816 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2817 nl--)
2819 free (cstack.cname[nl]);
2820 cstack.nl = nl;
2824 static void
2825 write_classname (linebuffer *cn, const char *qualifier)
2827 int i, len;
2828 int qlen = strlen (qualifier);
2830 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2832 len = 0;
2833 cn->len = 0;
2834 cn->buffer[0] = '\0';
2836 else
2838 len = strlen (cstack.cname[0]);
2839 linebuffer_setlen (cn, len);
2840 strcpy (cn->buffer, cstack.cname[0]);
2842 for (i = 1; i < cstack.nl; i++)
2844 char *s = cstack.cname[i];
2845 if (s == NULL)
2846 continue;
2847 linebuffer_setlen (cn, len + qlen + strlen (s));
2848 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2853 static bool consider_token (char *, int, int, int *, int, int, bool *);
2854 static void make_C_tag (bool);
2857 * consider_token ()
2858 * checks to see if the current token is at the start of a
2859 * function or variable, or corresponds to a typedef, or
2860 * is a struct/union/enum tag, or #define, or an enum constant.
2862 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2863 * with args. C_EXTP points to which language we are looking at.
2865 * Globals
2866 * fvdef IN OUT
2867 * structdef IN OUT
2868 * definedef IN OUT
2869 * typdef IN OUT
2870 * objdef IN OUT
2873 static bool
2874 consider_token (char *str, int len, int c, int *c_extp,
2875 int bracelev, int parlev, bool *is_func_or_var)
2876 /* IN: token pointer */
2877 /* IN: token length */
2878 /* IN: first char after the token */
2879 /* IN, OUT: C extensions mask */
2880 /* IN: brace level */
2881 /* IN: parenthesis level */
2882 /* OUT: function or variable found */
2884 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2885 structtype is the type of the preceding struct-like keyword, and
2886 structbracelev is the brace level where it has been seen. */
2887 static enum sym_type structtype;
2888 static int structbracelev;
2889 static enum sym_type toktype;
2892 toktype = C_symtype (str, len, *c_extp);
2895 * Skip __attribute__
2897 if (toktype == st_C_attribute)
2899 inattribute = true;
2900 return false;
2904 * Skip ENUM_BF
2906 if (toktype == st_C_enum_bf && definedef == dnone)
2908 in_enum_bf = true;
2909 return false;
2913 * Advance the definedef state machine.
2915 switch (definedef)
2917 case dnone:
2918 /* We're not on a preprocessor line. */
2919 if (toktype == st_C_gnumacro)
2921 fvdef = fdefunkey;
2922 return false;
2924 break;
2925 case dsharpseen:
2926 if (toktype == st_C_define)
2928 definedef = ddefineseen;
2930 else
2932 definedef = dignorerest;
2934 return false;
2935 case ddefineseen:
2937 * Make a tag for any macro, unless it is a constant
2938 * and constantypedefs is false.
2940 definedef = dignorerest;
2941 *is_func_or_var = (c == '(');
2942 if (!*is_func_or_var && !constantypedefs)
2943 return false;
2944 else
2945 return true;
2946 case dignorerest:
2947 return false;
2948 default:
2949 error ("internal error: definedef value.");
2953 * Now typedefs
2955 switch (typdef)
2957 case tnone:
2958 if (toktype == st_C_typedef)
2960 if (typedefs)
2961 typdef = tkeyseen;
2962 fvextern = false;
2963 fvdef = fvnone;
2964 return false;
2966 break;
2967 case tkeyseen:
2968 switch (toktype)
2970 case st_none:
2971 case st_C_class:
2972 case st_C_struct:
2973 case st_C_enum:
2974 typdef = ttypeseen;
2975 break;
2976 default:
2977 break;
2979 break;
2980 case ttypeseen:
2981 if (structdef == snone && fvdef == fvnone)
2983 fvdef = fvnameseen;
2984 return true;
2986 break;
2987 case tend:
2988 switch (toktype)
2990 case st_C_class:
2991 case st_C_struct:
2992 case st_C_enum:
2993 return false;
2994 default:
2995 return true;
2997 default:
2998 break;
3001 switch (toktype)
3003 case st_C_javastruct:
3004 if (structdef == stagseen)
3005 structdef = scolonseen;
3006 return false;
3007 case st_C_template:
3008 case st_C_class:
3009 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3010 && bracelev == 0
3011 && definedef == dnone && structdef == snone
3012 && typdef == tnone && fvdef == fvnone)
3013 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3014 if (toktype == st_C_template)
3015 break;
3016 FALLTHROUGH;
3017 case st_C_struct:
3018 case st_C_enum:
3019 if (parlev == 0
3020 && fvdef != vignore
3021 && (typdef == tkeyseen
3022 || (typedefs_or_cplusplus && structdef == snone)))
3024 structdef = skeyseen;
3025 structtype = toktype;
3026 structbracelev = bracelev;
3027 if (fvdef == fvnameseen)
3028 fvdef = fvnone;
3030 return false;
3031 default:
3032 break;
3035 if (structdef == skeyseen)
3037 structdef = stagseen;
3038 return true;
3041 if (typdef != tnone)
3042 definedef = dnone;
3044 /* Detect Objective C constructs. */
3045 switch (objdef)
3047 case onone:
3048 switch (toktype)
3050 case st_C_objprot:
3051 objdef = oprotocol;
3052 return false;
3053 case st_C_objimpl:
3054 objdef = oimplementation;
3055 return false;
3056 default:
3057 break;
3059 break;
3060 case oimplementation:
3061 /* Save the class tag for functions or variables defined inside. */
3062 objtag = savenstr (str, len);
3063 objdef = oinbody;
3064 return false;
3065 case oprotocol:
3066 /* Save the class tag for categories. */
3067 objtag = savenstr (str, len);
3068 objdef = otagseen;
3069 *is_func_or_var = true;
3070 return true;
3071 case oparenseen:
3072 objdef = ocatseen;
3073 *is_func_or_var = true;
3074 return true;
3075 case oinbody:
3076 break;
3077 case omethodsign:
3078 if (parlev == 0)
3080 fvdef = fvnone;
3081 objdef = omethodtag;
3082 linebuffer_setlen (&token_name, len);
3083 memcpy (token_name.buffer, str, len);
3084 token_name.buffer[len] = '\0';
3085 return true;
3087 return false;
3088 case omethodcolon:
3089 if (parlev == 0)
3090 objdef = omethodparm;
3091 return false;
3092 case omethodparm:
3093 if (parlev == 0)
3095 objdef = omethodtag;
3096 if (class_qualify)
3098 int oldlen = token_name.len;
3099 fvdef = fvnone;
3100 linebuffer_setlen (&token_name, oldlen + len);
3101 memcpy (token_name.buffer + oldlen, str, len);
3102 token_name.buffer[oldlen + len] = '\0';
3104 return true;
3106 return false;
3107 case oignore:
3108 if (toktype == st_C_objend)
3110 /* Memory leakage here: the string pointed by objtag is
3111 never released, because many tests would be needed to
3112 avoid breaking on incorrect input code. The amount of
3113 memory leaked here is the sum of the lengths of the
3114 class tags.
3115 free (objtag); */
3116 objdef = onone;
3118 return false;
3119 default:
3120 break;
3123 /* A function, variable or enum constant? */
3124 switch (toktype)
3126 case st_C_extern:
3127 fvextern = true;
3128 switch (fvdef)
3130 case finlist:
3131 case flistseen:
3132 case fignore:
3133 case vignore:
3134 break;
3135 default:
3136 fvdef = fvnone;
3138 return false;
3139 case st_C_ignore:
3140 fvextern = false;
3141 fvdef = vignore;
3142 return false;
3143 case st_C_operator:
3144 fvdef = foperator;
3145 *is_func_or_var = true;
3146 return true;
3147 case st_none:
3148 if (constantypedefs
3149 && structdef == snone
3150 && structtype == st_C_enum && bracelev > structbracelev
3151 /* Don't tag tokens in expressions that assign values to enum
3152 constants. */
3153 && fvdef != vignore)
3154 return true; /* enum constant */
3155 switch (fvdef)
3157 case fdefunkey:
3158 if (bracelev > 0)
3159 break;
3160 fvdef = fdefunname; /* GNU macro */
3161 *is_func_or_var = true;
3162 return true;
3163 case fvnone:
3164 switch (typdef)
3166 case ttypeseen:
3167 return false;
3168 case tnone:
3169 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3170 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3172 fvdef = vignore;
3173 return false;
3175 break;
3176 default:
3177 break;
3179 FALLTHROUGH;
3180 case fvnameseen:
3181 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3183 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3184 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3185 fvdef = foperator;
3186 *is_func_or_var = true;
3187 return true;
3189 if (bracelev > 0 && !instruct)
3190 break;
3191 fvdef = fvnameseen; /* function or variable */
3192 *is_func_or_var = true;
3193 return true;
3194 default:
3195 break;
3197 break;
3198 default:
3199 break;
3202 return false;
3207 * C_entries often keeps pointers to tokens or lines which are older than
3208 * the line currently read. By keeping two line buffers, and switching
3209 * them at end of line, it is possible to use those pointers.
3211 static struct
3213 long linepos;
3214 linebuffer lb;
3215 } lbs[2];
/* The macros below expand to uses of the variables curndx, newndx and
   c_ext, which must be in scope wherever they are used.  */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The two line buffers and their character positions.  */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language mask.  */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)
/* Read the next line into the current line buffer and restart
   scanning from its beginning, leaving definedef as it is.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = false;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore a valid saved token, if
   any, and reset definedef.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF ();                                                \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = false;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3250 static void
3251 make_C_tag (bool isfun)
3253 /* This function is never called when token.valid is false, but
3254 we must protect against invalid input or internal errors. */
3255 if (token.valid)
3256 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3257 token.offset+token.length+1, token.lineno, token.linepos);
3258 else if (DEBUG)
3259 { /* this branch is optimized away if !DEBUG */
3260 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3261 token_name.len + 17, isfun, token.line,
3262 token.offset+token.length+1, token.lineno, token.linepos);
3263 error ("INVALID TOKEN");
3266 token.valid = false;
/* Return true if neither the end-of-file nor the error indicator of
   INF is set, that is, if reading INF may still yield something.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3277 * C_entries ()
3278 * This routine finds functions, variables, typedefs,
3279 * #define's, enum constants and struct/union/enum definitions in
3280 * C syntax and adds them to the list.
/* Scan the C-family source on INF one character at a time and tag what
   it finds.  C_EXT carries the language-variant bits tested by the
   plainc, cplpl and cjava macros, plus the YACC and C_AUTO bits that
   are checked directly below; it is rewritten during the scan when a
   "::" inside an identifier triggers C++ auto-detection.  Tags are
   emitted through make_C_tag.  The parser state is split between the
   locals declared here and file-scope variables such as token, fvdef,
   typdef, structdef, definedef and objdef.  */
3282 static void
3283 C_entries (int c_ext, FILE *inf)
3284 /* extension of C */
3285 /* input file */
3287 register char c; /* latest char read; '\0' for end of line */
3288 register char *lp; /* pointer one beyond the character `c' */
3289 int curndx, newndx; /* indices for current and new lb */
3290 register int tokoff; /* offset in line of start of current token */
3291 register int toklen; /* length of current token */
3292 const char *qualifier; /* string used to qualify names */
3293 int qlen; /* length of qualifier */
3294 int bracelev; /* current brace level */
3295 int bracketlev; /* current bracket level */
3296 int parlev; /* current parenthesis level */
3297 int attrparlev; /* __attribute__ parenthesis level */
3298 int templatelev; /* current template level */
3299 int typdefbracelev; /* bracelev where a typedef struct body begun */
3300 bool incomm, inquote, inchar, quotednl, midtoken;
3301 bool yacc_rules; /* in the rules part of a yacc file */
3302 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Set up both line buffers.  The class stack is allocated only when
   cstack.size is still zero.  */
3305 linebuffer_init (&lbs[0].lb);
3306 linebuffer_init (&lbs[1].lb);
3307 if (cstack.size == 0)
3309 cstack.size = (DEBUG) ? 1 : 4;
3310 cstack.nl = 0;
3311 cstack.cname = xnew (cstack.size, char *);
3312 cstack.bracelev = xnew (cstack.size, int);
3315 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3316 curndx = newndx = 0;
3317 lp = curlb.buffer;
3318 *lp = 0;
3320 fvdef = fvnone; fvextern = false; typdef = tnone;
3321 structdef = snone; definedef = dnone; objdef = onone;
3322 yacc_rules = false;
3323 midtoken = inquote = inchar = incomm = quotednl = false;
3324 token.valid = savetoken.valid = false;
3325 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Java joins qualified names with "."; every other variant uses "::".  */
3326 if (cjava)
3327 { qualifier = "."; qlen = 1; }
3328 else
3329 { qualifier = "::"; qlen = 2; }
/* Main loop: fetch the next character of the current line into C.  The
   empty line buffer set up above makes the very first character '\0',
   which leads to the first line being read.  */
3332 while (perhaps_more_input (inf))
3334 c = *lp++;
3335 if (c == '\\')
3337 /* If we are at the end of the line, the next character is a
3338 '\0'; do not skip it, because it is what tells us
3339 to read the next line. */
3340 if (*lp == '\0')
3342 quotednl = true;
3343 continue;
3345 lp++;
3346 c = ' ';
3348 else if (incomm)
3350 switch (c)
3352 case '*':
3353 if (*lp == '/')
3355 c = *lp++;
3356 incomm = false;
3358 break;
3359 case '\0':
3360 /* Newlines inside comments do not end macro definitions in
3361 traditional cpp. */
3362 CNL_SAVE_DEFINEDEF ();
3363 break;
3365 continue;
3367 else if (inquote)
3369 switch (c)
3371 case '"':
3372 inquote = false;
3373 break;
3374 case '\0':
3375 /* Newlines inside strings do not end macro definitions
3376 in traditional cpp, even though compilers don't
3377 usually accept them. */
3378 CNL_SAVE_DEFINEDEF ();
3379 break;
3381 continue;
3383 else if (inchar)
3385 switch (c)
3387 case '\0':
3388 /* Hmmm, something went wrong. */
3389 CNL ();
3390 FALLTHROUGH;
3391 case '\'':
3392 inchar = false;
3393 break;
3395 continue;
3397 else switch (c)
3399 case '"':
3400 inquote = true;
3401 if (bracketlev > 0)
3402 continue;
3403 if (inattribute)
3404 break;
3405 switch (fvdef)
3407 case fdefunkey:
3408 case fstartlist:
3409 case finlist:
3410 case fignore:
3411 case vignore:
3412 break;
3413 default:
3414 fvextern = false;
3415 fvdef = fvnone;
3417 continue;
3418 case '\'':
3419 inchar = true;
3420 if (bracketlev > 0)
3421 continue;
3422 if (inattribute)
3423 break;
3424 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3426 fvextern = false;
3427 fvdef = fvnone;
3429 continue;
3430 case '/':
3431 if (*lp == '*')
3433 incomm = true;
3434 lp++;
3435 c = ' ';
3436 if (bracketlev > 0)
3437 continue;
3439 else if (/* cplpl && */ *lp == '/')
3441 c = '\0';
3443 break;
3444 case '%':
3445 if ((c_ext & YACC) && *lp == '%')
3447 /* Entering or exiting rules section in yacc file. */
3448 lp++;
3449 definedef = dnone; fvdef = fvnone; fvextern = false;
3450 typdef = tnone; structdef = snone;
3451 midtoken = inquote = inchar = incomm = quotednl = false;
3452 bracelev = 0;
3453 yacc_rules = !yacc_rules;
3454 continue;
3456 else
3457 break;
3458 case '#':
3459 if (definedef == dnone)
3461 char *cp;
3462 bool cpptoken = true;
3464 /* Look back on this line. If all blanks, or nonblanks
3465 followed by an end of comment, this is a preprocessor
3466 token. */
3467 for (cp = newlb.buffer; cp < lp-1; cp++)
3468 if (!c_isspace (*cp))
3470 if (*cp == '*' && cp[1] == '/')
3472 cp++;
3473 cpptoken = true;
3475 else
3476 cpptoken = false;
3478 if (cpptoken)
3480 definedef = dsharpseen;
3481 /* This is needed for tagging enum values: when there are
3482 preprocessor conditionals inside the enum, we need to
3483 reset the value of fvdef so that the next enum value is
3484 tagged even though the one before it did not end in a
3485 comma. */
3486 if (fvdef == vignore && instruct && parlev == 0)
3488 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3489 fvdef = fvnone;
3492 } /* if (definedef == dnone) */
3493 continue;
3494 case '[':
3495 bracketlev++;
3496 continue;
3497 default:
3498 if (bracketlev > 0)
3500 if (c == ']')
3501 --bracketlev;
3502 else if (c == '\0')
3503 CNL_SAVE_DEFINEDEF ();
3504 continue;
3506 break;
3507 } /* switch (c) */
3510 /* Consider token only if some involved conditions are satisfied. */
3511 if (typdef != tignore
3512 && definedef != dignorerest
3513 && fvdef != finlist
3514 && templatelev == 0
3515 && (definedef != dnone
3516 || structdef != scolonseen)
3517 && !inattribute
3518 && !in_enum_bf)
3520 if (midtoken)
3522 if (endtoken (c))
3524 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3525 /* This handles :: in the middle,
3526 but not at the beginning of an identifier.
3527 Also, space-separated :: is not recognized. */
3529 if (c_ext & C_AUTO) /* automatic detection of C++ */
3530 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3531 lp += 2;
3532 toklen += 2;
3533 c = lp[-1];
3534 goto still_in_token;
3536 else
3538 bool funorvar = false;
3540 if (yacc_rules
3541 || consider_token (newlb.buffer + tokoff, toklen, c,
3542 &c_ext, bracelev, parlev,
3543 &funorvar))
3545 if (fvdef == foperator)
3547 char *oldlp = lp;
3548 lp = skip_spaces (lp-1);
3549 if (*lp != '\0')
3550 lp += 1;
3551 while (*lp != '\0'
3552 && !c_isspace (*lp) && *lp != '(')
3553 lp += 1;
3554 c = *lp++;
3555 toklen += lp - oldlp;
3557 token.named = false;
3558 if (!plainc
3559 && nestlev > 0 && definedef == dnone)
3560 /* in struct body */
3562 if (class_qualify)
3564 int len;
3565 write_classname (&token_name, qualifier);
3566 len = token_name.len;
3567 linebuffer_setlen (&token_name,
3568 len + qlen + toklen);
3569 sprintf (token_name.buffer + len, "%s%.*s",
3570 qualifier, toklen,
3571 newlb.buffer + tokoff);
3573 else
3575 linebuffer_setlen (&token_name, toklen);
3576 sprintf (token_name.buffer, "%.*s",
3577 toklen, newlb.buffer + tokoff);
3579 token.named = true;
3581 else if (objdef == ocatseen)
3582 /* Objective C category */
3584 if (class_qualify)
3586 int len = strlen (objtag) + 2 + toklen;
3587 linebuffer_setlen (&token_name, len);
3588 sprintf (token_name.buffer, "%s(%.*s)",
3589 objtag, toklen,
3590 newlb.buffer + tokoff);
3592 else
3594 linebuffer_setlen (&token_name, toklen);
3595 sprintf (token_name.buffer, "%.*s",
3596 toklen, newlb.buffer + tokoff);
3598 token.named = true;
3600 else if (objdef == omethodtag
3601 || objdef == omethodparm)
3602 /* Objective C method */
3604 token.named = true;
3606 else if (fvdef == fdefunname)
3607 /* GNU DEFUN and similar macros */
3609 bool defun = (newlb.buffer[tokoff] == 'F');
3610 int off = tokoff;
3611 int len = toklen;
3613 if (defun)
3615 off += 1;
3616 len -= 1;
3618 /* First, tag it as its C name */
3619 linebuffer_setlen (&token_name, toklen);
3620 memcpy (token_name.buffer,
3621 newlb.buffer + tokoff, toklen);
3622 token_name.buffer[toklen] = '\0';
3623 token.named = true;
3624 token.lineno = lineno;
3625 token.offset = tokoff;
3626 token.length = toklen;
3627 token.line = newlb.buffer;
3628 token.linepos = newlinepos;
3629 token.valid = true;
3630 make_C_tag (funorvar);
3632 /* Rewrite the tag so that emacs lisp DEFUNs
3633 can be found also by their elisp name */
3634 linebuffer_setlen (&token_name, len);
3635 memcpy (token_name.buffer,
3636 newlb.buffer + off, len);
3637 token_name.buffer[len] = '\0';
3638 if (defun)
3639 while (--len >= 0)
3640 if (token_name.buffer[len] == '_')
3641 token_name.buffer[len] = '-';
3642 token.named = defun;
3644 else
3646 linebuffer_setlen (&token_name, toklen);
3647 memcpy (token_name.buffer,
3648 newlb.buffer + tokoff, toklen);
3649 token_name.buffer[toklen] = '\0';
3650 /* Name macros and members. */
3651 token.named = (structdef == stagseen
3652 || typdef == ttypeseen
3653 || typdef == tend
3654 || (funorvar
3655 && definedef == dignorerest)
3656 || (funorvar
3657 && definedef == dnone
3658 && structdef == snone
3659 && bracelev > 0));
3661 token.lineno = lineno;
3662 token.offset = tokoff;
3663 token.length = toklen;
3664 token.line = newlb.buffer;
3665 token.linepos = newlinepos;
3666 token.valid = true;
3668 if (definedef == dnone
3669 && (fvdef == fvnameseen
3670 || fvdef == foperator
3671 || structdef == stagseen
3672 || typdef == tend
3673 || typdef == ttypeseen
3674 || objdef != onone))
3676 if (current_lb_is_new)
3677 switch_line_buffers ();
3679 else if (definedef != dnone
3680 || fvdef == fdefunname
3681 || instruct)
3682 make_C_tag (funorvar);
3684 else /* not yacc and consider_token failed */
3686 if (inattribute && fvdef == fignore)
3688 /* We have just met __attribute__ after a
3689 function parameter list: do not tag the
3690 function again. */
3691 fvdef = fvnone;
3694 midtoken = false;
3696 } /* if (endtoken (c)) */
3697 else if (intoken (c))
3698 still_in_token:
3700 toklen++;
3701 continue;
3703 } /* if (midtoken) */
3704 else if (begtoken (c))
3706 switch (definedef)
3708 case dnone:
3709 switch (fvdef)
3711 case fstartlist:
3712 /* This prevents tagging fb in
3713 void (__attribute__((noreturn)) *fb) (void);
3714 Fixing this is not easy and not very important. */
3715 fvdef = finlist;
3716 continue;
3717 case flistseen:
3718 if (plainc || declarations)
3720 make_C_tag (true); /* a function */
3721 fvdef = fignore;
3723 break;
3724 default:
3725 break;
3727 if (structdef == stagseen && !cjava)
3729 popclass_above (bracelev);
3730 structdef = snone;
3732 break;
3733 case dsharpseen:
3734 savetoken = token;
3735 break;
3736 default:
3737 break;
3739 if (!yacc_rules || lp == newlb.buffer + 1)
3741 tokoff = lp - 1 - newlb.buffer;
3742 toklen = 1;
3743 midtoken = true;
3745 continue;
3746 } /* if (begtoken) */
3747 } /* if must look at token */
3750 /* Detect end of line, colon, comma, semicolon and various braces
3751 after having handled a token.*/
3752 switch (c)
3754 case ':':
3755 if (inattribute)
3756 break;
3757 if (yacc_rules && token.offset == 0 && token.valid)
3759 make_C_tag (false); /* a yacc function */
3760 break;
3762 if (definedef != dnone)
3763 break;
3764 switch (objdef)
3766 case otagseen:
3767 objdef = oignore;
3768 make_C_tag (true); /* an Objective C class */
3769 break;
3770 case omethodtag:
3771 case omethodparm:
3772 objdef = omethodcolon;
3773 if (class_qualify)
/* NOTE: the toklen declared on the next line is a new local that shadows
   the function-level toklen; it holds the current length of token_name.  */
3775 int toklen = token_name.len;
3776 linebuffer_setlen (&token_name, toklen + 1);
3777 strcpy (token_name.buffer + toklen, ":");
3779 break;
3780 default:
3781 break;
3783 if (structdef == stagseen)
3785 structdef = scolonseen;
3786 break;
3788 /* Should be useless, but may be work as a safety net. */
3789 if (cplpl && fvdef == flistseen)
3791 make_C_tag (true); /* a function */
3792 fvdef = fignore;
3793 break;
3795 break;
3796 case ';':
3797 if (definedef != dnone || inattribute)
3798 break;
3799 switch (typdef)
3801 case tend:
3802 case ttypeseen:
3803 make_C_tag (false); /* a typedef */
3804 typdef = tnone;
3805 fvdef = fvnone;
3806 break;
3807 case tnone:
3808 case tinbody:
3809 case tignore:
3810 switch (fvdef)
3812 case fignore:
3813 if (typdef == tignore || cplpl)
3814 fvdef = fvnone;
3815 break;
3816 case fvnameseen:
3817 if ((globals && bracelev == 0 && (!fvextern || declarations))
3818 || (members && instruct))
3819 make_C_tag (false); /* a variable */
3820 fvextern = false;
3821 fvdef = fvnone;
3822 token.valid = false;
3823 break;
3824 case flistseen:
3825 if ((declarations
3826 && (cplpl || !instruct)
3827 && (typdef == tnone || (typdef != tignore && instruct)))
3828 || (members
3829 && plainc && instruct))
3830 make_C_tag (true); /* a function */
3831 FALLTHROUGH;
3832 default:
3833 fvextern = false;
3834 fvdef = fvnone;
3835 if (declarations
3836 && cplpl && structdef == stagseen)
3837 make_C_tag (false); /* forward declaration */
3838 else
3839 token.valid = false;
3840 } /* switch (fvdef) */
3841 FALLTHROUGH;
3842 default:
3843 if (!instruct)
3844 typdef = tnone;
3846 if (structdef == stagseen)
3847 structdef = snone;
3848 break;
3849 case ',':
3850 if (definedef != dnone || inattribute)
3851 break;
3852 switch (objdef)
3854 case omethodtag:
3855 case omethodparm:
3856 make_C_tag (true); /* an Objective C method */
3857 objdef = oinbody;
3858 break;
3859 default:
3860 break;
3862 switch (fvdef)
3864 case fdefunkey:
3865 case foperator:
3866 case fstartlist:
3867 case finlist:
3868 case fignore:
3869 break;
3870 case vignore:
3871 if (instruct && parlev == 0)
3872 fvdef = fvnone;
3873 break;
3874 case fdefunname:
3875 fvdef = fignore;
3876 break;
3877 case fvnameseen:
3878 if (parlev == 0
3879 && ((globals
3880 && bracelev == 0
3881 && templatelev == 0
3882 && (!fvextern || declarations))
3883 || (members && instruct)))
3884 make_C_tag (false); /* a variable */
3885 break;
3886 case flistseen:
3887 if ((declarations && typdef == tnone && !instruct)
3888 || (members && typdef != tignore && instruct))
3890 make_C_tag (true); /* a function */
3891 fvdef = fvnameseen;
3893 else if (!declarations)
3894 fvdef = fvnone;
3895 token.valid = false;
3896 break;
3897 default:
3898 fvdef = fvnone;
3900 if (structdef == stagseen)
3901 structdef = snone;
3902 break;
3903 case ']':
3904 if (definedef != dnone || inattribute)
3905 break;
3906 if (structdef == stagseen)
3907 structdef = snone;
3908 switch (typdef)
3910 case ttypeseen:
3911 case tend:
3912 typdef = tignore;
3913 make_C_tag (false); /* a typedef */
3914 break;
3915 case tnone:
3916 case tinbody:
3917 switch (fvdef)
3919 case foperator:
3920 case finlist:
3921 case fignore:
3922 case vignore:
3923 break;
3924 case fvnameseen:
3925 if ((members && bracelev == 1)
3926 || (globals && bracelev == 0
3927 && (!fvextern || declarations)))
3928 make_C_tag (false); /* a variable */
3929 FALLTHROUGH;
3930 default:
3931 fvdef = fvnone;
3933 break;
3934 default:
3935 break;
3937 break;
3938 case '(':
3939 if (inattribute)
3941 attrparlev++;
3942 break;
3944 if (definedef != dnone)
3945 break;
3946 if (objdef == otagseen && parlev == 0)
3947 objdef = oparenseen;
3948 switch (fvdef)
3950 case fvnameseen:
3951 if (typdef == ttypeseen
3952 && *lp != '*'
3953 && !instruct)
3955 /* This handles constructs like:
3956 typedef void OperatorFun (int fun); */
3957 make_C_tag (false);
3958 typdef = tignore;
3959 fvdef = fignore;
3960 break;
3962 FALLTHROUGH;
3963 case foperator:
3964 fvdef = fstartlist;
3965 break;
3966 case flistseen:
3967 fvdef = finlist;
3968 break;
3969 default:
3970 break;
3972 parlev++;
3973 break;
3974 case ')':
3975 if (inattribute)
3977 if (--attrparlev == 0)
3978 inattribute = false;
3979 break;
3981 if (in_enum_bf)
3983 if (--parlev == 0)
3984 in_enum_bf = false;
3985 break;
3987 if (definedef != dnone)
3988 break;
3989 if (objdef == ocatseen && parlev == 1)
3991 make_C_tag (true); /* an Objective C category */
3992 objdef = oignore;
3994 if (--parlev == 0)
3996 switch (fvdef)
3998 case fstartlist:
3999 case finlist:
4000 fvdef = flistseen;
4001 break;
4002 default:
4003 break;
4005 if (!instruct
4006 && (typdef == tend
4007 || typdef == ttypeseen))
4009 typdef = tignore;
4010 make_C_tag (false); /* a typedef */
4013 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
4014 parlev = 0;
4015 break;
4016 case '{':
4017 if (definedef != dnone)
4018 break;
4019 if (typdef == ttypeseen)
4021 /* Whenever typdef is set to tinbody (currently only
4022 here), typdefbracelev should be set to bracelev. */
4023 typdef = tinbody;
4024 typdefbracelev = bracelev;
4026 switch (fvdef)
4028 case flistseen:
4029 if (cplpl && !class_qualify)
4031 /* Remove class and namespace qualifiers from the token,
4032 leaving only the method/member name. */
4033 char *cc, *uqname = token_name.buffer;
4034 char *tok_end = token_name.buffer + token_name.len;
4036 for (cc = token_name.buffer; cc < tok_end; cc++)
4038 if (*cc == ':' && cc[1] == ':')
4040 uqname = cc + 2;
4041 cc++;
4044 if (uqname > token_name.buffer)
4046 int uqlen = strlen (uqname);
4047 linebuffer_setlen (&token_name, uqlen);
4048 memmove (token_name.buffer, uqname, uqlen + 1);
4051 make_C_tag (true); /* a function */
4052 FALLTHROUGH;
4053 case fignore:
4054 fvdef = fvnone;
4055 break;
4056 case fvnone:
4057 switch (objdef)
4059 case otagseen:
4060 make_C_tag (true); /* an Objective C class */
4061 objdef = oignore;
4062 break;
4063 case omethodtag:
4064 case omethodparm:
4065 make_C_tag (true); /* an Objective C method */
4066 objdef = oinbody;
4067 break;
4068 default:
4069 /* Neutralize `extern "C" {' grot. */
4070 if (bracelev == 0 && structdef == snone && nestlev == 0
4071 && typdef == tnone)
4072 bracelev = -1;
4074 break;
4075 default:
4076 break;
4078 switch (structdef)
4080 case skeyseen: /* unnamed struct */
4081 pushclass_above (bracelev, NULL, 0);
4082 structdef = snone;
4083 break;
4084 case stagseen: /* named struct or enum */
4085 case scolonseen: /* a class */
4086 pushclass_above (bracelev,token.line+token.offset, token.length);
4087 structdef = snone;
4088 make_C_tag (false); /* a struct or enum */
4089 break;
4090 default:
4091 break;
4093 bracelev += 1;
4094 break;
4095 case '*':
4096 if (definedef != dnone)
4097 break;
4098 if (fvdef == fstartlist)
4100 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4101 token.valid = false;
4103 break;
4104 case '}':
4105 if (definedef != dnone)
4106 break;
4107 bracelev -= 1;
4108 if (!ignoreindent && lp == newlb.buffer + 1)
4110 if (bracelev != 0)
4111 token.valid = false; /* unexpected value, token unreliable */
4112 bracelev = 0; /* reset brace level if first column */
4113 parlev = 0; /* also reset paren level, just in case... */
4115 else if (bracelev < 0)
4117 token.valid = false; /* something gone amiss, token unreliable */
4118 bracelev = 0;
4120 if (bracelev == 0 && fvdef == vignore)
4121 fvdef = fvnone; /* end of function */
4122 popclass_above (bracelev);
4123 structdef = snone;
4124 /* Only if typdef == tinbody is typdefbracelev significant. */
4125 if (typdef == tinbody && bracelev <= typdefbracelev)
4127 assert (bracelev == typdefbracelev);
4128 typdef = tend;
4130 break;
4131 case '=':
4132 if (definedef != dnone)
4133 break;
4134 switch (fvdef)
4136 case foperator:
4137 case finlist:
4138 case fignore:
4139 case vignore:
4140 break;
4141 case fvnameseen:
4142 if ((members && bracelev == 1)
4143 || (globals && bracelev == 0 && (!fvextern || declarations)))
4144 make_C_tag (false); /* a variable */
4145 FALLTHROUGH;
4146 default:
4147 fvdef = vignore;
4149 break;
4150 case '<':
4151 if (cplpl
4152 && (structdef == stagseen || fvdef == fvnameseen))
4154 templatelev++;
4155 break;
4157 goto resetfvdef;
4158 case '>':
4159 if (templatelev > 0)
4161 templatelev--;
4162 break;
4164 goto resetfvdef;
4165 case '+':
4166 case '-':
4167 if (objdef == oinbody && bracelev == 0)
4169 objdef = omethodsign;
4170 break;
4172 FALLTHROUGH;
4173 resetfvdef:
4174 case '#': case '~': case '&': case '%': case '/':
4175 case '|': case '^': case '!': case '.': case '?':
4176 if (definedef != dnone)
4177 break;
4178 /* These surely cannot follow a function tag in C. */
4179 switch (fvdef)
4181 case foperator:
4182 case finlist:
4183 case fignore:
4184 case vignore:
4185 break;
4186 default:
4187 fvdef = fvnone;
4189 break;
4190 case '\0':
4191 if (objdef == otagseen)
4193 make_C_tag (true); /* an Objective C class */
4194 objdef = oignore;
4196 /* If a macro spans multiple lines don't reset its state. */
4197 if (quotednl)
4198 CNL_SAVE_DEFINEDEF ();
4199 else
4200 CNL ();
4201 break;
4202 } /* switch (c) */
4204 } /* while not eof */
/* The scan is over: release the storage of both line buffers.  */
4206 free (lbs[0].lb.buffer);
4207 free (lbs[1].lb.buffer);
4211 * Process either a C++ file or a C file depending on the setting
4212 * of a global flag.
4214 static void
4215 default_C_entries (FILE *inf)
4217 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: no language-extension bit is passed on.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extension_bits = 0;

  C_entries (no_extension_bits, inf);
}
4227 /* Always do C++. */
4228 static void
4229 Cplusplus_entries (FILE *inf)
4231 C_entries (C_PLPL, inf);
4234 /* Always do Java. */
4235 static void
4236 Cjava_entries (FILE *inf)
4238 C_entries (C_JAVA, inf);
4241 /* Always do C*. */
4242 static void
4243 Cstar_entries (FILE *inf)
4245 C_entries (C_STAR, inf);
4248 /* Always do Yacc. */
4249 static void
4250 Yacc_entries (FILE *inf)
4252 C_entries (YACC, inf);
4256 /* Useful macros shared by the line-oriented language parsers below. */
/* LOOP_ON_INPUT_LINES is a loop header: while perhaps_more_input holds
   for FILE_POINTER, it reads one line into LINE_BUFFER and points
   CHAR_POINTER at the start of that line.  The statement following the
   macro is the loop body, so `continue' moves on to the next line.  */
4257 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4258 while (perhaps_more_input (file_pointer) \
4259 && (readline (&(line_buffer), file_pointer), \
4260 (char_pointer) = (line_buffer).buffer, \
4261 true)) \
4263 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4264 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4265 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4266 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4267 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
4269 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use notinname to require a delimiter after the keyword, and does not skip the spaces that follow: on a match CP is left just past the keyword. */
4270 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4271 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4272 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4273 && ((cp) += sizeof (kw) - 1, true)) /* step past kw only */
4276 * Read a file, but do no processing. This is used to do regexp
4277 * matching on files that have no language defined.
4279 static void
4280 just_read_file (FILE *inf)
4282 while (perhaps_more_input (inf))
4283 readline (&lb, inf);
4287 /* Fortran parsing */
4289 static void F_takeprec (void);
4290 static void F_getit (FILE *);
4292 static void
4293 F_takeprec (void)
4295 dbp = skip_spaces (dbp);
4296 if (*dbp != '*')
4297 return;
4298 dbp++;
4299 dbp = skip_spaces (dbp);
4300 if (strneq (dbp, "(*)", 3))
4302 dbp += 3;
4303 return;
4305 if (!c_isdigit (*dbp))
4307 --dbp; /* force failure */
4308 return;
4311 dbp++;
4312 while (c_isdigit (*dbp));
4315 static void
4316 F_getit (FILE *inf)
4318 register char *cp;
4320 dbp = skip_spaces (dbp);
4321 if (*dbp == '\0')
4323 readline (&lb, inf);
4324 dbp = lb.buffer;
4325 if (dbp[5] != '&')
4326 return;
4327 dbp += 6;
4328 dbp = skip_spaces (dbp);
4330 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4331 return;
4332 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4333 continue;
4334 make_tag (dbp, cp-dbp, true,
4335 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/* Tag the Fortran program units found on INF.  Every line is examined
   through the global cursor DBP: an optional leading '%' is skipped,
   then the prefixes "recursive", "pure" and "elemental", then an
   optional type keyword (first switch, with F_takeprec consuming what
   may follow the type name).  The second switch hands "function",
   "subroutine", "entry" and "block data" lines to F_getit; a block
   data statement with nothing after it is tagged as "blockdata".  */
4339 static void
4340 Fortran_functions (FILE *inf)
4342 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4344 if (*dbp == '%')
4345 dbp++; /* Ratfor escape to fortran */
4346 dbp = skip_spaces (dbp);
4347 if (*dbp == '\0')
4348 continue;
/* LOOKING_AT_NOCASE leaves DBP right after the keyword, hence the
   explicit skip_spaces after each optional prefix.  */
4350 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4351 dbp = skip_spaces (dbp);
4353 if (LOOKING_AT_NOCASE (dbp, "pure"))
4354 dbp = skip_spaces (dbp);
4356 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4357 dbp = skip_spaces (dbp);
/* Optional type keyword in front of the program unit keyword.  */
4359 switch (c_tolower (*dbp))
4361 case 'i':
4362 if (nocase_tail ("integer"))
4363 F_takeprec ();
4364 break;
4365 case 'r':
4366 if (nocase_tail ("real"))
4367 F_takeprec ();
4368 break;
4369 case 'l':
4370 if (nocase_tail ("logical"))
4371 F_takeprec ();
4372 break;
4373 case 'c':
4374 if (nocase_tail ("complex") || nocase_tail ("character"))
4375 F_takeprec ();
4376 break;
4377 case 'd':
4378 if (nocase_tail ("double"))
4380 dbp = skip_spaces (dbp);
4381 if (*dbp == '\0')
4382 continue;
4383 if (nocase_tail ("precision"))
4384 break;
4385 continue;
4387 break;
4389 dbp = skip_spaces (dbp);
4390 if (*dbp == '\0')
4391 continue;
/* Program unit keyword; every case ends the processing of this line.  */
4392 switch (c_tolower (*dbp))
4394 case 'f':
4395 if (nocase_tail ("function"))
4396 F_getit (inf);
4397 continue;
4398 case 's':
4399 if (nocase_tail ("subroutine"))
4400 F_getit (inf);
4401 continue;
4402 case 'e':
4403 if (nocase_tail ("entry"))
4404 F_getit (inf);
4405 continue;
4406 case 'b':
4407 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4409 dbp = skip_spaces (dbp);
4410 if (*dbp == '\0') /* assume un-named */
4411 make_tag ("blockdata", 9, true,
4412 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4413 else
4414 F_getit (inf); /* look for name */
4416 continue;
4423 * Go language support
4424 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4426 static void
4427 Go_functions(FILE *inf)
4429 char *cp, *name;
4431 LOOP_ON_INPUT_LINES(inf, lb, cp)
4433 cp = skip_spaces (cp);
4435 if (LOOKING_AT (cp, "package"))
4437 name = cp;
4438 while (!notinname (*cp) && *cp != '\0')
4439 cp++;
4440 make_tag (name, cp - name, false, lb.buffer,
4441 cp - lb.buffer + 1, lineno, linecharno);
4443 else if (LOOKING_AT (cp, "func"))
4445 /* Go implementation of interface, such as:
4446 func (n *Integer) Add(m Integer) ...
4447 skip `(n *Integer)` part.
4449 if (*cp == '(')
4451 while (*cp != ')')
4452 cp++;
4453 cp = skip_spaces (cp+1);
4456 if (*cp)
4458 name = cp;
4460 while (!notinname (*cp))
4461 cp++;
4463 make_tag (name, cp - name, true, lb.buffer,
4464 cp - lb.buffer + 1, lineno, linecharno);
4467 else if (members && LOOKING_AT (cp, "type"))
4469 name = cp;
4471 /* Ignore the likes of the following:
4472 type (
4476 if (*cp == '(')
4477 return;
4479 while (!notinname (*cp) && *cp != '\0')
4480 cp++;
4482 make_tag (name, cp - name, false, lb.buffer,
4483 cp - lb.buffer + 1, lineno, linecharno);
4490 * Ada parsing
4491 * Original code by
4492 * Philippe Waroquiers (1998)
4495 /* Once we are positioned after an "interesting" keyword, let's get
4496 the real tag value necessary. */
4497 static void
4498 Ada_getit (FILE *inf, const char *name_qualifier)
4500 register char *cp;
4501 char *name;
4502 char c;
4504 while (perhaps_more_input (inf))
4506 dbp = skip_spaces (dbp);
4507 if (*dbp == '\0'
4508 || (dbp[0] == '-' && dbp[1] == '-'))
4510 readline (&lb, inf);
4511 dbp = lb.buffer;
4513 switch (c_tolower (*dbp))
4515 case 'b':
4516 if (nocase_tail ("body"))
4518 /* Skipping body of procedure body or package body or ....
4519 resetting qualifier to body instead of spec. */
4520 name_qualifier = "/b";
4521 continue;
4523 break;
4524 case 't':
4525 /* Skipping type of task type or protected type ... */
4526 if (nocase_tail ("type"))
4527 continue;
4528 break;
4530 if (*dbp == '"')
4532 dbp += 1;
4533 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4534 continue;
4536 else
4538 dbp = skip_spaces (dbp);
4539 for (cp = dbp;
4540 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4541 cp++)
4542 continue;
4543 if (cp == dbp)
4544 return;
4546 c = *cp;
4547 *cp = '\0';
4548 name = concat (dbp, name_qualifier, "");
4549 *cp = c;
4550 make_tag (name, strlen (name), true,
4551 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4552 free (name);
4553 if (c == '"')
4554 dbp = cp + 1;
4555 return;
/* Scan the Ada source on INF and call Ada_getit for every construct to
   tag.  String literals (INQUOTE carries an unterminated one over to the
   next line), "--" comments and a quote character together with the
   character after it are skipped.  The keywords function, procedure,
   package, protected, task and type select the qualifier handed to
   Ada_getit, subject to the packages_only and typedefs flags; after
   "use" (when typedefs is set) everything up to the next ';' is
   ignored.  */
4559 static void
4560 Ada_funcs (FILE *inf)
4562 bool inquote = false;
4563 bool skip_till_semicolumn = false;
4565 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4567 while (*dbp != '\0')
4569 /* Skip a string i.e. "abcd". */
4570 if (inquote || (*dbp == '"'))
/* When not already inside a string, DBP is on the opening quote, so the
   search for the closing quote starts one character further on.  */
4572 dbp = strchr (dbp + !inquote, '"');
4573 if (dbp != NULL)
4575 inquote = false;
4576 dbp += 1;
4577 continue; /* advance char */
4579 else
4581 inquote = true;
4582 break; /* advance line */
4586 /* Skip comments. */
4587 if (dbp[0] == '-' && dbp[1] == '-')
4588 break; /* advance line */
4590 /* Skip character enclosed in single quote i.e. 'a'
4591 and skip single quote starting an attribute i.e. 'Image. */
4592 if (*dbp == '\'')
4594 dbp++ ;
4595 if (*dbp != '\0')
4596 dbp++;
4597 continue;
4600 if (skip_till_semicolumn)
4602 if (*dbp == ';')
4603 skip_till_semicolumn = false;
4604 dbp++;
4605 continue; /* advance char */
4608 /* Search for beginning of a token. */
4609 if (!begtoken (*dbp))
4611 dbp++;
4612 continue; /* advance char */
4615 /* We are at the beginning of a token. */
4616 switch (c_tolower (*dbp))
4618 case 'f':
4619 if (!packages_only && nocase_tail ("function"))
4620 Ada_getit (inf, "/f");
4621 else
4622 break; /* from switch */
4623 continue; /* advance char */
4624 case 'p':
4625 if (!packages_only && nocase_tail ("procedure"))
4626 Ada_getit (inf, "/p");
4627 else if (nocase_tail ("package"))
4628 Ada_getit (inf, "/s");
4629 else if (nocase_tail ("protected")) /* protected type */
4630 Ada_getit (inf, "/t");
4631 else
4632 break; /* from switch */
4633 continue; /* advance char */
4635 case 'u':
4636 if (typedefs && !packages_only && nocase_tail ("use"))
4638 /* when tagging types, avoid tagging use type Pack.Typename;
4639 for this, we will skip everything till a ; */
4640 skip_till_semicolumn = true;
4641 continue; /* advance char */
/* NOTE(review): no `break' is visible between the 'u' case above and the
   't' case below; confirm whether falling through is intended.  */
4644 case 't':
4645 if (!packages_only && nocase_tail ("task"))
4646 Ada_getit (inf, "/k");
4647 else if (typedefs && !packages_only && nocase_tail ("type"))
4649 Ada_getit (inf, "/t");
/* Nothing else is looked for on a line that declared a type.  */
4650 while (*dbp != '\0')
4651 dbp += 1;
4653 else
4654 break; /* from switch */
4655 continue; /* advance char */
4658 /* Look for the end of the token. */
4659 while (!endtoken (*dbp))
4660 dbp++;
4662 } /* advance char */
4663 } /* advance line */
4668 * Unix and microcontroller assembly tag handling
4669 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4670 * Idea by Bob Weiner, Motorola Inc. (1994)
4672 static void
4673 Asm_labels (FILE *inf)
4675 register char *cp;
4677 LOOP_ON_INPUT_LINES (inf, lb, cp)
4679 /* If first char is alphabetic or one of [_.$], test for colon
4680 following identifier. */
4681 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4683 /* Read past label. */
4684 cp++;
4685 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4686 cp++;
4687 if (*cp == ':' || c_isspace (*cp))
4688 /* Found end of label, so copy it and add it to the table. */
4689 make_tag (lb.buffer, cp - lb.buffer, true,
4690 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4697 * Perl support
4698 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4699 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4700 * Perl variable names: /^(my|local).../
4701 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4702 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4703 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
/* Tag the Perl source on INF line by line.  A "package" line replaces
   the current package name, which starts out as "main".  "sub" lines and
   "use constant" / "use constant::defer" lines share the code at the
   `subr' label: a name that already contains "::" is tagged as written,
   or reduced to the part after the last "::" that lies inside the name
   when class_qualify is off; an unqualified name is prefixed with
   "<package>::" when class_qualify is on.  When `globals' is set, "my"
   and "local" declarations are tagged as well.  */
4705 static void
4706 Perl_functions (FILE *inf)
4708 char *package = savestr ("main"); /* current package name */
4709 register char *cp;
4711 LOOP_ON_INPUT_LINES (inf, lb, cp)
4713 cp = skip_spaces (cp);
4715 if (LOOKING_AT (cp, "package"))
/* NOTE(review): the old name is freed before get_tag is asked to store
   the new one; this presumes get_tag always writes to its second
   argument -- confirm against get_tag.  */
4717 free (package);
4718 get_tag (cp, &package);
4720 else if (LOOKING_AT (cp, "sub"))
4722 char *pos, *sp;
/* Also entered by the `goto subr' in the "use constant" branch below.  */
4724 subr:
4725 sp = cp;
4726 while (!notinname (*cp))
4727 cp++;
4728 if (cp == sp)
4729 continue; /* nothing found */
4730 pos = strchr (sp, ':');
4731 if (pos && pos < cp && pos[1] == ':')
4733 /* The name is already qualified. */
4734 if (!class_qualify)
4736 char *q = pos + 2, *qpos;
4737 while ((qpos = strchr (q, ':')) != NULL
4738 && qpos < cp
4739 && qpos[1] == ':')
4740 q = qpos + 2;
4741 sp = q;
4743 make_tag (sp, cp - sp, true,
4744 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4746 else if (class_qualify)
4747 /* Qualify it. */
4749 char savechar, *name;
/* Terminate the name in place while the qualified copy is built.  */
4751 savechar = *cp;
4752 *cp = '\0';
4753 name = concat (package, "::", sp);
4754 *cp = savechar;
4755 make_tag (name, strlen (name), true,
4756 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4757 free (name);
4759 else
4760 make_tag (sp, cp - sp, true,
4761 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4763 else if (LOOKING_AT (cp, "use constant")
4764 || LOOKING_AT (cp, "use constant::defer"))
4766 /* For hash style multi-constant like
4767 use constant { FOO => 123,
4768 BAR => 456 };
4769 only the first FOO is picked up. Parsing across the value
4770 expressions would be difficult in general, due to possible nested
4771 hashes, here-documents, etc. */
4772 if (*cp == '{')
4773 cp = skip_spaces (cp+1);
4774 goto subr;
4776 else if (globals) /* only if we are tagging global vars */
4778 /* Skip a qualifier, if any. */
4779 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4780 /* After "my" or "local", but before any following paren or space. */
4781 char *varstart = cp;
4783 if (qual /* should this be removed? If yes, how? */
4784 && (*cp == '$' || *cp == '@' || *cp == '%'))
/* Leave the sigil out of the tag name, then scan the identifier.  */
4786 varstart += 1;
4788 cp++;
4789 while (c_isalnum (*cp) || *cp == '_');
4791 else if (qual)
4793 /* Should be examining a variable list at this point;
4794 could insist on seeing an open parenthesis. */
4795 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4796 cp++;
4798 else
4799 continue;
4801 make_tag (varstart, cp - varstart, false,
4802 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4805 free (package);
4810 * Python support
4811 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4812 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4813 * More ideas by seb bacon <seb@jamkit.com> (2002)
4815 static void
4816 Python_functions (FILE *inf)
4818 register char *cp;
4820 LOOP_ON_INPUT_LINES (inf, lb, cp)
4822 cp = skip_spaces (cp);
4823 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4825 char *name = cp;
4826 while (!notinname (*cp) && *cp != ':')
4827 cp++;
4828 make_tag (name, cp - name, true,
4829 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4835 * Ruby support
4836 * Original code by Xi Lu <lx@shellcodes.org> (2015)
/* Scan the Ruby source on INF line by line.  Tags constants assigned
   with '=', names following "def", "class" and "module", and the
   symbols listed after attr_reader, attr_writer, attr_accessor and
   alias_method.  */
4838 static void
4839 Ruby_functions (FILE *inf)
4841 char *cp = NULL;
/* These flags persist from one line to the next: READER/WRITER/ALIAS
   record which accessor keyword is being processed, and CONTINUATION
   records that its symbol list was left open by a trailing comma.  */
4842 bool reader = false, writer = false, alias = false, continuation = false;
4844 LOOP_ON_INPUT_LINES (inf, lb, cp)
4846 bool is_class = false;
4847 bool is_method = false;
4848 char *name;
4850 cp = skip_spaces (cp);
4851 if (!continuation
4852 /* Constants. */
4853 && c_isalpha (*cp) && c_isupper (*cp))
4855 char *bp, *colon = NULL;
4857 name = cp;
/* Scan the constant, remembering the last ':' so that a "A::B"
   qualifier can be stripped below.  */
4859 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4861 if (*cp == ':')
4862 colon = cp;
/* Only names of at least two characters are considered.  */
4864 if (cp > name + 1)
4866 bp = skip_spaces (cp);
/* Require a plain assignment: '=' not followed by '=' or '>'.  */
4867 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4869 if (colon && !c_isspace (colon[1]))
4870 name = colon + 1;
4871 make_tag (name, cp - name, false,
4872 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4876 else if (!continuation
4877 /* Modules, classes, methods. */
4878 && ((is_method = LOOKING_AT (cp, "def"))
4879 || (is_class = LOOKING_AT (cp, "class"))
4880 || LOOKING_AT (cp, "module")))
4882 const char self_name[] = "self.";
/* Length of "self." without its terminating NUL.  */
4883 const size_t self_size1 = sizeof (self_name) - 1;
4885 name = cp;
4887 /* Ruby method names can end in a '='. Also, operator overloading can
4888 define operators whose names include '='. */
4889 while (!notinname (*cp) || *cp == '=')
4890 cp++;
4892 /* Remove "self." from the method name. */
4893 if (cp - name > self_size1
4894 && strneq (name, self_name, self_size1))
4895 name += self_size1;
4897 /* Remove the class/module qualifiers from method names. */
4898 if (is_method)
4900 char *q;
/* Find the first '.' inside the name, if any.  */
4902 for (q = name; q < cp && *q != '.'; q++)
4904 if (q < cp - 1) /* punt if we see just "FOO." */
4905 name = q + 1;
4908 /* Don't tag singleton classes. */
4909 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4910 continue;
4912 make_tag (name, cp - name, true,
4913 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4915 else
4917 /* Tag accessors and aliases. */
4919 if (!continuation)
4920 reader = writer = alias = false;
/* Walk the rest of the line up to a '#' comment.  */
4922 while (*cp && *cp != '#')
4924 if (!continuation)
4926 reader = writer = alias = false;
4927 if (LOOKING_AT (cp, "attr_reader"))
4928 reader = true;
4929 else if (LOOKING_AT (cp, "attr_writer"))
4930 writer = true;
4931 else if (LOOKING_AT (cp, "attr_accessor"))
4933 reader = true;
4934 writer = true;
4936 else if (LOOKING_AT (cp, "alias_method"))
4937 alias = true;
4939 if (reader || writer || alias)
/* Process the comma-separated symbol list, one symbol per iteration.  */
4941 do {
4942 char *np;
4944 cp = skip_spaces (cp);
4945 if (*cp == '(')
4946 cp = skip_spaces (cp + 1);
4947 np = cp;
4948 cp = skip_name (cp);
/* Only ":symbol" arguments are tagged; NP is moved past the ':'.  */
4949 if (*np != ':')
4950 continue;
4951 np++;
4952 if (reader)
4954 make_tag (np, cp - np, true,
4955 lb.buffer, cp - lb.buffer + 1,
4956 lineno, linecharno);
4957 continuation = false;
4959 if (writer)
/* Build "NAME=" in a fresh buffer: NAME_LEN counts the name plus the
   '=', and the second memcpy copies '=' together with its NUL.  The
   buffer is handed to pfnote and not freed here (presumably pfnote
   keeps it -- confirm against pfnote).  */
4961 size_t name_len = cp - np + 1;
4962 char *wr_name = xnew (name_len + 1, char);
4964 memcpy (wr_name, np, name_len - 1);
4965 memcpy (wr_name + name_len - 1, "=", 2);
4966 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4967 lineno, linecharno);
4968 continuation = false;
4970 if (alias)
/* Only the first symbol of an alias_method call is tagged; when this
   is a continuation line that symbol was already seen.  */
4972 if (!continuation)
4973 make_tag (np, cp - np, true,
4974 lb.buffer, cp - lb.buffer + 1,
4975 lineno, linecharno);
4976 continuation = false;
/* Skip the rest of the statement.  CONTINUATION ends up true only if
   the last non-blank character seen was a comma.  */
4977 while (*cp && *cp != '#' && *cp != ';')
4979 if (*cp == ',')
4980 continuation = true;
4981 else if (!c_isspace (*cp))
4982 continuation = false;
4983 cp++;
4985 if (*cp == ';')
4986 continuation = false;
4988 cp = skip_spaces (cp);
/* Keep going while a comma follows and more text remains on the line.
   For accessors the comma test also sets CONTINUATION, so a list that
   ends the line with a comma is resumed on the next line.  */
4989 } while ((alias
4990 ? (*cp == ',')
4991 : (continuation = (*cp == ',')))
4992 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
/* Move on to the next word of the line.  */
4994 if (*cp != '#')
4995 cp = skip_name (cp);
4996 while (*cp && *cp != '#' && notinname (*cp))
4997 cp++;
5005 * PHP support
5006 * Look for:
5007 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5008 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5009 * - /^[ \t]*define\(\"[^\"]+/
5010 * Only with --members:
5011 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5012 * Idea by Diez B. Roggisch (2001)
5014 static void
5015 PHP_functions (FILE *inf)
5017 char *cp, *name;
5018 bool search_identifier = false;
5020 LOOP_ON_INPUT_LINES (inf, lb, cp)
5022 cp = skip_spaces (cp);
5023 name = cp;
5024 if (search_identifier
5025 && *cp != '\0')
5027 while (!notinname (*cp))
5028 cp++;
5029 make_tag (name, cp - name, true,
5030 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5031 search_identifier = false;
5033 else if (LOOKING_AT (cp, "function"))
5035 if (*cp == '&')
5036 cp = skip_spaces (cp+1);
5037 if (*cp != '\0')
5039 name = cp;
5040 while (!notinname (*cp))
5041 cp++;
5042 make_tag (name, cp - name, true,
5043 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5045 else
5046 search_identifier = true;
5048 else if (LOOKING_AT (cp, "class"))
5050 if (*cp != '\0')
5052 name = cp;
5053 while (*cp != '\0' && !c_isspace (*cp))
5054 cp++;
5055 make_tag (name, cp - name, false,
5056 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5058 else
5059 search_identifier = true;
5061 else if (strneq (cp, "define", 6)
5062 && (cp = skip_spaces (cp+6))
5063 && *cp++ == '('
5064 && (*cp == '"' || *cp == '\''))
5066 char quote = *cp++;
5067 name = cp;
5068 while (*cp != quote && *cp != '\0')
5069 cp++;
5070 make_tag (name, cp - name, false,
5071 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5073 else if (members
5074 && LOOKING_AT (cp, "var")
5075 && *cp == '$')
5077 name = cp;
5078 while (!notinname (*cp))
5079 cp++;
5080 make_tag (name, cp - name, false,
5081 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5088 * Cobol tag functions
5089 * We could look for anything that could be a paragraph name.
5090 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5091 * Idea by Corny de Souza (1993)
5093 static void
5094 Cobol_paragraphs (FILE *inf)
5096 register char *bp, *ep;
5098 LOOP_ON_INPUT_LINES (inf, lb, bp)
5100 if (lb.len < 9)
5101 continue;
5102 bp += 8;
5104 /* If eoln, compiler option or comment ignore whole line. */
5105 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5106 continue;
5108 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5109 continue;
5110 if (*ep++ == '.')
5111 make_tag (bp, ep - bp, true,
5112 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5118 * Makefile support
5119 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5121 static void
5122 Makefile_targets (FILE *inf)
5124 register char *bp;
5126 LOOP_ON_INPUT_LINES (inf, lb, bp)
5128 if (*bp == '\t' || *bp == '#')
5129 continue;
5130 while (*bp != '\0' && *bp != '=' && *bp != ':')
5131 bp++;
5132 if (*bp == ':' || (globals && *bp == '='))
5134 /* We should detect if there is more than one tag, but we do not.
5135 We just skip initial and final spaces. */
5136 char * namestart = skip_spaces (lb.buffer);
5137 while (--bp > namestart)
5138 if (!notinname (*bp))
5139 break;
5140 make_tag (namestart, bp - namestart + 1, true,
5141 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5148 * Pascal parsing
5149 * Original code by Mosur K. Mohan (1989)
5151 * Locates tags for procedures & functions. Doesn't do any type- or
5152 * var-definitions. It does look for the keyword "extern" or
5153 * "forward" immediately following the procedure statement; if found,
5154 * the tag is skipped.
/* Scan the Pascal source on INF one character at a time, tracking
   comments, quoted strings and parameter lists, and make a tag for each
   PROCEDURE/FUNCTION name unless "extern" or "forward" follows its
   heading.  Uses the file-scope cursor `dbp' and line buffer `lb'.  */
5156 static void
5157 Pascal_functions (FILE *inf)
5159 linebuffer tline; /* mostly copied from C_entries */
5160 long save_lcno;
5161 int save_lineno, namelen, taglen;
5162 char c, *name;
5164 bool /* each of these flags is true if: */
5165 incomment, /* point is inside a comment */
5166 inquote, /* point is inside '..' string */
5167 get_tagname, /* point is after PROCEDURE/FUNCTION
5168 keyword, so next item = potential tag */
5169 found_tag, /* point is after a potential tag */
5170 inparms, /* point is within parameter-list */
5171 verify_tag; /* point has passed the parm-list, so the
5172 next token will determine whether this
5173 is a FORWARD/EXTERN to be ignored, or
5174 whether it is a real tag */
5176 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5177 name = NULL; /* keep compiler quiet */
/* Start with an empty current line so that the first pass through the
   loop below immediately reads a line.  */
5178 dbp = lb.buffer;
5179 *dbp = '\0';
5180 linebuffer_init (&tline);
5182 incomment = inquote = false;
5183 found_tag = false; /* have a proc name; check if extern */
5184 get_tagname = false; /* found "procedure" keyword */
5185 inparms = false; /* found '(' after "proc" */
5186 verify_tag = false; /* check if "extern" is ahead */
5189 while (perhaps_more_input (inf)) /* long main loop to get next char */
5191 c = *dbp++;
5192 if (c == '\0') /* if end of line */
5194 readline (&lb, inf);
5195 dbp = lb.buffer;
5196 if (*dbp == '\0')
5197 continue;
5198 if (!((found_tag && verify_tag)
5199 || get_tagname))
5200 c = *dbp++; /* only if don't need *dbp pointing
5201 to the beginning of the name of
5202 the procedure or function */
5204 if (incomment)
5206 if (c == '}') /* within { } comments */
5207 incomment = false;
5208 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5210 dbp++;
5211 incomment = false;
5213 continue;
5215 else if (inquote)
5217 if (c == '\'')
5218 inquote = false;
5219 continue;
5221 else
/* Ordinary text: update the state flags on structural characters.
   Only ';' ending a heading leaves the switch with `break'; every
   other listed case continues with the next character.  */
5222 switch (c)
5224 case '\'':
5225 inquote = true; /* found first quote */
5226 continue;
5227 case '{': /* found open { comment */
5228 incomment = true;
5229 continue;
5230 case '(':
5231 if (*dbp == '*') /* found open (* comment */
5233 incomment = true;
5234 dbp++;
5236 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5237 inparms = true;
5238 continue;
5239 case ')': /* end of parms list */
5240 if (inparms)
5241 inparms = false;
5242 continue;
5243 case ';':
5244 if (found_tag && !inparms) /* end of proc or fn stmt */
5246 verify_tag = true;
5247 break;
5249 continue;
/* A heading has been completed: decide, at the next non-blank
   position, whether it is followed by "extern"/"forward".  */
5251 if (found_tag && verify_tag && (*dbp != ' '))
5253 /* Check if this is an "extern" declaration. */
5254 if (*dbp == '\0')
5255 continue;
5256 if (c_tolower (*dbp) == 'e')
5258 if (nocase_tail ("extern")) /* superfluous, really! */
5260 found_tag = false;
5261 verify_tag = false;
5264 else if (c_tolower (*dbp) == 'f')
5266 if (nocase_tail ("forward")) /* check for forward reference */
5268 found_tag = false;
5269 verify_tag = false;
5272 if (found_tag && verify_tag) /* not external proc, so make tag */
5274 found_tag = false;
5275 verify_tag = false;
/* Emit the tag from the values saved when the name was seen.  */
5276 make_tag (name, namelen, true,
5277 tline.buffer, taglen, save_lineno, save_lcno);
5278 continue;
5281 if (get_tagname) /* grab name of proc or fn */
5283 char *cp;
5285 if (*dbp == '\0')
5286 continue;
5288 /* Find block name. */
5289 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5290 continue;
5292 /* Save all values for later tagging. */
/* The line is copied into TLINE because LB is refilled by readline
   before the tag is finally emitted above.  */
5293 linebuffer_setlen (&tline, lb.len);
5294 strcpy (tline.buffer, lb.buffer);
5295 save_lineno = lineno;
5296 save_lcno = linecharno;
5297 name = tline.buffer + (dbp - lb.buffer);
5298 namelen = cp - dbp;
5299 taglen = cp - lb.buffer + 1;
5301 dbp = cp; /* set dbp to e-o-token */
5302 get_tagname = false;
5303 found_tag = true;
5304 continue;
5306 /* And proceed to check for "extern". */
5308 else if (!incomment && !inquote && !found_tag)
5310 /* Check for proc/fn keywords. */
5311 switch (c_tolower (c))
5313 case 'p':
5314 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5315 get_tagname = true;
5316 continue;
5317 case 'f':
5318 if (nocase_tail ("unction"))
5319 get_tagname = true;
5320 continue;
5323 } /* while not eof */
5325 free (tline.buffer);
5330 * Lisp tag functions
5331 * look for (def or (DEF, quote or QUOTE
5334 static void L_getit (void);
5336 static void
5337 L_getit (void)
5339 if (*dbp == '\'') /* Skip prefix quote */
5340 dbp++;
5341 else if (*dbp == '(')
5343 dbp++;
5344 /* Try to skip "(quote " */
5345 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5346 /* Ok, then skip "(" before name in (defstruct (foo)) */
5347 dbp = skip_spaces (dbp);
5349 get_tag (dbp, NULL);
5352 static void
5353 Lisp_functions (FILE *inf)
5355 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5357 if (dbp[0] != '(')
5358 continue;
5360 /* "(defvar foo)" is a declaration rather than a definition. */
5361 if (! declarations)
5363 char *p = dbp + 1;
5364 if (LOOKING_AT (p, "defvar"))
5366 p = skip_name (p); /* past var name */
5367 p = skip_spaces (p);
5368 if (*p == ')')
5369 continue;
5373 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5374 dbp += 3;
5376 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5378 dbp = skip_non_spaces (dbp);
5379 dbp = skip_spaces (dbp);
5380 L_getit ();
5382 else
5384 /* Check for (foo::defmumble name-defined ... */
5386 dbp++;
5387 while (!notinname (*dbp) && *dbp != ':');
5388 if (*dbp == ':')
5391 dbp++;
5392 while (*dbp == ':');
5394 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5396 dbp = skip_non_spaces (dbp);
5397 dbp = skip_spaces (dbp);
5398 L_getit ();
5407 * Lua script language parsing
5408 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5410 * "function" and "local function" are tags if they start at column 1.
5412 static void
5413 Lua_functions (FILE *inf)
5415 register char *bp;
5417 LOOP_ON_INPUT_LINES (inf, lb, bp)
5419 bp = skip_spaces (bp);
5420 if (bp[0] != 'f' && bp[0] != 'l')
5421 continue;
5423 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5425 if (LOOKING_AT (bp, "function"))
5427 char *tag_name, *tp_dot, *tp_colon;
5429 get_tag (bp, &tag_name);
5430 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5431 "foo". */
5432 tp_dot = strrchr (tag_name, '.');
5433 tp_colon = strrchr (tag_name, ':');
5434 if (tp_dot || tp_colon)
5436 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5437 int len_add = p - tag_name + 1;
5439 get_tag (bp + len_add, NULL);
5447 * PostScript tags
5448 * Just look for lines where the first character is '/'
5449 * Also look at "defineps" for PSWrap
5450 * Ideas by:
5451 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5452 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5454 static void
5455 PS_functions (FILE *inf)
5457 register char *bp, *ep;
5459 LOOP_ON_INPUT_LINES (inf, lb, bp)
5461 if (bp[0] == '/')
5463 for (ep = bp+1;
5464 *ep != '\0' && *ep != ' ' && *ep != '{';
5465 ep++)
5466 continue;
5467 make_tag (bp, ep - bp, true,
5468 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5470 else if (LOOKING_AT (bp, "defineps"))
5471 get_tag (bp, NULL);
5477 * Forth tags
5478 * Ignore anything after \ followed by space or in ( )
5479 * Look for words defined by :
5480 * Look for constant, code, create, defer, value, and variable
5481 * OBP extensions: Look for buffer:, field,
5482 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5484 static void
5485 Forth_words (FILE *inf)
5487 register char *bp;
5489 LOOP_ON_INPUT_LINES (inf, lb, bp)
5490 while ((bp = skip_spaces (bp))[0] != '\0')
5491 if (bp[0] == '\\' && c_isspace (bp[1]))
5492 break; /* read next line */
5493 else if (bp[0] == '(' && c_isspace (bp[1]))
5494 do /* skip to ) or eol */
5495 bp++;
5496 while (*bp != ')' && *bp != '\0');
5497 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5498 || LOOKING_AT_NOCASE (bp, "constant")
5499 || LOOKING_AT_NOCASE (bp, "2constant")
5500 || LOOKING_AT_NOCASE (bp, "fconstant")
5501 || LOOKING_AT_NOCASE (bp, "code")
5502 || LOOKING_AT_NOCASE (bp, "create")
5503 || LOOKING_AT_NOCASE (bp, "defer")
5504 || LOOKING_AT_NOCASE (bp, "value")
5505 || LOOKING_AT_NOCASE (bp, "2value")
5506 || LOOKING_AT_NOCASE (bp, "fvalue")
5507 || LOOKING_AT_NOCASE (bp, "variable")
5508 || LOOKING_AT_NOCASE (bp, "2variable")
5509 || LOOKING_AT_NOCASE (bp, "fvariable")
5510 || LOOKING_AT_NOCASE (bp, "buffer:")
5511 || LOOKING_AT_NOCASE (bp, "field:")
5512 || LOOKING_AT_NOCASE (bp, "+field")
5513 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5514 || LOOKING_AT_NOCASE (bp, "begin-structure")
5515 || LOOKING_AT_NOCASE (bp, "synonym")
5517 && c_isspace (bp[0]))
5519 /* Yay! A definition! */
5520 char* name_start = skip_spaces (bp);
5521 char* name_end = skip_non_spaces (name_start);
5522 if (name_start < name_end)
5523 make_tag (name_start, name_end - name_start,
5524 true, lb.buffer, name_end - lb.buffer,
5525 lineno, linecharno);
5526 bp = name_end;
5528 else
5529 bp = skip_non_spaces (bp);
5534 * Scheme tag functions
5535 * look for (def... xyzzy
5536 * (def... (xyzzy
5537 * (def ... ((...(xyzzy ....
5538 * (set! xyzzy
5539 * Original code by Ken Haase (1985?)
5541 static void
5542 Scheme_functions (FILE *inf)
5544 register char *bp;
5546 LOOP_ON_INPUT_LINES (inf, lb, bp)
5548 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5550 bp = skip_non_spaces (bp+4);
5551 /* Skip over open parens and white space. Don't continue past
5552 '\0'. */
5553 while (*bp && notinname (*bp))
5554 bp++;
5555 get_tag (bp, NULL);
5557 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5558 get_tag (bp, NULL);
5563 /* Find tags in TeX and LaTeX input files. */
5565 /* TEX_toktab is a table of TeX control sequences that define tags.
5566 * Each entry records one such control sequence.
5568 * Original code from who knows whom.
5569 * Ideas by:
5570 * Stefan Monnier (2002)
/* NULL until TEX_decode_env builds the table; TeX_commands walks it up
   to the entry whose `buffer' is NULL.  */
5573 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5575 /* Default set of control sequences to put into TEX_toktab.
5576 The value of environment var TEXTAGS is prepended to this. */
5577 static const char *TEX_defenv = "\
5578 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5579 :part:appendix:entry:index:def\
5580 :newcommand:renewcommand:newenvironment:renewenvironment";
/* Forward declaration: TeX_commands calls this before its definition.  */
5582 static void TEX_decode_env (const char *, const char *);
5585 * TeX/LaTeX scanning loop.
5587 static void
5588 TeX_commands (FILE *inf)
5590 char *cp;
5591 linebuffer *key;
5593 char TEX_esc = '\0';
5594 char TEX_opgrp, TEX_clgrp;
5596 /* Initialize token table once from environment. */
5597 if (TEX_toktab == NULL)
5598 TEX_decode_env ("TEXTAGS", TEX_defenv);
5600 LOOP_ON_INPUT_LINES (inf, lb, cp)
5602 /* Look at each TEX keyword in line. */
5603 for (;;)
5605 /* Look for a TEX escape. */
5606 while (true)
5608 char c = *cp++;
5609 if (c == '\0' || c == '%')
5610 goto tex_next_line;
5612 /* Select either \ or ! as escape character, whichever comes
5613 first outside a comment. */
5614 if (!TEX_esc)
5615 switch (c)
5617 case '\\':
5618 TEX_esc = c;
5619 TEX_opgrp = '{';
5620 TEX_clgrp = '}';
5621 break;
5623 case '!':
5624 TEX_esc = c;
5625 TEX_opgrp = '<';
5626 TEX_clgrp = '>';
5627 break;
5630 if (c == TEX_esc)
5631 break;
5634 for (key = TEX_toktab; key->buffer != NULL; key++)
5635 if (strneq (cp, key->buffer, key->len))
5637 char *p;
5638 int namelen, linelen;
5639 bool opgrp = false;
5641 cp = skip_spaces (cp + key->len);
5642 if (*cp == TEX_opgrp)
5644 opgrp = true;
5645 cp++;
5647 for (p = cp;
5648 (!c_isspace (*p) && *p != '#' &&
5649 *p != TEX_opgrp && *p != TEX_clgrp);
5650 p++)
5651 continue;
5652 namelen = p - cp;
5653 linelen = lb.len;
5654 if (!opgrp || *p == TEX_clgrp)
5656 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5657 p++;
5658 linelen = p - lb.buffer + 1;
5660 make_tag (cp, namelen, true,
5661 lb.buffer, linelen, lineno, linecharno);
5662 goto tex_next_line; /* We only tag a line once */
5665 tex_next_line:
5670 /* Read environment and prepend it to the default string.
5671 Build token table. */
5672 static void
5673 TEX_decode_env (const char *evarname, const char *defenv)
5675 register const char *env, *p;
5676 int i, len;
5678 /* Append default string to environment. */
5679 env = getenv (evarname);
5680 if (!env)
5681 env = defenv;
5682 else
5683 env = concat (env, defenv, "");
5685 /* Allocate a token table */
5686 for (len = 1, p = env; (p = strchr (p, ':')); )
5687 if (*++p)
5688 len++;
5689 TEX_toktab = xnew (len, linebuffer);
5691 /* Unpack environment string into token table. Be careful about */
5692 /* zero-length strings (leading ':', "::" and trailing ':') */
5693 for (i = 0; *env != '\0';)
5695 p = strchr (env, ':');
5696 if (!p) /* End of environment string. */
5697 p = env + strlen (env);
5698 if (p - env > 0)
5699 { /* Only non-zero strings. */
5700 TEX_toktab[i].buffer = savenstr (env, p - env);
5701 TEX_toktab[i].len = p - env;
5702 i++;
5704 if (*p)
5705 env = p + 1;
5706 else
5708 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5709 TEX_toktab[i].len = 0;
5710 break;
5716 /* Texinfo support. Dave Love, Mar. 2000. */
5717 static void
5718 Texinfo_nodes (FILE *inf)
5720 char *cp, *start;
5721 LOOP_ON_INPUT_LINES (inf, lb, cp)
5722 if (LOOKING_AT (cp, "@node"))
5724 start = cp;
5725 while (*cp != '\0' && *cp != ',')
5726 cp++;
5727 make_tag (start, cp - start, true,
5728 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5734 * HTML support.
5735 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5736 * Contents of <a name=xxx> are tags with name xxx.
5738 * Francesco Potortì, 2002.
/* Scan the HTML on INF.  The text following <title>, <h1>, <h2> or
   <h3> is tagged, and so is the text following an element that carries
   id= (or name= inside an anchor), using the attribute value as the tag
   name.  The four flags below persist from line to line; the file-scope
   cursor `dbp' and buffer `token_name' are used.  */
5740 static void
5741 HTML_labels (FILE *inf)
5743 bool getnext = false; /* next text outside of HTML tags is a tag */
5744 bool skiptag = false; /* skip to the end of the current HTML tag */
5745 bool intag = false; /* inside an html tag, looking for ID= */
5746 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5747 char *end;
5750 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5752 LOOP_ON_INPUT_LINES (inf, lb, dbp)
/* In this loop `continue' re-examines the current line from DBP and
   `break' moves on to the next input line.  */
5753 for (;;) /* loop on the same line */
5755 if (skiptag) /* skip HTML tag */
5757 while (*dbp != '\0' && *dbp != '>')
5758 dbp++;
5759 if (*dbp == '>')
5761 dbp += 1;
5762 skiptag = false;
5763 continue; /* look on the same line */
5765 break; /* go to next line */
5768 else if (intag) /* look for "name=" or "id=" */
/* Stop at the next character that could start "name=" or "id=", at the
   closing '>', or at the end of the line.  */
5770 while (*dbp != '\0' && *dbp != '>'
5771 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5772 dbp++;
5773 if (*dbp == '\0')
5774 break; /* go to next line */
5775 if (*dbp == '>')
5777 dbp += 1;
5778 intag = false;
5779 continue; /* look on the same line */
5781 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5782 || LOOKING_AT_NOCASE (dbp, "id="))
5784 bool quoted = (dbp[0] == '"');
/* Find the end of the attribute value: the closing '"' if it is
   quoted, otherwise the first character for which intoken is false.  */
5786 if (quoted)
5787 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5788 continue;
5789 else
5790 for (end = dbp; *end != '\0' && intoken (*end); end++)
5791 continue;
/* Keep the value in TOKEN_NAME; it becomes the tag name once the text
   that follows the element is found.  */
5792 linebuffer_setlen (&token_name, end - dbp);
5793 memcpy (token_name.buffer, dbp, end - dbp);
5794 token_name.buffer[end - dbp] = '\0';
5796 dbp = end;
5797 intag = false; /* we found what we looked for */
5798 skiptag = true; /* skip to the end of the tag */
5799 getnext = true; /* then grab the text */
5800 continue; /* look on the same line */
5802 dbp += 1;
5805 else if (getnext) /* grab next tokens and tag them */
5807 dbp = skip_spaces (dbp);
5808 if (*dbp == '\0')
5809 break; /* go to next line */
5810 if (*dbp == '<')
5812 intag = true;
5813 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5814 continue; /* look on the same line */
/* Plain text: it runs up to the next '<' or the end of the line.  */
5817 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5818 continue;
5819 make_tag (token_name.buffer, token_name.len, true,
5820 dbp, end - dbp, lineno, linecharno);
5821 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5822 getnext = false;
5823 break; /* go to next line */
5826 else /* look for an interesting HTML tag */
5828 while (*dbp != '\0' && *dbp != '<')
5829 dbp++;
5830 if (*dbp == '\0')
5831 break; /* go to next line */
5832 intag = true;
5833 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5835 inanchor = true;
5836 continue; /* look on the same line */
5838 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5839 || LOOKING_AT_NOCASE (dbp, "<h1>")
5840 || LOOKING_AT_NOCASE (dbp, "<h2>")
5841 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5843 intag = false;
5844 getnext = true;
5845 continue; /* look on the same line */
5847 dbp += 1;
5854 * Prolog support
5856 * Assumes that the predicate or rule starts at column 0.
5857 * Only the first clause of a predicate or rule is added.
5858 * Original code by Sunichirou Sugou (1989)
5859 * Rewritten by Anders Lindgren (1996)
5861 static size_t prolog_pr (char *, char *);
5862 static void prolog_skip_comment (linebuffer *, FILE *);
5863 static size_t prolog_atom (char *, size_t);
5865 static void
5866 Prolog_functions (FILE *inf)
5868 char *cp, *last;
5869 size_t len;
5870 size_t allocated;
5872 allocated = 0;
5873 len = 0;
5874 last = NULL;
5876 LOOP_ON_INPUT_LINES (inf, lb, cp)
5878 if (cp[0] == '\0') /* Empty line */
5879 continue;
5880 else if (c_isspace (cp[0])) /* Not a predicate */
5881 continue;
5882 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5883 prolog_skip_comment (&lb, inf);
5884 else if ((len = prolog_pr (cp, last)) > 0)
5886 /* Predicate or rule. Store the function name so that we
5887 only generate a tag for the first clause. */
5888 if (last == NULL)
5889 last = xnew (len + 1, char);
5890 else if (len + 1 > allocated)
5891 xrnew (last, len + 1, char);
5892 allocated = len + 1;
5893 memcpy (last, cp, len);
5894 last[len] = '\0';
5897 free (last);
5901 static void
5902 prolog_skip_comment (linebuffer *plb, FILE *inf)
5904 char *cp;
5908 for (cp = plb->buffer; *cp != '\0'; cp++)
5909 if (cp[0] == '*' && cp[1] == '/')
5910 return;
5911 readline (plb, inf);
5913 while (perhaps_more_input (inf));
5917 * A predicate or rule definition is added if it matches:
5918 * <beginning of line><Prolog Atom><whitespace>(
5919 * or <beginning of line><Prolog Atom><whitespace>:-
5921 * It is added to the tags database if it doesn't match the
5922 * name of the previous clause header.
5924 * Return the size of the name of the predicate or rule, or 0 if no
5925 * header was found.
5927 static size_t
5928 prolog_pr (char *s, char *last)
5930 /* Name of last clause. */
5932 size_t pos;
5933 size_t len;
5935 pos = prolog_atom (s, 0);
5936 if (! pos)
5937 return 0;
5939 len = pos;
5940 pos = skip_spaces (s + pos) - s;
5942 if ((s[pos] == '.'
5943 || (s[pos] == '(' && (pos += 1))
5944 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5945 && (last == NULL /* save only the first clause */
5946 || len != strlen (last)
5947 || !strneq (s, last, len)))
5949 make_tag (s, len, true, s, pos, lineno, linecharno);
5950 return len;
5952 else
5953 return 0;
/*
 * Consume a Prolog atom that starts at S + POS.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *const begin = s + pos;
  const char *p = begin;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - begin;
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: scan for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        /* Backslash quotes the next character, which must exist.  */
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      case '\'':
        /* A lone quote closes the atom; a doubled quote stands for one
           quote character inside it.  */
        if (p[1] != '\'')
          return p + 1 - begin;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
6015 * Support for Erlang
6017 * Generates tags for functions, defines, and records.
6018 * Assumes that Erlang functions start at column 0.
6019 * Original code by Anders Lindgren (1996)
6021 static int erlang_func (char *, char *);
6022 static void erlang_attribute (char *);
6023 static int erlang_atom (char *);
6025 static void
6026 Erlang_functions (FILE *inf)
6028 char *cp, *last;
6029 int len;
6030 int allocated;
6032 allocated = 0;
6033 len = 0;
6034 last = NULL;
6036 LOOP_ON_INPUT_LINES (inf, lb, cp)
6038 if (cp[0] == '\0') /* Empty line */
6039 continue;
6040 else if (c_isspace (cp[0])) /* Not function nor attribute */
6041 continue;
6042 else if (cp[0] == '%') /* comment */
6043 continue;
6044 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6045 continue;
6046 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6048 erlang_attribute (cp);
6049 if (last != NULL)
6051 free (last);
6052 last = NULL;
6055 else if ((len = erlang_func (cp, last)) > 0)
6058 * Function. Store the function name so that we only
6059 * generates a tag for the first clause.
6061 if (last == NULL)
6062 last = xnew (len + 1, char);
6063 else if (len + 1 > allocated)
6064 xrnew (last, len + 1, char);
6065 allocated = len + 1;
6066 memcpy (last, cp, len);
6067 last[len] = '\0';
6070 free (last);
6075 * A function definition is added if it matches:
6076 * <beginning of line><Erlang Atom><whitespace>(
6078 * It is added to the tags database if it doesn't match the
6079 * name of the previous clause header.
6081 * Return the size of the name of the function, or 0 if no function
6082 * was found.
6084 static int
6085 erlang_func (char *s, char *last)
6087 /* Name of last clause. */
6089 int pos;
6090 int len;
6092 pos = erlang_atom (s);
6093 if (pos < 1)
6094 return 0;
6096 len = pos;
6097 pos = skip_spaces (s + pos) - s;
6099 /* Save only the first clause. */
6100 if (s[pos++] == '('
6101 && (last == NULL
6102 || len != (int)strlen (last)
6103 || !strneq (s, last, len)))
6105 make_tag (s, len, true, s, pos, lineno, linecharno);
6106 return len;
6109 return 0;
6114 * Handle attributes. Currently, tags are generated for defines
6115 * and records.
6117 * They are on the form:
6118 * -define(foo, bar).
6119 * -define(Foo(M, N), M+N).
6120 * -record(graph, {vtab = notable, cyclic = true}).
6122 static void
6123 erlang_attribute (char *s)
6125 char *cp = s;
6127 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6128 && *cp++ == '(')
6130 int len = erlang_atom (skip_spaces (cp));
6131 if (len > 0)
6132 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
6134 return;
/*
 * Consume an Erlang atom (or variable) that starts at S.
 * Return the number of bytes consumed, or 0 if S does not start an
 * atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int n;

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (n = 1; c_isalnum (s[n]) || s[n] == '_'; n++)
        continue;
      return n;
    }

  if (s[0] != '\'')
    return 0;

  /* Quoted atom: look for the closing quote.  A backslash quotes the
     character after it.  */
  for (n = 1; s[n] != '\''; n++)
    {
      if (s[n] == '\\')
        n++;
      if (s[n] == '\0')         /* multiline quoted atoms are ignored */
        return 0;
    }
  return n + 1;
}
/* Forward declarations for the regexp support code.  */
6167 static char *scan_separators (char *);
6168 static void add_regex (char *, language *);
6169 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  /* Letters that form an escape after a backslash, and the character
     each one stands for: BEL, BS, DEL, ESC, FF, NL, CR, TAB, VT.  */
  static const char esc_letters[] = "abdefnrtv";
  static const char esc_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  const char sep = name[0];
  char *out = name;             /* where the next output byte goes */
  char *in;                     /* next input byte; always ahead of OUT */
  char *result;

  for (in = name + 1; *in != '\0'; in++)
    {
      const char *hit;
      char c = *in;

      if (c != '\\')
        {
          /* An unquoted separator ends the string.  */
          if (c == sep)
            break;
          *out++ = c;
          continue;
        }

      /* Backslash: the next character decides.  A backslash at the very
         end leaves the string unterminated.  */
      c = *++in;
      if (c == '\0')
        break;

      hit = strchr (esc_letters, c);
      if (hit != NULL)
        *out++ = esc_values[hit - esc_letters];
      else if (c == sep)
        *out++ = sep;
      else
        {
          /* Something else is quoted, so preserve the quote.  */
          *out++ = '\\';
          *out++ = c;
        }
    }

  /* Signal an unterminated regexp with NULL.  */
  result = (*in == sep) ? in : NULL;

  /* Terminate copied string.  */
  *out = '\0';
  return result;
}
6230 /* Look at the argument of --regex or --no-regex and do the right
6231 thing. Same for each line of a regexp file. */
6232 static void
6233 analyze_regex (char *regex_arg)
6235 if (regex_arg == NULL)
6237 free_regexps (); /* --no-regex: remove existing regexps */
6238 return;
6241 /* A real --regexp option or a line in a regexp file. */
6242 switch (regex_arg[0])
6244 /* Comments in regexp file or null arg to --regex. */
6245 case '\0':
6246 case ' ':
6247 case '\t':
6248 break;
6250 /* Read a regex file. This is recursive and may result in a
6251 loop, which will stop when the file descriptors are exhausted. */
6252 case '@':
6254 FILE *regexfp;
6255 linebuffer regexbuf;
6256 char *regexfile = regex_arg + 1;
6258 /* regexfile is a file containing regexps, one per line. */
6259 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6260 if (regexfp == NULL)
6261 pfatal (regexfile);
6262 linebuffer_init (&regexbuf);
6263 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6264 analyze_regex (regexbuf.buffer);
6265 free (regexbuf.buffer);
6266 if (fclose (regexfp) != 0)
6267 pfatal (regexfile);
6269 break;
6271 /* Regexp to be used for a specific language only. */
6272 case '{':
6274 language *lang;
6275 char *lang_name = regex_arg + 1;
6276 char *cp;
6278 for (cp = lang_name; *cp != '}'; cp++)
6279 if (*cp == '\0')
6281 error ("unterminated language name in regex: %s", regex_arg);
6282 return;
6284 *cp++ = '\0';
6285 lang = get_language_from_langname (lang_name);
6286 if (lang == NULL)
6287 return;
6288 add_regex (cp, lang);
6290 break;
6292 /* Regexp to be used for any language. */
6293 default:
6294 add_regex (regex_arg, NULL);
6295 break;
6299 /* Separate the regexp pattern, compile it,
6300 and care for optional name and modifiers. */
6301 static void
6302 add_regex (char *regexp_pattern, language *lang)
6304 static struct re_pattern_buffer zeropattern;
6305 char sep, *pat, *name, *modifiers;
6306 char empty = '\0';
6307 const char *err;
6308 struct re_pattern_buffer *patbuf;
6309 regexp *rp;
6310 bool
6311 force_explicit_name = true, /* do not use implicit tag names */
6312 ignore_case = false, /* case is significant */
6313 multi_line = false, /* matches are done one line at a time */
6314 single_line = false; /* dot does not match newline */
6317 if (strlen (regexp_pattern) < 3)
6319 error ("null regexp");
6320 return;
6322 sep = regexp_pattern[0];
6323 name = scan_separators (regexp_pattern);
6324 if (name == NULL)
6326 error ("%s: unterminated regexp", regexp_pattern);
6327 return;
6329 if (name[1] == sep)
6331 error ("null name for regexp \"%s\"", regexp_pattern);
6332 return;
6334 modifiers = scan_separators (name);
6335 if (modifiers == NULL) /* no terminating separator --> no name */
6337 modifiers = name;
6338 name = &empty;
6340 else
6341 modifiers += 1; /* skip separator */
6343 /* Parse regex modifiers. */
6344 for (; modifiers[0] != '\0'; modifiers++)
6345 switch (modifiers[0])
6347 case 'N':
6348 if (modifiers == name)
6349 error ("forcing explicit tag name but no name, ignoring");
6350 force_explicit_name = true;
6351 break;
6352 case 'i':
6353 ignore_case = true;
6354 break;
6355 case 's':
6356 single_line = true;
6357 FALLTHROUGH;
6358 case 'm':
6359 multi_line = true;
6360 need_filebuf = true;
6361 break;
6362 default:
6363 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6364 break;
6367 patbuf = xnew (1, struct re_pattern_buffer);
6368 *patbuf = zeropattern;
6369 if (ignore_case)
6371 static char lc_trans[UCHAR_MAX + 1];
6372 int i;
6373 for (i = 0; i < UCHAR_MAX + 1; i++)
6374 lc_trans[i] = c_tolower (i);
6375 patbuf->translate = lc_trans; /* translation table to fold case */
6378 if (multi_line)
6379 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6380 else
6381 pat = regexp_pattern;
6383 if (single_line)
6384 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6385 else
6386 re_set_syntax (RE_SYNTAX_EMACS);
6388 err = re_compile_pattern (pat, strlen (pat), patbuf);
6389 if (multi_line)
6390 free (pat);
6391 if (err != NULL)
6393 error ("%s while compiling pattern", err);
6394 return;
6397 rp = p_head;
6398 p_head = xnew (1, regexp);
6399 p_head->pattern = savestr (regexp_pattern);
6400 p_head->p_next = rp;
6401 p_head->lang = lang;
6402 p_head->pat = patbuf;
6403 p_head->name = savestr (name);
6404 p_head->error_signaled = false;
6405 p_head->force_explicit_name = force_explicit_name;
6406 p_head->ignore_case = ignore_case;
6407 p_head->multi_line = multi_line;
6411 * Do the substitutions indicated by the regular expression and
6412 * arguments.
6414 static char *
6415 substitute (char *in, char *out, struct re_registers *regs)
6417 char *result, *t;
6418 int size, dig, diglen;
6420 result = NULL;
6421 size = strlen (out);
6423 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6424 if (out[size - 1] == '\\')
6425 fatal ("pattern error in \"%s\"", out);
6426 for (t = strchr (out, '\\');
6427 t != NULL;
6428 t = strchr (t + 2, '\\'))
6429 if (c_isdigit (t[1]))
6431 dig = t[1] - '0';
6432 diglen = regs->end[dig] - regs->start[dig];
6433 size += diglen - 2;
6435 else
6436 size -= 1;
6438 /* Allocate space and do the substitutions. */
6439 assert (size >= 0);
6440 result = xnew (size + 1, char);
6442 for (t = result; *out != '\0'; out++)
6443 if (*out == '\\' && c_isdigit (*++out))
6445 dig = *out - '0';
6446 diglen = regs->end[dig] - regs->start[dig];
6447 memcpy (t, in + regs->start[dig], diglen);
6448 t += diglen;
6450 else
6451 *t++ = *out;
6452 *t = '\0';
6454 assert (t <= result + size);
6455 assert (t - result == (int)strlen (result));
6457 return result;
6460 /* Deallocate all regexps. */
6461 static void
6462 free_regexps (void)
6464 regexp *rp;
6465 while (p_head != NULL)
6467 rp = p_head->p_next;
6468 free (p_head->pattern);
6469 free (p_head->name);
6470 free (p_head);
6471 p_head = rp;
6473 return;
6477 * Reads the whole file as a single string from `filebuf' and looks for
6478 * multi-line regular expressions, creating tags on matches.
6479 * readline already dealt with normal regexps.
6481 * Idea by Ben Wing <ben@666.com> (2002).
6483 static void
6484 regex_tag_multiline (void)
6486 char *buffer = filebuf.buffer;
6487 regexp *rp;
6488 char *name;
6490 for (rp = p_head; rp != NULL; rp = rp->p_next)
6492 int match = 0;
6494 if (!rp->multi_line)
6495 continue; /* skip normal regexps */
6497 /* Generic initializations before parsing file from memory. */
6498 lineno = 1; /* reset global line number */
6499 charno = 0; /* reset global char number */
6500 linecharno = 0; /* reset global char number of line start */
6502 /* Only use generic regexps or those for the current language. */
6503 if (rp->lang != NULL && rp->lang != curfdp->lang)
6504 continue;
6506 while (match >= 0 && match < filebuf.len)
6508 match = re_search (rp->pat, buffer, filebuf.len, charno,
6509 filebuf.len - match, &rp->regs);
6510 switch (match)
6512 case -2:
6513 /* Some error. */
6514 if (!rp->error_signaled)
6516 error ("regexp stack overflow while matching \"%s\"",
6517 rp->pattern);
6518 rp->error_signaled = true;
6520 break;
6521 case -1:
6522 /* No match. */
6523 break;
6524 default:
6525 if (match == rp->regs.end[0])
6527 if (!rp->error_signaled)
6529 error ("regexp matches the empty string: \"%s\"",
6530 rp->pattern);
6531 rp->error_signaled = true;
6533 match = -3; /* exit from while loop */
6534 break;
6537 /* Match occurred. Construct a tag. */
6538 while (charno < rp->regs.end[0])
6539 if (buffer[charno++] == '\n')
6540 lineno++, linecharno = charno;
6541 name = rp->name;
6542 if (name[0] == '\0')
6543 name = NULL;
6544 else /* make a named tag */
6545 name = substitute (buffer, rp->name, &rp->regs);
6546 if (rp->force_explicit_name)
6547 /* Force explicit tag name, if a name is there. */
6548 pfnote (name, true, buffer + linecharno,
6549 charno - linecharno + 1, lineno, linecharno);
6550 else
6551 make_tag (name, strlen (name), true, buffer + linecharno,
6552 charno - linecharno + 1, lineno, linecharno);
6553 break;
6560 static bool
6561 nocase_tail (const char *cp)
6563 int len = 0;
6565 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6566 cp++, len++;
6567 if (*cp == '\0' && !intoken (dbp[len]))
6569 dbp += len;
6570 return true;
6572 return false;
6575 static void
6576 get_tag (register char *bp, char **namepp)
6578 register char *cp = bp;
6580 if (*bp != '\0')
6582 /* Go till you get to white space or a syntactic break */
6583 for (cp = bp + 1; !notinname (*cp); cp++)
6584 continue;
6585 make_tag (bp, cp - bp, true,
6586 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6589 if (namepp != NULL)
6590 *namepp = savenstr (bp, cp - bp);
6594 * Read a line of text from `stream' into `lbp', excluding the
6595 * newline or CR-NL, if any. Return the number of characters read from
6596 * `stream', which is the length of the line including the newline.
6598 * On DOS or Windows we do not count the CR character, if any before the
6599 * NL, in the returned length; this mirrors the behavior of Emacs on those
6600 * platforms (for text files, it translates CR-NL to NL as it reads in the
6601 * file).
6603 * If multi-line regular expressions are requested, each line read is
6604 * appended to `filebuf'.
6606 static long
6607 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6609 char *buffer = lbp->buffer;
6610 char *p = lbp->buffer;
6611 char *pend;
6612 int chars_deleted;
6614 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6616 for (;;)
6618 register int c = getc (stream);
6619 if (p == pend)
6621 /* We're at the end of linebuffer: expand it. */
6622 lbp->size *= 2;
6623 xrnew (buffer, lbp->size, char);
6624 p += buffer - lbp->buffer;
6625 pend = buffer + lbp->size;
6626 lbp->buffer = buffer;
6628 if (c == EOF)
6630 if (ferror (stream))
6631 perror (filename);
6632 *p = '\0';
6633 chars_deleted = 0;
6634 break;
6636 if (c == '\n')
6638 if (p > buffer && p[-1] == '\r')
6640 p -= 1;
6641 chars_deleted = 2;
6643 else
6645 chars_deleted = 1;
6647 *p = '\0';
6648 break;
6650 *p++ = c;
6652 lbp->len = p - buffer;
6654 if (need_filebuf /* we need filebuf for multi-line regexps */
6655 && chars_deleted > 0) /* not at EOF */
6657 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6659 /* Expand filebuf. */
6660 filebuf.size *= 2;
6661 xrnew (filebuf.buffer, filebuf.size, char);
6663 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6664 filebuf.len += lbp->len;
6665 filebuf.buffer[filebuf.len++] = '\n';
6666 filebuf.buffer[filebuf.len] = '\0';
6669 return lbp->len + chars_deleted;
6673 * Like readline_internal, above, but in addition try to match the
6674 * input line against relevant regular expressions and manage #line
6675 * directives.
6677 static void
6678 readline (linebuffer *lbp, FILE *stream)
6680 long result;
6682 linecharno = charno; /* update global char number of line start */
6683 result = readline_internal (lbp, stream, infilename); /* read line */
6684 lineno += 1; /* increment global line number */
6685 charno += result; /* increment global char number */
6687 /* Honor #line directives. */
6688 if (!no_line_directive)
6690 static bool discard_until_line_directive;
6692 /* Check whether this is a #line directive. */
6693 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6695 unsigned int lno;
6696 int start = 0;
6698 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6699 && start > 0) /* double quote character found */
6701 char *endp = lbp->buffer + start;
6703 while ((endp = strchr (endp, '"')) != NULL
6704 && endp[-1] == '\\')
6705 endp++;
6706 if (endp != NULL)
6707 /* Ok, this is a real #line directive. Let's deal with it. */
6709 char *taggedabsname; /* absolute name of original file */
6710 char *taggedfname; /* name of original file as given */
6711 char *name; /* temp var */
6713 discard_until_line_directive = false; /* found it */
6714 name = lbp->buffer + start;
6715 *endp = '\0';
6716 canonicalize_filename (name);
6717 taggedabsname = absolute_filename (name, tagfiledir);
6718 if (filename_is_absolute (name)
6719 || filename_is_absolute (curfdp->infname))
6720 taggedfname = savestr (taggedabsname);
6721 else
6722 taggedfname = relative_filename (taggedabsname,tagfiledir);
6724 if (streq (curfdp->taggedfname, taggedfname))
6725 /* The #line directive is only a line number change. We
6726 deal with this afterwards. */
6727 free (taggedfname);
6728 else
6729 /* The tags following this #line directive should be
6730 attributed to taggedfname. In order to do this, set
6731 curfdp accordingly. */
6733 fdesc *fdp; /* file description pointer */
6735 /* Go look for a file description already set up for the
6736 file indicated in the #line directive. If there is
6737 one, use it from now until the next #line
6738 directive. */
6739 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6740 if (streq (fdp->infname, curfdp->infname)
6741 && streq (fdp->taggedfname, taggedfname))
6742 /* If we remove the second test above (after the &&)
6743 then all entries pertaining to the same file are
6744 coalesced in the tags file. If we use it, then
6745 entries pertaining to the same file but generated
6746 from different files (via #line directives) will
6747 go into separate sections in the tags file. These
6748 alternatives look equivalent. The first one
6749 destroys some apparently useless information. */
6751 curfdp = fdp;
6752 free (taggedfname);
6753 break;
6755 /* Else, if we already tagged the real file, skip all
6756 input lines until the next #line directive. */
6757 if (fdp == NULL) /* not found */
6758 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6759 if (streq (fdp->infabsname, taggedabsname))
6761 discard_until_line_directive = true;
6762 free (taggedfname);
6763 break;
6765 /* Else create a new file description and use that from
6766 now on, until the next #line directive. */
6767 if (fdp == NULL) /* not found */
6769 fdp = fdhead;
6770 fdhead = xnew (1, fdesc);
6771 *fdhead = *curfdp; /* copy curr. file description */
6772 fdhead->next = fdp;
6773 fdhead->infname = savestr (curfdp->infname);
6774 fdhead->infabsname = savestr (curfdp->infabsname);
6775 fdhead->infabsdir = savestr (curfdp->infabsdir);
6776 fdhead->taggedfname = taggedfname;
6777 fdhead->usecharno = false;
6778 fdhead->prop = NULL;
6779 fdhead->written = false;
6780 curfdp = fdhead;
6783 free (taggedabsname);
6784 lineno = lno - 1;
6785 readline (lbp, stream);
6786 return;
6787 } /* if a real #line directive */
6788 } /* if #line is followed by a number */
6789 } /* if line begins with "#line " */
6791 /* If we are here, no #line directive was found. */
6792 if (discard_until_line_directive)
6794 if (result > 0)
6796 /* Do a tail recursion on ourselves, thus discarding the contents
6797 of the line buffer. */
6798 readline (lbp, stream);
6799 return;
6801 /* End of file. */
6802 discard_until_line_directive = false;
6803 return;
6805 } /* if #line directives should be considered */
6808 int match;
6809 regexp *rp;
6810 char *name;
6812 /* Match against relevant regexps. */
6813 if (lbp->len > 0)
6814 for (rp = p_head; rp != NULL; rp = rp->p_next)
6816 /* Only use generic regexps or those for the current language.
6817 Also do not use multiline regexps, which is the job of
6818 regex_tag_multiline. */
6819 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6820 || rp->multi_line)
6821 continue;
6823 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6824 switch (match)
6826 case -2:
6827 /* Some error. */
6828 if (!rp->error_signaled)
6830 error ("regexp stack overflow while matching \"%s\"",
6831 rp->pattern);
6832 rp->error_signaled = true;
6834 break;
6835 case -1:
6836 /* No match. */
6837 break;
6838 case 0:
6839 /* Empty string matched. */
6840 if (!rp->error_signaled)
6842 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6843 rp->error_signaled = true;
6845 break;
6846 default:
6847 /* Match occurred. Construct a tag. */
6848 name = rp->name;
6849 if (name[0] == '\0')
6850 name = NULL;
6851 else /* make a named tag */
6852 name = substitute (lbp->buffer, rp->name, &rp->regs);
6853 if (rp->force_explicit_name)
6854 /* Force explicit tag name, if a name is there. */
6855 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6856 else
6857 make_tag (name, strlen (name), true,
6858 lbp->buffer, match, lineno, linecharno);
6859 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6877 * Return a pointer to a space of size LEN+1 allocated with xnew where
6878 * the string CP has been copied for at most the first LEN characters.
6880 static char *
6881 savenstr (const char *cp, int len)
6883 char *dp = xnew (len + 1, char);
6884 dp[len] = '\0';
6885 return memcpy (dp, cp, len);
/* Skip spaces (end of string is not space), return new pointer.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
/* Skip any chars in the "name" class, return new pointer.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Print error message and exit.  FORMAT and the following arguments
   are handed to verror; the exit status is EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current errno with perror, using S1 as the message
   prefix, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6934 static void
6935 suggest_asking_for_help (void)
6937 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6938 progname);
6939 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args.  Unlike
   fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6952 static void
6953 verror (char const *format, va_list ap)
6955 fprintf (stderr, "%s: ", progname);
6956 vfprintf (stderr, format, ap);
6957 fprintf (stderr, "\n");
6960 /* Return a newly-allocated string whose contents
6961 concatenate those of s1, s2, s3. */
6962 static char *
6963 concat (const char *s1, const char *s2, const char *s3)
6965 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6966 char *result = xnew (len1 + len2 + len3 + 1, char);
6968 strcpy (result, s1);
6969 strcpy (result + len1, s2);
6970 strcpy (result + len1 + len2, s3);
6972 return result;
6976 /* Does the same work as the system V getcwd, but does not need to
6977 guess the buffer size in advance. */
6978 static char *
6979 etags_getcwd (void)
6981 int bufsize = 200;
6982 char *path = xnew (bufsize, char);
6984 while (getcwd (path, bufsize) == NULL)
6986 if (errno != ERANGE)
6987 pfatal ("getcwd");
6988 bufsize *= 2;
6989 free (path);
6990 path = xnew (bufsize, char);
6993 canonicalize_filename (path);
6994 return path;
6997 /* Return a newly allocated string containing a name of a temporary file. */
6998 static char *
6999 etags_mktmp (void)
7001 const char *tmpdir = getenv ("TMPDIR");
7002 const char *slash = "/";
7004 #if MSDOS || defined (DOS_NT)
7005 if (!tmpdir)
7006 tmpdir = getenv ("TEMP");
7007 if (!tmpdir)
7008 tmpdir = getenv ("TMP");
7009 if (!tmpdir)
7010 tmpdir = ".";
7011 if (tmpdir[strlen (tmpdir) - 1] == '/'
7012 || tmpdir[strlen (tmpdir) - 1] == '\\')
7013 slash = "";
7014 #else
7015 if (!tmpdir)
7016 tmpdir = "/tmp";
7017 if (tmpdir[strlen (tmpdir) - 1] == '/')
7018 slash = "";
7019 #endif
7021 char *templt = concat (tmpdir, slash, "etXXXXXX");
7022 int fd = mkostemp (templt, O_CLOEXEC);
7023 if (fd < 0 || close (fd) != 0)
7025 int temp_errno = errno;
7026 free (templt);
7027 errno = temp_errno;
7028 templt = NULL;
7031 #if defined (DOS_NT)
7032 /* The file name will be used in shell redirection, so it needs to have
7033 DOS-style backslashes, or else the Windows shell will barf. */
7034 char *p;
7035 for (p = templt; *p; p++)
7036 if (*p == '/')
7037 *p = '\\';
7038 #endif
7040 return templt;
7043 /* Return a newly allocated string containing the file name of FILE
7044 relative to the absolute directory DIR (which should end with a slash). */
7045 static char *
7046 relative_filename (char *file, char *dir)
7048 char *fp, *dp, *afn, *res;
7049 int i;
7051 /* Find the common root of file and dir (with a trailing slash). */
7052 afn = absolute_filename (file, cwd);
7053 fp = afn;
7054 dp = dir;
7055 while (*fp++ == *dp++)
7056 continue;
7057 fp--, dp--; /* back to the first differing char */
7058 #ifdef DOS_NT
7059 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
7060 return afn;
7061 #endif
7062 do /* look at the equal chars until '/' */
7063 fp--, dp--;
7064 while (*fp != '/');
7066 /* Build a sequence of "../" strings for the resulting relative file name. */
7067 i = 0;
7068 while ((dp = strchr (dp + 1, '/')) != NULL)
7069 i += 1;
7070 res = xnew (3*i + strlen (fp + 1) + 1, char);
7071 char *z = res;
7072 while (i-- > 0)
7073 z = stpcpy (z, "../");
7075 /* Add the file name relative to the common root of file and dir. */
7076 strcpy (z, fp + 1);
7077 free (afn);
7079 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/." components
   are deleted, and "/dirname/.." is deleted together with the directory
   it cancels.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *abs;
  char *slash;

  if (filename_is_absolute (file))
    abs = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    abs = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  */
  slash = strchr (abs, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      bool dot = slash[1] == '.';

      if (dot && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": back up to the start of the previous component.  */
          char *prev = slash;

          do
            prev--;
          while (prev >= abs && !filename_is_absolute (prev));

          if (prev < abs)
            prev = slash;       /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = slash;
#endif
          /* Move what follows "/..", terminator included, over the
             deleted part, and look again from there.  */
          memmove (prev, slash + 3, strlen (slash + 3) + 1);
          slash = prev;
          continue;
        }

      if (dot && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/.": delete it and look again at the same place.  */
          memmove (slash, slash + 2, strlen (slash + 2) + 1);
          continue;
        }

      slash = strchr (slash + 1, '/');
    }

  if (abs[0] != '\0')
    return abs;

  /* Just a safety net: should never happen.  */
  free (abs);
  return savestr ("/");
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is modified temporarily and restored
   before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  /* No directory part: FILE resides in DIR itself.  */
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash while computing the name.  */
  char saved = last_slash[1];
  last_slash[1] = '\0';
  char *result = absolute_filename (file, dir);
  last_slash[1] = saved;

  return result;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a letter, a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  Under DOS_NT both forward- and back-slashes are
   separators; elsewhere only the forward slash is.  */
static void
canonicalize_filename (register char *fn)
{
  char *src;
  char *dst = fn;
  bool prev_was_sep = false;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  for (src = fn; *src != '\0'; src++)
    {
#ifdef DOS_NT
      bool is_sep = *src == '/' || *src == '\\';
#else
      bool is_sep = *src == '/';
#endif

      if (!is_sep)
        *dst++ = *src;
      else if (!prev_was_sep)
        *dst++ = '/';           /* first separator of a run */
      /* else: further separators of the same run are dropped */

      prev_was_sep = is_sep;
    }

  *dst = '\0';
}
7220 /* Initialize a linebuffer for use. */
7221 static void
7222 linebuffer_init (linebuffer *lbp)
7224 lbp->size = (DEBUG) ? 3 : 200;
7225 lbp->buffer = xnew (lbp->size, char);
7226 lbp->buffer[0] = '\0';
7227 lbp->len = 0;
7230 /* Set the minimum size of a string contained in a linebuffer. */
7231 static void
7232 linebuffer_setlen (linebuffer *lbp, int toksize)
7234 while (lbp->size <= toksize)
7236 lbp->size *= 2;
7237 xrnew (lbp->buffer, lbp->size, char);
7239 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");
  return block;
}
/* Like realloc but get fatal error if memory is exhausted.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");
  return block;
}
7262 * Local Variables:
7263 * indent-tabs-mode: t
7264 * tab-width: 8
7265 * fill-column: 79
7266 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7267 * c-file-style: "gnu"
7268 * End:
7271 /* etags.c ends here */