Ignore error after kill last file or hunk
[emacs.git] / lib-src / etags.c
blob1b6ac83c9a899e82330ecb54dcb7fcdb1fd92d84
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
/*
 * Authors:
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganized C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 * Francesco Potortì maintained and improved it for many years
 * starting in 1993.
 */

/*
 * If you want to add support for a new language, start by looking at the Lua
 * language, which is the simplest. Alternatively, consider distributing etags
 * together with a configuration file containing regexp definitions for etags.
 */
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
95 #ifdef WIN32_NATIVE
96 # undef MSDOS
97 # undef WINDOWSNT
98 # define WINDOWSNT
99 #endif /* WIN32_NATIVE */
101 #ifdef MSDOS
102 # undef MSDOS
103 # define MSDOS true
104 # include <sys/param.h>
105 #else
106 # define MSDOS false
107 #endif /* MSDOS */
109 #ifdef WINDOWSNT
110 # include <direct.h>
111 # undef HAVE_NTGUI
112 # undef DOS_NT
113 # define DOS_NT
114 # define O_CLOEXEC O_NOINHERIT
115 #endif /* WINDOWSNT */
117 #include <limits.h>
118 #include <unistd.h>
119 #include <stdarg.h>
120 #include <stdlib.h>
121 #include <string.h>
122 #include <sysstdio.h>
123 #include <errno.h>
124 #include <fcntl.h>
125 #include <binary-io.h>
126 #include <c-ctype.h>
127 #include <c-strcase.h>
129 #include <assert.h>
130 #ifdef NDEBUG
131 # undef assert /* some systems have a buggy assert.h */
132 # define assert(x) ((void) 0)
133 #endif
135 #include <getopt.h>
136 #include <regex.h>
138 /* Define CTAGS to make the program "ctags" compatible with the usual one.
139 Leave it undefined to make the program "etags", which makes emacs-style
140 tag tables and tags typedefs, #defines and struct/union/enum by default. */
141 #ifdef CTAGS
142 # undef CTAGS
143 # define CTAGS true
144 #else
145 # define CTAGS false
146 #endif
/* Return true if the NUL-terminated strings S and T compare equal
   with strcmp, i.e. byte for byte and case-sensitively.  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if the strings S and T compare equal according to
   c_strcasecmp (the case-insensitive comparison from <c-strcase.h>).  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if the first N bytes of S and T compare equal with
   strncmp (comparison stops early at a NUL byte).  With N == 0 the
   result is always true.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if the first N bytes of S and T compare equal according
   to c_strncasecmp (the bounded case-insensitive comparison from
   <c-strcase.h>).  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* Return true if C is not in a name, i.e. C is NUL, tab, newline,
   form feed, carriage return, space, '(', ')', ',', ';' or '='.
   The lookup table is indexed by the unsigned character value; every
   entry not listed below is zero-initialized (false).  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };
  return table[c];
}
/* Return true if C can start a token: an ASCII letter, '_', '$', '@'
   or '~'.  Digits are deliberately absent from the table.  Entries
   not listed are zero-initialized (false).  */
static bool
begtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1, ['@']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1,
    ['~']=1
  };
  return table[c];
}
/* Return true if C can be in the middle of a token: an ASCII letter,
   a decimal digit, '_' or '$'.  Unlike begtoken, '@' and '~' are not
   accepted here.  Entries not listed are zero-initialized (false).  */
static bool
intoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1,
    ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
    ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1
  };
  return table[c];
}
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed in the
   table.  Form feed, '$', '@', '\\', '_' and '`' are not in the
   table.  Entries not listed are zero-initialized (false).  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };
  return table[c];
}
/*
 * xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS: Type *xnew (int n, Type);
 * void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a pointer to storage for N objects of type Type obtained
 * from xmalloc.  xrnew resizes the storage OP points to via xrealloc and
 * assigns the result back to OP, so OP must be an lvalue and is
 * evaluated twice.  The size N * sizeof (Type) is computed without any
 * overflow check in these macros.
 */
#define xnew(n, Type) ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc ((op), (n) * sizeof (Type)))
248 typedef void Lang_function (FILE *);
/* Pairs a compressed-file name suffix with the command used to
   decompress such a file.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
256 typedef struct
258 const char *name; /* language name */
259 const char *help; /* detailed help for the language */
260 Lang_function *function; /* parse function */
261 const char **suffixes; /* name suffixes of this language's files */
262 const char **filenames; /* names of this language's files */
263 const char **interpreters; /* interpreters for this language */
264 bool metasource; /* source used to generate other sources */
265 } language;
267 typedef struct fdesc
269 struct fdesc *next; /* for the linked list */
270 char *infname; /* uncompressed input file name */
271 char *infabsname; /* absolute uncompressed input file name */
272 char *infabsdir; /* absolute dir of input file */
273 char *taggedfname; /* file name to write in tagfile */
274 language *lang; /* language of file */
275 char *prop; /* file properties to write in tagfile */
276 bool usecharno; /* etags tags shall contain char number */
277 bool written; /* entry written in the tags file */
278 } fdesc;
280 typedef struct node_st
281 { /* sorting structure */
282 struct node_st *left, *right; /* left and right sons */
283 fdesc *fdp; /* description of file to whom tag belongs */
284 char *name; /* tag name */
285 char *regex; /* search regexp */
286 bool valid; /* write this tag on the tag file */
287 bool is_func; /* function tag: use regexp in CTAGS mode */
288 bool been_warned; /* warning already given for duplicated tag */
289 int lno; /* line number tag is on */
290 long cno; /* character number line starts on */
291 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
307 /* Used to support mixing of --lang and file names. */
308 typedef struct
310 enum {
311 at_language, /* a language specification */
312 at_regexp, /* a regular expression */
313 at_filename, /* a file name */
314 at_stdin, /* read from stdin here */
315 at_end /* stop parsing the list */
316 } arg_type; /* argument type */
317 language *lang; /* language associated with the argument */
318 char *what; /* the argument itself */
319 } argument;
321 /* Structure defining a regular expression. */
322 typedef struct regexp
324 struct regexp *p_next; /* pointer to next in list */
325 language *lang; /* if set, use only for this language */
326 char *pattern; /* the regexp pattern */
327 char *name; /* tag name */
328 struct re_pattern_buffer *pat; /* the compiled pattern */
329 struct re_registers regs; /* re registers */
330 bool error_signaled; /* already signaled for this regexp */
331 bool force_explicit_name; /* do not allow implicit tag name */
332 bool ignore_case; /* ignore case when matching */
333 bool multi_line; /* do a multi-line match on the whole file */
334 } regexp;
337 /* Many compilers barf on this:
338 Lang_function Ada_funcs;
339 so let's write it this way */
340 static void Ada_funcs (FILE *);
341 static void Asm_labels (FILE *);
342 static void C_entries (int c_ext, FILE *);
343 static void default_C_entries (FILE *);
344 static void plain_C_entries (FILE *);
345 static void Cjava_entries (FILE *);
346 static void Cobol_paragraphs (FILE *);
347 static void Cplusplus_entries (FILE *);
348 static void Cstar_entries (FILE *);
349 static void Erlang_functions (FILE *);
350 static void Forth_words (FILE *);
351 static void Fortran_functions (FILE *);
352 static void Go_functions (FILE *);
353 static void HTML_labels (FILE *);
354 static void Lisp_functions (FILE *);
355 static void Lua_functions (FILE *);
356 static void Makefile_targets (FILE *);
357 static void Pascal_functions (FILE *);
358 static void Perl_functions (FILE *);
359 static void PHP_functions (FILE *);
360 static void PS_functions (FILE *);
361 static void Prolog_functions (FILE *);
362 static void Python_functions (FILE *);
363 static void Ruby_functions (FILE *);
364 static void Scheme_functions (FILE *);
365 static void TeX_commands (FILE *);
366 static void Texinfo_nodes (FILE *);
367 static void Yacc_entries (FILE *);
368 static void just_read_file (FILE *);
370 static language *get_language_from_langname (const char *);
371 static void readline (linebuffer *, FILE *);
372 static long readline_internal (linebuffer *, FILE *, char const *);
373 static bool nocase_tail (const char *);
374 static void get_tag (char *, char **);
376 static void analyze_regex (char *);
377 static void free_regexps (void);
378 static void regex_tag_multiline (void);
379 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
380 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
381 static _Noreturn void suggest_asking_for_help (void);
382 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static _Noreturn void pfatal (const char *);
384 static void add_node (node *, node **);
386 static void process_file_name (char *, language *);
387 static void process_file (FILE *, char *, language *);
388 static void find_entries (FILE *);
389 static void free_tree (node *);
390 static void free_fdesc (fdesc *);
391 static void pfnote (char *, bool, char *, int, int, long);
392 static void invalidate_nodes (fdesc *, node **);
393 static void put_entries (node *);
395 static char *concat (const char *, const char *, const char *);
396 static char *skip_spaces (char *);
397 static char *skip_non_spaces (char *);
398 static char *skip_name (char *);
399 static char *savenstr (const char *, int);
400 static char *savestr (const char *);
401 static char *etags_getcwd (void);
402 static char *relative_filename (char *, char *);
403 static char *absolute_filename (char *, char *);
404 static char *absolute_dirname (char *, char *);
405 static bool filename_is_absolute (char *f);
406 static void canonicalize_filename (char *);
407 static char *etags_mktmp (void);
408 static void linebuffer_init (linebuffer *);
409 static void linebuffer_setlen (linebuffer *, int);
410 static void *xmalloc (size_t);
411 static void *xrealloc (void *, size_t);
414 static char searchar = '/'; /* use /.../ searches */
416 static char *tagfile; /* output file */
417 static char *progname; /* name this program was invoked with */
418 static char *cwd; /* current working directory */
419 static char *tagfiledir; /* directory of tagfile */
420 static FILE *tagf; /* ioptr for tags file */
421 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
423 static fdesc *fdhead; /* head of file description list */
424 static fdesc *curfdp; /* current file description */
425 static char *infilename; /* current input file name */
426 static int lineno; /* line number of current line */
427 static long charno; /* current character number */
428 static long linecharno; /* charno of start of current line */
429 static char *dbp; /* pointer to start of current tag */
431 static const int invalidcharno = -1;
433 static node *nodehead; /* the head of the binary tree of tags */
434 static node *last_node; /* the last node created */
436 static linebuffer lb; /* the current line */
437 static linebuffer filebuf; /* a buffer containing the whole file */
438 static linebuffer token_name; /* a buffer containing a tag name */
440 static bool append_to_tagfile; /* -a: append to tags */
441 /* The next five default to true in C and derived languages. */
442 static bool typedefs; /* -t: create tags for C and Ada typedefs */
443 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
444 /* 0 struct/enum/union decls, and C++ */
445 /* member functions. */
446 static bool constantypedefs; /* -d: create tags for C #define, enum */
447 /* constants and variables. */
448 /* -D: opposite of -d. Default under ctags. */
449 static int globals; /* create tags for global variables */
450 static int members; /* create tags for C member variables */
451 static int declarations; /* --declarations: tag them and extern in C&Co*/
452 static int no_line_directive; /* ignore #line directives (undocumented) */
453 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
454 static bool update; /* -u: update tags */
455 static bool vgrind_style; /* -v: create vgrind style index output */
456 static bool no_warnings; /* -w: suppress warnings (undocumented) */
457 static bool cxref_style; /* -x: create cxref style output */
458 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
459 static bool ignoreindent; /* -I: ignore indentation in C */
460 static int packages_only; /* --packages-only: in Ada, only tag packages*/
461 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
463 /* STDIN is defined in LynxOS system headers */
464 #ifdef STDIN
465 # undef STDIN
466 #endif
468 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
469 static bool parsing_stdin; /* --parse-stdin used */
471 static regexp *p_head; /* list of all regexps */
472 static bool need_filebuf; /* some regexes are multi-line */
474 static struct option longopts[] =
476 { "append", no_argument, NULL, 'a' },
477 { "packages-only", no_argument, &packages_only, 1 },
478 { "c++", no_argument, NULL, 'C' },
479 { "declarations", no_argument, &declarations, 1 },
480 { "no-line-directive", no_argument, &no_line_directive, 1 },
481 { "no-duplicates", no_argument, &no_duplicates, 1 },
482 { "help", no_argument, NULL, 'h' },
483 { "help", no_argument, NULL, 'H' },
484 { "ignore-indentation", no_argument, NULL, 'I' },
485 { "language", required_argument, NULL, 'l' },
486 { "members", no_argument, &members, 1 },
487 { "no-members", no_argument, &members, 0 },
488 { "output", required_argument, NULL, 'o' },
489 { "class-qualify", no_argument, &class_qualify, 'Q' },
490 { "regex", required_argument, NULL, 'r' },
491 { "no-regex", no_argument, NULL, 'R' },
492 { "ignore-case-regex", required_argument, NULL, 'c' },
493 { "parse-stdin", required_argument, NULL, STDIN },
494 { "version", no_argument, NULL, 'V' },
496 #if CTAGS /* Ctags options */
497 { "backward-search", no_argument, NULL, 'B' },
498 { "cxref", no_argument, NULL, 'x' },
499 { "defines", no_argument, NULL, 'd' },
500 { "globals", no_argument, &globals, 1 },
501 { "typedefs", no_argument, NULL, 't' },
502 { "typedefs-and-c++", no_argument, NULL, 'T' },
503 { "update", no_argument, NULL, 'u' },
504 { "vgrind", no_argument, NULL, 'v' },
505 { "no-warn", no_argument, NULL, 'w' },
507 #else /* Etags options */
508 { "no-defines", no_argument, NULL, 'D' },
509 { "no-globals", no_argument, &globals, 0 },
510 { "include", required_argument, NULL, 'i' },
511 #endif
512 { NULL }
515 static compressor compressors[] =
517 { "z", "gzip -d -c"},
518 { "Z", "gzip -d -c"},
519 { "gz", "gzip -d -c"},
520 { "GZ", "gzip -d -c"},
521 { "bz2", "bzip2 -d -c" },
522 { "xz", "xz -d -c" },
523 { NULL }
527 * Language stuff.
530 /* Ada code */
531 static const char *Ada_suffixes [] =
532 { "ads", "adb", "ada", NULL };
533 static const char Ada_help [] =
534 "In Ada code, functions, procedures, packages, tasks and types are\n\
535 tags. Use the '--packages-only' option to create tags for\n\
536 packages only.\n\
537 Ada tag names have suffixes indicating the type of entity:\n\
538 Entity type: Qualifier:\n\
539 ------------ ----------\n\
540 function /f\n\
541 procedure /p\n\
542 package spec /s\n\
543 package body /b\n\
544 type /t\n\
545 task /k\n\
546 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
547 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
548 will just search for any tag 'bidule'.";
550 /* Assembly code */
551 static const char *Asm_suffixes [] =
552 { "a", /* Unix assembler */
553 "asm", /* Microcontroller assembly */
554 "def", /* BSO/Tasking definition includes */
555 "inc", /* Microcontroller include files */
556 "ins", /* Microcontroller include files */
557 "s", "sa", /* Unix assembler */
558 "S", /* cpp-processed Unix assembler */
559 "src", /* BSO/Tasking C compiler output */
560 NULL
562 static const char Asm_help [] =
563 "In assembler code, labels appearing at the beginning of a line,\n\
564 followed by a colon, are tags.";
567 /* Note that .c and .h can be considered C++, if the --c++ flag was
568 given, or if the `class' or `template' keywords are met inside the file.
569 That is why default_C_entries is called for these. */
570 static const char *default_C_suffixes [] =
571 { "c", "h", NULL };
572 #if CTAGS /* C help for Ctags */
573 static const char default_C_help [] =
574 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
575 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
576 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
577 Use --globals to tag global variables.\n\
578 You can tag function declarations and external variables by\n\
579 using '--declarations', and struct members by using '--members'.";
580 #else /* C help for Etags */
581 static const char default_C_help [] =
582 "In C code, any C function or typedef is a tag, and so are\n\
583 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
584 definitions and 'enum' constants are tags unless you specify\n\
585 '--no-defines'. Global variables are tags unless you specify\n\
586 '--no-globals' and so are struct members unless you specify\n\
587 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
588 '--no-members' can make the tags table file much smaller.\n\
589 You can tag function declarations and external variables by\n\
590 using '--declarations'.";
591 #endif /* C help for Ctags and Etags */
593 static const char *Cplusplus_suffixes [] =
594 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
595 "M", /* Objective C++ */
596 "pdb", /* PostScript with C syntax */
597 NULL };
598 static const char Cplusplus_help [] =
599 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
600 --help --lang=c --lang=c++ for full help.)\n\
601 In addition to C tags, member functions are also recognized. Member\n\
602 variables are recognized unless you use the '--no-members' option.\n\
603 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
604 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
605 'operator+'.";
607 static const char *Cjava_suffixes [] =
608 { "java", NULL };
/* Help text for Java; const like the other per-language help strings.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
614 static const char *Cobol_suffixes [] =
615 { "COB", "cob", NULL };
/* Help text for Cobol; const like the other per-language help strings.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
620 static const char *Cstar_suffixes [] =
621 { "cs", "hs", NULL };
623 static const char *Erlang_suffixes [] =
624 { "erl", "hrl", NULL };
625 static const char Erlang_help [] =
626 "In Erlang code, the tags are the functions, records and macros\n\
627 defined in the file.";
/* File name suffixes of Forth sources, NULL-terminated.  Given internal
   linkage like every other suffix table in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
631 static const char Forth_help [] =
632 "In Forth code, tags are words defined by ':',\n\
633 constant, code, create, defer, value, variable, buffer:, field.";
635 static const char *Fortran_suffixes [] =
636 { "F", "f", "f90", "for", NULL };
637 static const char Fortran_help [] =
638 "In Fortran code, functions, subroutines and block data are tags.";
640 static const char *Go_suffixes [] = {"go", NULL};
641 static const char Go_help [] =
642 "In Go code, functions, interfaces and packages are tags.";
644 static const char *HTML_suffixes [] =
645 { "htm", "html", "shtml", NULL };
646 static const char HTML_help [] =
647 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
648 'h3' headers. Also, tags are 'name=' in anchors and all\n\
649 occurrences of 'id='.";
651 static const char *Lisp_suffixes [] =
652 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
653 static const char Lisp_help [] =
654 "In Lisp code, any function defined with 'defun', any variable\n\
655 defined with 'defvar' or 'defconst', and in general the first\n\
656 argument of any expression that starts with '(def' in column zero\n\
657 is a tag.\n\
658 The '--declarations' option tags \"(defvar foo)\" constructs too.";
660 static const char *Lua_suffixes [] =
661 { "lua", "LUA", NULL };
662 static const char Lua_help [] =
663 "In Lua scripts, all functions are tags.";
665 static const char *Makefile_filenames [] =
666 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
667 static const char Makefile_help [] =
668 "In makefiles, targets are tags; additionally, variables are tags\n\
669 unless you specify '--no-globals'.";
671 static const char *Objc_suffixes [] =
672 { "lm", /* Objective lex file */
673 "m", /* Objective C file */
674 NULL };
675 static const char Objc_help [] =
676 "In Objective C code, tags include Objective C definitions for classes,\n\
677 class categories, methods and protocols. Tags for variables and\n\
678 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
679 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
681 static const char *Pascal_suffixes [] =
682 { "p", "pas", NULL };
683 static const char Pascal_help [] =
684 "In Pascal code, the tags are the functions and procedures defined\n\
685 in the file.";
686 /* " // this is for working around an Emacs highlighting bug... */
688 static const char *Perl_suffixes [] =
689 { "pl", "pm", NULL };
690 static const char *Perl_interpreters [] =
691 { "perl", "@PERL@", NULL };
692 static const char Perl_help [] =
693 "In Perl code, the tags are the packages, subroutines and variables\n\
694 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
695 '--globals' if you want to tag global variables. Tags for\n\
696 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
697 defined in the default package is 'main::SUB'.";
699 static const char *PHP_suffixes [] =
700 { "php", "php3", "php4", NULL };
701 static const char PHP_help [] =
702 "In PHP code, tags are functions, classes and defines. Unless you use\n\
703 the '--no-members' option, vars are tags too.";
705 static const char *plain_C_suffixes [] =
706 { "pc", /* Pro*C file */
707 NULL };
709 static const char *PS_suffixes [] =
710 { "ps", "psw", NULL }; /* .psw is for PSWrap */
711 static const char PS_help [] =
712 "In PostScript code, the tags are the functions.";
714 static const char *Prolog_suffixes [] =
715 { "prolog", NULL };
716 static const char Prolog_help [] =
717 "In Prolog code, tags are predicates and rules at the beginning of\n\
718 line.";
720 static const char *Python_suffixes [] =
721 { "py", NULL };
722 static const char Python_help [] =
723 "In Python code, 'def' or 'class' at the beginning of a line\n\
724 generate a tag.";
726 static const char *Ruby_suffixes [] =
727 { "rb", "ru", "rbw", NULL };
728 static const char *Ruby_filenames [] =
729 { "Rakefile", "Thorfile", NULL };
730 static const char Ruby_help [] =
731 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
732 a line generate a tag. Constants also generate a tag.";
734 /* Can't do the `SCM' or `scm' prefix with a version number. */
735 static const char *Scheme_suffixes [] =
736 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
737 static const char Scheme_help [] =
738 "In Scheme code, tags include anything defined with 'def' or with a\n\
739 construct whose name starts with 'def'. They also include\n\
740 variables set with 'set!' at top level in the file.";
742 static const char *TeX_suffixes [] =
743 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
744 static const char TeX_help [] =
745 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
746 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
747 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
748 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
749 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
751 Other commands can be specified by setting the environment variable\n\
752 'TEXTAGS' to a colon-separated list like, for example,\n\
753 TEXTAGS=\"mycommand:myothercommand\".";
756 static const char *Texinfo_suffixes [] =
757 { "texi", "texinfo", "txi", NULL };
758 static const char Texinfo_help [] =
759 "for texinfo files, lines starting with @node are tagged.";
761 static const char *Yacc_suffixes [] =
762 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
763 static const char Yacc_help [] =
764 "In Bison or Yacc input files, each rule defines as a tag the\n\
765 nonterminal it constructs. The portions of the file that contain\n\
766 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
767 for full help).";
/* Help text for the pseudo-language 'auto'.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
773 static const char none_help [] =
774 "'none' is not a real language, it indicates to only do\n\
775 regexp processing on files.";
777 static const char no_lang_help [] =
778 "No detailed help available for this language.";
782 * Table of languages.
784 * It is ok for a given function to be listed under more than one
785 * name. I just didn't.
788 static language lang_names [] =
790 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
791 { "asm", Asm_help, Asm_labels, Asm_suffixes },
792 { "c", default_C_help, default_C_entries, default_C_suffixes },
793 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
794 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
795 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
796 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
797 { "forth", Forth_help, Forth_words, Forth_suffixes },
798 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
799 { "go", Go_help, Go_functions, Go_suffixes },
800 { "html", HTML_help, HTML_labels, HTML_suffixes },
801 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
802 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
803 { "lua", Lua_help, Lua_functions, Lua_suffixes },
804 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
805 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
806 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
807 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
808 { "php", PHP_help, PHP_functions, PHP_suffixes },
809 { "postscript",PS_help, PS_functions, PS_suffixes },
810 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
811 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
812 { "python", Python_help, Python_functions, Python_suffixes },
813 { "ruby", Ruby_help,Ruby_functions,Ruby_suffixes,Ruby_filenames },
814 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
815 { "tex", TeX_help, TeX_commands, TeX_suffixes },
816 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
817 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
818 { "auto", auto_help }, /* default guessing scheme */
819 { "none", none_help, just_read_file }, /* regexp matching only */
820 { NULL } /* end of list */
/* Print the list of supported languages on standard output.
   Each entry of the lang_names table (up to the entry whose name is
   NULL) gets one line: the language name, then its default file
   names, then its default suffixes each preceded by a dot.  The list
   is followed by explanatory text about 'auto', 'none', language
   guessing and compressed files.  */
824 static void
825 print_language_names (void)
827 language *lang;
828 const char **name, **ext;
/* Heading printed before the table.  */
830 puts ("\nThese are the currently supported languages, along with the\n\
831 default file names and dot suffixes:");
/* One output line per table entry; either list may be absent (NULL).  */
832 for (lang = lang_names; lang->name != NULL; lang++)
834 printf (" %-*s", 10, lang->name);
835 if (lang->filenames != NULL)
836 for (name = lang->filenames; *name != NULL; name++)
837 printf (" %s", *name);
838 if (lang->suffixes != NULL)
839 for (ext = lang->suffixes; *ext != NULL; ext++)
840 printf (" .%s", *ext);
841 puts ("");
/* Trailing notes on how the language is chosen when none is forced.  */
843 puts ("where 'auto' means use default language for files based on file\n\
844 name suffix, and 'none' means only do regexp processing on files.\n\
845 If no language is specified and no matching suffix is found,\n\
846 the first line of the file is read for a sharp-bang (#!) sequence\n\
847 followed by the name of an interpreter. If no such sequence is found,\n\
848 Fortran is tried first; if no tags are found, C is tried next.\n\
849 When parsing any C file, a \"class\" or \"template\" keyword\n\
850 switches to C++.");
851 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
853 For detailed help on a given language use, for example,\n\
854 etags --help --lang=ada.");
857 #ifndef EMACS_NAME
858 # define EMACS_NAME "standalone"
859 #endif
860 #ifndef VERSION
861 # define VERSION "17.38.1.4"
862 #endif
863 static _Noreturn void
864 print_version (void)
866 char emacs_copyright[] = COPYRIGHT;
868 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
869 puts (emacs_copyright);
870 puts ("This program is distributed under the terms in ETAGS.README");
872 exit (EXIT_SUCCESS);
/* Build-time switch: when true, print_help also describes the options
   guarded by it below (the two "-w" entries).  Defaults to false.  */
875 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
876 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
877 #endif
/* Print help on standard output and exit successfully; never returns.
   ARGBUFFER is the list of recorded command-line arguments, terminated
   by an entry whose arg_type is at_end.  If it contains any at_language
   entries, only the help strings of those languages are printed.
   Otherwise the usage line, the option descriptions (some of them only
   when CTAGS is true, others only when it is false), the table of
   supported languages and the bug-report address are printed.  */
879 static _Noreturn void
880 print_help (argument *argbuffer)
882 bool help_for_lang = false;
/* Language-specific help: one help text per requested language,
   separated by an empty line.  */
884 for (; argbuffer->arg_type != at_end; argbuffer++)
885 if (argbuffer->arg_type == at_language)
887 if (help_for_lang)
888 puts ("");
889 puts (argbuffer->lang->help);
890 help_for_lang = true;
/* If any language help was printed, that is all the user asked for.  */
893 if (help_for_lang)
894 exit (EXIT_SUCCESS);
/* General help starts here.  */
896 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
898 These are the options accepted by %s.\n", progname, progname);
899 puts ("You may use unambiguous abbreviations for the long option names.");
900 puts (" A - as file name means read names from stdin (one per line).\n\
901 Absolute names are stored in the output file as they are.\n\
902 Relative ones are stored relative to the output file's directory.\n");
904 puts ("-a, --append\n\
905 Append tag entries to existing tags file.");
907 puts ("--packages-only\n\
908 For Ada files, only generate tags for packages.");
910 if (CTAGS)
911 puts ("-B, --backward-search\n\
912 Write the search commands for the tag entries using '?', the\n\
913 backward-search command instead of '/', the forward-search command.");
915 /* This option is mostly obsolete, because etags can now automatically
916 detect C++. Retained for backward compatibility and for debugging and
917 experimentation. In principle, we could want to tag as C++ even
918 before any "class" or "template" keyword.
919 puts ("-C, --c++\n\
920 Treat files whose name suffix defaults to C language as C++ files.");
923 puts ("--declarations\n\
924 In C and derived languages, create tags for function declarations,");
925 if (CTAGS)
926 puts ("\tand create tags for extern variables if --globals is used.");
927 else
928 puts
929 ("\tand create tags for extern variables unless --no-globals is used.");
931 if (CTAGS)
932 puts ("-d, --defines\n\
933 Create tag entries for C #define constants and enum constants, too.");
934 else
935 puts ("-D, --no-defines\n\
936 Don't create tag entries for C #define constants and enum constants.\n\
937 This makes the tags file smaller.");
939 if (!CTAGS)
940 puts ("-i FILE, --include=FILE\n\
941 Include a note in tag file indicating that, when searching for\n\
942 a tag, one should also consult the tags file FILE after\n\
943 checking the current file.");
945 puts ("-l LANG, --language=LANG\n\
946 Force the following files to be considered as written in the\n\
947 named language up to the next --language=LANG option.");
949 if (CTAGS)
950 puts ("--globals\n\
951 Create tag entries for global variables in some languages.");
952 else
953 puts ("--no-globals\n\
954 Do not create tag entries for global variables in some\n\
955 languages. This makes the tags file smaller.");
957 puts ("--no-line-directive\n\
958 Ignore #line preprocessor directives in C and derived languages.");
960 if (CTAGS)
961 puts ("--members\n\
962 Create tag entries for members of structures in some languages.");
963 else
964 puts ("--no-members\n\
965 Do not create tag entries for members of structures\n\
966 in some languages.");
968 puts ("-Q, --class-qualify\n\
969 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
970 This produces tag names of the form \"class::member\" for C++,\n\
971 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
972 For Objective C, this also produces class methods qualified with\n\
973 their arguments, as in \"foo:bar:baz:more\".\n\
974 For Perl, this produces \"package::member\".");
975 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
976 Make a tag for each line matching a regular expression pattern\n\
977 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
978 files only. REGEXFILE is a file containing one REGEXP per line.\n\
979 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
980 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
981 puts (" If TAGNAME/ is present, the tags created are named.\n\
982 For example Tcl named tags can be created with:\n\
983 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
984 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
985 'm' means to allow multi-line matches, 's' implies 'm' and\n\
986 causes dot to match any character, including newline.");
988 puts ("-R, --no-regex\n\
989 Don't create tags from regexps for the following files.");
991 puts ("-I, --ignore-indentation\n\
992 In C and C++ do not assume that a closing brace in the first\n\
993 column is the final brace of a function or structure definition.");
995 puts ("-o FILE, --output=FILE\n\
996 Write the tags to FILE.");
998 puts ("--parse-stdin=NAME\n\
999 Read from standard input and record tags as belonging to file NAME.");
1001 if (CTAGS)
1003 puts ("-t, --typedefs\n\
1004 Generate tag entries for C and Ada typedefs.");
1005 puts ("-T, --typedefs-and-c++\n\
1006 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1007 and C++ member functions.");
1010 if (CTAGS)
1011 puts ("-u, --update\n\
1012 Update the tag entries for the given files, leaving tag\n\
1013 entries for other files in place. Currently, this is\n\
1014 implemented by deleting the existing entries for the given\n\
1015 files and then rewriting the new entries at the end of the\n\
1016 tags file. It is often faster to simply rebuild the entire\n\
1017 tag file than to use this.");
1019 if (CTAGS)
1021 puts ("-v, --vgrind\n\
1022 Print on the standard output an index of items intended for\n\
1023 human consumption, similar to the output of vgrind. The index\n\
1024 is sorted, and gives the page number of each item.");
1026 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1027 puts ("-w, --no-duplicates\n\
1028 Do not create duplicate tag entries, for compatibility with\n\
1029 traditional ctags.");
1031 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1032 puts ("-w, --no-warn\n\
1033 Suppress warning messages about duplicate tag entries.");
1035 puts ("-x, --cxref\n\
1036 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1037 The output uses line numbers instead of page numbers, but\n\
1038 beyond that the differences are cosmetic; try both to see\n\
1039 which you like.");
1042 puts ("-V, --version\n\
1043 Print the version of the program.\n\
1044 -h, --help\n\
1045 Print this help message.\n\
1046 Followed by one or more '--language' options prints detailed\n\
1047 help about tag generation for the specified languages.");
1049 print_language_names ();
1051 puts ("");
1052 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1054 exit (EXIT_SUCCESS);
1059 main (int argc, char **argv)
1061 int i;
1062 unsigned int nincluded_files;
1063 char **included_files;
1064 argument *argbuffer;
1065 int current_arg, file_count;
1066 linebuffer filename_lb;
1067 bool help_asked = false;
1068 ptrdiff_t len;
1069 char *optstring;
1070 int opt;
1072 progname = argv[0];
1073 nincluded_files = 0;
1074 included_files = xnew (argc, char *);
1075 current_arg = 0;
1076 file_count = 0;
1078 /* Allocate enough no matter what happens. Overkill, but each one
1079 is small. */
1080 argbuffer = xnew (argc, argument);
1083 * Always find typedefs and structure tags.
1084 * Also default to find macro constants, enum constants, struct
1085 * members and global variables. Do it for both etags and ctags.
1087 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1088 globals = members = true;
1090 /* When the optstring begins with a '-' getopt_long does not rearrange the
1091 non-options arguments to be at the end, but leaves them alone. */
1092 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1093 (CTAGS) ? "BxdtTuvw" : "Di:",
1094 "");
1096 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1097 switch (opt)
1099 case 0:
1100 /* If getopt returns 0, then it has already processed a
1101 long-named option. We should do nothing. */
1102 break;
1104 case 1:
1105 /* This means that a file name has been seen. Record it. */
1106 argbuffer[current_arg].arg_type = at_filename;
1107 argbuffer[current_arg].what = optarg;
1108 len = strlen (optarg);
1109 if (whatlen_max < len)
1110 whatlen_max = len;
1111 ++current_arg;
1112 ++file_count;
1113 break;
1115 case STDIN:
1116 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1117 argbuffer[current_arg].arg_type = at_stdin;
1118 argbuffer[current_arg].what = optarg;
1119 len = strlen (optarg);
1120 if (whatlen_max < len)
1121 whatlen_max = len;
1122 ++current_arg;
1123 ++file_count;
1124 if (parsing_stdin)
1125 fatal ("cannot parse standard input more than once");
1126 parsing_stdin = true;
1127 break;
1129 /* Common options. */
1130 case 'a': append_to_tagfile = true; break;
1131 case 'C': cplusplus = true; break;
1132 case 'f': /* for compatibility with old makefiles */
1133 case 'o':
1134 if (tagfile)
1136 error ("-o option may only be given once.");
1137 suggest_asking_for_help ();
1138 /* NOTREACHED */
1140 tagfile = optarg;
1141 break;
1142 case 'I':
1143 case 'S': /* for backward compatibility */
1144 ignoreindent = true;
1145 break;
1146 case 'l':
1148 language *lang = get_language_from_langname (optarg);
1149 if (lang != NULL)
1151 argbuffer[current_arg].lang = lang;
1152 argbuffer[current_arg].arg_type = at_language;
1153 ++current_arg;
1156 break;
1157 case 'c':
1158 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1159 optarg = concat (optarg, "i", ""); /* memory leak here */
1160 /* FALLTHRU */
1161 case 'r':
1162 argbuffer[current_arg].arg_type = at_regexp;
1163 argbuffer[current_arg].what = optarg;
1164 len = strlen (optarg);
1165 if (whatlen_max < len)
1166 whatlen_max = len;
1167 ++current_arg;
1168 break;
1169 case 'R':
1170 argbuffer[current_arg].arg_type = at_regexp;
1171 argbuffer[current_arg].what = NULL;
1172 ++current_arg;
1173 break;
1174 case 'V':
1175 print_version ();
1176 break;
1177 case 'h':
1178 case 'H':
1179 help_asked = true;
1180 break;
1181 case 'Q':
1182 class_qualify = 1;
1183 break;
1185 /* Etags options */
1186 case 'D': constantypedefs = false; break;
1187 case 'i': included_files[nincluded_files++] = optarg; break;
1189 /* Ctags options. */
1190 case 'B': searchar = '?'; break;
1191 case 'd': constantypedefs = true; break;
1192 case 't': typedefs = true; break;
1193 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1194 case 'u': update = true; break;
1195 case 'v': vgrind_style = true; /*FALLTHRU*/
1196 case 'x': cxref_style = true; break;
1197 case 'w': no_warnings = true; break;
1198 default:
1199 suggest_asking_for_help ();
1200 /* NOTREACHED */
1203 /* No more options. Store the rest of arguments. */
1204 for (; optind < argc; optind++)
1206 argbuffer[current_arg].arg_type = at_filename;
1207 argbuffer[current_arg].what = argv[optind];
1208 len = strlen (argv[optind]);
1209 if (whatlen_max < len)
1210 whatlen_max = len;
1211 ++current_arg;
1212 ++file_count;
1215 argbuffer[current_arg].arg_type = at_end;
1217 if (help_asked)
1218 print_help (argbuffer);
1219 /* NOTREACHED */
1221 if (nincluded_files == 0 && file_count == 0)
1223 error ("no input files specified.");
1224 suggest_asking_for_help ();
1225 /* NOTREACHED */
1228 if (tagfile == NULL)
1229 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1230 cwd = etags_getcwd (); /* the current working directory */
1231 if (cwd[strlen (cwd) - 1] != '/')
1233 char *oldcwd = cwd;
1234 cwd = concat (oldcwd, "/", "");
1235 free (oldcwd);
1238 /* Compute base directory for relative file names. */
1239 if (streq (tagfile, "-")
1240 || strneq (tagfile, "/dev/", 5))
1241 tagfiledir = cwd; /* relative file names are relative to cwd */
1242 else
1244 canonicalize_filename (tagfile);
1245 tagfiledir = absolute_dirname (tagfile, cwd);
1248 linebuffer_init (&lb);
1249 linebuffer_init (&filename_lb);
1250 linebuffer_init (&filebuf);
1251 linebuffer_init (&token_name);
1253 if (!CTAGS)
1255 if (streq (tagfile, "-"))
1257 tagf = stdout;
1258 SET_BINARY (fileno (stdout));
1260 else
1261 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1262 if (tagf == NULL)
1263 pfatal (tagfile);
1267 * Loop through files finding functions.
1269 for (i = 0; i < current_arg; i++)
1271 static language *lang; /* non-NULL if language is forced */
1272 char *this_file;
1274 switch (argbuffer[i].arg_type)
1276 case at_language:
1277 lang = argbuffer[i].lang;
1278 break;
1279 case at_regexp:
1280 analyze_regex (argbuffer[i].what);
1281 break;
1282 case at_filename:
1283 this_file = argbuffer[i].what;
1284 /* Input file named "-" means read file names from stdin
1285 (one per line) and use them. */
1286 if (streq (this_file, "-"))
1288 if (parsing_stdin)
1289 fatal ("cannot parse standard input "
1290 "AND read file names from it");
1291 while (readline_internal (&filename_lb, stdin, "-") > 0)
1292 process_file_name (filename_lb.buffer, lang);
1294 else
1295 process_file_name (this_file, lang);
1296 break;
1297 case at_stdin:
1298 this_file = argbuffer[i].what;
1299 process_file (stdin, this_file, lang);
1300 break;
1301 default:
1302 error ("internal error: arg_type");
1306 free_regexps ();
1307 free (lb.buffer);
1308 free (filebuf.buffer);
1309 free (token_name.buffer);
1311 if (!CTAGS || cxref_style)
1313 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1314 put_entries (nodehead);
1315 free_tree (nodehead);
1316 nodehead = NULL;
1317 if (!CTAGS)
1319 fdesc *fdp;
1321 /* Output file entries that have no tags. */
1322 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1323 if (!fdp->written)
1324 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1326 while (nincluded_files-- > 0)
1327 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1329 if (fclose (tagf) == EOF)
1330 pfatal (tagfile);
1333 return EXIT_SUCCESS;
1336 /* From here on, we are in (CTAGS && !cxref_style) */
1337 if (update)
1339 char *cmd =
1340 xmalloc (strlen (tagfile) + whatlen_max +
1341 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1342 for (i = 0; i < current_arg; ++i)
1344 switch (argbuffer[i].arg_type)
1346 case at_filename:
1347 case at_stdin:
1348 break;
1349 default:
1350 continue; /* the for loop */
1352 char *z = stpcpy (cmd, "mv ");
1353 z = stpcpy (z, tagfile);
1354 z = stpcpy (z, " OTAGS;grep -Fv '\t");
1355 z = stpcpy (z, argbuffer[i].what);
1356 z = stpcpy (z, "\t' OTAGS >");
1357 z = stpcpy (z, tagfile);
1358 strcpy (z, ";rm OTAGS");
1359 if (system (cmd) != EXIT_SUCCESS)
1360 fatal ("failed to execute shell command");
1362 free (cmd);
1363 append_to_tagfile = true;
1366 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1367 if (tagf == NULL)
1368 pfatal (tagfile);
1369 put_entries (nodehead); /* write all the tags (CTAGS) */
1370 free_tree (nodehead);
1371 nodehead = NULL;
1372 if (fclose (tagf) == EOF)
1373 pfatal (tagfile);
1375 if (CTAGS)
1376 if (append_to_tagfile || update)
1378 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1379 /* Maybe these should be used:
1380 setenv ("LC_COLLATE", "C", 1);
1381 setenv ("LC_ALL", "C", 1); */
1382 char *z = stpcpy (cmd, "sort -u -o ");
1383 z = stpcpy (z, tagfile);
1384 *z++ = ' ';
1385 strcpy (z, tagfile);
1386 return system (cmd);
1388 return EXIT_SUCCESS;
1393 * Return a compressor given the file name. If EXTPTR is non-zero,
1394 * return a pointer into FILE where the compressor-specific
1395 * extension begins. If no compressor is found, NULL is returned
1396 * and EXTPTR is not significant.
1397 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1399 static compressor *
1400 get_compressor_from_suffix (char *file, char **extptr)
1402 compressor *compr;
1403 char *slash, *suffix;
1405 /* File has been processed by canonicalize_filename,
1406 so we don't need to consider backslashes on DOS_NT. */
1407 slash = strrchr (file, '/');
1408 suffix = strrchr (file, '.');
1409 if (suffix == NULL || suffix < slash)
1410 return NULL;
1411 if (extptr != NULL)
1412 *extptr = suffix;
1413 suffix += 1;
1414 /* Let those poor souls who live with DOS 8+3 file name limits get
1415 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1416 Only the first do loop is run if not MSDOS */
1419 for (compr = compressors; compr->suffix != NULL; compr++)
1420 if (streq (compr->suffix, suffix))
1421 return compr;
1422 if (!MSDOS)
1423 break; /* do it only once: not really a loop */
1424 if (extptr != NULL)
1425 *extptr = ++suffix;
1426 } while (*suffix != '\0');
1427 return NULL;
1433 * Return a language given the name.
1435 static language *
1436 get_language_from_langname (const char *name)
1438 language *lang;
1440 if (name == NULL)
1441 error ("empty language name");
1442 else
1444 for (lang = lang_names; lang->name != NULL; lang++)
1445 if (streq (name, lang->name))
1446 return lang;
1447 error ("unknown language \"%s\"", name);
1450 return NULL;
1455 * Return a language given the interpreter name.
1457 static language *
1458 get_language_from_interpreter (char *interpreter)
1460 language *lang;
1461 const char **iname;
1463 if (interpreter == NULL)
1464 return NULL;
1465 for (lang = lang_names; lang->name != NULL; lang++)
1466 if (lang->interpreters != NULL)
1467 for (iname = lang->interpreters; *iname != NULL; iname++)
1468 if (streq (*iname, interpreter))
1469 return lang;
1471 return NULL;
1477 * Return a language given the file name.
1479 static language *
1480 get_language_from_filename (char *file, int case_sensitive)
1482 language *lang;
1483 const char **name, **ext, *suffix;
1484 char *slash;
1486 /* Try whole file name first. */
1487 slash = strrchr (file, '/');
1488 if (slash != NULL)
1489 file = slash + 1;
1490 #ifdef DOS_NT
1491 else if (file[0] && file[1] == ':')
1492 file += 2;
1493 #endif
1494 for (lang = lang_names; lang->name != NULL; lang++)
1495 if (lang->filenames != NULL)
1496 for (name = lang->filenames; *name != NULL; name++)
1497 if ((case_sensitive)
1498 ? streq (*name, file)
1499 : strcaseeq (*name, file))
1500 return lang;
1502 /* If not found, try suffix after last dot. */
1503 suffix = strrchr (file, '.');
1504 if (suffix == NULL)
1505 return NULL;
1506 suffix += 1;
1507 for (lang = lang_names; lang->name != NULL; lang++)
1508 if (lang->suffixes != NULL)
1509 for (ext = lang->suffixes; *ext != NULL; ext++)
1510 if ((case_sensitive)
1511 ? streq (*ext, suffix)
1512 : strcaseeq (*ext, suffix))
1513 return lang;
1514 return NULL;
1519 * This routine is called on each file argument.
1521 static void
1522 process_file_name (char *file, language *lang)
1524 FILE *inf;
1525 fdesc *fdp;
1526 compressor *compr;
1527 char *compressed_name, *uncompressed_name;
1528 char *ext, *real_name, *tmp_name;
1529 int retval;
1531 canonicalize_filename (file);
1532 if (streq (file, tagfile) && !streq (tagfile, "-"))
1534 error ("skipping inclusion of %s in self.", file);
1535 return;
1537 compr = get_compressor_from_suffix (file, &ext);
1538 if (compr)
1540 compressed_name = file;
1541 uncompressed_name = savenstr (file, ext - file);
1543 else
1545 compressed_name = NULL;
1546 uncompressed_name = file;
1549 /* If the canonicalized uncompressed name
1550 has already been dealt with, skip it silently. */
1551 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553 assert (fdp->infname != NULL);
1554 if (streq (uncompressed_name, fdp->infname))
1555 goto cleanup;
1558 inf = fopen (file, "r" FOPEN_BINARY);
1559 if (inf)
1560 real_name = file;
1561 else
1563 int file_errno = errno;
1564 if (compressed_name)
1566 /* Try with the given suffix. */
1567 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1568 if (inf)
1569 real_name = uncompressed_name;
1571 else
1573 /* Try all possible suffixes. */
1574 for (compr = compressors; compr->suffix != NULL; compr++)
1576 compressed_name = concat (file, ".", compr->suffix);
1577 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1578 if (inf)
1580 real_name = compressed_name;
1581 break;
1583 if (MSDOS)
1585 char *suf = compressed_name + strlen (file);
1586 size_t suflen = strlen (compr->suffix) + 1;
1587 for ( ; suf[1]; suf++, suflen--)
1589 memmove (suf, suf + 1, suflen);
1590 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1591 if (inf)
1593 real_name = compressed_name;
1594 break;
1597 if (inf)
1598 break;
1600 free (compressed_name);
1601 compressed_name = NULL;
1604 if (! inf)
1606 errno = file_errno;
1607 perror (file);
1608 goto cleanup;
1612 if (real_name == compressed_name)
1614 fclose (inf);
1615 tmp_name = etags_mktmp ();
1616 if (!tmp_name)
1617 inf = NULL;
1618 else
1620 #if MSDOS || defined (DOS_NT)
1621 char *cmd1 = concat (compr->command, " \"", real_name);
1622 char *cmd = concat (cmd1, "\" > ", tmp_name);
1623 #else
1624 char *cmd1 = concat (compr->command, " '", real_name);
1625 char *cmd = concat (cmd1, "' > ", tmp_name);
1626 #endif
1627 free (cmd1);
1628 int tmp_errno;
1629 if (system (cmd) == -1)
1631 inf = NULL;
1632 tmp_errno = EINVAL;
1634 else
1636 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1637 tmp_errno = errno;
1639 free (cmd);
1640 errno = tmp_errno;
1643 if (!inf)
1645 perror (real_name);
1646 goto cleanup;
1650 process_file (inf, uncompressed_name, lang);
1652 retval = fclose (inf);
1653 if (real_name == compressed_name)
1655 remove (tmp_name);
1656 free (tmp_name);
1658 if (retval < 0)
1659 pfatal (file);
1661 cleanup:
1662 if (compressed_name != file)
1663 free (compressed_name);
1664 if (uncompressed_name != file)
1665 free (uncompressed_name);
1666 last_node = NULL;
1667 curfdp = NULL;
1668 return;
1671 static void
1672 process_file (FILE *fh, char *fn, language *lang)
1674 static const fdesc emptyfdesc;
1675 fdesc *fdp;
1677 infilename = fn;
1678 /* Create a new input file description entry. */
1679 fdp = xnew (1, fdesc);
1680 *fdp = emptyfdesc;
1681 fdp->next = fdhead;
1682 fdp->infname = savestr (fn);
1683 fdp->lang = lang;
1684 fdp->infabsname = absolute_filename (fn, cwd);
1685 fdp->infabsdir = absolute_dirname (fn, cwd);
1686 if (filename_is_absolute (fn))
1688 /* An absolute file name. Canonicalize it. */
1689 fdp->taggedfname = absolute_filename (fn, NULL);
1691 else
1693 /* A file name relative to cwd. Make it relative
1694 to the directory of the tags file. */
1695 fdp->taggedfname = relative_filename (fn, tagfiledir);
1697 fdp->usecharno = true; /* use char position when making tags */
1698 fdp->prop = NULL;
1699 fdp->written = false; /* not written on tags file yet */
1701 fdhead = fdp;
1702 curfdp = fdhead; /* the current file description */
1704 find_entries (fh);
1706 /* If not Ctags, and if this is not metasource and if it contained no #line
1707 directives, we can write the tags and free all nodes pointing to
1708 curfdp. */
1709 if (!CTAGS
1710 && curfdp->usecharno /* no #line directives in this file */
1711 && !curfdp->lang->metasource)
1713 node *np, *prev;
1715 /* Look for the head of the sublist relative to this file. See add_node
1716 for the structure of the node tree. */
1717 prev = NULL;
1718 for (np = nodehead; np != NULL; prev = np, np = np->left)
1719 if (np->fdp == curfdp)
1720 break;
1722 /* If we generated tags for this file, write and delete them. */
1723 if (np != NULL)
1725 /* This is the head of the last sublist, if any. The following
1726 instructions depend on this being true. */
1727 assert (np->left == NULL);
1729 assert (fdhead == curfdp);
1730 assert (last_node->fdp == curfdp);
1731 put_entries (np); /* write tags for file curfdp->taggedfname */
1732 free_tree (np); /* remove the written nodes */
1733 if (prev == NULL)
1734 nodehead = NULL; /* no nodes left */
1735 else
1736 prev->left = NULL; /* delete the pointer to the sublist */
1741 static void
1742 reset_input (FILE *inf)
1744 if (fseek (inf, 0, SEEK_SET) != 0)
1745 perror (infilename);
1749 * This routine opens the specified file and calls the function
1750 * which finds the function and type definitions.
1752 static void
1753 find_entries (FILE *inf)
1755 char *cp;
1756 language *lang = curfdp->lang;
1757 Lang_function *parser = NULL;
1759 /* If user specified a language, use it. */
1760 if (lang != NULL && lang->function != NULL)
1762 parser = lang->function;
1765 /* Else try to guess the language given the file name. */
1766 if (parser == NULL)
1768 lang = get_language_from_filename (curfdp->infname, true);
1769 if (lang != NULL && lang->function != NULL)
1771 curfdp->lang = lang;
1772 parser = lang->function;
1776 /* Else look for sharp-bang as the first two characters. */
1777 if (parser == NULL
1778 && readline_internal (&lb, inf, infilename) > 0
1779 && lb.len >= 2
1780 && lb.buffer[0] == '#'
1781 && lb.buffer[1] == '!')
1783 char *lp;
1785 /* Set lp to point at the first char after the last slash in the
1786 line or, if no slashes, at the first nonblank. Then set cp to
1787 the first successive blank and terminate the string. */
1788 lp = strrchr (lb.buffer+2, '/');
1789 if (lp != NULL)
1790 lp += 1;
1791 else
1792 lp = skip_spaces (lb.buffer + 2);
1793 cp = skip_non_spaces (lp);
1794 *cp = '\0';
1796 if (strlen (lp) > 0)
1798 lang = get_language_from_interpreter (lp);
1799 if (lang != NULL && lang->function != NULL)
1801 curfdp->lang = lang;
1802 parser = lang->function;
1807 reset_input (inf);
1809 /* Else try to guess the language given the case insensitive file name. */
1810 if (parser == NULL)
1812 lang = get_language_from_filename (curfdp->infname, false);
1813 if (lang != NULL && lang->function != NULL)
1815 curfdp->lang = lang;
1816 parser = lang->function;
1820 /* Else try Fortran or C. */
1821 if (parser == NULL)
1823 node *old_last_node = last_node;
1825 curfdp->lang = get_language_from_langname ("fortran");
1826 find_entries (inf);
1828 if (old_last_node == last_node)
1829 /* No Fortran entries found. Try C. */
1831 reset_input (inf);
1832 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1833 find_entries (inf);
1835 return;
1838 if (!no_line_directive
1839 && curfdp->lang != NULL && curfdp->lang->metasource)
1840 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1841 file, or anyway we parsed a file that is automatically generated from
1842 this one. If this is the case, the bingo.c file contained #line
1843 directives that generated tags pointing to this file. Let's delete
1844 them all before parsing this file, which is the real source. */
1846 fdesc **fdpp = &fdhead;
1847 while (*fdpp != NULL)
1848 if (*fdpp != curfdp
1849 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1850 /* We found one of those! We must delete both the file description
1851 and all tags referring to it. */
1853 fdesc *badfdp = *fdpp;
1855 /* Delete the tags referring to badfdp->taggedfname
1856 that were obtained from badfdp->infname. */
1857 invalidate_nodes (badfdp, &nodehead);
1859 *fdpp = badfdp->next; /* remove the bad description from the list */
1860 free_fdesc (badfdp);
1862 else
1863 fdpp = &(*fdpp)->next; /* advance the list pointer */
1866 assert (parser != NULL);
1868 /* Generic initializations before reading from file. */
1869 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1871 /* Generic initializations before parsing file with readline. */
1872 lineno = 0; /* reset global line number */
1873 charno = 0; /* reset global char number */
1874 linecharno = 0; /* reset global char number of line start */
1876 parser (inf);
1878 regex_tag_multiline ();
1883 * Check whether an implicitly named tag should be created,
1884 * then call `pfnote'.
1885 * NAME is a string that is internally copied by this function.
1887 * TAGS format specification
1888 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1889 * The following is explained in some more detail in etc/ETAGS.EBNF.
1891 * make_tag creates tags with "implicit tag names" (unnamed tags)
1892 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1893 * 1. NAME does not contain any of the characters in NONAM;
1894 * 2. LINESTART contains name as either a rightmost, or rightmost but
1895 * one character, substring;
1896 * 3. the character, if any, immediately before NAME in LINESTART must
1897 * be a character in NONAM;
1898 * 4. the character, if any, immediately after NAME in LINESTART must
1899 * also be a character in NONAM.
1901 * The implementation uses the notinname() macro, which recognizes the
1902 * characters stored in the string `nonam'.
1903 * etags.el needs to use the same characters that are in NONAM.
1905 static void
1906 make_tag (const char *name, /* tag name, or NULL if unnamed */
1907 int namelen, /* tag length */
1908 bool is_func, /* tag is a function */
1909 char *linestart, /* start of the line where tag is */
1910 int linelen, /* length of the line where tag is */
1911 int lno, /* line number */
1912 long int cno) /* character number */
1914 bool named = (name != NULL && namelen > 0);
1915 char *nname = NULL;
1917 if (!CTAGS && named) /* maybe set named to false */
1918 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1919 such that etags.el can guess a name from it. */
1921 int i;
1922 register const char *cp = name;
1924 for (i = 0; i < namelen; i++)
1925 if (notinname (*cp++))
1926 break;
1927 if (i == namelen) /* rule #1 */
1929 cp = linestart + linelen - namelen;
1930 if (notinname (linestart[linelen-1]))
1931 cp -= 1; /* rule #4 */
1932 if (cp >= linestart /* rule #2 */
1933 && (cp == linestart
1934 || notinname (cp[-1])) /* rule #3 */
1935 && strneq (name, cp, namelen)) /* rule #2 */
1936 named = false; /* use implicit tag name */
1940 if (named)
1941 nname = savenstr (name, namelen);
1943 pfnote (nname, is_func, linestart, linelen, lno, cno);
1946 /* Record a tag. */
1947 static void
1948 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1949 long int cno)
1950 /* tag name, or NULL if unnamed */
1951 /* tag is a function */
1952 /* start of the line where tag is */
1953 /* length of the line where tag is */
1954 /* line number */
1955 /* character number */
1957 register node *np;
1959 assert (name == NULL || name[0] != '\0');
1960 if (CTAGS && name == NULL)
1961 return;
1963 np = xnew (1, node);
1965 /* If ctags mode, change name "main" to M<thisfilename>. */
1966 if (CTAGS && !cxref_style && streq (name, "main"))
1968 char *fp = strrchr (curfdp->taggedfname, '/');
1969 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1970 fp = strrchr (np->name, '.');
1971 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1972 fp[0] = '\0';
1974 else
1975 np->name = name;
1976 np->valid = true;
1977 np->been_warned = false;
1978 np->fdp = curfdp;
1979 np->is_func = is_func;
1980 np->lno = lno;
1981 if (np->fdp->usecharno)
1982 /* Our char numbers are 0-base, because of C language tradition?
1983 ctags compatibility? old versions compatibility? I don't know.
1984 Anyway, since emacs's are 1-base we expect etags.el to take care
1985 of the difference. If we wanted to have 1-based numbers, we would
1986 uncomment the +1 below. */
1987 np->cno = cno /* + 1 */ ;
1988 else
1989 np->cno = invalidcharno;
1990 np->left = np->right = NULL;
1991 if (CTAGS && !cxref_style)
1993 if (strlen (linestart) < 50)
1994 np->regex = concat (linestart, "$", "");
1995 else
1996 np->regex = savenstr (linestart, 50);
1998 else
1999 np->regex = savenstr (linestart, linelen);
2001 add_node (np, &nodehead);
2005 * Utility functions and data to avoid recursion.
2008 typedef struct stack_entry {
2009 node *np;
2010 struct stack_entry *next;
2011 } stkentry;
2013 static void
2014 push_node (node *np, stkentry **stack_top)
2016 if (np)
2018 stkentry *new = xnew (1, stkentry);
2020 new->np = np;
2021 new->next = *stack_top;
2022 *stack_top = new;
2026 static node *
2027 pop_node (stkentry **stack_top)
2029 node *ret = NULL;
2031 if (*stack_top)
2033 stkentry *old_start = *stack_top;
2035 ret = (*stack_top)->np;
2036 *stack_top = (*stack_top)->next;
2037 free (old_start);
2039 return ret;
2043 * free_tree ()
2044 * emulate recursion on left children, iterate on right children.
2046 static void
2047 free_tree (register node *np)
2049 stkentry *stack = NULL;
2051 while (np)
2053 /* Descent on left children. */
2054 while (np->left)
2056 push_node (np, &stack);
2057 np = np->left;
2059 /* Free node without left children. */
2060 node *node_right = np->right;
2061 free (np->name);
2062 free (np->regex);
2063 free (np);
2064 if (!node_right)
2066 /* Backtrack to find a node with right children, while freeing nodes
2067 that don't have right children. */
2068 while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2070 node_right = np->right;
2071 free (np->name);
2072 free (np->regex);
2073 free (np);
2076 /* Free right children. */
2077 np = node_right;
2082 * free_fdesc ()
2083 * delete a file description
2085 static void
2086 free_fdesc (register fdesc *fdp)
2088 free (fdp->infname);
2089 free (fdp->infabsname);
2090 free (fdp->infabsdir);
2091 free (fdp->taggedfname);
2092 free (fdp->prop);
2093 free (fdp);
2097 * add_node ()
2098 * Adds a node to the tree of nodes. In etags mode, sort by file
2099 * name. In ctags mode, sort by tag name. Make no attempt at
2100 * balancing.
2102 * add_node is the only function allowed to add nodes, so it can
2103 * maintain state.
2105 static void
2106 add_node (node *np, node **cur_node_p)
2108 node *cur_node = *cur_node_p;
2110 /* Make the first node. */
2111 if (cur_node == NULL)
2113 *cur_node_p = np;
2114 last_node = np;
2115 return;
2118 if (!CTAGS)
2119 /* Etags Mode */
2121 /* For each file name, tags are in a linked sublist on the right
2122 pointer. The first tags of different files are a linked list
2123 on the left pointer. last_node points to the end of the last
2124 used sublist. */
2125 if (last_node != NULL && last_node->fdp == np->fdp)
2127 /* Let's use the same sublist as the last added node. */
2128 assert (last_node->right == NULL);
2129 last_node->right = np;
2130 last_node = np;
2132 else
2134 while (cur_node->fdp != np->fdp)
2136 if (cur_node->left == NULL)
2137 break;
2138 /* The head of this sublist is not good for us. Let's try the
2139 next one. */
2140 cur_node = cur_node->left;
2142 if (cur_node->left)
2144 /* Scanning the list we found the head of a sublist which is
2145 good for us. Let's scan this sublist. */
2146 if (cur_node->right)
2148 cur_node = cur_node->right;
2149 while (cur_node->right)
2150 cur_node = cur_node->right;
2152 /* Make a new node in this sublist. */
2153 cur_node->right = np;
2155 else
2157 /* Make a new sublist. */
2158 cur_node->left = np;
2160 last_node = np;
2162 } /* if ETAGS mode */
2163 else
2165 /* Ctags Mode */
2166 node **next_node = &cur_node;
2168 while ((cur_node = *next_node) != NULL)
2170 int dif = strcmp (np->name, cur_node->name);
2172 * If this tag name matches an existing one, then
2173 * do not add the node, but maybe print a warning.
2175 if (!dif && no_duplicates)
2177 if (np->fdp == cur_node->fdp)
2179 if (!no_warnings)
2181 fprintf (stderr,
2182 "Duplicate entry in file %s, line %d: %s\n",
2183 np->fdp->infname, lineno, np->name);
2184 fprintf (stderr, "Second entry ignored\n");
2187 else if (!cur_node->been_warned && !no_warnings)
2189 fprintf
2190 (stderr,
2191 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2192 np->fdp->infname, cur_node->fdp->infname, np->name);
2193 cur_node->been_warned = true;
2195 return;
2197 else
2198 next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2200 *next_node = np;
2201 last_node = np;
2202 } /* if CTAGS mode */
2206 * invalidate_nodes ()
2207 * Scan the node tree and invalidate all nodes pointing to the
2208 * given file description (CTAGS case) or free them (ETAGS case).
2210 static void
2211 invalidate_nodes (fdesc *badfdp, node **npp)
2213 node *np = *npp;
2214 stkentry *stack = NULL;
2216 if (CTAGS)
2218 while (np)
2220 /* Push all the left children on the stack. */
2221 while (np->left != NULL)
2223 push_node (np, &stack);
2224 np = np->left;
2226 /* Invalidate this node. */
2227 if (np->fdp == badfdp)
2228 np->valid = false;
2229 if (!np->right)
2231 /* Pop nodes from stack, invalidating them, until we find one
2232 with a right child. */
2233 while ((np = pop_node (&stack)) != NULL)
2235 if (np->fdp == badfdp)
2236 np->valid = false;
2237 if (np->right != NULL)
2238 break;
2241 /* Process the right child, if any. */
2242 if (np)
2243 np = np->right;
2246 else
2248 node super_root, *np_parent = NULL;
2250 super_root.left = np;
2251 super_root.fdp = (fdesc *) -1;
2252 np = &super_root;
2254 while (np)
2256 /* Descent on left children until node with BADFP. */
2257 while (np && np->fdp != badfdp)
2259 assert (np->fdp != NULL);
2260 np_parent = np;
2261 np = np->left;
2263 if (np)
2265 np_parent->left = np->left; /* detach subtree from the tree */
2266 np->left = NULL; /* isolate it */
2267 free_tree (np); /* free it */
2269 /* Continue with rest of tree. */
2270 np = np_parent->left;
2273 *npp = super_root.left;
2278 static int total_size_of_entries (node *);
2279 static int number_len (long) ATTRIBUTE_CONST;
/* Length of NUM's decimal representation as printed by "%ld": the
   number of digits, plus one for the minus sign when NUM is negative.
   Zero has length 1.  */
static int
number_len (long int num)
{
  /* Start with the first digit, and the sign if there is one.  */
  int len = (num < 0) ? 2 : 1;

  /* Dividing truncates toward zero, so comparing against zero works
     for negative values as well and never needs to negate NUM.  */
  while ((num /= 10) != 0)
    len += 1;
  return len;
}
2292 * Return total number of characters that put_entries will output for
2293 * the nodes in the linked list at the right of the specified node.
2294 * This count is irrelevant with etags.el since emacs 19.34 at least,
2295 * but is still supplied for backward compatibility.
2297 static int
2298 total_size_of_entries (register node *np)
2300 register int total = 0;
2302 for (; np != NULL; np = np->right)
2303 if (np->valid)
2305 total += strlen (np->regex) + 1; /* pat\177 */
2306 if (np->name != NULL)
2307 total += strlen (np->name) + 1; /* name\001 */
2308 total += number_len ((long) np->lno) + 1; /* lno, */
2309 if (np->cno != invalidcharno) /* cno */
2310 total += number_len (np->cno);
2311 total += 1; /* newline */
2314 return total;
2317 static void
2318 put_entry (node *np)
2320 register char *sp;
2321 static fdesc *fdp = NULL;
2323 /* Output this entry */
2324 if (np->valid)
2326 if (!CTAGS)
2328 /* Etags mode */
2329 if (fdp != np->fdp)
2331 fdp = np->fdp;
2332 fprintf (tagf, "\f\n%s,%d\n",
2333 fdp->taggedfname, total_size_of_entries (np));
2334 fdp->written = true;
2336 fputs (np->regex, tagf);
2337 fputc ('\177', tagf);
2338 if (np->name != NULL)
2340 fputs (np->name, tagf);
2341 fputc ('\001', tagf);
2343 fprintf (tagf, "%d,", np->lno);
2344 if (np->cno != invalidcharno)
2345 fprintf (tagf, "%ld", np->cno);
2346 fputs ("\n", tagf);
2348 else
2350 /* Ctags mode */
2351 if (np->name == NULL)
2352 error ("internal error: NULL name in ctags mode.");
2354 if (cxref_style)
2356 if (vgrind_style)
2357 fprintf (stdout, "%s %s %d\n",
2358 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2359 else
2360 fprintf (stdout, "%-16s %3d %-16s %s\n",
2361 np->name, np->lno, np->fdp->taggedfname, np->regex);
2363 else
2365 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2367 if (np->is_func)
2368 { /* function or #define macro with args */
2369 putc (searchar, tagf);
2370 putc ('^', tagf);
2372 for (sp = np->regex; *sp; sp++)
2374 if (*sp == '\\' || *sp == searchar)
2375 putc ('\\', tagf);
2376 putc (*sp, tagf);
2378 putc (searchar, tagf);
2380 else
2381 { /* anything else; text pattern inadequate */
2382 fprintf (tagf, "%d", np->lno);
2384 putc ('\n', tagf);
2387 } /* if this node contains a valid tag */
2390 static void
2391 put_entries (node *np)
2393 stkentry *stack = NULL;
2395 if (np == NULL)
2396 return;
2398 if (CTAGS)
2400 while (np)
2402 /* Stack subentries that precede this one. */
2403 while (np->left)
2405 push_node (np, &stack);
2406 np = np->left;
2408 /* Output this subentry. */
2409 put_entry (np);
2410 /* Stack subentries that follow this one. */
2411 while (!np->right)
2413 /* Output subentries that precede the next one. */
2414 np = pop_node (&stack);
2415 if (!np)
2416 break;
2417 put_entry (np);
2419 if (np)
2420 np = np->right;
2423 else
2425 push_node (np, &stack);
2426 while ((np = pop_node (&stack)) != NULL)
2428 /* Output this subentry. */
2429 put_entry (np);
2430 while (np->right)
2432 /* Output subentries that follow this one. */
2433 put_entry (np->right);
2434 /* Stack subentries from the following files. */
2435 push_node (np->left, &stack);
2436 np = np->right;
2438 push_node (np->left, &stack);
/* C extensions.
   Values stored in the c_ext argument of the C-family parser.  The low
   bits selected by C_EXT name a dialect; C_AUTO and YACC are separate
   flags above that range.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classification of a token as returned by C_symtype; st_none is used
 * for anything that is not in the keyword table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,                  /* define, undef */
  st_C_typedef
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2518 /*%<*/
2519 /* C code produced by gperf version 3.0.1 */
2520 /* Command-line: gperf -m 5 */
2521 /* Computed positions: -k'2-3' */
2523 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2524 /* maximum key range = 33, duplicates = 0 */
/* Hash function generated by gperf for the C keyword table.  The value
   is LEN plus the table weight of the second character and, unless LEN
   is exactly 2, of the third character as well.  STR must therefore
   have at least two characters (three when LEN is not 2).
   NOTE(review): this differs textually from raw gperf output; if the
   table is regenerated, this function is replaced as a whole.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Two-character words use only their second character.  */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2572 static struct C_stab_entry *
2573 in_word_set (register const char *str, register unsigned int len)
2575 enum
2577 TOTAL_KEYWORDS = 33,
2578 MIN_WORD_LENGTH = 2,
2579 MAX_WORD_LENGTH = 15,
2580 MIN_HASH_VALUE = 2,
2581 MAX_HASH_VALUE = 34
2584 static struct C_stab_entry wordlist[] =
2586 {""}, {""},
2587 {"if", 0, st_C_ignore},
2588 {"GTY", 0, st_C_attribute},
2589 {"@end", 0, st_C_objend},
2590 {"union", 0, st_C_struct},
2591 {"define", 0, st_C_define},
2592 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2593 {"template", 0, st_C_template},
2594 {"operator", C_PLPL, st_C_operator},
2595 {"@interface", 0, st_C_objprot},
2596 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2597 {"friend", C_PLPL, st_C_ignore},
2598 {"typedef", 0, st_C_typedef},
2599 {"return", 0, st_C_ignore},
2600 {"@implementation",0, st_C_objimpl},
2601 {"@protocol", 0, st_C_objprot},
2602 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2603 {"extern", 0, st_C_extern},
2604 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2605 {"struct", 0, st_C_struct},
2606 {"domain", C_STAR, st_C_struct},
2607 {"switch", 0, st_C_ignore},
2608 {"enum", 0, st_C_enum},
2609 {"for", 0, st_C_ignore},
2610 {"namespace", C_PLPL, st_C_struct},
2611 {"class", 0, st_C_class},
2612 {"while", 0, st_C_ignore},
2613 {"undef", 0, st_C_define},
2614 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2615 {"__attribute__", 0, st_C_attribute},
2616 {"SYSCALL", 0, st_C_gnumacro},
2617 {"ENTRY", 0, st_C_gnumacro},
2618 {"PSEUDO", 0, st_C_gnumacro},
2619 {"DEFUN", 0, st_C_gnumacro}
2622 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2624 int key = hash (str, len);
2626 if (key <= MAX_HASH_VALUE && key >= 0)
2628 const char *s = wordlist[key].name;
2630 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2631 return &wordlist[key];
2634 return 0;
2636 /*%>*/
2638 static enum sym_type
2639 C_symtype (char *str, int len, int c_ext)
2641 register struct C_stab_entry *se = in_word_set (str, len);
2643 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2644 return st_none;
2645 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* These three members can be used to pass strings around for generic
     purposes.  */
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */

  /* The following members specifically refer to creating tags.  In
     this case the token contained here is the pattern that will be
     used to create a tag.  */
  bool valid;                   /* when false, do not create a tag; the
                                   token should be invalidated whenever a
                                   state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2763 * Variables and functions for dealing with nested structures.
2764 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2766 static void pushclass_above (int, char *, int);
2767 static void popclass_above (int);
2768 static void write_classname (linebuffer *, const char *qualifier);
2770 static struct {
2771 char **cname; /* nested class names */
2772 int *bracelev; /* nested class brace level */
2773 int nl; /* class nesting level (elements used) */
2774 int size; /* length of the array */
2775 } cstack; /* stack for nested declaration tags */
2776 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2777 #define nestlev (cstack.nl)
2778 /* After struct keyword or in struct body, not inside a nested function. */
2779 #define instruct (structdef == snone && nestlev > 0 \
2780 && bracelev == cstack.bracelev[nestlev-1] + 1)
2782 static void
2783 pushclass_above (int bracelev, char *str, int len)
2785 int nl;
2787 popclass_above (bracelev);
2788 nl = cstack.nl;
2789 if (nl >= cstack.size)
2791 int size = cstack.size *= 2;
2792 xrnew (cstack.cname, size, char *);
2793 xrnew (cstack.bracelev, size, int);
2795 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2796 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2797 cstack.bracelev[nl] = bracelev;
2798 cstack.nl = nl + 1;
2801 static void
2802 popclass_above (int bracelev)
2804 int nl;
2806 for (nl = cstack.nl - 1;
2807 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2808 nl--)
2810 free (cstack.cname[nl]);
2811 cstack.nl = nl;
2815 static void
2816 write_classname (linebuffer *cn, const char *qualifier)
2818 int i, len;
2819 int qlen = strlen (qualifier);
2821 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2823 len = 0;
2824 cn->len = 0;
2825 cn->buffer[0] = '\0';
2827 else
2829 len = strlen (cstack.cname[0]);
2830 linebuffer_setlen (cn, len);
2831 strcpy (cn->buffer, cstack.cname[0]);
2833 for (i = 1; i < cstack.nl; i++)
2835 char *s = cstack.cname[i];
2836 if (s == NULL)
2837 continue;
2838 linebuffer_setlen (cn, len + qlen + strlen (s));
2839 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2844 static bool consider_token (char *, int, int, int *, int, int, bool *);
2845 static void make_C_tag (bool);
2848 * consider_token ()
2849 * checks to see if the current token is at the start of a
2850 * function or variable, or corresponds to a typedef, or
2851 * is a struct/union/enum tag, or #define, or an enum constant.
2853 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2854 * with args. C_EXTP points to which language we are looking at.
2856 * Globals
2857 * fvdef IN OUT
2858 * structdef IN OUT
2859 * definedef IN OUT
2860 * typdef IN OUT
2861 * objdef IN OUT
2864 static bool
2865 consider_token (char *str, int len, int c, int *c_extp,
2866 int bracelev, int parlev, bool *is_func_or_var)
2867 /* IN: token pointer */
2868 /* IN: token length */
2869 /* IN: first char after the token */
2870 /* IN, OUT: C extensions mask */
2871 /* IN: brace level */
2872 /* IN: parenthesis level */
2873 /* OUT: function or variable found */
2875 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2876 structtype is the type of the preceding struct-like keyword, and
2877 structbracelev is the brace level where it has been seen. */
2878 static enum sym_type structtype;
2879 static int structbracelev;
2880 static enum sym_type toktype;
2883 toktype = C_symtype (str, len, *c_extp);
2886 * Skip __attribute__
2888 if (toktype == st_C_attribute)
2890 inattribute = true;
2891 return false;
2895 * Advance the definedef state machine.
2897 switch (definedef)
2899 case dnone:
2900 /* We're not on a preprocessor line. */
2901 if (toktype == st_C_gnumacro)
2903 fvdef = fdefunkey;
2904 return false;
2906 break;
2907 case dsharpseen:
2908 if (toktype == st_C_define)
2910 definedef = ddefineseen;
2912 else
2914 definedef = dignorerest;
2916 return false;
2917 case ddefineseen:
2919 * Make a tag for any macro, unless it is a constant
2920 * and constantypedefs is false.
2922 definedef = dignorerest;
2923 *is_func_or_var = (c == '(');
2924 if (!*is_func_or_var && !constantypedefs)
2925 return false;
2926 else
2927 return true;
2928 case dignorerest:
2929 return false;
2930 default:
2931 error ("internal error: definedef value.");
2935 * Now typedefs
2937 switch (typdef)
2939 case tnone:
2940 if (toktype == st_C_typedef)
2942 if (typedefs)
2943 typdef = tkeyseen;
2944 fvextern = false;
2945 fvdef = fvnone;
2946 return false;
2948 break;
2949 case tkeyseen:
2950 switch (toktype)
2952 case st_none:
2953 case st_C_class:
2954 case st_C_struct:
2955 case st_C_enum:
2956 typdef = ttypeseen;
2957 break;
2958 default:
2959 break;
2961 break;
2962 case ttypeseen:
2963 if (structdef == snone && fvdef == fvnone)
2965 fvdef = fvnameseen;
2966 return true;
2968 break;
2969 case tend:
2970 switch (toktype)
2972 case st_C_class:
2973 case st_C_struct:
2974 case st_C_enum:
2975 return false;
2976 default:
2977 return true;
2979 default:
2980 break;
2983 switch (toktype)
2985 case st_C_javastruct:
2986 if (structdef == stagseen)
2987 structdef = scolonseen;
2988 return false;
2989 case st_C_template:
2990 case st_C_class:
2991 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2992 && bracelev == 0
2993 && definedef == dnone && structdef == snone
2994 && typdef == tnone && fvdef == fvnone)
2995 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2996 if (toktype == st_C_template)
2997 break;
2998 /* FALLTHRU */
2999 case st_C_struct:
3000 case st_C_enum:
3001 if (parlev == 0
3002 && fvdef != vignore
3003 && (typdef == tkeyseen
3004 || (typedefs_or_cplusplus && structdef == snone)))
3006 structdef = skeyseen;
3007 structtype = toktype;
3008 structbracelev = bracelev;
3009 if (fvdef == fvnameseen)
3010 fvdef = fvnone;
3012 return false;
3013 default:
3014 break;
3017 if (structdef == skeyseen)
3019 structdef = stagseen;
3020 return true;
3023 if (typdef != tnone)
3024 definedef = dnone;
3026 /* Detect Objective C constructs. */
3027 switch (objdef)
3029 case onone:
3030 switch (toktype)
3032 case st_C_objprot:
3033 objdef = oprotocol;
3034 return false;
3035 case st_C_objimpl:
3036 objdef = oimplementation;
3037 return false;
3038 default:
3039 break;
3041 break;
3042 case oimplementation:
3043 /* Save the class tag for functions or variables defined inside. */
3044 objtag = savenstr (str, len);
3045 objdef = oinbody;
3046 return false;
3047 case oprotocol:
3048 /* Save the class tag for categories. */
3049 objtag = savenstr (str, len);
3050 objdef = otagseen;
3051 *is_func_or_var = true;
3052 return true;
3053 case oparenseen:
3054 objdef = ocatseen;
3055 *is_func_or_var = true;
3056 return true;
3057 case oinbody:
3058 break;
3059 case omethodsign:
3060 if (parlev == 0)
3062 fvdef = fvnone;
3063 objdef = omethodtag;
3064 linebuffer_setlen (&token_name, len);
3065 memcpy (token_name.buffer, str, len);
3066 token_name.buffer[len] = '\0';
3067 return true;
3069 return false;
3070 case omethodcolon:
3071 if (parlev == 0)
3072 objdef = omethodparm;
3073 return false;
3074 case omethodparm:
3075 if (parlev == 0)
3077 objdef = omethodtag;
3078 if (class_qualify)
3080 int oldlen = token_name.len;
3081 fvdef = fvnone;
3082 linebuffer_setlen (&token_name, oldlen + len);
3083 memcpy (token_name.buffer + oldlen, str, len);
3084 token_name.buffer[oldlen + len] = '\0';
3086 return true;
3088 return false;
3089 case oignore:
3090 if (toktype == st_C_objend)
3092 /* Memory leakage here: the string pointed by objtag is
3093 never released, because many tests would be needed to
3094 avoid breaking on incorrect input code. The amount of
3095 memory leaked here is the sum of the lengths of the
3096 class tags.
3097 free (objtag); */
3098 objdef = onone;
3100 return false;
3101 default:
3102 break;
3105 /* A function, variable or enum constant? */
3106 switch (toktype)
3108 case st_C_extern:
3109 fvextern = true;
3110 switch (fvdef)
3112 case finlist:
3113 case flistseen:
3114 case fignore:
3115 case vignore:
3116 break;
3117 default:
3118 fvdef = fvnone;
3120 return false;
3121 case st_C_ignore:
3122 fvextern = false;
3123 fvdef = vignore;
3124 return false;
3125 case st_C_operator:
3126 fvdef = foperator;
3127 *is_func_or_var = true;
3128 return true;
3129 case st_none:
3130 if (constantypedefs
3131 && structdef == snone
3132 && structtype == st_C_enum && bracelev > structbracelev
3133 /* Don't tag tokens in expressions that assign values to enum
3134 constants. */
3135 && fvdef != vignore)
3136 return true; /* enum constant */
3137 switch (fvdef)
3139 case fdefunkey:
3140 if (bracelev > 0)
3141 break;
3142 fvdef = fdefunname; /* GNU macro */
3143 *is_func_or_var = true;
3144 return true;
3145 case fvnone:
3146 switch (typdef)
3148 case ttypeseen:
3149 return false;
3150 case tnone:
3151 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3152 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3154 fvdef = vignore;
3155 return false;
3157 break;
3158 default:
3159 break;
3161 /* FALLTHRU */
3162 case fvnameseen:
3163 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3165 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3166 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3167 fvdef = foperator;
3168 *is_func_or_var = true;
3169 return true;
3171 if (bracelev > 0 && !instruct)
3172 break;
3173 fvdef = fvnameseen; /* function or variable */
3174 *is_func_or_var = true;
3175 return true;
3176 default:
3177 break;
3179 break;
3180 default:
3181 break;
3184 return false;
3189 * C_entries often keeps pointers to tokens or lines which are older than
3190 * the line currently read. By keeping two line buffers, and switching
3191 * them at end of line, it is possible to use those pointers.
3193 static struct
3195 long linepos;
3196 linebuffer lb;
3197 } lbs[2];
3199 #define current_lb_is_new (newndx == curndx)
3200 #define switch_line_buffers() (curndx = 1 - curndx)
3202 #define curlb (lbs[curndx].lb)
3203 #define newlb (lbs[newndx].lb)
3204 #define curlinepos (lbs[curndx].linepos)
3205 #define newlinepos (lbs[newndx].linepos)
3207 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3208 #define cplpl (c_ext & C_PLPL)
3209 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3211 #define CNL_SAVE_DEFINEDEF() \
3212 do { \
3213 curlinepos = charno; \
3214 readline (&curlb, inf); \
3215 lp = curlb.buffer; \
3216 quotednl = false; \
3217 newndx = curndx; \
3218 } while (0)
3220 #define CNL() \
3221 do { \
3222 CNL_SAVE_DEFINEDEF (); \
3223 if (savetoken.valid) \
3225 token = savetoken; \
3226 savetoken.valid = false; \
3228 definedef = dnone; \
3229 } while (0)
3232 static void
3233 make_C_tag (bool isfun)
3235 /* This function is never called when token.valid is false, but
3236 we must protect against invalid input or internal errors. */
3237 if (token.valid)
3238 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3239 token.offset+token.length+1, token.lineno, token.linepos);
3240 else if (DEBUG)
3241 { /* this branch is optimized away if !DEBUG */
3242 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3243 token_name.len + 17, isfun, token.line,
3244 token.offset+token.length+1, token.lineno, token.linepos);
3245 error ("INVALID TOKEN");
3248 token.valid = false;
/* Return true unless the end-of-file or the error indicator is set on
   INF, that is, while reading more from INF may still succeed.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3259 * C_entries ()
3260 * This routine finds functions, variables, typedefs,
3261 * #define's, enum constants and struct/union/enum definitions in
3262 * C syntax and adds them to the list.
/* Scan the C-family source read from INF and emit tags through
   make_C_tag.  C_EXT is a bit mask of language-extension flags (this
   function tests C_AUTO, C_PLPL and YACC on it); INF is the stream
   being read.  The two line buffers lbs[0] and lbs[1] are initialized
   on entry and their storage is freed before returning.  */
3264 static void
3265 C_entries (int c_ext, FILE *inf)
3266 /* extension of C */
3267 /* input file */
3269 register char c; /* latest char read; '\0' for end of line */
3270 register char *lp; /* pointer one beyond the character `c' */
3271 int curndx, newndx; /* indices for current and new lb */
3272 register int tokoff; /* offset in line of start of current token */
3273 register int toklen; /* length of current token */
3274 const char *qualifier; /* string used to qualify names */
3275 int qlen; /* length of qualifier */
3276 int bracelev; /* current brace level */
3277 int bracketlev; /* current bracket level */
3278 int parlev; /* current parenthesis level */
3279 int attrparlev; /* __attribute__ parenthesis level */
3280 int templatelev; /* current template level */
3281 int typdefbracelev; /* bracelev where a typedef struct body begun */
3282 bool incomm, inquote, inchar, quotednl, midtoken;
3283 bool yacc_rules; /* in the rules part of a yacc file */
3284 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3287 linebuffer_init (&lbs[0].lb);
3288 linebuffer_init (&lbs[1].lb);
/* Allocate the class stack arrays if they have no capacity yet. */
3289 if (cstack.size == 0)
3291 cstack.size = (DEBUG) ? 1 : 4;
3292 cstack.nl = 0;
3293 cstack.cname = xnew (cstack.size, char *);
3294 cstack.bracelev = xnew (cstack.size, int);
3297 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3298 curndx = newndx = 0;
3299 lp = curlb.buffer;
3300 *lp = 0;
3302 fvdef = fvnone; fvextern = false; typdef = tnone;
3303 structdef = snone; definedef = dnone; objdef = onone;
3304 yacc_rules = false;
3305 midtoken = inquote = inchar = incomm = quotednl = false;
3306 token.valid = savetoken.valid = false;
3307 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3308 if (cjava)
3309 { qualifier = "."; qlen = 1; }
3310 else
3311 { qualifier = "::"; qlen = 2; }
/* Main loop: one character per iteration, for as long as INF reports
   neither end-of-file nor an error. */
3314 while (perhaps_more_input (inf))
3316 c = *lp++;
3317 if (c == '\\')
3319 /* If we are at the end of the line, the next character is a
3320 '\0'; do not skip it, because it is what tells us
3321 to read the next line. */
3322 if (*lp == '\0')
3324 quotednl = true;
3325 continue;
3327 lp++;
3328 c = ' ';
/* Inside a comment: only the terminator and end of line matter. */
3330 else if (incomm)
3332 switch (c)
3334 case '*':
3335 if (*lp == '/')
3337 c = *lp++;
3338 incomm = false;
3340 break;
3341 case '\0':
3342 /* Newlines inside comments do not end macro definitions in
3343 traditional cpp. */
3344 CNL_SAVE_DEFINEDEF ();
3345 break;
3347 continue;
3349 else if (inquote)
3351 switch (c)
3353 case '"':
3354 inquote = false;
3355 break;
3356 case '\0':
3357 /* Newlines inside strings do not end macro definitions
3358 in traditional cpp, even though compilers don't
3359 usually accept them. */
3360 CNL_SAVE_DEFINEDEF ();
3361 break;
3363 continue;
3365 else if (inchar)
3367 switch (c)
3369 case '\0':
3370 /* Hmmm, something went wrong. */
3371 CNL ();
3372 /* FALLTHRU */
3373 case '\'':
3374 inchar = false;
3375 break;
3377 continue;
/* Not inside a comment, string or character constant. */
3379 else switch (c)
3381 case '"':
3382 inquote = true;
3383 if (bracketlev > 0)
3384 continue;
3385 if (inattribute)
3386 break;
3387 switch (fvdef)
3389 case fdefunkey:
3390 case fstartlist:
3391 case finlist:
3392 case fignore:
3393 case vignore:
3394 break;
3395 default:
3396 fvextern = false;
3397 fvdef = fvnone;
3399 continue;
3400 case '\'':
3401 inchar = true;
3402 if (bracketlev > 0)
3403 continue;
3404 if (inattribute)
3405 break;
3406 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3408 fvextern = false;
3409 fvdef = fvnone;
3411 continue;
3412 case '/':
3413 if (*lp == '*')
3415 incomm = true;
3416 lp++;
3417 c = ' ';
3418 if (bracketlev > 0)
3419 continue;
3421 else if (/* cplpl && */ *lp == '/')
3423 c = '\0';
3425 break;
3426 case '%':
3427 if ((c_ext & YACC) && *lp == '%')
3429 /* Entering or exiting rules section in yacc file. */
3430 lp++;
3431 definedef = dnone; fvdef = fvnone; fvextern = false;
3432 typdef = tnone; structdef = snone;
3433 midtoken = inquote = inchar = incomm = quotednl = false;
3434 bracelev = 0;
3435 yacc_rules = !yacc_rules;
3436 continue;
3438 else
3439 break;
3440 case '#':
3441 if (definedef == dnone)
3443 char *cp;
3444 bool cpptoken = true;
3446 /* Look back on this line. If all blanks, or nonblanks
3447 followed by an end of comment, this is a preprocessor
3448 token. */
3449 for (cp = newlb.buffer; cp < lp-1; cp++)
3450 if (!c_isspace (*cp))
3452 if (*cp == '*' && cp[1] == '/')
3454 cp++;
3455 cpptoken = true;
3457 else
3458 cpptoken = false;
3460 if (cpptoken)
3462 definedef = dsharpseen;
3463 /* This is needed for tagging enum values: when there are
3464 preprocessor conditionals inside the enum, we need to
3465 reset the value of fvdef so that the next enum value is
3466 tagged even though the one before it did not end in a
3467 comma. */
3468 if (fvdef == vignore && instruct && parlev == 0)
3470 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3471 fvdef = fvnone;
3474 } /* if (definedef == dnone) */
3475 continue;
3476 case '[':
3477 bracketlev++;
3478 continue;
3479 default:
3480 if (bracketlev > 0)
3482 if (c == ']')
3483 --bracketlev;
3484 else if (c == '\0')
3485 CNL_SAVE_DEFINEDEF ();
3486 continue;
3488 break;
3489 } /* switch (c) */
3492 /* Consider token only if some involved conditions are satisfied. */
3493 if (typdef != tignore
3494 && definedef != dignorerest
3495 && fvdef != finlist
3496 && templatelev == 0
3497 && (definedef != dnone
3498 || structdef != scolonseen)
3499 && !inattribute)
3501 if (midtoken)
3503 if (endtoken (c))
3505 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3506 /* This handles :: in the middle,
3507 but not at the beginning of an identifier.
3508 Also, space-separated :: is not recognized. */
3510 if (c_ext & C_AUTO) /* automatic detection of C++ */
3511 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3512 lp += 2;
3513 toklen += 2;
3514 c = lp[-1];
3515 goto still_in_token;
3517 else
3519 bool funorvar = false;
3521 if (yacc_rules
3522 || consider_token (newlb.buffer + tokoff, toklen, c,
3523 &c_ext, bracelev, parlev,
3524 &funorvar))
3526 if (fvdef == foperator)
3528 char *oldlp = lp;
3529 lp = skip_spaces (lp-1);
3530 if (*lp != '\0')
3531 lp += 1;
3532 while (*lp != '\0'
3533 && !c_isspace (*lp) && *lp != '(')
3534 lp += 1;
3535 c = *lp++;
3536 toklen += lp - oldlp;
3538 token.named = false;
3539 if (!plainc
3540 && nestlev > 0 && definedef == dnone)
3541 /* in struct body */
3543 if (class_qualify)
3545 int len;
3546 write_classname (&token_name, qualifier);
3547 len = token_name.len;
3548 linebuffer_setlen (&token_name,
3549 len + qlen + toklen);
3550 sprintf (token_name.buffer + len, "%s%.*s",
3551 qualifier, toklen,
3552 newlb.buffer + tokoff);
3554 else
3556 linebuffer_setlen (&token_name, toklen);
3557 sprintf (token_name.buffer, "%.*s",
3558 toklen, newlb.buffer + tokoff);
3560 token.named = true;
3562 else if (objdef == ocatseen)
3563 /* Objective C category */
3565 if (class_qualify)
3567 int len = strlen (objtag) + 2 + toklen;
3568 linebuffer_setlen (&token_name, len);
3569 sprintf (token_name.buffer, "%s(%.*s)",
3570 objtag, toklen,
3571 newlb.buffer + tokoff);
3573 else
3575 linebuffer_setlen (&token_name, toklen);
3576 sprintf (token_name.buffer, "%.*s",
3577 toklen, newlb.buffer + tokoff);
3579 token.named = true;
3581 else if (objdef == omethodtag
3582 || objdef == omethodparm)
3583 /* Objective C method */
3585 token.named = true;
3587 else if (fvdef == fdefunname)
3588 /* GNU DEFUN and similar macros */
3590 bool defun = (newlb.buffer[tokoff] == 'F');
3591 int off = tokoff;
3592 int len = toklen;
3594 if (defun)
3596 off += 1;
3597 len -= 1;
3599 /* First, tag it as its C name */
3600 linebuffer_setlen (&token_name, toklen);
3601 memcpy (token_name.buffer,
3602 newlb.buffer + tokoff, toklen);
3603 token_name.buffer[toklen] = '\0';
3604 token.named = true;
3605 token.lineno = lineno;
3606 token.offset = tokoff;
3607 token.length = toklen;
3608 token.line = newlb.buffer;
3609 token.linepos = newlinepos;
3610 token.valid = true;
3611 make_C_tag (funorvar);
3613 /* Rewrite the tag so that emacs lisp DEFUNs
3614 can be found also by their elisp name */
3615 linebuffer_setlen (&token_name, len);
3616 memcpy (token_name.buffer,
3617 newlb.buffer + off, len);
3618 token_name.buffer[len] = '\0';
3619 if (defun)
3620 while (--len >= 0)
3621 if (token_name.buffer[len] == '_')
3622 token_name.buffer[len] = '-';
3623 token.named = defun;
3625 else
3627 linebuffer_setlen (&token_name, toklen);
3628 memcpy (token_name.buffer,
3629 newlb.buffer + tokoff, toklen);
3630 token_name.buffer[toklen] = '\0';
3631 /* Name macros and members. */
3632 token.named = (structdef == stagseen
3633 || typdef == ttypeseen
3634 || typdef == tend
3635 || (funorvar
3636 && definedef == dignorerest)
3637 || (funorvar
3638 && definedef == dnone
3639 && structdef == snone
3640 && bracelev > 0));
3642 token.lineno = lineno;
3643 token.offset = tokoff;
3644 token.length = toklen;
3645 token.line = newlb.buffer;
3646 token.linepos = newlinepos;
3647 token.valid = true;
3649 if (definedef == dnone
3650 && (fvdef == fvnameseen
3651 || fvdef == foperator
3652 || structdef == stagseen
3653 || typdef == tend
3654 || typdef == ttypeseen
3655 || objdef != onone))
3657 if (current_lb_is_new)
3658 switch_line_buffers ();
3660 else if (definedef != dnone
3661 || fvdef == fdefunname
3662 || instruct)
3663 make_C_tag (funorvar);
3665 else /* not yacc and consider_token failed */
3667 if (inattribute && fvdef == fignore)
3669 /* We have just met __attribute__ after a
3670 function parameter list: do not tag the
3671 function again. */
3672 fvdef = fvnone;
3675 midtoken = false;
3677 } /* if (endtoken (c)) */
3678 else if (intoken (c))
3679 still_in_token:
3681 toklen++;
3682 continue;
3684 } /* if (midtoken) */
3685 else if (begtoken (c))
3687 switch (definedef)
3689 case dnone:
3690 switch (fvdef)
3692 case fstartlist:
3693 /* This prevents tagging fb in
3694 void (__attribute__((noreturn)) *fb) (void);
3695 Fixing this is not easy and not very important. */
3696 fvdef = finlist;
3697 continue;
3698 case flistseen:
3699 if (plainc || declarations)
3701 make_C_tag (true); /* a function */
3702 fvdef = fignore;
3704 break;
3705 default:
3706 break;
3708 if (structdef == stagseen && !cjava)
3710 popclass_above (bracelev);
3711 structdef = snone;
3713 break;
3714 case dsharpseen:
3715 savetoken = token;
3716 break;
3717 default:
3718 break;
3720 if (!yacc_rules || lp == newlb.buffer + 1)
3722 tokoff = lp - 1 - newlb.buffer;
3723 toklen = 1;
3724 midtoken = true;
3726 continue;
3727 } /* if (begtoken) */
3728 } /* if must look at token */
3731 /* Detect end of line, colon, comma, semicolon and various braces
3732 after having handled a token.*/
3733 switch (c)
3735 case ':':
3736 if (inattribute)
3737 break;
3738 if (yacc_rules && token.offset == 0 && token.valid)
3740 make_C_tag (false); /* a yacc function */
3741 break;
3743 if (definedef != dnone)
3744 break;
3745 switch (objdef)
3747 case otagseen:
3748 objdef = oignore;
3749 make_C_tag (true); /* an Objective C class */
3750 break;
3751 case omethodtag:
3752 case omethodparm:
3753 objdef = omethodcolon;
3754 if (class_qualify)
3756 int toklen = token_name.len;
3757 linebuffer_setlen (&token_name, toklen + 1);
3758 strcpy (token_name.buffer + toklen, ":");
3760 break;
3761 default:
3762 break;
3764 if (structdef == stagseen)
3766 structdef = scolonseen;
3767 break;
3769 /* Should be useless, but may work as a safety net. */
3770 if (cplpl && fvdef == flistseen)
3772 make_C_tag (true); /* a function */
3773 fvdef = fignore;
3774 break;
3776 break;
3777 case ';':
3778 if (definedef != dnone || inattribute)
3779 break;
3780 switch (typdef)
3782 case tend:
3783 case ttypeseen:
3784 make_C_tag (false); /* a typedef */
3785 typdef = tnone;
3786 fvdef = fvnone;
3787 break;
3788 case tnone:
3789 case tinbody:
3790 case tignore:
3791 switch (fvdef)
3793 case fignore:
3794 if (typdef == tignore || cplpl)
3795 fvdef = fvnone;
3796 break;
3797 case fvnameseen:
3798 if ((globals && bracelev == 0 && (!fvextern || declarations))
3799 || (members && instruct))
3800 make_C_tag (false); /* a variable */
3801 fvextern = false;
3802 fvdef = fvnone;
3803 token.valid = false;
3804 break;
3805 case flistseen:
3806 if ((declarations
3807 && (cplpl || !instruct)
3808 && (typdef == tnone || (typdef != tignore && instruct)))
3809 || (members
3810 && plainc && instruct))
3811 make_C_tag (true); /* a function */
3812 /* FALLTHRU */
3813 default:
3814 fvextern = false;
3815 fvdef = fvnone;
3816 if (declarations
3817 && cplpl && structdef == stagseen)
3818 make_C_tag (false); /* forward declaration */
3819 else
3820 token.valid = false;
3821 } /* switch (fvdef) */
3822 /* FALLTHRU */
3823 default:
3824 if (!instruct)
3825 typdef = tnone;
3827 if (structdef == stagseen)
3828 structdef = snone;
3829 break;
3830 case ',':
3831 if (definedef != dnone || inattribute)
3832 break;
3833 switch (objdef)
3835 case omethodtag:
3836 case omethodparm:
3837 make_C_tag (true); /* an Objective C method */
3838 objdef = oinbody;
3839 break;
3840 default:
3841 break;
3843 switch (fvdef)
3845 case fdefunkey:
3846 case foperator:
3847 case fstartlist:
3848 case finlist:
3849 case fignore:
3850 break;
3851 case vignore:
3852 if (instruct && parlev == 0)
3853 fvdef = fvnone;
3854 break;
3855 case fdefunname:
3856 fvdef = fignore;
3857 break;
3858 case fvnameseen:
3859 if (parlev == 0
3860 && ((globals
3861 && bracelev == 0
3862 && templatelev == 0
3863 && (!fvextern || declarations))
3864 || (members && instruct)))
3865 make_C_tag (false); /* a variable */
3866 break;
3867 case flistseen:
3868 if ((declarations && typdef == tnone && !instruct)
3869 || (members && typdef != tignore && instruct))
3871 make_C_tag (true); /* a function */
3872 fvdef = fvnameseen;
3874 else if (!declarations)
3875 fvdef = fvnone;
3876 token.valid = false;
3877 break;
3878 default:
3879 fvdef = fvnone;
3881 if (structdef == stagseen)
3882 structdef = snone;
3883 break;
3884 case ']':
3885 if (definedef != dnone || inattribute)
3886 break;
3887 if (structdef == stagseen)
3888 structdef = snone;
3889 switch (typdef)
3891 case ttypeseen:
3892 case tend:
3893 typdef = tignore;
3894 make_C_tag (false); /* a typedef */
3895 break;
3896 case tnone:
3897 case tinbody:
3898 switch (fvdef)
3900 case foperator:
3901 case finlist:
3902 case fignore:
3903 case vignore:
3904 break;
3905 case fvnameseen:
3906 if ((members && bracelev == 1)
3907 || (globals && bracelev == 0
3908 && (!fvextern || declarations)))
3909 make_C_tag (false); /* a variable */
3910 /* FALLTHRU */
3911 default:
3912 fvdef = fvnone;
3914 break;
3915 default:
3916 break;
3918 break;
3919 case '(':
3920 if (inattribute)
3922 attrparlev++;
3923 break;
3925 if (definedef != dnone)
3926 break;
3927 if (objdef == otagseen && parlev == 0)
3928 objdef = oparenseen;
3929 switch (fvdef)
3931 case fvnameseen:
3932 if (typdef == ttypeseen
3933 && *lp != '*'
3934 && !instruct)
3936 /* This handles constructs like:
3937 typedef void OperatorFun (int fun); */
3938 make_C_tag (false);
3939 typdef = tignore;
3940 fvdef = fignore;
3941 break;
3943 /* FALLTHRU */
3944 case foperator:
3945 fvdef = fstartlist;
3946 break;
3947 case flistseen:
3948 fvdef = finlist;
3949 break;
3950 default:
3951 break;
3953 parlev++;
3954 break;
3955 case ')':
3956 if (inattribute)
3958 if (--attrparlev == 0)
3959 inattribute = false;
3960 break;
3962 if (definedef != dnone)
3963 break;
3964 if (objdef == ocatseen && parlev == 1)
3966 make_C_tag (true); /* an Objective C category */
3967 objdef = oignore;
3969 if (--parlev == 0)
3971 switch (fvdef)
3973 case fstartlist:
3974 case finlist:
3975 fvdef = flistseen;
3976 break;
3977 default:
3978 break;
3980 if (!instruct
3981 && (typdef == tend
3982 || typdef == ttypeseen))
3984 typdef = tignore;
3985 make_C_tag (false); /* a typedef */
3988 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3989 parlev = 0;
3990 break;
3991 case '{':
3992 if (definedef != dnone)
3993 break;
3994 if (typdef == ttypeseen)
3996 /* Whenever typdef is set to tinbody (currently only
3997 here), typdefbracelev should be set to bracelev. */
3998 typdef = tinbody;
3999 typdefbracelev = bracelev;
4001 switch (fvdef)
4003 case flistseen:
4004 if (cplpl && !class_qualify)
4006 /* Remove class and namespace qualifiers from the token,
4007 leaving only the method/member name. */
4008 char *cc, *uqname = token_name.buffer;
4009 char *tok_end = token_name.buffer + token_name.len;
4011 for (cc = token_name.buffer; cc < tok_end; cc++)
4013 if (*cc == ':' && cc[1] == ':')
4015 uqname = cc + 2;
4016 cc++;
4019 if (uqname > token_name.buffer)
4021 int uqlen = strlen (uqname);
4022 linebuffer_setlen (&token_name, uqlen);
4023 memmove (token_name.buffer, uqname, uqlen + 1);
4026 make_C_tag (true); /* a function */
4027 /* FALLTHRU */
4028 case fignore:
4029 fvdef = fvnone;
4030 break;
4031 case fvnone:
4032 switch (objdef)
4034 case otagseen:
4035 make_C_tag (true); /* an Objective C class */
4036 objdef = oignore;
4037 break;
4038 case omethodtag:
4039 case omethodparm:
4040 make_C_tag (true); /* an Objective C method */
4041 objdef = oinbody;
4042 break;
4043 default:
4044 /* Neutralize `extern "C" {' grot. */
4045 if (bracelev == 0 && structdef == snone && nestlev == 0
4046 && typdef == tnone)
4047 bracelev = -1;
4049 break;
4050 default:
4051 break;
4053 switch (structdef)
4055 case skeyseen: /* unnamed struct */
4056 pushclass_above (bracelev, NULL, 0);
4057 structdef = snone;
4058 break;
4059 case stagseen: /* named struct or enum */
4060 case scolonseen: /* a class */
4061 pushclass_above (bracelev,token.line+token.offset, token.length);
4062 structdef = snone;
4063 make_C_tag (false); /* a struct or enum */
4064 break;
4065 default:
4066 break;
4068 bracelev += 1;
4069 break;
4070 case '*':
4071 if (definedef != dnone)
4072 break;
4073 if (fvdef == fstartlist)
4075 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
4076 token.valid = false;
4078 break;
4079 case '}':
4080 if (definedef != dnone)
4081 break;
4082 bracelev -= 1;
4083 if (!ignoreindent && lp == newlb.buffer + 1)
4085 if (bracelev != 0)
4086 token.valid = false; /* unexpected value, token unreliable */
4087 bracelev = 0; /* reset brace level if first column */
4088 parlev = 0; /* also reset paren level, just in case... */
4090 else if (bracelev < 0)
4092 token.valid = false; /* something gone amiss, token unreliable */
4093 bracelev = 0;
4095 if (bracelev == 0 && fvdef == vignore)
4096 fvdef = fvnone; /* end of function */
4097 popclass_above (bracelev);
4098 structdef = snone;
4099 /* Only if typdef == tinbody is typdefbracelev significant. */
4100 if (typdef == tinbody && bracelev <= typdefbracelev)
4102 assert (bracelev == typdefbracelev);
4103 typdef = tend;
4105 break;
4106 case '=':
4107 if (definedef != dnone)
4108 break;
4109 switch (fvdef)
4111 case foperator:
4112 case finlist:
4113 case fignore:
4114 case vignore:
4115 break;
4116 case fvnameseen:
4117 if ((members && bracelev == 1)
4118 || (globals && bracelev == 0 && (!fvextern || declarations)))
4119 make_C_tag (false); /* a variable */
4120 /* FALLTHRU */
4121 default:
4122 fvdef = vignore;
4124 break;
4125 case '<':
4126 if (cplpl
4127 && (structdef == stagseen || fvdef == fvnameseen))
4129 templatelev++;
4130 break;
4132 goto resetfvdef;
4133 case '>':
4134 if (templatelev > 0)
4136 templatelev--;
4137 break;
4139 goto resetfvdef;
4140 case '+':
4141 case '-':
4142 if (objdef == oinbody && bracelev == 0)
4144 objdef = omethodsign;
4145 break;
4147 /* FALLTHRU */
4148 resetfvdef:
4149 case '#': case '~': case '&': case '%': case '/':
4150 case '|': case '^': case '!': case '.': case '?':
4151 if (definedef != dnone)
4152 break;
4153 /* These surely cannot follow a function tag in C. */
4154 switch (fvdef)
4156 case foperator:
4157 case finlist:
4158 case fignore:
4159 case vignore:
4160 break;
4161 default:
4162 fvdef = fvnone;
4164 break;
4165 case '\0':
4166 if (objdef == otagseen)
4168 make_C_tag (true); /* an Objective C class */
4169 objdef = oignore;
4171 /* If a macro spans multiple lines don't reset its state. */
4172 if (quotednl)
4173 CNL_SAVE_DEFINEDEF ();
4174 else
4175 CNL ();
4176 break;
4177 } /* switch (c) */
4179 } /* while not eof */
/* Release the storage of both line buffers. */
4181 free (lbs[0].lb.buffer);
4182 free (lbs[1].lb.buffer);
4186 * Process either a C++ file or a C file depending on the setting
4187 * of a global flag.
4189 static void
4190 default_C_entries (FILE *inf)
4192 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: no extension flags are handed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4202 /* Always do C++. */
4203 static void
4204 Cplusplus_entries (FILE *inf)
4206 C_entries (C_PLPL, inf);
4209 /* Always do Java. */
4210 static void
4211 Cjava_entries (FILE *inf)
4213 C_entries (C_JAVA, inf);
4216 /* Always do C*. */
4217 static void
4218 Cstar_entries (FILE *inf)
4220 C_entries (C_STAR, inf);
4223 /* Always do Yacc. */
4224 static void
4225 Yacc_entries (FILE *inf)
4227 C_entries (YACC, inf);
4231 /* Useful macros. */
/* Loop header: while FILE_POINTER may still yield input, read the next
   line into LINE_BUFFER and point CHAR_POINTER at its first character.
   The statement that follows the macro invocation is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;								\
       perhaps_more_input (file_pointer)				\
       && (readline (&(line_buffer), file_pointer),			\
	   (char_pointer) = (line_buffer).buffer,			\
	   true);							\
       )
/* True when the text at CP begins with the literal string KW and the
   character right after it satisfies notinname.  On a match CP is moved
   past KW and then through skip_spaces; on failure the && chain stops
   before the assignment, so CP is left untouched.  CP is evaluated more
   than once and must be an lvalue without side effects.  */
4238 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4239 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4240 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4241 && notinname ((cp)[sizeof (kw)-1]) /* kw is followed by a non-name char */ \
4242 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip kw and spaces */
4244 /* Similar to LOOKING_AT, but compares with strncaseeq (presumably
   case-insensitive -- confirm against its definition), does not use
   notinname to check what follows the keyword, and does not skip
   spaces: on a match CP is only advanced past KW.  */
4245 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4246 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4247 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4248 && ((cp) += sizeof (kw) - 1, true)) /* advance past kw; spaces are NOT skipped */
4251 * Read a file, but do no processing. This is used to do regexp
4252 * matching on files that have no language defined.
4254 static void
4255 just_read_file (FILE *inf)
4257 while (perhaps_more_input (inf))
4258 readline (&lb, inf);
4262 /* Fortran parsing */
4264 static void F_takeprec (void);
4265 static void F_getit (FILE *);
4267 static void
4268 F_takeprec (void)
4270 dbp = skip_spaces (dbp);
4271 if (*dbp != '*')
4272 return;
4273 dbp++;
4274 dbp = skip_spaces (dbp);
4275 if (strneq (dbp, "(*)", 3))
4277 dbp += 3;
4278 return;
4280 if (!c_isdigit (*dbp))
4282 --dbp; /* force failure */
4283 return;
4286 dbp++;
4287 while (c_isdigit (*dbp));
4290 static void
4291 F_getit (FILE *inf)
4293 register char *cp;
4295 dbp = skip_spaces (dbp);
4296 if (*dbp == '\0')
4298 readline (&lb, inf);
4299 dbp = lb.buffer;
4300 if (dbp[5] != '&')
4301 return;
4302 dbp += 6;
4303 dbp = skip_spaces (dbp);
4305 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4306 return;
4307 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4308 continue;
4309 make_tag (dbp, cp-dbp, true,
4310 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4314 static void
4315 Fortran_functions (FILE *inf)
4317 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4319 if (*dbp == '%')
4320 dbp++; /* Ratfor escape to fortran */
4321 dbp = skip_spaces (dbp);
4322 if (*dbp == '\0')
4323 continue;
4325 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4326 dbp = skip_spaces (dbp);
4328 if (LOOKING_AT_NOCASE (dbp, "pure"))
4329 dbp = skip_spaces (dbp);
4331 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4332 dbp = skip_spaces (dbp);
4334 switch (c_tolower (*dbp))
4336 case 'i':
4337 if (nocase_tail ("integer"))
4338 F_takeprec ();
4339 break;
4340 case 'r':
4341 if (nocase_tail ("real"))
4342 F_takeprec ();
4343 break;
4344 case 'l':
4345 if (nocase_tail ("logical"))
4346 F_takeprec ();
4347 break;
4348 case 'c':
4349 if (nocase_tail ("complex") || nocase_tail ("character"))
4350 F_takeprec ();
4351 break;
4352 case 'd':
4353 if (nocase_tail ("double"))
4355 dbp = skip_spaces (dbp);
4356 if (*dbp == '\0')
4357 continue;
4358 if (nocase_tail ("precision"))
4359 break;
4360 continue;
4362 break;
4364 dbp = skip_spaces (dbp);
4365 if (*dbp == '\0')
4366 continue;
4367 switch (c_tolower (*dbp))
4369 case 'f':
4370 if (nocase_tail ("function"))
4371 F_getit (inf);
4372 continue;
4373 case 's':
4374 if (nocase_tail ("subroutine"))
4375 F_getit (inf);
4376 continue;
4377 case 'e':
4378 if (nocase_tail ("entry"))
4379 F_getit (inf);
4380 continue;
4381 case 'b':
4382 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4384 dbp = skip_spaces (dbp);
4385 if (*dbp == '\0') /* assume un-named */
4386 make_tag ("blockdata", 9, true,
4387 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4388 else
4389 F_getit (inf); /* look for name */
4391 continue;
4398 * Go language support
4399 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4401 static void
4402 Go_functions(FILE *inf)
4404 char *cp, *name;
4406 LOOP_ON_INPUT_LINES(inf, lb, cp)
4408 cp = skip_spaces (cp);
4410 if (LOOKING_AT (cp, "package"))
4412 name = cp;
4413 while (!notinname (*cp) && *cp != '\0')
4414 cp++;
4415 make_tag (name, cp - name, false, lb.buffer,
4416 cp - lb.buffer + 1, lineno, linecharno);
4418 else if (LOOKING_AT (cp, "func"))
4420 /* Go implementation of interface, such as:
4421 func (n *Integer) Add(m Integer) ...
4422 skip `(n *Integer)` part.
4424 if (*cp == '(')
4426 while (*cp != ')')
4427 cp++;
4428 cp = skip_spaces (cp+1);
4431 if (*cp)
4433 name = cp;
4435 while (!notinname (*cp))
4436 cp++;
4438 make_tag (name, cp - name, true, lb.buffer,
4439 cp - lb.buffer + 1, lineno, linecharno);
4442 else if (members && LOOKING_AT (cp, "type"))
4444 name = cp;
4446 /* Ignore the likes of the following:
4447 type (
4451 if (*cp == '(')
4452 return;
4454 while (!notinname (*cp) && *cp != '\0')
4455 cp++;
4457 make_tag (name, cp - name, false, lb.buffer,
4458 cp - lb.buffer + 1, lineno, linecharno);
4465 * Ada parsing
4466 * Original code by
4467 * Philippe Waroquiers (1998)
4470 /* Once we are positioned after an "interesting" keyword, let's get
4471 the real tag value necessary. */
4472 static void
4473 Ada_getit (FILE *inf, const char *name_qualifier)
4475 register char *cp;
4476 char *name;
4477 char c;
4479 while (perhaps_more_input (inf))
4481 dbp = skip_spaces (dbp);
4482 if (*dbp == '\0'
4483 || (dbp[0] == '-' && dbp[1] == '-'))
4485 readline (&lb, inf);
4486 dbp = lb.buffer;
4488 switch (c_tolower (*dbp))
4490 case 'b':
4491 if (nocase_tail ("body"))
4493 /* Skipping body of procedure body or package body or ....
4494 resetting qualifier to body instead of spec. */
4495 name_qualifier = "/b";
4496 continue;
4498 break;
4499 case 't':
4500 /* Skipping type of task type or protected type ... */
4501 if (nocase_tail ("type"))
4502 continue;
4503 break;
4505 if (*dbp == '"')
4507 dbp += 1;
4508 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4509 continue;
4511 else
4513 dbp = skip_spaces (dbp);
4514 for (cp = dbp;
4515 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4516 cp++)
4517 continue;
4518 if (cp == dbp)
4519 return;
4521 c = *cp;
4522 *cp = '\0';
4523 name = concat (dbp, name_qualifier, "");
4524 *cp = c;
4525 make_tag (name, strlen (name), true,
4526 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4527 free (name);
4528 if (c == '"')
4529 dbp = cp + 1;
4530 return;
4534 static void
4535 Ada_funcs (FILE *inf)
4537 bool inquote = false;
4538 bool skip_till_semicolumn = false;
4540 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4542 while (*dbp != '\0')
4544 /* Skip a string i.e. "abcd". */
4545 if (inquote || (*dbp == '"'))
4547 dbp = strchr (dbp + !inquote, '"');
4548 if (dbp != NULL)
4550 inquote = false;
4551 dbp += 1;
4552 continue; /* advance char */
4554 else
4556 inquote = true;
4557 break; /* advance line */
4561 /* Skip comments. */
4562 if (dbp[0] == '-' && dbp[1] == '-')
4563 break; /* advance line */
4565 /* Skip character enclosed in single quote i.e. 'a'
4566 and skip single quote starting an attribute i.e. 'Image. */
4567 if (*dbp == '\'')
4569 dbp++ ;
4570 if (*dbp != '\0')
4571 dbp++;
4572 continue;
4575 if (skip_till_semicolumn)
4577 if (*dbp == ';')
4578 skip_till_semicolumn = false;
4579 dbp++;
4580 continue; /* advance char */
4583 /* Search for beginning of a token. */
4584 if (!begtoken (*dbp))
4586 dbp++;
4587 continue; /* advance char */
4590 /* We are at the beginning of a token. */
4591 switch (c_tolower (*dbp))
4593 case 'f':
4594 if (!packages_only && nocase_tail ("function"))
4595 Ada_getit (inf, "/f");
4596 else
4597 break; /* from switch */
4598 continue; /* advance char */
4599 case 'p':
4600 if (!packages_only && nocase_tail ("procedure"))
4601 Ada_getit (inf, "/p");
4602 else if (nocase_tail ("package"))
4603 Ada_getit (inf, "/s");
4604 else if (nocase_tail ("protected")) /* protected type */
4605 Ada_getit (inf, "/t");
4606 else
4607 break; /* from switch */
4608 continue; /* advance char */
4610 case 'u':
4611 if (typedefs && !packages_only && nocase_tail ("use"))
4613 /* when tagging types, avoid tagging use type Pack.Typename;
4614 for this, we will skip everything till a ; */
4615 skip_till_semicolumn = true;
4616 continue; /* advance char */
4619 case 't':
4620 if (!packages_only && nocase_tail ("task"))
4621 Ada_getit (inf, "/k");
4622 else if (typedefs && !packages_only && nocase_tail ("type"))
4624 Ada_getit (inf, "/t");
4625 while (*dbp != '\0')
4626 dbp += 1;
4628 else
4629 break; /* from switch */
4630 continue; /* advance char */
4633 /* Look for the end of the token. */
4634 while (!endtoken (*dbp))
4635 dbp++;
4637 } /* advance char */
4638 } /* advance line */
4643 * Unix and microcontroller assembly tag handling
4644 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4645 * Idea by Bob Weiner, Motorola Inc. (1994)
4647 static void
4648 Asm_labels (FILE *inf)
4650 register char *cp;
4652 LOOP_ON_INPUT_LINES (inf, lb, cp)
4654 /* If first char is alphabetic or one of [_.$], test for colon
4655 following identifier. */
4656 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4658 /* Read past label. */
4659 cp++;
4660 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4661 cp++;
4662 if (*cp == ':' || c_isspace (*cp))
4663 /* Found end of label, so copy it and add it to the table. */
4664 make_tag (lb.buffer, cp - lb.buffer, true,
4665 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4672 * Perl support
4673 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4674 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4675 * Perl variable names: /^(my|local).../
4676 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4677 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4678 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4680 static void
4681 Perl_functions (FILE *inf)
4683 char *package = savestr ("main"); /* current package name */
4684 register char *cp;
4686 LOOP_ON_INPUT_LINES (inf, lb, cp)
4688 cp = skip_spaces (cp);
4690 if (LOOKING_AT (cp, "package"))
4692 free (package);
4693 get_tag (cp, &package);
4695 else if (LOOKING_AT (cp, "sub"))
4697 char *pos, *sp;
4699 subr:
4700 sp = cp;
4701 while (!notinname (*cp))
4702 cp++;
4703 if (cp == sp)
4704 continue; /* nothing found */
4705 pos = strchr (sp, ':');
4706 if (pos && pos < cp && pos[1] == ':')
4708 /* The name is already qualified. */
4709 if (!class_qualify)
4711 char *q = pos + 2, *qpos;
4712 while ((qpos = strchr (q, ':')) != NULL
4713 && qpos < cp
4714 && qpos[1] == ':')
4715 q = qpos + 2;
4716 sp = q;
4718 make_tag (sp, cp - sp, true,
4719 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4721 else if (class_qualify)
4722 /* Qualify it. */
4724 char savechar, *name;
4726 savechar = *cp;
4727 *cp = '\0';
4728 name = concat (package, "::", sp);
4729 *cp = savechar;
4730 make_tag (name, strlen (name), true,
4731 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4732 free (name);
4734 else
4735 make_tag (sp, cp - sp, true,
4736 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4738 else if (LOOKING_AT (cp, "use constant")
4739 || LOOKING_AT (cp, "use constant::defer"))
4741 /* For hash style multi-constant like
4742 use constant { FOO => 123,
4743 BAR => 456 };
4744 only the first FOO is picked up. Parsing across the value
4745 expressions would be difficult in general, due to possible nested
4746 hashes, here-documents, etc. */
4747 if (*cp == '{')
4748 cp = skip_spaces (cp+1);
4749 goto subr;
4751 else if (globals) /* only if we are tagging global vars */
4753 /* Skip a qualifier, if any. */
4754 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4755 /* After "my" or "local", but before any following paren or space. */
4756 char *varstart = cp;
4758 if (qual /* should this be removed? If yes, how? */
4759 && (*cp == '$' || *cp == '@' || *cp == '%'))
4761 varstart += 1;
4763 cp++;
4764 while (c_isalnum (*cp) || *cp == '_');
4766 else if (qual)
4768 /* Should be examining a variable list at this point;
4769 could insist on seeing an open parenthesis. */
4770 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4771 cp++;
4773 else
4774 continue;
4776 make_tag (varstart, cp - varstart, false,
4777 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4780 free (package);
4785 * Python support
4786 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4787 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4788 * More ideas by seb bacon <seb@jamkit.com> (2002)
4790 static void
4791 Python_functions (FILE *inf)
4793 register char *cp;
4795 LOOP_ON_INPUT_LINES (inf, lb, cp)
4797 cp = skip_spaces (cp);
4798 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4800 char *name = cp;
4801 while (!notinname (*cp) && *cp != ':')
4802 cp++;
4803 make_tag (name, cp - name, true,
4804 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4810 * Ruby support
4811 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4813 static void
4814 Ruby_functions (FILE *inf)
4816 char *cp = NULL;
4817 bool reader = false, writer = false, alias = false, continuation = false;
4819 LOOP_ON_INPUT_LINES (inf, lb, cp)
4821 bool is_class = false;
4822 bool is_method = false;
4823 char *name;
4825 cp = skip_spaces (cp);
4826 if (!continuation
4827 /* Constants. */
4828 && c_isalpha (*cp) && c_isupper (*cp))
4830 char *bp, *colon = NULL;
4832 name = cp;
4834 for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4836 if (*cp == ':')
4837 colon = cp;
4839 if (cp > name + 1)
4841 bp = skip_spaces (cp);
4842 if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4844 if (colon && !c_isspace (colon[1]))
4845 name = colon + 1;
4846 make_tag (name, cp - name, false,
4847 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4851 else if (!continuation
4852 /* Modules, classes, methods. */
4853 && ((is_method = LOOKING_AT (cp, "def"))
4854 || (is_class = LOOKING_AT (cp, "class"))
4855 || LOOKING_AT (cp, "module")))
4857 const char self_name[] = "self.";
4858 const size_t self_size1 = sizeof (self_name) - 1;
4860 name = cp;
4862 /* Ruby method names can end in a '='. Also, operator overloading can
4863 define operators whose names include '='. */
4864 while (!notinname (*cp) || *cp == '=')
4865 cp++;
4867 /* Remove "self." from the method name. */
4868 if (cp - name > self_size1
4869 && strneq (name, self_name, self_size1))
4870 name += self_size1;
4872 /* Remove the class/module qualifiers from method names. */
4873 if (is_method)
4875 char *q;
4877 for (q = name; q < cp && *q != '.'; q++)
4879 if (q < cp - 1) /* punt if we see just "FOO." */
4880 name = q + 1;
4883 /* Don't tag singleton classes. */
4884 if (is_class && strneq (name, "<<", 2) && cp == name + 2)
4885 continue;
4887 make_tag (name, cp - name, true,
4888 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4890 else
4892 /* Tag accessors and aliases. */
4894 if (!continuation)
4895 reader = writer = alias = false;
4897 while (*cp && *cp != '#')
4899 if (!continuation)
4901 reader = writer = alias = false;
4902 if (LOOKING_AT (cp, "attr_reader"))
4903 reader = true;
4904 else if (LOOKING_AT (cp, "attr_writer"))
4905 writer = true;
4906 else if (LOOKING_AT (cp, "attr_accessor"))
4908 reader = true;
4909 writer = true;
4911 else if (LOOKING_AT (cp, "alias_method"))
4912 alias = true;
4914 if (reader || writer || alias)
4916 do {
4917 char *np;
4919 cp = skip_spaces (cp);
4920 if (*cp == '(')
4921 cp = skip_spaces (cp + 1);
4922 np = cp;
4923 cp = skip_name (cp);
4924 if (*np != ':')
4925 continue;
4926 np++;
4927 if (reader)
4929 make_tag (np, cp - np, true,
4930 lb.buffer, cp - lb.buffer + 1,
4931 lineno, linecharno);
4932 continuation = false;
4934 if (writer)
4936 size_t name_len = cp - np + 1;
4937 char *wr_name = xnew (name_len + 1, char);
4939 memcpy (wr_name, np, name_len - 1);
4940 memcpy (wr_name + name_len - 1, "=", 2);
4941 pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
4942 lineno, linecharno);
4943 continuation = false;
4945 if (alias)
4947 if (!continuation)
4948 make_tag (np, cp - np, true,
4949 lb.buffer, cp - lb.buffer + 1,
4950 lineno, linecharno);
4951 continuation = false;
4952 while (*cp && *cp != '#' && *cp != ';')
4954 if (*cp == ',')
4955 continuation = true;
4956 else if (!c_isspace (*cp))
4957 continuation = false;
4958 cp++;
4960 if (*cp == ';')
4961 continuation = false;
4963 cp = skip_spaces (cp);
4964 } while ((alias
4965 ? (*cp == ',')
4966 : (continuation = (*cp == ',')))
4967 && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
4969 if (*cp != '#')
4970 cp = skip_name (cp);
4971 while (*cp && *cp != '#' && notinname (*cp))
4972 cp++;
4980 * PHP support
4981 * Look for:
4982 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4983 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4984 * - /^[ \t]*define\(\"[^\"]+/
4985 * Only with --members:
4986 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4987 * Idea by Diez B. Roggisch (2001)
4989 static void
4990 PHP_functions (FILE *inf)
4992 char *cp, *name;
4993 bool search_identifier = false;
4995 LOOP_ON_INPUT_LINES (inf, lb, cp)
4997 cp = skip_spaces (cp);
4998 name = cp;
4999 if (search_identifier
5000 && *cp != '\0')
5002 while (!notinname (*cp))
5003 cp++;
5004 make_tag (name, cp - name, true,
5005 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5006 search_identifier = false;
5008 else if (LOOKING_AT (cp, "function"))
5010 if (*cp == '&')
5011 cp = skip_spaces (cp+1);
5012 if (*cp != '\0')
5014 name = cp;
5015 while (!notinname (*cp))
5016 cp++;
5017 make_tag (name, cp - name, true,
5018 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5020 else
5021 search_identifier = true;
5023 else if (LOOKING_AT (cp, "class"))
5025 if (*cp != '\0')
5027 name = cp;
5028 while (*cp != '\0' && !c_isspace (*cp))
5029 cp++;
5030 make_tag (name, cp - name, false,
5031 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5033 else
5034 search_identifier = true;
5036 else if (strneq (cp, "define", 6)
5037 && (cp = skip_spaces (cp+6))
5038 && *cp++ == '('
5039 && (*cp == '"' || *cp == '\''))
5041 char quote = *cp++;
5042 name = cp;
5043 while (*cp != quote && *cp != '\0')
5044 cp++;
5045 make_tag (name, cp - name, false,
5046 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5048 else if (members
5049 && LOOKING_AT (cp, "var")
5050 && *cp == '$')
5052 name = cp;
5053 while (!notinname (*cp))
5054 cp++;
5055 make_tag (name, cp - name, false,
5056 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5063 * Cobol tag functions
5064 * We could look for anything that could be a paragraph name.
5065 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5066 * Idea by Corny de Souza (1993)
5068 static void
5069 Cobol_paragraphs (FILE *inf)
5071 register char *bp, *ep;
5073 LOOP_ON_INPUT_LINES (inf, lb, bp)
5075 if (lb.len < 9)
5076 continue;
5077 bp += 8;
5079 /* If eoln, compiler option or comment ignore whole line. */
5080 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5081 continue;
5083 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5084 continue;
5085 if (*ep++ == '.')
5086 make_tag (bp, ep - bp, true,
5087 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5093 * Makefile support
5094 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5096 static void
5097 Makefile_targets (FILE *inf)
5099 register char *bp;
5101 LOOP_ON_INPUT_LINES (inf, lb, bp)
5103 if (*bp == '\t' || *bp == '#')
5104 continue;
5105 while (*bp != '\0' && *bp != '=' && *bp != ':')
5106 bp++;
5107 if (*bp == ':' || (globals && *bp == '='))
5109 /* We should detect if there is more than one tag, but we do not.
5110 We just skip initial and final spaces. */
5111 char * namestart = skip_spaces (lb.buffer);
5112 while (--bp > namestart)
5113 if (!notinname (*bp))
5114 break;
5115 make_tag (namestart, bp - namestart + 1, true,
5116 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5123 * Pascal parsing
5124 * Original code by Mosur K. Mohan (1989)
5126 * Locates tags for procedures & functions. Doesn't do any type- or
5127 * var-definitions. It does look for the keyword "extern" or
5128 * "forward" immediately following the procedure statement; if found,
5129 * the tag is skipped.
5131 static void
5132 Pascal_functions (FILE *inf)
5134 linebuffer tline; /* mostly copied from C_entries */
5135 long save_lcno;
5136 int save_lineno, namelen, taglen;
5137 char c, *name;
5139 bool /* each of these flags is true if: */
5140 incomment, /* point is inside a comment */
5141 inquote, /* point is inside '..' string */
5142 get_tagname, /* point is after PROCEDURE/FUNCTION
5143 keyword, so next item = potential tag */
5144 found_tag, /* point is after a potential tag */
5145 inparms, /* point is within parameter-list */
5146 verify_tag; /* point has passed the parm-list, so the
5147 next token will determine whether this
5148 is a FORWARD/EXTERN to be ignored, or
5149 whether it is a real tag */
5151 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
5152 name = NULL; /* keep compiler quiet */
5153 dbp = lb.buffer;
5154 *dbp = '\0';
5155 linebuffer_init (&tline);
5157 incomment = inquote = false;
5158 found_tag = false; /* have a proc name; check if extern */
5159 get_tagname = false; /* found "procedure" keyword */
5160 inparms = false; /* found '(' after "proc" */
5161 verify_tag = false; /* check if "extern" is ahead */
5164 while (perhaps_more_input (inf)) /* long main loop to get next char */
5166 c = *dbp++;
5167 if (c == '\0') /* if end of line */
5169 readline (&lb, inf);
5170 dbp = lb.buffer;
5171 if (*dbp == '\0')
5172 continue;
5173 if (!((found_tag && verify_tag)
5174 || get_tagname))
5175 c = *dbp++; /* only if don't need *dbp pointing
5176 to the beginning of the name of
5177 the procedure or function */
5179 if (incomment)
5181 if (c == '}') /* within { } comments */
5182 incomment = false;
5183 else if (c == '*' && *dbp == ')') /* within (* *) comments */
5185 dbp++;
5186 incomment = false;
5188 continue;
5190 else if (inquote)
5192 if (c == '\'')
5193 inquote = false;
5194 continue;
5196 else
5197 switch (c)
5199 case '\'':
5200 inquote = true; /* found first quote */
5201 continue;
5202 case '{': /* found open { comment */
5203 incomment = true;
5204 continue;
5205 case '(':
5206 if (*dbp == '*') /* found open (* comment */
5208 incomment = true;
5209 dbp++;
5211 else if (found_tag) /* found '(' after tag, i.e., parm-list */
5212 inparms = true;
5213 continue;
5214 case ')': /* end of parms list */
5215 if (inparms)
5216 inparms = false;
5217 continue;
5218 case ';':
5219 if (found_tag && !inparms) /* end of proc or fn stmt */
5221 verify_tag = true;
5222 break;
5224 continue;
5226 if (found_tag && verify_tag && (*dbp != ' '))
5228 /* Check if this is an "extern" declaration. */
5229 if (*dbp == '\0')
5230 continue;
5231 if (c_tolower (*dbp) == 'e')
5233 if (nocase_tail ("extern")) /* superfluous, really! */
5235 found_tag = false;
5236 verify_tag = false;
5239 else if (c_tolower (*dbp) == 'f')
5241 if (nocase_tail ("forward")) /* check for forward reference */
5243 found_tag = false;
5244 verify_tag = false;
5247 if (found_tag && verify_tag) /* not external proc, so make tag */
5249 found_tag = false;
5250 verify_tag = false;
5251 make_tag (name, namelen, true,
5252 tline.buffer, taglen, save_lineno, save_lcno);
5253 continue;
5256 if (get_tagname) /* grab name of proc or fn */
5258 char *cp;
5260 if (*dbp == '\0')
5261 continue;
5263 /* Find block name. */
5264 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
5265 continue;
5267 /* Save all values for later tagging. */
5268 linebuffer_setlen (&tline, lb.len);
5269 strcpy (tline.buffer, lb.buffer);
5270 save_lineno = lineno;
5271 save_lcno = linecharno;
5272 name = tline.buffer + (dbp - lb.buffer);
5273 namelen = cp - dbp;
5274 taglen = cp - lb.buffer + 1;
5276 dbp = cp; /* set dbp to e-o-token */
5277 get_tagname = false;
5278 found_tag = true;
5279 continue;
5281 /* And proceed to check for "extern". */
5283 else if (!incomment && !inquote && !found_tag)
5285 /* Check for proc/fn keywords. */
5286 switch (c_tolower (c))
5288 case 'p':
5289 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5290 get_tagname = true;
5291 continue;
5292 case 'f':
5293 if (nocase_tail ("unction"))
5294 get_tagname = true;
5295 continue;
5298 } /* while not eof */
5300 free (tline.buffer);
5305 * Lisp tag functions
5306 * look for (def or (DEF, quote or QUOTE
5309 static void L_getit (void);
5311 static void
5312 L_getit (void)
5314 if (*dbp == '\'') /* Skip prefix quote */
5315 dbp++;
5316 else if (*dbp == '(')
5318 dbp++;
5319 /* Try to skip "(quote " */
5320 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5321 /* Ok, then skip "(" before name in (defstruct (foo)) */
5322 dbp = skip_spaces (dbp);
5324 get_tag (dbp, NULL);
5327 static void
5328 Lisp_functions (FILE *inf)
5330 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5332 if (dbp[0] != '(')
5333 continue;
5335 /* "(defvar foo)" is a declaration rather than a definition. */
5336 if (! declarations)
5338 char *p = dbp + 1;
5339 if (LOOKING_AT (p, "defvar"))
5341 p = skip_name (p); /* past var name */
5342 p = skip_spaces (p);
5343 if (*p == ')')
5344 continue;
5348 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5349 dbp += 3;
5351 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5353 dbp = skip_non_spaces (dbp);
5354 dbp = skip_spaces (dbp);
5355 L_getit ();
5357 else
5359 /* Check for (foo::defmumble name-defined ... */
5361 dbp++;
5362 while (!notinname (*dbp) && *dbp != ':');
5363 if (*dbp == ':')
5366 dbp++;
5367 while (*dbp == ':');
5369 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5371 dbp = skip_non_spaces (dbp);
5372 dbp = skip_spaces (dbp);
5373 L_getit ();
5382 * Lua script language parsing
5383 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5385 * "function" and "local function" are tags if they start at column 1.
5387 static void
5388 Lua_functions (FILE *inf)
5390 register char *bp;
5392 LOOP_ON_INPUT_LINES (inf, lb, bp)
5394 bp = skip_spaces (bp);
5395 if (bp[0] != 'f' && bp[0] != 'l')
5396 continue;
5398 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5400 if (LOOKING_AT (bp, "function"))
5402 char *tag_name, *tp_dot, *tp_colon;
5404 get_tag (bp, &tag_name);
5405 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5406 "foo". */
5407 tp_dot = strrchr (tag_name, '.');
5408 tp_colon = strrchr (tag_name, ':');
5409 if (tp_dot || tp_colon)
5411 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5412 int len_add = p - tag_name + 1;
5414 get_tag (bp + len_add, NULL);
5422 * PostScript tags
5423 * Just look for lines where the first character is '/'
5424 * Also look at "defineps" for PSWrap
5425 * Ideas by:
5426 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5427 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5429 static void
5430 PS_functions (FILE *inf)
5432 register char *bp, *ep;
5434 LOOP_ON_INPUT_LINES (inf, lb, bp)
5436 if (bp[0] == '/')
5438 for (ep = bp+1;
5439 *ep != '\0' && *ep != ' ' && *ep != '{';
5440 ep++)
5441 continue;
5442 make_tag (bp, ep - bp, true,
5443 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5445 else if (LOOKING_AT (bp, "defineps"))
5446 get_tag (bp, NULL);
5452 * Forth tags
5453 * Ignore anything after \ followed by space or in ( )
5454 * Look for words defined by :
5455 * Look for constant, code, create, defer, value, and variable
5456 * OBP extensions: Look for buffer:, field,
5457 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5459 static void
5460 Forth_words (FILE *inf)
5462 register char *bp;
5464 LOOP_ON_INPUT_LINES (inf, lb, bp)
5465 while ((bp = skip_spaces (bp))[0] != '\0')
5466 if (bp[0] == '\\' && c_isspace (bp[1]))
5467 break; /* read next line */
5468 else if (bp[0] == '(' && c_isspace (bp[1]))
5469 do /* skip to ) or eol */
5470 bp++;
5471 while (*bp != ')' && *bp != '\0');
5472 else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5473 || LOOKING_AT_NOCASE (bp, "constant")
5474 || LOOKING_AT_NOCASE (bp, "2constant")
5475 || LOOKING_AT_NOCASE (bp, "fconstant")
5476 || LOOKING_AT_NOCASE (bp, "code")
5477 || LOOKING_AT_NOCASE (bp, "create")
5478 || LOOKING_AT_NOCASE (bp, "defer")
5479 || LOOKING_AT_NOCASE (bp, "value")
5480 || LOOKING_AT_NOCASE (bp, "2value")
5481 || LOOKING_AT_NOCASE (bp, "fvalue")
5482 || LOOKING_AT_NOCASE (bp, "variable")
5483 || LOOKING_AT_NOCASE (bp, "2variable")
5484 || LOOKING_AT_NOCASE (bp, "fvariable")
5485 || LOOKING_AT_NOCASE (bp, "buffer:")
5486 || LOOKING_AT_NOCASE (bp, "field:")
5487 || LOOKING_AT_NOCASE (bp, "+field")
5488 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5489 || LOOKING_AT_NOCASE (bp, "begin-structure")
5490 || LOOKING_AT_NOCASE (bp, "synonym")
5492 && c_isspace (bp[0]))
5494 /* Yay! A definition! */
5495 char* name_start = skip_spaces (bp);
5496 char* name_end = skip_non_spaces (name_start);
5497 if (name_start < name_end)
5498 make_tag (name_start, name_end - name_start,
5499 true, lb.buffer, name_end - lb.buffer,
5500 lineno, linecharno);
5501 bp = name_end;
5503 else
5504 bp = skip_non_spaces (bp);
5509 * Scheme tag functions
5510 * look for (def... xyzzy
5511 * (def... (xyzzy
5512 * (def ... ((...(xyzzy ....
5513 * (set! xyzzy
5514 * Original code by Ken Haase (1985?)
5516 static void
5517 Scheme_functions (FILE *inf)
5519 register char *bp;
5521 LOOP_ON_INPUT_LINES (inf, lb, bp)
5523 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5525 bp = skip_non_spaces (bp+4);
5526 /* Skip over open parens and white space. Don't continue past
5527 '\0'. */
5528 while (*bp && notinname (*bp))
5529 bp++;
5530 get_tag (bp, NULL);
5532 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5533 get_tag (bp, NULL);
5538 /* Find tags in TeX and LaTeX input files. */
5540 /* TEX_toktab is a table of TeX control sequences that define tags.
5541 * Each entry records one such control sequence.
5543 * Original code from who knows whom.
5544 * Ideas by:
5545 * Stefan Monnier (2002)
5548 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5550 /* Default set of control sequences to put into TEX_toktab.
5551 The value of environment var TEXTAGS is prepended to this. */
5552 static const char *TEX_defenv = "\
5553 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5554 :part:appendix:entry:index:def\
5555 :newcommand:renewcommand:newenvironment:renewenvironment";
5557 static void TEX_decode_env (const char *, const char *);
5560 * TeX/LaTeX scanning loop.
5562 static void
5563 TeX_commands (FILE *inf)
5565 char *cp;
5566 linebuffer *key;
5568 char TEX_esc = '\0';
5569 char TEX_opgrp, TEX_clgrp;
5571 /* Initialize token table once from environment. */
5572 if (TEX_toktab == NULL)
5573 TEX_decode_env ("TEXTAGS", TEX_defenv);
5575 LOOP_ON_INPUT_LINES (inf, lb, cp)
5577 /* Look at each TEX keyword in line. */
5578 for (;;)
5580 /* Look for a TEX escape. */
5581 while (true)
5583 char c = *cp++;
5584 if (c == '\0' || c == '%')
5585 goto tex_next_line;
5587 /* Select either \ or ! as escape character, whichever comes
5588 first outside a comment. */
5589 if (!TEX_esc)
5590 switch (c)
5592 case '\\':
5593 TEX_esc = c;
5594 TEX_opgrp = '{';
5595 TEX_clgrp = '}';
5596 break;
5598 case '!':
5599 TEX_esc = c;
5600 TEX_opgrp = '<';
5601 TEX_clgrp = '>';
5602 break;
5605 if (c == TEX_esc)
5606 break;
5609 for (key = TEX_toktab; key->buffer != NULL; key++)
5610 if (strneq (cp, key->buffer, key->len))
5612 char *p;
5613 int namelen, linelen;
5614 bool opgrp = false;
5616 cp = skip_spaces (cp + key->len);
5617 if (*cp == TEX_opgrp)
5619 opgrp = true;
5620 cp++;
5622 for (p = cp;
5623 (!c_isspace (*p) && *p != '#' &&
5624 *p != TEX_opgrp && *p != TEX_clgrp);
5625 p++)
5626 continue;
5627 namelen = p - cp;
5628 linelen = lb.len;
5629 if (!opgrp || *p == TEX_clgrp)
5631 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5632 p++;
5633 linelen = p - lb.buffer + 1;
5635 make_tag (cp, namelen, true,
5636 lb.buffer, linelen, lineno, linecharno);
5637 goto tex_next_line; /* We only tag a line once */
5640 tex_next_line:
5645 /* Read environment and prepend it to the default string.
5646 Build token table. */
5647 static void
5648 TEX_decode_env (const char *evarname, const char *defenv)
5650 register const char *env, *p;
5651 int i, len;
5653 /* Append default string to environment. */
5654 env = getenv (evarname);
5655 if (!env)
5656 env = defenv;
5657 else
5658 env = concat (env, defenv, "");
5660 /* Allocate a token table */
5661 for (len = 1, p = env; (p = strchr (p, ':')); )
5662 if (*++p)
5663 len++;
5664 TEX_toktab = xnew (len, linebuffer);
5666 /* Unpack environment string into token table. Be careful about */
5667 /* zero-length strings (leading ':', "::" and trailing ':') */
5668 for (i = 0; *env != '\0';)
5670 p = strchr (env, ':');
5671 if (!p) /* End of environment string. */
5672 p = env + strlen (env);
5673 if (p - env > 0)
5674 { /* Only non-zero strings. */
5675 TEX_toktab[i].buffer = savenstr (env, p - env);
5676 TEX_toktab[i].len = p - env;
5677 i++;
5679 if (*p)
5680 env = p + 1;
5681 else
5683 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5684 TEX_toktab[i].len = 0;
5685 break;
5691 /* Texinfo support. Dave Love, Mar. 2000. */
5692 static void
5693 Texinfo_nodes (FILE *inf)
5695 char *cp, *start;
5696 LOOP_ON_INPUT_LINES (inf, lb, cp)
5697 if (LOOKING_AT (cp, "@node"))
5699 start = cp;
5700 while (*cp != '\0' && *cp != ',')
5701 cp++;
5702 make_tag (start, cp - start, true,
5703 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5709 * HTML support.
5710 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5711 * Contents of <a name=xxx> are tags with name xxx.
5713 * Francesco Potortì, 2002.
5715 static void
5716 HTML_labels (FILE *inf)
5718 bool getnext = false; /* next text outside of HTML tags is a tag */
5719 bool skiptag = false; /* skip to the end of the current HTML tag */
5720 bool intag = false; /* inside an html tag, looking for ID= */
5721 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5722 char *end;
5725 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5727 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5728 for (;;) /* loop on the same line */
5730 if (skiptag) /* skip HTML tag */
5732 while (*dbp != '\0' && *dbp != '>')
5733 dbp++;
5734 if (*dbp == '>')
5736 dbp += 1;
5737 skiptag = false;
5738 continue; /* look on the same line */
5740 break; /* go to next line */
5743 else if (intag) /* look for "name=" or "id=" */
5745 while (*dbp != '\0' && *dbp != '>'
5746 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5747 dbp++;
5748 if (*dbp == '\0')
5749 break; /* go to next line */
5750 if (*dbp == '>')
5752 dbp += 1;
5753 intag = false;
5754 continue; /* look on the same line */
5756 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5757 || LOOKING_AT_NOCASE (dbp, "id="))
5759 bool quoted = (dbp[0] == '"');
5761 if (quoted)
5762 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5763 continue;
5764 else
5765 for (end = dbp; *end != '\0' && intoken (*end); end++)
5766 continue;
5767 linebuffer_setlen (&token_name, end - dbp);
5768 memcpy (token_name.buffer, dbp, end - dbp);
5769 token_name.buffer[end - dbp] = '\0';
5771 dbp = end;
5772 intag = false; /* we found what we looked for */
5773 skiptag = true; /* skip to the end of the tag */
5774 getnext = true; /* then grab the text */
5775 continue; /* look on the same line */
5777 dbp += 1;
5780 else if (getnext) /* grab next tokens and tag them */
5782 dbp = skip_spaces (dbp);
5783 if (*dbp == '\0')
5784 break; /* go to next line */
5785 if (*dbp == '<')
5787 intag = true;
5788 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5789 continue; /* look on the same line */
5792 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5793 continue;
5794 make_tag (token_name.buffer, token_name.len, true,
5795 dbp, end - dbp, lineno, linecharno);
5796 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5797 getnext = false;
5798 break; /* go to next line */
5801 else /* look for an interesting HTML tag */
5803 while (*dbp != '\0' && *dbp != '<')
5804 dbp++;
5805 if (*dbp == '\0')
5806 break; /* go to next line */
5807 intag = true;
5808 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5810 inanchor = true;
5811 continue; /* look on the same line */
5813 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5814 || LOOKING_AT_NOCASE (dbp, "<h1>")
5815 || LOOKING_AT_NOCASE (dbp, "<h2>")
5816 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5818 intag = false;
5819 getnext = true;
5820 continue; /* look on the same line */
5822 dbp += 1;
5829 * Prolog support
5831 * Assumes that the predicate or rule starts at column 0.
5832 * Only the first clause of a predicate or rule is added.
5833 * Original code by Sunichirou Sugou (1989)
5834 * Rewritten by Anders Lindgren (1996)
5836 static size_t prolog_pr (char *, char *);
5837 static void prolog_skip_comment (linebuffer *, FILE *);
5838 static size_t prolog_atom (char *, size_t);
5840 static void
5841 Prolog_functions (FILE *inf)
5843 char *cp, *last;
5844 size_t len;
5845 size_t allocated;
5847 allocated = 0;
5848 len = 0;
5849 last = NULL;
5851 LOOP_ON_INPUT_LINES (inf, lb, cp)
5853 if (cp[0] == '\0') /* Empty line */
5854 continue;
5855 else if (c_isspace (cp[0])) /* Not a predicate */
5856 continue;
5857 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5858 prolog_skip_comment (&lb, inf);
5859 else if ((len = prolog_pr (cp, last)) > 0)
5861 /* Predicate or rule. Store the function name so that we
5862 only generate a tag for the first clause. */
5863 if (last == NULL)
5864 last = xnew (len + 1, char);
5865 else if (len + 1 > allocated)
5866 xrnew (last, len + 1, char);
5867 allocated = len + 1;
5868 memcpy (last, cp, len);
5869 last[len] = '\0';
5872 free (last);
5876 static void
5877 prolog_skip_comment (linebuffer *plb, FILE *inf)
5879 char *cp;
5883 for (cp = plb->buffer; *cp != '\0'; cp++)
5884 if (cp[0] == '*' && cp[1] == '/')
5885 return;
5886 readline (plb, inf);
5888 while (perhaps_more_input (inf));
5892 * A predicate or rule definition is added if it matches:
5893 * <beginning of line><Prolog Atom><whitespace>(
5894 * or <beginning of line><Prolog Atom><whitespace>:-
5896 * It is added to the tags database if it doesn't match the
5897 * name of the previous clause header.
5899 * Return the size of the name of the predicate or rule, or 0 if no
5900 * header was found.
5902 static size_t
5903 prolog_pr (char *s, char *last)
5905 /* Name of last clause. */
5907 size_t pos;
5908 size_t len;
5910 pos = prolog_atom (s, 0);
5911 if (! pos)
5912 return 0;
5914 len = pos;
5915 pos = skip_spaces (s + pos) - s;
5917 if ((s[pos] == '.'
5918 || (s[pos] == '(' && (pos += 1))
5919 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5920 && (last == NULL /* save only the first clause */
5921 || len != strlen (last)
5922 || !strneq (s, last, len)))
5924 make_tag (s, len, true, s, pos, lineno, linecharno);
5925 return len;
5927 else
5928 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      for (pos++; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* The atom is quoted: scan up to the closing quote.  */
  pos++;
  while (true)
    {
      switch (s[pos])
        {
        case '\'':
          pos++;
          if (s[pos] != '\'')
            return pos - start; /* that was the closing quote */
          pos++;                /* a doubled quote stands for one quote */
          break;

        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return 0;

        case '\\':
          if (s[pos + 1] == '\0')
            return 0;
          pos += 2;
          break;

        default:
          pos++;
          break;
        }
    }
}
5990 * Support for Erlang
5992 * Generates tags for functions, defines, and records.
5993 * Assumes that Erlang functions start at column 0.
5994 * Original code by Anders Lindgren (1996)
5996 static int erlang_func (char *, char *);
5997 static void erlang_attribute (char *);
5998 static int erlang_atom (char *);
6000 static void
6001 Erlang_functions (FILE *inf)
6003 char *cp, *last;
6004 int len;
6005 int allocated;
6007 allocated = 0;
6008 len = 0;
6009 last = NULL;
6011 LOOP_ON_INPUT_LINES (inf, lb, cp)
6013 if (cp[0] == '\0') /* Empty line */
6014 continue;
6015 else if (c_isspace (cp[0])) /* Not function nor attribute */
6016 continue;
6017 else if (cp[0] == '%') /* comment */
6018 continue;
6019 else if (cp[0] == '"') /* Sometimes, strings start in column one */
6020 continue;
6021 else if (cp[0] == '-') /* attribute, e.g. "-define" */
6023 erlang_attribute (cp);
6024 if (last != NULL)
6026 free (last);
6027 last = NULL;
6030 else if ((len = erlang_func (cp, last)) > 0)
6033 * Function. Store the function name so that we only
6034 * generates a tag for the first clause.
6036 if (last == NULL)
6037 last = xnew (len + 1, char);
6038 else if (len + 1 > allocated)
6039 xrnew (last, len + 1, char);
6040 allocated = len + 1;
6041 memcpy (last, cp, len);
6042 last[len] = '\0';
6045 free (last);
6050 * A function definition is added if it matches:
6051 * <beginning of line><Erlang Atom><whitespace>(
6053 * It is added to the tags database if it doesn't match the
6054 * name of the previous clause header.
6056 * Return the size of the name of the function, or 0 if no function
6057 * was found.
6059 static int
6060 erlang_func (char *s, char *last)
6062 /* Name of last clause. */
6064 int pos;
6065 int len;
6067 pos = erlang_atom (s);
6068 if (pos < 1)
6069 return 0;
6071 len = pos;
6072 pos = skip_spaces (s + pos) - s;
6074 /* Save only the first clause. */
6075 if (s[pos++] == '('
6076 && (last == NULL
6077 || len != (int)strlen (last)
6078 || !strneq (s, last, len)))
6080 make_tag (s, len, true, s, pos, lineno, linecharno);
6081 return len;
6084 return 0;
6089 * Handle attributes. Currently, tags are generated for defines
6090 * and records.
6092 * They are on the form:
6093 * -define(foo, bar).
6094 * -define(Foo(M, N), M+N).
6095 * -record(graph, {vtab = notable, cyclic = true}).
6097 static void
6098 erlang_attribute (char *s)
6100 char *cp = s;
6102 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6103 && *cp++ == '(')
6105 int len = erlang_atom (skip_spaces (cp));
6106 if (len > 0)
6107 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
6109 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the atom is a quoted one that is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (n = 1; c_isalnum (s[n]) || s[n] == '_'; n++)
        continue;
    }
  else if (s[0] == '\'')
    {
      /* The atom is quoted: find the closing quote, letting a
         backslash protect the character after it.  */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\' && s[++n] == '\0')
            return 0;
          n++;
        }
      n++;                      /* count the closing quote */
    }

  return n;
}
6142 static char *scan_separators (char *);
6143 static void add_regex (char *, language *);
6144 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];     /* the first character is the separator */
  char *src = name + 1;         /* next character to read */
  char *dst = name;             /* next position to write; stays behind SRC */
  bool escaped = false;         /* previous character was a backslash */
  char *result;

  for (; *src != '\0'; src++)
    {
      const char ch = *src;

      if (escaped)
        {
          escaped = false;
          switch (ch)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;     /* BS (back space) */
            case 'd': *dst++ = 0177; break;     /* DEL (delete) */
            case 'e': *dst++ = 033; break;      /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;     /* NL (new line) */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab) */
            default:
              /* A quoted separator loses its backslash; for anything
                 else the backslash is preserved.  */
              if (ch != sep)
                *dst++ = '\\';
              *dst++ = ch;
              break;
            }
        }
      else if (ch == '\\')
        escaped = true;
      else if (ch == sep)
        break;
      else
        *dst++ = ch;
    }

  /* SRC rests on the closing separator unless the input ran out.  */
  result = (*src == sep) ? src : NULL;  /* NULL signals unterminated regexp */

  /* Terminate copied string.  */
  *dst = '\0';
  return result;
}
6205 /* Look at the argument of --regex or --no-regex and do the right
6206 thing. Same for each line of a regexp file. */
6207 static void
6208 analyze_regex (char *regex_arg)
6210 if (regex_arg == NULL)
6212 free_regexps (); /* --no-regex: remove existing regexps */
6213 return;
6216 /* A real --regexp option or a line in a regexp file. */
6217 switch (regex_arg[0])
6219 /* Comments in regexp file or null arg to --regex. */
6220 case '\0':
6221 case ' ':
6222 case '\t':
6223 break;
6225 /* Read a regex file. This is recursive and may result in a
6226 loop, which will stop when the file descriptors are exhausted. */
6227 case '@':
6229 FILE *regexfp;
6230 linebuffer regexbuf;
6231 char *regexfile = regex_arg + 1;
6233 /* regexfile is a file containing regexps, one per line. */
6234 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6235 if (regexfp == NULL)
6236 pfatal (regexfile);
6237 linebuffer_init (&regexbuf);
6238 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
6239 analyze_regex (regexbuf.buffer);
6240 free (regexbuf.buffer);
6241 if (fclose (regexfp) != 0)
6242 pfatal (regexfile);
6244 break;
6246 /* Regexp to be used for a specific language only. */
6247 case '{':
6249 language *lang;
6250 char *lang_name = regex_arg + 1;
6251 char *cp;
6253 for (cp = lang_name; *cp != '}'; cp++)
6254 if (*cp == '\0')
6256 error ("unterminated language name in regex: %s", regex_arg);
6257 return;
6259 *cp++ = '\0';
6260 lang = get_language_from_langname (lang_name);
6261 if (lang == NULL)
6262 return;
6263 add_regex (cp, lang);
6265 break;
6267 /* Regexp to be used for any language. */
6268 default:
6269 add_regex (regex_arg, NULL);
6270 break;
6274 /* Separate the regexp pattern, compile it,
6275 and care for optional name and modifiers. */
6276 static void
6277 add_regex (char *regexp_pattern, language *lang)
6279 static struct re_pattern_buffer zeropattern;
6280 char sep, *pat, *name, *modifiers;
6281 char empty = '\0';
6282 const char *err;
6283 struct re_pattern_buffer *patbuf;
6284 regexp *rp;
6285 bool
6286 force_explicit_name = true, /* do not use implicit tag names */
6287 ignore_case = false, /* case is significant */
6288 multi_line = false, /* matches are done one line at a time */
6289 single_line = false; /* dot does not match newline */
6292 if (strlen (regexp_pattern) < 3)
6294 error ("null regexp");
6295 return;
6297 sep = regexp_pattern[0];
6298 name = scan_separators (regexp_pattern);
6299 if (name == NULL)
6301 error ("%s: unterminated regexp", regexp_pattern);
6302 return;
6304 if (name[1] == sep)
6306 error ("null name for regexp \"%s\"", regexp_pattern);
6307 return;
6309 modifiers = scan_separators (name);
6310 if (modifiers == NULL) /* no terminating separator --> no name */
6312 modifiers = name;
6313 name = &empty;
6315 else
6316 modifiers += 1; /* skip separator */
6318 /* Parse regex modifiers. */
6319 for (; modifiers[0] != '\0'; modifiers++)
6320 switch (modifiers[0])
6322 case 'N':
6323 if (modifiers == name)
6324 error ("forcing explicit tag name but no name, ignoring");
6325 force_explicit_name = true;
6326 break;
6327 case 'i':
6328 ignore_case = true;
6329 break;
6330 case 's':
6331 single_line = true;
6332 /* FALLTHRU */
6333 case 'm':
6334 multi_line = true;
6335 need_filebuf = true;
6336 break;
6337 default:
6338 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
6339 break;
6342 patbuf = xnew (1, struct re_pattern_buffer);
6343 *patbuf = zeropattern;
6344 if (ignore_case)
6346 static char lc_trans[UCHAR_MAX + 1];
6347 int i;
6348 for (i = 0; i < UCHAR_MAX + 1; i++)
6349 lc_trans[i] = c_tolower (i);
6350 patbuf->translate = lc_trans; /* translation table to fold case */
6353 if (multi_line)
6354 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6355 else
6356 pat = regexp_pattern;
6358 if (single_line)
6359 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6360 else
6361 re_set_syntax (RE_SYNTAX_EMACS);
6363 err = re_compile_pattern (pat, strlen (pat), patbuf);
6364 if (multi_line)
6365 free (pat);
6366 if (err != NULL)
6368 error ("%s while compiling pattern", err);
6369 return;
6372 rp = p_head;
6373 p_head = xnew (1, regexp);
6374 p_head->pattern = savestr (regexp_pattern);
6375 p_head->p_next = rp;
6376 p_head->lang = lang;
6377 p_head->pat = patbuf;
6378 p_head->name = savestr (name);
6379 p_head->error_signaled = false;
6380 p_head->force_explicit_name = force_explicit_name;
6381 p_head->ignore_case = ignore_case;
6382 p_head->multi_line = multi_line;
6386 * Do the substitutions indicated by the regular expression and
6387 * arguments.
6389 static char *
6390 substitute (char *in, char *out, struct re_registers *regs)
6392 char *result, *t;
6393 int size, dig, diglen;
6395 result = NULL;
6396 size = strlen (out);
6398 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6399 if (out[size - 1] == '\\')
6400 fatal ("pattern error in \"%s\"", out);
6401 for (t = strchr (out, '\\');
6402 t != NULL;
6403 t = strchr (t + 2, '\\'))
6404 if (c_isdigit (t[1]))
6406 dig = t[1] - '0';
6407 diglen = regs->end[dig] - regs->start[dig];
6408 size += diglen - 2;
6410 else
6411 size -= 1;
6413 /* Allocate space and do the substitutions. */
6414 assert (size >= 0);
6415 result = xnew (size + 1, char);
6417 for (t = result; *out != '\0'; out++)
6418 if (*out == '\\' && c_isdigit (*++out))
6420 dig = *out - '0';
6421 diglen = regs->end[dig] - regs->start[dig];
6422 memcpy (t, in + regs->start[dig], diglen);
6423 t += diglen;
6425 else
6426 *t++ = *out;
6427 *t = '\0';
6429 assert (t <= result + size);
6430 assert (t - result == (int)strlen (result));
6432 return result;
6435 /* Deallocate all regexps. */
6436 static void
6437 free_regexps (void)
6439 regexp *rp;
6440 while (p_head != NULL)
6442 rp = p_head->p_next;
6443 free (p_head->pattern);
6444 free (p_head->name);
6445 free (p_head);
6446 p_head = rp;
6448 return;
6452 * Reads the whole file as a single string from `filebuf' and looks for
6453 * multi-line regular expressions, creating tags on matches.
6454 * readline already dealt with normal regexps.
6456 * Idea by Ben Wing <ben@666.com> (2002).
6458 static void
6459 regex_tag_multiline (void)
6461 char *buffer = filebuf.buffer;
6462 regexp *rp;
6463 char *name;
6465 for (rp = p_head; rp != NULL; rp = rp->p_next)
6467 int match = 0;
6469 if (!rp->multi_line)
6470 continue; /* skip normal regexps */
6472 /* Generic initializations before parsing file from memory. */
6473 lineno = 1; /* reset global line number */
6474 charno = 0; /* reset global char number */
6475 linecharno = 0; /* reset global char number of line start */
6477 /* Only use generic regexps or those for the current language. */
6478 if (rp->lang != NULL && rp->lang != curfdp->lang)
6479 continue;
6481 while (match >= 0 && match < filebuf.len)
6483 match = re_search (rp->pat, buffer, filebuf.len, charno,
6484 filebuf.len - match, &rp->regs);
6485 switch (match)
6487 case -2:
6488 /* Some error. */
6489 if (!rp->error_signaled)
6491 error ("regexp stack overflow while matching \"%s\"",
6492 rp->pattern);
6493 rp->error_signaled = true;
6495 break;
6496 case -1:
6497 /* No match. */
6498 break;
6499 default:
6500 if (match == rp->regs.end[0])
6502 if (!rp->error_signaled)
6504 error ("regexp matches the empty string: \"%s\"",
6505 rp->pattern);
6506 rp->error_signaled = true;
6508 match = -3; /* exit from while loop */
6509 break;
6512 /* Match occurred. Construct a tag. */
6513 while (charno < rp->regs.end[0])
6514 if (buffer[charno++] == '\n')
6515 lineno++, linecharno = charno;
6516 name = rp->name;
6517 if (name[0] == '\0')
6518 name = NULL;
6519 else /* make a named tag */
6520 name = substitute (buffer, rp->name, &rp->regs);
6521 if (rp->force_explicit_name)
6522 /* Force explicit tag name, if a name is there. */
6523 pfnote (name, true, buffer + linecharno,
6524 charno - linecharno + 1, lineno, linecharno);
6525 else
6526 make_tag (name, strlen (name), true, buffer + linecharno,
6527 charno - linecharno + 1, lineno, linecharno);
6528 break;
6535 static bool
6536 nocase_tail (const char *cp)
6538 int len = 0;
6540 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6541 cp++, len++;
6542 if (*cp == '\0' && !intoken (dbp[len]))
6544 dbp += len;
6545 return true;
6547 return false;
6550 static void
6551 get_tag (register char *bp, char **namepp)
6553 register char *cp = bp;
6555 if (*bp != '\0')
6557 /* Go till you get to white space or a syntactic break */
6558 for (cp = bp + 1; !notinname (*cp); cp++)
6559 continue;
6560 make_tag (bp, cp - bp, true,
6561 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6564 if (namepp != NULL)
6565 *namepp = savenstr (bp, cp - bp);
6569 * Read a line of text from `stream' into `lbp', excluding the
6570 * newline or CR-NL, if any. Return the number of characters read from
6571 * `stream', which is the length of the line including the newline.
6573 * On DOS or Windows we do not count the CR character, if any before the
6574 * NL, in the returned length; this mirrors the behavior of Emacs on those
6575 * platforms (for text files, it translates CR-NL to NL as it reads in the
6576 * file).
6578 * If multi-line regular expressions are requested, each line read is
6579 * appended to `filebuf'.
6581 static long
6582 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6584 char *buffer = lbp->buffer;
6585 char *p = lbp->buffer;
6586 char *pend;
6587 int chars_deleted;
6589 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6591 for (;;)
6593 register int c = getc (stream);
6594 if (p == pend)
6596 /* We're at the end of linebuffer: expand it. */
6597 lbp->size *= 2;
6598 xrnew (buffer, lbp->size, char);
6599 p += buffer - lbp->buffer;
6600 pend = buffer + lbp->size;
6601 lbp->buffer = buffer;
6603 if (c == EOF)
6605 if (ferror (stream))
6606 perror (filename);
6607 *p = '\0';
6608 chars_deleted = 0;
6609 break;
6611 if (c == '\n')
6613 if (p > buffer && p[-1] == '\r')
6615 p -= 1;
6616 chars_deleted = 2;
6618 else
6620 chars_deleted = 1;
6622 *p = '\0';
6623 break;
6625 *p++ = c;
6627 lbp->len = p - buffer;
6629 if (need_filebuf /* we need filebuf for multi-line regexps */
6630 && chars_deleted > 0) /* not at EOF */
6632 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6634 /* Expand filebuf. */
6635 filebuf.size *= 2;
6636 xrnew (filebuf.buffer, filebuf.size, char);
6638 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6639 filebuf.len += lbp->len;
6640 filebuf.buffer[filebuf.len++] = '\n';
6641 filebuf.buffer[filebuf.len] = '\0';
6644 return lbp->len + chars_deleted;
6648 * Like readline_internal, above, but in addition try to match the
6649 * input line against relevant regular expressions and manage #line
6650 * directives.
6652 static void
6653 readline (linebuffer *lbp, FILE *stream)
6655 long result;
6657 linecharno = charno; /* update global char number of line start */
6658 result = readline_internal (lbp, stream, infilename); /* read line */
6659 lineno += 1; /* increment global line number */
6660 charno += result; /* increment global char number */
6662 /* Honor #line directives. */
6663 if (!no_line_directive)
6665 static bool discard_until_line_directive;
6667 /* Check whether this is a #line directive. */
6668 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6670 unsigned int lno;
6671 int start = 0;
6673 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6674 && start > 0) /* double quote character found */
6676 char *endp = lbp->buffer + start;
6678 while ((endp = strchr (endp, '"')) != NULL
6679 && endp[-1] == '\\')
6680 endp++;
6681 if (endp != NULL)
6682 /* Ok, this is a real #line directive. Let's deal with it. */
6684 char *taggedabsname; /* absolute name of original file */
6685 char *taggedfname; /* name of original file as given */
6686 char *name; /* temp var */
6688 discard_until_line_directive = false; /* found it */
6689 name = lbp->buffer + start;
6690 *endp = '\0';
6691 canonicalize_filename (name);
6692 taggedabsname = absolute_filename (name, tagfiledir);
6693 if (filename_is_absolute (name)
6694 || filename_is_absolute (curfdp->infname))
6695 taggedfname = savestr (taggedabsname);
6696 else
6697 taggedfname = relative_filename (taggedabsname,tagfiledir);
6699 if (streq (curfdp->taggedfname, taggedfname))
6700 /* The #line directive is only a line number change. We
6701 deal with this afterwards. */
6702 free (taggedfname);
6703 else
6704 /* The tags following this #line directive should be
6705 attributed to taggedfname. In order to do this, set
6706 curfdp accordingly. */
6708 fdesc *fdp; /* file description pointer */
6710 /* Go look for a file description already set up for the
6711 file indicated in the #line directive. If there is
6712 one, use it from now until the next #line
6713 directive. */
6714 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6715 if (streq (fdp->infname, curfdp->infname)
6716 && streq (fdp->taggedfname, taggedfname))
6717 /* If we remove the second test above (after the &&)
6718 then all entries pertaining to the same file are
6719 coalesced in the tags file. If we use it, then
6720 entries pertaining to the same file but generated
6721 from different files (via #line directives) will
6722 go into separate sections in the tags file. These
6723 alternatives look equivalent. The first one
6724 destroys some apparently useless information. */
6726 curfdp = fdp;
6727 free (taggedfname);
6728 break;
6730 /* Else, if we already tagged the real file, skip all
6731 input lines until the next #line directive. */
6732 if (fdp == NULL) /* not found */
6733 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6734 if (streq (fdp->infabsname, taggedabsname))
6736 discard_until_line_directive = true;
6737 free (taggedfname);
6738 break;
6740 /* Else create a new file description and use that from
6741 now on, until the next #line directive. */
6742 if (fdp == NULL) /* not found */
6744 fdp = fdhead;
6745 fdhead = xnew (1, fdesc);
6746 *fdhead = *curfdp; /* copy curr. file description */
6747 fdhead->next = fdp;
6748 fdhead->infname = savestr (curfdp->infname);
6749 fdhead->infabsname = savestr (curfdp->infabsname);
6750 fdhead->infabsdir = savestr (curfdp->infabsdir);
6751 fdhead->taggedfname = taggedfname;
6752 fdhead->usecharno = false;
6753 fdhead->prop = NULL;
6754 fdhead->written = false;
6755 curfdp = fdhead;
6758 free (taggedabsname);
6759 lineno = lno - 1;
6760 readline (lbp, stream);
6761 return;
6762 } /* if a real #line directive */
6763 } /* if #line is followed by a number */
6764 } /* if line begins with "#line " */
6766 /* If we are here, no #line directive was found. */
6767 if (discard_until_line_directive)
6769 if (result > 0)
6771 /* Do a tail recursion on ourselves, thus discarding the contents
6772 of the line buffer. */
6773 readline (lbp, stream);
6774 return;
6776 /* End of file. */
6777 discard_until_line_directive = false;
6778 return;
6780 } /* if #line directives should be considered */
6783 int match;
6784 regexp *rp;
6785 char *name;
6787 /* Match against relevant regexps. */
6788 if (lbp->len > 0)
6789 for (rp = p_head; rp != NULL; rp = rp->p_next)
6791 /* Only use generic regexps or those for the current language.
6792 Also do not use multiline regexps, which is the job of
6793 regex_tag_multiline. */
6794 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6795 || rp->multi_line)
6796 continue;
6798 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6799 switch (match)
6801 case -2:
6802 /* Some error. */
6803 if (!rp->error_signaled)
6805 error ("regexp stack overflow while matching \"%s\"",
6806 rp->pattern);
6807 rp->error_signaled = true;
6809 break;
6810 case -1:
6811 /* No match. */
6812 break;
6813 case 0:
6814 /* Empty string matched. */
6815 if (!rp->error_signaled)
6817 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6818 rp->error_signaled = true;
6820 break;
6821 default:
6822 /* Match occurred. Construct a tag. */
6823 name = rp->name;
6824 if (name[0] == '\0')
6825 name = NULL;
6826 else /* make a named tag */
6827 name = substitute (lbp->buffer, rp->name, &rp->regs);
6828 if (rp->force_explicit_name)
6829 /* Force explicit tag name, if a name is there. */
6830 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6831 else
6832 make_tag (name, strlen (name), true,
6833 lbp->buffer, match, lineno, linecharno);
6834 break;
/*
 * Duplicate the string CP: return a pointer to freshly allocated
 * space of size strlen (CP) + 1 holding a copy of it.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6852 * Return a pointer to a space of size LEN+1 allocated with xnew where
6853 * the string CP has been copied for at most the first LEN characters.
6855 static char *
6856 savenstr (const char *cp, int len)
6858 char *dp = xnew (len + 1, char);
6859 dp[len] = '\0';
6860 return memcpy (dp, cp, len);
/* Return a pointer to the first character at or after CP that is not
   white space.  The end of the string does not count as white space,
   so the scan never runs past it.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first white space character at or after CP,
   or to the end of the string when there is none.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
/* Return a pointer to the first character at or after CP that is not
   in the "name" class.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Print a diagnostic built from the printf-style FORMAT and its
   arguments, then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current errno condition with perror, using S1 as the
   message prefix, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6909 static void
6910 suggest_asking_for_help (void)
6912 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6913 progname);
6914 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args.  Unlike
   fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6927 static void
6928 verror (char const *format, va_list ap)
6930 fprintf (stderr, "%s: ", progname);
6931 vfprintf (stderr, format, ap);
6932 fprintf (stderr, "\n");
6935 /* Return a newly-allocated string whose contents
6936 concatenate those of s1, s2, s3. */
6937 static char *
6938 concat (const char *s1, const char *s2, const char *s3)
6940 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6941 char *result = xnew (len1 + len2 + len3 + 1, char);
6943 strcpy (result, s1);
6944 strcpy (result + len1, s2);
6945 strcpy (result + len1 + len2, s3);
6947 return result;
6951 /* Does the same work as the system V getcwd, but does not need to
6952 guess the buffer size in advance. */
6953 static char *
6954 etags_getcwd (void)
6956 int bufsize = 200;
6957 char *path = xnew (bufsize, char);
6959 while (getcwd (path, bufsize) == NULL)
6961 if (errno != ERANGE)
6962 pfatal ("getcwd");
6963 bufsize *= 2;
6964 free (path);
6965 path = xnew (bufsize, char);
6968 canonicalize_filename (path);
6969 return path;
6972 /* Return a newly allocated string containing a name of a temporary file. */
6973 static char *
6974 etags_mktmp (void)
6976 const char *tmpdir = getenv ("TMPDIR");
6977 const char *slash = "/";
6979 #if MSDOS || defined (DOS_NT)
6980 if (!tmpdir)
6981 tmpdir = getenv ("TEMP");
6982 if (!tmpdir)
6983 tmpdir = getenv ("TMP");
6984 if (!tmpdir)
6985 tmpdir = ".";
6986 if (tmpdir[strlen (tmpdir) - 1] == '/'
6987 || tmpdir[strlen (tmpdir) - 1] == '\\')
6988 slash = "";
6989 #else
6990 if (!tmpdir)
6991 tmpdir = "/tmp";
6992 if (tmpdir[strlen (tmpdir) - 1] == '/')
6993 slash = "";
6994 #endif
6996 char *templt = concat (tmpdir, slash, "etXXXXXX");
6997 int fd = mkostemp (templt, O_CLOEXEC);
6998 if (fd < 0 || close (fd) != 0)
7000 int temp_errno = errno;
7001 free (templt);
7002 errno = temp_errno;
7003 templt = NULL;
7006 #if defined (DOS_NT)
7007 /* The file name will be used in shell redirection, so it needs to have
7008 DOS-style backslashes, or else the Windows shell will barf. */
7009 char *p;
7010 for (p = templt; *p; p++)
7011 if (*p == '/')
7012 *p = '\\';
7013 #endif
7015 return templt;
7018 /* Return a newly allocated string containing the file name of FILE
7019 relative to the absolute directory DIR (which should end with a slash). */
7020 static char *
7021 relative_filename (char *file, char *dir)
7023 char *fp, *dp, *afn, *res;
7024 int i;
7026 /* Find the common root of file and dir (with a trailing slash). */
7027 afn = absolute_filename (file, cwd);
7028 fp = afn;
7029 dp = dir;
7030 while (*fp++ == *dp++)
7031 continue;
7032 fp--, dp--; /* back to the first differing char */
7033 #ifdef DOS_NT
7034 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
7035 return afn;
7036 #endif
7037 do /* look at the equal chars until '/' */
7038 fp--, dp--;
7039 while (*fp != '/');
7041 /* Build a sequence of "../" strings for the resulting relative file name. */
7042 i = 0;
7043 while ((dp = strchr (dp + 1, '/')) != NULL)
7044 i += 1;
7045 res = xnew (3*i + strlen (fp + 1) + 1, char);
7046 char *z = res;
7047 while (i-- > 0)
7048 z = stpcpy (z, "../");
7050 /* Add the file name relative to the common root of file and dir. */
7051 strcpy (z, fp + 1);
7052 free (afn);
7054 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied, any other FILE is appended to DIR.  The
   "/dirname/.." and "/." components are then removed from the copy,
   in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *prev, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slash = strchr (res, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      bool dot = slash[1] == '.';

      if (dot && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": walk back to the start of the previous component.  */
          prev = slash;
          do
            prev--;
          while (prev >= res && !filename_is_absolute (prev));
          if (prev < res)
            prev = slash;       /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = slash;
#endif
          /* The length also covers the terminating NUL.  */
          memmove (prev, slash + 3, strlen (slash + 2));
          slash = prev;
          continue;
        }

      if (dot && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/.": just drop it.  */
          memmove (slash, slash + 2, strlen (slash + 1));
          continue;
        }

      slash = strchr (slash + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* just a safety net: should never happen */
  free (res);
  return savestr ("/");
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE has no slash in it the result is a
   copy of DIR.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call.  */
  char *after = last_slash + 1;
  char saved = *after;
  *after = '\0';
  char *res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter, a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  Under DOS_NT backslashes count as separators too
   and are turned into forward slashes.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  while (*src != '\0')
    {
#ifdef DOS_NT
      /* Collapse multiple forward- and back-slashes into a single forward
         slash. */
      if (*src == '/' || *src == '\\')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/' || *src == '\\');
        }
#else /* !DOS_NT */
      /* Collapse multiple slashes into a single slash. */
      if (*src == '/')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/');
        }
#endif /* !DOS_NT */
      else
        *dst++ = *src++;
    }

  *dst = '\0';
}
7195 /* Initialize a linebuffer for use. */
7196 static void
7197 linebuffer_init (linebuffer *lbp)
7199 lbp->size = (DEBUG) ? 3 : 200;
7200 lbp->buffer = xnew (lbp->size, char);
7201 lbp->buffer[0] = '\0';
7202 lbp->len = 0;
7205 /* Set the minimum size of a string contained in a linebuffer. */
7206 static void
7207 linebuffer_setlen (linebuffer *lbp, int toksize)
7209 while (lbp->size <= toksize)
7211 lbp->size *= 2;
7212 xrnew (lbp->buffer, lbp->size, char);
7214 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted");
  return mem;
}
/* Like realloc but get fatal error if memory is exhausted.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted");
  return mem;
}
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */