; Spelling fixes (American spelling)
[emacs.git] / lib-src / etags.c
blob54ed1b428e9b9d5b860570f3bf85314f88e431bc
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2016 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
/* Revision string embedded in the binary; the "@(#)" prefix is kept
   exactly as in the original definition.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";

/* Normalize DEBUG to a boolean constant.  When DEBUG was not requested,
   NDEBUG is defined as well so that assertions are disabled.  */
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG true
#else
#  define DEBUG false
#  define NDEBUG                /* disable assert */
#endif
91 #include <config.h>
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #ifdef WIN32_NATIVE
100 # undef MSDOS
101 # undef WINDOWSNT
102 # define WINDOWSNT
103 #endif /* WIN32_NATIVE */
105 #ifdef MSDOS
106 # undef MSDOS
107 # define MSDOS true
108 # include <sys/param.h>
109 #else
110 # define MSDOS false
111 #endif /* MSDOS */
113 #ifdef WINDOWSNT
114 # include <direct.h>
115 # define MAXPATHLEN _MAX_PATH
116 # undef HAVE_NTGUI
117 # undef DOS_NT
118 # define DOS_NT
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <binary-io.h>
131 #include <c-ctype.h>
132 #include <c-strcase.h>
134 #include <assert.h>
135 #ifdef NDEBUG
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
138 #endif
140 #include <getopt.h>
141 #include <regex.h>
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default.

   Either way CTAGS ends up defined as a boolean constant, so it can be
   used both in #if directives and in ordinary expressions.  */
#ifdef CTAGS
# undef CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
/* Return true if the strings S and T are equal (strcmp returns 0).  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if S and T compare equal under c_strcasecmp
   (declared in <c-strcase.h>), i.e. when letter case is ignored.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if the first N characters of S and T are equal
   (strncmp returns 0).  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if the first N characters of S and T compare equal under
   c_strncasecmp (declared in <c-strcase.h>), i.e. ignoring letter case.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* C is not in a name.
   True for NUL, tab, newline, form feed, carriage return, space and
   the characters ( ) , ; =; false for every other character.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying! */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };
  return table[c];
}
/* C can start a token.
   True for ASCII letters, underscore, '$', '@' and '~'.  */
static bool
begtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1, ['@']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1,
    ['~']=1
  };
  return table[c];
}
/* C can be in the middle of a token.
   True for ASCII letters, digits, underscore and '$'.  */
static bool
intoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['$']=1,
    ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
    ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
    ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
    ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
    ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
    ['Y']=1, ['Z']=1,
    ['_']=1,
    ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
    ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
    ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
    ['y']=1, ['z']=1
  };
  return table[c];
}
/* C can end a token.
   True for NUL, tab, newline, carriage return, space and the
   punctuation characters listed in the table below.  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };
  return table[c];
}
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew expands to a call to xmalloc for N objects of type Type;
 * xrnew expands to a call to xrealloc and assigns the result back to OP.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
/* Type of the per-language parse functions: each receives a FILE *.  */
typedef void Lang_function (FILE *);

/* Pairs a file name suffix with the command used to decompress it.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;

/* Description of one supported language.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
272 typedef struct fdesc
274 struct fdesc *next; /* for the linked list */
275 char *infname; /* uncompressed input file name */
276 char *infabsname; /* absolute uncompressed input file name */
277 char *infabsdir; /* absolute dir of input file */
278 char *taggedfname; /* file name to write in tagfile */
279 language *lang; /* language of file */
280 char *prop; /* file properties to write in tagfile */
281 bool usecharno; /* etags tags shall contain char number */
282 bool written; /* entry written in the tags file */
283 } fdesc;
285 typedef struct node_st
286 { /* sorting structure */
287 struct node_st *left, *right; /* left and right sons */
288 fdesc *fdp; /* description of file to whom tag belongs */
289 char *name; /* tag name */
290 char *regex; /* search regexp */
291 bool valid; /* write this tag on the tag file */
292 bool is_func; /* function tag: use regexp in CTAGS mode */
293 bool been_warned; /* warning already given for duplicated tag */
294 int lno; /* line number tag is on */
295 long cno; /* character number line starts on */
296 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
312 /* Used to support mixing of --lang and file names. */
313 typedef struct
315 enum {
316 at_language, /* a language specification */
317 at_regexp, /* a regular expression */
318 at_filename, /* a file name */
319 at_stdin, /* read from stdin here */
320 at_end /* stop parsing the list */
321 } arg_type; /* argument type */
322 language *lang; /* language associated with the argument */
323 char *what; /* the argument itself */
324 } argument;
326 /* Structure defining a regular expression. */
327 typedef struct regexp
329 struct regexp *p_next; /* pointer to next in list */
330 language *lang; /* if set, use only for this language */
331 char *pattern; /* the regexp pattern */
332 char *name; /* tag name */
333 struct re_pattern_buffer *pat; /* the compiled pattern */
334 struct re_registers regs; /* re registers */
335 bool error_signaled; /* already signaled for this regexp */
336 bool force_explicit_name; /* do not allow implicit tag name */
337 bool ignore_case; /* ignore case when matching */
338 bool multi_line; /* do a multi-line match on the whole file */
339 } regexp;
342 /* Many compilers barf on this:
343 Lang_function Ada_funcs;
344 so let's write it this way */
345 static void Ada_funcs (FILE *);
346 static void Asm_labels (FILE *);
347 static void C_entries (int c_ext, FILE *);
348 static void default_C_entries (FILE *);
349 static void plain_C_entries (FILE *);
350 static void Cjava_entries (FILE *);
351 static void Cobol_paragraphs (FILE *);
352 static void Cplusplus_entries (FILE *);
353 static void Cstar_entries (FILE *);
354 static void Erlang_functions (FILE *);
355 static void Forth_words (FILE *);
356 static void Fortran_functions (FILE *);
357 static void HTML_labels (FILE *);
358 static void Lisp_functions (FILE *);
359 static void Lua_functions (FILE *);
360 static void Makefile_targets (FILE *);
361 static void Pascal_functions (FILE *);
362 static void Perl_functions (FILE *);
363 static void PHP_functions (FILE *);
364 static void PS_functions (FILE *);
365 static void Prolog_functions (FILE *);
366 static void Python_functions (FILE *);
367 static void Ruby_functions (FILE *);
368 static void Scheme_functions (FILE *);
369 static void TeX_commands (FILE *);
370 static void Texinfo_nodes (FILE *);
371 static void Yacc_entries (FILE *);
372 static void just_read_file (FILE *);
374 static language *get_language_from_langname (const char *);
375 static void readline (linebuffer *, FILE *);
376 static long readline_internal (linebuffer *, FILE *, char const *);
377 static bool nocase_tail (const char *);
378 static void get_tag (char *, char **);
380 static void analyze_regex (char *);
381 static void free_regexps (void);
382 static void regex_tag_multiline (void);
383 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
384 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
385 static _Noreturn void suggest_asking_for_help (void);
386 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
387 static _Noreturn void pfatal (const char *);
388 static void add_node (node *, node **);
390 static void process_file_name (char *, language *);
391 static void process_file (FILE *, char *, language *);
392 static void find_entries (FILE *);
393 static void free_tree (node *);
394 static void free_fdesc (fdesc *);
395 static void pfnote (char *, bool, char *, int, int, long);
396 static void invalidate_nodes (fdesc *, node **);
397 static void put_entries (node *);
399 static char *concat (const char *, const char *, const char *);
400 static char *skip_spaces (char *);
401 static char *skip_non_spaces (char *);
402 static char *skip_name (char *);
403 static char *savenstr (const char *, int);
404 static char *savestr (const char *);
405 static char *etags_getcwd (void);
406 static char *relative_filename (char *, char *);
407 static char *absolute_filename (char *, char *);
408 static char *absolute_dirname (char *, char *);
409 static bool filename_is_absolute (char *f);
410 static void canonicalize_filename (char *);
411 static char *etags_mktmp (void);
412 static void linebuffer_init (linebuffer *);
413 static void linebuffer_setlen (linebuffer *, int);
414 static void *xmalloc (size_t);
415 static void *xrealloc (void *, size_t);
418 static char searchar = '/'; /* use /.../ searches */
420 static char *tagfile; /* output file */
421 static char *progname; /* name this program was invoked with */
422 static char *cwd; /* current working directory */
423 static char *tagfiledir; /* directory of tagfile */
424 static FILE *tagf; /* ioptr for tags file */
425 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
427 static fdesc *fdhead; /* head of file description list */
428 static fdesc *curfdp; /* current file description */
429 static char *infilename; /* current input file name */
430 static int lineno; /* line number of current line */
431 static long charno; /* current character number */
432 static long linecharno; /* charno of start of current line */
433 static char *dbp; /* pointer to start of current tag */
435 static const int invalidcharno = -1;
437 static node *nodehead; /* the head of the binary tree of tags */
438 static node *last_node; /* the last node created */
440 static linebuffer lb; /* the current line */
441 static linebuffer filebuf; /* a buffer containing the whole file */
442 static linebuffer token_name; /* a buffer containing a tag name */
444 static bool append_to_tagfile; /* -a: append to tags */
445 /* The next five default to true in C and derived languages. */
446 static bool typedefs; /* -t: create tags for C and Ada typedefs */
447 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
448 /* 0 struct/enum/union decls, and C++ */
449 /* member functions. */
450 static bool constantypedefs; /* -d: create tags for C #define, enum */
451 /* constants and variables. */
452 /* -D: opposite of -d. Default under ctags. */
453 static int globals; /* create tags for global variables */
454 static int members; /* create tags for C member variables */
455 static int declarations; /* --declarations: tag them and extern in C&Co*/
456 static int no_line_directive; /* ignore #line directives (undocumented) */
457 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
458 static bool update; /* -u: update tags */
459 static bool vgrind_style; /* -v: create vgrind style index output */
460 static bool no_warnings; /* -w: suppress warnings (undocumented) */
461 static bool cxref_style; /* -x: create cxref style output */
462 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
463 static bool ignoreindent; /* -I: ignore indentation in C */
464 static int packages_only; /* --packages-only: in Ada, only tag packages*/
465 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
467 /* STDIN is defined in LynxOS system headers */
468 #ifdef STDIN
469 # undef STDIN
470 #endif
472 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
473 static bool parsing_stdin; /* --parse-stdin used */
475 static regexp *p_head; /* list of all regexps */
476 static bool need_filebuf; /* some regexes are multi-line */
478 static struct option longopts[] =
480 { "append", no_argument, NULL, 'a' },
481 { "packages-only", no_argument, &packages_only, 1 },
482 { "c++", no_argument, NULL, 'C' },
483 { "declarations", no_argument, &declarations, 1 },
484 { "no-line-directive", no_argument, &no_line_directive, 1 },
485 { "no-duplicates", no_argument, &no_duplicates, 1 },
486 { "help", no_argument, NULL, 'h' },
487 { "help", no_argument, NULL, 'H' },
488 { "ignore-indentation", no_argument, NULL, 'I' },
489 { "language", required_argument, NULL, 'l' },
490 { "members", no_argument, &members, 1 },
491 { "no-members", no_argument, &members, 0 },
492 { "output", required_argument, NULL, 'o' },
493 { "class-qualify", no_argument, &class_qualify, 'Q' },
494 { "regex", required_argument, NULL, 'r' },
495 { "no-regex", no_argument, NULL, 'R' },
496 { "ignore-case-regex", required_argument, NULL, 'c' },
497 { "parse-stdin", required_argument, NULL, STDIN },
498 { "version", no_argument, NULL, 'V' },
500 #if CTAGS /* Ctags options */
501 { "backward-search", no_argument, NULL, 'B' },
502 { "cxref", no_argument, NULL, 'x' },
503 { "defines", no_argument, NULL, 'd' },
504 { "globals", no_argument, &globals, 1 },
505 { "typedefs", no_argument, NULL, 't' },
506 { "typedefs-and-c++", no_argument, NULL, 'T' },
507 { "update", no_argument, NULL, 'u' },
508 { "vgrind", no_argument, NULL, 'v' },
509 { "no-warn", no_argument, NULL, 'w' },
511 #else /* Etags options */
512 { "no-defines", no_argument, NULL, 'D' },
513 { "no-globals", no_argument, &globals, 0 },
514 { "include", required_argument, NULL, 'i' },
515 #endif
516 { NULL }
519 static compressor compressors[] =
521 { "z", "gzip -d -c"},
522 { "Z", "gzip -d -c"},
523 { "gz", "gzip -d -c"},
524 { "GZ", "gzip -d -c"},
525 { "bz2", "bzip2 -d -c" },
526 { "xz", "xz -d -c" },
527 { NULL }
/*
 * Language stuff.
 *
 * NOTE(review): runs of whitespace inside the help strings of this copy
 * may have been collapsed (e.g. the column alignment of the Ada entity
 * table); confirm the exact spacing against the upstream file.
 */

/* Ada code */
static const char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static const char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the '--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag 'bidule'.";

/* Assembly code */
static const char *Asm_suffixes [] =
  { "a",        /* Unix assembler */
    "asm",      /* Microcontroller assembly */
    "def",      /* BSO/Tasking definition includes  */
    "inc",      /* Microcontroller include files */
    "ins",      /* Microcontroller include files */
    "s", "sa",  /* Unix assembler */
    "S",        /* cpp-processed Unix assembler */
    "src",      /* BSO/Tasking C compiler output */
    NULL
  };
static const char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static const char *default_C_suffixes [] =
  { "c", "h", NULL };
#if CTAGS                               /* C help for Ctags */
static const char default_C_help [] =
"In C code, any C function is a tag. Use -t to tag typedefs.\n\
Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
Use -d to tag '#define' macro definitions and 'enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using '--declarations', and struct members by using '--members'.";
#else                                   /* C help for Etags */
static const char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
definitions and 'enum' constants are tags unless you specify\n\
'--no-defines'. Global variables are tags unless you specify\n\
'--no-globals' and so are struct members unless you specify\n\
'--no-members'. Use of '--no-globals', '--no-defines' and\n\
'--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using '--declarations'.";
#endif  /* C help for Ctags and Etags */

/* C++ code */
static const char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",                        /* Objective C++ */
    "pdb",                      /* PostScript with C syntax */
    NULL };
static const char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the '--no-members' option.\n\
Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
'operator+'.";
/* Java code.  The help text is const, like every other *_help string.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";

/* Cobol code.  The help text is const, like every other *_help string.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
/* C* code (no dedicated help text is defined here).  */
static const char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

/* Erlang code */
static const char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static const char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
/* Forth code.  The suffix table has internal linkage, like every other
   suffix table in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by ':',\n\
constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran code */
static const char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";

/* HTML input */
static const char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static const char HTML_help [] =
"In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
'h3' headers. Also, tags are 'name=' in anchors and all\n\
occurrences of 'id='.";

/* Lisp code */
static const char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static const char Lisp_help [] =
"In Lisp code, any function defined with 'defun', any variable\n\
defined with 'defvar' or 'defconst', and in general the first\n\
argument of any expression that starts with '(def' in column zero\n\
is a tag.\n\
The '--declarations' option tags \"(defvar foo)\" constructs too.";

/* Lua scripts */
static const char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static const char Lua_help [] =
"In Lua scripts, all functions are tags.";

/* Makefiles are recognized by file name rather than by suffix.  */
static const char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static const char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify '--no-globals'.";

/* Objective C code */
static const char *Objc_suffixes [] =
  { "lm",                       /* Objective lex file */
    "m",                        /* Objective C file */
     NULL };
static const char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code */
static const char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static const char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl code; also selected through the interpreter names below.  */
static const char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static const char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static const char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
'--globals' if you want to tag global variables. Tags for\n\
subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is 'main::SUB'.";

/* PHP code */
static const char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static const char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the '--no-members' option, vars are tags too.";

/* Plain C suffixes (no dedicated help text is defined here).  */
static const char *plain_C_suffixes [] =
  { "pc",                       /* Pro*C file */
     NULL };

/* PostScript code */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };        /* .psw is for PSWrap */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog code */
static const char *Prolog_suffixes [] =
  { "prolog", NULL };
static const char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

/* Python code */
static const char *Python_suffixes [] =
  { "py", NULL };
static const char Python_help [] =
"In Python code, 'def' or 'class' at the beginning of a line\n\
generate a tag.";

/* Ruby code */
static const char *Ruby_suffixes [] =
  { "rb", "ruby", NULL };
static const char Ruby_help [] =
"In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
a line generate a tag.";
/* Can't do the `SCM' or `scm' prefix with a version number. */
static const char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with 'def' or with a\n\
construct whose name starts with 'def'. They also include\n\
variables set with 'set!' at top level in the file.";

/* TeX and LaTeX text.
   NOTE(review): the line between the two paragraphs of the help text was
   missing from this copy; it is restored as an empty output line ("\n\")
   because the literal is continued across it -- confirm upstream.  */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands '\\chapter',\n\
'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
'\\newenvironment' or '\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
'TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";

/* Texinfo files */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Bison and Yacc input */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
/* Help texts for the pseudo-languages and the fallback used by languages
   that have no detailed help of their own.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";

static const char none_help [] =
"'none' is not a real language, it indicates to only do\n\
regexp processing on files.";

static const char no_lang_help [] =
"No detailed help available for this language.";
780 * Table of languages.
782 * It is ok for a given function to be listed under more than one
783 * name. I just didn't.
786 static language lang_names [] =
788 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
789 { "asm", Asm_help, Asm_labels, Asm_suffixes },
790 { "c", default_C_help, default_C_entries, default_C_suffixes },
791 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
792 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
793 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
794 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
795 { "forth", Forth_help, Forth_words, Forth_suffixes },
796 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
797 { "html", HTML_help, HTML_labels, HTML_suffixes },
798 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
799 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
800 { "lua", Lua_help, Lua_functions, Lua_suffixes },
801 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
802 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
803 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
804 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
805 { "php", PHP_help, PHP_functions, PHP_suffixes },
806 { "postscript",PS_help, PS_functions, PS_suffixes },
807 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
808 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
809 { "python", Python_help, Python_functions, Python_suffixes },
810 { "ruby", Ruby_help, Ruby_functions, Ruby_suffixes },
811 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
812 { "tex", TeX_help, TeX_commands, TeX_suffixes },
813 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
814 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
815 { "auto", auto_help }, /* default guessing scheme */
816 { "none", none_help, just_read_file }, /* regexp matching only */
817 { NULL } /* end of list */
/* Print to standard output one line per entry of lang_names (the
   language name followed by its default file names and dot suffixes),
   then a fixed explanation of how the language of a file is guessed
   and which compression formats are accepted.
   NOTE(review): this span looks like web-page extraction residue.
   Every line carries a stray leading number, the brace-only lines are
   absent, and spacing inside the string literals may have been
   collapsed.  Restore it from a pristine copy before compiling.  */
821 static void
822 print_language_names (void)
824 language *lang;
825 const char **name, **ext;
827 puts ("\nThese are the currently supported languages, along with the\n\
828 default file names and dot suffixes:");
/* The table ends at the entry whose name is NULL.  */
829 for (lang = lang_names; lang->name != NULL; lang++)
831 printf (" %-*s", 10, lang->name);
/* Both lists are optional and NULL-terminated.  */
832 if (lang->filenames != NULL)
833 for (name = lang->filenames; *name != NULL; name++)
834 printf (" %s", *name);
835 if (lang->suffixes != NULL)
836 for (ext = lang->suffixes; *ext != NULL; ext++)
837 printf (" .%s", *ext);
838 puts ("");
840 puts ("where 'auto' means use default language for files based on file\n\
841 name suffix, and 'none' means only do regexp processing on files.\n\
842 If no language is specified and no matching suffix is found,\n\
843 the first line of the file is read for a sharp-bang (#!) sequence\n\
844 followed by the name of an interpreter. If no such sequence is found,\n\
845 Fortran is tried first; if no tags are found, C is tried next.\n\
846 When parsing any C file, a \"class\" or \"template\" keyword\n\
847 switches to C++.");
848 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
850 For detailed help on a given language use, for example,\n\
851 etags --help --lang=ada.");
854 #ifndef EMACS_NAME
855 # define EMACS_NAME "standalone"
856 #endif
857 #ifndef VERSION
858 # define VERSION "17.38.1.4"
859 #endif
860 static _Noreturn void
861 print_version (void)
863 char emacs_copyright[] = COPYRIGHT;
865 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
866 puts (emacs_copyright);
867 puts ("This program is distributed under the terms in ETAGS.README");
869 exit (EXIT_SUCCESS);
872 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
873 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
874 #endif
/* Print help and exit successfully; never returns.
   ARGBUFFER is scanned up to its at_end entry.  For every at_language
   entry the help text of that language is printed, and if at least one
   was printed the function exits right away.  Otherwise the general
   usage message, the option descriptions (which differ between ctags
   and etags according to CTAGS), the list of languages, and the bug
   reporting address are printed.
   NOTE(review): this span looks like web-page extraction residue.
   Every line carries a stray leading number, brace-only lines are
   absent, spacing inside the help strings may have been collapsed, and
   the line closing the comment that disables the -C help text is
   missing.  Restore it from a pristine copy before compiling.  */
876 static _Noreturn void
877 print_help (argument *argbuffer)
879 bool help_for_lang = false;
/* Language-specific help: one text per requested language, with an
   empty line between consecutive texts.  */
881 for (; argbuffer->arg_type != at_end; argbuffer++)
882 if (argbuffer->arg_type == at_language)
884 if (help_for_lang)
885 puts ("");
886 puts (argbuffer->lang->help);
887 help_for_lang = true;
/* Language help replaces the general help instead of preceding it.  */
890 if (help_for_lang)
891 exit (EXIT_SUCCESS);
893 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
895 These are the options accepted by %s.\n", progname, progname);
896 puts ("You may use unambiguous abbreviations for the long option names.");
897 puts (" A - as file name means read names from stdin (one per line).\n\
898 Absolute names are stored in the output file as they are.\n\
899 Relative ones are stored relative to the output file's directory.\n");
901 puts ("-a, --append\n\
902 Append tag entries to existing tags file.");
904 puts ("--packages-only\n\
905 For Ada files, only generate tags for packages.");
907 if (CTAGS)
908 puts ("-B, --backward-search\n\
909 Write the search commands for the tag entries using '?', the\n\
910 backward-search command instead of '/', the forward-search command.");
912 /* This option is mostly obsolete, because etags can now automatically
913 detect C++. Retained for backward compatibility and for debugging and
914 experimentation. In principle, we could want to tag as C++ even
915 before any "class" or "template" keyword.
916 puts ("-C, --c++\n\
917 Treat files whose name suffix defaults to C language as C++ files.");
920 puts ("--declarations\n\
921 In C and derived languages, create tags for function declarations,");
922 if (CTAGS)
923 puts ("\tand create tags for extern variables if --globals is used.");
924 else
925 puts
926 ("\tand create tags for extern variables unless --no-globals is used.");
928 if (CTAGS)
929 puts ("-d, --defines\n\
930 Create tag entries for C #define constants and enum constants, too.");
931 else
932 puts ("-D, --no-defines\n\
933 Don't create tag entries for C #define constants and enum constants.\n\
934 This makes the tags file smaller.");
936 if (!CTAGS)
937 puts ("-i FILE, --include=FILE\n\
938 Include a note in tag file indicating that, when searching for\n\
939 a tag, one should also consult the tags file FILE after\n\
940 checking the current file.");
942 puts ("-l LANG, --language=LANG\n\
943 Force the following files to be considered as written in the\n\
944 named language up to the next --language=LANG option.");
946 if (CTAGS)
947 puts ("--globals\n\
948 Create tag entries for global variables in some languages.");
949 else
950 puts ("--no-globals\n\
951 Do not create tag entries for global variables in some\n\
952 languages. This makes the tags file smaller.");
954 puts ("--no-line-directive\n\
955 Ignore #line preprocessor directives in C and derived languages.");
957 if (CTAGS)
958 puts ("--members\n\
959 Create tag entries for members of structures in some languages.");
960 else
961 puts ("--no-members\n\
962 Do not create tag entries for members of structures\n\
963 in some languages.");
965 puts ("-Q, --class-qualify\n\
966 Qualify tag names with their class name in C++, ObjC, and Java.\n\
967 This produces tag names of the form \"class::member\" for C++,\n\
968 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
969 For Objective C, this also produces class methods qualified with\n\
970 their arguments, as in \"foo:bar:baz:more\".");
971 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
972 Make a tag for each line matching a regular expression pattern\n\
973 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
974 files only. REGEXFILE is a file containing one REGEXP per line.\n\
975 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
976 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
977 puts (" If TAGNAME/ is present, the tags created are named.\n\
978 For example Tcl named tags can be created with:\n\
979 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
980 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
981 'm' means to allow multi-line matches, 's' implies 'm' and\n\
982 causes dot to match any character, including newline.");
984 puts ("-R, --no-regex\n\
985 Don't create tags from regexps for the following files.");
987 puts ("-I, --ignore-indentation\n\
988 In C and C++ do not assume that a closing brace in the first\n\
989 column is the final brace of a function or structure definition.");
991 puts ("-o FILE, --output=FILE\n\
992 Write the tags to FILE.");
994 puts ("--parse-stdin=NAME\n\
995 Read from standard input and record tags as belonging to file NAME.");
997 if (CTAGS)
999 puts ("-t, --typedefs\n\
1000 Generate tag entries for C and Ada typedefs.");
1001 puts ("-T, --typedefs-and-c++\n\
1002 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1003 and C++ member functions.");
1006 if (CTAGS)
1007 puts ("-u, --update\n\
1008 Update the tag entries for the given files, leaving tag\n\
1009 entries for other files in place. Currently, this is\n\
1010 implemented by deleting the existing entries for the given\n\
1011 files and then rewriting the new entries at the end of the\n\
1012 tags file. It is often faster to simply rebuild the entire\n\
1013 tag file than to use this.");
1015 if (CTAGS)
1017 puts ("-v, --vgrind\n\
1018 Print on the standard output an index of items intended for\n\
1019 human consumption, similar to the output of vgrind. The index\n\
1020 is sorted, and gives the page number of each item.");
1022 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1023 puts ("-w, --no-duplicates\n\
1024 Do not create duplicate tag entries, for compatibility with\n\
1025 traditional ctags.");
1027 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1028 puts ("-w, --no-warn\n\
1029 Suppress warning messages about duplicate tag entries.");
1031 puts ("-x, --cxref\n\
1032 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1033 The output uses line numbers instead of page numbers, but\n\
1034 beyond that the differences are cosmetic; try both to see\n\
1035 which you like.");
1038 puts ("-V, --version\n\
1039 Print the version of the program.\n\
1040 -h, --help\n\
1041 Print this help message.\n\
1042 Followed by one or more '--language' options prints detailed\n\
1043 help about tag generation for the specified languages.");
1045 print_language_names ();
1047 puts ("");
1048 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1050 exit (EXIT_SUCCESS);
1055 main (int argc, char **argv)
1057 int i;
1058 unsigned int nincluded_files;
1059 char **included_files;
1060 argument *argbuffer;
1061 int current_arg, file_count;
1062 linebuffer filename_lb;
1063 bool help_asked = false;
1064 ptrdiff_t len;
1065 char *optstring;
1066 int opt;
1068 progname = argv[0];
1069 nincluded_files = 0;
1070 included_files = xnew (argc, char *);
1071 current_arg = 0;
1072 file_count = 0;
1074 /* Allocate enough no matter what happens. Overkill, but each one
1075 is small. */
1076 argbuffer = xnew (argc, argument);
1079 * Always find typedefs and structure tags.
1080 * Also default to find macro constants, enum constants, struct
1081 * members and global variables. Do it for both etags and ctags.
1083 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1084 globals = members = true;
1086 /* When the optstring begins with a '-' getopt_long does not rearrange the
1087 non-options arguments to be at the end, but leaves them alone. */
1088 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1089 (CTAGS) ? "BxdtTuvw" : "Di:",
1090 "");
1092 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1093 switch (opt)
1095 case 0:
1096 /* If getopt returns 0, then it has already processed a
1097 long-named option. We should do nothing. */
1098 break;
1100 case 1:
1101 /* This means that a file name has been seen. Record it. */
1102 argbuffer[current_arg].arg_type = at_filename;
1103 argbuffer[current_arg].what = optarg;
1104 len = strlen (optarg);
1105 if (whatlen_max < len)
1106 whatlen_max = len;
1107 ++current_arg;
1108 ++file_count;
1109 break;
1111 case STDIN:
1112 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1113 argbuffer[current_arg].arg_type = at_stdin;
1114 argbuffer[current_arg].what = optarg;
1115 len = strlen (optarg);
1116 if (whatlen_max < len)
1117 whatlen_max = len;
1118 ++current_arg;
1119 ++file_count;
1120 if (parsing_stdin)
1121 fatal ("cannot parse standard input more than once");
1122 parsing_stdin = true;
1123 break;
1125 /* Common options. */
1126 case 'a': append_to_tagfile = true; break;
1127 case 'C': cplusplus = true; break;
1128 case 'f': /* for compatibility with old makefiles */
1129 case 'o':
1130 if (tagfile)
1132 error ("-o option may only be given once.");
1133 suggest_asking_for_help ();
1134 /* NOTREACHED */
1136 tagfile = optarg;
1137 break;
1138 case 'I':
1139 case 'S': /* for backward compatibility */
1140 ignoreindent = true;
1141 break;
1142 case 'l':
1144 language *lang = get_language_from_langname (optarg);
1145 if (lang != NULL)
1147 argbuffer[current_arg].lang = lang;
1148 argbuffer[current_arg].arg_type = at_language;
1149 ++current_arg;
1152 break;
1153 case 'c':
1154 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1155 optarg = concat (optarg, "i", ""); /* memory leak here */
1156 /* FALLTHRU */
1157 case 'r':
1158 argbuffer[current_arg].arg_type = at_regexp;
1159 argbuffer[current_arg].what = optarg;
1160 len = strlen (optarg);
1161 if (whatlen_max < len)
1162 whatlen_max = len;
1163 ++current_arg;
1164 break;
1165 case 'R':
1166 argbuffer[current_arg].arg_type = at_regexp;
1167 argbuffer[current_arg].what = NULL;
1168 ++current_arg;
1169 break;
1170 case 'V':
1171 print_version ();
1172 break;
1173 case 'h':
1174 case 'H':
1175 help_asked = true;
1176 break;
1177 case 'Q':
1178 class_qualify = 1;
1179 break;
1181 /* Etags options */
1182 case 'D': constantypedefs = false; break;
1183 case 'i': included_files[nincluded_files++] = optarg; break;
1185 /* Ctags options. */
1186 case 'B': searchar = '?'; break;
1187 case 'd': constantypedefs = true; break;
1188 case 't': typedefs = true; break;
1189 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1190 case 'u': update = true; break;
1191 case 'v': vgrind_style = true; /*FALLTHRU*/
1192 case 'x': cxref_style = true; break;
1193 case 'w': no_warnings = true; break;
1194 default:
1195 suggest_asking_for_help ();
1196 /* NOTREACHED */
1199 /* No more options. Store the rest of arguments. */
1200 for (; optind < argc; optind++)
1202 argbuffer[current_arg].arg_type = at_filename;
1203 argbuffer[current_arg].what = argv[optind];
1204 len = strlen (argv[optind]);
1205 if (whatlen_max < len)
1206 whatlen_max = len;
1207 ++current_arg;
1208 ++file_count;
1211 argbuffer[current_arg].arg_type = at_end;
1213 if (help_asked)
1214 print_help (argbuffer);
1215 /* NOTREACHED */
1217 if (nincluded_files == 0 && file_count == 0)
1219 error ("no input files specified.");
1220 suggest_asking_for_help ();
1221 /* NOTREACHED */
1224 if (tagfile == NULL)
1225 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1226 cwd = etags_getcwd (); /* the current working directory */
1227 if (cwd[strlen (cwd) - 1] != '/')
1229 char *oldcwd = cwd;
1230 cwd = concat (oldcwd, "/", "");
1231 free (oldcwd);
1234 /* Compute base directory for relative file names. */
1235 if (streq (tagfile, "-")
1236 || strneq (tagfile, "/dev/", 5))
1237 tagfiledir = cwd; /* relative file names are relative to cwd */
1238 else
1240 canonicalize_filename (tagfile);
1241 tagfiledir = absolute_dirname (tagfile, cwd);
1244 linebuffer_init (&lb);
1245 linebuffer_init (&filename_lb);
1246 linebuffer_init (&filebuf);
1247 linebuffer_init (&token_name);
1249 if (!CTAGS)
1251 if (streq (tagfile, "-"))
1253 tagf = stdout;
1254 SET_BINARY (fileno (stdout));
1256 else
1257 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1258 if (tagf == NULL)
1259 pfatal (tagfile);
1263 * Loop through files finding functions.
1265 for (i = 0; i < current_arg; i++)
1267 static language *lang; /* non-NULL if language is forced */
1268 char *this_file;
1270 switch (argbuffer[i].arg_type)
1272 case at_language:
1273 lang = argbuffer[i].lang;
1274 break;
1275 case at_regexp:
1276 analyze_regex (argbuffer[i].what);
1277 break;
1278 case at_filename:
1279 this_file = argbuffer[i].what;
1280 /* Input file named "-" means read file names from stdin
1281 (one per line) and use them. */
1282 if (streq (this_file, "-"))
1284 if (parsing_stdin)
1285 fatal ("cannot parse standard input "
1286 "AND read file names from it");
1287 while (readline_internal (&filename_lb, stdin, "-") > 0)
1288 process_file_name (filename_lb.buffer, lang);
1290 else
1291 process_file_name (this_file, lang);
1292 break;
1293 case at_stdin:
1294 this_file = argbuffer[i].what;
1295 process_file (stdin, this_file, lang);
1296 break;
1297 default:
1298 error ("internal error: arg_type");
1302 free_regexps ();
1303 free (lb.buffer);
1304 free (filebuf.buffer);
1305 free (token_name.buffer);
1307 if (!CTAGS || cxref_style)
1309 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1310 put_entries (nodehead);
1311 free_tree (nodehead);
1312 nodehead = NULL;
1313 if (!CTAGS)
1315 fdesc *fdp;
1317 /* Output file entries that have no tags. */
1318 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1319 if (!fdp->written)
1320 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1322 while (nincluded_files-- > 0)
1323 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1325 if (fclose (tagf) == EOF)
1326 pfatal (tagfile);
1329 exit (EXIT_SUCCESS);
1332 /* From here on, we are in (CTAGS && !cxref_style) */
1333 if (update)
1335 char *cmd =
1336 xmalloc (strlen (tagfile) + whatlen_max +
1337 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1338 for (i = 0; i < current_arg; ++i)
1340 switch (argbuffer[i].arg_type)
1342 case at_filename:
1343 case at_stdin:
1344 break;
1345 default:
1346 continue; /* the for loop */
1348 char *z = stpcpy (cmd, "mv ");
1349 z = stpcpy (z, tagfile);
1350 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1351 z = stpcpy (z, argbuffer[i].what);
1352 z = stpcpy (z, "\t' OTAGS >");
1353 z = stpcpy (z, tagfile);
1354 strcpy (z, ";rm OTAGS");
1355 if (system (cmd) != EXIT_SUCCESS)
1356 fatal ("failed to execute shell command");
1358 free (cmd);
1359 append_to_tagfile = true;
1362 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1363 if (tagf == NULL)
1364 pfatal (tagfile);
1365 put_entries (nodehead); /* write all the tags (CTAGS) */
1366 free_tree (nodehead);
1367 nodehead = NULL;
1368 if (fclose (tagf) == EOF)
1369 pfatal (tagfile);
1371 if (CTAGS)
1372 if (append_to_tagfile || update)
1374 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1375 /* Maybe these should be used:
1376 setenv ("LC_COLLATE", "C", 1);
1377 setenv ("LC_ALL", "C", 1); */
1378 char *z = stpcpy (cmd, "sort -u -o ");
1379 z = stpcpy (z, tagfile);
1380 *z++ = ' ';
1381 strcpy (z, tagfile);
1382 exit (system (cmd));
1384 return EXIT_SUCCESS;
1389 * Return a compressor given the file name. If EXTPTR is non-zero,
1390 * return a pointer into FILE where the compressor-specific
1391 * extension begins. If no compressor is found, NULL is returned
1392 * and EXTPTR is not significant.
1393 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1395 static compressor *
1396 get_compressor_from_suffix (char *file, char **extptr)
1398 compressor *compr;
1399 char *slash, *suffix;
1401 /* File has been processed by canonicalize_filename,
1402 so we don't need to consider backslashes on DOS_NT. */
1403 slash = strrchr (file, '/');
1404 suffix = strrchr (file, '.');
1405 if (suffix == NULL || suffix < slash)
1406 return NULL;
1407 if (extptr != NULL)
1408 *extptr = suffix;
1409 suffix += 1;
1410 /* Let those poor souls who live with DOS 8+3 file name limits get
1411 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1412 Only the first do loop is run if not MSDOS */
1415 for (compr = compressors; compr->suffix != NULL; compr++)
1416 if (streq (compr->suffix, suffix))
1417 return compr;
1418 if (!MSDOS)
1419 break; /* do it only once: not really a loop */
1420 if (extptr != NULL)
1421 *extptr = ++suffix;
1422 } while (*suffix != '\0');
1423 return NULL;
1429 * Return a language given the name.
1431 static language *
1432 get_language_from_langname (const char *name)
1434 language *lang;
1436 if (name == NULL)
1437 error ("empty language name");
1438 else
1440 for (lang = lang_names; lang->name != NULL; lang++)
1441 if (streq (name, lang->name))
1442 return lang;
1443 error ("unknown language \"%s\"", name);
1446 return NULL;
1451 * Return a language given the interpreter name.
1453 static language *
1454 get_language_from_interpreter (char *interpreter)
1456 language *lang;
1457 const char **iname;
1459 if (interpreter == NULL)
1460 return NULL;
1461 for (lang = lang_names; lang->name != NULL; lang++)
1462 if (lang->interpreters != NULL)
1463 for (iname = lang->interpreters; *iname != NULL; iname++)
1464 if (streq (*iname, interpreter))
1465 return lang;
1467 return NULL;
1473 * Return a language given the file name.
1475 static language *
1476 get_language_from_filename (char *file, int case_sensitive)
1478 language *lang;
1479 const char **name, **ext, *suffix;
1481 /* Try whole file name first. */
1482 for (lang = lang_names; lang->name != NULL; lang++)
1483 if (lang->filenames != NULL)
1484 for (name = lang->filenames; *name != NULL; name++)
1485 if ((case_sensitive)
1486 ? streq (*name, file)
1487 : strcaseeq (*name, file))
1488 return lang;
1490 /* If not found, try suffix after last dot. */
1491 suffix = strrchr (file, '.');
1492 if (suffix == NULL)
1493 return NULL;
1494 suffix += 1;
1495 for (lang = lang_names; lang->name != NULL; lang++)
1496 if (lang->suffixes != NULL)
1497 for (ext = lang->suffixes; *ext != NULL; ext++)
1498 if ((case_sensitive)
1499 ? streq (*ext, suffix)
1500 : strcaseeq (*ext, suffix))
1501 return lang;
1502 return NULL;
1507 * This routine is called on each file argument.
1509 static void
1510 process_file_name (char *file, language *lang)
1512 FILE *inf;
1513 fdesc *fdp;
1514 compressor *compr;
1515 char *compressed_name, *uncompressed_name;
1516 char *ext, *real_name, *tmp_name;
1517 int retval;
1519 canonicalize_filename (file);
1520 if (streq (file, tagfile) && !streq (tagfile, "-"))
1522 error ("skipping inclusion of %s in self.", file);
1523 return;
1525 compr = get_compressor_from_suffix (file, &ext);
1526 if (compr)
1528 compressed_name = file;
1529 uncompressed_name = savenstr (file, ext - file);
1531 else
1533 compressed_name = NULL;
1534 uncompressed_name = file;
1537 /* If the canonicalized uncompressed name
1538 has already been dealt with, skip it silently. */
1539 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1541 assert (fdp->infname != NULL);
1542 if (streq (uncompressed_name, fdp->infname))
1543 goto cleanup;
1546 inf = fopen (file, "r" FOPEN_BINARY);
1547 if (inf)
1548 real_name = file;
1549 else
1551 int file_errno = errno;
1552 if (compressed_name)
1554 /* Try with the given suffix. */
1555 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1556 if (inf)
1557 real_name = uncompressed_name;
1559 else
1561 /* Try all possible suffixes. */
1562 for (compr = compressors; compr->suffix != NULL; compr++)
1564 compressed_name = concat (file, ".", compr->suffix);
1565 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1566 if (inf)
1568 real_name = compressed_name;
1569 break;
1571 if (MSDOS)
1573 char *suf = compressed_name + strlen (file);
1574 size_t suflen = strlen (compr->suffix) + 1;
1575 for ( ; suf[1]; suf++, suflen--)
1577 memmove (suf, suf + 1, suflen);
1578 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1579 if (inf)
1581 real_name = compressed_name;
1582 break;
1585 if (inf)
1586 break;
1588 free (compressed_name);
1589 compressed_name = NULL;
1592 if (! inf)
1594 errno = file_errno;
1595 perror (file);
1596 goto cleanup;
1600 if (real_name == compressed_name)
1602 fclose (inf);
1603 tmp_name = etags_mktmp ();
1604 if (!tmp_name)
1605 inf = NULL;
1606 else
1608 #if MSDOS || defined (DOS_NT)
1609 char *cmd1 = concat (compr->command, " \"", real_name);
1610 char *cmd = concat (cmd1, "\" > ", tmp_name);
1611 #else
1612 char *cmd1 = concat (compr->command, " '", real_name);
1613 char *cmd = concat (cmd1, "' > ", tmp_name);
1614 #endif
1615 free (cmd1);
1616 int tmp_errno;
1617 if (system (cmd) == -1)
1619 inf = NULL;
1620 tmp_errno = EINVAL;
1622 else
1624 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1625 tmp_errno = errno;
1627 free (cmd);
1628 errno = tmp_errno;
1631 if (!inf)
1633 perror (real_name);
1634 goto cleanup;
1638 process_file (inf, uncompressed_name, lang);
1640 retval = fclose (inf);
1641 if (real_name == compressed_name)
1643 remove (tmp_name);
1644 free (tmp_name);
1646 if (retval < 0)
1647 pfatal (file);
1649 cleanup:
1650 if (compressed_name != file)
1651 free (compressed_name);
1652 if (uncompressed_name != file)
1653 free (uncompressed_name);
1654 last_node = NULL;
1655 curfdp = NULL;
1656 return;
1659 static void
1660 process_file (FILE *fh, char *fn, language *lang)
1662 static const fdesc emptyfdesc;
1663 fdesc *fdp;
1665 infilename = fn;
1666 /* Create a new input file description entry. */
1667 fdp = xnew (1, fdesc);
1668 *fdp = emptyfdesc;
1669 fdp->next = fdhead;
1670 fdp->infname = savestr (fn);
1671 fdp->lang = lang;
1672 fdp->infabsname = absolute_filename (fn, cwd);
1673 fdp->infabsdir = absolute_dirname (fn, cwd);
1674 if (filename_is_absolute (fn))
1676 /* An absolute file name. Canonicalize it. */
1677 fdp->taggedfname = absolute_filename (fn, NULL);
1679 else
1681 /* A file name relative to cwd. Make it relative
1682 to the directory of the tags file. */
1683 fdp->taggedfname = relative_filename (fn, tagfiledir);
1685 fdp->usecharno = true; /* use char position when making tags */
1686 fdp->prop = NULL;
1687 fdp->written = false; /* not written on tags file yet */
1689 fdhead = fdp;
1690 curfdp = fdhead; /* the current file description */
1692 find_entries (fh);
1694 /* If not Ctags, and if this is not metasource and if it contained no #line
1695 directives, we can write the tags and free all nodes pointing to
1696 curfdp. */
1697 if (!CTAGS
1698 && curfdp->usecharno /* no #line directives in this file */
1699 && !curfdp->lang->metasource)
1701 node *np, *prev;
1703 /* Look for the head of the sublist relative to this file. See add_node
1704 for the structure of the node tree. */
1705 prev = NULL;
1706 for (np = nodehead; np != NULL; prev = np, np = np->left)
1707 if (np->fdp == curfdp)
1708 break;
1710 /* If we generated tags for this file, write and delete them. */
1711 if (np != NULL)
1713 /* This is the head of the last sublist, if any. The following
1714 instructions depend on this being true. */
1715 assert (np->left == NULL);
1717 assert (fdhead == curfdp);
1718 assert (last_node->fdp == curfdp);
1719 put_entries (np); /* write tags for file curfdp->taggedfname */
1720 free_tree (np); /* remove the written nodes */
1721 if (prev == NULL)
1722 nodehead = NULL; /* no nodes left */
1723 else
1724 prev->left = NULL; /* delete the pointer to the sublist */
1729 static void
1730 reset_input (FILE *inf)
1732 if (fseek (inf, 0, SEEK_SET) != 0)
1733 perror (infilename);
1737 * This routine opens the specified file and calls the function
1738 * which finds the function and type definitions.
1740 static void
1741 find_entries (FILE *inf)
1743 char *cp;
1744 language *lang = curfdp->lang;
1745 Lang_function *parser = NULL;
1747 /* If user specified a language, use it. */
1748 if (lang != NULL && lang->function != NULL)
1750 parser = lang->function;
1753 /* Else try to guess the language given the file name. */
1754 if (parser == NULL)
1756 lang = get_language_from_filename (curfdp->infname, true);
1757 if (lang != NULL && lang->function != NULL)
1759 curfdp->lang = lang;
1760 parser = lang->function;
1764 /* Else look for sharp-bang as the first two characters. */
1765 if (parser == NULL
1766 && readline_internal (&lb, inf, infilename) > 0
1767 && lb.len >= 2
1768 && lb.buffer[0] == '#'
1769 && lb.buffer[1] == '!')
1771 char *lp;
1773 /* Set lp to point at the first char after the last slash in the
1774 line or, if no slashes, at the first nonblank. Then set cp to
1775 the first successive blank and terminate the string. */
1776 lp = strrchr (lb.buffer+2, '/');
1777 if (lp != NULL)
1778 lp += 1;
1779 else
1780 lp = skip_spaces (lb.buffer + 2);
1781 cp = skip_non_spaces (lp);
1782 *cp = '\0';
1784 if (strlen (lp) > 0)
1786 lang = get_language_from_interpreter (lp);
1787 if (lang != NULL && lang->function != NULL)
1789 curfdp->lang = lang;
1790 parser = lang->function;
1795 reset_input (inf);
1797 /* Else try to guess the language given the case insensitive file name. */
1798 if (parser == NULL)
1800 lang = get_language_from_filename (curfdp->infname, false);
1801 if (lang != NULL && lang->function != NULL)
1803 curfdp->lang = lang;
1804 parser = lang->function;
1808 /* Else try Fortran or C. */
1809 if (parser == NULL)
1811 node *old_last_node = last_node;
1813 curfdp->lang = get_language_from_langname ("fortran");
1814 find_entries (inf);
1816 if (old_last_node == last_node)
1817 /* No Fortran entries found. Try C. */
1819 reset_input (inf);
1820 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1821 find_entries (inf);
1823 return;
1826 if (!no_line_directive
1827 && curfdp->lang != NULL && curfdp->lang->metasource)
1828 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1829 file, or anyway we parsed a file that is automatically generated from
1830 this one. If this is the case, the bingo.c file contained #line
1831 directives that generated tags pointing to this file. Let's delete
1832 them all before parsing this file, which is the real source. */
1834 fdesc **fdpp = &fdhead;
1835 while (*fdpp != NULL)
1836 if (*fdpp != curfdp
1837 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1838 /* We found one of those! We must delete both the file description
1839 and all tags referring to it. */
1841 fdesc *badfdp = *fdpp;
1843 /* Delete the tags referring to badfdp->taggedfname
1844 that were obtained from badfdp->infname. */
1845 invalidate_nodes (badfdp, &nodehead);
1847 *fdpp = badfdp->next; /* remove the bad description from the list */
1848 free_fdesc (badfdp);
1850 else
1851 fdpp = &(*fdpp)->next; /* advance the list pointer */
1854 assert (parser != NULL);
1856 /* Generic initializations before reading from file. */
1857 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1859 /* Generic initializations before parsing file with readline. */
1860 lineno = 0; /* reset global line number */
1861 charno = 0; /* reset global char number */
1862 linecharno = 0; /* reset global char number of line start */
1864 parser (inf);
1866 regex_tag_multiline ();
1871 * Check whether an implicitly named tag should be created,
1872 * then call `pfnote'.
1873 * NAME is a string that is internally copied by this function.
1875 * TAGS format specification
1876 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1877 * The following is explained in some more detail in etc/ETAGS.EBNF.
1879 * make_tag creates tags with "implicit tag names" (unnamed tags)
1880 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1881 * 1. NAME does not contain any of the characters in NONAM;
1882 * 2. LINESTART contains name as either a rightmost, or rightmost but
1883 * one character, substring;
1884 * 3. the character, if any, immediately before NAME in LINESTART must
1885 * be a character in NONAM;
1886 * 4. the character, if any, immediately after NAME in LINESTART must
1887 * also be a character in NONAM.
1889 * The implementation uses the notinname() macro, which recognizes the
1890 * characters stored in the string `nonam'.
1891 * etags.el needs to use the same characters that are in NONAM.
1893 static void
1894 make_tag (const char *name, /* tag name, or NULL if unnamed */
1895 int namelen, /* tag length */
1896 bool is_func, /* tag is a function */
1897 char *linestart, /* start of the line where tag is */
1898 int linelen, /* length of the line where tag is */
1899 int lno, /* line number */
1900 long int cno) /* character number */
1902 bool named = (name != NULL && namelen > 0);
1903 char *nname = NULL;
1905 if (!CTAGS && named) /* maybe set named to false */
1906 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1907 such that etags.el can guess a name from it. */
1909 int i;
1910 register const char *cp = name;
1912 for (i = 0; i < namelen; i++)
1913 if (notinname (*cp++))
1914 break;
1915 if (i == namelen) /* rule #1 */
1917 cp = linestart + linelen - namelen;
1918 if (notinname (linestart[linelen-1]))
1919 cp -= 1; /* rule #4 */
1920 if (cp >= linestart /* rule #2 */
1921 && (cp == linestart
1922 || notinname (cp[-1])) /* rule #3 */
1923 && strneq (name, cp, namelen)) /* rule #2 */
1924 named = false; /* use implicit tag name */
1928 if (named)
1929 nname = savenstr (name, namelen);
1931 pfnote (nname, is_func, linestart, linelen, lno, cno);
1934 /* Record a tag. */
1935 static void
1936 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1937 long int cno)
1938 /* tag name, or NULL if unnamed */
1939 /* tag is a function */
1940 /* start of the line where tag is */
1941 /* length of the line where tag is */
1942 /* line number */
1943 /* character number */
1945 register node *np;
1947 assert (name == NULL || name[0] != '\0');
1948 if (CTAGS && name == NULL)
1949 return;
1951 np = xnew (1, node);
1953 /* If ctags mode, change name "main" to M<thisfilename>. */
1954 if (CTAGS && !cxref_style && streq (name, "main"))
1956 char *fp = strrchr (curfdp->taggedfname, '/');
1957 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1958 fp = strrchr (np->name, '.');
1959 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1960 fp[0] = '\0';
1962 else
1963 np->name = name;
1964 np->valid = true;
1965 np->been_warned = false;
1966 np->fdp = curfdp;
1967 np->is_func = is_func;
1968 np->lno = lno;
1969 if (np->fdp->usecharno)
1970 /* Our char numbers are 0-base, because of C language tradition?
1971 ctags compatibility? old versions compatibility? I don't know.
1972 Anyway, since emacs's are 1-base we expect etags.el to take care
1973 of the difference. If we wanted to have 1-based numbers, we would
1974 uncomment the +1 below. */
1975 np->cno = cno /* + 1 */ ;
1976 else
1977 np->cno = invalidcharno;
1978 np->left = np->right = NULL;
1979 if (CTAGS && !cxref_style)
1981 if (strlen (linestart) < 50)
1982 np->regex = concat (linestart, "$", "");
1983 else
1984 np->regex = savenstr (linestart, 50);
1986 else
1987 np->regex = savenstr (linestart, linelen);
1989 add_node (np, &nodehead);
1993 * free_tree ()
1994 * recurse on left children, iterate on right children.
1996 static void
1997 free_tree (register node *np)
1999 while (np)
2001 register node *node_right = np->right;
2002 free_tree (np->left);
2003 free (np->name);
2004 free (np->regex);
2005 free (np);
2006 np = node_right;
2011 * free_fdesc ()
2012 * delete a file description
2014 static void
2015 free_fdesc (register fdesc *fdp)
2017 free (fdp->infname);
2018 free (fdp->infabsname);
2019 free (fdp->infabsdir);
2020 free (fdp->taggedfname);
2021 free (fdp->prop);
2022 free (fdp);
2026 * add_node ()
2027 * Adds a node to the tree of nodes. In etags mode, sort by file
2028 * name. In ctags mode, sort by tag name. Make no attempt at
2029 * balancing.
2031 * add_node is the only function allowed to add nodes, so it can
2032 * maintain state.
2034 static void
2035 add_node (node *np, node **cur_node_p)
2037 register int dif;
2038 register node *cur_node = *cur_node_p;
2040 if (cur_node == NULL)
2042 *cur_node_p = np;
2043 last_node = np;
2044 return;
2047 if (!CTAGS)
2048 /* Etags Mode */
2050 /* For each file name, tags are in a linked sublist on the right
2051 pointer. The first tags of different files are a linked list
2052 on the left pointer. last_node points to the end of the last
2053 used sublist. */
2054 if (last_node != NULL && last_node->fdp == np->fdp)
2056 /* Let's use the same sublist as the last added node. */
2057 assert (last_node->right == NULL);
2058 last_node->right = np;
2059 last_node = np;
2061 else if (cur_node->fdp == np->fdp)
2063 /* Scanning the list we found the head of a sublist which is
2064 good for us. Let's scan this sublist. */
2065 add_node (np, &cur_node->right);
2067 else
2068 /* The head of this sublist is not good for us. Let's try the
2069 next one. */
2070 add_node (np, &cur_node->left);
2071 } /* if ETAGS mode */
2073 else
2075 /* Ctags Mode */
2076 dif = strcmp (np->name, cur_node->name);
2079 * If this tag name matches an existing one, then
2080 * do not add the node, but maybe print a warning.
2082 if (no_duplicates && !dif)
2084 if (np->fdp == cur_node->fdp)
2086 if (!no_warnings)
2088 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2089 np->fdp->infname, lineno, np->name);
2090 fprintf (stderr, "Second entry ignored\n");
2093 else if (!cur_node->been_warned && !no_warnings)
2095 fprintf
2096 (stderr,
2097 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2098 np->fdp->infname, cur_node->fdp->infname, np->name);
2099 cur_node->been_warned = true;
2101 return;
2104 /* Actually add the node */
2105 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2106 } /* if CTAGS mode */
2110 * invalidate_nodes ()
2111 * Scan the node tree and invalidate all nodes pointing to the
2112 * given file description (CTAGS case) or free them (ETAGS case).
2114 static void
2115 invalidate_nodes (fdesc *badfdp, node **npp)
2117 node *np = *npp;
2119 if (np == NULL)
2120 return;
2122 if (CTAGS)
2124 if (np->left != NULL)
2125 invalidate_nodes (badfdp, &np->left);
2126 if (np->fdp == badfdp)
2127 np->valid = false;
2128 if (np->right != NULL)
2129 invalidate_nodes (badfdp, &np->right);
2131 else
2133 assert (np->fdp != NULL);
2134 if (np->fdp == badfdp)
2136 *npp = np->left; /* detach the sublist from the list */
2137 np->left = NULL; /* isolate it */
2138 free_tree (np); /* free it */
2139 invalidate_nodes (badfdp, npp);
2141 else
2142 invalidate_nodes (badfdp, &np->left);
2147 static int total_size_of_entries (node *);
2148 static int number_len (long) ATTRIBUTE_CONST;
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero counts as one digit.  */
static int
number_len (long int num)
{
  int digits;

  /* Start at one digit and add one per further division by ten.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;

  return digits;
}
2161 * Return total number of characters that put_entries will output for
2162 * the nodes in the linked list at the right of the specified node.
2163 * This count is irrelevant with etags.el since emacs 19.34 at least,
2164 * but is still supplied for backward compatibility.
2166 static int
2167 total_size_of_entries (register node *np)
2169 register int total = 0;
2171 for (; np != NULL; np = np->right)
2172 if (np->valid)
2174 total += strlen (np->regex) + 1; /* pat\177 */
2175 if (np->name != NULL)
2176 total += strlen (np->name) + 1; /* name\001 */
2177 total += number_len ((long) np->lno) + 1; /* lno, */
2178 if (np->cno != invalidcharno) /* cno */
2179 total += number_len (np->cno);
2180 total += 1; /* newline */
2183 return total;
2186 static void
2187 put_entries (register node *np)
2189 register char *sp;
2190 static fdesc *fdp = NULL;
2192 if (np == NULL)
2193 return;
2195 /* Output subentries that precede this one */
2196 if (CTAGS)
2197 put_entries (np->left);
2199 /* Output this entry */
2200 if (np->valid)
2202 if (!CTAGS)
2204 /* Etags mode */
2205 if (fdp != np->fdp)
2207 fdp = np->fdp;
2208 fprintf (tagf, "\f\n%s,%d\n",
2209 fdp->taggedfname, total_size_of_entries (np));
2210 fdp->written = true;
2212 fputs (np->regex, tagf);
2213 fputc ('\177', tagf);
2214 if (np->name != NULL)
2216 fputs (np->name, tagf);
2217 fputc ('\001', tagf);
2219 fprintf (tagf, "%d,", np->lno);
2220 if (np->cno != invalidcharno)
2221 fprintf (tagf, "%ld", np->cno);
2222 fputs ("\n", tagf);
2224 else
2226 /* Ctags mode */
2227 if (np->name == NULL)
2228 error ("internal error: NULL name in ctags mode.");
2230 if (cxref_style)
2232 if (vgrind_style)
2233 fprintf (stdout, "%s %s %d\n",
2234 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2235 else
2236 fprintf (stdout, "%-16s %3d %-16s %s\n",
2237 np->name, np->lno, np->fdp->taggedfname, np->regex);
2239 else
2241 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2243 if (np->is_func)
2244 { /* function or #define macro with args */
2245 putc (searchar, tagf);
2246 putc ('^', tagf);
2248 for (sp = np->regex; *sp; sp++)
2250 if (*sp == '\\' || *sp == searchar)
2251 putc ('\\', tagf);
2252 putc (*sp, tagf);
2254 putc (searchar, tagf);
2256 else
2257 { /* anything else; text pattern inadequate */
2258 fprintf (tagf, "%d", np->lno);
2260 putc ('\n', tagf);
2263 } /* if this node contains a valid tag */
2265 /* Output subentries that follow this one */
2266 put_entries (np->right);
2267 if (!CTAGS)
2268 put_entries (np->left);
/* C extensions: bit masks describing which C-like language is being
   parsed.  */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classification of a token, as stored in the keyword table below.
 */
enum sym_type
{
  st_none,                      /* not a recognized keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline.  */
2346 /*%<*/
2347 /* C code produced by gperf version 3.0.1 */
2348 /* Command-line: gperf -m 5 */
2349 /* Computed positions: -k'2-3' */
2351 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2352 /* maximum key range = 33, duplicates = 0 */
/* Perfect hash over the keyword table: LEN plus the association
   values of the second and, for words that are not exactly two
   characters long, third characters of STR.  The caller must make
   sure those characters exist.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Every length other than 2 also mixes in the third character.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2400 static struct C_stab_entry *
2401 in_word_set (register const char *str, register unsigned int len)
2403 enum
2405 TOTAL_KEYWORDS = 33,
2406 MIN_WORD_LENGTH = 2,
2407 MAX_WORD_LENGTH = 15,
2408 MIN_HASH_VALUE = 2,
2409 MAX_HASH_VALUE = 34
2412 static struct C_stab_entry wordlist[] =
2414 {""}, {""},
2415 {"if", 0, st_C_ignore},
2416 {"GTY", 0, st_C_attribute},
2417 {"@end", 0, st_C_objend},
2418 {"union", 0, st_C_struct},
2419 {"define", 0, st_C_define},
2420 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2421 {"template", 0, st_C_template},
2422 {"operator", C_PLPL, st_C_operator},
2423 {"@interface", 0, st_C_objprot},
2424 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2425 {"friend", C_PLPL, st_C_ignore},
2426 {"typedef", 0, st_C_typedef},
2427 {"return", 0, st_C_ignore},
2428 {"@implementation",0, st_C_objimpl},
2429 {"@protocol", 0, st_C_objprot},
2430 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2431 {"extern", 0, st_C_extern},
2432 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2433 {"struct", 0, st_C_struct},
2434 {"domain", C_STAR, st_C_struct},
2435 {"switch", 0, st_C_ignore},
2436 {"enum", 0, st_C_enum},
2437 {"for", 0, st_C_ignore},
2438 {"namespace", C_PLPL, st_C_struct},
2439 {"class", 0, st_C_class},
2440 {"while", 0, st_C_ignore},
2441 {"undef", 0, st_C_define},
2442 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2443 {"__attribute__", 0, st_C_attribute},
2444 {"SYSCALL", 0, st_C_gnumacro},
2445 {"ENTRY", 0, st_C_gnumacro},
2446 {"PSEUDO", 0, st_C_gnumacro},
2447 {"DEFUN", 0, st_C_gnumacro}
2450 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2452 int key = hash (str, len);
2454 if (key <= MAX_HASH_VALUE && key >= 0)
2456 const char *s = wordlist[key].name;
2458 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2459 return &wordlist[key];
2462 return 0;
2464 /*%>*/
2466 static enum sym_type
2467 C_symtype (char *str, int len, int c_ext)
2469 register struct C_stab_entry *se = in_word_set (str, len);
2471 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2472 return st_none;
2473 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
/* True while looking at an __attribute__ construct.  */
static bool inattribute;
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen.  */
static bool fvextern;
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 * Until a class name has been saved it points to a placeholder string.
 */
static const char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In
    this case the token contained here is the pattern that will be used to
    create a tag.
  */
  bool valid;                   /* when false, do not create a tag; the
                                   token should be invalidated whenever
                                   a state machine is reset
                                   prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2591 * Variables and functions for dealing with nested structures.
2592 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2594 static void pushclass_above (int, char *, int);
2595 static void popclass_above (int);
2596 static void write_classname (linebuffer *, const char *qualifier);
/* Stack for nested declaration tags.  CNAME and BRACELEV are parallel
   arrays with SIZE slots each, of which the first NL are in use.  */
static struct
{
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum).  */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expects a variable named bracelev to be in scope where it is used.  */
#define instruct        (structdef == snone && nestlev > 0                  \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2610 static void
2611 pushclass_above (int bracelev, char *str, int len)
2613 int nl;
2615 popclass_above (bracelev);
2616 nl = cstack.nl;
2617 if (nl >= cstack.size)
2619 int size = cstack.size *= 2;
2620 xrnew (cstack.cname, size, char *);
2621 xrnew (cstack.bracelev, size, int);
2623 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2624 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2625 cstack.bracelev[nl] = bracelev;
2626 cstack.nl = nl + 1;
2629 static void
2630 popclass_above (int bracelev)
2632 int nl;
2634 for (nl = cstack.nl - 1;
2635 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2636 nl--)
2638 free (cstack.cname[nl]);
2639 cstack.nl = nl;
2643 static void
2644 write_classname (linebuffer *cn, const char *qualifier)
2646 int i, len;
2647 int qlen = strlen (qualifier);
2649 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2651 len = 0;
2652 cn->len = 0;
2653 cn->buffer[0] = '\0';
2655 else
2657 len = strlen (cstack.cname[0]);
2658 linebuffer_setlen (cn, len);
2659 strcpy (cn->buffer, cstack.cname[0]);
2661 for (i = 1; i < cstack.nl; i++)
2663 char *s = cstack.cname[i];
2664 if (s == NULL)
2665 continue;
2666 linebuffer_setlen (cn, len + qlen + strlen (s));
2667 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
/* Token analysis and tag creation for C-like languages.  */
static bool consider_token (char *str, int len, int c, int *c_extp,
                            int bracelev, int parlev, bool *is_func_or_var);
static void make_C_tag (bool isfun);
2676 * consider_token ()
2677 * checks to see if the current token is at the start of a
2678 * function or variable, or corresponds to a typedef, or
2679 * is a struct/union/enum tag, or #define, or an enum constant.
2681 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2682 * with args. C_EXTP points to which language we are looking at.
2684 * Globals
2685 * fvdef IN OUT
2686 * structdef IN OUT
2687 * definedef IN OUT
2688 * typdef IN OUT
2689 * objdef IN OUT
2692 static bool
2693 consider_token (char *str, int len, int c, int *c_extp,
2694 int bracelev, int parlev, bool *is_func_or_var)
2695 /* IN: token pointer */
2696 /* IN: token length */
2697 /* IN: first char after the token */
2698 /* IN, OUT: C extensions mask */
2699 /* IN: brace level */
2700 /* IN: parenthesis level */
2701 /* OUT: function or variable found */
2703 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2704 structtype is the type of the preceding struct-like keyword, and
2705 structbracelev is the brace level where it has been seen. */
2706 static enum sym_type structtype;
2707 static int structbracelev;
2708 static enum sym_type toktype;
2711 toktype = C_symtype (str, len, *c_extp);
2714 * Skip __attribute__
2716 if (toktype == st_C_attribute)
2718 inattribute = true;
2719 return false;
2723 * Advance the definedef state machine.
2725 switch (definedef)
2727 case dnone:
2728 /* We're not on a preprocessor line. */
2729 if (toktype == st_C_gnumacro)
2731 fvdef = fdefunkey;
2732 return false;
2734 break;
2735 case dsharpseen:
2736 if (toktype == st_C_define)
2738 definedef = ddefineseen;
2740 else
2742 definedef = dignorerest;
2744 return false;
2745 case ddefineseen:
2747 * Make a tag for any macro, unless it is a constant
2748 * and constantypedefs is false.
2750 definedef = dignorerest;
2751 *is_func_or_var = (c == '(');
2752 if (!*is_func_or_var && !constantypedefs)
2753 return false;
2754 else
2755 return true;
2756 case dignorerest:
2757 return false;
2758 default:
2759 error ("internal error: definedef value.");
2763 * Now typedefs
2765 switch (typdef)
2767 case tnone:
2768 if (toktype == st_C_typedef)
2770 if (typedefs)
2771 typdef = tkeyseen;
2772 fvextern = false;
2773 fvdef = fvnone;
2774 return false;
2776 break;
2777 case tkeyseen:
2778 switch (toktype)
2780 case st_none:
2781 case st_C_class:
2782 case st_C_struct:
2783 case st_C_enum:
2784 typdef = ttypeseen;
2785 break;
2786 default:
2787 break;
2789 break;
2790 case ttypeseen:
2791 if (structdef == snone && fvdef == fvnone)
2793 fvdef = fvnameseen;
2794 return true;
2796 break;
2797 case tend:
2798 switch (toktype)
2800 case st_C_class:
2801 case st_C_struct:
2802 case st_C_enum:
2803 return false;
2804 default:
2805 return true;
2807 default:
2808 break;
2811 switch (toktype)
2813 case st_C_javastruct:
2814 if (structdef == stagseen)
2815 structdef = scolonseen;
2816 return false;
2817 case st_C_template:
2818 case st_C_class:
2819 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2820 && bracelev == 0
2821 && definedef == dnone && structdef == snone
2822 && typdef == tnone && fvdef == fvnone)
2823 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2824 if (toktype == st_C_template)
2825 break;
2826 /* FALLTHRU */
2827 case st_C_struct:
2828 case st_C_enum:
2829 if (parlev == 0
2830 && fvdef != vignore
2831 && (typdef == tkeyseen
2832 || (typedefs_or_cplusplus && structdef == snone)))
2834 structdef = skeyseen;
2835 structtype = toktype;
2836 structbracelev = bracelev;
2837 if (fvdef == fvnameseen)
2838 fvdef = fvnone;
2840 return false;
2841 default:
2842 break;
2845 if (structdef == skeyseen)
2847 structdef = stagseen;
2848 return true;
2851 if (typdef != tnone)
2852 definedef = dnone;
2854 /* Detect Objective C constructs. */
2855 switch (objdef)
2857 case onone:
2858 switch (toktype)
2860 case st_C_objprot:
2861 objdef = oprotocol;
2862 return false;
2863 case st_C_objimpl:
2864 objdef = oimplementation;
2865 return false;
2866 default:
2867 break;
2869 break;
2870 case oimplementation:
2871 /* Save the class tag for functions or variables defined inside. */
2872 objtag = savenstr (str, len);
2873 objdef = oinbody;
2874 return false;
2875 case oprotocol:
2876 /* Save the class tag for categories. */
2877 objtag = savenstr (str, len);
2878 objdef = otagseen;
2879 *is_func_or_var = true;
2880 return true;
2881 case oparenseen:
2882 objdef = ocatseen;
2883 *is_func_or_var = true;
2884 return true;
2885 case oinbody:
2886 break;
2887 case omethodsign:
2888 if (parlev == 0)
2890 fvdef = fvnone;
2891 objdef = omethodtag;
2892 linebuffer_setlen (&token_name, len);
2893 memcpy (token_name.buffer, str, len);
2894 token_name.buffer[len] = '\0';
2895 return true;
2897 return false;
2898 case omethodcolon:
2899 if (parlev == 0)
2900 objdef = omethodparm;
2901 return false;
2902 case omethodparm:
2903 if (parlev == 0)
2905 objdef = omethodtag;
2906 if (class_qualify)
2908 int oldlen = token_name.len;
2909 fvdef = fvnone;
2910 linebuffer_setlen (&token_name, oldlen + len);
2911 memcpy (token_name.buffer + oldlen, str, len);
2912 token_name.buffer[oldlen + len] = '\0';
2914 return true;
2916 return false;
2917 case oignore:
2918 if (toktype == st_C_objend)
2920 /* Memory leakage here: the string pointed by objtag is
2921 never released, because many tests would be needed to
2922 avoid breaking on incorrect input code. The amount of
2923 memory leaked here is the sum of the lengths of the
2924 class tags.
2925 free (objtag); */
2926 objdef = onone;
2928 return false;
2929 default:
2930 break;
2933 /* A function, variable or enum constant? */
2934 switch (toktype)
2936 case st_C_extern:
2937 fvextern = true;
2938 switch (fvdef)
2940 case finlist:
2941 case flistseen:
2942 case fignore:
2943 case vignore:
2944 break;
2945 default:
2946 fvdef = fvnone;
2948 return false;
2949 case st_C_ignore:
2950 fvextern = false;
2951 fvdef = vignore;
2952 return false;
2953 case st_C_operator:
2954 fvdef = foperator;
2955 *is_func_or_var = true;
2956 return true;
2957 case st_none:
2958 if (constantypedefs
2959 && structdef == snone
2960 && structtype == st_C_enum && bracelev > structbracelev
2961 /* Don't tag tokens in expressions that assign values to enum
2962 constants. */
2963 && fvdef != vignore)
2964 return true; /* enum constant */
2965 switch (fvdef)
2967 case fdefunkey:
2968 if (bracelev > 0)
2969 break;
2970 fvdef = fdefunname; /* GNU macro */
2971 *is_func_or_var = true;
2972 return true;
2973 case fvnone:
2974 switch (typdef)
2976 case ttypeseen:
2977 return false;
2978 case tnone:
2979 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2980 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2982 fvdef = vignore;
2983 return false;
2985 break;
2986 default:
2987 break;
2989 /* FALLTHRU */
2990 case fvnameseen:
2991 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2993 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2994 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2995 fvdef = foperator;
2996 *is_func_or_var = true;
2997 return true;
2999 if (bracelev > 0 && !instruct)
3000 break;
3001 fvdef = fvnameseen; /* function or variable */
3002 *is_func_or_var = true;
3003 return true;
3004 default:
3005 break;
3007 break;
3008 default:
3009 break;
3012 return false;
3017 * C_entries often keeps pointers to tokens or lines which are older than
3018 * the line currently read. By keeping two line buffers, and switching
3019 * them at end of line, it is possible to use those pointers.
3021 static struct
3023 long linepos;
3024 linebuffer lb;
3025 } lbs[2];
/* The macros below expand to code using variables that are not
   declared here (curndx, newndx, c_ext, charno, inf, lp, quotednl,
   savetoken); they must be in scope wherever the macros are used.  */

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The two line buffers and their positions.  */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language mask.  */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next line into the current buffer, leaving definedef
   alone.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = false;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Read the next line, restore the token saved in savetoken if there
   is one, and reset definedef.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF ();                                                \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = false;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3060 static void
3061 make_C_tag (bool isfun)
3063 /* This function is never called when token.valid is false, but
3064 we must protect against invalid input or internal errors. */
3065 if (token.valid)
3066 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3067 token.offset+token.length+1, token.lineno, token.linepos);
3068 else if (DEBUG)
3069 { /* this branch is optimized away if !DEBUG */
3070 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3071 token_name.len + 17, isfun, token.line,
3072 token.offset+token.length+1, token.lineno, token.linepos);
3073 error ("INVALID TOKEN");
3076 token.valid = false;
/* Return true unless the end-of-file or the error indicator is set
   on INF.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3087 * C_entries ()
3088 * This routine finds functions, variables, typedefs,
3089 * #define's, enum constants and struct/union/enum definitions in
3090 * C syntax and adds them to the list.
3092 static void
3093 C_entries (int c_ext, FILE *inf)
3094 /* extension of C */
3095 /* input file */
3097 register char c; /* latest char read; '\0' for end of line */
3098 register char *lp; /* pointer one beyond the character `c' */
3099 int curndx, newndx; /* indices for current and new lb */
3100 register int tokoff; /* offset in line of start of current token */
3101 register int toklen; /* length of current token */
3102 const char *qualifier; /* string used to qualify names */
3103 int qlen; /* length of qualifier */
3104 int bracelev; /* current brace level */
3105 int bracketlev; /* current bracket level */
3106 int parlev; /* current parenthesis level */
3107 int attrparlev; /* __attribute__ parenthesis level */
3108 int templatelev; /* current template level */
3109 int typdefbracelev; /* bracelev where a typedef struct body begun */
3110 bool incomm, inquote, inchar, quotednl, midtoken;
3111 bool yacc_rules; /* in the rules part of a yacc file */
3112 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3115 linebuffer_init (&lbs[0].lb);
3116 linebuffer_init (&lbs[1].lb);
3117 if (cstack.size == 0)
3119 cstack.size = (DEBUG) ? 1 : 4;
3120 cstack.nl = 0;
3121 cstack.cname = xnew (cstack.size, char *);
3122 cstack.bracelev = xnew (cstack.size, int);
3125 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3126 curndx = newndx = 0;
3127 lp = curlb.buffer;
3128 *lp = 0;
3130 fvdef = fvnone; fvextern = false; typdef = tnone;
3131 structdef = snone; definedef = dnone; objdef = onone;
3132 yacc_rules = false;
3133 midtoken = inquote = inchar = incomm = quotednl = false;
3134 token.valid = savetoken.valid = false;
3135 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3136 if (cjava)
3137 { qualifier = "."; qlen = 1; }
3138 else
3139 { qualifier = "::"; qlen = 2; }
3142 while (perhaps_more_input (inf))
3144 c = *lp++;
3145 if (c == '\\')
3147 /* If we are at the end of the line, the next character is a
3148 '\0'; do not skip it, because it is what tells us
3149 to read the next line. */
3150 if (*lp == '\0')
3152 quotednl = true;
3153 continue;
3155 lp++;
3156 c = ' ';
3158 else if (incomm)
3160 switch (c)
3162 case '*':
3163 if (*lp == '/')
3165 c = *lp++;
3166 incomm = false;
3168 break;
3169 case '\0':
3170 /* Newlines inside comments do not end macro definitions in
3171 traditional cpp. */
3172 CNL_SAVE_DEFINEDEF ();
3173 break;
3175 continue;
3177 else if (inquote)
3179 switch (c)
3181 case '"':
3182 inquote = false;
3183 break;
3184 case '\0':
3185 /* Newlines inside strings do not end macro definitions
3186 in traditional cpp, even though compilers don't
3187 usually accept them. */
3188 CNL_SAVE_DEFINEDEF ();
3189 break;
3191 continue;
3193 else if (inchar)
3195 switch (c)
3197 case '\0':
3198 /* Hmmm, something went wrong. */
3199 CNL ();
3200 /* FALLTHRU */
3201 case '\'':
3202 inchar = false;
3203 break;
3205 continue;
3207 else switch (c)
3209 case '"':
3210 inquote = true;
3211 if (bracketlev > 0)
3212 continue;
3213 if (inattribute)
3214 break;
3215 switch (fvdef)
3217 case fdefunkey:
3218 case fstartlist:
3219 case finlist:
3220 case fignore:
3221 case vignore:
3222 break;
3223 default:
3224 fvextern = false;
3225 fvdef = fvnone;
3227 continue;
3228 case '\'':
3229 inchar = true;
3230 if (bracketlev > 0)
3231 continue;
3232 if (inattribute)
3233 break;
3234 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3236 fvextern = false;
3237 fvdef = fvnone;
3239 continue;
3240 case '/':
3241 if (*lp == '*')
3243 incomm = true;
3244 lp++;
3245 c = ' ';
3246 if (bracketlev > 0)
3247 continue;
3249 else if (/* cplpl && */ *lp == '/')
3251 c = '\0';
3253 break;
3254 case '%':
3255 if ((c_ext & YACC) && *lp == '%')
3257 /* Entering or exiting rules section in yacc file. */
3258 lp++;
3259 definedef = dnone; fvdef = fvnone; fvextern = false;
3260 typdef = tnone; structdef = snone;
3261 midtoken = inquote = inchar = incomm = quotednl = false;
3262 bracelev = 0;
3263 yacc_rules = !yacc_rules;
3264 continue;
3266 else
3267 break;
3268 case '#':
3269 if (definedef == dnone)
3271 char *cp;
3272 bool cpptoken = true;
3274 /* Look back on this line. If all blanks, or nonblanks
3275 followed by an end of comment, this is a preprocessor
3276 token. */
3277 for (cp = newlb.buffer; cp < lp-1; cp++)
3278 if (!c_isspace (*cp))
3280 if (*cp == '*' && cp[1] == '/')
3282 cp++;
3283 cpptoken = true;
3285 else
3286 cpptoken = false;
3288 if (cpptoken)
3290 definedef = dsharpseen;
3291 /* This is needed for tagging enum values: when there are
3292 preprocessor conditionals inside the enum, we need to
3293 reset the value of fvdef so that the next enum value is
3294 tagged even though the one before it did not end in a
3295 comma. */
3296 if (fvdef == vignore && instruct && parlev == 0)
3298 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3299 fvdef = fvnone;
3302 } /* if (definedef == dnone) */
3303 continue;
3304 case '[':
3305 bracketlev++;
3306 continue;
3307 default:
3308 if (bracketlev > 0)
3310 if (c == ']')
3311 --bracketlev;
3312 else if (c == '\0')
3313 CNL_SAVE_DEFINEDEF ();
3314 continue;
3316 break;
3317 } /* switch (c) */
3320 /* Consider token only if some involved conditions are satisfied. */
3321 if (typdef != tignore
3322 && definedef != dignorerest
3323 && fvdef != finlist
3324 && templatelev == 0
3325 && (definedef != dnone
3326 || structdef != scolonseen)
3327 && !inattribute)
3329 if (midtoken)
3331 if (endtoken (c))
3333 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3334 /* This handles :: in the middle,
3335 but not at the beginning of an identifier.
3336 Also, space-separated :: is not recognized. */
3338 if (c_ext & C_AUTO) /* automatic detection of C++ */
3339 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3340 lp += 2;
3341 toklen += 2;
3342 c = lp[-1];
3343 goto still_in_token;
3345 else
3347 bool funorvar = false;
3349 if (yacc_rules
3350 || consider_token (newlb.buffer + tokoff, toklen, c,
3351 &c_ext, bracelev, parlev,
3352 &funorvar))
3354 if (fvdef == foperator)
3356 char *oldlp = lp;
3357 lp = skip_spaces (lp-1);
3358 if (*lp != '\0')
3359 lp += 1;
3360 while (*lp != '\0'
3361 && !c_isspace (*lp) && *lp != '(')
3362 lp += 1;
3363 c = *lp++;
3364 toklen += lp - oldlp;
3366 token.named = false;
3367 if (!plainc
3368 && nestlev > 0 && definedef == dnone)
3369 /* in struct body */
3371 if (class_qualify)
3373 int len;
3374 write_classname (&token_name, qualifier);
3375 len = token_name.len;
3376 linebuffer_setlen (&token_name,
3377 len + qlen + toklen);
3378 sprintf (token_name.buffer + len, "%s%.*s",
3379 qualifier, toklen,
3380 newlb.buffer + tokoff);
3382 else
3384 linebuffer_setlen (&token_name, toklen);
3385 sprintf (token_name.buffer, "%.*s",
3386 toklen, newlb.buffer + tokoff);
3388 token.named = true;
3390 else if (objdef == ocatseen)
3391 /* Objective C category */
3393 if (class_qualify)
3395 int len = strlen (objtag) + 2 + toklen;
3396 linebuffer_setlen (&token_name, len);
3397 sprintf (token_name.buffer, "%s(%.*s)",
3398 objtag, toklen,
3399 newlb.buffer + tokoff);
3401 else
3403 linebuffer_setlen (&token_name, toklen);
3404 sprintf (token_name.buffer, "%.*s",
3405 toklen, newlb.buffer + tokoff);
3407 token.named = true;
3409 else if (objdef == omethodtag
3410 || objdef == omethodparm)
3411 /* Objective C method */
3413 token.named = true;
3415 else if (fvdef == fdefunname)
3416 /* GNU DEFUN and similar macros */
3418 bool defun = (newlb.buffer[tokoff] == 'F');
3419 int off = tokoff;
3420 int len = toklen;
3422 /* Rewrite the tag so that emacs lisp DEFUNs
3423 can be found by their elisp name */
3424 if (defun)
3426 off += 1;
3427 len -= 1;
3429 linebuffer_setlen (&token_name, len);
3430 memcpy (token_name.buffer,
3431 newlb.buffer + off, len);
3432 token_name.buffer[len] = '\0';
3433 if (defun)
3434 while (--len >= 0)
3435 if (token_name.buffer[len] == '_')
3436 token_name.buffer[len] = '-';
3437 token.named = defun;
3439 else
3441 linebuffer_setlen (&token_name, toklen);
3442 memcpy (token_name.buffer,
3443 newlb.buffer + tokoff, toklen);
3444 token_name.buffer[toklen] = '\0';
3445 /* Name macros and members. */
3446 token.named = (structdef == stagseen
3447 || typdef == ttypeseen
3448 || typdef == tend
3449 || (funorvar
3450 && definedef == dignorerest)
3451 || (funorvar
3452 && definedef == dnone
3453 && structdef == snone
3454 && bracelev > 0));
3456 token.lineno = lineno;
3457 token.offset = tokoff;
3458 token.length = toklen;
3459 token.line = newlb.buffer;
3460 token.linepos = newlinepos;
3461 token.valid = true;
3463 if (definedef == dnone
3464 && (fvdef == fvnameseen
3465 || fvdef == foperator
3466 || structdef == stagseen
3467 || typdef == tend
3468 || typdef == ttypeseen
3469 || objdef != onone))
3471 if (current_lb_is_new)
3472 switch_line_buffers ();
3474 else if (definedef != dnone
3475 || fvdef == fdefunname
3476 || instruct)
3477 make_C_tag (funorvar);
3479 else /* not yacc and consider_token failed */
3481 if (inattribute && fvdef == fignore)
3483 /* We have just met __attribute__ after a
3484 function parameter list: do not tag the
3485 function again. */
3486 fvdef = fvnone;
3489 midtoken = false;
3491 } /* if (endtoken (c)) */
3492 else if (intoken (c))
3493 still_in_token:
3495 toklen++;
3496 continue;
3498 } /* if (midtoken) */
3499 else if (begtoken (c))
3501 switch (definedef)
3503 case dnone:
3504 switch (fvdef)
3506 case fstartlist:
3507 /* This prevents tagging fb in
3508 void (__attribute__((noreturn)) *fb) (void);
3509 Fixing this is not easy and not very important. */
3510 fvdef = finlist;
3511 continue;
3512 case flistseen:
3513 if (plainc || declarations)
3515 make_C_tag (true); /* a function */
3516 fvdef = fignore;
3518 break;
3519 default:
3520 break;
3522 if (structdef == stagseen && !cjava)
3524 popclass_above (bracelev);
3525 structdef = snone;
3527 break;
3528 case dsharpseen:
3529 savetoken = token;
3530 break;
3531 default:
3532 break;
3534 if (!yacc_rules || lp == newlb.buffer + 1)
3536 tokoff = lp - 1 - newlb.buffer;
3537 toklen = 1;
3538 midtoken = true;
3540 continue;
3541 } /* if (begtoken) */
3542 } /* if must look at token */
3545 /* Detect end of line, colon, comma, semicolon and various braces
3546 after having handled a token.*/
3547 switch (c)
3549 case ':':
3550 if (inattribute)
3551 break;
3552 if (yacc_rules && token.offset == 0 && token.valid)
3554 make_C_tag (false); /* a yacc function */
3555 break;
3557 if (definedef != dnone)
3558 break;
3559 switch (objdef)
3561 case otagseen:
3562 objdef = oignore;
3563 make_C_tag (true); /* an Objective C class */
3564 break;
3565 case omethodtag:
3566 case omethodparm:
3567 objdef = omethodcolon;
3568 if (class_qualify)
3570 int toklen = token_name.len;
3571 linebuffer_setlen (&token_name, toklen + 1);
3572 strcpy (token_name.buffer + toklen, ":");
3574 break;
3575 default:
3576 break;
3578 if (structdef == stagseen)
3580 structdef = scolonseen;
3581 break;
3583 /* Should be useless, but may be work as a safety net. */
3584 if (cplpl && fvdef == flistseen)
3586 make_C_tag (true); /* a function */
3587 fvdef = fignore;
3588 break;
3590 break;
3591 case ';':
3592 if (definedef != dnone || inattribute)
3593 break;
3594 switch (typdef)
3596 case tend:
3597 case ttypeseen:
3598 make_C_tag (false); /* a typedef */
3599 typdef = tnone;
3600 fvdef = fvnone;
3601 break;
3602 case tnone:
3603 case tinbody:
3604 case tignore:
3605 switch (fvdef)
3607 case fignore:
3608 if (typdef == tignore || cplpl)
3609 fvdef = fvnone;
3610 break;
3611 case fvnameseen:
3612 if ((globals && bracelev == 0 && (!fvextern || declarations))
3613 || (members && instruct))
3614 make_C_tag (false); /* a variable */
3615 fvextern = false;
3616 fvdef = fvnone;
3617 token.valid = false;
3618 break;
3619 case flistseen:
3620 if ((declarations
3621 && (cplpl || !instruct)
3622 && (typdef == tnone || (typdef != tignore && instruct)))
3623 || (members
3624 && plainc && instruct))
3625 make_C_tag (true); /* a function */
3626 /* FALLTHRU */
3627 default:
3628 fvextern = false;
3629 fvdef = fvnone;
3630 if (declarations
3631 && cplpl && structdef == stagseen)
3632 make_C_tag (false); /* forward declaration */
3633 else
3634 token.valid = false;
3635 } /* switch (fvdef) */
3636 /* FALLTHRU */
3637 default:
3638 if (!instruct)
3639 typdef = tnone;
3641 if (structdef == stagseen)
3642 structdef = snone;
3643 break;
3644 case ',':
3645 if (definedef != dnone || inattribute)
3646 break;
3647 switch (objdef)
3649 case omethodtag:
3650 case omethodparm:
3651 make_C_tag (true); /* an Objective C method */
3652 objdef = oinbody;
3653 break;
3654 default:
3655 break;
3657 switch (fvdef)
3659 case fdefunkey:
3660 case foperator:
3661 case fstartlist:
3662 case finlist:
3663 case fignore:
3664 break;
3665 case vignore:
3666 if (instruct && parlev == 0)
3667 fvdef = fvnone;
3668 break;
3669 case fdefunname:
3670 fvdef = fignore;
3671 break;
3672 case fvnameseen:
3673 if (parlev == 0
3674 && ((globals
3675 && bracelev == 0
3676 && templatelev == 0
3677 && (!fvextern || declarations))
3678 || (members && instruct)))
3679 make_C_tag (false); /* a variable */
3680 break;
3681 case flistseen:
3682 if ((declarations && typdef == tnone && !instruct)
3683 || (members && typdef != tignore && instruct))
3685 make_C_tag (true); /* a function */
3686 fvdef = fvnameseen;
3688 else if (!declarations)
3689 fvdef = fvnone;
3690 token.valid = false;
3691 break;
3692 default:
3693 fvdef = fvnone;
3695 if (structdef == stagseen)
3696 structdef = snone;
3697 break;
3698 case ']':
3699 if (definedef != dnone || inattribute)
3700 break;
3701 if (structdef == stagseen)
3702 structdef = snone;
3703 switch (typdef)
3705 case ttypeseen:
3706 case tend:
3707 typdef = tignore;
3708 make_C_tag (false); /* a typedef */
3709 break;
3710 case tnone:
3711 case tinbody:
3712 switch (fvdef)
3714 case foperator:
3715 case finlist:
3716 case fignore:
3717 case vignore:
3718 break;
3719 case fvnameseen:
3720 if ((members && bracelev == 1)
3721 || (globals && bracelev == 0
3722 && (!fvextern || declarations)))
3723 make_C_tag (false); /* a variable */
3724 /* FALLTHRU */
3725 default:
3726 fvdef = fvnone;
3728 break;
3729 default:
3730 break;
3732 break;
3733 case '(':
3734 if (inattribute)
3736 attrparlev++;
3737 break;
3739 if (definedef != dnone)
3740 break;
3741 if (objdef == otagseen && parlev == 0)
3742 objdef = oparenseen;
3743 switch (fvdef)
3745 case fvnameseen:
3746 if (typdef == ttypeseen
3747 && *lp != '*'
3748 && !instruct)
3750 /* This handles constructs like:
3751 typedef void OperatorFun (int fun); */
3752 make_C_tag (false);
3753 typdef = tignore;
3754 fvdef = fignore;
3755 break;
3757 /* FALLTHRU */
3758 case foperator:
3759 fvdef = fstartlist;
3760 break;
3761 case flistseen:
3762 fvdef = finlist;
3763 break;
3764 default:
3765 break;
3767 parlev++;
3768 break;
3769 case ')':
3770 if (inattribute)
3772 if (--attrparlev == 0)
3773 inattribute = false;
3774 break;
3776 if (definedef != dnone)
3777 break;
3778 if (objdef == ocatseen && parlev == 1)
3780 make_C_tag (true); /* an Objective C category */
3781 objdef = oignore;
3783 if (--parlev == 0)
3785 switch (fvdef)
3787 case fstartlist:
3788 case finlist:
3789 fvdef = flistseen;
3790 break;
3791 default:
3792 break;
3794 if (!instruct
3795 && (typdef == tend
3796 || typdef == ttypeseen))
3798 typdef = tignore;
3799 make_C_tag (false); /* a typedef */
3802 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3803 parlev = 0;
3804 break;
3805 case '{':
3806 if (definedef != dnone)
3807 break;
3808 if (typdef == ttypeseen)
3810 /* Whenever typdef is set to tinbody (currently only
3811 here), typdefbracelev should be set to bracelev. */
3812 typdef = tinbody;
3813 typdefbracelev = bracelev;
3815 switch (fvdef)
3817 case flistseen:
3818 if (cplpl && !class_qualify)
3820 /* Remove class and namespace qualifiers from the token,
3821 leaving only the method/member name. */
3822 char *cc, *uqname = token_name.buffer;
3823 char *tok_end = token_name.buffer + token_name.len;
3825 for (cc = token_name.buffer; cc < tok_end; cc++)
3827 if (*cc == ':' && cc[1] == ':')
3829 uqname = cc + 2;
3830 cc++;
3833 if (uqname > token_name.buffer)
3835 int uqlen = strlen (uqname);
3836 linebuffer_setlen (&token_name, uqlen);
3837 memmove (token_name.buffer, uqname, uqlen + 1);
3840 make_C_tag (true); /* a function */
3841 /* FALLTHRU */
3842 case fignore:
3843 fvdef = fvnone;
3844 break;
3845 case fvnone:
3846 switch (objdef)
3848 case otagseen:
3849 make_C_tag (true); /* an Objective C class */
3850 objdef = oignore;
3851 break;
3852 case omethodtag:
3853 case omethodparm:
3854 make_C_tag (true); /* an Objective C method */
3855 objdef = oinbody;
3856 break;
3857 default:
3858 /* Neutralize `extern "C" {' grot. */
3859 if (bracelev == 0 && structdef == snone && nestlev == 0
3860 && typdef == tnone)
3861 bracelev = -1;
3863 break;
3864 default:
3865 break;
3867 switch (structdef)
3869 case skeyseen: /* unnamed struct */
3870 pushclass_above (bracelev, NULL, 0);
3871 structdef = snone;
3872 break;
3873 case stagseen: /* named struct or enum */
3874 case scolonseen: /* a class */
3875 pushclass_above (bracelev,token.line+token.offset, token.length);
3876 structdef = snone;
3877 make_C_tag (false); /* a struct or enum */
3878 break;
3879 default:
3880 break;
3882 bracelev += 1;
3883 break;
3884 case '*':
3885 if (definedef != dnone)
3886 break;
3887 if (fvdef == fstartlist)
3889 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3890 token.valid = false;
3892 break;
3893 case '}':
3894 if (definedef != dnone)
3895 break;
3896 bracelev -= 1;
3897 if (!ignoreindent && lp == newlb.buffer + 1)
3899 if (bracelev != 0)
3900 token.valid = false; /* unexpected value, token unreliable */
3901 bracelev = 0; /* reset brace level if first column */
3902 parlev = 0; /* also reset paren level, just in case... */
3904 else if (bracelev < 0)
3906 token.valid = false; /* something gone amiss, token unreliable */
3907 bracelev = 0;
3909 if (bracelev == 0 && fvdef == vignore)
3910 fvdef = fvnone; /* end of function */
3911 popclass_above (bracelev);
3912 structdef = snone;
3913 /* Only if typdef == tinbody is typdefbracelev significant. */
3914 if (typdef == tinbody && bracelev <= typdefbracelev)
3916 assert (bracelev == typdefbracelev);
3917 typdef = tend;
3919 break;
3920 case '=':
3921 if (definedef != dnone)
3922 break;
3923 switch (fvdef)
3925 case foperator:
3926 case finlist:
3927 case fignore:
3928 case vignore:
3929 break;
3930 case fvnameseen:
3931 if ((members && bracelev == 1)
3932 || (globals && bracelev == 0 && (!fvextern || declarations)))
3933 make_C_tag (false); /* a variable */
3934 /* FALLTHRU */
3935 default:
3936 fvdef = vignore;
3938 break;
3939 case '<':
3940 if (cplpl
3941 && (structdef == stagseen || fvdef == fvnameseen))
3943 templatelev++;
3944 break;
3946 goto resetfvdef;
3947 case '>':
3948 if (templatelev > 0)
3950 templatelev--;
3951 break;
3953 goto resetfvdef;
3954 case '+':
3955 case '-':
3956 if (objdef == oinbody && bracelev == 0)
3958 objdef = omethodsign;
3959 break;
3961 /* FALLTHRU */
3962 resetfvdef:
3963 case '#': case '~': case '&': case '%': case '/':
3964 case '|': case '^': case '!': case '.': case '?':
3965 if (definedef != dnone)
3966 break;
3967 /* These surely cannot follow a function tag in C. */
3968 switch (fvdef)
3970 case foperator:
3971 case finlist:
3972 case fignore:
3973 case vignore:
3974 break;
3975 default:
3976 fvdef = fvnone;
3978 break;
3979 case '\0':
3980 if (objdef == otagseen)
3982 make_C_tag (true); /* an Objective C class */
3983 objdef = oignore;
3985 /* If a macro spans multiple lines don't reset its state. */
3986 if (quotednl)
3987 CNL_SAVE_DEFINEDEF ();
3988 else
3989 CNL ();
3990 break;
3991 } /* switch (c) */
3993 } /* while not eof */
3995 free (lbs[0].lb.buffer);
3996 free (lbs[1].lb.buffer);
4000 * Process either a C++ file or a C file depending on the setting
4001 * of a global flag.
4003 static void
4004 default_C_entries (FILE *inf)
4006 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: C_entries is called with no language-extension
   bits set.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4016 /* Always do C++. */
4017 static void
4018 Cplusplus_entries (FILE *inf)
4020 C_entries (C_PLPL, inf);
4023 /* Always do Java. */
4024 static void
4025 Cjava_entries (FILE *inf)
4027 C_entries (C_JAVA, inf);
4030 /* Always do C*. */
4031 static void
4032 Cstar_entries (FILE *inf)
4034 C_entries (C_STAR, inf);
4037 /* Always do Yacc. */
4038 static void
4039 Yacc_entries (FILE *inf)
4041 C_entries (YACC, inf);
4045 /* Useful macros.

   LOOP_ON_INPUT_LINES is a loop header; the statement that follows it
   is the loop body.  While perhaps_more_input (file_pointer) holds, it
   calls readline to fill line_buffer and points char_pointer at the
   start of line_buffer's text before each iteration.

   LOOKING_AT (cp, kw) is true when the text at cp starts with the
   literal string kw and the character right after it satisfies
   notinname.  Only in that case is cp advanced, past kw and past the
   spaces that follow it; otherwise cp is left unchanged.  */
4046 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4047 while (perhaps_more_input (file_pointer) \
4048 && (readline (&(line_buffer), file_pointer), \
4049 (char_pointer) = (line_buffer).buffer, \
4050 true)) \
4052 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4053 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4054 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4055 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4056 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
4058 /* Similar to LOOKING_AT, but compares with strncaseeq instead of
   strneq, does not use notinname, and does not skip the spaces that
   follow kw: on a match cp is left pointing right after kw.  */
4059 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4060 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4061 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4062 && ((cp) += sizeof (kw)-1)) /* step past kw only */
4065 * Read a file, but do no processing. This is used to do regexp
4066 * matching on files that have no language defined.
4068 static void
4069 just_read_file (FILE *inf)
4071 while (perhaps_more_input (inf))
4072 readline (&lb, inf);
4076 /* Fortran parsing */
/* Forward declarations of the two Fortran helpers: F_takeprec takes
   no arguments, F_getit takes the input stream.  */
4078 static void F_takeprec (void);
4079 static void F_getit (FILE *);
4081 static void
4082 F_takeprec (void)
4084 dbp = skip_spaces (dbp);
4085 if (*dbp != '*')
4086 return;
4087 dbp++;
4088 dbp = skip_spaces (dbp);
4089 if (strneq (dbp, "(*)", 3))
4091 dbp += 3;
4092 return;
4094 if (!c_isdigit (*dbp))
4096 --dbp; /* force failure */
4097 return;
4100 dbp++;
4101 while (c_isdigit (*dbp));
4104 static void
4105 F_getit (FILE *inf)
4107 register char *cp;
4109 dbp = skip_spaces (dbp);
4110 if (*dbp == '\0')
4112 readline (&lb, inf);
4113 dbp = lb.buffer;
4114 if (dbp[5] != '&')
4115 return;
4116 dbp += 6;
4117 dbp = skip_spaces (dbp);
4119 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4120 return;
4121 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4122 continue;
4123 make_tag (dbp, cp-dbp, true,
4124 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4128 static void
4129 Fortran_functions (FILE *inf)
4131 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4133 if (*dbp == '%')
4134 dbp++; /* Ratfor escape to fortran */
4135 dbp = skip_spaces (dbp);
4136 if (*dbp == '\0')
4137 continue;
4139 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4140 dbp = skip_spaces (dbp);
4142 if (LOOKING_AT_NOCASE (dbp, "pure"))
4143 dbp = skip_spaces (dbp);
4145 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4146 dbp = skip_spaces (dbp);
4148 switch (c_tolower (*dbp))
4150 case 'i':
4151 if (nocase_tail ("integer"))
4152 F_takeprec ();
4153 break;
4154 case 'r':
4155 if (nocase_tail ("real"))
4156 F_takeprec ();
4157 break;
4158 case 'l':
4159 if (nocase_tail ("logical"))
4160 F_takeprec ();
4161 break;
4162 case 'c':
4163 if (nocase_tail ("complex") || nocase_tail ("character"))
4164 F_takeprec ();
4165 break;
4166 case 'd':
4167 if (nocase_tail ("double"))
4169 dbp = skip_spaces (dbp);
4170 if (*dbp == '\0')
4171 continue;
4172 if (nocase_tail ("precision"))
4173 break;
4174 continue;
4176 break;
4178 dbp = skip_spaces (dbp);
4179 if (*dbp == '\0')
4180 continue;
4181 switch (c_tolower (*dbp))
4183 case 'f':
4184 if (nocase_tail ("function"))
4185 F_getit (inf);
4186 continue;
4187 case 's':
4188 if (nocase_tail ("subroutine"))
4189 F_getit (inf);
4190 continue;
4191 case 'e':
4192 if (nocase_tail ("entry"))
4193 F_getit (inf);
4194 continue;
4195 case 'b':
4196 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4198 dbp = skip_spaces (dbp);
4199 if (*dbp == '\0') /* assume un-named */
4200 make_tag ("blockdata", 9, true,
4201 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4202 else
4203 F_getit (inf); /* look for name */
4205 continue;
4212 * Ada parsing
4213 * Original code by
4214 * Philippe Waroquiers (1998)
4217 /* Once we are positioned after an "interesting" keyword, let's get
4218 the real tag value necessary. */
4219 static void
4220 Ada_getit (FILE *inf, const char *name_qualifier)
4222 register char *cp;
4223 char *name;
4224 char c;
4226 while (perhaps_more_input (inf))
4228 dbp = skip_spaces (dbp);
4229 if (*dbp == '\0'
4230 || (dbp[0] == '-' && dbp[1] == '-'))
4232 readline (&lb, inf);
4233 dbp = lb.buffer;
4235 switch (c_tolower (*dbp))
4237 case 'b':
4238 if (nocase_tail ("body"))
4240 /* Skipping body of procedure body or package body or ....
4241 resetting qualifier to body instead of spec. */
4242 name_qualifier = "/b";
4243 continue;
4245 break;
4246 case 't':
4247 /* Skipping type of task type or protected type ... */
4248 if (nocase_tail ("type"))
4249 continue;
4250 break;
4252 if (*dbp == '"')
4254 dbp += 1;
4255 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4256 continue;
4258 else
4260 dbp = skip_spaces (dbp);
4261 for (cp = dbp;
4262 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4263 cp++)
4264 continue;
4265 if (cp == dbp)
4266 return;
4268 c = *cp;
4269 *cp = '\0';
4270 name = concat (dbp, name_qualifier, "");
4271 *cp = c;
4272 make_tag (name, strlen (name), true,
4273 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4274 free (name);
4275 if (c == '"')
4276 dbp = cp + 1;
4277 return;
4281 static void
4282 Ada_funcs (FILE *inf)
4284 bool inquote = false;
4285 bool skip_till_semicolumn = false;
4287 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4289 while (*dbp != '\0')
4291 /* Skip a string i.e. "abcd". */
4292 if (inquote || (*dbp == '"'))
4294 dbp = strchr (dbp + !inquote, '"');
4295 if (dbp != NULL)
4297 inquote = false;
4298 dbp += 1;
4299 continue; /* advance char */
4301 else
4303 inquote = true;
4304 break; /* advance line */
4308 /* Skip comments. */
4309 if (dbp[0] == '-' && dbp[1] == '-')
4310 break; /* advance line */
4312 /* Skip character enclosed in single quote i.e. 'a'
4313 and skip single quote starting an attribute i.e. 'Image. */
4314 if (*dbp == '\'')
4316 dbp++ ;
4317 if (*dbp != '\0')
4318 dbp++;
4319 continue;
4322 if (skip_till_semicolumn)
4324 if (*dbp == ';')
4325 skip_till_semicolumn = false;
4326 dbp++;
4327 continue; /* advance char */
4330 /* Search for beginning of a token. */
4331 if (!begtoken (*dbp))
4333 dbp++;
4334 continue; /* advance char */
4337 /* We are at the beginning of a token. */
4338 switch (c_tolower (*dbp))
4340 case 'f':
4341 if (!packages_only && nocase_tail ("function"))
4342 Ada_getit (inf, "/f");
4343 else
4344 break; /* from switch */
4345 continue; /* advance char */
4346 case 'p':
4347 if (!packages_only && nocase_tail ("procedure"))
4348 Ada_getit (inf, "/p");
4349 else if (nocase_tail ("package"))
4350 Ada_getit (inf, "/s");
4351 else if (nocase_tail ("protected")) /* protected type */
4352 Ada_getit (inf, "/t");
4353 else
4354 break; /* from switch */
4355 continue; /* advance char */
4357 case 'u':
4358 if (typedefs && !packages_only && nocase_tail ("use"))
4360 /* when tagging types, avoid tagging use type Pack.Typename;
4361 for this, we will skip everything till a ; */
4362 skip_till_semicolumn = true;
4363 continue; /* advance char */
4366 case 't':
4367 if (!packages_only && nocase_tail ("task"))
4368 Ada_getit (inf, "/k");
4369 else if (typedefs && !packages_only && nocase_tail ("type"))
4371 Ada_getit (inf, "/t");
4372 while (*dbp != '\0')
4373 dbp += 1;
4375 else
4376 break; /* from switch */
4377 continue; /* advance char */
4380 /* Look for the end of the token. */
4381 while (!endtoken (*dbp))
4382 dbp++;
4384 } /* advance char */
4385 } /* advance line */
4390 * Unix and microcontroller assembly tag handling
4391 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4392 * Idea by Bob Weiner, Motorola Inc. (1994)
4394 static void
4395 Asm_labels (FILE *inf)
4397 register char *cp;
4399 LOOP_ON_INPUT_LINES (inf, lb, cp)
4401 /* If first char is alphabetic or one of [_.$], test for colon
4402 following identifier. */
4403 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4405 /* Read past label. */
4406 cp++;
4407 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4408 cp++;
4409 if (*cp == ':' || c_isspace (*cp))
4410 /* Found end of label, so copy it and add it to the table. */
4411 make_tag (lb.buffer, cp - lb.buffer, true,
4412 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4419 * Perl support
4420 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4421 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4422 * Perl variable names: /^(my|local).../
4423 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4424 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4425 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4427 static void
4428 Perl_functions (FILE *inf)
4430 char *package = savestr ("main"); /* current package name */
4431 register char *cp;
4433 LOOP_ON_INPUT_LINES (inf, lb, cp)
4435 cp = skip_spaces (cp);
4437 if (LOOKING_AT (cp, "package"))
4439 free (package);
4440 get_tag (cp, &package);
4442 else if (LOOKING_AT (cp, "sub"))
4444 char *pos, *sp;
4446 subr:
4447 sp = cp;
4448 while (!notinname (*cp))
4449 cp++;
4450 if (cp == sp)
4451 continue; /* nothing found */
4452 pos = strchr (sp, ':');
4453 if (pos && pos < cp && pos[1] == ':')
4454 /* The name is already qualified. */
4455 make_tag (sp, cp - sp, true,
4456 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4457 else
4458 /* Qualify it. */
4460 char savechar, *name;
4462 savechar = *cp;
4463 *cp = '\0';
4464 name = concat (package, "::", sp);
4465 *cp = savechar;
4466 make_tag (name, strlen (name), true,
4467 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4468 free (name);
4471 else if (LOOKING_AT (cp, "use constant")
4472 || LOOKING_AT (cp, "use constant::defer"))
4474 /* For hash style multi-constant like
4475 use constant { FOO => 123,
4476 BAR => 456 };
4477 only the first FOO is picked up. Parsing across the value
4478 expressions would be difficult in general, due to possible nested
4479 hashes, here-documents, etc. */
4480 if (*cp == '{')
4481 cp = skip_spaces (cp+1);
4482 goto subr;
4484 else if (globals) /* only if we are tagging global vars */
4486 /* Skip a qualifier, if any. */
4487 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4488 /* After "my" or "local", but before any following paren or space. */
4489 char *varstart = cp;
4491 if (qual /* should this be removed? If yes, how? */
4492 && (*cp == '$' || *cp == '@' || *cp == '%'))
4494 varstart += 1;
4496 cp++;
4497 while (c_isalnum (*cp) || *cp == '_');
4499 else if (qual)
4501 /* Should be examining a variable list at this point;
4502 could insist on seeing an open parenthesis. */
4503 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4504 cp++;
4506 else
4507 continue;
4509 make_tag (varstart, cp - varstart, false,
4510 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4513 free (package);
4518 * Python support
4519 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4520 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4521 * More ideas by seb bacon <seb@jamkit.com> (2002)
4523 static void
4524 Python_functions (FILE *inf)
4526 register char *cp;
4528 LOOP_ON_INPUT_LINES (inf, lb, cp)
4530 cp = skip_spaces (cp);
4531 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4533 char *name = cp;
4534 while (!notinname (*cp) && *cp != ':')
4535 cp++;
4536 make_tag (name, cp - name, true,
4537 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4543 * Ruby support
4544 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4546 static void
4547 Ruby_functions (FILE *inf)
4549 char *cp = NULL;
4551 LOOP_ON_INPUT_LINES (inf, lb, cp)
4553 cp = skip_spaces (cp);
4554 if (LOOKING_AT (cp, "def")
4555 || LOOKING_AT (cp, "class")
4556 || LOOKING_AT (cp, "module"))
4558 char *name = cp;
4560 /* Ruby method names can end in a '='. Also, operator overloading can
4561 define operators whose names include '='. */
4562 while (!notinname (*cp) || *cp == '=')
4563 cp++;
4565 make_tag (name, cp - name, true,
4566 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4573 * PHP support
4574 * Look for:
4575 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4576 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4577 * - /^[ \t]*define\(\"[^\"]+/
4578 * Only with --members:
4579 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4580 * Idea by Diez B. Roggisch (2001)
4582 static void
4583 PHP_functions (FILE *inf)
4585 char *cp, *name;
4586 bool search_identifier = false;
4588 LOOP_ON_INPUT_LINES (inf, lb, cp)
4590 cp = skip_spaces (cp);
4591 name = cp;
4592 if (search_identifier
4593 && *cp != '\0')
4595 while (!notinname (*cp))
4596 cp++;
4597 make_tag (name, cp - name, true,
4598 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4599 search_identifier = false;
4601 else if (LOOKING_AT (cp, "function"))
4603 if (*cp == '&')
4604 cp = skip_spaces (cp+1);
4605 if (*cp != '\0')
4607 name = cp;
4608 while (!notinname (*cp))
4609 cp++;
4610 make_tag (name, cp - name, true,
4611 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4613 else
4614 search_identifier = true;
4616 else if (LOOKING_AT (cp, "class"))
4618 if (*cp != '\0')
4620 name = cp;
4621 while (*cp != '\0' && !c_isspace (*cp))
4622 cp++;
4623 make_tag (name, cp - name, false,
4624 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4626 else
4627 search_identifier = true;
4629 else if (strneq (cp, "define", 6)
4630 && (cp = skip_spaces (cp+6))
4631 && *cp++ == '('
4632 && (*cp == '"' || *cp == '\''))
4634 char quote = *cp++;
4635 name = cp;
4636 while (*cp != quote && *cp != '\0')
4637 cp++;
4638 make_tag (name, cp - name, false,
4639 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4641 else if (members
4642 && LOOKING_AT (cp, "var")
4643 && *cp == '$')
4645 name = cp;
4646 while (!notinname (*cp))
4647 cp++;
4648 make_tag (name, cp - name, false,
4649 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656 * Cobol tag functions
4657 * We could look for anything that could be a paragraph name.
4658 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4659 * Idea by Corny de Souza (1993)
4661 static void
4662 Cobol_paragraphs (FILE *inf)
4664 register char *bp, *ep;
4666 LOOP_ON_INPUT_LINES (inf, lb, bp)
4668 if (lb.len < 9)
4669 continue;
4670 bp += 8;
4672 /* If eoln, compiler option or comment ignore whole line. */
4673 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4674 continue;
4676 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4677 continue;
4678 if (*ep++ == '.')
4679 make_tag (bp, ep - bp, true,
4680 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4686 * Makefile support
4687 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4689 static void
4690 Makefile_targets (FILE *inf)
4692 register char *bp;
4694 LOOP_ON_INPUT_LINES (inf, lb, bp)
4696 if (*bp == '\t' || *bp == '#')
4697 continue;
4698 while (*bp != '\0' && *bp != '=' && *bp != ':')
4699 bp++;
4700 if (*bp == ':' || (globals && *bp == '='))
4702 /* We should detect if there is more than one tag, but we do not.
4703 We just skip initial and final spaces. */
4704 char * namestart = skip_spaces (lb.buffer);
4705 while (--bp > namestart)
4706 if (!notinname (*bp))
4707 break;
4708 make_tag (namestart, bp - namestart + 1, true,
4709 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4716 * Pascal parsing
4717 * Original code by Mosur K. Mohan (1989)
4719 * Locates tags for procedures & functions. Doesn't do any type- or
4720 * var-definitions. It does look for the keyword "extern" or
4721 * "forward" immediately following the procedure statement; if found,
4722 * the tag is skipped.
/* Scan Pascal input one character at a time and make a tag for each
   name that follows the keyword "procedure" or "function", unless the
   word found after the ';' ending the heading is "extern" or "forward".
   The line holding the name is copied into TLINE (together with its
   line number and character offset) so that the tag can still be made
   after later lines have been read into LB.  */
4724 static void
4725 Pascal_functions (FILE *inf)
4727 linebuffer tline; /* mostly copied from C_entries */
4728 long save_lcno;
4729 int save_lineno, namelen, taglen;
4730 char c, *name;
4732 bool /* each of these flags is true if: */
4733 incomment, /* point is inside a comment */
4734 inquote, /* point is inside '..' string */
4735 get_tagname, /* point is after PROCEDURE/FUNCTION
4736 keyword, so next item = potential tag */
4737 found_tag, /* point is after a potential tag */
4738 inparms, /* point is within parameter-list */
4739 verify_tag; /* point has passed the parm-list, so the
4740 next token will determine whether this
4741 is a FORWARD/EXTERN to be ignored, or
4742 whether it is a real tag */
4744 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4745 name = NULL; /* keep compiler quiet */
/* Start from an empty line: the first character fetched below is then
   '\0', which makes the loop read the first real line.  */
4746 dbp = lb.buffer;
4747 *dbp = '\0';
4748 linebuffer_init (&tline);
4750 incomment = inquote = false;
4751 found_tag = false; /* have a proc name; check if extern */
4752 get_tagname = false; /* found "procedure" keyword */
4753 inparms = false; /* found '(' after "proc" */
4754 verify_tag = false; /* check if "extern" is ahead */
/* Each iteration consumes one character C and leaves DBP just after it,
   except right after a line has been read while a name or the
   extern/forward check is pending: then DBP stays at the line start.  */
4757 while (perhaps_more_input (inf)) /* long main loop to get next char */
4759 c = *dbp++;
4760 if (c == '\0') /* if end of line */
4762 readline (&lb, inf);
4763 dbp = lb.buffer;
4764 if (*dbp == '\0')
4765 continue;
4766 if (!((found_tag && verify_tag)
4767 || get_tagname))
4768 c = *dbp++; /* only if don't need *dbp pointing
4769 to the beginning of the name of
4770 the procedure or function */
4772 if (incomment)
4774 if (c == '}') /* within { } comments */
4775 incomment = false;
4776 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4778 dbp++;
4779 incomment = false;
4781 continue;
4783 else if (inquote)
4785 if (c == '\'')
4786 inquote = false;
4787 continue;
4789 else
4790 switch (c)
4792 case '\'':
4793 inquote = true; /* found first quote */
4794 continue;
4795 case '{': /* found open { comment */
4796 incomment = true;
4797 continue;
4798 case '(':
4799 if (*dbp == '*') /* found open (* comment */
4801 incomment = true;
4802 dbp++;
4804 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4805 inparms = true;
4806 continue;
4807 case ')': /* end of parms list */
4808 if (inparms)
4809 inparms = false;
4810 continue;
4811 case ';':
4812 if (found_tag && !inparms) /* end of proc or fn stmt */
4814 verify_tag = true;
4815 break;
4817 continue;
/* From here on the character is outside comments and strings.  The
   pending name is confirmed or dropped once DBP is at something other
   than a blank.  */
4819 if (found_tag && verify_tag && (*dbp != ' '))
4821 /* Check if this is an "extern" declaration. */
4822 if (*dbp == '\0')
4823 continue;
4824 if (c_tolower (*dbp) == 'e')
4826 if (nocase_tail ("extern")) /* superfluous, really! */
4828 found_tag = false;
4829 verify_tag = false;
4832 else if (c_tolower (*dbp) == 'f')
4834 if (nocase_tail ("forward")) /* check for forward reference */
4836 found_tag = false;
4837 verify_tag = false;
4840 if (found_tag && verify_tag) /* not external proc, so make tag */
4842 found_tag = false;
4843 verify_tag = false;
4844 make_tag (name, namelen, true,
4845 tline.buffer, taglen, save_lineno, save_lcno);
4846 continue;
/* NAME is made to point into TLINE's copy of the line, not into LB,
   because LB is overwritten by the next readline.  */
4849 if (get_tagname) /* grab name of proc or fn */
4851 char *cp;
4853 if (*dbp == '\0')
4854 continue;
4856 /* Find block name. */
4857 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4858 continue;
4860 /* Save all values for later tagging. */
4861 linebuffer_setlen (&tline, lb.len);
4862 strcpy (tline.buffer, lb.buffer);
4863 save_lineno = lineno;
4864 save_lcno = linecharno;
4865 name = tline.buffer + (dbp - lb.buffer);
4866 namelen = cp - dbp;
4867 taglen = cp - lb.buffer + 1;
4869 dbp = cp; /* set dbp to e-o-token */
4870 get_tagname = false;
4871 found_tag = true;
4872 continue;
4874 /* And proceed to check for "extern". */
4876 else if (!incomment && !inquote && !found_tag)
4878 /* Check for proc/fn keywords. */
4879 switch (c_tolower (c))
4881 case 'p':
4882 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4883 get_tagname = true;
4884 continue;
4885 case 'f':
4886 if (nocase_tail ("unction"))
4887 get_tagname = true;
4888 continue;
4891 } /* while not eof */
4893 free (tline.buffer);
4898 * Lisp tag functions
4899 * look for (def or (DEF, quote or QUOTE
4902 static void L_getit (void);
4904 static void
4905 L_getit (void)
4907 if (*dbp == '\'') /* Skip prefix quote */
4908 dbp++;
4909 else if (*dbp == '(')
4911 dbp++;
4912 /* Try to skip "(quote " */
4913 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4914 /* Ok, then skip "(" before name in (defstruct (foo)) */
4915 dbp = skip_spaces (dbp);
4917 get_tag (dbp, NULL);
4920 static void
4921 Lisp_functions (FILE *inf)
4923 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4925 if (dbp[0] != '(')
4926 continue;
4928 /* "(defvar foo)" is a declaration rather than a definition. */
4929 if (! declarations)
4931 char *p = dbp + 1;
4932 if (LOOKING_AT (p, "defvar"))
4934 p = skip_name (p); /* past var name */
4935 p = skip_spaces (p);
4936 if (*p == ')')
4937 continue;
4941 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4942 dbp += 3;
4944 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4946 dbp = skip_non_spaces (dbp);
4947 dbp = skip_spaces (dbp);
4948 L_getit ();
4950 else
4952 /* Check for (foo::defmumble name-defined ... */
4954 dbp++;
4955 while (!notinname (*dbp) && *dbp != ':');
4956 if (*dbp == ':')
4959 dbp++;
4960 while (*dbp == ':');
4962 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4964 dbp = skip_non_spaces (dbp);
4965 dbp = skip_spaces (dbp);
4966 L_getit ();
4975 * Lua script language parsing
4976 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4978 * "function" and "local function" are tags if they start at column 1.
4980 static void
4981 Lua_functions (FILE *inf)
4983 register char *bp;
4985 LOOP_ON_INPUT_LINES (inf, lb, bp)
4987 bp = skip_spaces (bp);
4988 if (bp[0] != 'f' && bp[0] != 'l')
4989 continue;
4991 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4993 if (LOOKING_AT (bp, "function"))
4995 char *tag_name, *tp_dot, *tp_colon;
4997 get_tag (bp, &tag_name);
4998 /* If the tag ends with ".foo" or ":foo", make an additional tag for
4999 "foo". */
5000 tp_dot = strrchr (tag_name, '.');
5001 tp_colon = strrchr (tag_name, ':');
5002 if (tp_dot || tp_colon)
5004 char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5005 int len_add = p - tag_name + 1;
5007 get_tag (bp + len_add, NULL);
5015 * PostScript tags
5016 * Just look for lines where the first character is '/'
5017 * Also look at "defineps" for PSWrap
5018 * Ideas by:
5019 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5020 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5022 static void
5023 PS_functions (FILE *inf)
5025 register char *bp, *ep;
5027 LOOP_ON_INPUT_LINES (inf, lb, bp)
5029 if (bp[0] == '/')
5031 for (ep = bp+1;
5032 *ep != '\0' && *ep != ' ' && *ep != '{';
5033 ep++)
5034 continue;
5035 make_tag (bp, ep - bp, true,
5036 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5038 else if (LOOKING_AT (bp, "defineps"))
5039 get_tag (bp, NULL);
5045 * Forth tags
5046 * Ignore anything after \ followed by space or in ( )
5047 * Look for words defined by :
5048 * Look for constant, code, create, defer, value, and variable
5049 * OBP extensions: Look for buffer:, field,
5050 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5052 static void
5053 Forth_words (FILE *inf)
5055 register char *bp;
5057 LOOP_ON_INPUT_LINES (inf, lb, bp)
5058 while ((bp = skip_spaces (bp))[0] != '\0')
5059 if (bp[0] == '\\' && c_isspace (bp[1]))
5060 break; /* read next line */
5061 else if (bp[0] == '(' && c_isspace (bp[1]))
5062 do /* skip to ) or eol */
5063 bp++;
5064 while (*bp != ')' && *bp != '\0');
5065 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5066 || LOOKING_AT_NOCASE (bp, "constant")
5067 || LOOKING_AT_NOCASE (bp, "code")
5068 || LOOKING_AT_NOCASE (bp, "create")
5069 || LOOKING_AT_NOCASE (bp, "defer")
5070 || LOOKING_AT_NOCASE (bp, "value")
5071 || LOOKING_AT_NOCASE (bp, "variable")
5072 || LOOKING_AT_NOCASE (bp, "buffer:")
5073 || LOOKING_AT_NOCASE (bp, "field"))
5074 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5075 else
5076 bp = skip_non_spaces (bp);
5081 * Scheme tag functions
5082 * look for (def... xyzzy
5083 * (def... (xyzzy
5084 * (def ... ((...(xyzzy ....
5085 * (set! xyzzy
5086 * Original code by Ken Haase (1985?)
5088 static void
5089 Scheme_functions (FILE *inf)
5091 register char *bp;
5093 LOOP_ON_INPUT_LINES (inf, lb, bp)
5095 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5097 bp = skip_non_spaces (bp+4);
5098 /* Skip over open parens and white space. Don't continue past
5099 '\0'. */
5100 while (*bp && notinname (*bp))
5101 bp++;
5102 get_tag (bp, NULL);
5104 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5105 get_tag (bp, NULL);
5110 /* Find tags in TeX and LaTeX input files. */
5112 /* TEX_toktab is a table of TeX control sequences that define tags.
5113 * Each entry records one such control sequence.
5115 * Original code from who knows whom.
5116 * Ideas by:
5117 * Stefan Monnier (2002)
/* TEX_toktab stays NULL until TeX_commands calls TEX_decode_env to
   build it.  TeX_commands walks the table until it meets an entry whose
   `buffer' is NULL, and compares `len' bytes of each entry's `buffer'
   against the input.  */
5120 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
/* The list below is colon-separated; TEX_decode_env skips empty
   fields, so the leading ':' does not produce a token.  */
5122 /* Default set of control sequences to put into TEX_toktab.
5123 The value of environment var TEXTAGS is prepended to this. */
5124 static const char *TEX_defenv = "\
5125 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5126 :part:appendix:entry:index:def\
5127 :newcommand:renewcommand:newenvironment:renewenvironment";
5129 static void TEX_decode_env (const char *, const char *);
5132 * TeX/LaTeX scanning loop.
5134 static void
5135 TeX_commands (FILE *inf)
5137 char *cp;
5138 linebuffer *key;
5140 char TEX_esc = '\0';
5141 char TEX_opgrp, TEX_clgrp;
5143 /* Initialize token table once from environment. */
5144 if (TEX_toktab == NULL)
5145 TEX_decode_env ("TEXTAGS", TEX_defenv);
5147 LOOP_ON_INPUT_LINES (inf, lb, cp)
5149 /* Look at each TEX keyword in line. */
5150 for (;;)
5152 /* Look for a TEX escape. */
5153 while (true)
5155 char c = *cp++;
5156 if (c == '\0' || c == '%')
5157 goto tex_next_line;
5159 /* Select either \ or ! as escape character, whichever comes
5160 first outside a comment. */
5161 if (!TEX_esc)
5162 switch (c)
5164 case '\\':
5165 TEX_esc = c;
5166 TEX_opgrp = '{';
5167 TEX_clgrp = '}';
5168 break;
5170 case '!':
5171 TEX_esc = c;
5172 TEX_opgrp = '<';
5173 TEX_clgrp = '>';
5174 break;
5177 if (c == TEX_esc)
5178 break;
5181 for (key = TEX_toktab; key->buffer != NULL; key++)
5182 if (strneq (cp, key->buffer, key->len))
5184 char *p;
5185 int namelen, linelen;
5186 bool opgrp = false;
5188 cp = skip_spaces (cp + key->len);
5189 if (*cp == TEX_opgrp)
5191 opgrp = true;
5192 cp++;
5194 for (p = cp;
5195 (!c_isspace (*p) && *p != '#' &&
5196 *p != TEX_opgrp && *p != TEX_clgrp);
5197 p++)
5198 continue;
5199 namelen = p - cp;
5200 linelen = lb.len;
5201 if (!opgrp || *p == TEX_clgrp)
5203 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5204 p++;
5205 linelen = p - lb.buffer + 1;
5207 make_tag (cp, namelen, true,
5208 lb.buffer, linelen, lineno, linecharno);
5209 goto tex_next_line; /* We only tag a line once */
5212 tex_next_line:
5217 /* Read environment and prepend it to the default string.
5218 Build token table. */
5219 static void
5220 TEX_decode_env (const char *evarname, const char *defenv)
5222 register const char *env, *p;
5223 int i, len;
5225 /* Append default string to environment. */
5226 env = getenv (evarname);
5227 if (!env)
5228 env = defenv;
5229 else
5230 env = concat (env, defenv, "");
5232 /* Allocate a token table */
5233 for (len = 1, p = env; (p = strchr (p, ':')); )
5234 if (*++p)
5235 len++;
5236 TEX_toktab = xnew (len, linebuffer);
5238 /* Unpack environment string into token table. Be careful about */
5239 /* zero-length strings (leading ':', "::" and trailing ':') */
5240 for (i = 0; *env != '\0';)
5242 p = strchr (env, ':');
5243 if (!p) /* End of environment string. */
5244 p = env + strlen (env);
5245 if (p - env > 0)
5246 { /* Only non-zero strings. */
5247 TEX_toktab[i].buffer = savenstr (env, p - env);
5248 TEX_toktab[i].len = p - env;
5249 i++;
5251 if (*p)
5252 env = p + 1;
5253 else
5255 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5256 TEX_toktab[i].len = 0;
5257 break;
5263 /* Texinfo support. Dave Love, Mar. 2000. */
5264 static void
5265 Texinfo_nodes (FILE *inf)
5267 char *cp, *start;
5268 LOOP_ON_INPUT_LINES (inf, lb, cp)
5269 if (LOOKING_AT (cp, "@node"))
5271 start = cp;
5272 while (*cp != '\0' && *cp != ',')
5273 cp++;
5274 make_tag (start, cp - start, true,
5275 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5281 * HTML support.
5282 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5283 * Contents of <a name=xxx> are tags with name xxx.
5285 * Francesco Potortì, 2002.
/* Scan HTML input and make tags from it.  The scan is driven by four
   flags that survive from one input line to the next:
     skiptag  - skipping up to the '>' that closes the current HTML tag;
     intag    - inside an HTML tag, looking for "id=" (and for "name="
                when INANCHOR);
     getnext  - the next text found outside HTML tags is to be tagged;
     inanchor - the HTML tag being examined is an <a ...> tag.
   An attribute value that is found is stored in the global TOKEN_NAME
   and later passed to make_tag as the tag name, together with the text
   that follows the HTML tag.  */
5287 static void
5288 HTML_labels (FILE *inf)
5290 bool getnext = false; /* next text outside of HTML tags is a tag */
5291 bool skiptag = false; /* skip to the end of the current HTML tag */
5292 bool intag = false; /* inside an html tag, looking for ID= */
5293 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5294 char *end;
5297 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5299 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5300 for (;;) /* loop on the same line */
5302 if (skiptag) /* skip HTML tag */
5304 while (*dbp != '\0' && *dbp != '>')
5305 dbp++;
5306 if (*dbp == '>')
5308 dbp += 1;
5309 skiptag = false;
5310 continue; /* look on the same line */
5312 break; /* go to next line */
5315 else if (intag) /* look for "name=" or "id=" */
/* Stop at the end of line, at '>', or at any 'n'/'i' (either case) that
   might begin one of the two attribute names.  */
5317 while (*dbp != '\0' && *dbp != '>'
5318 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5319 dbp++;
5320 if (*dbp == '\0')
5321 break; /* go to next line */
5322 if (*dbp == '>')
5324 dbp += 1;
5325 intag = false;
5326 continue; /* look on the same line */
5328 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5329 || LOOKING_AT_NOCASE (dbp, "id="))
/* NOTE(review): this presumes LOOKING_AT_NOCASE (defined elsewhere) has
   moved dbp past the attribute name, so that dbp[0] starts the value.  */
5331 bool quoted = (dbp[0] == '"');
5333 if (quoted)
5334 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5335 continue;
5336 else
5337 for (end = dbp; *end != '\0' && intoken (*end); end++)
5338 continue;
/* Copy the attribute value [dbp, end) into token_name.  */
5339 linebuffer_setlen (&token_name, end - dbp);
5340 memcpy (token_name.buffer, dbp, end - dbp);
5341 token_name.buffer[end - dbp] = '\0';
5343 dbp = end;
5344 intag = false; /* we found what we looked for */
5345 skiptag = true; /* skip to the end of the tag */
5346 getnext = true; /* then grab the text */
5347 continue; /* look on the same line */
5349 dbp += 1;
5352 else if (getnext) /* grab next tokens and tag them */
5354 dbp = skip_spaces (dbp);
5355 if (*dbp == '\0')
5356 break; /* go to next line */
5357 if (*dbp == '<')
5359 intag = true;
5360 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5361 continue; /* look on the same line */
/* Plain text: everything up to the next '<' or the end of line becomes
   the tag text, named by whatever token_name currently holds.  */
5364 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5365 continue;
5366 make_tag (token_name.buffer, token_name.len, true,
5367 dbp, end - dbp, lineno, linecharno);
5368 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5369 getnext = false;
5370 break; /* go to next line */
5373 else /* look for an interesting HTML tag */
5375 while (*dbp != '\0' && *dbp != '<')
5376 dbp++;
5377 if (*dbp == '\0')
5378 break; /* go to next line */
5379 intag = true;
5380 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5382 inanchor = true;
5383 continue; /* look on the same line */
5385 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5386 || LOOKING_AT_NOCASE (dbp, "<h1>")
5387 || LOOKING_AT_NOCASE (dbp, "<h2>")
5388 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5390 intag = false;
5391 getnext = true;
5392 continue; /* look on the same line */
5394 dbp += 1;
5401 * Prolog support
5403 * Assumes that the predicate or rule starts at column 0.
5404 * Only the first clause of a predicate or rule is added.
5405 * Original code by Sunichirou Sugou (1989)
5406 * Rewritten by Anders Lindgren (1996)
5408 static size_t prolog_pr (char *, char *);
5409 static void prolog_skip_comment (linebuffer *, FILE *);
5410 static size_t prolog_atom (char *, size_t);
5412 static void
5413 Prolog_functions (FILE *inf)
5415 char *cp, *last;
5416 size_t len;
5417 size_t allocated;
5419 allocated = 0;
5420 len = 0;
5421 last = NULL;
5423 LOOP_ON_INPUT_LINES (inf, lb, cp)
5425 if (cp[0] == '\0') /* Empty line */
5426 continue;
5427 else if (c_isspace (cp[0])) /* Not a predicate */
5428 continue;
5429 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5430 prolog_skip_comment (&lb, inf);
5431 else if ((len = prolog_pr (cp, last)) > 0)
5433 /* Predicate or rule. Store the function name so that we
5434 only generate a tag for the first clause. */
5435 if (last == NULL)
5436 last = xnew (len + 1, char);
5437 else if (len + 1 > allocated)
5438 xrnew (last, len + 1, char);
5439 allocated = len + 1;
5440 memcpy (last, cp, len);
5441 last[len] = '\0';
5444 free (last);
5448 static void
5449 prolog_skip_comment (linebuffer *plb, FILE *inf)
5451 char *cp;
5455 for (cp = plb->buffer; *cp != '\0'; cp++)
5456 if (cp[0] == '*' && cp[1] == '/')
5457 return;
5458 readline (plb, inf);
5460 while (perhaps_more_input (inf));
5464 * A predicate or rule definition is added if it matches:
5465 * <beginning of line><Prolog Atom><whitespace>(
5466 * or <beginning of line><Prolog Atom><whitespace>:-
5468 * It is added to the tags database if it doesn't match the
5469 * name of the previous clause header.
5471 * Return the size of the name of the predicate or rule, or 0 if no
5472 * header was found.
5474 static size_t
5475 prolog_pr (char *s, char *last)
5477 /* Name of last clause. */
5479 size_t pos;
5480 size_t len;
5482 pos = prolog_atom (s, 0);
5483 if (! pos)
5484 return 0;
5486 len = pos;
5487 pos = skip_spaces (s + pos) - s;
5489 if ((s[pos] == '.'
5490 || (s[pos] == '(' && (pos += 1))
5491 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5492 && (last == NULL /* save only the first clause */
5493 || len != strlen (last)
5494 || !strneq (s, last, len)))
5496 make_tag (s, len, true, s, pos, lineno, linecharno);
5497 return len;
5499 else
5500 return 0;
5504 * Consume a Prolog atom.
5505 * Return the number of bytes consumed, or 0 if there was an error.
5507 * A prolog atom, in this context, could be one of:
5508 * - An alphanumeric sequence, starting with a lower case letter.
5509 * - A quoted arbitrary string. Single quotes can escape themselves.
5510 * Backslash quotes everything.
/* Consume the Prolog atom that starts at S[POS] and return the number
   of bytes it occupies, or 0 when there is no atom there.

   Recognized atoms are:
   - an unquoted name: a lower-case letter or '_' followed by any
     number of alphanumerics and '_';
   - a single-quoted string, in which '' stands for a quote and a
     backslash quotes the next character.  A quoted atom that does not
     end on the same line is rejected.  */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  p++;                          /* step over the opening quote */
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote */
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5562 * Support for Erlang
5564 * Generates tags for functions, defines, and records.
5565 * Assumes that Erlang functions start at column 0.
5566 * Original code by Anders Lindgren (1996)
5568 static int erlang_func (char *, char *);
5569 static void erlang_attribute (char *);
5570 static int erlang_atom (char *);
5572 static void
5573 Erlang_functions (FILE *inf)
5575 char *cp, *last;
5576 int len;
5577 int allocated;
5579 allocated = 0;
5580 len = 0;
5581 last = NULL;
5583 LOOP_ON_INPUT_LINES (inf, lb, cp)
5585 if (cp[0] == '\0') /* Empty line */
5586 continue;
5587 else if (c_isspace (cp[0])) /* Not function nor attribute */
5588 continue;
5589 else if (cp[0] == '%') /* comment */
5590 continue;
5591 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5592 continue;
5593 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5595 erlang_attribute (cp);
5596 if (last != NULL)
5598 free (last);
5599 last = NULL;
5602 else if ((len = erlang_func (cp, last)) > 0)
5605 * Function. Store the function name so that we only
5606 * generates a tag for the first clause.
5608 if (last == NULL)
5609 last = xnew (len + 1, char);
5610 else if (len + 1 > allocated)
5611 xrnew (last, len + 1, char);
5612 allocated = len + 1;
5613 memcpy (last, cp, len);
5614 last[len] = '\0';
5617 free (last);
5622 * A function definition is added if it matches:
5623 * <beginning of line><Erlang Atom><whitespace>(
5625 * It is added to the tags database if it doesn't match the
5626 * name of the previous clause header.
5628 * Return the size of the name of the function, or 0 if no function
5629 * was found.
5631 static int
5632 erlang_func (char *s, char *last)
5634 /* Name of last clause. */
5636 int pos;
5637 int len;
5639 pos = erlang_atom (s);
5640 if (pos < 1)
5641 return 0;
5643 len = pos;
5644 pos = skip_spaces (s + pos) - s;
5646 /* Save only the first clause. */
5647 if (s[pos++] == '('
5648 && (last == NULL
5649 || len != (int)strlen (last)
5650 || !strneq (s, last, len)))
5652 make_tag (s, len, true, s, pos, lineno, linecharno);
5653 return len;
5656 return 0;
5661 * Handle attributes. Currently, tags are generated for defines
5662 * and records.
5664 * They are of the form:
5665 * -define(foo, bar).
5666 * -define(Foo(M, N), M+N).
5667 * -record(graph, {vtab = notable, cyclic = true}).
5669 static void
5670 erlang_attribute (char *s)
5672 char *cp = s;
5674 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5675 && *cp++ == '(')
5677 int len = erlang_atom (skip_spaces (cp));
5678 if (len > 0)
5679 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5681 return;
5686 * Consume an Erlang atom (or variable).
5687 * Return the number of bytes consumed, or 0 if there was an error.
/* Measure the Erlang atom (or variable) at the start of S and return
   its length in bytes, or 0 when S does not start with one.

   An unquoted name is a letter or '_' followed by alphanumerics and
   '_'.  A quoted name is enclosed in single quotes, a backslash
   quoting the character after it; its length includes both quotes,
   and it is rejected when it does not end on the same line.  */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  /* The atom is unquoted. */
  if (c_isalpha (s[0]) || s[0] == '_')
    for (pos = 1; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
      continue;

  return pos;
}
5714 static char *scan_separators (char *);
5715 static void add_regex (char *, language *);
5716 static char *substitute (char *, char *, struct re_registers *);
5719 * Take a string like "/blah/" and turn it into "blah", verifying
5720 * that the first and last characters are the same, and handling
5721 * quoted separator characters. Actually, stops on the occurrence of
5722 * an unquoted separator. Also process \t, \n, etc. and turn into
5723 * appropriate characters. Works in place. Null terminates name string.
5724 * Returns pointer to terminating separator, or NULL for
5725 * unterminated regexps.
/* Unquote, in place, the separator-delimited string starting at NAME.

   NAME[0] is the separator.  The text after it is copied down to
   NAME[0] onward until an unquoted separator is met, and the copy is
   null-terminated.  While copying, \a \b \d \e \f \n \r \t \v become
   the corresponding control characters, a quoted separator becomes the
   separator itself, and any other quoted character is kept together
   with its backslash.

   Return a pointer to the terminating separator in the original
   text, or NULL when the string is unterminated.  */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;
  char *src = name + 1;

  while (*src != '\0')
    {
      if (*src == '\\')
        {
          /* A quoting backslash: look at what it quotes.  */
          src++;
          if (*src == '\0')
            break;              /* dangling backslash at the end */

          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b';   break; /* BS (back space) */
            case 'd': *dst++ = 0177;   break; /* DEL (delete) */
            case 'e': *dst++ = 033;    break; /* ESC (escape) */
            case 'f': *dst++ = '\f';   break; /* FF (form feed) */
            case 'n': *dst++ = '\n';   break; /* NL (new line) */
            case 'r': *dst++ = '\r';   break; /* CR (carriage return) */
            case 't': *dst++ = '\t';   break; /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v';   break; /* VT (vertical tab) */
            default:
              if (*src != sep)
                /* Something else is quoted, so preserve the quote. */
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
          src++;
        }
      else if (*src == sep)
        break;                  /* unquoted separator: end of string */
      else
        *dst++ = *src++;
    }

  /* Terminate copied string. */
  *dst = '\0';

  /* Signal an unterminated regexp with NULL. */
  return *src == sep ? src : NULL;
}
5777 /* Look at the argument of --regex or --no-regex and do the right
5778 thing. Same for each line of a regexp file. */
5779 static void
5780 analyze_regex (char *regex_arg)
5782 if (regex_arg == NULL)
5784 free_regexps (); /* --no-regex: remove existing regexps */
5785 return;
5788 /* A real --regexp option or a line in a regexp file. */
5789 switch (regex_arg[0])
5791 /* Comments in regexp file or null arg to --regex. */
5792 case '\0':
5793 case ' ':
5794 case '\t':
5795 break;
5797 /* Read a regex file. This is recursive and may result in a
5798 loop, which will stop when the file descriptors are exhausted. */
5799 case '@':
5801 FILE *regexfp;
5802 linebuffer regexbuf;
5803 char *regexfile = regex_arg + 1;
5805 /* regexfile is a file containing regexps, one per line. */
5806 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5807 if (regexfp == NULL)
5808 pfatal (regexfile);
5809 linebuffer_init (&regexbuf);
5810 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5811 analyze_regex (regexbuf.buffer);
5812 free (regexbuf.buffer);
5813 if (fclose (regexfp) != 0)
5814 pfatal (regexfile);
5816 break;
5818 /* Regexp to be used for a specific language only. */
5819 case '{':
5821 language *lang;
5822 char *lang_name = regex_arg + 1;
5823 char *cp;
5825 for (cp = lang_name; *cp != '}'; cp++)
5826 if (*cp == '\0')
5828 error ("unterminated language name in regex: %s", regex_arg);
5829 return;
5831 *cp++ = '\0';
5832 lang = get_language_from_langname (lang_name);
5833 if (lang == NULL)
5834 return;
5835 add_regex (cp, lang);
5837 break;
5839 /* Regexp to be used for any language. */
5840 default:
5841 add_regex (regex_arg, NULL);
5842 break;
5846 /* Separate the regexp pattern, compile it,
5847 and care for optional name and modifiers. */
5848 static void
5849 add_regex (char *regexp_pattern, language *lang)
5851 static struct re_pattern_buffer zeropattern;
5852 char sep, *pat, *name, *modifiers;
5853 char empty = '\0';
5854 const char *err;
5855 struct re_pattern_buffer *patbuf;
5856 regexp *rp;
5857 bool
5858 force_explicit_name = true, /* do not use implicit tag names */
5859 ignore_case = false, /* case is significant */
5860 multi_line = false, /* matches are done one line at a time */
5861 single_line = false; /* dot does not match newline */
5864 if (strlen (regexp_pattern) < 3)
5866 error ("null regexp");
5867 return;
5869 sep = regexp_pattern[0];
5870 name = scan_separators (regexp_pattern);
5871 if (name == NULL)
5873 error ("%s: unterminated regexp", regexp_pattern);
5874 return;
5876 if (name[1] == sep)
5878 error ("null name for regexp \"%s\"", regexp_pattern);
5879 return;
5881 modifiers = scan_separators (name);
5882 if (modifiers == NULL) /* no terminating separator --> no name */
5884 modifiers = name;
5885 name = &empty;
5887 else
5888 modifiers += 1; /* skip separator */
5890 /* Parse regex modifiers. */
5891 for (; modifiers[0] != '\0'; modifiers++)
5892 switch (modifiers[0])
5894 case 'N':
5895 if (modifiers == name)
5896 error ("forcing explicit tag name but no name, ignoring");
5897 force_explicit_name = true;
5898 break;
5899 case 'i':
5900 ignore_case = true;
5901 break;
5902 case 's':
5903 single_line = true;
5904 /* FALLTHRU */
5905 case 'm':
5906 multi_line = true;
5907 need_filebuf = true;
5908 break;
5909 default:
5910 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
5911 break;
5914 patbuf = xnew (1, struct re_pattern_buffer);
5915 *patbuf = zeropattern;
5916 if (ignore_case)
5918 static char lc_trans[UCHAR_MAX + 1];
5919 int i;
5920 for (i = 0; i < UCHAR_MAX + 1; i++)
5921 lc_trans[i] = c_tolower (i);
5922 patbuf->translate = lc_trans; /* translation table to fold case */
5925 if (multi_line)
5926 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5927 else
5928 pat = regexp_pattern;
5930 if (single_line)
5931 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5932 else
5933 re_set_syntax (RE_SYNTAX_EMACS);
5935 err = re_compile_pattern (pat, strlen (pat), patbuf);
5936 if (multi_line)
5937 free (pat);
5938 if (err != NULL)
5940 error ("%s while compiling pattern", err);
5941 return;
5944 rp = p_head;
5945 p_head = xnew (1, regexp);
5946 p_head->pattern = savestr (regexp_pattern);
5947 p_head->p_next = rp;
5948 p_head->lang = lang;
5949 p_head->pat = patbuf;
5950 p_head->name = savestr (name);
5951 p_head->error_signaled = false;
5952 p_head->force_explicit_name = force_explicit_name;
5953 p_head->ignore_case = ignore_case;
5954 p_head->multi_line = multi_line;
5958 * Do the substitutions indicated by the regular expression and
5959 * arguments.
5961 static char *
5962 substitute (char *in, char *out, struct re_registers *regs)
5964 char *result, *t;
5965 int size, dig, diglen;
5967 result = NULL;
5968 size = strlen (out);
5970 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5971 if (out[size - 1] == '\\')
5972 fatal ("pattern error in \"%s\"", out);
5973 for (t = strchr (out, '\\');
5974 t != NULL;
5975 t = strchr (t + 2, '\\'))
5976 if (c_isdigit (t[1]))
5978 dig = t[1] - '0';
5979 diglen = regs->end[dig] - regs->start[dig];
5980 size += diglen - 2;
5982 else
5983 size -= 1;
5985 /* Allocate space and do the substitutions. */
5986 assert (size >= 0);
5987 result = xnew (size + 1, char);
5989 for (t = result; *out != '\0'; out++)
5990 if (*out == '\\' && c_isdigit (*++out))
5992 dig = *out - '0';
5993 diglen = regs->end[dig] - regs->start[dig];
5994 memcpy (t, in + regs->start[dig], diglen);
5995 t += diglen;
5997 else
5998 *t++ = *out;
5999 *t = '\0';
6001 assert (t <= result + size);
6002 assert (t - result == (int)strlen (result));
6004 return result;
6007 /* Deallocate all regexps. */
6008 static void
6009 free_regexps (void)
6011 regexp *rp;
6012 while (p_head != NULL)
6014 rp = p_head->p_next;
6015 free (p_head->pattern);
6016 free (p_head->name);
6017 free (p_head);
6018 p_head = rp;
6020 return;
6024 * Reads the whole file as a single string from `filebuf' and looks for
6025 * multi-line regular expressions, creating tags on matches.
6026 * readline already dealt with normal regexps.
6028 * Idea by Ben Wing <ben@666.com> (2002).
/* Apply every multi-line regexp on the p_head list to the whole file,
   held as one string in `filebuf', and make a tag for each match.
   Regexps bound to a language other than the one of the current file
   (curfdp->lang) are skipped.  The globals lineno, charno and
   linecharno are reset before each regexp is searched and are advanced
   to the end of each match, so that a tag records the line on which
   its match ends.  */
6030 static void
6031 regex_tag_multiline (void)
6033 char *buffer = filebuf.buffer;
6034 regexp *rp;
6035 char *name;
6037 for (rp = p_head; rp != NULL; rp = rp->p_next)
6039 int match = 0;
6041 if (!rp->multi_line)
6042 continue; /* skip normal regexps */
6044 /* Generic initializations before parsing file from memory. */
6045 lineno = 1; /* reset global line number */
6046 charno = 0; /* reset global char number */
6047 linecharno = 0; /* reset global char number of line start */
6049 /* Only use generic regexps or those for the current language. */
6050 if (rp->lang != NULL && rp->lang != curfdp->lang)
6051 continue;
/* Each search starts at charno, the end of the previous match.
   NOTE(review): the range argument is computed from MATCH (the start
   of the previous match), not from charno - confirm against the
   re_search documentation that this is the intended range.  */
6053 while (match >= 0 && match < filebuf.len)
6055 match = re_search (rp->pat, buffer, filebuf.len, charno,
6056 filebuf.len - match, &rp->regs);
6057 switch (match)
6059 case -2:
6060 /* Some error. */
6061 if (!rp->error_signaled)
6063 error ("regexp stack overflow while matching \"%s\"",
6064 rp->pattern);
6065 rp->error_signaled = true;
6067 break;
6068 case -1:
6069 /* No match. */
6070 break;
6071 default:
/* A match that ends where it starts would never advance charno, so it
   is reported once per regexp and the search for this regexp stops.  */
6072 if (match == rp->regs.end[0])
6074 if (!rp->error_signaled)
6076 error ("regexp matches the empty string: \"%s\"",
6077 rp->pattern);
6078 rp->error_signaled = true;
6080 match = -3; /* exit from while loop */
6081 break;
6084 /* Match occurred. Construct a tag. */
6085 while (charno < rp->regs.end[0])
6086 if (buffer[charno++] == '\n')
6087 lineno++, linecharno = charno;
6088 name = rp->name;
6089 if (name[0] == '\0')
6090 name = NULL;
6091 else /* make a named tag */
6092 name = substitute (buffer, rp->name, &rp->regs);
/* NOTE(review): add_regex always stores force_explicit_name as true, so
   the make_tag call below looks unreachable for regexps created there;
   it would pass a NULL name to strlen when the regexp has no name.  */
6093 if (rp->force_explicit_name)
6094 /* Force explicit tag name, if a name is there. */
6095 pfnote (name, true, buffer + linecharno,
6096 charno - linecharno + 1, lineno, linecharno);
6097 else
6098 make_tag (name, strlen (name), true, buffer + linecharno,
6099 charno - linecharno + 1, lineno, linecharno);
6100 break;
6107 static bool
6108 nocase_tail (const char *cp)
6110 int len = 0;
6112 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6113 cp++, len++;
6114 if (*cp == '\0' && !intoken (dbp[len]))
6116 dbp += len;
6117 return true;
6119 return false;
6122 static void
6123 get_tag (register char *bp, char **namepp)
6125 register char *cp = bp;
6127 if (*bp != '\0')
6129 /* Go till you get to white space or a syntactic break */
6130 for (cp = bp + 1; !notinname (*cp); cp++)
6131 continue;
6132 make_tag (bp, cp - bp, true,
6133 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6136 if (namepp != NULL)
6137 *namepp = savenstr (bp, cp - bp);
6141 * Read a line of text from `stream' into `lbp', excluding the
6142 * newline or CR-NL, if any. Return the number of characters read from
6143 * `stream', which is the length of the line including the newline.
6145 * On DOS or Windows we do not count the CR character, if any before the
6146 * NL, in the returned length; this mirrors the behavior of Emacs on those
6147 * platforms (for text files, it translates CR-NL to NL as it reads in the
6148 * file).
6150 * If multi-line regular expressions are requested, each line read is
6151 * appended to `filebuf'.
6153 static long
6154 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6156 char *buffer = lbp->buffer;
6157 char *p = lbp->buffer;
6158 char *pend;
6159 int chars_deleted;
6161 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6163 for (;;)
6165 register int c = getc (stream);
6166 if (p == pend)
6168 /* We're at the end of linebuffer: expand it. */
6169 lbp->size *= 2;
6170 xrnew (buffer, lbp->size, char);
6171 p += buffer - lbp->buffer;
6172 pend = buffer + lbp->size;
6173 lbp->buffer = buffer;
6175 if (c == EOF)
6177 if (ferror (stream))
6178 perror (filename);
6179 *p = '\0';
6180 chars_deleted = 0;
6181 break;
6183 if (c == '\n')
6185 if (p > buffer && p[-1] == '\r')
6187 p -= 1;
6188 chars_deleted = 2;
6190 else
6192 chars_deleted = 1;
6194 *p = '\0';
6195 break;
6197 *p++ = c;
6199 lbp->len = p - buffer;
6201 if (need_filebuf /* we need filebuf for multi-line regexps */
6202 && chars_deleted > 0) /* not at EOF */
6204 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6206 /* Expand filebuf. */
6207 filebuf.size *= 2;
6208 xrnew (filebuf.buffer, filebuf.size, char);
6210 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6211 filebuf.len += lbp->len;
6212 filebuf.buffer[filebuf.len++] = '\n';
6213 filebuf.buffer[filebuf.len] = '\0';
6216 return lbp->len + chars_deleted;
6220 * Like readline_internal, above, but in addition try to match the
6221 * input line against relevant regular expressions and manage #line
6222 * directives.
6224 static void
6225 readline (linebuffer *lbp, FILE *stream)
6227 long result;
6229 linecharno = charno; /* update global char number of line start */
6230 result = readline_internal (lbp, stream, infilename); /* read line */
6231 lineno += 1; /* increment global line number */
6232 charno += result; /* increment global char number */
6234 /* Honor #line directives. */
6235 if (!no_line_directive)
6237 static bool discard_until_line_directive;
6239 /* Check whether this is a #line directive. */
6240 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6242 unsigned int lno;
6243 int start = 0;
6245 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6246 && start > 0) /* double quote character found */
6248 char *endp = lbp->buffer + start;
6250 while ((endp = strchr (endp, '"')) != NULL
6251 && endp[-1] == '\\')
6252 endp++;
6253 if (endp != NULL)
6254 /* Ok, this is a real #line directive. Let's deal with it. */
6256 char *taggedabsname; /* absolute name of original file */
6257 char *taggedfname; /* name of original file as given */
6258 char *name; /* temp var */
6260 discard_until_line_directive = false; /* found it */
6261 name = lbp->buffer + start;
6262 *endp = '\0';
6263 canonicalize_filename (name);
6264 taggedabsname = absolute_filename (name, tagfiledir);
6265 if (filename_is_absolute (name)
6266 || filename_is_absolute (curfdp->infname))
6267 taggedfname = savestr (taggedabsname);
6268 else
6269 taggedfname = relative_filename (taggedabsname,tagfiledir);
6271 if (streq (curfdp->taggedfname, taggedfname))
6272 /* The #line directive is only a line number change. We
6273 deal with this afterwards. */
6274 free (taggedfname);
6275 else
6276 /* The tags following this #line directive should be
6277 attributed to taggedfname. In order to do this, set
6278 curfdp accordingly. */
6280 fdesc *fdp; /* file description pointer */
6282 /* Go look for a file description already set up for the
6283 file indicated in the #line directive. If there is
6284 one, use it from now until the next #line
6285 directive. */
6286 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6287 if (streq (fdp->infname, curfdp->infname)
6288 && streq (fdp->taggedfname, taggedfname))
6289 /* If we remove the second test above (after the &&)
6290 then all entries pertaining to the same file are
6291 coalesced in the tags file. If we use it, then
6292 entries pertaining to the same file but generated
6293 from different files (via #line directives) will
6294 go into separate sections in the tags file. These
6295 alternatives look equivalent. The first one
6296 destroys some apparently useless information. */
6298 curfdp = fdp;
6299 free (taggedfname);
6300 break;
6302 /* Else, if we already tagged the real file, skip all
6303 input lines until the next #line directive. */
6304 if (fdp == NULL) /* not found */
6305 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6306 if (streq (fdp->infabsname, taggedabsname))
6308 discard_until_line_directive = true;
6309 free (taggedfname);
6310 break;
6312 /* Else create a new file description and use that from
6313 now on, until the next #line directive. */
6314 if (fdp == NULL) /* not found */
6316 fdp = fdhead;
6317 fdhead = xnew (1, fdesc);
6318 *fdhead = *curfdp; /* copy curr. file description */
6319 fdhead->next = fdp;
6320 fdhead->infname = savestr (curfdp->infname);
6321 fdhead->infabsname = savestr (curfdp->infabsname);
6322 fdhead->infabsdir = savestr (curfdp->infabsdir);
6323 fdhead->taggedfname = taggedfname;
6324 fdhead->usecharno = false;
6325 fdhead->prop = NULL;
6326 fdhead->written = false;
6327 curfdp = fdhead;
6330 free (taggedabsname);
6331 lineno = lno - 1;
6332 readline (lbp, stream);
6333 return;
6334 } /* if a real #line directive */
6335 } /* if #line is followed by a number */
6336 } /* if line begins with "#line " */
6338 /* If we are here, no #line directive was found. */
6339 if (discard_until_line_directive)
6341 if (result > 0)
6343 /* Do a tail recursion on ourselves, thus discarding the contents
6344 of the line buffer. */
6345 readline (lbp, stream);
6346 return;
6348 /* End of file. */
6349 discard_until_line_directive = false;
6350 return;
6352 } /* if #line directives should be considered */
6355 int match;
6356 regexp *rp;
6357 char *name;
6359 /* Match against relevant regexps. */
6360 if (lbp->len > 0)
6361 for (rp = p_head; rp != NULL; rp = rp->p_next)
6363 /* Only use generic regexps or those for the current language.
6364 Also do not use multiline regexps, which is the job of
6365 regex_tag_multiline. */
6366 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6367 || rp->multi_line)
6368 continue;
6370 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6371 switch (match)
6373 case -2:
6374 /* Some error. */
6375 if (!rp->error_signaled)
6377 error ("regexp stack overflow while matching \"%s\"",
6378 rp->pattern);
6379 rp->error_signaled = true;
6381 break;
6382 case -1:
6383 /* No match. */
6384 break;
6385 case 0:
6386 /* Empty string matched. */
6387 if (!rp->error_signaled)
6389 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6390 rp->error_signaled = true;
6392 break;
6393 default:
6394 /* Match occurred. Construct a tag. */
6395 name = rp->name;
6396 if (name[0] == '\0')
6397 name = NULL;
6398 else /* make a named tag */
6399 name = substitute (lbp->buffer, rp->name, &rp->regs);
6400 if (rp->force_explicit_name)
6401 /* Force explicit tag name, if a name is there. */
6402 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6403 else
6404 make_tag (name, strlen (name), true,
6405 lbp->buffer, match, lineno, linecharno);
6406 break;
/*
 * Return a newly allocated copy of the NUL-terminated string CP.  The
 * copy occupies strlen (CP) + 1 bytes and is made with savenstr.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6424 * Return a pointer to a space of size LEN+1 allocated with xnew where
6425 * the string CP has been copied for at most the first LEN characters.
6427 static char *
6428 savenstr (const char *cp, int len)
6430 char *dp = xnew (len + 1, char);
6431 dp[len] = '\0';
6432 return memcpy (dp, cp, len);
/* Return a pointer to the first character at or after CP for which
   c_isspace is false.  The end of the string is not a space, so the
   scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first space character at or after CP, or to
   the terminating NUL if no space is found first.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (c_isspace (*cp))
      break;
  return cp;
}
/* Return a pointer to the first character at or after CP that is not in
   the "name" class, i.e. for which notinname is true.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror, then terminate with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Print the system error message for the current errno, prefixed by S1,
   using perror; then terminate with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6481 static void
6482 suggest_asking_for_help (void)
6484 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6485 progname);
6486 exit (EXIT_FAILURE);
/* Output a diagnostic built from the printf-style FORMAT and its
   arguments.  The formatting and printing are delegated to verror;
   unlike fatal, this function returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6499 static void
6500 verror (char const *format, va_list ap)
6502 fprintf (stderr, "%s: ", progname);
6503 vfprintf (stderr, format, ap);
6504 fprintf (stderr, "\n");
6507 /* Return a newly-allocated string whose contents
6508 concatenate those of s1, s2, s3. */
6509 static char *
6510 concat (const char *s1, const char *s2, const char *s3)
6512 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6513 char *result = xnew (len1 + len2 + len3 + 1, char);
6515 strcpy (result, s1);
6516 strcpy (result + len1, s2);
6517 strcpy (result + len1 + len2, s3);
6519 return result;
6523 /* Does the same work as the system V getcwd, but does not need to
6524 guess the buffer size in advance. */
6525 static char *
6526 etags_getcwd (void)
6528 int bufsize = 200;
6529 char *path = xnew (bufsize, char);
6531 while (getcwd (path, bufsize) == NULL)
6533 if (errno != ERANGE)
6534 pfatal ("getcwd");
6535 bufsize *= 2;
6536 free (path);
6537 path = xnew (bufsize, char);
6540 canonicalize_filename (path);
6541 return path;
6544 /* Return a newly allocated string containing a name of a temporary file. */
6545 static char *
6546 etags_mktmp (void)
6548 const char *tmpdir = getenv ("TMPDIR");
6549 const char *slash = "/";
6551 #if MSDOS || defined (DOS_NT)
6552 if (!tmpdir)
6553 tmpdir = getenv ("TEMP");
6554 if (!tmpdir)
6555 tmpdir = getenv ("TMP");
6556 if (!tmpdir)
6557 tmpdir = ".";
6558 if (tmpdir[strlen (tmpdir) - 1] == '/'
6559 || tmpdir[strlen (tmpdir) - 1] == '\\')
6560 slash = "";
6561 #else
6562 if (!tmpdir)
6563 tmpdir = "/tmp";
6564 if (tmpdir[strlen (tmpdir) - 1] == '/')
6565 slash = "";
6566 #endif
6568 char *templt = concat (tmpdir, slash, "etXXXXXX");
6569 int fd = mkostemp (templt, O_CLOEXEC);
6570 if (fd < 0 || close (fd) != 0)
6572 int temp_errno = errno;
6573 free (templt);
6574 errno = temp_errno;
6575 templt = NULL;
6578 #if defined (DOS_NT)
6579 /* The file name will be used in shell redirection, so it needs to have
6580 DOS-style backslashes, or else the Windows shell will barf. */
6581 char *p;
6582 for (p = templt; *p; p++)
6583 if (*p == '/')
6584 *p = '\\';
6585 #endif
6587 return templt;
6590 /* Return a newly allocated string containing the file name of FILE
6591 relative to the absolute directory DIR (which should end with a slash). */
6592 static char *
6593 relative_filename (char *file, char *dir)
6595 char *fp, *dp, *afn, *res;
6596 int i;
6598 /* Find the common root of file and dir (with a trailing slash). */
6599 afn = absolute_filename (file, cwd);
6600 fp = afn;
6601 dp = dir;
6602 while (*fp++ == *dp++)
6603 continue;
6604 fp--, dp--; /* back to the first differing char */
6605 #ifdef DOS_NT
6606 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6607 return afn;
6608 #endif
6609 do /* look at the equal chars until '/' */
6610 fp--, dp--;
6611 while (*fp != '/');
6613 /* Build a sequence of "../" strings for the resulting relative file name. */
6614 i = 0;
6615 while ((dp = strchr (dp + 1, '/')) != NULL)
6616 i += 1;
6617 res = xnew (3*i + strlen (fp + 1) + 1, char);
6618 char *z = res;
6619 while (i-- > 0)
6620 z = stpcpy (z, "../");
6622 /* Add the file name relative to the common root of file and dir. */
6623 strcpy (z, fp + 1);
6624 free (afn);
6626 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise it is appended
   to DIR.  "/dirname/.." and "/." components are then deleted in place.
   Should the result end up empty, "/" is returned instead.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the start of the component that
                 precedes "/..", without ever stepping before RES.  */
              bool found = false;

              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      found = true;
                      break;
                    }
                }
              if (!found)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* The length includes the terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the length includes the terminating NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute file name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash the result is a copy of DIR.
   FILE is truncated after its last slash while the name is computed and
   restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *result;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, resolve it, then undo the cut.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  result = absolute_filename (file, dir);
  last_slash[1] = saved;

  return result;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with '/'; under DOS_NT a name of
   the form `X:/...' with X alphabetic is absolute too.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Whether C is a directory separator that canonicalize_filename
   collapses: '/' everywhere, and also '\\' under DOS_NT.  */
static bool
canon_is_separator (char c)
{
#ifdef DOS_NT
  return c == '/' || c == '\\';
#else
  return c == '/';
#endif
}

/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place: every run of separators in FN is replaced by a single
   forward slash and the string is re-terminated at its new length.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next char to read */
  register char *dst = fn;      /* next char to write; never ahead of SRC */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  while (*src != '\0')
    {
      if (canon_is_separator (*src))
        {
          /* Emit one slash for the whole run of separators.  */
          *dst++ = '/';
          do
            src++;
          while (canon_is_separator (*src));
        }
      else
        *dst++ = *src++;
    }

  *dst = '\0';
}
6767 /* Initialize a linebuffer for use. */
6768 static void
6769 linebuffer_init (linebuffer *lbp)
6771 lbp->size = (DEBUG) ? 3 : 200;
6772 lbp->buffer = xnew (lbp->size, char);
6773 lbp->buffer[0] = '\0';
6774 lbp->len = 0;
6777 /* Set the minimum size of a string contained in a linebuffer. */
6778 static void
6779 linebuffer_setlen (linebuffer *lbp, int toksize)
6781 while (lbp->size <= toksize)
6783 lbp->size *= 2;
6784 xrnew (lbp->buffer, lbp->size, char);
6786 lbp->len = toksize;
/* Like malloc, but report "virtual memory exhausted" through fatal
   when malloc returns NULL.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
/* Like realloc, but report "virtual memory exhausted" through fatal
   when realloc returns NULL.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6809 * Local Variables:
6810 * indent-tabs-mode: t
6811 * tab-width: 8
6812 * fill-column: 79
6813 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6814 * c-file-style: "gnu"
6815 * End:
6818 /* etags.c ends here */