; NEWS tweak
[emacs.git] / lib-src / etags.c
blob8b980d365efafeb4d8a212a787a100a4df7fa614
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
/* Revision string embedded in the binary.  It starts with the "@(#)"
   marker, presumably so that what(1)-style tools can locate it.  */
char pot_etags_version[] =
  "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #ifdef WIN32_NATIVE
100 # undef MSDOS
101 # undef WINDOWSNT
102 # define WINDOWSNT
103 #endif /* WIN32_NATIVE */
105 #ifdef MSDOS
106 # undef MSDOS
107 # define MSDOS true
108 # include <sys/param.h>
109 #else
110 # define MSDOS false
111 #endif /* MSDOS */
113 #ifdef WINDOWSNT
114 # include <direct.h>
115 # define MAXPATHLEN _MAX_PATH
116 # undef HAVE_NTGUI
117 # undef DOS_NT
118 # define DOS_NT
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <binary-io.h>
131 #include <c-ctype.h>
132 #include <c-strcase.h>
134 #include <assert.h>
135 #ifdef NDEBUG
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
138 #endif
140 #include <getopt.h>
141 #include <regex.h>
143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
144 Leave it undefined to make the program "etags", which makes emacs-style
145 tag tables and tags typedefs, #defines and struct/union/enum by default. */
146 #ifdef CTAGS
147 # undef CTAGS
148 # define CTAGS true
149 #else
150 # define CTAGS false
151 #endif
/* Return true if the strings S and T are equal according to strcmp.  */
static bool
streq (char const *s, char const *t)
{
  return !strcmp (s, t);
}
/* Return true if c_strcasecmp reports the strings S and T as equal.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return !c_strcasecmp (s, t);
}
/* Return true if the first N characters of S and T are equal
   according to strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return !strncmp (s, t, n);
}
/* Return true if c_strncasecmp reports the first N characters of S and T
   as equal.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return !c_strncasecmp (s, t, n);
}
/* Return true if the character C is not in a name.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  static bool const name_breaker[UCHAR_MAX + 1] =
    {
      ['\0'] = true, ['\t'] = true, ['\n'] = true, ['\f'] = true,
      ['\r'] = true, [' '] = true,
      ['('] = true, [')'] = true, [','] = true, [';'] = true, ['='] = true
    };

  return name_breaker[c];
}
/* Return true if the character C can start a token: '$', '@', an ASCII
   letter, '_' or '~'.  */
static bool
begtoken (unsigned char c)
{
  static bool const token_start[UCHAR_MAX + 1] =
    {
      ['$'] = true, ['@'] = true, ['_'] = true, ['~'] = true,

      ['A'] = true, ['B'] = true, ['C'] = true, ['D'] = true, ['E'] = true,
      ['F'] = true, ['G'] = true, ['H'] = true, ['I'] = true, ['J'] = true,
      ['K'] = true, ['L'] = true, ['M'] = true, ['N'] = true, ['O'] = true,
      ['P'] = true, ['Q'] = true, ['R'] = true, ['S'] = true, ['T'] = true,
      ['U'] = true, ['V'] = true, ['W'] = true, ['X'] = true, ['Y'] = true,
      ['Z'] = true,

      ['a'] = true, ['b'] = true, ['c'] = true, ['d'] = true, ['e'] = true,
      ['f'] = true, ['g'] = true, ['h'] = true, ['i'] = true, ['j'] = true,
      ['k'] = true, ['l'] = true, ['m'] = true, ['n'] = true, ['o'] = true,
      ['p'] = true, ['q'] = true, ['r'] = true, ['s'] = true, ['t'] = true,
      ['u'] = true, ['v'] = true, ['w'] = true, ['x'] = true, ['y'] = true,
      ['z'] = true
    };

  return token_start[c];
}
/* Return true if the character C can be in the middle of a token: '$',
   an ASCII digit or letter, or '_'.  */
static bool
intoken (unsigned char c)
{
  static bool const token_middle[UCHAR_MAX + 1] =
    {
      ['$'] = true, ['_'] = true,

      ['0'] = true, ['1'] = true, ['2'] = true, ['3'] = true, ['4'] = true,
      ['5'] = true, ['6'] = true, ['7'] = true, ['8'] = true, ['9'] = true,

      ['A'] = true, ['B'] = true, ['C'] = true, ['D'] = true, ['E'] = true,
      ['F'] = true, ['G'] = true, ['H'] = true, ['I'] = true, ['J'] = true,
      ['K'] = true, ['L'] = true, ['M'] = true, ['N'] = true, ['O'] = true,
      ['P'] = true, ['Q'] = true, ['R'] = true, ['S'] = true, ['T'] = true,
      ['U'] = true, ['V'] = true, ['W'] = true, ['X'] = true, ['Y'] = true,
      ['Z'] = true,

      ['a'] = true, ['b'] = true, ['c'] = true, ['d'] = true, ['e'] = true,
      ['f'] = true, ['g'] = true, ['h'] = true, ['i'] = true, ['j'] = true,
      ['k'] = true, ['l'] = true, ['m'] = true, ['n'] = true, ['o'] = true,
      ['p'] = true, ['q'] = true, ['r'] = true, ['s'] = true, ['t'] = true,
      ['u'] = true, ['v'] = true, ['w'] = true, ['x'] = true, ['y'] = true,
      ['z'] = true
    };

  return token_middle[c];
}
/* Return true if the character C can end a token.  */
static bool
endtoken (unsigned char c)
{
  static bool const token_end[UCHAR_MAX + 1] =
    {
      /* NUL and white space.  */
      ['\0'] = true, ['\t'] = true, ['\n'] = true, ['\r'] = true,
      [' '] = true,

      /* Punctuation.  */
      ['!'] = true, ['"'] = true, ['#'] = true, ['%'] = true, ['&'] = true,
      ['\''] = true, ['('] = true, [')'] = true, ['*'] = true, ['+'] = true,
      [','] = true, ['-'] = true, ['.'] = true, ['/'] = true, [':'] = true,
      [';'] = true, ['<'] = true, ['='] = true, ['>'] = true, ['?'] = true,
      ['['] = true, [']'] = true, ['^'] = true, ['{'] = true, ['|'] = true,
      ['}'] = true, ['~'] = true
    };

  return token_end[c];
}
/*
 * xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:  Type *xnew (int n, Type);
 *            void xrnew (OldPointer, int n, Type);
 *
 * xnew expands to an xmalloc call sized for N objects of Type, cast to
 * `Type *'.  xrnew passes OP to xrealloc with the size of N objects of
 * Type and assigns the result back to OP, so OP is evaluated twice.
 */
#define xnew(n, Type) \
  ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
/* Type of the per-language parse functions: each takes the input stream
   and returns nothing.  */
typedef void Lang_function (FILE *);
/* Pairs a compressed-file name suffix with the command that expands it.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  char const *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  char const *command;
} compressor;
261 typedef struct
263 const char *name; /* language name */
264 const char *help; /* detailed help for the language */
265 Lang_function *function; /* parse function */
266 const char **suffixes; /* name suffixes of this language's files */
267 const char **filenames; /* names of this language's files */
268 const char **interpreters; /* interpreters for this language */
269 bool metasource; /* source used to generate other sources */
270 } language;
272 typedef struct fdesc
274 struct fdesc *next; /* for the linked list */
275 char *infname; /* uncompressed input file name */
276 char *infabsname; /* absolute uncompressed input file name */
277 char *infabsdir; /* absolute dir of input file */
278 char *taggedfname; /* file name to write in tagfile */
279 language *lang; /* language of file */
280 char *prop; /* file properties to write in tagfile */
281 bool usecharno; /* etags tags shall contain char number */
282 bool written; /* entry written in the tags file */
283 } fdesc;
285 typedef struct node_st
286 { /* sorting structure */
287 struct node_st *left, *right; /* left and right sons */
288 fdesc *fdp; /* description of file to whom tag belongs */
289 char *name; /* tag name */
290 char *regex; /* search regexp */
291 bool valid; /* write this tag on the tag file */
292 bool is_func; /* function tag: use regexp in CTAGS mode */
293 bool been_warned; /* warning already given for duplicated tag */
294 int lno; /* line number tag is on */
295 long cno; /* character number line starts on */
296 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;            /* size of BUFFER */
  int len;              /* length of the string held in BUFFER */
  char *buffer;         /* the text itself */
} linebuffer;
312 /* Used to support mixing of --lang and file names. */
313 typedef struct
315 enum {
316 at_language, /* a language specification */
317 at_regexp, /* a regular expression */
318 at_filename, /* a file name */
319 at_stdin, /* read from stdin here */
320 at_end /* stop parsing the list */
321 } arg_type; /* argument type */
322 language *lang; /* language associated with the argument */
323 char *what; /* the argument itself */
324 } argument;
326 /* Structure defining a regular expression. */
327 typedef struct regexp
329 struct regexp *p_next; /* pointer to next in list */
330 language *lang; /* if set, use only for this language */
331 char *pattern; /* the regexp pattern */
332 char *name; /* tag name */
333 struct re_pattern_buffer *pat; /* the compiled pattern */
334 struct re_registers regs; /* re registers */
335 bool error_signaled; /* already signaled for this regexp */
336 bool force_explicit_name; /* do not allow implicit tag name */
337 bool ignore_case; /* ignore case when matching */
338 bool multi_line; /* do a multi-line match on the whole file */
339 } regexp;
342 /* Many compilers barf on this:
343 Lang_function Ada_funcs;
344 so let's write it this way */
345 static void Ada_funcs (FILE *);
346 static void Asm_labels (FILE *);
347 static void C_entries (int c_ext, FILE *);
348 static void default_C_entries (FILE *);
349 static void plain_C_entries (FILE *);
350 static void Cjava_entries (FILE *);
351 static void Cobol_paragraphs (FILE *);
352 static void Cplusplus_entries (FILE *);
353 static void Cstar_entries (FILE *);
354 static void Erlang_functions (FILE *);
355 static void Forth_words (FILE *);
356 static void Fortran_functions (FILE *);
357 static void HTML_labels (FILE *);
358 static void Lisp_functions (FILE *);
359 static void Lua_functions (FILE *);
360 static void Makefile_targets (FILE *);
361 static void Pascal_functions (FILE *);
362 static void Perl_functions (FILE *);
363 static void PHP_functions (FILE *);
364 static void PS_functions (FILE *);
365 static void Prolog_functions (FILE *);
366 static void Python_functions (FILE *);
367 static void Scheme_functions (FILE *);
368 static void TeX_commands (FILE *);
369 static void Texinfo_nodes (FILE *);
370 static void Yacc_entries (FILE *);
371 static void just_read_file (FILE *);
373 static language *get_language_from_langname (const char *);
374 static void readline (linebuffer *, FILE *);
375 static long readline_internal (linebuffer *, FILE *, char const *);
376 static bool nocase_tail (const char *);
377 static void get_tag (char *, char **);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn void suggest_asking_for_help (void);
385 static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn void pfatal (const char *);
387 static void add_node (node *, node **);
389 static void process_file_name (char *, language *);
390 static void process_file (FILE *, char *, language *);
391 static void find_entries (FILE *);
392 static void free_tree (node *);
393 static void free_fdesc (fdesc *);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc *, node **);
396 static void put_entries (node *);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer *);
412 static void linebuffer_setlen (linebuffer *, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar = '/'; /* use /.../ searches */
419 static char *tagfile; /* output file */
420 static char *progname; /* name this program was invoked with */
421 static char *cwd; /* current working directory */
422 static char *tagfiledir; /* directory of tagfile */
423 static FILE *tagf; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
426 static fdesc *fdhead; /* head of file description list */
427 static fdesc *curfdp; /* current file description */
428 static char *infilename; /* current input file name */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
434 static const int invalidcharno = -1;
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
/* -a: append to tags */
static bool append_to_tagfile;

/* The next five default to true in C and derived languages.  */

/* -t: create tags for C and Ada typedefs */
static bool typedefs;
/* -T: create tags for C typedefs, level 0 struct/enum/union decls,
   and C++ member functions.  */
static bool typedefs_or_cplusplus;
/* -d: create tags for C #define, enum constants and variables.
   -D: opposite of -d.  Default under ctags.  */
static bool constantypedefs;
/* create tags for global variables */
static int globals;
/* create tags for C member variables */
static int members;

/* --declarations: tag them and extern in C&Co */
static int declarations;
/* ignore #line directives (undocumented) */
static int no_line_directive;
/* no duplicate tags for ctags (undocumented) */
static int no_duplicates;
/* -u: update tags */
static bool update;
/* -v: create vgrind style index output */
static bool vgrind_style;
/* -w: suppress warnings (undocumented) */
static bool no_warnings;
/* -x: create cxref style output */
static bool cxref_style;
/* .[hc] means C++, not C (undocumented) */
static bool cplusplus;
/* -I: ignore indentation in C */
static bool ignoreindent;
/* --packages-only: in Ada, only tag packages */
static int packages_only;
/* -Q: produce class-qualified tags in C++/Java */
static int class_qualify;
466 /* STDIN is defined in LynxOS system headers */
467 #ifdef STDIN
468 # undef STDIN
469 #endif
471 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
472 static bool parsing_stdin; /* --parse-stdin used */
474 static regexp *p_head; /* list of all regexps */
475 static bool need_filebuf; /* some regexes are multi-line */
477 static struct option longopts[] =
479 { "append", no_argument, NULL, 'a' },
480 { "packages-only", no_argument, &packages_only, 1 },
481 { "c++", no_argument, NULL, 'C' },
482 { "declarations", no_argument, &declarations, 1 },
483 { "no-line-directive", no_argument, &no_line_directive, 1 },
484 { "no-duplicates", no_argument, &no_duplicates, 1 },
485 { "help", no_argument, NULL, 'h' },
486 { "help", no_argument, NULL, 'H' },
487 { "ignore-indentation", no_argument, NULL, 'I' },
488 { "language", required_argument, NULL, 'l' },
489 { "members", no_argument, &members, 1 },
490 { "no-members", no_argument, &members, 0 },
491 { "output", required_argument, NULL, 'o' },
492 { "class-qualify", no_argument, &class_qualify, 'Q' },
493 { "regex", required_argument, NULL, 'r' },
494 { "no-regex", no_argument, NULL, 'R' },
495 { "ignore-case-regex", required_argument, NULL, 'c' },
496 { "parse-stdin", required_argument, NULL, STDIN },
497 { "version", no_argument, NULL, 'V' },
499 #if CTAGS /* Ctags options */
500 { "backward-search", no_argument, NULL, 'B' },
501 { "cxref", no_argument, NULL, 'x' },
502 { "defines", no_argument, NULL, 'd' },
503 { "globals", no_argument, &globals, 1 },
504 { "typedefs", no_argument, NULL, 't' },
505 { "typedefs-and-c++", no_argument, NULL, 'T' },
506 { "update", no_argument, NULL, 'u' },
507 { "vgrind", no_argument, NULL, 'v' },
508 { "no-warn", no_argument, NULL, 'w' },
510 #else /* Etags options */
511 { "no-defines", no_argument, NULL, 'D' },
512 { "no-globals", no_argument, &globals, 0 },
513 { "include", required_argument, NULL, 'i' },
514 #endif
515 { NULL }
518 static compressor compressors[] =
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { "xz", "xz -d -c" },
526 { NULL }
530 * Language stuff.
/* Ada code: file name suffixes and help text.  */
static char const *Ada_suffixes[] = { "ads", "adb", "ada", NULL };

static char const Ada_help[] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the '--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag 'bidule'.";
/* Assembly code: file name suffixes and help text.  */
static char const *Asm_suffixes[] =
{
  "a",          /* Unix assembler */
  "asm",        /* Microcontroller assembly */
  "def",        /* BSO/Tasking definition includes */
  "inc",        /* Microcontroller include files */
  "ins",        /* Microcontroller include files */
  "s", "sa",    /* Unix assembler */
  "S",          /* cpp-processed Unix assembler */
  "src",        /* BSO/Tasking C compiler output */
  NULL
};

static char const Asm_help[] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these.  */
static char const *default_C_suffixes[] = { "c", "h", NULL };

#if CTAGS
/* C help for Ctags.  */
static char const default_C_help[] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of 'struct', 'union' and 'enum'.\n"
  "Use -d to tag '#define' macro definitions and 'enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using '--declarations', and struct members by using '--members'.";
#else
/* C help for Etags.  */
static char const default_C_help[] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of 'struct', 'union' and 'enum'. '#define' macro\n"
  "definitions and 'enum' constants are tags unless you specify\n"
  "'--no-defines'. Global variables are tags unless you specify\n"
  "'--no-globals' and so are struct members unless you specify\n"
  "'--no-members'. Use of '--no-globals', '--no-defines' and\n"
  "'--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using '--declarations'.";
#endif /* C help for Ctags and Etags */
/* C++ code: file name suffixes and help text.  */
static char const *Cplusplus_suffixes[] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",          /* Objective C++ */
  "pdb",        /* PostScript with C syntax */
  NULL
};

static char const Cplusplus_help[] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the '--no-members' option.\n"
  "Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n"
  "and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n"
  "'operator+'.";
/* Java code: file name suffixes and help text.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
/* The help text is never modified and is only stored in the
   `const char *help' member of a language entry, so it is declared
   const like the help text of the other languages.  */
static const char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol code: file name suffixes and help text.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
/* The help text is never modified and is only stored in the
   `const char *help' member of a language entry, so it is declared
   const like the help text of the other languages.  */
static const char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
/* C* code: file name suffixes.  */
static char const *Cstar_suffixes[] = { "cs", "hs", NULL };
/* Erlang code: file name suffixes and help text.  */
static char const *Erlang_suffixes[] = { "erl", "hrl", NULL };

static char const Erlang_help[] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
/* Forth code: file name suffixes and help text.  The suffix table is
   given internal linkage like every other suffix table in this file, so
   that it does not export a symbol.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
  "In Forth code, tags are words defined by ':',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran code: file name suffixes and help text.  */
static char const *Fortran_suffixes[] = { "F", "f", "f90", "for", NULL };

static char const Fortran_help[] =
  "In Fortran code, functions, subroutines and block data are tags.";
/* HTML input: file name suffixes and help text.  */
static char const *HTML_suffixes[] = { "htm", "html", "shtml", NULL };

static char const HTML_help[] =
  "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n"
  "'h3' headers. Also, tags are 'name=' in anchors and all\n"
  "occurrences of 'id='.";
/* Lisp code: file name suffixes and help text.  */
static char const *Lisp_suffixes[] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};

static char const Lisp_help[] =
  "In Lisp code, any function defined with 'defun', any variable\n"
  "defined with 'defvar' or 'defconst', and in general the first\n"
  "argument of any expression that starts with '(def' in column zero\n"
  "is a tag.\n"
  "The '--declarations' option tags \"(defvar foo)\" constructs too.";
/* Lua scripts: file name suffixes and help text.  */
static char const *Lua_suffixes[] = { "lua", "LUA", NULL };

static char const Lua_help[] = "In Lua scripts, all functions are tags.";
/* Makefiles: recognized by file name rather than by suffix.  */
static char const *Makefile_filenames[] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};

static char const Makefile_help[] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify '--no-globals'.";
/* Objective C code: file name suffixes and help text.  */
static char const *Objc_suffixes[] =
{
  "lm",         /* Objective lex file */
  "m",          /* Objective C file */
  NULL
};

static char const Objc_help[] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
/* Pascal code: file name suffixes and help text.  */
static char const *Pascal_suffixes[] = { "p", "pas", NULL };

static char const Pascal_help[] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* Perl code: file name suffixes, interpreter names and help text.  */
static char const *Perl_suffixes[] = { "pl", "pm", NULL };

static char const *Perl_interpreters[] = { "perl", "@PERL@", NULL };

static char const Perl_help[] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n"
  "'--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named 'PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is 'main::SUB'.";
/* PHP code: file name suffixes and help text.  */
static char const *PHP_suffixes[] = { "php", "php3", "php4", NULL };

static char const PHP_help[] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the '--no-members' option, vars are tags too.";
/* Plain C: file name suffixes.  */
static char const *plain_C_suffixes[] =
{
  "pc",         /* Pro*C file */
  NULL
};
/* PostScript code: file name suffixes (.psw is for PSWrap) and help
   text.  */
static char const *PS_suffixes[] = { "ps", "psw", NULL };

static char const PS_help[] =
  "In PostScript code, the tags are the functions.";
/* Prolog code: file name suffixes and help text.  */
static char const *Prolog_suffixes[] = { "prolog", NULL };

static char const Prolog_help[] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
/* Python code: file name suffixes and help text.  */
static char const *Python_suffixes[] = { "py", NULL };

static char const Python_help[] =
  "In Python code, 'def' or 'class' at the beginning of a line\n"
  "generate a tag.";
/* Scheme code: file name suffixes and help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static char const *Scheme_suffixes[] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};

static char const Scheme_help[] =
  "In Scheme code, tags include anything defined with 'def' or with a\n"
  "construct whose name starts with 'def'. They also include\n"
  "variables set with 'set!' at top level in the file.";
/* TeX and LaTeX text: file name suffixes and help text.  */
static char const *TeX_suffixes[] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};

static char const TeX_help[] =
  "In LaTeX text, the argument of any of the commands '\\chapter',\n"
  "'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n"
  "'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n"
  "'\\index', '\\def', '\\newcommand', '\\renewcommand',\n"
  "'\\newenvironment' or '\\renewenvironment' is a tag.\n"
  "Other commands can be specified by setting the environment variable\n"
  "'TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
/* Texinfo files: file name suffixes and help text.  */
static char const *Texinfo_suffixes[] = { "texi", "texinfo", "txi", NULL };

static char const Texinfo_help[] =
  "for texinfo files, lines starting with @node are tagged.";
/* Bison and Yacc input: file name suffixes (.ym is Objective yacc file)
   and help text.  */
static char const *Yacc_suffixes[] =
{
  "y", "y++", "ym", "yxx", "yy", NULL
};

static char const Yacc_help[] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
/* Help text for the pseudo-language "auto".  */
static char const auto_help[] =
  "'auto' is not a real language, it indicates to use\n"
  "a default language for files base on file name suffix and file contents.";

/* Help text for the pseudo-language "none".  */
static char const none_help[] =
  "'none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text for languages that have no detailed help of their own.  */
static char const no_lang_help[] =
  "No detailed help available for this language.";
773 * Table of languages.
775 * It is ok for a given function to be listed under more than one
776 * name. I just didn't.
779 static language lang_names [] =
781 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
782 { "asm", Asm_help, Asm_labels, Asm_suffixes },
783 { "c", default_C_help, default_C_entries, default_C_suffixes },
784 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
785 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
786 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
787 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
788 { "forth", Forth_help, Forth_words, Forth_suffixes },
789 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
790 { "html", HTML_help, HTML_labels, HTML_suffixes },
791 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
792 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
793 { "lua", Lua_help, Lua_functions, Lua_suffixes },
794 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
795 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
796 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
797 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
798 { "php", PHP_help, PHP_functions, PHP_suffixes },
799 { "postscript",PS_help, PS_functions, PS_suffixes },
800 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
801 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
802 { "python", Python_help, Python_functions, Python_suffixes },
803 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
804 { "tex", TeX_help, TeX_commands, TeX_suffixes },
805 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
806 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
807 { "auto", auto_help }, /* default guessing scheme */
808 { "none", none_help, just_read_file }, /* regexp matching only */
809 { NULL } /* end of list */
/* Print on stdout the table of supported languages: one row per entry
   of lang_names giving the language name followed by its default file
   names and dot suffixes, then explanatory notes about the 'auto' and
   'none' pseudo-languages, language guessing, and compressed input.  */
813 static void
814 print_language_names (void)
816 language *lang;
817 const char **name, **ext;
/* Table heading.  */
819 puts ("\nThese are the currently supported languages, along with the\n\
820 default file names and dot suffixes:");
/* lang_names is terminated by an entry whose name is NULL; the
   filenames and suffixes lists are optional and NULL-terminated.  */
821 for (lang = lang_names; lang->name != NULL; lang++)
823 printf (" %-*s", 10, lang->name);
824 if (lang->filenames != NULL)
825 for (name = lang->filenames; *name != NULL; name++)
826 printf (" %s", *name);
827 if (lang->suffixes != NULL)
828 for (ext = lang->suffixes; *ext != NULL; ext++)
829 printf (" .%s", *ext);
/* End the row for this language.  */
830 puts ("");
/* Notes on how the language is chosen when none is forced.  */
832 puts ("where 'auto' means use default language for files based on file\n\
833 name suffix, and 'none' means only do regexp processing on files.\n\
834 If no language is specified and no matching suffix is found,\n\
835 the first line of the file is read for a sharp-bang (#!) sequence\n\
836 followed by the name of an interpreter. If no such sequence is found,\n\
837 Fortran is tried first; if no tags are found, C is tried next.\n\
838 When parsing any C file, a \"class\" or \"template\" keyword\n\
839 switches to C++.");
/* Compressed input and a pointer to the per-language help.  */
840 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
842 For detailed help on a given language use, for example,\n\
843 etags --help --lang=ada.");
846 #ifndef EMACS_NAME
847 # define EMACS_NAME "standalone"
848 #endif
849 #ifndef VERSION
850 # define VERSION "17.38.1.4"
851 #endif
852 static _Noreturn void
853 print_version (void)
855 char emacs_copyright[] = COPYRIGHT;
857 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
858 puts (emacs_copyright);
859 puts ("This program is distributed under the terms in ETAGS.README");
861 exit (EXIT_SUCCESS);
/* Compile-time switch: when true, print_help also prints the entries
   that are guarded by it below (options otherwise left out of the
   help text).  Defaults to false.  */
864 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
865 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
866 #endif
/* Print help on stdout and exit successfully.
   ARGBUFFER is the argument list built by main, terminated by an
   at_end entry.  If it contains any at_language entries, only the
   help text of each of those languages is printed, separated by empty
   lines.  Otherwise the general usage message, the option list (whose
   content depends on CTAGS) and the table of languages are printed.
   NOTE(review): the block comment opened before the "-C, --c++" entry
   has no visible closing delimiter in this copy of the file; confirm
   that only that entry is meant to be commented out.  */
868 static _Noreturn void
869 print_help (argument *argbuffer)
871 bool help_for_lang = false;
/* Language-specific help takes precedence over the general text.  */
873 for (; argbuffer->arg_type != at_end; argbuffer++)
874 if (argbuffer->arg_type == at_language)
/* Separate the help texts of successive languages with an empty line.  */
876 if (help_for_lang)
877 puts ("");
878 puts (argbuffer->lang->help);
879 help_for_lang = true;
882 if (help_for_lang)
883 exit (EXIT_SUCCESS);
/* General usage; progname is printed twice.  */
885 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
887 These are the options accepted by %s.\n", progname, progname);
888 puts ("You may use unambiguous abbreviations for the long option names.");
889 puts (" A - as file name means read names from stdin (one per line).\n\
890 Absolute names are stored in the output file as they are.\n\
891 Relative ones are stored relative to the output file's directory.\n");
/* From here on, one puts per option; several entries are chosen by
   CTAGS so that each program only advertises its own options.  */
893 puts ("-a, --append\n\
894 Append tag entries to existing tags file.");
896 puts ("--packages-only\n\
897 For Ada files, only generate tags for packages.");
899 if (CTAGS)
900 puts ("-B, --backward-search\n\
901 Write the search commands for the tag entries using '?', the\n\
902 backward-search command instead of '/', the forward-search command.");
904 /* This option is mostly obsolete, because etags can now automatically
905 detect C++. Retained for backward compatibility and for debugging and
906 experimentation. In principle, we could want to tag as C++ even
907 before any "class" or "template" keyword.
908 puts ("-C, --c++\n\
909 Treat files whose name suffix defaults to C language as C++ files.");
912 puts ("--declarations\n\
913 In C and derived languages, create tags for function declarations,");
914 if (CTAGS)
915 puts ("\tand create tags for extern variables if --globals is used.");
916 else
917 puts
918 ("\tand create tags for extern variables unless --no-globals is used.");
920 if (CTAGS)
921 puts ("-d, --defines\n\
922 Create tag entries for C #define constants and enum constants, too.");
923 else
924 puts ("-D, --no-defines\n\
925 Don't create tag entries for C #define constants and enum constants.\n\
926 This makes the tags file smaller.");
928 if (!CTAGS)
929 puts ("-i FILE, --include=FILE\n\
930 Include a note in tag file indicating that, when searching for\n\
931 a tag, one should also consult the tags file FILE after\n\
932 checking the current file.");
934 puts ("-l LANG, --language=LANG\n\
935 Force the following files to be considered as written in the\n\
936 named language up to the next --language=LANG option.");
938 if (CTAGS)
939 puts ("--globals\n\
940 Create tag entries for global variables in some languages.");
941 else
942 puts ("--no-globals\n\
943 Do not create tag entries for global variables in some\n\
944 languages. This makes the tags file smaller.");
946 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
947 puts ("--no-line-directive\n\
948 Ignore #line preprocessor directives in C and derived languages.");
950 if (CTAGS)
951 puts ("--members\n\
952 Create tag entries for members of structures in some languages.");
953 else
954 puts ("--no-members\n\
955 Do not create tag entries for members of structures\n\
956 in some languages.");
958 puts ("-Q, --class-qualify\n\
959 Qualify tag names with their class name in C++, ObjC, and Java.\n\
960 This produces tag names of the form \"class::member\" for C++,\n\
961 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
962 For Objective C, this also produces class methods qualified with\n\
963 their arguments, as in \"foo:bar:baz:more\".");
964 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
965 Make a tag for each line matching a regular expression pattern\n\
966 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
967 files only. REGEXFILE is a file containing one REGEXP per line.\n\
968 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
969 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
970 puts (" If TAGNAME/ is present, the tags created are named.\n\
971 For example Tcl named tags can be created with:\n\
972 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
973 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
974 'm' means to allow multi-line matches, 's' implies 'm' and\n\
975 causes dot to match any character, including newline.");
977 puts ("-R, --no-regex\n\
978 Don't create tags from regexps for the following files.");
980 puts ("-I, --ignore-indentation\n\
981 In C and C++ do not assume that a closing brace in the first\n\
982 column is the final brace of a function or structure definition.");
984 puts ("-o FILE, --output=FILE\n\
985 Write the tags to FILE.");
987 puts ("--parse-stdin=NAME\n\
988 Read from standard input and record tags as belonging to file NAME.");
990 if (CTAGS)
992 puts ("-t, --typedefs\n\
993 Generate tag entries for C and Ada typedefs.");
994 puts ("-T, --typedefs-and-c++\n\
995 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
996 and C++ member functions.");
999 if (CTAGS)
1000 puts ("-u, --update\n\
1001 Update the tag entries for the given files, leaving tag\n\
1002 entries for other files in place. Currently, this is\n\
1003 implemented by deleting the existing entries for the given\n\
1004 files and then rewriting the new entries at the end of the\n\
1005 tags file. It is often faster to simply rebuild the entire\n\
1006 tag file than to use this.");
1008 if (CTAGS)
1010 puts ("-v, --vgrind\n\
1011 Print on the standard output an index of items intended for\n\
1012 human consumption, similar to the output of vgrind. The index\n\
1013 is sorted, and gives the page number of each item.");
1015 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1016 puts ("-w, --no-duplicates\n\
1017 Do not create duplicate tag entries, for compatibility with\n\
1018 traditional ctags.");
1020 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1021 puts ("-w, --no-warn\n\
1022 Suppress warning messages about duplicate tag entries.");
1024 puts ("-x, --cxref\n\
1025 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1026 The output uses line numbers instead of page numbers, but\n\
1027 beyond that the differences are cosmetic; try both to see\n\
1028 which you like.");
1031 puts ("-V, --version\n\
1032 Print the version of the program.\n\
1033 -h, --help\n\
1034 Print this help message.\n\
1035 Followed by one or more '--language' options prints detailed\n\
1036 help about tag generation for the specified languages.");
1038 print_language_names ();
1040 puts ("");
1041 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1043 exit (EXIT_SUCCESS);
/* Program entry point.
   Parse the command line into argbuffer, an ordered list of file
   names, forced languages and regexps; then walk that list, tagging
   each file with the language and regexps in effect at that point,
   and finally write the tags.  In etags mode (and in ctags cxref
   mode) the entries are written and the program exits from the middle
   of this function; the remainder handles plain ctags output,
   including --update and the final sort.  */
1048 main (int argc, char **argv)
1050 int i;
1051 unsigned int nincluded_files;
1052 char **included_files;
1053 argument *argbuffer;
1054 int current_arg, file_count;
1055 linebuffer filename_lb;
1056 bool help_asked = false;
1057 ptrdiff_t len;
1058 char *optstring;
1059 int opt;
/* Start with empty argument and include lists; both arrays are sized
   by argc.  */
1061 progname = argv[0];
1062 nincluded_files = 0;
1063 included_files = xnew (argc, char *);
1064 current_arg = 0;
1065 file_count = 0;
1067 /* Allocate enough no matter what happens. Overkill, but each one
1068 is small. */
1069 argbuffer = xnew (argc, argument);
1072 * Always find typedefs and structure tags.
1073 * Also default to find macro constants, enum constants, struct
1074 * members and global variables. Do it for both etags and ctags.
1076 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1077 globals = members = true;
1079 /* When the optstring begins with a '-' getopt_long does not rearrange the
1080 non-options arguments to be at the end, but leaves them alone. */
1081 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1082 (CTAGS) ? "BxdtTuvw" : "Di:",
1083 "");
/* Walk the command line.  File names, languages and regexps are
   appended to argbuffer in the order seen; whatlen_max tracks the
   longest file name or regexp recorded.  */
1085 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1086 switch (opt)
1088 case 0:
1089 /* If getopt returns 0, then it has already processed a
1090 long-named option. We should do nothing. */
1091 break;
1093 case 1:
1094 /* This means that a file name has been seen. Record it. */
1095 argbuffer[current_arg].arg_type = at_filename;
1096 argbuffer[current_arg].what = optarg;
1097 len = strlen (optarg);
1098 if (whatlen_max < len)
1099 whatlen_max = len;
1100 ++current_arg;
1101 ++file_count;
1102 break;
1104 case STDIN:
1105 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1106 argbuffer[current_arg].arg_type = at_stdin;
1107 argbuffer[current_arg].what = optarg;
1108 len = strlen (optarg);
1109 if (whatlen_max < len)
1110 whatlen_max = len;
1111 ++current_arg;
1112 ++file_count;
1113 if (parsing_stdin)
1114 fatal ("cannot parse standard input more than once");
1115 parsing_stdin = true;
1116 break;
1118 /* Common options. */
1119 case 'a': append_to_tagfile = true; break;
1120 case 'C': cplusplus = true; break;
1121 case 'f': /* for compatibility with old makefiles */
1122 case 'o':
/* A second -o or -f is rejected.  */
1123 if (tagfile)
1125 error ("-o option may only be given once.");
1126 suggest_asking_for_help ();
1127 /* NOTREACHED */
1129 tagfile = optarg;
1130 break;
1131 case 'I':
1132 case 'S': /* for backward compatibility */
1133 ignoreindent = true;
1134 break;
1135 case 'l':
/* get_language_from_langname reports unknown or missing names itself
   and returns NULL; such an option is then simply not recorded.  */
1137 language *lang = get_language_from_langname (optarg);
1138 if (lang != NULL)
1140 argbuffer[current_arg].lang = lang;
1141 argbuffer[current_arg].arg_type = at_language;
1142 ++current_arg;
1145 break;
1146 case 'c':
1147 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1148 optarg = concat (optarg, "i", ""); /* memory leak here */
1149 /* FALLTHRU */
1150 case 'r':
1151 argbuffer[current_arg].arg_type = at_regexp;
1152 argbuffer[current_arg].what = optarg;
1153 len = strlen (optarg);
1154 if (whatlen_max < len)
1155 whatlen_max = len;
1156 ++current_arg;
1157 break;
1158 case 'R':
/* -R is recorded as a regexp argument with no pattern.  */
1159 argbuffer[current_arg].arg_type = at_regexp;
1160 argbuffer[current_arg].what = NULL;
1161 ++current_arg;
1162 break;
1163 case 'V':
1164 print_version ();
1165 break;
1166 case 'h':
1167 case 'H':
/* Help is deferred until the whole command line has been read, so
   that print_help can see any --language options.  */
1168 help_asked = true;
1169 break;
1170 case 'Q':
1171 class_qualify = 1;
1172 break;
1174 /* Etags options */
1175 case 'D': constantypedefs = false; break;
1176 case 'i': included_files[nincluded_files++] = optarg; break;
1178 /* Ctags options. */
1179 case 'B': searchar = '?'; break;
1180 case 'd': constantypedefs = true; break;
1181 case 't': typedefs = true; break;
1182 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1183 case 'u': update = true; break;
1184 case 'v': vgrind_style = true; /*FALLTHRU*/
1185 case 'x': cxref_style = true; break;
1186 case 'w': no_warnings = true; break;
1187 default:
1188 suggest_asking_for_help ();
1189 /* NOTREACHED */
1192 /* No more options. Store the rest of arguments. */
1193 for (; optind < argc; optind++)
1195 argbuffer[current_arg].arg_type = at_filename;
1196 argbuffer[current_arg].what = argv[optind];
1197 len = strlen (argv[optind]);
1198 if (whatlen_max < len)
1199 whatlen_max = len;
1200 ++current_arg;
1201 ++file_count;
/* Terminate the list; print_help scans it up to the at_end entry.  */
1204 argbuffer[current_arg].arg_type = at_end;
1206 if (help_asked)
1207 print_help (argbuffer);
1208 /* NOTREACHED */
/* With neither input files nor included tag files there is nothing
   to do.  */
1210 if (nincluded_files == 0 && file_count == 0)
1212 error ("no input files specified.");
1213 suggest_asking_for_help ();
1214 /* NOTREACHED */
/* Default output name depends on the program flavor.  */
1217 if (tagfile == NULL)
1218 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1219 cwd = etags_getcwd (); /* the current working directory */
/* Make sure cwd ends with a slash.  */
1220 if (cwd[strlen (cwd) - 1] != '/')
1222 char *oldcwd = cwd;
1223 cwd = concat (oldcwd, "/", "");
1224 free (oldcwd);
1227 /* Compute base directory for relative file names. */
1228 if (streq (tagfile, "-")
1229 || strneq (tagfile, "/dev/", 5))
1230 tagfiledir = cwd; /* relative file names are relative to cwd */
1231 else
1233 canonicalize_filename (tagfile);
1234 tagfiledir = absolute_dirname (tagfile, cwd);
/* Set up the line buffers used below and by the parsers.  */
1237 linebuffer_init (&lb);
1238 linebuffer_init (&filename_lb);
1239 linebuffer_init (&filebuf);
1240 linebuffer_init (&token_name);
/* In etags mode the output stream is opened before any file is
   processed; "-" selects stdout.  */
1242 if (!CTAGS)
1244 if (streq (tagfile, "-"))
1246 tagf = stdout;
1247 SET_BINARY (fileno (stdout));
1249 else
1250 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1251 if (tagf == NULL)
1252 pfatal (tagfile);
1256 * Loop through files finding functions.
1258 for (i = 0; i < current_arg; i++)
/* Being static, lang starts out NULL; once set by an at_language
   entry it stays in effect for the following files.  */
1260 static language *lang; /* non-NULL if language is forced */
1261 char *this_file;
1263 switch (argbuffer[i].arg_type)
1265 case at_language:
1266 lang = argbuffer[i].lang;
1267 break;
1268 case at_regexp:
1269 analyze_regex (argbuffer[i].what);
1270 break;
1271 case at_filename:
1272 this_file = argbuffer[i].what;
1273 /* Input file named "-" means read file names from stdin
1274 (one per line) and use them. */
1275 if (streq (this_file, "-"))
1277 if (parsing_stdin)
1278 fatal ("cannot parse standard input "
1279 "AND read file names from it");
1280 while (readline_internal (&filename_lb, stdin, "-") > 0)
1281 process_file_name (filename_lb.buffer, lang);
1283 else
1284 process_file_name (this_file, lang);
1285 break;
1286 case at_stdin:
/* The contents of stdin are tagged under the name given on the
   command line.  */
1287 this_file = argbuffer[i].what;
1288 process_file (stdin, this_file, lang);
1289 break;
1290 default:
1291 error ("internal error: arg_type");
/* All input has been parsed; release the regexps and the scratch
   buffers.  NOTE(review): filename_lb.buffer is not released here.  */
1295 free_regexps ();
1296 free (lb.buffer);
1297 free (filebuf.buffer);
1298 free (token_name.buffer);
1300 if (!CTAGS || cxref_style)
1302 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1303 put_entries (nodehead);
1304 free_tree (nodehead);
1305 nodehead = NULL;
1306 if (!CTAGS)
1308 fdesc *fdp;
1310 /* Output file entries that have no tags. */
1311 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1312 if (!fdp->written)
1313 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
/* One "include" section per -i option, in command-line order.  */
1315 while (nincluded_files-- > 0)
1316 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1318 if (fclose (tagf) == EOF)
1319 pfatal (tagfile);
1322 exit (EXIT_SUCCESS);
1325 /* From here on, we are in (CTAGS && !cxref_style) */
1326 if (update)
/* For each input file, move the tags file aside and copy back every
   line that does not mention that file between tabs.  The buffer is
   sized for the fixed text plus two copies of tagfile's length being
   covered by strlen (tagfile) and the two dots in the sizeof string,
   and whatlen_max for the file name.
   NOTE(review): tagfile and the file name are pasted into the shell
   command without quoting.  */
1328 char *cmd =
1329 xmalloc (strlen (tagfile) + whatlen_max +
1330 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1331 for (i = 0; i < current_arg; ++i)
1333 switch (argbuffer[i].arg_type)
1335 case at_filename:
1336 case at_stdin:
1337 break;
1338 default:
1339 continue; /* the for loop */
1341 char *z = stpcpy (cmd, "mv ");
1342 z = stpcpy (z, tagfile);
1343 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1344 z = stpcpy (z, argbuffer[i].what);
1345 z = stpcpy (z, "\t' OTAGS >");
1346 z = stpcpy (z, tagfile);
1347 strcpy (z, ";rm OTAGS");
1348 if (system (cmd) != EXIT_SUCCESS)
1349 fatal ("failed to execute shell command");
1351 free (cmd);
/* The surviving old entries must be kept, so the new ones are
   appended.  */
1352 append_to_tagfile = true;
/* Plain ctags output: write every collected tag in one go.  */
1355 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1356 if (tagf == NULL)
1357 pfatal (tagfile);
1358 put_entries (nodehead); /* write all the tags (CTAGS) */
1359 free_tree (nodehead);
1360 nodehead = NULL;
1361 if (fclose (tagf) == EOF)
1362 pfatal (tagfile);
/* After appending, sort the tags file in place and drop duplicate
   lines.  NOTE(review): the value returned by system is used directly
   as the exit status, and cmd is not freed before exiting.  */
1364 if (CTAGS)
1365 if (append_to_tagfile || update)
1367 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1368 /* Maybe these should be used:
1369 setenv ("LC_COLLATE", "C", 1);
1370 setenv ("LC_ALL", "C", 1); */
1371 char *z = stpcpy (cmd, "sort -u -o ");
1372 z = stpcpy (z, tagfile);
1373 *z++ = ' ';
1374 strcpy (z, tagfile);
1375 exit (system (cmd));
1377 return EXIT_SUCCESS;
1382 * Return a compressor given the file name. If EXTPTR is non-zero,
1383 * return a pointer into FILE where the compressor-specific
1384 * extension begins. If no compressor is found, NULL is returned
1385 * and EXTPTR is not significant.
1386 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1388 static compressor *
1389 get_compressor_from_suffix (char *file, char **extptr)
1391 compressor *compr;
1392 char *slash, *suffix;
1394 /* File has been processed by canonicalize_filename,
1395 so we don't need to consider backslashes on DOS_NT. */
1396 slash = strrchr (file, '/');
1397 suffix = strrchr (file, '.');
1398 if (suffix == NULL || suffix < slash)
1399 return NULL;
1400 if (extptr != NULL)
1401 *extptr = suffix;
1402 suffix += 1;
1403 /* Let those poor souls who live with DOS 8+3 file name limits get
1404 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1405 Only the first do loop is run if not MSDOS */
1408 for (compr = compressors; compr->suffix != NULL; compr++)
1409 if (streq (compr->suffix, suffix))
1410 return compr;
1411 if (!MSDOS)
1412 break; /* do it only once: not really a loop */
1413 if (extptr != NULL)
1414 *extptr = ++suffix;
1415 } while (*suffix != '\0');
1416 return NULL;
1422 * Return a language given the name.
1424 static language *
1425 get_language_from_langname (const char *name)
1427 language *lang;
1429 if (name == NULL)
1430 error ("empty language name");
1431 else
1433 for (lang = lang_names; lang->name != NULL; lang++)
1434 if (streq (name, lang->name))
1435 return lang;
1436 error ("unknown language \"%s\"", name);
1439 return NULL;
1444 * Return a language given the interpreter name.
1446 static language *
1447 get_language_from_interpreter (char *interpreter)
1449 language *lang;
1450 const char **iname;
1452 if (interpreter == NULL)
1453 return NULL;
1454 for (lang = lang_names; lang->name != NULL; lang++)
1455 if (lang->interpreters != NULL)
1456 for (iname = lang->interpreters; *iname != NULL; iname++)
1457 if (streq (*iname, interpreter))
1458 return lang;
1460 return NULL;
1466 * Return a language given the file name.
1468 static language *
1469 get_language_from_filename (char *file, int case_sensitive)
1471 language *lang;
1472 const char **name, **ext, *suffix;
1474 /* Try whole file name first. */
1475 for (lang = lang_names; lang->name != NULL; lang++)
1476 if (lang->filenames != NULL)
1477 for (name = lang->filenames; *name != NULL; name++)
1478 if ((case_sensitive)
1479 ? streq (*name, file)
1480 : strcaseeq (*name, file))
1481 return lang;
1483 /* If not found, try suffix after last dot. */
1484 suffix = strrchr (file, '.');
1485 if (suffix == NULL)
1486 return NULL;
1487 suffix += 1;
1488 for (lang = lang_names; lang->name != NULL; lang++)
1489 if (lang->suffixes != NULL)
1490 for (ext = lang->suffixes; *ext != NULL; ext++)
1491 if ((case_sensitive)
1492 ? streq (*ext, suffix)
1493 : strcaseeq (*ext, suffix))
1494 return lang;
1495 return NULL;
1500 * This routine is called on each file argument.
1502 static void
1503 process_file_name (char *file, language *lang)
1505 FILE *inf;
1506 fdesc *fdp;
1507 compressor *compr;
1508 char *compressed_name, *uncompressed_name;
1509 char *ext, *real_name, *tmp_name;
1510 int retval;
1512 canonicalize_filename (file);
1513 if (streq (file, tagfile) && !streq (tagfile, "-"))
1515 error ("skipping inclusion of %s in self.", file);
1516 return;
1518 compr = get_compressor_from_suffix (file, &ext);
1519 if (compr)
1521 compressed_name = file;
1522 uncompressed_name = savenstr (file, ext - file);
1524 else
1526 compressed_name = NULL;
1527 uncompressed_name = file;
1530 /* If the canonicalized uncompressed name
1531 has already been dealt with, skip it silently. */
1532 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1534 assert (fdp->infname != NULL);
1535 if (streq (uncompressed_name, fdp->infname))
1536 goto cleanup;
1539 inf = fopen (file, "r" FOPEN_BINARY);
1540 if (inf)
1541 real_name = file;
1542 else
1544 int file_errno = errno;
1545 if (compressed_name)
1547 /* Try with the given suffix. */
1548 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1549 if (inf)
1550 real_name = uncompressed_name;
1552 else
1554 /* Try all possible suffixes. */
1555 for (compr = compressors; compr->suffix != NULL; compr++)
1557 compressed_name = concat (file, ".", compr->suffix);
1558 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1559 if (inf)
1561 real_name = compressed_name;
1562 break;
1564 if (MSDOS)
1566 char *suf = compressed_name + strlen (file);
1567 size_t suflen = strlen (compr->suffix) + 1;
1568 for ( ; suf[1]; suf++, suflen--)
1570 memmove (suf, suf + 1, suflen);
1571 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1572 if (inf)
1574 real_name = compressed_name;
1575 break;
1578 if (inf)
1579 break;
1581 free (compressed_name);
1582 compressed_name = NULL;
1585 if (! inf)
1587 errno = file_errno;
1588 perror (file);
1589 goto cleanup;
1593 if (real_name == compressed_name)
1595 fclose (inf);
1596 tmp_name = etags_mktmp ();
1597 if (!tmp_name)
1598 inf = NULL;
1599 else
1601 #if MSDOS || defined (DOS_NT)
1602 char *cmd1 = concat (compr->command, " \"", real_name);
1603 char *cmd = concat (cmd1, "\" > ", tmp_name);
1604 #else
1605 char *cmd1 = concat (compr->command, " '", real_name);
1606 char *cmd = concat (cmd1, "' > ", tmp_name);
1607 #endif
1608 free (cmd1);
1609 int tmp_errno;
1610 if (system (cmd) == -1)
1612 inf = NULL;
1613 tmp_errno = EINVAL;
1615 else
1617 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1618 tmp_errno = errno;
1620 free (cmd);
1621 errno = tmp_errno;
1624 if (!inf)
1626 perror (real_name);
1627 goto cleanup;
1631 process_file (inf, uncompressed_name, lang);
1633 retval = fclose (inf);
1634 if (real_name == compressed_name)
1636 remove (tmp_name);
1637 free (tmp_name);
1639 if (retval < 0)
1640 pfatal (file);
1642 cleanup:
1643 if (compressed_name != file)
1644 free (compressed_name);
1645 if (uncompressed_name != file)
1646 free (uncompressed_name);
1647 last_node = NULL;
1648 curfdp = NULL;
1649 return;
1652 static void
1653 process_file (FILE *fh, char *fn, language *lang)
1655 static const fdesc emptyfdesc;
1656 fdesc *fdp;
1658 infilename = fn;
1659 /* Create a new input file description entry. */
1660 fdp = xnew (1, fdesc);
1661 *fdp = emptyfdesc;
1662 fdp->next = fdhead;
1663 fdp->infname = savestr (fn);
1664 fdp->lang = lang;
1665 fdp->infabsname = absolute_filename (fn, cwd);
1666 fdp->infabsdir = absolute_dirname (fn, cwd);
1667 if (filename_is_absolute (fn))
1669 /* An absolute file name. Canonicalize it. */
1670 fdp->taggedfname = absolute_filename (fn, NULL);
1672 else
1674 /* A file name relative to cwd. Make it relative
1675 to the directory of the tags file. */
1676 fdp->taggedfname = relative_filename (fn, tagfiledir);
1678 fdp->usecharno = true; /* use char position when making tags */
1679 fdp->prop = NULL;
1680 fdp->written = false; /* not written on tags file yet */
1682 fdhead = fdp;
1683 curfdp = fdhead; /* the current file description */
1685 find_entries (fh);
1687 /* If not Ctags, and if this is not metasource and if it contained no #line
1688 directives, we can write the tags and free all nodes pointing to
1689 curfdp. */
1690 if (!CTAGS
1691 && curfdp->usecharno /* no #line directives in this file */
1692 && !curfdp->lang->metasource)
1694 node *np, *prev;
1696 /* Look for the head of the sublist relative to this file. See add_node
1697 for the structure of the node tree. */
1698 prev = NULL;
1699 for (np = nodehead; np != NULL; prev = np, np = np->left)
1700 if (np->fdp == curfdp)
1701 break;
1703 /* If we generated tags for this file, write and delete them. */
1704 if (np != NULL)
1706 /* This is the head of the last sublist, if any. The following
1707 instructions depend on this being true. */
1708 assert (np->left == NULL);
1710 assert (fdhead == curfdp);
1711 assert (last_node->fdp == curfdp);
1712 put_entries (np); /* write tags for file curfdp->taggedfname */
1713 free_tree (np); /* remove the written nodes */
1714 if (prev == NULL)
1715 nodehead = NULL; /* no nodes left */
1716 else
1717 prev->left = NULL; /* delete the pointer to the sublist */
1722 static void
1723 reset_input (FILE *inf)
1725 if (fseek (inf, 0, SEEK_SET) != 0)
1726 perror (infilename);
1730 * This routine opens the specified file and calls the function
1731 * which finds the function and type definitions.
1733 static void
1734 find_entries (FILE *inf)
1736 char *cp;
1737 language *lang = curfdp->lang;
1738 Lang_function *parser = NULL;
1740 /* If user specified a language, use it. */
1741 if (lang != NULL && lang->function != NULL)
1743 parser = lang->function;
1746 /* Else try to guess the language given the file name. */
1747 if (parser == NULL)
1749 lang = get_language_from_filename (curfdp->infname, true);
1750 if (lang != NULL && lang->function != NULL)
1752 curfdp->lang = lang;
1753 parser = lang->function;
1757 /* Else look for sharp-bang as the first two characters. */
1758 if (parser == NULL
1759 && readline_internal (&lb, inf, infilename) > 0
1760 && lb.len >= 2
1761 && lb.buffer[0] == '#'
1762 && lb.buffer[1] == '!')
1764 char *lp;
1766 /* Set lp to point at the first char after the last slash in the
1767 line or, if no slashes, at the first nonblank. Then set cp to
1768 the first successive blank and terminate the string. */
1769 lp = strrchr (lb.buffer+2, '/');
1770 if (lp != NULL)
1771 lp += 1;
1772 else
1773 lp = skip_spaces (lb.buffer + 2);
1774 cp = skip_non_spaces (lp);
1775 *cp = '\0';
1777 if (strlen (lp) > 0)
1779 lang = get_language_from_interpreter (lp);
1780 if (lang != NULL && lang->function != NULL)
1782 curfdp->lang = lang;
1783 parser = lang->function;
1788 reset_input (inf);
1790 /* Else try to guess the language given the case insensitive file name. */
1791 if (parser == NULL)
1793 lang = get_language_from_filename (curfdp->infname, false);
1794 if (lang != NULL && lang->function != NULL)
1796 curfdp->lang = lang;
1797 parser = lang->function;
1801 /* Else try Fortran or C. */
1802 if (parser == NULL)
1804 node *old_last_node = last_node;
1806 curfdp->lang = get_language_from_langname ("fortran");
1807 find_entries (inf);
1809 if (old_last_node == last_node)
1810 /* No Fortran entries found. Try C. */
1812 reset_input (inf);
1813 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1814 find_entries (inf);
1816 return;
1819 if (!no_line_directive
1820 && curfdp->lang != NULL && curfdp->lang->metasource)
1821 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1822 file, or anyway we parsed a file that is automatically generated from
1823 this one. If this is the case, the bingo.c file contained #line
1824 directives that generated tags pointing to this file. Let's delete
1825 them all before parsing this file, which is the real source. */
1827 fdesc **fdpp = &fdhead;
1828 while (*fdpp != NULL)
1829 if (*fdpp != curfdp
1830 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1831 /* We found one of those! We must delete both the file description
1832 and all tags referring to it. */
1834 fdesc *badfdp = *fdpp;
1836 /* Delete the tags referring to badfdp->taggedfname
1837 that were obtained from badfdp->infname. */
1838 invalidate_nodes (badfdp, &nodehead);
1840 *fdpp = badfdp->next; /* remove the bad description from the list */
1841 free_fdesc (badfdp);
1843 else
1844 fdpp = &(*fdpp)->next; /* advance the list pointer */
1847 assert (parser != NULL);
1849 /* Generic initializations before reading from file. */
1850 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1852 /* Generic initializations before parsing file with readline. */
1853 lineno = 0; /* reset global line number */
1854 charno = 0; /* reset global char number */
1855 linecharno = 0; /* reset global char number of line start */
1857 parser (inf);
1859 regex_tag_multiline ();
1864 * Check whether an implicitly named tag should be created,
1865 * then call `pfnote'.
1866 * NAME is a string that is internally copied by this function.
1868 * TAGS format specification
1869 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1870 * The following is explained in some more detail in etc/ETAGS.EBNF.
1872 * make_tag creates tags with "implicit tag names" (unnamed tags)
1873 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1874 * 1. NAME does not contain any of the characters in NONAM;
1875 * 2. LINESTART contains name as either a rightmost, or rightmost but
1876 * one character, substring;
1877 * 3. the character, if any, immediately before NAME in LINESTART must
1878 * be a character in NONAM;
1879 * 4. the character, if any, immediately after NAME in LINESTART must
1880 * also be a character in NONAM.
1882 * The implementation uses the notinname() macro, which recognizes the
1883 * characters stored in the string `nonam'.
1884 * etags.el needs to use the same characters that are in NONAM.
1886 static void
1887 make_tag (const char *name, /* tag name, or NULL if unnamed */
1888 int namelen, /* tag length */
1889 bool is_func, /* tag is a function */
1890 char *linestart, /* start of the line where tag is */
1891 int linelen, /* length of the line where tag is */
1892 int lno, /* line number */
1893 long int cno) /* character number */
1895 bool named = (name != NULL && namelen > 0);
1896 char *nname = NULL;
1898 if (!CTAGS && named) /* maybe set named to false */
1899 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1900 such that etags.el can guess a name from it. */
1902 int i;
1903 register const char *cp = name;
1905 for (i = 0; i < namelen; i++)
1906 if (notinname (*cp++))
1907 break;
1908 if (i == namelen) /* rule #1 */
1910 cp = linestart + linelen - namelen;
1911 if (notinname (linestart[linelen-1]))
1912 cp -= 1; /* rule #4 */
1913 if (cp >= linestart /* rule #2 */
1914 && (cp == linestart
1915 || notinname (cp[-1])) /* rule #3 */
1916 && strneq (name, cp, namelen)) /* rule #2 */
1917 named = false; /* use implicit tag name */
1921 if (named)
1922 nname = savenstr (name, namelen);
1924 pfnote (nname, is_func, linestart, linelen, lno, cno);
1927 /* Record a tag. */
1928 static void
1929 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1930 long int cno)
1931 /* tag name, or NULL if unnamed */
1932 /* tag is a function */
1933 /* start of the line where tag is */
1934 /* length of the line where tag is */
1935 /* line number */
1936 /* character number */
1938 register node *np;
1940 assert (name == NULL || name[0] != '\0');
1941 if (CTAGS && name == NULL)
1942 return;
1944 np = xnew (1, node);
1946 /* If ctags mode, change name "main" to M<thisfilename>. */
1947 if (CTAGS && !cxref_style && streq (name, "main"))
1949 char *fp = strrchr (curfdp->taggedfname, '/');
1950 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1951 fp = strrchr (np->name, '.');
1952 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1953 fp[0] = '\0';
1955 else
1956 np->name = name;
1957 np->valid = true;
1958 np->been_warned = false;
1959 np->fdp = curfdp;
1960 np->is_func = is_func;
1961 np->lno = lno;
1962 if (np->fdp->usecharno)
1963 /* Our char numbers are 0-base, because of C language tradition?
1964 ctags compatibility? old versions compatibility? I don't know.
1965 Anyway, since emacs's are 1-base we expect etags.el to take care
1966 of the difference. If we wanted to have 1-based numbers, we would
1967 uncomment the +1 below. */
1968 np->cno = cno /* + 1 */ ;
1969 else
1970 np->cno = invalidcharno;
1971 np->left = np->right = NULL;
1972 if (CTAGS && !cxref_style)
1974 if (strlen (linestart) < 50)
1975 np->regex = concat (linestart, "$", "");
1976 else
1977 np->regex = savenstr (linestart, 50);
1979 else
1980 np->regex = savenstr (linestart, linelen);
1982 add_node (np, &nodehead);
1986 * free_tree ()
1987 * recurse on left children, iterate on right children.
1989 static void
1990 free_tree (register node *np)
1992 while (np)
1994 register node *node_right = np->right;
1995 free_tree (np->left);
1996 free (np->name);
1997 free (np->regex);
1998 free (np);
1999 np = node_right;
2004 * free_fdesc ()
2005 * delete a file description
2007 static void
2008 free_fdesc (register fdesc *fdp)
2010 free (fdp->infname);
2011 free (fdp->infabsname);
2012 free (fdp->infabsdir);
2013 free (fdp->taggedfname);
2014 free (fdp->prop);
2015 free (fdp);
2019 * add_node ()
2020 * Adds a node to the tree of nodes. In etags mode, sort by file
2021 * name. In ctags mode, sort by tag name. Make no attempt at
2022 * balancing.
2024 * add_node is the only function allowed to add nodes, so it can
2025 * maintain state.
2027 static void
2028 add_node (node *np, node **cur_node_p)
2030 register int dif;
2031 register node *cur_node = *cur_node_p;
2033 if (cur_node == NULL)
2035 *cur_node_p = np;
2036 last_node = np;
2037 return;
2040 if (!CTAGS)
2041 /* Etags Mode */
2043 /* For each file name, tags are in a linked sublist on the right
2044 pointer. The first tags of different files are a linked list
2045 on the left pointer. last_node points to the end of the last
2046 used sublist. */
2047 if (last_node != NULL && last_node->fdp == np->fdp)
2049 /* Let's use the same sublist as the last added node. */
2050 assert (last_node->right == NULL);
2051 last_node->right = np;
2052 last_node = np;
2054 else if (cur_node->fdp == np->fdp)
2056 /* Scanning the list we found the head of a sublist which is
2057 good for us. Let's scan this sublist. */
2058 add_node (np, &cur_node->right);
2060 else
2061 /* The head of this sublist is not good for us. Let's try the
2062 next one. */
2063 add_node (np, &cur_node->left);
2064 } /* if ETAGS mode */
2066 else
2068 /* Ctags Mode */
2069 dif = strcmp (np->name, cur_node->name);
2072 * If this tag name matches an existing one, then
2073 * do not add the node, but maybe print a warning.
2075 if (no_duplicates && !dif)
2077 if (np->fdp == cur_node->fdp)
2079 if (!no_warnings)
2081 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2082 np->fdp->infname, lineno, np->name);
2083 fprintf (stderr, "Second entry ignored\n");
2086 else if (!cur_node->been_warned && !no_warnings)
2088 fprintf
2089 (stderr,
2090 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2091 np->fdp->infname, cur_node->fdp->infname, np->name);
2092 cur_node->been_warned = true;
2094 return;
2097 /* Actually add the node */
2098 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2099 } /* if CTAGS mode */
2103 * invalidate_nodes ()
2104 * Scan the node tree and invalidate all nodes pointing to the
2105 * given file description (CTAGS case) or free them (ETAGS case).
2107 static void
2108 invalidate_nodes (fdesc *badfdp, node **npp)
2110 node *np = *npp;
2112 if (np == NULL)
2113 return;
2115 if (CTAGS)
2117 if (np->left != NULL)
2118 invalidate_nodes (badfdp, &np->left);
2119 if (np->fdp == badfdp)
2120 np->valid = false;
2121 if (np->right != NULL)
2122 invalidate_nodes (badfdp, &np->right);
2124 else
2126 assert (np->fdp != NULL);
2127 if (np->fdp == badfdp)
2129 *npp = np->left; /* detach the sublist from the list */
2130 np->left = NULL; /* isolate it */
2131 free_tree (np); /* free it */
2132 invalidate_nodes (badfdp, npp);
2134 else
2135 invalidate_nodes (badfdp, &np->left);
2140 static int total_size_of_entries (node *);
2141 static int number_len (long) ATTRIBUTE_CONST;
/* Length of a number's decimal representation, i.e. the number of
   characters "%ld" would print for NUM.  A negative NUM counts one
   extra character for the minus sign.  */
static int
number_len (long int num)
{
  int len = (num < 0) ? 2 : 1;

  /* Integer division truncates toward zero, so this terminates for
     negative values too, without ever negating NUM.  */
  while ((num /= 10) != 0)
    len += 1;
  return len;
}
2154 * Return total number of characters that put_entries will output for
2155 * the nodes in the linked list at the right of the specified node.
2156 * This count is irrelevant with etags.el since emacs 19.34 at least,
2157 * but is still supplied for backward compatibility.
2159 static int
2160 total_size_of_entries (register node *np)
2162 register int total = 0;
2164 for (; np != NULL; np = np->right)
2165 if (np->valid)
2167 total += strlen (np->regex) + 1; /* pat\177 */
2168 if (np->name != NULL)
2169 total += strlen (np->name) + 1; /* name\001 */
2170 total += number_len ((long) np->lno) + 1; /* lno, */
2171 if (np->cno != invalidcharno) /* cno */
2172 total += number_len (np->cno);
2173 total += 1; /* newline */
2176 return total;
2179 static void
2180 put_entries (register node *np)
2182 register char *sp;
2183 static fdesc *fdp = NULL;
2185 if (np == NULL)
2186 return;
2188 /* Output subentries that precede this one */
2189 if (CTAGS)
2190 put_entries (np->left);
2192 /* Output this entry */
2193 if (np->valid)
2195 if (!CTAGS)
2197 /* Etags mode */
2198 if (fdp != np->fdp)
2200 fdp = np->fdp;
2201 fprintf (tagf, "\f\n%s,%d\n",
2202 fdp->taggedfname, total_size_of_entries (np));
2203 fdp->written = true;
2205 fputs (np->regex, tagf);
2206 fputc ('\177', tagf);
2207 if (np->name != NULL)
2209 fputs (np->name, tagf);
2210 fputc ('\001', tagf);
2212 fprintf (tagf, "%d,", np->lno);
2213 if (np->cno != invalidcharno)
2214 fprintf (tagf, "%ld", np->cno);
2215 fputs ("\n", tagf);
2217 else
2219 /* Ctags mode */
2220 if (np->name == NULL)
2221 error ("internal error: NULL name in ctags mode.");
2223 if (cxref_style)
2225 if (vgrind_style)
2226 fprintf (stdout, "%s %s %d\n",
2227 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2228 else
2229 fprintf (stdout, "%-16s %3d %-16s %s\n",
2230 np->name, np->lno, np->fdp->taggedfname, np->regex);
2232 else
2234 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2236 if (np->is_func)
2237 { /* function or #define macro with args */
2238 putc (searchar, tagf);
2239 putc ('^', tagf);
2241 for (sp = np->regex; *sp; sp++)
2243 if (*sp == '\\' || *sp == searchar)
2244 putc ('\\', tagf);
2245 putc (*sp, tagf);
2247 putc (searchar, tagf);
2249 else
2250 { /* anything else; text pattern inadequate */
2251 fprintf (tagf, "%d", np->lno);
2253 putc ('\n', tagf);
2256 } /* if this node contains a valid tag */
2258 /* Output subentries that follow this one */
2259 put_entries (np->right);
2260 if (!CTAGS)
2261 put_entries (np->left);
/* C extensions: bit masks describing which C-like language is parsed.  */
#define C_EXT   0x00fff         /* mask covering the language bits below */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Token classes assigned to the keywords in the gperf word list; any
 * token that is not in the list is st_none.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2339 /*%<*/
2340 /* C code produced by gperf version 3.0.1 */
2341 /* Command-line: gperf -m 5 */
2342 /* Computed positions: -k'2-3' */
2344 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */

/* gperf hash of the LEN-character word STR: LEN plus the table values
   of its second and, unless LEN is 2, third character.  STR must have
   at least two characters.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Every length other than 2 also mixes in the third character.  */
  if (len != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2393 static struct C_stab_entry *
2394 in_word_set (register const char *str, register unsigned int len)
2396 enum
2398 TOTAL_KEYWORDS = 33,
2399 MIN_WORD_LENGTH = 2,
2400 MAX_WORD_LENGTH = 15,
2401 MIN_HASH_VALUE = 2,
2402 MAX_HASH_VALUE = 34
2405 static struct C_stab_entry wordlist[] =
2407 {""}, {""},
2408 {"if", 0, st_C_ignore},
2409 {"GTY", 0, st_C_attribute},
2410 {"@end", 0, st_C_objend},
2411 {"union", 0, st_C_struct},
2412 {"define", 0, st_C_define},
2413 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2414 {"template", 0, st_C_template},
2415 {"operator", C_PLPL, st_C_operator},
2416 {"@interface", 0, st_C_objprot},
2417 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2418 {"friend", C_PLPL, st_C_ignore},
2419 {"typedef", 0, st_C_typedef},
2420 {"return", 0, st_C_ignore},
2421 {"@implementation",0, st_C_objimpl},
2422 {"@protocol", 0, st_C_objprot},
2423 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2424 {"extern", 0, st_C_extern},
2425 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2426 {"struct", 0, st_C_struct},
2427 {"domain", C_STAR, st_C_struct},
2428 {"switch", 0, st_C_ignore},
2429 {"enum", 0, st_C_enum},
2430 {"for", 0, st_C_ignore},
2431 {"namespace", C_PLPL, st_C_struct},
2432 {"class", 0, st_C_class},
2433 {"while", 0, st_C_ignore},
2434 {"undef", 0, st_C_define},
2435 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2436 {"__attribute__", 0, st_C_attribute},
2437 {"SYSCALL", 0, st_C_gnumacro},
2438 {"ENTRY", 0, st_C_gnumacro},
2439 {"PSEUDO", 0, st_C_gnumacro},
2440 {"DEFUN", 0, st_C_gnumacro}
2443 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2445 int key = hash (str, len);
2447 if (key <= MAX_HASH_VALUE && key >= 0)
2449 const char *s = wordlist[key].name;
2451 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2452 return &wordlist[key];
2455 return 0;
2457 /*%>*/
2459 static enum sym_type
2460 C_symtype (char *str, int len, int c_ext)
2462 register struct C_stab_entry *se = in_word_set (str, len);
2464 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2465 return st_none;
2466 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* when false, do not create a tag; the
                                   token should be invalidated whenever a
                                   state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2584 * Variables and functions for dealing with nested structures.
2585 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2587 static void pushclass_above (int, char *, int);
2588 static void popclass_above (int);
2589 static void write_classname (linebuffer *, const char *qualifier);
2591 static struct {
2592 char **cname; /* nested class names */
2593 int *bracelev; /* nested class brace level */
2594 int nl; /* class nesting level (elements used) */
2595 int size; /* length of the array */
2596 } cstack; /* stack for nested declaration tags */
2597 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2598 #define nestlev (cstack.nl)
2599 /* After struct keyword or in struct body, not inside a nested function. */
2600 #define instruct (structdef == snone && nestlev > 0 \
2601 && bracelev == cstack.bracelev[nestlev-1] + 1)
2603 static void
2604 pushclass_above (int bracelev, char *str, int len)
2606 int nl;
2608 popclass_above (bracelev);
2609 nl = cstack.nl;
2610 if (nl >= cstack.size)
2612 int size = cstack.size *= 2;
2613 xrnew (cstack.cname, size, char *);
2614 xrnew (cstack.bracelev, size, int);
2616 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2617 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2618 cstack.bracelev[nl] = bracelev;
2619 cstack.nl = nl + 1;
2622 static void
2623 popclass_above (int bracelev)
2625 int nl;
2627 for (nl = cstack.nl - 1;
2628 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2629 nl--)
2631 free (cstack.cname[nl]);
2632 cstack.nl = nl;
2636 static void
2637 write_classname (linebuffer *cn, const char *qualifier)
2639 int i, len;
2640 int qlen = strlen (qualifier);
2642 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2644 len = 0;
2645 cn->len = 0;
2646 cn->buffer[0] = '\0';
2648 else
2650 len = strlen (cstack.cname[0]);
2651 linebuffer_setlen (cn, len);
2652 strcpy (cn->buffer, cstack.cname[0]);
2654 for (i = 1; i < cstack.nl; i++)
2656 char *s = cstack.cname[i];
2657 if (s == NULL)
2658 continue;
2659 linebuffer_setlen (cn, len + qlen + strlen (s));
2660 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2665 static bool consider_token (char *, int, int, int *, int, int, bool *);
2666 static void make_C_tag (bool);
2669 * consider_token ()
2670 * checks to see if the current token is at the start of a
2671 * function or variable, or corresponds to a typedef, or
2672 * is a struct/union/enum tag, or #define, or an enum constant.
2674 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2675 * with args. C_EXTP points to which language we are looking at.
2677 * Globals
2678 * fvdef IN OUT
2679 * structdef IN OUT
2680 * definedef IN OUT
2681 * typdef IN OUT
2682 * objdef IN OUT
2685 static bool
2686 consider_token (char *str, int len, int c, int *c_extp,
2687 int bracelev, int parlev, bool *is_func_or_var)
2688 /* IN: token pointer */
2689 /* IN: token length */
2690 /* IN: first char after the token */
2691 /* IN, OUT: C extensions mask */
2692 /* IN: brace level */
2693 /* IN: parenthesis level */
2694 /* OUT: function or variable found */
2696 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2697 structtype is the type of the preceding struct-like keyword, and
2698 structbracelev is the brace level where it has been seen. */
2699 static enum sym_type structtype;
2700 static int structbracelev;
2701 static enum sym_type toktype;
2704 toktype = C_symtype (str, len, *c_extp);
2707 * Skip __attribute__
2709 if (toktype == st_C_attribute)
2711 inattribute = true;
2712 return false;
2716 * Advance the definedef state machine.
2718 switch (definedef)
2720 case dnone:
2721 /* We're not on a preprocessor line. */
2722 if (toktype == st_C_gnumacro)
2724 fvdef = fdefunkey;
2725 return false;
2727 break;
2728 case dsharpseen:
2729 if (toktype == st_C_define)
2731 definedef = ddefineseen;
2733 else
2735 definedef = dignorerest;
2737 return false;
2738 case ddefineseen:
2740 * Make a tag for any macro, unless it is a constant
2741 * and constantypedefs is false.
2743 definedef = dignorerest;
2744 *is_func_or_var = (c == '(');
2745 if (!*is_func_or_var && !constantypedefs)
2746 return false;
2747 else
2748 return true;
2749 case dignorerest:
2750 return false;
2751 default:
2752 error ("internal error: definedef value.");
2756 * Now typedefs
2758 switch (typdef)
2760 case tnone:
2761 if (toktype == st_C_typedef)
2763 if (typedefs)
2764 typdef = tkeyseen;
2765 fvextern = false;
2766 fvdef = fvnone;
2767 return false;
2769 break;
2770 case tkeyseen:
2771 switch (toktype)
2773 case st_none:
2774 case st_C_class:
2775 case st_C_struct:
2776 case st_C_enum:
2777 typdef = ttypeseen;
2778 break;
2779 default:
2780 break;
2782 break;
2783 case ttypeseen:
2784 if (structdef == snone && fvdef == fvnone)
2786 fvdef = fvnameseen;
2787 return true;
2789 break;
2790 case tend:
2791 switch (toktype)
2793 case st_C_class:
2794 case st_C_struct:
2795 case st_C_enum:
2796 return false;
2797 default:
2798 return true;
2800 default:
2801 break;
2804 switch (toktype)
2806 case st_C_javastruct:
2807 if (structdef == stagseen)
2808 structdef = scolonseen;
2809 return false;
2810 case st_C_template:
2811 case st_C_class:
2812 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2813 && bracelev == 0
2814 && definedef == dnone && structdef == snone
2815 && typdef == tnone && fvdef == fvnone)
2816 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2817 if (toktype == st_C_template)
2818 break;
2819 /* FALLTHRU */
2820 case st_C_struct:
2821 case st_C_enum:
2822 if (parlev == 0
2823 && fvdef != vignore
2824 && (typdef == tkeyseen
2825 || (typedefs_or_cplusplus && structdef == snone)))
2827 structdef = skeyseen;
2828 structtype = toktype;
2829 structbracelev = bracelev;
2830 if (fvdef == fvnameseen)
2831 fvdef = fvnone;
2833 return false;
2834 default:
2835 break;
2838 if (structdef == skeyseen)
2840 structdef = stagseen;
2841 return true;
2844 if (typdef != tnone)
2845 definedef = dnone;
2847 /* Detect Objective C constructs. */
2848 switch (objdef)
2850 case onone:
2851 switch (toktype)
2853 case st_C_objprot:
2854 objdef = oprotocol;
2855 return false;
2856 case st_C_objimpl:
2857 objdef = oimplementation;
2858 return false;
2859 default:
2860 break;
2862 break;
2863 case oimplementation:
2864 /* Save the class tag for functions or variables defined inside. */
2865 objtag = savenstr (str, len);
2866 objdef = oinbody;
2867 return false;
2868 case oprotocol:
2869 /* Save the class tag for categories. */
2870 objtag = savenstr (str, len);
2871 objdef = otagseen;
2872 *is_func_or_var = true;
2873 return true;
2874 case oparenseen:
2875 objdef = ocatseen;
2876 *is_func_or_var = true;
2877 return true;
2878 case oinbody:
2879 break;
2880 case omethodsign:
2881 if (parlev == 0)
2883 fvdef = fvnone;
2884 objdef = omethodtag;
2885 linebuffer_setlen (&token_name, len);
2886 memcpy (token_name.buffer, str, len);
2887 token_name.buffer[len] = '\0';
2888 return true;
2890 return false;
2891 case omethodcolon:
2892 if (parlev == 0)
2893 objdef = omethodparm;
2894 return false;
2895 case omethodparm:
2896 if (parlev == 0)
2898 objdef = omethodtag;
2899 if (class_qualify)
2901 int oldlen = token_name.len;
2902 fvdef = fvnone;
2903 linebuffer_setlen (&token_name, oldlen + len);
2904 memcpy (token_name.buffer + oldlen, str, len);
2905 token_name.buffer[oldlen + len] = '\0';
2907 return true;
2909 return false;
2910 case oignore:
2911 if (toktype == st_C_objend)
2913 /* Memory leakage here: the string pointed by objtag is
2914 never released, because many tests would be needed to
2915 avoid breaking on incorrect input code. The amount of
2916 memory leaked here is the sum of the lengths of the
2917 class tags.
2918 free (objtag); */
2919 objdef = onone;
2921 return false;
2922 default:
2923 break;
2926 /* A function, variable or enum constant? */
2927 switch (toktype)
2929 case st_C_extern:
2930 fvextern = true;
2931 switch (fvdef)
2933 case finlist:
2934 case flistseen:
2935 case fignore:
2936 case vignore:
2937 break;
2938 default:
2939 fvdef = fvnone;
2941 return false;
2942 case st_C_ignore:
2943 fvextern = false;
2944 fvdef = vignore;
2945 return false;
2946 case st_C_operator:
2947 fvdef = foperator;
2948 *is_func_or_var = true;
2949 return true;
2950 case st_none:
2951 if (constantypedefs
2952 && structdef == snone
2953 && structtype == st_C_enum && bracelev > structbracelev
2954 /* Don't tag tokens in expressions that assign values to enum
2955 constants. */
2956 && fvdef != vignore)
2957 return true; /* enum constant */
2958 switch (fvdef)
2960 case fdefunkey:
2961 if (bracelev > 0)
2962 break;
2963 fvdef = fdefunname; /* GNU macro */
2964 *is_func_or_var = true;
2965 return true;
2966 case fvnone:
2967 switch (typdef)
2969 case ttypeseen:
2970 return false;
2971 case tnone:
2972 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2973 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2975 fvdef = vignore;
2976 return false;
2978 break;
2979 default:
2980 break;
2982 /* FALLTHRU */
2983 case fvnameseen:
2984 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2986 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2987 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2988 fvdef = foperator;
2989 *is_func_or_var = true;
2990 return true;
2992 if (bracelev > 0 && !instruct)
2993 break;
2994 fvdef = fvnameseen; /* function or variable */
2995 *is_func_or_var = true;
2996 return true;
2997 default:
2998 break;
3000 break;
3001 default:
3002 break;
3005 return false;
3010 * C_entries often keeps pointers to tokens or lines which are older than
3011 * the line currently read. By keeping two line buffers, and switching
3012 * them at end of line, it is possible to use those pointers.
3014 static struct
3016 long linepos;
3017 linebuffer lb;
3018 } lbs[2];
3020 #define current_lb_is_new (newndx == curndx)
3021 #define switch_line_buffers() (curndx = 1 - curndx)
3023 #define curlb (lbs[curndx].lb)
3024 #define newlb (lbs[newndx].lb)
3025 #define curlinepos (lbs[curndx].linepos)
3026 #define newlinepos (lbs[newndx].linepos)
3028 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3029 #define cplpl (c_ext & C_PLPL)
3030 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3032 #define CNL_SAVE_DEFINEDEF() \
3033 do { \
3034 curlinepos = charno; \
3035 readline (&curlb, inf); \
3036 lp = curlb.buffer; \
3037 quotednl = false; \
3038 newndx = curndx; \
3039 } while (0)
3041 #define CNL() \
3042 do { \
3043 CNL_SAVE_DEFINEDEF (); \
3044 if (savetoken.valid) \
3046 token = savetoken; \
3047 savetoken.valid = false; \
3049 definedef = dnone; \
3050 } while (0)
3053 static void
3054 make_C_tag (bool isfun)
3056 /* This function is never called when token.valid is false, but
3057 we must protect against invalid input or internal errors. */
3058 if (token.valid)
3059 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3060 token.offset+token.length+1, token.lineno, token.linepos);
3061 else if (DEBUG)
3062 { /* this branch is optimized away if !DEBUG */
3063 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3064 token_name.len + 17, isfun, token.line,
3065 token.offset+token.length+1, token.lineno, token.linepos);
3066 error ("INVALID TOKEN");
3069 token.valid = false;
/* Return true if stream INF has neither reached end of file nor
   recorded an error.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3080 * C_entries ()
3081 * This routine finds functions, variables, typedefs,
3082 * #define's, enum constants and struct/union/enum definitions in
3083 * C syntax and adds them to the list.
3085 static void
3086 C_entries (int c_ext, FILE *inf)
3087 /* extension of C */
3088 /* input file */
3090 register char c; /* latest char read; '\0' for end of line */
3091 register char *lp; /* pointer one beyond the character `c' */
3092 int curndx, newndx; /* indices for current and new lb */
3093 register int tokoff; /* offset in line of start of current token */
3094 register int toklen; /* length of current token */
3095 const char *qualifier; /* string used to qualify names */
3096 int qlen; /* length of qualifier */
3097 int bracelev; /* current brace level */
3098 int bracketlev; /* current bracket level */
3099 int parlev; /* current parenthesis level */
3100 int attrparlev; /* __attribute__ parenthesis level */
3101 int templatelev; /* current template level */
3102 int typdefbracelev; /* bracelev where a typedef struct body begun */
3103 bool incomm, inquote, inchar, quotednl, midtoken;
3104 bool yacc_rules; /* in the rules part of a yacc file */
3105 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3108 linebuffer_init (&lbs[0].lb);
3109 linebuffer_init (&lbs[1].lb);
3110 if (cstack.size == 0)
3112 cstack.size = (DEBUG) ? 1 : 4;
3113 cstack.nl = 0;
3114 cstack.cname = xnew (cstack.size, char *);
3115 cstack.bracelev = xnew (cstack.size, int);
3118 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3119 curndx = newndx = 0;
3120 lp = curlb.buffer;
3121 *lp = 0;
3123 fvdef = fvnone; fvextern = false; typdef = tnone;
3124 structdef = snone; definedef = dnone; objdef = onone;
3125 yacc_rules = false;
3126 midtoken = inquote = inchar = incomm = quotednl = false;
3127 token.valid = savetoken.valid = false;
3128 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3129 if (cjava)
3130 { qualifier = "."; qlen = 1; }
3131 else
3132 { qualifier = "::"; qlen = 2; }
3135 while (perhaps_more_input (inf))
3137 c = *lp++;
3138 if (c == '\\')
3140 /* If we are at the end of the line, the next character is a
3141 '\0'; do not skip it, because it is what tells us
3142 to read the next line. */
3143 if (*lp == '\0')
3145 quotednl = true;
3146 continue;
3148 lp++;
3149 c = ' ';
3151 else if (incomm)
3153 switch (c)
3155 case '*':
3156 if (*lp == '/')
3158 c = *lp++;
3159 incomm = false;
3161 break;
3162 case '\0':
3163 /* Newlines inside comments do not end macro definitions in
3164 traditional cpp. */
3165 CNL_SAVE_DEFINEDEF ();
3166 break;
3168 continue;
3170 else if (inquote)
3172 switch (c)
3174 case '"':
3175 inquote = false;
3176 break;
3177 case '\0':
3178 /* Newlines inside strings do not end macro definitions
3179 in traditional cpp, even though compilers don't
3180 usually accept them. */
3181 CNL_SAVE_DEFINEDEF ();
3182 break;
3184 continue;
3186 else if (inchar)
3188 switch (c)
3190 case '\0':
3191 /* Hmmm, something went wrong. */
3192 CNL ();
3193 /* FALLTHRU */
3194 case '\'':
3195 inchar = false;
3196 break;
3198 continue;
3200 else switch (c)
3202 case '"':
3203 inquote = true;
3204 if (bracketlev > 0)
3205 continue;
3206 if (inattribute)
3207 break;
3208 switch (fvdef)
3210 case fdefunkey:
3211 case fstartlist:
3212 case finlist:
3213 case fignore:
3214 case vignore:
3215 break;
3216 default:
3217 fvextern = false;
3218 fvdef = fvnone;
3220 continue;
3221 case '\'':
3222 inchar = true;
3223 if (bracketlev > 0)
3224 continue;
3225 if (inattribute)
3226 break;
3227 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3229 fvextern = false;
3230 fvdef = fvnone;
3232 continue;
3233 case '/':
3234 if (*lp == '*')
3236 incomm = true;
3237 lp++;
3238 c = ' ';
3239 if (bracketlev > 0)
3240 continue;
3242 else if (/* cplpl && */ *lp == '/')
3244 c = '\0';
3246 break;
3247 case '%':
3248 if ((c_ext & YACC) && *lp == '%')
3250 /* Entering or exiting rules section in yacc file. */
3251 lp++;
3252 definedef = dnone; fvdef = fvnone; fvextern = false;
3253 typdef = tnone; structdef = snone;
3254 midtoken = inquote = inchar = incomm = quotednl = false;
3255 bracelev = 0;
3256 yacc_rules = !yacc_rules;
3257 continue;
3259 else
3260 break;
3261 case '#':
3262 if (definedef == dnone)
3264 char *cp;
3265 bool cpptoken = true;
3267 /* Look back on this line. If all blanks, or nonblanks
3268 followed by an end of comment, this is a preprocessor
3269 token. */
3270 for (cp = newlb.buffer; cp < lp-1; cp++)
3271 if (!c_isspace (*cp))
3273 if (*cp == '*' && cp[1] == '/')
3275 cp++;
3276 cpptoken = true;
3278 else
3279 cpptoken = false;
3281 if (cpptoken)
3283 definedef = dsharpseen;
3284 /* This is needed for tagging enum values: when there are
3285 preprocessor conditionals inside the enum, we need to
3286 reset the value of fvdef so that the next enum value is
3287 tagged even though the one before it did not end in a
3288 comma. */
3289 if (fvdef == vignore && instruct && parlev == 0)
3291 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3292 fvdef = fvnone;
3295 } /* if (definedef == dnone) */
3296 continue;
3297 case '[':
3298 bracketlev++;
3299 continue;
3300 default:
3301 if (bracketlev > 0)
3303 if (c == ']')
3304 --bracketlev;
3305 else if (c == '\0')
3306 CNL_SAVE_DEFINEDEF ();
3307 continue;
3309 break;
3310 } /* switch (c) */
3313 /* Consider token only if some involved conditions are satisfied. */
3314 if (typdef != tignore
3315 && definedef != dignorerest
3316 && fvdef != finlist
3317 && templatelev == 0
3318 && (definedef != dnone
3319 || structdef != scolonseen)
3320 && !inattribute)
3322 if (midtoken)
3324 if (endtoken (c))
3326 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3327 /* This handles :: in the middle,
3328 but not at the beginning of an identifier.
3329 Also, space-separated :: is not recognized. */
3331 if (c_ext & C_AUTO) /* automatic detection of C++ */
3332 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3333 lp += 2;
3334 toklen += 2;
3335 c = lp[-1];
3336 goto still_in_token;
3338 else
3340 bool funorvar = false;
3342 if (yacc_rules
3343 || consider_token (newlb.buffer + tokoff, toklen, c,
3344 &c_ext, bracelev, parlev,
3345 &funorvar))
3347 if (fvdef == foperator)
3349 char *oldlp = lp;
3350 lp = skip_spaces (lp-1);
3351 if (*lp != '\0')
3352 lp += 1;
3353 while (*lp != '\0'
3354 && !c_isspace (*lp) && *lp != '(')
3355 lp += 1;
3356 c = *lp++;
3357 toklen += lp - oldlp;
3359 token.named = false;
3360 if (!plainc
3361 && nestlev > 0 && definedef == dnone)
3362 /* in struct body */
3364 if (class_qualify)
3366 int len;
3367 write_classname (&token_name, qualifier);
3368 len = token_name.len;
3369 linebuffer_setlen (&token_name,
3370 len + qlen + toklen);
3371 sprintf (token_name.buffer + len, "%s%.*s",
3372 qualifier, toklen,
3373 newlb.buffer + tokoff);
3375 else
3377 linebuffer_setlen (&token_name, toklen);
3378 sprintf (token_name.buffer, "%.*s",
3379 toklen, newlb.buffer + tokoff);
3381 token.named = true;
3383 else if (objdef == ocatseen)
3384 /* Objective C category */
3386 if (class_qualify)
3388 int len = strlen (objtag) + 2 + toklen;
3389 linebuffer_setlen (&token_name, len);
3390 sprintf (token_name.buffer, "%s(%.*s)",
3391 objtag, toklen,
3392 newlb.buffer + tokoff);
3394 else
3396 linebuffer_setlen (&token_name, toklen);
3397 sprintf (token_name.buffer, "%.*s",
3398 toklen, newlb.buffer + tokoff);
3400 token.named = true;
3402 else if (objdef == omethodtag
3403 || objdef == omethodparm)
3404 /* Objective C method */
3406 token.named = true;
3408 else if (fvdef == fdefunname)
3409 /* GNU DEFUN and similar macros */
3411 bool defun = (newlb.buffer[tokoff] == 'F');
3412 int off = tokoff;
3413 int len = toklen;
3415 /* Rewrite the tag so that emacs lisp DEFUNs
3416 can be found by their elisp name */
3417 if (defun)
3419 off += 1;
3420 len -= 1;
3422 linebuffer_setlen (&token_name, len);
3423 memcpy (token_name.buffer,
3424 newlb.buffer + off, len);
3425 token_name.buffer[len] = '\0';
3426 if (defun)
3427 while (--len >= 0)
3428 if (token_name.buffer[len] == '_')
3429 token_name.buffer[len] = '-';
3430 token.named = defun;
3432 else
3434 linebuffer_setlen (&token_name, toklen);
3435 memcpy (token_name.buffer,
3436 newlb.buffer + tokoff, toklen);
3437 token_name.buffer[toklen] = '\0';
3438 /* Name macros and members. */
3439 token.named = (structdef == stagseen
3440 || typdef == ttypeseen
3441 || typdef == tend
3442 || (funorvar
3443 && definedef == dignorerest)
3444 || (funorvar
3445 && definedef == dnone
3446 && structdef == snone
3447 && bracelev > 0));
3449 token.lineno = lineno;
3450 token.offset = tokoff;
3451 token.length = toklen;
3452 token.line = newlb.buffer;
3453 token.linepos = newlinepos;
3454 token.valid = true;
3456 if (definedef == dnone
3457 && (fvdef == fvnameseen
3458 || fvdef == foperator
3459 || structdef == stagseen
3460 || typdef == tend
3461 || typdef == ttypeseen
3462 || objdef != onone))
3464 if (current_lb_is_new)
3465 switch_line_buffers ();
3467 else if (definedef != dnone
3468 || fvdef == fdefunname
3469 || instruct)
3470 make_C_tag (funorvar);
3472 else /* not yacc and consider_token failed */
3474 if (inattribute && fvdef == fignore)
3476 /* We have just met __attribute__ after a
3477 function parameter list: do not tag the
3478 function again. */
3479 fvdef = fvnone;
3482 midtoken = false;
3484 } /* if (endtoken (c)) */
3485 else if (intoken (c))
3486 still_in_token:
3488 toklen++;
3489 continue;
3491 } /* if (midtoken) */
3492 else if (begtoken (c))
3494 switch (definedef)
3496 case dnone:
3497 switch (fvdef)
3499 case fstartlist:
3500 /* This prevents tagging fb in
3501 void (__attribute__((noreturn)) *fb) (void);
3502 Fixing this is not easy and not very important. */
3503 fvdef = finlist;
3504 continue;
3505 case flistseen:
3506 if (plainc || declarations)
3508 make_C_tag (true); /* a function */
3509 fvdef = fignore;
3511 break;
3512 default:
3513 break;
3515 if (structdef == stagseen && !cjava)
3517 popclass_above (bracelev);
3518 structdef = snone;
3520 break;
3521 case dsharpseen:
3522 savetoken = token;
3523 break;
3524 default:
3525 break;
3527 if (!yacc_rules || lp == newlb.buffer + 1)
3529 tokoff = lp - 1 - newlb.buffer;
3530 toklen = 1;
3531 midtoken = true;
3533 continue;
3534 } /* if (begtoken) */
3535 } /* if must look at token */
3538 /* Detect end of line, colon, comma, semicolon and various braces
3539 after having handled a token.*/
3540 switch (c)
3542 case ':':
3543 if (inattribute)
3544 break;
3545 if (yacc_rules && token.offset == 0 && token.valid)
3547 make_C_tag (false); /* a yacc function */
3548 break;
3550 if (definedef != dnone)
3551 break;
3552 switch (objdef)
3554 case otagseen:
3555 objdef = oignore;
3556 make_C_tag (true); /* an Objective C class */
3557 break;
3558 case omethodtag:
3559 case omethodparm:
3560 objdef = omethodcolon;
3561 if (class_qualify)
3563 int toklen = token_name.len;
3564 linebuffer_setlen (&token_name, toklen + 1);
3565 strcpy (token_name.buffer + toklen, ":");
3567 break;
3568 default:
3569 break;
3571 if (structdef == stagseen)
3573 structdef = scolonseen;
3574 break;
3576 /* Should be useless, but may be work as a safety net. */
3577 if (cplpl && fvdef == flistseen)
3579 make_C_tag (true); /* a function */
3580 fvdef = fignore;
3581 break;
3583 break;
3584 case ';':
3585 if (definedef != dnone || inattribute)
3586 break;
3587 switch (typdef)
3589 case tend:
3590 case ttypeseen:
3591 make_C_tag (false); /* a typedef */
3592 typdef = tnone;
3593 fvdef = fvnone;
3594 break;
3595 case tnone:
3596 case tinbody:
3597 case tignore:
3598 switch (fvdef)
3600 case fignore:
3601 if (typdef == tignore || cplpl)
3602 fvdef = fvnone;
3603 break;
3604 case fvnameseen:
3605 if ((globals && bracelev == 0 && (!fvextern || declarations))
3606 || (members && instruct))
3607 make_C_tag (false); /* a variable */
3608 fvextern = false;
3609 fvdef = fvnone;
3610 token.valid = false;
3611 break;
3612 case flistseen:
3613 if ((declarations
3614 && (cplpl || !instruct)
3615 && (typdef == tnone || (typdef != tignore && instruct)))
3616 || (members
3617 && plainc && instruct))
3618 make_C_tag (true); /* a function */
3619 /* FALLTHRU */
3620 default:
3621 fvextern = false;
3622 fvdef = fvnone;
3623 if (declarations
3624 && cplpl && structdef == stagseen)
3625 make_C_tag (false); /* forward declaration */
3626 else
3627 token.valid = false;
3628 } /* switch (fvdef) */
3629 /* FALLTHRU */
3630 default:
3631 if (!instruct)
3632 typdef = tnone;
3634 if (structdef == stagseen)
3635 structdef = snone;
3636 break;
3637 case ',':
3638 if (definedef != dnone || inattribute)
3639 break;
3640 switch (objdef)
3642 case omethodtag:
3643 case omethodparm:
3644 make_C_tag (true); /* an Objective C method */
3645 objdef = oinbody;
3646 break;
3647 default:
3648 break;
3650 switch (fvdef)
3652 case fdefunkey:
3653 case foperator:
3654 case fstartlist:
3655 case finlist:
3656 case fignore:
3657 break;
3658 case vignore:
3659 if (instruct && parlev == 0)
3660 fvdef = fvnone;
3661 break;
3662 case fdefunname:
3663 fvdef = fignore;
3664 break;
3665 case fvnameseen:
3666 if (parlev == 0
3667 && ((globals
3668 && bracelev == 0
3669 && templatelev == 0
3670 && (!fvextern || declarations))
3671 || (members && instruct)))
3672 make_C_tag (false); /* a variable */
3673 break;
3674 case flistseen:
3675 if ((declarations && typdef == tnone && !instruct)
3676 || (members && typdef != tignore && instruct))
3678 make_C_tag (true); /* a function */
3679 fvdef = fvnameseen;
3681 else if (!declarations)
3682 fvdef = fvnone;
3683 token.valid = false;
3684 break;
3685 default:
3686 fvdef = fvnone;
3688 if (structdef == stagseen)
3689 structdef = snone;
3690 break;
3691 case ']':
3692 if (definedef != dnone || inattribute)
3693 break;
3694 if (structdef == stagseen)
3695 structdef = snone;
3696 switch (typdef)
3698 case ttypeseen:
3699 case tend:
3700 typdef = tignore;
3701 make_C_tag (false); /* a typedef */
3702 break;
3703 case tnone:
3704 case tinbody:
3705 switch (fvdef)
3707 case foperator:
3708 case finlist:
3709 case fignore:
3710 case vignore:
3711 break;
3712 case fvnameseen:
3713 if ((members && bracelev == 1)
3714 || (globals && bracelev == 0
3715 && (!fvextern || declarations)))
3716 make_C_tag (false); /* a variable */
3717 /* FALLTHRU */
3718 default:
3719 fvdef = fvnone;
3721 break;
3722 default:
3723 break;
3725 break;
3726 case '(':
3727 if (inattribute)
3729 attrparlev++;
3730 break;
3732 if (definedef != dnone)
3733 break;
3734 if (objdef == otagseen && parlev == 0)
3735 objdef = oparenseen;
3736 switch (fvdef)
3738 case fvnameseen:
3739 if (typdef == ttypeseen
3740 && *lp != '*'
3741 && !instruct)
3743 /* This handles constructs like:
3744 typedef void OperatorFun (int fun); */
3745 make_C_tag (false);
3746 typdef = tignore;
3747 fvdef = fignore;
3748 break;
3750 /* FALLTHRU */
3751 case foperator:
3752 fvdef = fstartlist;
3753 break;
3754 case flistseen:
3755 fvdef = finlist;
3756 break;
3757 default:
3758 break;
3760 parlev++;
3761 break;
3762 case ')':
3763 if (inattribute)
3765 if (--attrparlev == 0)
3766 inattribute = false;
3767 break;
3769 if (definedef != dnone)
3770 break;
3771 if (objdef == ocatseen && parlev == 1)
3773 make_C_tag (true); /* an Objective C category */
3774 objdef = oignore;
3776 if (--parlev == 0)
3778 switch (fvdef)
3780 case fstartlist:
3781 case finlist:
3782 fvdef = flistseen;
3783 break;
3784 default:
3785 break;
3787 if (!instruct
3788 && (typdef == tend
3789 || typdef == ttypeseen))
3791 typdef = tignore;
3792 make_C_tag (false); /* a typedef */
3795 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3796 parlev = 0;
3797 break;
3798 case '{':
3799 if (definedef != dnone)
3800 break;
3801 if (typdef == ttypeseen)
3803 /* Whenever typdef is set to tinbody (currently only
3804 here), typdefbracelev should be set to bracelev. */
3805 typdef = tinbody;
3806 typdefbracelev = bracelev;
3808 switch (fvdef)
3810 case flistseen:
3811 if (cplpl && !class_qualify)
3813 /* Remove class and namespace qualifiers from the token,
3814 leaving only the method/member name. */
3815 char *cc, *uqname = token_name.buffer;
3816 char *tok_end = token_name.buffer + token_name.len;
3818 for (cc = token_name.buffer; cc < tok_end; cc++)
3820 if (*cc == ':' && cc[1] == ':')
3822 uqname = cc + 2;
3823 cc++;
3826 if (uqname > token_name.buffer)
3828 int uqlen = strlen (uqname);
3829 linebuffer_setlen (&token_name, uqlen);
3830 memmove (token_name.buffer, uqname, uqlen + 1);
3833 make_C_tag (true); /* a function */
3834 /* FALLTHRU */
3835 case fignore:
3836 fvdef = fvnone;
3837 break;
3838 case fvnone:
3839 switch (objdef)
3841 case otagseen:
3842 make_C_tag (true); /* an Objective C class */
3843 objdef = oignore;
3844 break;
3845 case omethodtag:
3846 case omethodparm:
3847 make_C_tag (true); /* an Objective C method */
3848 objdef = oinbody;
3849 break;
3850 default:
3851 /* Neutralize `extern "C" {' grot. */
3852 if (bracelev == 0 && structdef == snone && nestlev == 0
3853 && typdef == tnone)
3854 bracelev = -1;
3856 break;
3857 default:
3858 break;
3860 switch (structdef)
3862 case skeyseen: /* unnamed struct */
3863 pushclass_above (bracelev, NULL, 0);
3864 structdef = snone;
3865 break;
3866 case stagseen: /* named struct or enum */
3867 case scolonseen: /* a class */
3868 pushclass_above (bracelev,token.line+token.offset, token.length);
3869 structdef = snone;
3870 make_C_tag (false); /* a struct or enum */
3871 break;
3872 default:
3873 break;
3875 bracelev += 1;
3876 break;
3877 case '*':
3878 if (definedef != dnone)
3879 break;
3880 if (fvdef == fstartlist)
3882 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3883 token.valid = false;
3885 break;
3886 case '}':
3887 if (definedef != dnone)
3888 break;
3889 bracelev -= 1;
3890 if (!ignoreindent && lp == newlb.buffer + 1)
3892 if (bracelev != 0)
3893 token.valid = false; /* unexpected value, token unreliable */
3894 bracelev = 0; /* reset brace level if first column */
3895 parlev = 0; /* also reset paren level, just in case... */
3897 else if (bracelev < 0)
3899 token.valid = false; /* something gone amiss, token unreliable */
3900 bracelev = 0;
3902 if (bracelev == 0 && fvdef == vignore)
3903 fvdef = fvnone; /* end of function */
3904 popclass_above (bracelev);
3905 structdef = snone;
3906 /* Only if typdef == tinbody is typdefbracelev significant. */
3907 if (typdef == tinbody && bracelev <= typdefbracelev)
3909 assert (bracelev == typdefbracelev);
3910 typdef = tend;
3912 break;
3913 case '=':
3914 if (definedef != dnone)
3915 break;
3916 switch (fvdef)
3918 case foperator:
3919 case finlist:
3920 case fignore:
3921 case vignore:
3922 break;
3923 case fvnameseen:
3924 if ((members && bracelev == 1)
3925 || (globals && bracelev == 0 && (!fvextern || declarations)))
3926 make_C_tag (false); /* a variable */
3927 /* FALLTHRU */
3928 default:
3929 fvdef = vignore;
3931 break;
3932 case '<':
3933 if (cplpl
3934 && (structdef == stagseen || fvdef == fvnameseen))
3936 templatelev++;
3937 break;
3939 goto resetfvdef;
3940 case '>':
3941 if (templatelev > 0)
3943 templatelev--;
3944 break;
3946 goto resetfvdef;
3947 case '+':
3948 case '-':
3949 if (objdef == oinbody && bracelev == 0)
3951 objdef = omethodsign;
3952 break;
3954 /* FALLTHRU */
3955 resetfvdef:
3956 case '#': case '~': case '&': case '%': case '/':
3957 case '|': case '^': case '!': case '.': case '?':
3958 if (definedef != dnone)
3959 break;
3960 /* These surely cannot follow a function tag in C. */
3961 switch (fvdef)
3963 case foperator:
3964 case finlist:
3965 case fignore:
3966 case vignore:
3967 break;
3968 default:
3969 fvdef = fvnone;
3971 break;
3972 case '\0':
3973 if (objdef == otagseen)
3975 make_C_tag (true); /* an Objective C class */
3976 objdef = oignore;
3978 /* If a macro spans multiple lines don't reset its state. */
3979 if (quotednl)
3980 CNL_SAVE_DEFINEDEF ();
3981 else
3982 CNL ();
3983 break;
3984 } /* switch (c) */
3986 } /* while not eof */
3988 free (lbs[0].lb.buffer);
3989 free (lbs[1].lb.buffer);
3993 * Process either a C++ file or a C file depending on the setting
3994 * of a global flag.
3996 static void
3997 default_C_entries (FILE *inf)
3999 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: call C_entries on INF with no language
   extension flags set.  */
static void
plain_C_entries (FILE *inf)
{
  C_entries (0, inf);
}
4009 /* Always do C++. */
4010 static void
4011 Cplusplus_entries (FILE *inf)
4013 C_entries (C_PLPL, inf);
4016 /* Always do Java. */
4017 static void
4018 Cjava_entries (FILE *inf)
4020 C_entries (C_JAVA, inf);
4023 /* Always do C*. */
4024 static void
4025 Cstar_entries (FILE *inf)
4027 C_entries (C_STAR, inf);
4030 /* Always do Yacc. */
4031 static void
4032 Yacc_entries (FILE *inf)
4034 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header over the lines of FILE_POINTER.  While
   perhaps_more_input (FILE_POINTER) holds, read one line into
   LINE_BUFFER with readline and set CHAR_POINTER to the start of
   LINE_BUFFER's text.  The statement that follows the macro invocation
   is the loop body.  The last line of the definition deliberately has
   no trailing backslash, so whatever follows the macro in the file is
   not spliced into it.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (perhaps_more_input (file_pointer)                              \
         && (readline (&(line_buffer), file_pointer),                   \
             (char_pointer) = (line_buffer).buffer,                     \
             true))

/* Test whether CP points at the keyword KW and the character right
   after KW satisfies notinname.  When both hold, CP is reassigned to
   skip_spaces of the position just past KW, and that pointer is the
   value of the last operand.  KW must be a string literal: it is
   pasted after "" and measured with sizeof.  CP is evaluated several
   times and must be an lvalue.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), true)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT but compares with strncaseeq, does not use
   notinname, and does not skip spaces: on a match CP is advanced to
   the character right after KW and nothing more.  KW must be a string
   literal; CP is evaluated more than once and must be an lvalue.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), true) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* step past kw only */

4058 * Read a file, but do no processing. This is used to do regexp
4059 * matching on files that have no language defined.
4061 static void
4062 just_read_file (FILE *inf)
4064 while (perhaps_more_input (inf))
4065 readline (&lb, inf);
4069 /* Fortran parsing */
4071 static void F_takeprec (void);
4072 static void F_getit (FILE *);
4074 static void
4075 F_takeprec (void)
4077 dbp = skip_spaces (dbp);
4078 if (*dbp != '*')
4079 return;
4080 dbp++;
4081 dbp = skip_spaces (dbp);
4082 if (strneq (dbp, "(*)", 3))
4084 dbp += 3;
4085 return;
4087 if (!c_isdigit (*dbp))
4089 --dbp; /* force failure */
4090 return;
4093 dbp++;
4094 while (c_isdigit (*dbp));
4097 static void
4098 F_getit (FILE *inf)
4100 register char *cp;
4102 dbp = skip_spaces (dbp);
4103 if (*dbp == '\0')
4105 readline (&lb, inf);
4106 dbp = lb.buffer;
4107 if (dbp[5] != '&')
4108 return;
4109 dbp += 6;
4110 dbp = skip_spaces (dbp);
4112 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4113 return;
4114 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4115 continue;
4116 make_tag (dbp, cp-dbp, true,
4117 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4121 static void
4122 Fortran_functions (FILE *inf)
4124 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4126 if (*dbp == '%')
4127 dbp++; /* Ratfor escape to fortran */
4128 dbp = skip_spaces (dbp);
4129 if (*dbp == '\0')
4130 continue;
4132 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4133 dbp = skip_spaces (dbp);
4135 if (LOOKING_AT_NOCASE (dbp, "pure"))
4136 dbp = skip_spaces (dbp);
4138 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4139 dbp = skip_spaces (dbp);
4141 switch (c_tolower (*dbp))
4143 case 'i':
4144 if (nocase_tail ("integer"))
4145 F_takeprec ();
4146 break;
4147 case 'r':
4148 if (nocase_tail ("real"))
4149 F_takeprec ();
4150 break;
4151 case 'l':
4152 if (nocase_tail ("logical"))
4153 F_takeprec ();
4154 break;
4155 case 'c':
4156 if (nocase_tail ("complex") || nocase_tail ("character"))
4157 F_takeprec ();
4158 break;
4159 case 'd':
4160 if (nocase_tail ("double"))
4162 dbp = skip_spaces (dbp);
4163 if (*dbp == '\0')
4164 continue;
4165 if (nocase_tail ("precision"))
4166 break;
4167 continue;
4169 break;
4171 dbp = skip_spaces (dbp);
4172 if (*dbp == '\0')
4173 continue;
4174 switch (c_tolower (*dbp))
4176 case 'f':
4177 if (nocase_tail ("function"))
4178 F_getit (inf);
4179 continue;
4180 case 's':
4181 if (nocase_tail ("subroutine"))
4182 F_getit (inf);
4183 continue;
4184 case 'e':
4185 if (nocase_tail ("entry"))
4186 F_getit (inf);
4187 continue;
4188 case 'b':
4189 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4191 dbp = skip_spaces (dbp);
4192 if (*dbp == '\0') /* assume un-named */
4193 make_tag ("blockdata", 9, true,
4194 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4195 else
4196 F_getit (inf); /* look for name */
4198 continue;
4205 * Ada parsing
4206 * Original code by
4207 * Philippe Waroquiers (1998)
4210 /* Once we are positioned after an "interesting" keyword, let's get
4211 the real tag value necessary. */
4212 static void
4213 Ada_getit (FILE *inf, const char *name_qualifier)
4215 register char *cp;
4216 char *name;
4217 char c;
4219 while (perhaps_more_input (inf))
4221 dbp = skip_spaces (dbp);
4222 if (*dbp == '\0'
4223 || (dbp[0] == '-' && dbp[1] == '-'))
4225 readline (&lb, inf);
4226 dbp = lb.buffer;
4228 switch (c_tolower (*dbp))
4230 case 'b':
4231 if (nocase_tail ("body"))
4233 /* Skipping body of procedure body or package body or ....
4234 resetting qualifier to body instead of spec. */
4235 name_qualifier = "/b";
4236 continue;
4238 break;
4239 case 't':
4240 /* Skipping type of task type or protected type ... */
4241 if (nocase_tail ("type"))
4242 continue;
4243 break;
4245 if (*dbp == '"')
4247 dbp += 1;
4248 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4249 continue;
4251 else
4253 dbp = skip_spaces (dbp);
4254 for (cp = dbp;
4255 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4256 cp++)
4257 continue;
4258 if (cp == dbp)
4259 return;
4261 c = *cp;
4262 *cp = '\0';
4263 name = concat (dbp, name_qualifier, "");
4264 *cp = c;
4265 make_tag (name, strlen (name), true,
4266 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4267 free (name);
4268 if (c == '"')
4269 dbp = cp + 1;
4270 return;
4274 static void
4275 Ada_funcs (FILE *inf)
4277 bool inquote = false;
4278 bool skip_till_semicolumn = false;
4280 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4282 while (*dbp != '\0')
4284 /* Skip a string i.e. "abcd". */
4285 if (inquote || (*dbp == '"'))
4287 dbp = strchr (dbp + !inquote, '"');
4288 if (dbp != NULL)
4290 inquote = false;
4291 dbp += 1;
4292 continue; /* advance char */
4294 else
4296 inquote = true;
4297 break; /* advance line */
4301 /* Skip comments. */
4302 if (dbp[0] == '-' && dbp[1] == '-')
4303 break; /* advance line */
4305 /* Skip character enclosed in single quote i.e. 'a'
4306 and skip single quote starting an attribute i.e. 'Image. */
4307 if (*dbp == '\'')
4309 dbp++ ;
4310 if (*dbp != '\0')
4311 dbp++;
4312 continue;
4315 if (skip_till_semicolumn)
4317 if (*dbp == ';')
4318 skip_till_semicolumn = false;
4319 dbp++;
4320 continue; /* advance char */
4323 /* Search for beginning of a token. */
4324 if (!begtoken (*dbp))
4326 dbp++;
4327 continue; /* advance char */
4330 /* We are at the beginning of a token. */
4331 switch (c_tolower (*dbp))
4333 case 'f':
4334 if (!packages_only && nocase_tail ("function"))
4335 Ada_getit (inf, "/f");
4336 else
4337 break; /* from switch */
4338 continue; /* advance char */
4339 case 'p':
4340 if (!packages_only && nocase_tail ("procedure"))
4341 Ada_getit (inf, "/p");
4342 else if (nocase_tail ("package"))
4343 Ada_getit (inf, "/s");
4344 else if (nocase_tail ("protected")) /* protected type */
4345 Ada_getit (inf, "/t");
4346 else
4347 break; /* from switch */
4348 continue; /* advance char */
4350 case 'u':
4351 if (typedefs && !packages_only && nocase_tail ("use"))
4353 /* when tagging types, avoid tagging use type Pack.Typename;
4354 for this, we will skip everything till a ; */
4355 skip_till_semicolumn = true;
4356 continue; /* advance char */
4359 case 't':
4360 if (!packages_only && nocase_tail ("task"))
4361 Ada_getit (inf, "/k");
4362 else if (typedefs && !packages_only && nocase_tail ("type"))
4364 Ada_getit (inf, "/t");
4365 while (*dbp != '\0')
4366 dbp += 1;
4368 else
4369 break; /* from switch */
4370 continue; /* advance char */
4373 /* Look for the end of the token. */
4374 while (!endtoken (*dbp))
4375 dbp++;
4377 } /* advance char */
4378 } /* advance line */
4383 * Unix and microcontroller assembly tag handling
4384 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4385 * Idea by Bob Weiner, Motorola Inc. (1994)
4387 static void
4388 Asm_labels (FILE *inf)
4390 register char *cp;
4392 LOOP_ON_INPUT_LINES (inf, lb, cp)
4394 /* If first char is alphabetic or one of [_.$], test for colon
4395 following identifier. */
4396 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4398 /* Read past label. */
4399 cp++;
4400 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4401 cp++;
4402 if (*cp == ':' || c_isspace (*cp))
4403 /* Found end of label, so copy it and add it to the table. */
4404 make_tag (lb.buffer, cp - lb.buffer, true,
4405 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4412 * Perl support
4413 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4414 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4415 * Perl variable names: /^(my|local).../
4416 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4417 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4418 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4420 static void
4421 Perl_functions (FILE *inf)
4423 char *package = savestr ("main"); /* current package name */
4424 register char *cp;
4426 LOOP_ON_INPUT_LINES (inf, lb, cp)
4428 cp = skip_spaces (cp);
4430 if (LOOKING_AT (cp, "package"))
4432 free (package);
4433 get_tag (cp, &package);
4435 else if (LOOKING_AT (cp, "sub"))
4437 char *pos, *sp;
4439 subr:
4440 sp = cp;
4441 while (!notinname (*cp))
4442 cp++;
4443 if (cp == sp)
4444 continue; /* nothing found */
4445 pos = strchr (sp, ':');
4446 if (pos && pos < cp && pos[1] == ':')
4447 /* The name is already qualified. */
4448 make_tag (sp, cp - sp, true,
4449 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4450 else
4451 /* Qualify it. */
4453 char savechar, *name;
4455 savechar = *cp;
4456 *cp = '\0';
4457 name = concat (package, "::", sp);
4458 *cp = savechar;
4459 make_tag (name, strlen (name), true,
4460 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4461 free (name);
4464 else if (LOOKING_AT (cp, "use constant")
4465 || LOOKING_AT (cp, "use constant::defer"))
4467 /* For hash style multi-constant like
4468 use constant { FOO => 123,
4469 BAR => 456 };
4470 only the first FOO is picked up. Parsing across the value
4471 expressions would be difficult in general, due to possible nested
4472 hashes, here-documents, etc. */
4473 if (*cp == '{')
4474 cp = skip_spaces (cp+1);
4475 goto subr;
4477 else if (globals) /* only if we are tagging global vars */
4479 /* Skip a qualifier, if any. */
4480 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4481 /* After "my" or "local", but before any following paren or space. */
4482 char *varstart = cp;
4484 if (qual /* should this be removed? If yes, how? */
4485 && (*cp == '$' || *cp == '@' || *cp == '%'))
4487 varstart += 1;
4489 cp++;
4490 while (c_isalnum (*cp) || *cp == '_');
4492 else if (qual)
4494 /* Should be examining a variable list at this point;
4495 could insist on seeing an open parenthesis. */
4496 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4497 cp++;
4499 else
4500 continue;
4502 make_tag (varstart, cp - varstart, false,
4503 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4506 free (package);
4511 * Python support
4512 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4513 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4514 * More ideas by seb bacon <seb@jamkit.com> (2002)
4516 static void
4517 Python_functions (FILE *inf)
4519 register char *cp;
4521 LOOP_ON_INPUT_LINES (inf, lb, cp)
4523 cp = skip_spaces (cp);
4524 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4526 char *name = cp;
4527 while (!notinname (*cp) && *cp != ':')
4528 cp++;
4529 make_tag (name, cp - name, true,
4530 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537 * PHP support
4538 * Look for:
4539 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4540 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4541 * - /^[ \t]*define\(\"[^\"]+/
4542 * Only with --members:
4543 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4544 * Idea by Diez B. Roggisch (2001)
4546 static void
4547 PHP_functions (FILE *inf)
4549 char *cp, *name;
4550 bool search_identifier = false;
4552 LOOP_ON_INPUT_LINES (inf, lb, cp)
4554 cp = skip_spaces (cp);
4555 name = cp;
4556 if (search_identifier
4557 && *cp != '\0')
4559 while (!notinname (*cp))
4560 cp++;
4561 make_tag (name, cp - name, true,
4562 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4563 search_identifier = false;
4565 else if (LOOKING_AT (cp, "function"))
4567 if (*cp == '&')
4568 cp = skip_spaces (cp+1);
4569 if (*cp != '\0')
4571 name = cp;
4572 while (!notinname (*cp))
4573 cp++;
4574 make_tag (name, cp - name, true,
4575 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4577 else
4578 search_identifier = true;
4580 else if (LOOKING_AT (cp, "class"))
4582 if (*cp != '\0')
4584 name = cp;
4585 while (*cp != '\0' && !c_isspace (*cp))
4586 cp++;
4587 make_tag (name, cp - name, false,
4588 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4590 else
4591 search_identifier = true;
4593 else if (strneq (cp, "define", 6)
4594 && (cp = skip_spaces (cp+6))
4595 && *cp++ == '('
4596 && (*cp == '"' || *cp == '\''))
4598 char quote = *cp++;
4599 name = cp;
4600 while (*cp != quote && *cp != '\0')
4601 cp++;
4602 make_tag (name, cp - name, false,
4603 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4605 else if (members
4606 && LOOKING_AT (cp, "var")
4607 && *cp == '$')
4609 name = cp;
4610 while (!notinname (*cp))
4611 cp++;
4612 make_tag (name, cp - name, false,
4613 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4620 * Cobol tag functions
4621 * We could look for anything that could be a paragraph name.
4622 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4623 * Idea by Corny de Souza (1993)
4625 static void
4626 Cobol_paragraphs (FILE *inf)
4628 register char *bp, *ep;
4630 LOOP_ON_INPUT_LINES (inf, lb, bp)
4632 if (lb.len < 9)
4633 continue;
4634 bp += 8;
4636 /* If eoln, compiler option or comment ignore whole line. */
4637 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4638 continue;
4640 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4641 continue;
4642 if (*ep++ == '.')
4643 make_tag (bp, ep - bp, true,
4644 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4650 * Makefile support
4651 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4653 static void
4654 Makefile_targets (FILE *inf)
4656 register char *bp;
4658 LOOP_ON_INPUT_LINES (inf, lb, bp)
4660 if (*bp == '\t' || *bp == '#')
4661 continue;
4662 while (*bp != '\0' && *bp != '=' && *bp != ':')
4663 bp++;
4664 if (*bp == ':' || (globals && *bp == '='))
4666 /* We should detect if there is more than one tag, but we do not.
4667 We just skip initial and final spaces. */
4668 char * namestart = skip_spaces (lb.buffer);
4669 while (--bp > namestart)
4670 if (!notinname (*bp))
4671 break;
4672 make_tag (namestart, bp - namestart + 1, true,
4673 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4680 * Pascal parsing
4681 * Original code by Mosur K. Mohan (1989)
4683 * Locates tags for procedures & functions. Doesn't do any type- or
4684 * var-definitions. It does look for the keyword "extern" or
4685 * "forward" immediately following the procedure statement; if found,
4686 * the tag is skipped.
4688 static void
4689 Pascal_functions (FILE *inf)
4691 linebuffer tline; /* mostly copied from C_entries */
4692 long save_lcno;
4693 int save_lineno, namelen, taglen;
4694 char c, *name;
4696 bool /* each of these flags is true if: */
4697 incomment, /* point is inside a comment */
4698 inquote, /* point is inside '..' string */
4699 get_tagname, /* point is after PROCEDURE/FUNCTION
4700 keyword, so next item = potential tag */
4701 found_tag, /* point is after a potential tag */
4702 inparms, /* point is within parameter-list */
4703 verify_tag; /* point has passed the parm-list, so the
4704 next token will determine whether this
4705 is a FORWARD/EXTERN to be ignored, or
4706 whether it is a real tag */
4708 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4709 name = NULL; /* keep compiler quiet */
4710 dbp = lb.buffer;
4711 *dbp = '\0';
4712 linebuffer_init (&tline);
4714 incomment = inquote = false;
4715 found_tag = false; /* have a proc name; check if extern */
4716 get_tagname = false; /* found "procedure" keyword */
4717 inparms = false; /* found '(' after "proc" */
4718 verify_tag = false; /* check if "extern" is ahead */
4721 while (perhaps_more_input (inf)) /* long main loop to get next char */
4723 c = *dbp++;
4724 if (c == '\0') /* if end of line */
4726 readline (&lb, inf);
4727 dbp = lb.buffer;
4728 if (*dbp == '\0')
4729 continue;
4730 if (!((found_tag && verify_tag)
4731 || get_tagname))
4732 c = *dbp++; /* only if don't need *dbp pointing
4733 to the beginning of the name of
4734 the procedure or function */
4736 if (incomment)
4738 if (c == '}') /* within { } comments */
4739 incomment = false;
4740 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4742 dbp++;
4743 incomment = false;
4745 continue;
4747 else if (inquote)
4749 if (c == '\'')
4750 inquote = false;
4751 continue;
4753 else
4754 switch (c)
4756 case '\'':
4757 inquote = true; /* found first quote */
4758 continue;
4759 case '{': /* found open { comment */
4760 incomment = true;
4761 continue;
4762 case '(':
4763 if (*dbp == '*') /* found open (* comment */
4765 incomment = true;
4766 dbp++;
4768 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4769 inparms = true;
4770 continue;
4771 case ')': /* end of parms list */
4772 if (inparms)
4773 inparms = false;
4774 continue;
4775 case ';':
4776 if (found_tag && !inparms) /* end of proc or fn stmt */
4778 verify_tag = true;
4779 break;
4781 continue;
4783 if (found_tag && verify_tag && (*dbp != ' '))
4785 /* Check if this is an "extern" declaration. */
4786 if (*dbp == '\0')
4787 continue;
4788 if (c_tolower (*dbp) == 'e')
4790 if (nocase_tail ("extern")) /* superfluous, really! */
4792 found_tag = false;
4793 verify_tag = false;
4796 else if (c_tolower (*dbp) == 'f')
4798 if (nocase_tail ("forward")) /* check for forward reference */
4800 found_tag = false;
4801 verify_tag = false;
4804 if (found_tag && verify_tag) /* not external proc, so make tag */
4806 found_tag = false;
4807 verify_tag = false;
4808 make_tag (name, namelen, true,
4809 tline.buffer, taglen, save_lineno, save_lcno);
4810 continue;
4813 if (get_tagname) /* grab name of proc or fn */
4815 char *cp;
4817 if (*dbp == '\0')
4818 continue;
4820 /* Find block name. */
4821 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4822 continue;
4824 /* Save all values for later tagging. */
4825 linebuffer_setlen (&tline, lb.len);
4826 strcpy (tline.buffer, lb.buffer);
4827 save_lineno = lineno;
4828 save_lcno = linecharno;
4829 name = tline.buffer + (dbp - lb.buffer);
4830 namelen = cp - dbp;
4831 taglen = cp - lb.buffer + 1;
4833 dbp = cp; /* set dbp to e-o-token */
4834 get_tagname = false;
4835 found_tag = true;
4836 continue;
4838 /* And proceed to check for "extern". */
4840 else if (!incomment && !inquote && !found_tag)
4842 /* Check for proc/fn keywords. */
4843 switch (c_tolower (c))
4845 case 'p':
4846 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4847 get_tagname = true;
4848 continue;
4849 case 'f':
4850 if (nocase_tail ("unction"))
4851 get_tagname = true;
4852 continue;
4855 } /* while not eof */
4857 free (tline.buffer);
4862 * Lisp tag functions
4863 * look for (def or (DEF, quote or QUOTE
4866 static void L_getit (void);
4868 static void
4869 L_getit (void)
4871 if (*dbp == '\'') /* Skip prefix quote */
4872 dbp++;
4873 else if (*dbp == '(')
4875 dbp++;
4876 /* Try to skip "(quote " */
4877 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4878 /* Ok, then skip "(" before name in (defstruct (foo)) */
4879 dbp = skip_spaces (dbp);
4881 get_tag (dbp, NULL);
4884 static void
4885 Lisp_functions (FILE *inf)
4887 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4889 if (dbp[0] != '(')
4890 continue;
4892 /* "(defvar foo)" is a declaration rather than a definition. */
4893 if (! declarations)
4895 char *p = dbp + 1;
4896 if (LOOKING_AT (p, "defvar"))
4898 p = skip_name (p); /* past var name */
4899 p = skip_spaces (p);
4900 if (*p == ')')
4901 continue;
4905 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4906 dbp += 3;
4908 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4910 dbp = skip_non_spaces (dbp);
4911 dbp = skip_spaces (dbp);
4912 L_getit ();
4914 else
4916 /* Check for (foo::defmumble name-defined ... */
4918 dbp++;
4919 while (!notinname (*dbp) && *dbp != ':');
4920 if (*dbp == ':')
4923 dbp++;
4924 while (*dbp == ':');
4926 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4928 dbp = skip_non_spaces (dbp);
4929 dbp = skip_spaces (dbp);
4930 L_getit ();
4939 * Lua script language parsing
4940 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4942 * "function" and "local function" are tags if they start at column 1.
4944 static void
4945 Lua_functions (FILE *inf)
4947 register char *bp;
4949 LOOP_ON_INPUT_LINES (inf, lb, bp)
4951 if (bp[0] != 'f' && bp[0] != 'l')
4952 continue;
4954 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4956 if (LOOKING_AT (bp, "function"))
4957 get_tag (bp, NULL);
4963 * PostScript tags
4964 * Just look for lines where the first character is '/'
4965 * Also look at "defineps" for PSWrap
4966 * Ideas by:
4967 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4968 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4970 static void
4971 PS_functions (FILE *inf)
4973 register char *bp, *ep;
4975 LOOP_ON_INPUT_LINES (inf, lb, bp)
4977 if (bp[0] == '/')
4979 for (ep = bp+1;
4980 *ep != '\0' && *ep != ' ' && *ep != '{';
4981 ep++)
4982 continue;
4983 make_tag (bp, ep - bp, true,
4984 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4986 else if (LOOKING_AT (bp, "defineps"))
4987 get_tag (bp, NULL);
4993 * Forth tags
4994 * Ignore anything after \ followed by space or in ( )
4995 * Look for words defined by :
4996 * Look for constant, code, create, defer, value, and variable
4997 * OBP extensions: Look for buffer:, field,
4998 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5000 static void
5001 Forth_words (FILE *inf)
5003 register char *bp;
5005 LOOP_ON_INPUT_LINES (inf, lb, bp)
5006 while ((bp = skip_spaces (bp))[0] != '\0')
5007 if (bp[0] == '\\' && c_isspace (bp[1]))
5008 break; /* read next line */
5009 else if (bp[0] == '(' && c_isspace (bp[1]))
5010 do /* skip to ) or eol */
5011 bp++;
5012 while (*bp != ')' && *bp != '\0');
5013 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5014 || LOOKING_AT_NOCASE (bp, "constant")
5015 || LOOKING_AT_NOCASE (bp, "code")
5016 || LOOKING_AT_NOCASE (bp, "create")
5017 || LOOKING_AT_NOCASE (bp, "defer")
5018 || LOOKING_AT_NOCASE (bp, "value")
5019 || LOOKING_AT_NOCASE (bp, "variable")
5020 || LOOKING_AT_NOCASE (bp, "buffer:")
5021 || LOOKING_AT_NOCASE (bp, "field"))
5022 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5023 else
5024 bp = skip_non_spaces (bp);
5029 * Scheme tag functions
5030 * look for (def... xyzzy
5031 * (def... (xyzzy
5032 * (def ... ((...(xyzzy ....
5033 * (set! xyzzy
5034 * Original code by Ken Haase (1985?)
5036 static void
5037 Scheme_functions (FILE *inf)
5039 register char *bp;
5041 LOOP_ON_INPUT_LINES (inf, lb, bp)
5043 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5045 bp = skip_non_spaces (bp+4);
5046 /* Skip over open parens and white space. Don't continue past
5047 '\0'. */
5048 while (*bp && notinname (*bp))
5049 bp++;
5050 get_tag (bp, NULL);
5052 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5053 get_tag (bp, NULL);
5058 /* Find tags in TeX and LaTeX input files. */
5060 /* TEX_toktab is a table of TeX control sequences that define tags.
5061 * Each entry records one such control sequence.
5063 * Original code from who knows whom.
5064 * Ideas by:
5065 * Stefan Monnier (2002)
5068 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5070 /* Default set of control sequences to put into TEX_toktab.
5071 The value of environment var TEXTAGS is prepended to this. */
5072 static const char *TEX_defenv = "\
5073 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5074 :part:appendix:entry:index:def\
5075 :newcommand:renewcommand:newenvironment:renewenvironment";
5077 static void TEX_decode_env (const char *, const char *);
5080 * TeX/LaTeX scanning loop.
5082 static void
5083 TeX_commands (FILE *inf)
5085 char *cp;
5086 linebuffer *key;
5088 char TEX_esc = '\0';
5089 char TEX_opgrp, TEX_clgrp;
5091 /* Initialize token table once from environment. */
5092 if (TEX_toktab == NULL)
5093 TEX_decode_env ("TEXTAGS", TEX_defenv);
5095 LOOP_ON_INPUT_LINES (inf, lb, cp)
5097 /* Look at each TEX keyword in line. */
5098 for (;;)
5100 /* Look for a TEX escape. */
5101 while (true)
5103 char c = *cp++;
5104 if (c == '\0' || c == '%')
5105 goto tex_next_line;
5107 /* Select either \ or ! as escape character, whichever comes
5108 first outside a comment. */
5109 if (!TEX_esc)
5110 switch (c)
5112 case '\\':
5113 TEX_esc = c;
5114 TEX_opgrp = '{';
5115 TEX_clgrp = '}';
5116 break;
5118 case '!':
5119 TEX_esc = c;
5120 TEX_opgrp = '<';
5121 TEX_clgrp = '>';
5122 break;
5125 if (c == TEX_esc)
5126 break;
5129 for (key = TEX_toktab; key->buffer != NULL; key++)
5130 if (strneq (cp, key->buffer, key->len))
5132 char *p;
5133 int namelen, linelen;
5134 bool opgrp = false;
5136 cp = skip_spaces (cp + key->len);
5137 if (*cp == TEX_opgrp)
5139 opgrp = true;
5140 cp++;
5142 for (p = cp;
5143 (!c_isspace (*p) && *p != '#' &&
5144 *p != TEX_opgrp && *p != TEX_clgrp);
5145 p++)
5146 continue;
5147 namelen = p - cp;
5148 linelen = lb.len;
5149 if (!opgrp || *p == TEX_clgrp)
5151 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5152 p++;
5153 linelen = p - lb.buffer + 1;
5155 make_tag (cp, namelen, true,
5156 lb.buffer, linelen, lineno, linecharno);
5157 goto tex_next_line; /* We only tag a line once */
5160 tex_next_line:
5165 /* Read environment and prepend it to the default string.
5166 Build token table. */
5167 static void
5168 TEX_decode_env (const char *evarname, const char *defenv)
5170 register const char *env, *p;
5171 int i, len;
5173 /* Append default string to environment. */
5174 env = getenv (evarname);
5175 if (!env)
5176 env = defenv;
5177 else
5178 env = concat (env, defenv, "");
5180 /* Allocate a token table */
5181 for (len = 1, p = env; (p = strchr (p, ':')); )
5182 if (*++p)
5183 len++;
5184 TEX_toktab = xnew (len, linebuffer);
5186 /* Unpack environment string into token table. Be careful about */
5187 /* zero-length strings (leading ':', "::" and trailing ':') */
5188 for (i = 0; *env != '\0';)
5190 p = strchr (env, ':');
5191 if (!p) /* End of environment string. */
5192 p = env + strlen (env);
5193 if (p - env > 0)
5194 { /* Only non-zero strings. */
5195 TEX_toktab[i].buffer = savenstr (env, p - env);
5196 TEX_toktab[i].len = p - env;
5197 i++;
5199 if (*p)
5200 env = p + 1;
5201 else
5203 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5204 TEX_toktab[i].len = 0;
5205 break;
5211 /* Texinfo support. Dave Love, Mar. 2000. */
5212 static void
5213 Texinfo_nodes (FILE *inf)
5215 char *cp, *start;
5216 LOOP_ON_INPUT_LINES (inf, lb, cp)
5217 if (LOOKING_AT (cp, "@node"))
5219 start = cp;
5220 while (*cp != '\0' && *cp != ',')
5221 cp++;
5222 make_tag (start, cp - start, true,
5223 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5229 * HTML support.
5230 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5231 * Contents of <a name=xxx> are tags with name xxx.
5233 * Francesco Potortì, 2002.
5235 static void
5236 HTML_labels (FILE *inf)
5238 bool getnext = false; /* next text outside of HTML tags is a tag */
5239 bool skiptag = false; /* skip to the end of the current HTML tag */
5240 bool intag = false; /* inside an html tag, looking for ID= */
5241 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5242 char *end;
5245 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5247 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5248 for (;;) /* loop on the same line */
5250 if (skiptag) /* skip HTML tag */
5252 while (*dbp != '\0' && *dbp != '>')
5253 dbp++;
5254 if (*dbp == '>')
5256 dbp += 1;
5257 skiptag = false;
5258 continue; /* look on the same line */
5260 break; /* go to next line */
5263 else if (intag) /* look for "name=" or "id=" */
5265 while (*dbp != '\0' && *dbp != '>'
5266 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5267 dbp++;
5268 if (*dbp == '\0')
5269 break; /* go to next line */
5270 if (*dbp == '>')
5272 dbp += 1;
5273 intag = false;
5274 continue; /* look on the same line */
5276 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5277 || LOOKING_AT_NOCASE (dbp, "id="))
5279 bool quoted = (dbp[0] == '"');
5281 if (quoted)
5282 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5283 continue;
5284 else
5285 for (end = dbp; *end != '\0' && intoken (*end); end++)
5286 continue;
5287 linebuffer_setlen (&token_name, end - dbp);
5288 memcpy (token_name.buffer, dbp, end - dbp);
5289 token_name.buffer[end - dbp] = '\0';
5291 dbp = end;
5292 intag = false; /* we found what we looked for */
5293 skiptag = true; /* skip to the end of the tag */
5294 getnext = true; /* then grab the text */
5295 continue; /* look on the same line */
5297 dbp += 1;
5300 else if (getnext) /* grab next tokens and tag them */
5302 dbp = skip_spaces (dbp);
5303 if (*dbp == '\0')
5304 break; /* go to next line */
5305 if (*dbp == '<')
5307 intag = true;
5308 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5309 continue; /* look on the same line */
5312 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5313 continue;
5314 make_tag (token_name.buffer, token_name.len, true,
5315 dbp, end - dbp, lineno, linecharno);
5316 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5317 getnext = false;
5318 break; /* go to next line */
5321 else /* look for an interesting HTML tag */
5323 while (*dbp != '\0' && *dbp != '<')
5324 dbp++;
5325 if (*dbp == '\0')
5326 break; /* go to next line */
5327 intag = true;
5328 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5330 inanchor = true;
5331 continue; /* look on the same line */
5333 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5334 || LOOKING_AT_NOCASE (dbp, "<h1>")
5335 || LOOKING_AT_NOCASE (dbp, "<h2>")
5336 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5338 intag = false;
5339 getnext = true;
5340 continue; /* look on the same line */
5342 dbp += 1;
5349 * Prolog support
5351 * Assumes that the predicate or rule starts at column 0.
5352 * Only the first clause of a predicate or rule is added.
5353 * Original code by Sunichirou Sugou (1989)
5354 * Rewritten by Anders Lindgren (1996)
5356 static size_t prolog_pr (char *, char *);
5357 static void prolog_skip_comment (linebuffer *, FILE *);
5358 static size_t prolog_atom (char *, size_t);
5360 static void
5361 Prolog_functions (FILE *inf)
5363 char *cp, *last;
5364 size_t len;
5365 size_t allocated;
5367 allocated = 0;
5368 len = 0;
5369 last = NULL;
5371 LOOP_ON_INPUT_LINES (inf, lb, cp)
5373 if (cp[0] == '\0') /* Empty line */
5374 continue;
5375 else if (c_isspace (cp[0])) /* Not a predicate */
5376 continue;
5377 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5378 prolog_skip_comment (&lb, inf);
5379 else if ((len = prolog_pr (cp, last)) > 0)
5381 /* Predicate or rule. Store the function name so that we
5382 only generate a tag for the first clause. */
5383 if (last == NULL)
5384 last = xnew (len + 1, char);
5385 else if (len + 1 > allocated)
5386 xrnew (last, len + 1, char);
5387 allocated = len + 1;
5388 memcpy (last, cp, len);
5389 last[len] = '\0';
5392 free (last);
5396 static void
5397 prolog_skip_comment (linebuffer *plb, FILE *inf)
5399 char *cp;
5403 for (cp = plb->buffer; *cp != '\0'; cp++)
5404 if (cp[0] == '*' && cp[1] == '/')
5405 return;
5406 readline (plb, inf);
5408 while (perhaps_more_input (inf));
5412 * A predicate or rule definition is added if it matches:
5413 * <beginning of line><Prolog Atom><whitespace>(
5414 * or <beginning of line><Prolog Atom><whitespace>:-
5416 * It is added to the tags database if it doesn't match the
5417 * name of the previous clause header.
5419 * Return the size of the name of the predicate or rule, or 0 if no
5420 * header was found.
5422 static size_t
5423 prolog_pr (char *s, char *last)
5425 /* Name of last clause. */
5427 size_t pos;
5428 size_t len;
5430 pos = prolog_atom (s, 0);
5431 if (! pos)
5432 return 0;
5434 len = pos;
5435 pos = skip_spaces (s + pos) - s;
5437 if ((s[pos] == '.'
5438 || (s[pos] == '(' && (pos += 1))
5439 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5440 && (last == NULL /* save only the first clause */
5441 || len != strlen (last)
5442 || !strneq (s, last, len)))
5444 make_tag (s, len, true, s, pos, lineno, linecharno);
5445 return len;
5447 else
5448 return 0;
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  char *start = s + pos;
  char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; c_isalnum (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: scan up to the closing quote.  */
  p++;
  for (;;)
    {
      if (*p == '\0')
        return 0;               /* Multiline quoted atoms are ignored. */

      if (*p == '\'')
        {
          p++;
          if (*p != '\'')
            break;              /* that was the closing quote */
          p++;                  /* a doubled quote stands for one quote */
        }
      else if (*p == '\\')
        {
          if (p[1] == '\0')
            return 0;
          p += 2;
        }
      else
        p++;
    }
  return p - start;
}
5510 * Support for Erlang
5512 * Generates tags for functions, defines, and records.
5513 * Assumes that Erlang functions start at column 0.
5514 * Original code by Anders Lindgren (1996)
5516 static int erlang_func (char *, char *);
5517 static void erlang_attribute (char *);
5518 static int erlang_atom (char *);
5520 static void
5521 Erlang_functions (FILE *inf)
5523 char *cp, *last;
5524 int len;
5525 int allocated;
5527 allocated = 0;
5528 len = 0;
5529 last = NULL;
5531 LOOP_ON_INPUT_LINES (inf, lb, cp)
5533 if (cp[0] == '\0') /* Empty line */
5534 continue;
5535 else if (c_isspace (cp[0])) /* Not function nor attribute */
5536 continue;
5537 else if (cp[0] == '%') /* comment */
5538 continue;
5539 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5540 continue;
5541 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5543 erlang_attribute (cp);
5544 if (last != NULL)
5546 free (last);
5547 last = NULL;
5550 else if ((len = erlang_func (cp, last)) > 0)
5553 * Function. Store the function name so that we only
5554 * generates a tag for the first clause.
5556 if (last == NULL)
5557 last = xnew (len + 1, char);
5558 else if (len + 1 > allocated)
5559 xrnew (last, len + 1, char);
5560 allocated = len + 1;
5561 memcpy (last, cp, len);
5562 last[len] = '\0';
5565 free (last);
5570 * A function definition is added if it matches:
5571 * <beginning of line><Erlang Atom><whitespace>(
5573 * It is added to the tags database if it doesn't match the
5574 * name of the previous clause header.
5576 * Return the size of the name of the function, or 0 if no function
5577 * was found.
5579 static int
5580 erlang_func (char *s, char *last)
5582 /* Name of last clause. */
5584 int pos;
5585 int len;
5587 pos = erlang_atom (s);
5588 if (pos < 1)
5589 return 0;
5591 len = pos;
5592 pos = skip_spaces (s + pos) - s;
5594 /* Save only the first clause. */
5595 if (s[pos++] == '('
5596 && (last == NULL
5597 || len != (int)strlen (last)
5598 || !strneq (s, last, len)))
5600 make_tag (s, len, true, s, pos, lineno, linecharno);
5601 return len;
5604 return 0;
5609 * Handle attributes. Currently, tags are generated for defines
5610 * and records.
5612 * They are on the form:
5613 * -define(foo, bar).
5614 * -define(Foo(M, N), M+N).
5615 * -record(graph, {vtab = notable, cyclic = true}).
5617 static void
5618 erlang_attribute (char *s)
5620 char *cp = s;
5622 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5623 && *cp++ == '(')
5625 int len = erlang_atom (skip_spaces (cp));
5626 if (len > 0)
5627 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5629 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int i;

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      i = 1;
      while (c_isalnum (s[i]) || s[i] == '_')
        i++;
      return i;
    }

  if (s[0] != '\'')
    return 0;

  /* The atom is quoted: find the closing quote.  A backslash makes the
     character after it part of the atom.  */
  i = 1;
  while (s[i] != '\'')
    {
      if (s[i] == '\0')         /* multiline quoted atoms are ignored */
        return 0;
      if (s[i] == '\\')
        {
          i++;
          if (s[i] == '\0')
            return 0;
        }
      i++;
    }
  return i + 1;                 /* count the closing quote too */
}
5662 static char *scan_separators (char *);
5663 static void add_regex (char *, language *);
5664 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  /* Letters that have a meaning after a backslash, and the characters
     they stand for: BEL, BS, DEL, ESC, FF, NL, CR, TAB, VT.  */
  static const char escape_letters[] = "abdefnrtv";
  static const char escape_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };

  const char sep = name[0];
  char *in = name + 1;          /* next character to read */
  char *out = name;             /* next position to write; stays behind IN */
  char *result;
  bool after_backslash = false;

  for (; *in != '\0'; in++)
    {
      char ch = *in;

      if (after_backslash)
        {
          const char *hit = strchr (escape_letters, ch);

          if (hit != NULL)
            *out++ = escape_values[hit - escape_letters];
          else if (ch == sep)
            *out++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *out++ = '\\';
              *out++ = ch;
            }
          after_backslash = false;
        }
      else if (ch == '\\')
        after_backslash = true;
      else if (ch == sep)
        break;
      else
        *out++ = ch;
    }

  /* Anything other than the separator here means that the end of the
     string was reached: signal an unterminated regexp.  */
  result = (*in == sep) ? in : NULL;

  /* Terminate copied string. */
  *out = '\0';
  return result;
}
5725 /* Look at the argument of --regex or --no-regex and do the right
5726 thing. Same for each line of a regexp file. */
5727 static void
5728 analyze_regex (char *regex_arg)
5730 if (regex_arg == NULL)
5732 free_regexps (); /* --no-regex: remove existing regexps */
5733 return;
5736 /* A real --regexp option or a line in a regexp file. */
5737 switch (regex_arg[0])
5739 /* Comments in regexp file or null arg to --regex. */
5740 case '\0':
5741 case ' ':
5742 case '\t':
5743 break;
5745 /* Read a regex file. This is recursive and may result in a
5746 loop, which will stop when the file descriptors are exhausted. */
5747 case '@':
5749 FILE *regexfp;
5750 linebuffer regexbuf;
5751 char *regexfile = regex_arg + 1;
5753 /* regexfile is a file containing regexps, one per line. */
5754 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5755 if (regexfp == NULL)
5756 pfatal (regexfile);
5757 linebuffer_init (&regexbuf);
5758 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5759 analyze_regex (regexbuf.buffer);
5760 free (regexbuf.buffer);
5761 if (fclose (regexfp) != 0)
5762 pfatal (regexfile);
5764 break;
5766 /* Regexp to be used for a specific language only. */
5767 case '{':
5769 language *lang;
5770 char *lang_name = regex_arg + 1;
5771 char *cp;
5773 for (cp = lang_name; *cp != '}'; cp++)
5774 if (*cp == '\0')
5776 error ("unterminated language name in regex: %s", regex_arg);
5777 return;
5779 *cp++ = '\0';
5780 lang = get_language_from_langname (lang_name);
5781 if (lang == NULL)
5782 return;
5783 add_regex (cp, lang);
5785 break;
5787 /* Regexp to be used for any language. */
5788 default:
5789 add_regex (regex_arg, NULL);
5790 break;
5794 /* Separate the regexp pattern, compile it,
5795 and care for optional name and modifiers. */
5796 static void
5797 add_regex (char *regexp_pattern, language *lang)
5799 static struct re_pattern_buffer zeropattern;
5800 char sep, *pat, *name, *modifiers;
5801 char empty = '\0';
5802 const char *err;
5803 struct re_pattern_buffer *patbuf;
5804 regexp *rp;
5805 bool
5806 force_explicit_name = true, /* do not use implicit tag names */
5807 ignore_case = false, /* case is significant */
5808 multi_line = false, /* matches are done one line at a time */
5809 single_line = false; /* dot does not match newline */
5812 if (strlen (regexp_pattern) < 3)
5814 error ("null regexp");
5815 return;
5817 sep = regexp_pattern[0];
5818 name = scan_separators (regexp_pattern);
5819 if (name == NULL)
5821 error ("%s: unterminated regexp", regexp_pattern);
5822 return;
5824 if (name[1] == sep)
5826 error ("null name for regexp \"%s\"", regexp_pattern);
5827 return;
5829 modifiers = scan_separators (name);
5830 if (modifiers == NULL) /* no terminating separator --> no name */
5832 modifiers = name;
5833 name = &empty;
5835 else
5836 modifiers += 1; /* skip separator */
5838 /* Parse regex modifiers. */
5839 for (; modifiers[0] != '\0'; modifiers++)
5840 switch (modifiers[0])
5842 case 'N':
5843 if (modifiers == name)
5844 error ("forcing explicit tag name but no name, ignoring");
5845 force_explicit_name = true;
5846 break;
5847 case 'i':
5848 ignore_case = true;
5849 break;
5850 case 's':
5851 single_line = true;
5852 /* FALLTHRU */
5853 case 'm':
5854 multi_line = true;
5855 need_filebuf = true;
5856 break;
5857 default:
5858 error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
5859 break;
5862 patbuf = xnew (1, struct re_pattern_buffer);
5863 *patbuf = zeropattern;
5864 if (ignore_case)
5866 static char lc_trans[UCHAR_MAX + 1];
5867 int i;
5868 for (i = 0; i < UCHAR_MAX + 1; i++)
5869 lc_trans[i] = c_tolower (i);
5870 patbuf->translate = lc_trans; /* translation table to fold case */
5873 if (multi_line)
5874 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5875 else
5876 pat = regexp_pattern;
5878 if (single_line)
5879 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5880 else
5881 re_set_syntax (RE_SYNTAX_EMACS);
5883 err = re_compile_pattern (pat, strlen (pat), patbuf);
5884 if (multi_line)
5885 free (pat);
5886 if (err != NULL)
5888 error ("%s while compiling pattern", err);
5889 return;
5892 rp = p_head;
5893 p_head = xnew (1, regexp);
5894 p_head->pattern = savestr (regexp_pattern);
5895 p_head->p_next = rp;
5896 p_head->lang = lang;
5897 p_head->pat = patbuf;
5898 p_head->name = savestr (name);
5899 p_head->error_signaled = false;
5900 p_head->force_explicit_name = force_explicit_name;
5901 p_head->ignore_case = ignore_case;
5902 p_head->multi_line = multi_line;
5906 * Do the substitutions indicated by the regular expression and
5907 * arguments.
5909 static char *
5910 substitute (char *in, char *out, struct re_registers *regs)
5912 char *result, *t;
5913 int size, dig, diglen;
5915 result = NULL;
5916 size = strlen (out);
5918 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5919 if (out[size - 1] == '\\')
5920 fatal ("pattern error in \"%s\"", out);
5921 for (t = strchr (out, '\\');
5922 t != NULL;
5923 t = strchr (t + 2, '\\'))
5924 if (c_isdigit (t[1]))
5926 dig = t[1] - '0';
5927 diglen = regs->end[dig] - regs->start[dig];
5928 size += diglen - 2;
5930 else
5931 size -= 1;
5933 /* Allocate space and do the substitutions. */
5934 assert (size >= 0);
5935 result = xnew (size + 1, char);
5937 for (t = result; *out != '\0'; out++)
5938 if (*out == '\\' && c_isdigit (*++out))
5940 dig = *out - '0';
5941 diglen = regs->end[dig] - regs->start[dig];
5942 memcpy (t, in + regs->start[dig], diglen);
5943 t += diglen;
5945 else
5946 *t++ = *out;
5947 *t = '\0';
5949 assert (t <= result + size);
5950 assert (t - result == (int)strlen (result));
5952 return result;
5955 /* Deallocate all regexps. */
5956 static void
5957 free_regexps (void)
5959 regexp *rp;
5960 while (p_head != NULL)
5962 rp = p_head->p_next;
5963 free (p_head->pattern);
5964 free (p_head->name);
5965 free (p_head);
5966 p_head = rp;
5968 return;
5972 * Reads the whole file as a single string from `filebuf' and looks for
5973 * multi-line regular expressions, creating tags on matches.
5974 * readline already dealt with normal regexps.
5976 * Idea by Ben Wing <ben@666.com> (2002).
5978 static void
5979 regex_tag_multiline (void)
5981 char *buffer = filebuf.buffer;
5982 regexp *rp;
5983 char *name;
5985 for (rp = p_head; rp != NULL; rp = rp->p_next)
5987 int match = 0;
5989 if (!rp->multi_line)
5990 continue; /* skip normal regexps */
5992 /* Generic initializations before parsing file from memory. */
5993 lineno = 1; /* reset global line number */
5994 charno = 0; /* reset global char number */
5995 linecharno = 0; /* reset global char number of line start */
5997 /* Only use generic regexps or those for the current language. */
5998 if (rp->lang != NULL && rp->lang != curfdp->lang)
5999 continue;
6001 while (match >= 0 && match < filebuf.len)
6003 match = re_search (rp->pat, buffer, filebuf.len, charno,
6004 filebuf.len - match, &rp->regs);
6005 switch (match)
6007 case -2:
6008 /* Some error. */
6009 if (!rp->error_signaled)
6011 error ("regexp stack overflow while matching \"%s\"",
6012 rp->pattern);
6013 rp->error_signaled = true;
6015 break;
6016 case -1:
6017 /* No match. */
6018 break;
6019 default:
6020 if (match == rp->regs.end[0])
6022 if (!rp->error_signaled)
6024 error ("regexp matches the empty string: \"%s\"",
6025 rp->pattern);
6026 rp->error_signaled = true;
6028 match = -3; /* exit from while loop */
6029 break;
6032 /* Match occurred. Construct a tag. */
6033 while (charno < rp->regs.end[0])
6034 if (buffer[charno++] == '\n')
6035 lineno++, linecharno = charno;
6036 name = rp->name;
6037 if (name[0] == '\0')
6038 name = NULL;
6039 else /* make a named tag */
6040 name = substitute (buffer, rp->name, &rp->regs);
6041 if (rp->force_explicit_name)
6042 /* Force explicit tag name, if a name is there. */
6043 pfnote (name, true, buffer + linecharno,
6044 charno - linecharno + 1, lineno, linecharno);
6045 else
6046 make_tag (name, strlen (name), true, buffer + linecharno,
6047 charno - linecharno + 1, lineno, linecharno);
6048 break;
6055 static bool
6056 nocase_tail (const char *cp)
6058 int len = 0;
6060 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6061 cp++, len++;
6062 if (*cp == '\0' && !intoken (dbp[len]))
6064 dbp += len;
6065 return true;
6067 return false;
6070 static void
6071 get_tag (register char *bp, char **namepp)
6073 register char *cp = bp;
6075 if (*bp != '\0')
6077 /* Go till you get to white space or a syntactic break */
6078 for (cp = bp + 1; !notinname (*cp); cp++)
6079 continue;
6080 make_tag (bp, cp - bp, true,
6081 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6084 if (namepp != NULL)
6085 *namepp = savenstr (bp, cp - bp);
6089 * Read a line of text from `stream' into `lbp', excluding the
6090 * newline or CR-NL, if any. Return the number of characters read from
6091 * `stream', which is the length of the line including the newline.
6093 * On DOS or Windows we do not count the CR character, if any before the
6094 * NL, in the returned length; this mirrors the behavior of Emacs on those
6095 * platforms (for text files, it translates CR-NL to NL as it reads in the
6096 * file).
6098 * If multi-line regular expressions are requested, each line read is
6099 * appended to `filebuf'.
6101 static long
6102 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6104 char *buffer = lbp->buffer;
6105 char *p = lbp->buffer;
6106 char *pend;
6107 int chars_deleted;
6109 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6111 for (;;)
6113 register int c = getc (stream);
6114 if (p == pend)
6116 /* We're at the end of linebuffer: expand it. */
6117 lbp->size *= 2;
6118 xrnew (buffer, lbp->size, char);
6119 p += buffer - lbp->buffer;
6120 pend = buffer + lbp->size;
6121 lbp->buffer = buffer;
6123 if (c == EOF)
6125 if (ferror (stream))
6126 perror (filename);
6127 *p = '\0';
6128 chars_deleted = 0;
6129 break;
6131 if (c == '\n')
6133 if (p > buffer && p[-1] == '\r')
6135 p -= 1;
6136 chars_deleted = 2;
6138 else
6140 chars_deleted = 1;
6142 *p = '\0';
6143 break;
6145 *p++ = c;
6147 lbp->len = p - buffer;
6149 if (need_filebuf /* we need filebuf for multi-line regexps */
6150 && chars_deleted > 0) /* not at EOF */
6152 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6154 /* Expand filebuf. */
6155 filebuf.size *= 2;
6156 xrnew (filebuf.buffer, filebuf.size, char);
6158 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6159 filebuf.len += lbp->len;
6160 filebuf.buffer[filebuf.len++] = '\n';
6161 filebuf.buffer[filebuf.len] = '\0';
6164 return lbp->len + chars_deleted;
6168 * Like readline_internal, above, but in addition try to match the
6169 * input line against relevant regular expressions and manage #line
6170 * directives.
6172 static void
6173 readline (linebuffer *lbp, FILE *stream)
6175 long result;
6177 linecharno = charno; /* update global char number of line start */
6178 result = readline_internal (lbp, stream, infilename); /* read line */
6179 lineno += 1; /* increment global line number */
6180 charno += result; /* increment global char number */
6182 /* Honor #line directives. */
6183 if (!no_line_directive)
6185 static bool discard_until_line_directive;
6187 /* Check whether this is a #line directive. */
6188 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6190 unsigned int lno;
6191 int start = 0;
6193 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6194 && start > 0) /* double quote character found */
6196 char *endp = lbp->buffer + start;
6198 while ((endp = strchr (endp, '"')) != NULL
6199 && endp[-1] == '\\')
6200 endp++;
6201 if (endp != NULL)
6202 /* Ok, this is a real #line directive. Let's deal with it. */
6204 char *taggedabsname; /* absolute name of original file */
6205 char *taggedfname; /* name of original file as given */
6206 char *name; /* temp var */
6208 discard_until_line_directive = false; /* found it */
6209 name = lbp->buffer + start;
6210 *endp = '\0';
6211 canonicalize_filename (name);
6212 taggedabsname = absolute_filename (name, tagfiledir);
6213 if (filename_is_absolute (name)
6214 || filename_is_absolute (curfdp->infname))
6215 taggedfname = savestr (taggedabsname);
6216 else
6217 taggedfname = relative_filename (taggedabsname,tagfiledir);
6219 if (streq (curfdp->taggedfname, taggedfname))
6220 /* The #line directive is only a line number change. We
6221 deal with this afterwards. */
6222 free (taggedfname);
6223 else
6224 /* The tags following this #line directive should be
6225 attributed to taggedfname. In order to do this, set
6226 curfdp accordingly. */
6228 fdesc *fdp; /* file description pointer */
6230 /* Go look for a file description already set up for the
6231 file indicated in the #line directive. If there is
6232 one, use it from now until the next #line
6233 directive. */
6234 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6235 if (streq (fdp->infname, curfdp->infname)
6236 && streq (fdp->taggedfname, taggedfname))
6237 /* If we remove the second test above (after the &&)
6238 then all entries pertaining to the same file are
6239 coalesced in the tags file. If we use it, then
6240 entries pertaining to the same file but generated
6241 from different files (via #line directives) will
6242 go into separate sections in the tags file. These
6243 alternatives look equivalent. The first one
6244 destroys some apparently useless information. */
6246 curfdp = fdp;
6247 free (taggedfname);
6248 break;
6250 /* Else, if we already tagged the real file, skip all
6251 input lines until the next #line directive. */
6252 if (fdp == NULL) /* not found */
6253 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6254 if (streq (fdp->infabsname, taggedabsname))
6256 discard_until_line_directive = true;
6257 free (taggedfname);
6258 break;
6260 /* Else create a new file description and use that from
6261 now on, until the next #line directive. */
6262 if (fdp == NULL) /* not found */
6264 fdp = fdhead;
6265 fdhead = xnew (1, fdesc);
6266 *fdhead = *curfdp; /* copy curr. file description */
6267 fdhead->next = fdp;
6268 fdhead->infname = savestr (curfdp->infname);
6269 fdhead->infabsname = savestr (curfdp->infabsname);
6270 fdhead->infabsdir = savestr (curfdp->infabsdir);
6271 fdhead->taggedfname = taggedfname;
6272 fdhead->usecharno = false;
6273 fdhead->prop = NULL;
6274 fdhead->written = false;
6275 curfdp = fdhead;
6278 free (taggedabsname);
6279 lineno = lno - 1;
6280 readline (lbp, stream);
6281 return;
6282 } /* if a real #line directive */
6283 } /* if #line is followed by a number */
6284 } /* if line begins with "#line " */
6286 /* If we are here, no #line directive was found. */
6287 if (discard_until_line_directive)
6289 if (result > 0)
6291 /* Do a tail recursion on ourselves, thus discarding the contents
6292 of the line buffer. */
6293 readline (lbp, stream);
6294 return;
6296 /* End of file. */
6297 discard_until_line_directive = false;
6298 return;
6300 } /* if #line directives should be considered */
6303 int match;
6304 regexp *rp;
6305 char *name;
6307 /* Match against relevant regexps. */
6308 if (lbp->len > 0)
6309 for (rp = p_head; rp != NULL; rp = rp->p_next)
6311 /* Only use generic regexps or those for the current language.
6312 Also do not use multiline regexps, which is the job of
6313 regex_tag_multiline. */
6314 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6315 || rp->multi_line)
6316 continue;
6318 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6319 switch (match)
6321 case -2:
6322 /* Some error. */
6323 if (!rp->error_signaled)
6325 error ("regexp stack overflow while matching \"%s\"",
6326 rp->pattern);
6327 rp->error_signaled = true;
6329 break;
6330 case -1:
6331 /* No match. */
6332 break;
6333 case 0:
6334 /* Empty string matched. */
6335 if (!rp->error_signaled)
6337 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6338 rp->error_signaled = true;
6340 break;
6341 default:
6342 /* Match occurred. Construct a tag. */
6343 name = rp->name;
6344 if (name[0] == '\0')
6345 name = NULL;
6346 else /* make a named tag */
6347 name = substitute (lbp->buffer, rp->name, &rp->regs);
6348 if (rp->force_explicit_name)
6349 /* Force explicit tag name, if a name is there. */
6350 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6351 else
6352 make_tag (name, strlen (name), true,
6353 lbp->buffer, match, lineno, linecharno);
6354 break;
/*
 * Duplicate the NUL-terminated string CP: return the result of
 * savenstr on CP and its full length, i.e. a newly allocated copy of
 * strlen (CP) + 1 bytes.
 */
static char *
savestr (const char *cp)
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6372 * Return a pointer to a space of size LEN+1 allocated with xnew where
6373 * the string CP has been copied for at most the first LEN characters.
6375 static char *
6376 savenstr (const char *cp, int len)
6378 char *dp = xnew (len + 1, char);
6379 dp[len] = '\0';
6380 return memcpy (dp, cp, len);
/* Return a pointer to the first character at or after CP for which
   c_isspace() is false.  The original comment notes that the end of
   the string is not a space, so the scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either a space (per c_isspace) or the terminating NUL.  */
static char *
skip_non_spaces (char *cp)
{
  for (; !(*cp == '\0' || c_isspace (*cp)); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is not
   in the "name" class, i.e. for which notinname() holds.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Report a diagnostic built from the printf-style FORMAT and its
   arguments through verror, then terminate the program with
   EXIT_FAILURE.  Does not return.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
/* Report the current `errno' condition with perror, using S1 as the
   message prefix, then terminate the program with EXIT_FAILURE.  Does
   not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6429 static void
6430 suggest_asking_for_help (void)
6432 fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
6433 progname);
6434 exit (EXIT_FAILURE);
/* Output a diagnostic with printf-style FORMAT and args through
   verror.  Unlike fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
6447 static void
6448 verror (char const *format, va_list ap)
6450 fprintf (stderr, "%s: ", progname);
6451 vfprintf (stderr, format, ap);
6452 fprintf (stderr, "\n");
6455 /* Return a newly-allocated string whose contents
6456 concatenate those of s1, s2, s3. */
6457 static char *
6458 concat (const char *s1, const char *s2, const char *s3)
6460 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6461 char *result = xnew (len1 + len2 + len3 + 1, char);
6463 strcpy (result, s1);
6464 strcpy (result + len1, s2);
6465 strcpy (result + len1 + len2, s3);
6467 return result;
6471 /* Does the same work as the system V getcwd, but does not need to
6472 guess the buffer size in advance. */
6473 static char *
6474 etags_getcwd (void)
6476 int bufsize = 200;
6477 char *path = xnew (bufsize, char);
6479 while (getcwd (path, bufsize) == NULL)
6481 if (errno != ERANGE)
6482 pfatal ("getcwd");
6483 bufsize *= 2;
6484 free (path);
6485 path = xnew (bufsize, char);
6488 canonicalize_filename (path);
6489 return path;
6492 /* Return a newly allocated string containing a name of a temporary file. */
6493 static char *
6494 etags_mktmp (void)
6496 const char *tmpdir = getenv ("TMPDIR");
6497 const char *slash = "/";
6499 #if MSDOS || defined (DOS_NT)
6500 if (!tmpdir)
6501 tmpdir = getenv ("TEMP");
6502 if (!tmpdir)
6503 tmpdir = getenv ("TMP");
6504 if (!tmpdir)
6505 tmpdir = ".";
6506 if (tmpdir[strlen (tmpdir) - 1] == '/'
6507 || tmpdir[strlen (tmpdir) - 1] == '\\')
6508 slash = "";
6509 #else
6510 if (!tmpdir)
6511 tmpdir = "/tmp";
6512 if (tmpdir[strlen (tmpdir) - 1] == '/')
6513 slash = "";
6514 #endif
6516 char *templt = concat (tmpdir, slash, "etXXXXXX");
6517 int fd = mkostemp (templt, O_CLOEXEC);
6518 if (fd < 0 || close (fd) != 0)
6520 int temp_errno = errno;
6521 free (templt);
6522 errno = temp_errno;
6523 templt = NULL;
6526 #if defined (DOS_NT)
6527 /* The file name will be used in shell redirection, so it needs to have
6528 DOS-style backslashes, or else the Windows shell will barf. */
6529 char *p;
6530 for (p = templt; *p; p++)
6531 if (*p == '/')
6532 *p = '\\';
6533 #endif
6535 return templt;
6538 /* Return a newly allocated string containing the file name of FILE
6539 relative to the absolute directory DIR (which should end with a slash). */
6540 static char *
6541 relative_filename (char *file, char *dir)
6543 char *fp, *dp, *afn, *res;
6544 int i;
6546 /* Find the common root of file and dir (with a trailing slash). */
6547 afn = absolute_filename (file, cwd);
6548 fp = afn;
6549 dp = dir;
6550 while (*fp++ == *dp++)
6551 continue;
6552 fp--, dp--; /* back to the first differing char */
6553 #ifdef DOS_NT
6554 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6555 return afn;
6556 #endif
6557 do /* look at the equal chars until '/' */
6558 fp--, dp--;
6559 while (*fp != '/');
6561 /* Build a sequence of "../" strings for the resulting relative file name. */
6562 i = 0;
6563 while ((dp = strchr (dp + 1, '/')) != NULL)
6564 i += 1;
6565 res = xnew (3*i + strlen (fp + 1) + 1, char);
6566 char *z = res;
6567 while (i-- > 0)
6568 z = stpcpy (z, "../");
6570 /* Add the file name relative to the common root of file and dir. */
6571 strcpy (z, fp + 1);
6572 free (afn);
6574 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  FILE is used as
   is when it is already absolute, otherwise it is appended to DIR.
   "/dirname/.." and "/." components are then removed from the
   result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the start of the preceding
                 component, never stepping before the start of RES.  */
              char *parent = NULL;

              cp = slashp;
              while (cp > res && parent == NULL)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    parent = cp;
                }
              if (parent == NULL)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (parent[0] != '/')
                cp = slashp;
#endif
              else
                cp = parent;
              /* The length includes the terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop a "/." component; the length includes the NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a copy
   of DIR.  FILE is cut after its last slash while the name is being
   computed and restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  /* Truncate FILE right after its last slash, resolve the directory
     part, then put the overwritten character back.  */
  char *cut = last_slash + 1;
  char saved = *cut;
  *cut = '\0';
  char *dirname = absolute_filename (file, dir);
  *cut = saved;

  return dirname;
}
/* Return true if FN is an absolute file name: it starts with a slash
   or, under DOS_NT, with a drive letter followed by ":/".  The argument
   string must have been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Canonicalize the file name FN in place: every run of separators is
   replaced by a single forward slash.  Under DOS_NT an upper-case
   drive letter is downcased first, and backslashes count as separators
   too.  The result is never longer than the input.  */
static void
canonicalize_filename (register char *fn)
{
  register char* src;
  char *dst = fn;
  bool in_run = false;          /* previous char was a separator */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);
#endif

  for (src = fn; *src != '\0'; src++)
    {
      bool is_sep = (*src == '/');
#ifdef DOS_NT
      if (*src == '\\')
        is_sep = true;
#endif
      if (!is_sep)
        *dst++ = *src;
      else if (!in_run)
        *dst++ = '/';           /* first separator of a run */
      in_run = is_sep;
    }

  *dst = '\0';
}
6715 /* Initialize a linebuffer for use. */
6716 static void
6717 linebuffer_init (linebuffer *lbp)
6719 lbp->size = (DEBUG) ? 3 : 200;
6720 lbp->buffer = xnew (lbp->size, char);
6721 lbp->buffer[0] = '\0';
6722 lbp->len = 0;
6725 /* Set the minimum size of a string contained in a linebuffer. */
6726 static void
6727 linebuffer_setlen (linebuffer *lbp, int toksize)
6729 while (lbp->size <= toksize)
6731 lbp->size *= 2;
6732 xrnew (lbp->buffer, lbp->size, char);
6734 lbp->len = toksize;
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted");

  return block;
}
6757 * Local Variables:
6758 * indent-tabs-mode: t
6759 * tab-width: 8
6760 * fill-column: 79
6761 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6762 * c-file-style: "gnu"
6763 * End:
6766 /* etags.c ends here */