; Auto-commit of ChangeLog files.
[emacs.git] / lib-src / etags.c
blob8b7f53c808bb61d61ed10b5db7472fc7961ec0fb
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
/*
 * Authors:
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganized C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 * Francesco Potortì maintained and improved it for many years
 * starting in 1993.
 */
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest.  Alternatively, consider distributing etags
 * together with a configuration file containing regexp definitions for etags.
 */
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #ifdef WIN32_NATIVE
100 # undef MSDOS
101 # undef WINDOWSNT
102 # define WINDOWSNT
103 #endif /* WIN32_NATIVE */
105 #ifdef MSDOS
106 # undef MSDOS
107 # define MSDOS true
108 # include <sys/param.h>
109 #else
110 # define MSDOS false
111 #endif /* MSDOS */
113 #ifdef WINDOWSNT
114 # include <direct.h>
115 # define MAXPATHLEN _MAX_PATH
116 # undef HAVE_NTGUI
117 # undef DOS_NT
118 # define DOS_NT
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <binary-io.h>
131 #include <c-ctype.h>
132 #include <c-strcase.h>
134 #include <assert.h>
135 #ifdef NDEBUG
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
138 #endif
140 #include <getopt.h>
141 #include <regex.h>
143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
144 Leave it undefined to make the program "etags", which makes emacs-style
145 tag tables and tags typedefs, #defines and struct/union/enum by default. */
146 #ifdef CTAGS
147 # undef CTAGS
148 # define CTAGS true
149 #else
150 # define CTAGS false
151 #endif
/* String equality helpers; each evaluates to true when the strings
   compare equal.  streq and strneq are case-sensitive (strcmp,
   strncmp); strcaseeq and strncaseeq ignore case (c_strcasecmp,
   c_strncasecmp).  The N variants compare at most N characters.

   Every macro asserts that BOTH arguments are non-null, because
   handing a null pointer to any of the underlying comparison
   functions is undefined behavior.  Arguments may be evaluated more
   than once, so they must not have side effects.  */
#define streq(s,t)        (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)    (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)     (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
/* Return true if C is not in a name, i.e. it is NUL, a tab, newline,
   form feed, carriage return or space, or one of ( ) , ; =.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;
    default:
      return false;
    }
}
/* Return true if C can start a token: an ASCII letter, or one of
   $ @ _ ~.  */
static bool
begtoken (unsigned char c)
{
  switch (c)
    {
    case '$': case '@':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case '~':
      return true;
    default:
      return false;
    }
}
/* Return true if C can be in the middle of a token: an ASCII letter
   or digit, `$' or `_'.  */
static bool
intoken (unsigned char c)
{
  switch (c)
    {
    case '$':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
}
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed in the
   switch below.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^':
    case '{': case '|': case '}': case '~':
      return true;
    default:
      return false;
    }
}
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields room for N objects of type Type obtained from xmalloc;
 * xrnew resizes OP through xrealloc and stores the result back in OP.
 */
#define xnew(n, Type)      ((Type *) xmalloc (sizeof (Type) * (n)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc ((op), sizeof (Type) * (n)))
/* Type of a language parse function (see the `function' member of
   `language'); it takes a stdio stream and returns nothing.  */
typedef void Lang_function (FILE *stream);
/* Associates a compressed-file suffix with the command that
   decompresses such a file.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;
  /* Command; takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;
242 typedef struct
244 const char *name; /* language name */
245 const char *help; /* detailed help for the language */
246 Lang_function *function; /* parse function */
247 const char **suffixes; /* name suffixes of this language's files */
248 const char **filenames; /* names of this language's files */
249 const char **interpreters; /* interpreters for this language */
250 bool metasource; /* source used to generate other sources */
251 } language;
253 typedef struct fdesc
255 struct fdesc *next; /* for the linked list */
256 char *infname; /* uncompressed input file name */
257 char *infabsname; /* absolute uncompressed input file name */
258 char *infabsdir; /* absolute dir of input file */
259 char *taggedfname; /* file name to write in tagfile */
260 language *lang; /* language of file */
261 char *prop; /* file properties to write in tagfile */
262 bool usecharno; /* etags tags shall contain char number */
263 bool written; /* entry written in the tags file */
264 } fdesc;
266 typedef struct node_st
267 { /* sorting structure */
268 struct node_st *left, *right; /* left and right sons */
269 fdesc *fdp; /* description of file to whom tag belongs */
270 char *name; /* tag name */
271 char *regex; /* search regexp */
272 bool valid; /* write this tag on the tag file */
273 bool is_func; /* function tag: use regexp in CTAGS mode */
274 bool been_warned; /* warning already given for duplicated tag */
275 int lno; /* line number tag is on */
276 long cno; /* character number line starts on */
277 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  /* Size of BUFFER.  */
  long size;
  /* Length of the string currently held in BUFFER.  */
  int len;
  /* The text itself.  */
  char *buffer;
} linebuffer;
293 /* Used to support mixing of --lang and file names. */
294 typedef struct
296 enum {
297 at_language, /* a language specification */
298 at_regexp, /* a regular expression */
299 at_filename, /* a file name */
300 at_stdin, /* read from stdin here */
301 at_end /* stop parsing the list */
302 } arg_type; /* argument type */
303 language *lang; /* language associated with the argument */
304 char *what; /* the argument itself */
305 } argument;
307 /* Structure defining a regular expression. */
308 typedef struct regexp
310 struct regexp *p_next; /* pointer to next in list */
311 language *lang; /* if set, use only for this language */
312 char *pattern; /* the regexp pattern */
313 char *name; /* tag name */
314 struct re_pattern_buffer *pat; /* the compiled pattern */
315 struct re_registers regs; /* re registers */
316 bool error_signaled; /* already signaled for this regexp */
317 bool force_explicit_name; /* do not allow implicit tag name */
318 bool ignore_case; /* ignore case when matching */
319 bool multi_line; /* do a multi-line match on the whole file */
320 } regexp;
323 /* Many compilers barf on this:
324 Lang_function Ada_funcs;
325 so let's write it this way */
326 static void Ada_funcs (FILE *);
327 static void Asm_labels (FILE *);
328 static void C_entries (int c_ext, FILE *);
329 static void default_C_entries (FILE *);
330 static void plain_C_entries (FILE *);
331 static void Cjava_entries (FILE *);
332 static void Cobol_paragraphs (FILE *);
333 static void Cplusplus_entries (FILE *);
334 static void Cstar_entries (FILE *);
335 static void Erlang_functions (FILE *);
336 static void Forth_words (FILE *);
337 static void Fortran_functions (FILE *);
338 static void HTML_labels (FILE *);
339 static void Lisp_functions (FILE *);
340 static void Lua_functions (FILE *);
341 static void Makefile_targets (FILE *);
342 static void Pascal_functions (FILE *);
343 static void Perl_functions (FILE *);
344 static void PHP_functions (FILE *);
345 static void PS_functions (FILE *);
346 static void Prolog_functions (FILE *);
347 static void Python_functions (FILE *);
348 static void Scheme_functions (FILE *);
349 static void TeX_commands (FILE *);
350 static void Texinfo_nodes (FILE *);
351 static void Yacc_entries (FILE *);
352 static void just_read_file (FILE *);
354 static language *get_language_from_langname (const char *);
355 static void readline (linebuffer *, FILE *);
356 static long readline_internal (linebuffer *, FILE *, char const *);
357 static bool nocase_tail (const char *);
358 static void get_tag (char *, char **);
360 static void analyze_regex (char *);
361 static void free_regexps (void);
362 static void regex_tag_multiline (void);
363 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
364 static _Noreturn void suggest_asking_for_help (void);
365 _Noreturn void fatal (const char *, const char *);
366 static _Noreturn void pfatal (const char *);
367 static void add_node (node *, node **);
369 static void process_file_name (char *, language *);
370 static void process_file (FILE *, char *, language *);
371 static void find_entries (FILE *);
372 static void free_tree (node *);
373 static void free_fdesc (fdesc *);
374 static void pfnote (char *, bool, char *, int, int, long);
375 static void invalidate_nodes (fdesc *, node **);
376 static void put_entries (node *);
378 static char *concat (const char *, const char *, const char *);
379 static char *skip_spaces (char *);
380 static char *skip_non_spaces (char *);
381 static char *skip_name (char *);
382 static char *savenstr (const char *, int);
383 static char *savestr (const char *);
384 static char *etags_getcwd (void);
385 static char *relative_filename (char *, char *);
386 static char *absolute_filename (char *, char *);
387 static char *absolute_dirname (char *, char *);
388 static bool filename_is_absolute (char *f);
389 static void canonicalize_filename (char *);
390 static char *etags_mktmp (void);
391 static void linebuffer_init (linebuffer *);
392 static void linebuffer_setlen (linebuffer *, int);
393 static void *xmalloc (size_t);
394 static void *xrealloc (void *, size_t);
397 static char searchar = '/'; /* use /.../ searches */
399 static char *tagfile; /* output file */
400 static char *progname; /* name this program was invoked with */
401 static char *cwd; /* current working directory */
402 static char *tagfiledir; /* directory of tagfile */
403 static FILE *tagf; /* ioptr for tags file */
404 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
406 static fdesc *fdhead; /* head of file description list */
407 static fdesc *curfdp; /* current file description */
408 static char *infilename; /* current input file name */
409 static int lineno; /* line number of current line */
410 static long charno; /* current character number */
411 static long linecharno; /* charno of start of current line */
412 static char *dbp; /* pointer to start of current tag */
414 static const int invalidcharno = -1;
416 static node *nodehead; /* the head of the binary tree of tags */
417 static node *last_node; /* the last node created */
419 static linebuffer lb; /* the current line */
420 static linebuffer filebuf; /* a buffer containing the whole file */
421 static linebuffer token_name; /* a buffer containing a tag name */
423 static bool append_to_tagfile; /* -a: append to tags */
424 /* The next five default to true in C and derived languages. */
425 static bool typedefs; /* -t: create tags for C and Ada typedefs */
426 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
427 /* 0 struct/enum/union decls, and C++ */
428 /* member functions. */
429 static bool constantypedefs; /* -d: create tags for C #define, enum */
430 /* constants and variables. */
431 /* -D: opposite of -d. Default under ctags. */
432 static int globals; /* create tags for global variables */
433 static int members; /* create tags for C member variables */
434 static int declarations; /* --declarations: tag them and extern in C&Co*/
435 static int no_line_directive; /* ignore #line directives (undocumented) */
436 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
437 static bool update; /* -u: update tags */
438 static bool vgrind_style; /* -v: create vgrind style index output */
439 static bool no_warnings; /* -w: suppress warnings (undocumented) */
440 static bool cxref_style; /* -x: create cxref style output */
441 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
442 static bool ignoreindent; /* -I: ignore indentation in C */
443 static int packages_only; /* --packages-only: in Ada, only tag packages*/
444 static int class_qualify; /* -Q: produce class-qualified tags in C++/Java */
446 /* STDIN is defined in LynxOS system headers */
447 #ifdef STDIN
448 # undef STDIN
449 #endif
451 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
452 static bool parsing_stdin; /* --parse-stdin used */
454 static regexp *p_head; /* list of all regexps */
455 static bool need_filebuf; /* some regexes are multi-line */
457 static struct option longopts[] =
459 { "append", no_argument, NULL, 'a' },
460 { "packages-only", no_argument, &packages_only, 1 },
461 { "c++", no_argument, NULL, 'C' },
462 { "declarations", no_argument, &declarations, 1 },
463 { "no-line-directive", no_argument, &no_line_directive, 1 },
464 { "no-duplicates", no_argument, &no_duplicates, 1 },
465 { "help", no_argument, NULL, 'h' },
466 { "help", no_argument, NULL, 'H' },
467 { "ignore-indentation", no_argument, NULL, 'I' },
468 { "language", required_argument, NULL, 'l' },
469 { "members", no_argument, &members, 1 },
470 { "no-members", no_argument, &members, 0 },
471 { "output", required_argument, NULL, 'o' },
472 { "class-qualify", no_argument, &class_qualify, 'Q' },
473 { "regex", required_argument, NULL, 'r' },
474 { "no-regex", no_argument, NULL, 'R' },
475 { "ignore-case-regex", required_argument, NULL, 'c' },
476 { "parse-stdin", required_argument, NULL, STDIN },
477 { "version", no_argument, NULL, 'V' },
479 #if CTAGS /* Ctags options */
480 { "backward-search", no_argument, NULL, 'B' },
481 { "cxref", no_argument, NULL, 'x' },
482 { "defines", no_argument, NULL, 'd' },
483 { "globals", no_argument, &globals, 1 },
484 { "typedefs", no_argument, NULL, 't' },
485 { "typedefs-and-c++", no_argument, NULL, 'T' },
486 { "update", no_argument, NULL, 'u' },
487 { "vgrind", no_argument, NULL, 'v' },
488 { "no-warn", no_argument, NULL, 'w' },
490 #else /* Etags options */
491 { "no-defines", no_argument, NULL, 'D' },
492 { "no-globals", no_argument, &globals, 0 },
493 { "include", required_argument, NULL, 'i' },
494 #endif
495 { NULL }
498 static compressor compressors[] =
500 { "z", "gzip -d -c"},
501 { "Z", "gzip -d -c"},
502 { "gz", "gzip -d -c"},
503 { "GZ", "gzip -d -c"},
504 { "bz2", "bzip2 -d -c" },
505 { "xz", "xz -d -c" },
506 { NULL }
/*
 * Language stuff.
 */

/* Ada code */
static const char *Ada_suffixes [] =
{
  "ads",
  "adb",
  "ada",
  NULL
};
static const char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
/* Assembly code */
static const char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};
static const char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these.  */
static const char *default_C_suffixes [] =
{
  "c",
  "h",
  NULL
};
#if CTAGS				/* C help for Ctags */
static const char default_C_help [] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
static const char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'.";
#endif	/* C help for Ctags and Etags */
/* C++ code */
static const char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",				/* Objective C++ */
  "pdb",			/* PostScript with C syntax */
  NULL
};
static const char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
/* Java code */
static const char *Cjava_suffixes [] =
  { "java", NULL };
/* Declared const like every other help text: it is never modified
   and is only reached through the `const char *help' member of
   `language'.  */
static const char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol code */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
/* Declared const like every other help text: it is never modified
   and is only reached through the `const char *help' member of
   `language'.  */
static const char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
/* C* code (no help text of its own) */
static const char *Cstar_suffixes [] =
{
  "cs",
  "hs",
  NULL
};

/* Erlang code */
static const char *Erlang_suffixes [] =
{
  "erl",
  "hrl",
  NULL
};
static const char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
/* Forth code.  The suffix table is static, like the suffix tables
   of all the other languages, so that it does not get external
   linkage.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran code */
static const char *Fortran_suffixes [] =
{
  "F", "f", "f90", "for", NULL
};
static const char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML code */
static const char *HTML_suffixes [] =
{
  "htm", "html", "shtml", NULL
};
static const char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp code */
static const char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
static const char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.\n"
  "The `--declarations' option tags \"(defvar foo)\" constructs too.";

/* Lua code */
static const char *Lua_suffixes [] =
{
  "lua", "LUA", NULL
};
static const char Lua_help [] =
  "In Lua scripts, all functions are tags.";

/* Makefiles (recognized by file name, not by suffix) */
static const char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
static const char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C code */
static const char *Objc_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  NULL
};
static const char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code */
static const char *Pascal_suffixes [] =
{
  "p", "pas", NULL
};
static const char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl code */
static const char *Perl_suffixes [] =
{
  "pl", "pm", NULL
};
static const char *Perl_interpreters [] =
{
  "perl", "@PERL@", NULL
};
static const char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP code */
static const char *PHP_suffixes [] =
{
  "php", "php3", "php4", NULL
};
static const char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Plain C code (no help text of its own) */
static const char *plain_C_suffixes [] =
{
  "pc",				/* Pro*C file */
  NULL
};

/* PostScript code */
static const char *PS_suffixes [] =
{
  "ps",
  "psw",			/* .psw is for PSWrap */
  NULL
};
static const char PS_help [] =
  "In PostScript code, the tags are the functions.";

/* Prolog code */
static const char *Prolog_suffixes [] =
{
  "prolog", NULL
};
static const char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python code */
static const char *Python_suffixes [] =
{
  "py", NULL
};
static const char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
/* Scheme code.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static const char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
static const char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";

/* TeX and LaTeX code */
static const char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
static const char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";

/* Texinfo code */
static const char *Texinfo_suffixes [] =
{
  "texi", "texinfo", "txi", NULL
};
static const char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Yacc and Bison code */
static const char *Yacc_suffixes [] =
{
  "y", "y++",
  "ym",				/* .ym is Objective yacc file */
  "yxx", "yy", NULL
};
static const char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
/* Help text for the pseudo-language `auto'.  */
static const char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
/* Help text for the pseudo-language `none'.  */
static const char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text used by languages that have none of their own.  */
static const char no_lang_help [] =
  "No detailed help available for this language.";
753 * Table of languages.
755 * It is ok for a given function to be listed under more than one
756 * name. I just didn't.
759 static language lang_names [] =
761 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
762 { "asm", Asm_help, Asm_labels, Asm_suffixes },
763 { "c", default_C_help, default_C_entries, default_C_suffixes },
764 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
765 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
766 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
767 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
768 { "forth", Forth_help, Forth_words, Forth_suffixes },
769 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
770 { "html", HTML_help, HTML_labels, HTML_suffixes },
771 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
772 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
773 { "lua", Lua_help, Lua_functions, Lua_suffixes },
774 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
775 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
776 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
777 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
778 { "php", PHP_help, PHP_functions, PHP_suffixes },
779 { "postscript",PS_help, PS_functions, PS_suffixes },
780 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
781 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
782 { "python", Python_help, Python_functions, Python_suffixes },
783 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
784 { "tex", TeX_help, TeX_commands, TeX_suffixes },
785 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
786 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
787 { "auto", auto_help }, /* default guessing scheme */
788 { "none", none_help, just_read_file }, /* regexp matching only */
789 { NULL } /* end of list */
/* Print, on standard output, one line per entry of lang_names: the
   language name left-justified in a 10-column field, then every
   default file name, then every default suffix preceded by a dot.
   The table is followed by fixed explanatory text about `auto',
   `none', sharp-bang detection, the Fortran/C fallback and the
   supported compressors.  */
793 static void
794 print_language_names (void)
796 language *lang;
797 const char **name, **ext;
799 puts ("\nThese are the currently supported languages, along with the\n\
800 default file names and dot suffixes:");
/* The table ends at the entry whose name is NULL.  */
801 for (lang = lang_names; lang->name != NULL; lang++)
803 printf (" %-*s", 10, lang->name);
804 if (lang->filenames != NULL)
805 for (name = lang->filenames; *name != NULL; name++)
806 printf (" %s", *name);
807 if (lang->suffixes != NULL)
808 for (ext = lang->suffixes; *ext != NULL; ext++)
809 printf (" .%s", *ext);
810 puts ("");
812 puts ("where `auto' means use default language for files based on file\n\
813 name suffix, and `none' means only do regexp processing on files.\n\
814 If no language is specified and no matching suffix is found,\n\
815 the first line of the file is read for a sharp-bang (#!) sequence\n\
816 followed by the name of an interpreter. If no such sequence is found,\n\
817 Fortran is tried first; if no tags are found, C is tried next.\n\
818 When parsing any C file, a \"class\" or \"template\" keyword\n\
819 switches to C++.");
820 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
822 For detailed help on a given language use, for example,\n\
823 etags --help --lang=ada.");
826 #ifndef EMACS_NAME
827 # define EMACS_NAME "standalone"
828 #endif
829 #ifndef VERSION
830 # define VERSION "17.38.1.4"
831 #endif
832 static _Noreturn void
833 print_version (void)
835 char emacs_copyright[] = COPYRIGHT;
837 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
838 puts (emacs_copyright);
839 puts ("This program is distributed under the terms in ETAGS.README");
841 exit (EXIT_SUCCESS);
844 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
845 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
846 #endif
/* Print help and exit successfully; never returns.
   ARGBUFFER is scanned up to its at_end entry.  For every at_language
   entry the help text of that language is printed (entries separated
   by an empty line), and if at least one was printed the program
   exits right away.  Otherwise the usage line and the option
   descriptions are printed; which options are described depends on
   CTAGS and on PRINT_UNDOCUMENTED_OPTIONS_HELP.  The list of known
   languages and the bug-report address come last.  */
848 static _Noreturn void
849 print_help (argument *argbuffer)
851 bool help_for_lang = false;
853 for (; argbuffer->arg_type != at_end; argbuffer++)
854 if (argbuffer->arg_type == at_language)
856 if (help_for_lang)
857 puts ("");
858 puts (argbuffer->lang->help);
859 help_for_lang = true;
862 if (help_for_lang)
863 exit (EXIT_SUCCESS);
865 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
867 These are the options accepted by %s.\n", progname, progname);
868 puts ("You may use unambiguous abbreviations for the long option names.");
869 puts (" A - as file name means read names from stdin (one per line).\n\
870 Absolute names are stored in the output file as they are.\n\
871 Relative ones are stored relative to the output file's directory.\n");
873 puts ("-a, --append\n\
874 Append tag entries to existing tags file.");
876 puts ("--packages-only\n\
877 For Ada files, only generate tags for packages.");
879 if (CTAGS)
880 puts ("-B, --backward-search\n\
881 Write the search commands for the tag entries using '?', the\n\
882 backward-search command instead of '/', the forward-search command.");
884 /* This option is mostly obsolete, because etags can now automatically
885 detect C++. Retained for backward compatibility and for debugging and
886 experimentation. In principle, we could want to tag as C++ even
887 before any "class" or "template" keyword.
888 puts ("-C, --c++\n\
889 Treat files whose name suffix defaults to C language as C++ files.");
892 puts ("--declarations\n\
893 In C and derived languages, create tags for function declarations,");
894 if (CTAGS)
895 puts ("\tand create tags for extern variables if --globals is used.");
896 else
897 puts
898 ("\tand create tags for extern variables unless --no-globals is used.");
900 if (CTAGS)
901 puts ("-d, --defines\n\
902 Create tag entries for C #define constants and enum constants, too.");
903 else
904 puts ("-D, --no-defines\n\
905 Don't create tag entries for C #define constants and enum constants.\n\
906 This makes the tags file smaller.");
908 if (!CTAGS)
909 puts ("-i FILE, --include=FILE\n\
910 Include a note in tag file indicating that, when searching for\n\
911 a tag, one should also consult the tags file FILE after\n\
912 checking the current file.");
914 puts ("-l LANG, --language=LANG\n\
915 Force the following files to be considered as written in the\n\
916 named language up to the next --language=LANG option.");
918 if (CTAGS)
919 puts ("--globals\n\
920 Create tag entries for global variables in some languages.");
921 else
922 puts ("--no-globals\n\
923 Do not create tag entries for global variables in some\n\
924 languages. This makes the tags file smaller.");
926 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
927 puts ("--no-line-directive\n\
928 Ignore #line preprocessor directives in C and derived languages.");
930 if (CTAGS)
931 puts ("--members\n\
932 Create tag entries for members of structures in some languages.");
933 else
934 puts ("--no-members\n\
935 Do not create tag entries for members of structures\n\
936 in some languages.");
938 puts ("-Q, --class-qualify\n\
939 Qualify tag names with their class name in C++, ObjC, and Java.\n\
940 This produces tag names of the form \"class::member\" for C++,\n\
941 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
942 For Objective C, this also produces class methods qualified with\n\
943 their arguments, as in \"foo:bar:baz:more\".");
944 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
945 Make a tag for each line matching a regular expression pattern\n\
946 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
947 files only. REGEXFILE is a file containing one REGEXP per line.\n\
948 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
949 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
950 puts (" If TAGNAME/ is present, the tags created are named.\n\
951 For example Tcl named tags can be created with:\n\
952 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
953 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
954 `m' means to allow multi-line matches, `s' implies `m' and\n\
955 causes dot to match any character, including newline.");
957 puts ("-R, --no-regex\n\
958 Don't create tags from regexps for the following files.");
960 puts ("-I, --ignore-indentation\n\
961 In C and C++ do not assume that a closing brace in the first\n\
962 column is the final brace of a function or structure definition.");
964 puts ("-o FILE, --output=FILE\n\
965 Write the tags to FILE.");
967 puts ("--parse-stdin=NAME\n\
968 Read from standard input and record tags as belonging to file NAME.");
970 if (CTAGS)
972 puts ("-t, --typedefs\n\
973 Generate tag entries for C and Ada typedefs.");
974 puts ("-T, --typedefs-and-c++\n\
975 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
976 and C++ member functions.");
979 if (CTAGS)
980 puts ("-u, --update\n\
981 Update the tag entries for the given files, leaving tag\n\
982 entries for other files in place. Currently, this is\n\
983 implemented by deleting the existing entries for the given\n\
984 files and then rewriting the new entries at the end of the\n\
985 tags file. It is often faster to simply rebuild the entire\n\
986 tag file than to use this.");
988 if (CTAGS)
990 puts ("-v, --vgrind\n\
991 Print on the standard output an index of items intended for\n\
992 human consumption, similar to the output of vgrind. The index\n\
993 is sorted, and gives the page number of each item.");
995 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
996 puts ("-w, --no-duplicates\n\
997 Do not create duplicate tag entries, for compatibility with\n\
998 traditional ctags.");
1000 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1001 puts ("-w, --no-warn\n\
1002 Suppress warning messages about duplicate tag entries.");
1004 puts ("-x, --cxref\n\
1005 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1006 The output uses line numbers instead of page numbers, but\n\
1007 beyond that the differences are cosmetic; try both to see\n\
1008 which you like.");
1011 puts ("-V, --version\n\
1012 Print the version of the program.\n\
1013 -h, --help\n\
1014 Print this help message.\n\
1015 Followed by one or more `--language' options prints detailed\n\
1016 help about tag generation for the specified languages.");
1018 print_language_names ();
1020 puts ("");
1021 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1023 exit (EXIT_SUCCESS);
1028 main (int argc, char **argv)
1030 int i;
1031 unsigned int nincluded_files;
1032 char **included_files;
1033 argument *argbuffer;
1034 int current_arg, file_count;
1035 linebuffer filename_lb;
1036 bool help_asked = false;
1037 ptrdiff_t len;
1038 char *optstring;
1039 int opt;
1041 progname = argv[0];
1042 nincluded_files = 0;
1043 included_files = xnew (argc, char *);
1044 current_arg = 0;
1045 file_count = 0;
1047 /* Allocate enough no matter what happens. Overkill, but each one
1048 is small. */
1049 argbuffer = xnew (argc, argument);
1052 * Always find typedefs and structure tags.
1053 * Also default to find macro constants, enum constants, struct
1054 * members and global variables. Do it for both etags and ctags.
1056 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1057 globals = members = true;
1059 /* When the optstring begins with a '-' getopt_long does not rearrange the
1060 non-options arguments to be at the end, but leaves them alone. */
1061 optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1062 (CTAGS) ? "BxdtTuvw" : "Di:",
1063 "");
1065 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1066 switch (opt)
1068 case 0:
1069 /* If getopt returns 0, then it has already processed a
1070 long-named option. We should do nothing. */
1071 break;
1073 case 1:
1074 /* This means that a file name has been seen. Record it. */
1075 argbuffer[current_arg].arg_type = at_filename;
1076 argbuffer[current_arg].what = optarg;
1077 len = strlen (optarg);
1078 if (whatlen_max < len)
1079 whatlen_max = len;
1080 ++current_arg;
1081 ++file_count;
1082 break;
1084 case STDIN:
1085 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1086 argbuffer[current_arg].arg_type = at_stdin;
1087 argbuffer[current_arg].what = optarg;
1088 len = strlen (optarg);
1089 if (whatlen_max < len)
1090 whatlen_max = len;
1091 ++current_arg;
1092 ++file_count;
1093 if (parsing_stdin)
1094 fatal ("cannot parse standard input more than once", (char *)NULL);
1095 parsing_stdin = true;
1096 break;
1098 /* Common options. */
1099 case 'a': append_to_tagfile = true; break;
1100 case 'C': cplusplus = true; break;
1101 case 'f': /* for compatibility with old makefiles */
1102 case 'o':
1103 if (tagfile)
1105 error ("-o option may only be given once.");
1106 suggest_asking_for_help ();
1107 /* NOTREACHED */
1109 tagfile = optarg;
1110 break;
1111 case 'I':
1112 case 'S': /* for backward compatibility */
1113 ignoreindent = true;
1114 break;
1115 case 'l':
1117 language *lang = get_language_from_langname (optarg);
1118 if (lang != NULL)
1120 argbuffer[current_arg].lang = lang;
1121 argbuffer[current_arg].arg_type = at_language;
1122 ++current_arg;
1125 break;
1126 case 'c':
1127 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1128 optarg = concat (optarg, "i", ""); /* memory leak here */
1129 /* FALLTHRU */
1130 case 'r':
1131 argbuffer[current_arg].arg_type = at_regexp;
1132 argbuffer[current_arg].what = optarg;
1133 len = strlen (optarg);
1134 if (whatlen_max < len)
1135 whatlen_max = len;
1136 ++current_arg;
1137 break;
1138 case 'R':
1139 argbuffer[current_arg].arg_type = at_regexp;
1140 argbuffer[current_arg].what = NULL;
1141 ++current_arg;
1142 break;
1143 case 'V':
1144 print_version ();
1145 break;
1146 case 'h':
1147 case 'H':
1148 help_asked = true;
1149 break;
1150 case 'Q':
1151 class_qualify = 1;
1152 break;
1154 /* Etags options */
1155 case 'D': constantypedefs = false; break;
1156 case 'i': included_files[nincluded_files++] = optarg; break;
1158 /* Ctags options. */
1159 case 'B': searchar = '?'; break;
1160 case 'd': constantypedefs = true; break;
1161 case 't': typedefs = true; break;
1162 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1163 case 'u': update = true; break;
1164 case 'v': vgrind_style = true; /*FALLTHRU*/
1165 case 'x': cxref_style = true; break;
1166 case 'w': no_warnings = true; break;
1167 default:
1168 suggest_asking_for_help ();
1169 /* NOTREACHED */
1172 /* No more options. Store the rest of arguments. */
1173 for (; optind < argc; optind++)
1175 argbuffer[current_arg].arg_type = at_filename;
1176 argbuffer[current_arg].what = argv[optind];
1177 len = strlen (argv[optind]);
1178 if (whatlen_max < len)
1179 whatlen_max = len;
1180 ++current_arg;
1181 ++file_count;
1184 argbuffer[current_arg].arg_type = at_end;
1186 if (help_asked)
1187 print_help (argbuffer);
1188 /* NOTREACHED */
1190 if (nincluded_files == 0 && file_count == 0)
1192 error ("no input files specified.");
1193 suggest_asking_for_help ();
1194 /* NOTREACHED */
1197 if (tagfile == NULL)
1198 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1199 cwd = etags_getcwd (); /* the current working directory */
1200 if (cwd[strlen (cwd) - 1] != '/')
1202 char *oldcwd = cwd;
1203 cwd = concat (oldcwd, "/", "");
1204 free (oldcwd);
1207 /* Compute base directory for relative file names. */
1208 if (streq (tagfile, "-")
1209 || strneq (tagfile, "/dev/", 5))
1210 tagfiledir = cwd; /* relative file names are relative to cwd */
1211 else
1213 canonicalize_filename (tagfile);
1214 tagfiledir = absolute_dirname (tagfile, cwd);
1217 linebuffer_init (&lb);
1218 linebuffer_init (&filename_lb);
1219 linebuffer_init (&filebuf);
1220 linebuffer_init (&token_name);
1222 if (!CTAGS)
1224 if (streq (tagfile, "-"))
1226 tagf = stdout;
1227 SET_BINARY (fileno (stdout));
1229 else
1230 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1231 if (tagf == NULL)
1232 pfatal (tagfile);
1236 * Loop through files finding functions.
1238 for (i = 0; i < current_arg; i++)
1240 static language *lang; /* non-NULL if language is forced */
1241 char *this_file;
1243 switch (argbuffer[i].arg_type)
1245 case at_language:
1246 lang = argbuffer[i].lang;
1247 break;
1248 case at_regexp:
1249 analyze_regex (argbuffer[i].what);
1250 break;
1251 case at_filename:
1252 this_file = argbuffer[i].what;
1253 /* Input file named "-" means read file names from stdin
1254 (one per line) and use them. */
1255 if (streq (this_file, "-"))
1257 if (parsing_stdin)
1258 fatal ("cannot parse standard input AND read file names from it",
1259 (char *)NULL);
1260 while (readline_internal (&filename_lb, stdin, "-") > 0)
1261 process_file_name (filename_lb.buffer, lang);
1263 else
1264 process_file_name (this_file, lang);
1265 break;
1266 case at_stdin:
1267 this_file = argbuffer[i].what;
1268 process_file (stdin, this_file, lang);
1269 break;
1273 free_regexps ();
1274 free (lb.buffer);
1275 free (filebuf.buffer);
1276 free (token_name.buffer);
1278 if (!CTAGS || cxref_style)
1280 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1281 put_entries (nodehead);
1282 free_tree (nodehead);
1283 nodehead = NULL;
1284 if (!CTAGS)
1286 fdesc *fdp;
1288 /* Output file entries that have no tags. */
1289 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1290 if (!fdp->written)
1291 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1293 while (nincluded_files-- > 0)
1294 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1296 if (fclose (tagf) == EOF)
1297 pfatal (tagfile);
1300 exit (EXIT_SUCCESS);
1303 /* From here on, we are in (CTAGS && !cxref_style) */
1304 if (update)
1306 char *cmd =
1307 xmalloc (strlen (tagfile) + whatlen_max +
1308 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1309 for (i = 0; i < current_arg; ++i)
1311 switch (argbuffer[i].arg_type)
1313 case at_filename:
1314 case at_stdin:
1315 break;
1316 default:
1317 continue; /* the for loop */
1319 char *z = stpcpy (cmd, "mv ");
1320 z = stpcpy (z, tagfile);
1321 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1322 z = stpcpy (z, argbuffer[i].what);
1323 z = stpcpy (z, "\t' OTAGS >");
1324 z = stpcpy (z, tagfile);
1325 strcpy (z, ";rm OTAGS");
1326 if (system (cmd) != EXIT_SUCCESS)
1327 fatal ("failed to execute shell command", (char *)NULL);
1329 free (cmd);
1330 append_to_tagfile = true;
1333 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1334 if (tagf == NULL)
1335 pfatal (tagfile);
1336 put_entries (nodehead); /* write all the tags (CTAGS) */
1337 free_tree (nodehead);
1338 nodehead = NULL;
1339 if (fclose (tagf) == EOF)
1340 pfatal (tagfile);
1342 if (CTAGS)
1343 if (append_to_tagfile || update)
1345 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1346 /* Maybe these should be used:
1347 setenv ("LC_COLLATE", "C", 1);
1348 setenv ("LC_ALL", "C", 1); */
1349 char *z = stpcpy (cmd, "sort -u -o ");
1350 z = stpcpy (z, tagfile);
1351 *z++ = ' ';
1352 strcpy (z, tagfile);
1353 exit (system (cmd));
1355 return EXIT_SUCCESS;
1360 * Return a compressor given the file name. If EXTPTR is non-zero,
1361 * return a pointer into FILE where the compressor-specific
1362 * extension begins. If no compressor is found, NULL is returned
1363 * and EXTPTR is not significant.
1364 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1366 static compressor *
1367 get_compressor_from_suffix (char *file, char **extptr)
1369 compressor *compr;
1370 char *slash, *suffix;
1372 /* File has been processed by canonicalize_filename,
1373 so we don't need to consider backslashes on DOS_NT. */
1374 slash = strrchr (file, '/');
1375 suffix = strrchr (file, '.');
1376 if (suffix == NULL || suffix < slash)
1377 return NULL;
1378 if (extptr != NULL)
1379 *extptr = suffix;
1380 suffix += 1;
1381 /* Let those poor souls who live with DOS 8+3 file name limits get
1382 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1383 Only the first do loop is run if not MSDOS */
1386 for (compr = compressors; compr->suffix != NULL; compr++)
1387 if (streq (compr->suffix, suffix))
1388 return compr;
1389 if (!MSDOS)
1390 break; /* do it only once: not really a loop */
1391 if (extptr != NULL)
1392 *extptr = ++suffix;
1393 } while (*suffix != '\0');
1394 return NULL;
1400 * Return a language given the name.
1402 static language *
1403 get_language_from_langname (const char *name)
1405 language *lang;
1407 if (name == NULL)
1408 error ("empty language name");
1409 else
1411 for (lang = lang_names; lang->name != NULL; lang++)
1412 if (streq (name, lang->name))
1413 return lang;
1414 error ("unknown language \"%s\"", name);
1417 return NULL;
1422 * Return a language given the interpreter name.
1424 static language *
1425 get_language_from_interpreter (char *interpreter)
1427 language *lang;
1428 const char **iname;
1430 if (interpreter == NULL)
1431 return NULL;
1432 for (lang = lang_names; lang->name != NULL; lang++)
1433 if (lang->interpreters != NULL)
1434 for (iname = lang->interpreters; *iname != NULL; iname++)
1435 if (streq (*iname, interpreter))
1436 return lang;
1438 return NULL;
1444 * Return a language given the file name.
1446 static language *
1447 get_language_from_filename (char *file, int case_sensitive)
1449 language *lang;
1450 const char **name, **ext, *suffix;
1452 /* Try whole file name first. */
1453 for (lang = lang_names; lang->name != NULL; lang++)
1454 if (lang->filenames != NULL)
1455 for (name = lang->filenames; *name != NULL; name++)
1456 if ((case_sensitive)
1457 ? streq (*name, file)
1458 : strcaseeq (*name, file))
1459 return lang;
1461 /* If not found, try suffix after last dot. */
1462 suffix = strrchr (file, '.');
1463 if (suffix == NULL)
1464 return NULL;
1465 suffix += 1;
1466 for (lang = lang_names; lang->name != NULL; lang++)
1467 if (lang->suffixes != NULL)
1468 for (ext = lang->suffixes; *ext != NULL; ext++)
1469 if ((case_sensitive)
1470 ? streq (*ext, suffix)
1471 : strcaseeq (*ext, suffix))
1472 return lang;
1473 return NULL;
1478 * This routine is called on each file argument.
1480 static void
1481 process_file_name (char *file, language *lang)
1483 FILE *inf;
1484 fdesc *fdp;
1485 compressor *compr;
1486 char *compressed_name, *uncompressed_name;
1487 char *ext, *real_name, *tmp_name;
1488 int retval;
1490 canonicalize_filename (file);
1491 if (streq (file, tagfile) && !streq (tagfile, "-"))
1493 error ("skipping inclusion of %s in self.", file);
1494 return;
1496 compr = get_compressor_from_suffix (file, &ext);
1497 if (compr)
1499 compressed_name = file;
1500 uncompressed_name = savenstr (file, ext - file);
1502 else
1504 compressed_name = NULL;
1505 uncompressed_name = file;
1508 /* If the canonicalized uncompressed name
1509 has already been dealt with, skip it silently. */
1510 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1512 assert (fdp->infname != NULL);
1513 if (streq (uncompressed_name, fdp->infname))
1514 goto cleanup;
1517 inf = fopen (file, "r" FOPEN_BINARY);
1518 if (inf)
1519 real_name = file;
1520 else
1522 int file_errno = errno;
1523 if (compressed_name)
1525 /* Try with the given suffix. */
1526 inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1527 if (inf)
1528 real_name = uncompressed_name;
1530 else
1532 /* Try all possible suffixes. */
1533 for (compr = compressors; compr->suffix != NULL; compr++)
1535 compressed_name = concat (file, ".", compr->suffix);
1536 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1537 if (inf)
1539 real_name = compressed_name;
1540 break;
1542 if (MSDOS)
1544 char *suf = compressed_name + strlen (file);
1545 size_t suflen = strlen (compr->suffix) + 1;
1546 for ( ; suf[1]; suf++, suflen--)
1548 memmove (suf, suf + 1, suflen);
1549 inf = fopen (compressed_name, "r" FOPEN_BINARY);
1550 if (inf)
1552 real_name = compressed_name;
1553 break;
1556 if (inf)
1557 break;
1559 free (compressed_name);
1560 compressed_name = NULL;
1563 if (! inf)
1565 errno = file_errno;
1566 perror (file);
1567 goto cleanup;
1571 if (real_name == compressed_name)
1573 fclose (inf);
1574 tmp_name = etags_mktmp ();
1575 if (!tmp_name)
1576 inf = NULL;
1577 else
1579 #if MSDOS || defined (DOS_NT)
1580 char *cmd1 = concat (compr->command, " \"", real_name);
1581 char *cmd = concat (cmd1, "\" > ", tmp_name);
1582 #else
1583 char *cmd1 = concat (compr->command, " '", real_name);
1584 char *cmd = concat (cmd1, "' > ", tmp_name);
1585 #endif
1586 free (cmd1);
1587 int tmp_errno;
1588 if (system (cmd) == -1)
1590 inf = NULL;
1591 tmp_errno = EINVAL;
1593 else
1595 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1596 tmp_errno = errno;
1598 free (cmd);
1599 errno = tmp_errno;
1602 if (!inf)
1604 perror (real_name);
1605 goto cleanup;
1609 process_file (inf, uncompressed_name, lang);
1611 retval = fclose (inf);
1612 if (real_name == compressed_name)
1614 remove (tmp_name);
1615 free (tmp_name);
1617 if (retval < 0)
1618 pfatal (file);
1620 cleanup:
1621 if (compressed_name != file)
1622 free (compressed_name);
1623 if (uncompressed_name != file)
1624 free (uncompressed_name);
1625 last_node = NULL;
1626 curfdp = NULL;
1627 return;
1630 static void
1631 process_file (FILE *fh, char *fn, language *lang)
1633 static const fdesc emptyfdesc;
1634 fdesc *fdp;
1636 infilename = fn;
1637 /* Create a new input file description entry. */
1638 fdp = xnew (1, fdesc);
1639 *fdp = emptyfdesc;
1640 fdp->next = fdhead;
1641 fdp->infname = savestr (fn);
1642 fdp->lang = lang;
1643 fdp->infabsname = absolute_filename (fn, cwd);
1644 fdp->infabsdir = absolute_dirname (fn, cwd);
1645 if (filename_is_absolute (fn))
1647 /* An absolute file name. Canonicalize it. */
1648 fdp->taggedfname = absolute_filename (fn, NULL);
1650 else
1652 /* A file name relative to cwd. Make it relative
1653 to the directory of the tags file. */
1654 fdp->taggedfname = relative_filename (fn, tagfiledir);
1656 fdp->usecharno = true; /* use char position when making tags */
1657 fdp->prop = NULL;
1658 fdp->written = false; /* not written on tags file yet */
1660 fdhead = fdp;
1661 curfdp = fdhead; /* the current file description */
1663 find_entries (fh);
1665 /* If not Ctags, and if this is not metasource and if it contained no #line
1666 directives, we can write the tags and free all nodes pointing to
1667 curfdp. */
1668 if (!CTAGS
1669 && curfdp->usecharno /* no #line directives in this file */
1670 && !curfdp->lang->metasource)
1672 node *np, *prev;
1674 /* Look for the head of the sublist relative to this file. See add_node
1675 for the structure of the node tree. */
1676 prev = NULL;
1677 for (np = nodehead; np != NULL; prev = np, np = np->left)
1678 if (np->fdp == curfdp)
1679 break;
1681 /* If we generated tags for this file, write and delete them. */
1682 if (np != NULL)
1684 /* This is the head of the last sublist, if any. The following
1685 instructions depend on this being true. */
1686 assert (np->left == NULL);
1688 assert (fdhead == curfdp);
1689 assert (last_node->fdp == curfdp);
1690 put_entries (np); /* write tags for file curfdp->taggedfname */
1691 free_tree (np); /* remove the written nodes */
1692 if (prev == NULL)
1693 nodehead = NULL; /* no nodes left */
1694 else
1695 prev->left = NULL; /* delete the pointer to the sublist */
1700 static void
1701 reset_input (FILE *inf)
1703 if (fseek (inf, 0, SEEK_SET) != 0)
1704 perror (infilename);
1708 * This routine opens the specified file and calls the function
1709 * which finds the function and type definitions.
1711 static void
1712 find_entries (FILE *inf)
1714 char *cp;
1715 language *lang = curfdp->lang;
1716 Lang_function *parser = NULL;
1718 /* If user specified a language, use it. */
1719 if (lang != NULL && lang->function != NULL)
1721 parser = lang->function;
1724 /* Else try to guess the language given the file name. */
1725 if (parser == NULL)
1727 lang = get_language_from_filename (curfdp->infname, true);
1728 if (lang != NULL && lang->function != NULL)
1730 curfdp->lang = lang;
1731 parser = lang->function;
1735 /* Else look for sharp-bang as the first two characters. */
1736 if (parser == NULL
1737 && readline_internal (&lb, inf, infilename) > 0
1738 && lb.len >= 2
1739 && lb.buffer[0] == '#'
1740 && lb.buffer[1] == '!')
1742 char *lp;
1744 /* Set lp to point at the first char after the last slash in the
1745 line or, if no slashes, at the first nonblank. Then set cp to
1746 the first successive blank and terminate the string. */
1747 lp = strrchr (lb.buffer+2, '/');
1748 if (lp != NULL)
1749 lp += 1;
1750 else
1751 lp = skip_spaces (lb.buffer + 2);
1752 cp = skip_non_spaces (lp);
1753 *cp = '\0';
1755 if (strlen (lp) > 0)
1757 lang = get_language_from_interpreter (lp);
1758 if (lang != NULL && lang->function != NULL)
1760 curfdp->lang = lang;
1761 parser = lang->function;
1766 reset_input (inf);
1768 /* Else try to guess the language given the case insensitive file name. */
1769 if (parser == NULL)
1771 lang = get_language_from_filename (curfdp->infname, false);
1772 if (lang != NULL && lang->function != NULL)
1774 curfdp->lang = lang;
1775 parser = lang->function;
1779 /* Else try Fortran or C. */
1780 if (parser == NULL)
1782 node *old_last_node = last_node;
1784 curfdp->lang = get_language_from_langname ("fortran");
1785 find_entries (inf);
1787 if (old_last_node == last_node)
1788 /* No Fortran entries found. Try C. */
1790 reset_input (inf);
1791 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1792 find_entries (inf);
1794 return;
1797 if (!no_line_directive
1798 && curfdp->lang != NULL && curfdp->lang->metasource)
1799 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1800 file, or anyway we parsed a file that is automatically generated from
1801 this one. If this is the case, the bingo.c file contained #line
1802 directives that generated tags pointing to this file. Let's delete
1803 them all before parsing this file, which is the real source. */
1805 fdesc **fdpp = &fdhead;
1806 while (*fdpp != NULL)
1807 if (*fdpp != curfdp
1808 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1809 /* We found one of those! We must delete both the file description
1810 and all tags referring to it. */
1812 fdesc *badfdp = *fdpp;
1814 /* Delete the tags referring to badfdp->taggedfname
1815 that were obtained from badfdp->infname. */
1816 invalidate_nodes (badfdp, &nodehead);
1818 *fdpp = badfdp->next; /* remove the bad description from the list */
1819 free_fdesc (badfdp);
1821 else
1822 fdpp = &(*fdpp)->next; /* advance the list pointer */
1825 assert (parser != NULL);
1827 /* Generic initializations before reading from file. */
1828 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1830 /* Generic initializations before parsing file with readline. */
1831 lineno = 0; /* reset global line number */
1832 charno = 0; /* reset global char number */
1833 linecharno = 0; /* reset global char number of line start */
1835 parser (inf);
1837 regex_tag_multiline ();
1842 * Check whether an implicitly named tag should be created,
1843 * then call `pfnote'.
1844 * NAME is a string that is internally copied by this function.
1846 * TAGS format specification
1847 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1848 * The following is explained in some more detail in etc/ETAGS.EBNF.
1850 * make_tag creates tags with "implicit tag names" (unnamed tags)
1851 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1852 * 1. NAME does not contain any of the characters in NONAM;
1853 * 2. LINESTART contains name as either a rightmost, or rightmost but
1854 * one character, substring;
1855 * 3. the character, if any, immediately before NAME in LINESTART must
1856 * be a character in NONAM;
1857 * 4. the character, if any, immediately after NAME in LINESTART must
1858 * also be a character in NONAM.
1860 * The implementation uses the notinname() macro, which recognizes the
1861 * characters stored in the string `nonam'.
1862 * etags.el needs to use the same characters that are in NONAM.
1864 static void
1865 make_tag (const char *name, /* tag name, or NULL if unnamed */
1866 int namelen, /* tag length */
1867 bool is_func, /* tag is a function */
1868 char *linestart, /* start of the line where tag is */
1869 int linelen, /* length of the line where tag is */
1870 int lno, /* line number */
1871 long int cno) /* character number */
1873 bool named = (name != NULL && namelen > 0);
1874 char *nname = NULL;
1876 if (!CTAGS && named) /* maybe set named to false */
1877 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1878 such that etags.el can guess a name from it. */
1880 int i;
1881 register const char *cp = name;
1883 for (i = 0; i < namelen; i++)
1884 if (notinname (*cp++))
1885 break;
1886 if (i == namelen) /* rule #1 */
1888 cp = linestart + linelen - namelen;
1889 if (notinname (linestart[linelen-1]))
1890 cp -= 1; /* rule #4 */
1891 if (cp >= linestart /* rule #2 */
1892 && (cp == linestart
1893 || notinname (cp[-1])) /* rule #3 */
1894 && strneq (name, cp, namelen)) /* rule #2 */
1895 named = false; /* use implicit tag name */
1899 if (named)
1900 nname = savenstr (name, namelen);
1902 pfnote (nname, is_func, linestart, linelen, lno, cno);
1905 /* Record a tag. */
1906 static void
1907 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1908 long int cno)
1909 /* tag name, or NULL if unnamed */
1910 /* tag is a function */
1911 /* start of the line where tag is */
1912 /* length of the line where tag is */
1913 /* line number */
1914 /* character number */
1916 register node *np;
1918 assert (name == NULL || name[0] != '\0');
1919 if (CTAGS && name == NULL)
1920 return;
1922 np = xnew (1, node);
1924 /* If ctags mode, change name "main" to M<thisfilename>. */
1925 if (CTAGS && !cxref_style && streq (name, "main"))
1927 char *fp = strrchr (curfdp->taggedfname, '/');
1928 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1929 fp = strrchr (np->name, '.');
1930 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1931 fp[0] = '\0';
1933 else
1934 np->name = name;
1935 np->valid = true;
1936 np->been_warned = false;
1937 np->fdp = curfdp;
1938 np->is_func = is_func;
1939 np->lno = lno;
1940 if (np->fdp->usecharno)
1941 /* Our char numbers are 0-base, because of C language tradition?
1942 ctags compatibility? old versions compatibility? I don't know.
1943 Anyway, since emacs's are 1-base we expect etags.el to take care
1944 of the difference. If we wanted to have 1-based numbers, we would
1945 uncomment the +1 below. */
1946 np->cno = cno /* + 1 */ ;
1947 else
1948 np->cno = invalidcharno;
1949 np->left = np->right = NULL;
1950 if (CTAGS && !cxref_style)
1952 if (strlen (linestart) < 50)
1953 np->regex = concat (linestart, "$", "");
1954 else
1955 np->regex = savenstr (linestart, 50);
1957 else
1958 np->regex = savenstr (linestart, linelen);
1960 add_node (np, &nodehead);
1964 * free_tree ()
1965 * recurse on left children, iterate on right children.
1967 static void
1968 free_tree (register node *np)
1970 while (np)
1972 register node *node_right = np->right;
1973 free_tree (np->left);
1974 free (np->name);
1975 free (np->regex);
1976 free (np);
1977 np = node_right;
1982 * free_fdesc ()
1983 * delete a file description
1985 static void
1986 free_fdesc (register fdesc *fdp)
1988 free (fdp->infname);
1989 free (fdp->infabsname);
1990 free (fdp->infabsdir);
1991 free (fdp->taggedfname);
1992 free (fdp->prop);
1993 free (fdp);
1997 * add_node ()
1998 * Adds a node to the tree of nodes. In etags mode, sort by file
1999 * name. In ctags mode, sort by tag name. Make no attempt at
2000 * balancing.
2002 * add_node is the only function allowed to add nodes, so it can
2003 * maintain state.
2005 static void
2006 add_node (node *np, node **cur_node_p)
2008 register int dif;
2009 register node *cur_node = *cur_node_p;
2011 if (cur_node == NULL)
2013 *cur_node_p = np;
2014 last_node = np;
2015 return;
2018 if (!CTAGS)
2019 /* Etags Mode */
2021 /* For each file name, tags are in a linked sublist on the right
2022 pointer. The first tags of different files are a linked list
2023 on the left pointer. last_node points to the end of the last
2024 used sublist. */
2025 if (last_node != NULL && last_node->fdp == np->fdp)
2027 /* Let's use the same sublist as the last added node. */
2028 assert (last_node->right == NULL);
2029 last_node->right = np;
2030 last_node = np;
2032 else if (cur_node->fdp == np->fdp)
2034 /* Scanning the list we found the head of a sublist which is
2035 good for us. Let's scan this sublist. */
2036 add_node (np, &cur_node->right);
2038 else
2039 /* The head of this sublist is not good for us. Let's try the
2040 next one. */
2041 add_node (np, &cur_node->left);
2042 } /* if ETAGS mode */
2044 else
2046 /* Ctags Mode */
2047 dif = strcmp (np->name, cur_node->name);
2050 * If this tag name matches an existing one, then
2051 * do not add the node, but maybe print a warning.
2053 if (no_duplicates && !dif)
2055 if (np->fdp == cur_node->fdp)
2057 if (!no_warnings)
2059 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2060 np->fdp->infname, lineno, np->name);
2061 fprintf (stderr, "Second entry ignored\n");
2064 else if (!cur_node->been_warned && !no_warnings)
2066 fprintf
2067 (stderr,
2068 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2069 np->fdp->infname, cur_node->fdp->infname, np->name);
2070 cur_node->been_warned = true;
2072 return;
2075 /* Actually add the node */
2076 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2077 } /* if CTAGS mode */
2081 * invalidate_nodes ()
2082 * Scan the node tree and invalidate all nodes pointing to the
2083 * given file description (CTAGS case) or free them (ETAGS case).
2085 static void
2086 invalidate_nodes (fdesc *badfdp, node **npp)
2088 node *np = *npp;
2090 if (np == NULL)
2091 return;
2093 if (CTAGS)
2095 if (np->left != NULL)
2096 invalidate_nodes (badfdp, &np->left);
2097 if (np->fdp == badfdp)
2098 np->valid = false;
2099 if (np->right != NULL)
2100 invalidate_nodes (badfdp, &np->right);
2102 else
2104 assert (np->fdp != NULL);
2105 if (np->fdp == badfdp)
2107 *npp = np->left; /* detach the sublist from the list */
2108 np->left = NULL; /* isolate it */
2109 free_tree (np); /* free it */
2110 invalidate_nodes (badfdp, npp);
2112 else
2113 invalidate_nodes (badfdp, &np->left);
2118 static int total_size_of_entries (node *);
2119 static int number_len (long) ATTRIBUTE_CONST;
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  Zero takes one digit.  */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1, num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2132 * Return total number of characters that put_entries will output for
2133 * the nodes in the linked list at the right of the specified node.
2134 * This count is irrelevant with etags.el since emacs 19.34 at least,
2135 * but is still supplied for backward compatibility.
2137 static int
2138 total_size_of_entries (register node *np)
2140 register int total = 0;
2142 for (; np != NULL; np = np->right)
2143 if (np->valid)
2145 total += strlen (np->regex) + 1; /* pat\177 */
2146 if (np->name != NULL)
2147 total += strlen (np->name) + 1; /* name\001 */
2148 total += number_len ((long) np->lno) + 1; /* lno, */
2149 if (np->cno != invalidcharno) /* cno */
2150 total += number_len (np->cno);
2151 total += 1; /* newline */
2154 return total;
2157 static void
2158 put_entries (register node *np)
2160 register char *sp;
2161 static fdesc *fdp = NULL;
2163 if (np == NULL)
2164 return;
2166 /* Output subentries that precede this one */
2167 if (CTAGS)
2168 put_entries (np->left);
2170 /* Output this entry */
2171 if (np->valid)
2173 if (!CTAGS)
2175 /* Etags mode */
2176 if (fdp != np->fdp)
2178 fdp = np->fdp;
2179 fprintf (tagf, "\f\n%s,%d\n",
2180 fdp->taggedfname, total_size_of_entries (np));
2181 fdp->written = true;
2183 fputs (np->regex, tagf);
2184 fputc ('\177', tagf);
2185 if (np->name != NULL)
2187 fputs (np->name, tagf);
2188 fputc ('\001', tagf);
2190 fprintf (tagf, "%d,", np->lno);
2191 if (np->cno != invalidcharno)
2192 fprintf (tagf, "%ld", np->cno);
2193 fputs ("\n", tagf);
2195 else
2197 /* Ctags mode */
2198 if (np->name == NULL)
2199 error ("internal error: NULL name in ctags mode.");
2201 if (cxref_style)
2203 if (vgrind_style)
2204 fprintf (stdout, "%s %s %d\n",
2205 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2206 else
2207 fprintf (stdout, "%-16s %3d %-16s %s\n",
2208 np->name, np->lno, np->fdp->taggedfname, np->regex);
2210 else
2212 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2214 if (np->is_func)
2215 { /* function or #define macro with args */
2216 putc (searchar, tagf);
2217 putc ('^', tagf);
2219 for (sp = np->regex; *sp; sp++)
2221 if (*sp == '\\' || *sp == searchar)
2222 putc ('\\', tagf);
2223 putc (*sp, tagf);
2225 putc (searchar, tagf);
2227 else
2228 { /* anything else; text pattern inadequate */
2229 fprintf (tagf, "%d", np->lno);
2231 putc ('\n', tagf);
2234 } /* if this node contains a valid tag */
2236 /* Output subentries that follow this one */
2237 put_entries (np->right);
2238 if (!CTAGS)
2239 put_entries (np->left);
/* C extensions: bit values describing the C-like language in use.  */
#define C_EXT	0x00fff		/* mask covering the C extension bits */
#define C_PLAIN	0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO	0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables: the kinds of token that the keyword table
 * (see C_stab_entry) can classify.  st_none is the first value and
 * is what C_symtype returns for a token that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
2275 if, 0, st_C_ignore
2276 for, 0, st_C_ignore
2277 while, 0, st_C_ignore
2278 switch, 0, st_C_ignore
2279 return, 0, st_C_ignore
2280 __attribute__, 0, st_C_attribute
2281 GTY, 0, st_C_attribute
2282 @interface, 0, st_C_objprot
2283 @protocol, 0, st_C_objprot
2284 @implementation,0, st_C_objimpl
2285 @end, 0, st_C_objend
2286 import, (C_JAVA & ~C_PLPL), st_C_ignore
2287 package, (C_JAVA & ~C_PLPL), st_C_ignore
2288 friend, C_PLPL, st_C_ignore
2289 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2290 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2291 interface, (C_JAVA & ~C_PLPL), st_C_struct
2292 class, 0, st_C_class
2293 namespace, C_PLPL, st_C_struct
2294 domain, C_STAR, st_C_struct
2295 union, 0, st_C_struct
2296 struct, 0, st_C_struct
2297 extern, 0, st_C_extern
2298 enum, 0, st_C_enum
2299 typedef, 0, st_C_typedef
2300 define, 0, st_C_define
2301 undef, 0, st_C_define
2302 operator, C_PLPL, st_C_operator
2303 template, 0, st_C_template
2304 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2305 DEFUN, 0, st_C_gnumacro
2306 SYSCALL, 0, st_C_gnumacro
2307 ENTRY, 0, st_C_gnumacro
2308 PSEUDO, 0, st_C_gnumacro
2309 # These are defined inside C functions, so currently they are not met.
2310 # EXFUN used in glibc, DEFVAR_* in emacs.
2311 #EXFUN, 0, st_C_gnumacro
#DEFVAR_,	0,			st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
2315 - remove the #if characterset check
2316 - make in_word_set static and not inline. */
2317 /*%<*/
2318 /* C code produced by gperf version 3.0.1 */
2319 /* Command-line: gperf -m 5 */
2320 /* Computed positions: -k'2-3' */
2322 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2323 /* maximum key range = 33, duplicates = 0 */
/* gperf-generated hash of the keyword STR of length LEN.  The value
   is LEN plus the association values of the second character and,
   unless LEN is 2, of the third character.  STR must therefore have
   at least two characters (three when LEN is not 2).  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
      35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
      23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
      0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
      4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  const unsigned char *key = (const unsigned char *) str;
  int hval = len;

  /* Two-character keywords have no third character to hash.  */
  if (len != 2)
    hval += asso_values[key[2]];
  hval += asso_values[key[1]];
  return hval;
}
2371 static struct C_stab_entry *
2372 in_word_set (register const char *str, register unsigned int len)
2374 enum
2376 TOTAL_KEYWORDS = 33,
2377 MIN_WORD_LENGTH = 2,
2378 MAX_WORD_LENGTH = 15,
2379 MIN_HASH_VALUE = 2,
2380 MAX_HASH_VALUE = 34
2383 static struct C_stab_entry wordlist[] =
2385 {""}, {""},
2386 {"if", 0, st_C_ignore},
2387 {"GTY", 0, st_C_attribute},
2388 {"@end", 0, st_C_objend},
2389 {"union", 0, st_C_struct},
2390 {"define", 0, st_C_define},
2391 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2392 {"template", 0, st_C_template},
2393 {"operator", C_PLPL, st_C_operator},
2394 {"@interface", 0, st_C_objprot},
2395 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2396 {"friend", C_PLPL, st_C_ignore},
2397 {"typedef", 0, st_C_typedef},
2398 {"return", 0, st_C_ignore},
2399 {"@implementation",0, st_C_objimpl},
2400 {"@protocol", 0, st_C_objprot},
2401 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2402 {"extern", 0, st_C_extern},
2403 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2404 {"struct", 0, st_C_struct},
2405 {"domain", C_STAR, st_C_struct},
2406 {"switch", 0, st_C_ignore},
2407 {"enum", 0, st_C_enum},
2408 {"for", 0, st_C_ignore},
2409 {"namespace", C_PLPL, st_C_struct},
2410 {"class", 0, st_C_class},
2411 {"while", 0, st_C_ignore},
2412 {"undef", 0, st_C_define},
2413 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2414 {"__attribute__", 0, st_C_attribute},
2415 {"SYSCALL", 0, st_C_gnumacro},
2416 {"ENTRY", 0, st_C_gnumacro},
2417 {"PSEUDO", 0, st_C_gnumacro},
2418 {"DEFUN", 0, st_C_gnumacro}
2421 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2423 int key = hash (str, len);
2425 if (key <= MAX_HASH_VALUE && key >= 0)
2427 const char *s = wordlist[key].name;
2429 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2430 return &wordlist[key];
2433 return 0;
2435 /*%>*/
2437 static enum sym_type
2438 C_symtype (char *str, int len, int c_ext)
2440 register struct C_stab_entry *se = in_word_set (str, len);
2442 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2443 return st_none;
2444 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;	/* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* These three members can be used to pass strings around for
     generic purposes.  */
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */

  /* The remaining members specifically refer to creating tags.  In
     this case the token contained here is the pattern that will be
     used to create a tag.  */
  bool valid;			/* when false, do not create a tag; the
				   token should be invalidated whenever a
				   state machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2562 * Variables and functions for dealing with nested structures.
2563 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2565 static void pushclass_above (int, char *, int);
2566 static void popclass_above (int);
2567 static void write_classname (linebuffer *, const char *qualifier);
/* Stack for nested declaration tags.  cname and bracelev are parallel
   arrays: entry I holds the name of a nested class and the brace
   level recorded for it.  */
static struct
{
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression using the variable `bracelev' of the
   enclosing scope.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2581 static void
2582 pushclass_above (int bracelev, char *str, int len)
2584 int nl;
2586 popclass_above (bracelev);
2587 nl = cstack.nl;
2588 if (nl >= cstack.size)
2590 int size = cstack.size *= 2;
2591 xrnew (cstack.cname, size, char *);
2592 xrnew (cstack.bracelev, size, int);
2594 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2595 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2596 cstack.bracelev[nl] = bracelev;
2597 cstack.nl = nl + 1;
2600 static void
2601 popclass_above (int bracelev)
2603 int nl;
2605 for (nl = cstack.nl - 1;
2606 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2607 nl--)
2609 free (cstack.cname[nl]);
2610 cstack.nl = nl;
2614 static void
2615 write_classname (linebuffer *cn, const char *qualifier)
2617 int i, len;
2618 int qlen = strlen (qualifier);
2620 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2622 len = 0;
2623 cn->len = 0;
2624 cn->buffer[0] = '\0';
2626 else
2628 len = strlen (cstack.cname[0]);
2629 linebuffer_setlen (cn, len);
2630 strcpy (cn->buffer, cstack.cname[0]);
2632 for (i = 1; i < cstack.nl; i++)
2634 char *s = cstack.cname[i];
2635 if (s == NULL)
2636 continue;
2637 linebuffer_setlen (cn, len + qlen + strlen (s));
2638 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2643 static bool consider_token (char *, int, int, int *, int, int, bool *);
2644 static void make_C_tag (bool);
2647 * consider_token ()
2648 * checks to see if the current token is at the start of a
2649 * function or variable, or corresponds to a typedef, or
2650 * is a struct/union/enum tag, or #define, or an enum constant.
2652 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2653 * with args. C_EXTP points to which language we are looking at.
2655 * Globals
2656 * fvdef IN OUT
2657 * structdef IN OUT
2658 * definedef IN OUT
2659 * typdef IN OUT
2660 * objdef IN OUT
2663 static bool
2664 consider_token (char *str, int len, int c, int *c_extp,
2665 int bracelev, int parlev, bool *is_func_or_var)
2666 /* IN: token pointer */
2667 /* IN: token length */
2668 /* IN: first char after the token */
2669 /* IN, OUT: C extensions mask */
2670 /* IN: brace level */
2671 /* IN: parenthesis level */
2672 /* OUT: function or variable found */
2674 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2675 structtype is the type of the preceding struct-like keyword, and
2676 structbracelev is the brace level where it has been seen. */
2677 static enum sym_type structtype;
2678 static int structbracelev;
2679 static enum sym_type toktype;
2682 toktype = C_symtype (str, len, *c_extp);
2685 * Skip __attribute__
2687 if (toktype == st_C_attribute)
2689 inattribute = true;
2690 return false;
2694 * Advance the definedef state machine.
2696 switch (definedef)
2698 case dnone:
2699 /* We're not on a preprocessor line. */
2700 if (toktype == st_C_gnumacro)
2702 fvdef = fdefunkey;
2703 return false;
2705 break;
2706 case dsharpseen:
2707 if (toktype == st_C_define)
2709 definedef = ddefineseen;
2711 else
2713 definedef = dignorerest;
2715 return false;
2716 case ddefineseen:
2718 * Make a tag for any macro, unless it is a constant
2719 * and constantypedefs is false.
2721 definedef = dignorerest;
2722 *is_func_or_var = (c == '(');
2723 if (!*is_func_or_var && !constantypedefs)
2724 return false;
2725 else
2726 return true;
2727 case dignorerest:
2728 return false;
2729 default:
2730 error ("internal error: definedef value.");
2734 * Now typedefs
2736 switch (typdef)
2738 case tnone:
2739 if (toktype == st_C_typedef)
2741 if (typedefs)
2742 typdef = tkeyseen;
2743 fvextern = false;
2744 fvdef = fvnone;
2745 return false;
2747 break;
2748 case tkeyseen:
2749 switch (toktype)
2751 case st_none:
2752 case st_C_class:
2753 case st_C_struct:
2754 case st_C_enum:
2755 typdef = ttypeseen;
2757 break;
2758 case ttypeseen:
2759 if (structdef == snone && fvdef == fvnone)
2761 fvdef = fvnameseen;
2762 return true;
2764 break;
2765 case tend:
2766 switch (toktype)
2768 case st_C_class:
2769 case st_C_struct:
2770 case st_C_enum:
2771 return false;
2773 return true;
2776 switch (toktype)
2778 case st_C_javastruct:
2779 if (structdef == stagseen)
2780 structdef = scolonseen;
2781 return false;
2782 case st_C_template:
2783 case st_C_class:
2784 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2785 && bracelev == 0
2786 && definedef == dnone && structdef == snone
2787 && typdef == tnone && fvdef == fvnone)
2788 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2789 if (toktype == st_C_template)
2790 break;
2791 /* FALLTHRU */
2792 case st_C_struct:
2793 case st_C_enum:
2794 if (parlev == 0
2795 && fvdef != vignore
2796 && (typdef == tkeyseen
2797 || (typedefs_or_cplusplus && structdef == snone)))
2799 structdef = skeyseen;
2800 structtype = toktype;
2801 structbracelev = bracelev;
2802 if (fvdef == fvnameseen)
2803 fvdef = fvnone;
2805 return false;
2808 if (structdef == skeyseen)
2810 structdef = stagseen;
2811 return true;
2814 if (typdef != tnone)
2815 definedef = dnone;
2817 /* Detect Objective C constructs. */
2818 switch (objdef)
2820 case onone:
2821 switch (toktype)
2823 case st_C_objprot:
2824 objdef = oprotocol;
2825 return false;
2826 case st_C_objimpl:
2827 objdef = oimplementation;
2828 return false;
2830 break;
2831 case oimplementation:
2832 /* Save the class tag for functions or variables defined inside. */
2833 objtag = savenstr (str, len);
2834 objdef = oinbody;
2835 return false;
2836 case oprotocol:
2837 /* Save the class tag for categories. */
2838 objtag = savenstr (str, len);
2839 objdef = otagseen;
2840 *is_func_or_var = true;
2841 return true;
2842 case oparenseen:
2843 objdef = ocatseen;
2844 *is_func_or_var = true;
2845 return true;
2846 case oinbody:
2847 break;
2848 case omethodsign:
2849 if (parlev == 0)
2851 fvdef = fvnone;
2852 objdef = omethodtag;
2853 linebuffer_setlen (&token_name, len);
2854 memcpy (token_name.buffer, str, len);
2855 token_name.buffer[len] = '\0';
2856 return true;
2858 return false;
2859 case omethodcolon:
2860 if (parlev == 0)
2861 objdef = omethodparm;
2862 return false;
2863 case omethodparm:
2864 if (parlev == 0)
2866 objdef = omethodtag;
2867 if (class_qualify)
2869 int oldlen = token_name.len;
2870 fvdef = fvnone;
2871 linebuffer_setlen (&token_name, oldlen + len);
2872 memcpy (token_name.buffer + oldlen, str, len);
2873 token_name.buffer[oldlen + len] = '\0';
2875 return true;
2877 return false;
2878 case oignore:
2879 if (toktype == st_C_objend)
2881 /* Memory leakage here: the string pointed by objtag is
2882 never released, because many tests would be needed to
2883 avoid breaking on incorrect input code. The amount of
2884 memory leaked here is the sum of the lengths of the
2885 class tags.
2886 free (objtag); */
2887 objdef = onone;
2889 return false;
2892 /* A function, variable or enum constant? */
2893 switch (toktype)
2895 case st_C_extern:
2896 fvextern = true;
2897 switch (fvdef)
2899 case finlist:
2900 case flistseen:
2901 case fignore:
2902 case vignore:
2903 break;
2904 default:
2905 fvdef = fvnone;
2907 return false;
2908 case st_C_ignore:
2909 fvextern = false;
2910 fvdef = vignore;
2911 return false;
2912 case st_C_operator:
2913 fvdef = foperator;
2914 *is_func_or_var = true;
2915 return true;
2916 case st_none:
2917 if (constantypedefs
2918 && structdef == snone
2919 && structtype == st_C_enum && bracelev > structbracelev
2920 /* Don't tag tokens in expressions that assign values to enum
2921 constants. */
2922 && fvdef != vignore)
2923 return true; /* enum constant */
2924 switch (fvdef)
2926 case fdefunkey:
2927 if (bracelev > 0)
2928 break;
2929 fvdef = fdefunname; /* GNU macro */
2930 *is_func_or_var = true;
2931 return true;
2932 case fvnone:
2933 switch (typdef)
2935 case ttypeseen:
2936 return false;
2937 case tnone:
2938 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2939 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2941 fvdef = vignore;
2942 return false;
2944 break;
2946 /* FALLTHRU */
2947 case fvnameseen:
2948 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2950 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2951 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2952 fvdef = foperator;
2953 *is_func_or_var = true;
2954 return true;
2956 if (bracelev > 0 && !instruct)
2957 break;
2958 fvdef = fvnameseen; /* function or variable */
2959 *is_func_or_var = true;
2960 return true;
2962 break;
2965 return false;
2970 * C_entries often keeps pointers to tokens or lines which are older than
2971 * the line currently read. By keeping two line buffers, and switching
2972 * them at end of line, it is possible to use those pointers.
2974 static struct
2976 long linepos;
2977 linebuffer lb;
2978 } lbs[2];
2980 #define current_lb_is_new (newndx == curndx)
2981 #define switch_line_buffers() (curndx = 1 - curndx)
2983 #define curlb (lbs[curndx].lb)
2984 #define newlb (lbs[newndx].lb)
2985 #define curlinepos (lbs[curndx].linepos)
2986 #define newlinepos (lbs[newndx].linepos)
2988 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2989 #define cplpl (c_ext & C_PLPL)
2990 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2992 #define CNL_SAVE_DEFINEDEF() \
2993 do { \
2994 curlinepos = charno; \
2995 readline (&curlb, inf); \
2996 lp = curlb.buffer; \
2997 quotednl = false; \
2998 newndx = curndx; \
2999 } while (0)
3001 #define CNL() \
3002 do { \
3003 CNL_SAVE_DEFINEDEF (); \
3004 if (savetoken.valid) \
3006 token = savetoken; \
3007 savetoken.valid = false; \
3009 definedef = dnone; \
3010 } while (0)
3013 static void
3014 make_C_tag (bool isfun)
3016 /* This function is never called when token.valid is false, but
3017 we must protect against invalid input or internal errors. */
3018 if (token.valid)
3019 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3020 token.offset+token.length+1, token.lineno, token.linepos);
3021 else if (DEBUG)
3022 { /* this branch is optimized away if !DEBUG */
3023 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3024 token_name.len + 17, isfun, token.line,
3025 token.offset+token.length+1, token.lineno, token.linepos);
3026 error ("INVALID TOKEN");
3029 token.valid = false;
/* Return true unless the end-of-file or the error indicator is set
   for the stream INF.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !(feof (inf) || ferror (inf));
}
3040 * C_entries ()
3041 * This routine finds functions, variables, typedefs,
3042 * #define's, enum constants and struct/union/enum definitions in
3043 * C syntax and adds them to the list.
3045 static void
3046 C_entries (int c_ext, FILE *inf)
3047 /* extension of C */
3048 /* input file */
3050 register char c; /* latest char read; '\0' for end of line */
3051 register char *lp; /* pointer one beyond the character `c' */
3052 int curndx, newndx; /* indices for current and new lb */
3053 register int tokoff; /* offset in line of start of current token */
3054 register int toklen; /* length of current token */
3055 const char *qualifier; /* string used to qualify names */
3056 int qlen; /* length of qualifier */
3057 int bracelev; /* current brace level */
3058 int bracketlev; /* current bracket level */
3059 int parlev; /* current parenthesis level */
3060 int attrparlev; /* __attribute__ parenthesis level */
3061 int templatelev; /* current template level */
3062 int typdefbracelev; /* bracelev where a typedef struct body begun */
3063 bool incomm, inquote, inchar, quotednl, midtoken;
3064 bool yacc_rules; /* in the rules part of a yacc file */
3065 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3068 linebuffer_init (&lbs[0].lb);
3069 linebuffer_init (&lbs[1].lb);
3070 if (cstack.size == 0)
3072 cstack.size = (DEBUG) ? 1 : 4;
3073 cstack.nl = 0;
3074 cstack.cname = xnew (cstack.size, char *);
3075 cstack.bracelev = xnew (cstack.size, int);
3078 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3079 curndx = newndx = 0;
3080 lp = curlb.buffer;
3081 *lp = 0;
3083 fvdef = fvnone; fvextern = false; typdef = tnone;
3084 structdef = snone; definedef = dnone; objdef = onone;
3085 yacc_rules = false;
3086 midtoken = inquote = inchar = incomm = quotednl = false;
3087 token.valid = savetoken.valid = false;
3088 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3089 if (cjava)
3090 { qualifier = "."; qlen = 1; }
3091 else
3092 { qualifier = "::"; qlen = 2; }
3095 while (perhaps_more_input (inf))
3097 c = *lp++;
3098 if (c == '\\')
3100 /* If we are at the end of the line, the next character is a
3101 '\0'; do not skip it, because it is what tells us
3102 to read the next line. */
3103 if (*lp == '\0')
3105 quotednl = true;
3106 continue;
3108 lp++;
3109 c = ' ';
3111 else if (incomm)
3113 switch (c)
3115 case '*':
3116 if (*lp == '/')
3118 c = *lp++;
3119 incomm = false;
3121 break;
3122 case '\0':
3123 /* Newlines inside comments do not end macro definitions in
3124 traditional cpp. */
3125 CNL_SAVE_DEFINEDEF ();
3126 break;
3128 continue;
3130 else if (inquote)
3132 switch (c)
3134 case '"':
3135 inquote = false;
3136 break;
3137 case '\0':
3138 /* Newlines inside strings do not end macro definitions
3139 in traditional cpp, even though compilers don't
3140 usually accept them. */
3141 CNL_SAVE_DEFINEDEF ();
3142 break;
3144 continue;
3146 else if (inchar)
3148 switch (c)
3150 case '\0':
3151 /* Hmmm, something went wrong. */
3152 CNL ();
3153 /* FALLTHRU */
3154 case '\'':
3155 inchar = false;
3156 break;
3158 continue;
3160 else switch (c)
3162 case '"':
3163 inquote = true;
3164 if (bracketlev > 0)
3165 continue;
3166 if (inattribute)
3167 break;
3168 switch (fvdef)
3170 case fdefunkey:
3171 case fstartlist:
3172 case finlist:
3173 case fignore:
3174 case vignore:
3175 break;
3176 default:
3177 fvextern = false;
3178 fvdef = fvnone;
3180 continue;
3181 case '\'':
3182 inchar = true;
3183 if (bracketlev > 0)
3184 continue;
3185 if (inattribute)
3186 break;
3187 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3189 fvextern = false;
3190 fvdef = fvnone;
3192 continue;
3193 case '/':
3194 if (*lp == '*')
3196 incomm = true;
3197 lp++;
3198 c = ' ';
3199 if (bracketlev > 0)
3200 continue;
3202 else if (/* cplpl && */ *lp == '/')
3204 c = '\0';
3206 break;
3207 case '%':
3208 if ((c_ext & YACC) && *lp == '%')
3210 /* Entering or exiting rules section in yacc file. */
3211 lp++;
3212 definedef = dnone; fvdef = fvnone; fvextern = false;
3213 typdef = tnone; structdef = snone;
3214 midtoken = inquote = inchar = incomm = quotednl = false;
3215 bracelev = 0;
3216 yacc_rules = !yacc_rules;
3217 continue;
3219 else
3220 break;
3221 case '#':
3222 if (definedef == dnone)
3224 char *cp;
3225 bool cpptoken = true;
3227 /* Look back on this line. If all blanks, or nonblanks
3228 followed by an end of comment, this is a preprocessor
3229 token. */
3230 for (cp = newlb.buffer; cp < lp-1; cp++)
3231 if (!c_isspace (*cp))
3233 if (*cp == '*' && cp[1] == '/')
3235 cp++;
3236 cpptoken = true;
3238 else
3239 cpptoken = false;
3241 if (cpptoken)
3243 definedef = dsharpseen;
3244 /* This is needed for tagging enum values: when there are
3245 preprocessor conditionals inside the enum, we need to
3246 reset the value of fvdef so that the next enum value is
3247 tagged even though the one before it did not end in a
3248 comma. */
3249 if (fvdef == vignore && instruct && parlev == 0)
3251 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3252 fvdef = fvnone;
3255 } /* if (definedef == dnone) */
3256 continue;
3257 case '[':
3258 bracketlev++;
3259 continue;
3260 default:
3261 if (bracketlev > 0)
3263 if (c == ']')
3264 --bracketlev;
3265 else if (c == '\0')
3266 CNL_SAVE_DEFINEDEF ();
3267 continue;
3269 break;
3270 } /* switch (c) */
3273 /* Consider token only if some involved conditions are satisfied. */
3274 if (typdef != tignore
3275 && definedef != dignorerest
3276 && fvdef != finlist
3277 && templatelev == 0
3278 && (definedef != dnone
3279 || structdef != scolonseen)
3280 && !inattribute)
3282 if (midtoken)
3284 if (endtoken (c))
3286 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3287 /* This handles :: in the middle,
3288 but not at the beginning of an identifier.
3289 Also, space-separated :: is not recognized. */
3291 if (c_ext & C_AUTO) /* automatic detection of C++ */
3292 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3293 lp += 2;
3294 toklen += 2;
3295 c = lp[-1];
3296 goto still_in_token;
3298 else
3300 bool funorvar = false;
3302 if (yacc_rules
3303 || consider_token (newlb.buffer + tokoff, toklen, c,
3304 &c_ext, bracelev, parlev,
3305 &funorvar))
3307 if (fvdef == foperator)
3309 char *oldlp = lp;
3310 lp = skip_spaces (lp-1);
3311 if (*lp != '\0')
3312 lp += 1;
3313 while (*lp != '\0'
3314 && !c_isspace (*lp) && *lp != '(')
3315 lp += 1;
3316 c = *lp++;
3317 toklen += lp - oldlp;
3319 token.named = false;
3320 if (!plainc
3321 && nestlev > 0 && definedef == dnone)
3322 /* in struct body */
3324 if (class_qualify)
3326 int len;
3327 write_classname (&token_name, qualifier);
3328 len = token_name.len;
3329 linebuffer_setlen (&token_name,
3330 len + qlen + toklen);
3331 sprintf (token_name.buffer + len, "%s%.*s",
3332 qualifier, toklen,
3333 newlb.buffer + tokoff);
3335 else
3337 linebuffer_setlen (&token_name, toklen);
3338 sprintf (token_name.buffer, "%.*s",
3339 toklen, newlb.buffer + tokoff);
3341 token.named = true;
3343 else if (objdef == ocatseen)
3344 /* Objective C category */
3346 if (class_qualify)
3348 int len = strlen (objtag) + 2 + toklen;
3349 linebuffer_setlen (&token_name, len);
3350 sprintf (token_name.buffer, "%s(%.*s)",
3351 objtag, toklen,
3352 newlb.buffer + tokoff);
3354 else
3356 linebuffer_setlen (&token_name, toklen);
3357 sprintf (token_name.buffer, "%.*s",
3358 toklen, newlb.buffer + tokoff);
3360 token.named = true;
3362 else if (objdef == omethodtag
3363 || objdef == omethodparm)
3364 /* Objective C method */
3366 token.named = true;
3368 else if (fvdef == fdefunname)
3369 /* GNU DEFUN and similar macros */
3371 bool defun = (newlb.buffer[tokoff] == 'F');
3372 int off = tokoff;
3373 int len = toklen;
3375 /* Rewrite the tag so that emacs lisp DEFUNs
3376 can be found by their elisp name */
3377 if (defun)
3379 off += 1;
3380 len -= 1;
3382 linebuffer_setlen (&token_name, len);
3383 memcpy (token_name.buffer,
3384 newlb.buffer + off, len);
3385 token_name.buffer[len] = '\0';
3386 if (defun)
3387 while (--len >= 0)
3388 if (token_name.buffer[len] == '_')
3389 token_name.buffer[len] = '-';
3390 token.named = defun;
3392 else
3394 linebuffer_setlen (&token_name, toklen);
3395 memcpy (token_name.buffer,
3396 newlb.buffer + tokoff, toklen);
3397 token_name.buffer[toklen] = '\0';
3398 /* Name macros and members. */
3399 token.named = (structdef == stagseen
3400 || typdef == ttypeseen
3401 || typdef == tend
3402 || (funorvar
3403 && definedef == dignorerest)
3404 || (funorvar
3405 && definedef == dnone
3406 && structdef == snone
3407 && bracelev > 0));
3409 token.lineno = lineno;
3410 token.offset = tokoff;
3411 token.length = toklen;
3412 token.line = newlb.buffer;
3413 token.linepos = newlinepos;
3414 token.valid = true;
3416 if (definedef == dnone
3417 && (fvdef == fvnameseen
3418 || fvdef == foperator
3419 || structdef == stagseen
3420 || typdef == tend
3421 || typdef == ttypeseen
3422 || objdef != onone))
3424 if (current_lb_is_new)
3425 switch_line_buffers ();
3427 else if (definedef != dnone
3428 || fvdef == fdefunname
3429 || instruct)
3430 make_C_tag (funorvar);
3432 else /* not yacc and consider_token failed */
3434 if (inattribute && fvdef == fignore)
3436 /* We have just met __attribute__ after a
3437 function parameter list: do not tag the
3438 function again. */
3439 fvdef = fvnone;
3442 midtoken = false;
3444 } /* if (endtoken (c)) */
3445 else if (intoken (c))
3446 still_in_token:
3448 toklen++;
3449 continue;
3451 } /* if (midtoken) */
3452 else if (begtoken (c))
3454 switch (definedef)
3456 case dnone:
3457 switch (fvdef)
3459 case fstartlist:
3460 /* This prevents tagging fb in
3461 void (__attribute__((noreturn)) *fb) (void);
3462 Fixing this is not easy and not very important. */
3463 fvdef = finlist;
3464 continue;
3465 case flistseen:
3466 if (plainc || declarations)
3468 make_C_tag (true); /* a function */
3469 fvdef = fignore;
3471 break;
3473 if (structdef == stagseen && !cjava)
3475 popclass_above (bracelev);
3476 structdef = snone;
3478 break;
3479 case dsharpseen:
3480 savetoken = token;
3481 break;
3483 if (!yacc_rules || lp == newlb.buffer + 1)
3485 tokoff = lp - 1 - newlb.buffer;
3486 toklen = 1;
3487 midtoken = true;
3489 continue;
3490 } /* if (begtoken) */
3491 } /* if must look at token */
3494 /* Detect end of line, colon, comma, semicolon and various braces
3495 after having handled a token.*/
3496 switch (c)
3498 case ':':
3499 if (inattribute)
3500 break;
3501 if (yacc_rules && token.offset == 0 && token.valid)
3503 make_C_tag (false); /* a yacc function */
3504 break;
3506 if (definedef != dnone)
3507 break;
3508 switch (objdef)
3510 case otagseen:
3511 objdef = oignore;
3512 make_C_tag (true); /* an Objective C class */
3513 break;
3514 case omethodtag:
3515 case omethodparm:
3516 objdef = omethodcolon;
3517 if (class_qualify)
3519 int toklen = token_name.len;
3520 linebuffer_setlen (&token_name, toklen + 1);
3521 strcpy (token_name.buffer + toklen, ":");
3523 break;
3525 if (structdef == stagseen)
3527 structdef = scolonseen;
3528 break;
3530 /* Should be useless, but may work as a safety net. */
3531 if (cplpl && fvdef == flistseen)
3533 make_C_tag (true); /* a function */
3534 fvdef = fignore;
3535 break;
3537 break;
3538 case ';':
3539 if (definedef != dnone || inattribute)
3540 break;
3541 switch (typdef)
3543 case tend:
3544 case ttypeseen:
3545 make_C_tag (false); /* a typedef */
3546 typdef = tnone;
3547 fvdef = fvnone;
3548 break;
3549 case tnone:
3550 case tinbody:
3551 case tignore:
3552 switch (fvdef)
3554 case fignore:
3555 if (typdef == tignore || cplpl)
3556 fvdef = fvnone;
3557 break;
3558 case fvnameseen:
3559 if ((globals && bracelev == 0 && (!fvextern || declarations))
3560 || (members && instruct))
3561 make_C_tag (false); /* a variable */
3562 fvextern = false;
3563 fvdef = fvnone;
3564 token.valid = false;
3565 break;
3566 case flistseen:
3567 if ((declarations
3568 && (cplpl || !instruct)
3569 && (typdef == tnone || (typdef != tignore && instruct)))
3570 || (members
3571 && plainc && instruct))
3572 make_C_tag (true); /* a function */
3573 /* FALLTHRU */
3574 default:
3575 fvextern = false;
3576 fvdef = fvnone;
3577 if (declarations
3578 && cplpl && structdef == stagseen)
3579 make_C_tag (false); /* forward declaration */
3580 else
3581 token.valid = false;
3582 } /* switch (fvdef) */
3583 /* FALLTHRU */
3584 default:
3585 if (!instruct)
3586 typdef = tnone;
3588 if (structdef == stagseen)
3589 structdef = snone;
3590 break;
3591 case ',':
3592 if (definedef != dnone || inattribute)
3593 break;
3594 switch (objdef)
3596 case omethodtag:
3597 case omethodparm:
3598 make_C_tag (true); /* an Objective C method */
3599 objdef = oinbody;
3600 break;
3602 switch (fvdef)
3604 case fdefunkey:
3605 case foperator:
3606 case fstartlist:
3607 case finlist:
3608 case fignore:
3609 break;
3610 case vignore:
3611 if (instruct && parlev == 0)
3612 fvdef = fvnone;
3613 break;
3614 case fdefunname:
3615 fvdef = fignore;
3616 break;
3617 case fvnameseen:
3618 if (parlev == 0
3619 && ((globals
3620 && bracelev == 0
3621 && templatelev == 0
3622 && (!fvextern || declarations))
3623 || (members && instruct)))
3624 make_C_tag (false); /* a variable */
3625 break;
3626 case flistseen:
3627 if ((declarations && typdef == tnone && !instruct)
3628 || (members && typdef != tignore && instruct))
3630 make_C_tag (true); /* a function */
3631 fvdef = fvnameseen;
3633 else if (!declarations)
3634 fvdef = fvnone;
3635 token.valid = false;
3636 break;
3637 default:
3638 fvdef = fvnone;
3640 if (structdef == stagseen)
3641 structdef = snone;
3642 break;
3643 case ']':
3644 if (definedef != dnone || inattribute)
3645 break;
3646 if (structdef == stagseen)
3647 structdef = snone;
3648 switch (typdef)
3650 case ttypeseen:
3651 case tend:
3652 typdef = tignore;
3653 make_C_tag (false); /* a typedef */
3654 break;
3655 case tnone:
3656 case tinbody:
3657 switch (fvdef)
3659 case foperator:
3660 case finlist:
3661 case fignore:
3662 case vignore:
3663 break;
3664 case fvnameseen:
3665 if ((members && bracelev == 1)
3666 || (globals && bracelev == 0
3667 && (!fvextern || declarations)))
3668 make_C_tag (false); /* a variable */
3669 /* FALLTHRU */
3670 default:
3671 fvdef = fvnone;
3673 break;
3675 break;
3676 case '(':
3677 if (inattribute)
3679 attrparlev++;
3680 break;
3682 if (definedef != dnone)
3683 break;
3684 if (objdef == otagseen && parlev == 0)
3685 objdef = oparenseen;
3686 switch (fvdef)
3688 case fvnameseen:
3689 if (typdef == ttypeseen
3690 && *lp != '*'
3691 && !instruct)
3693 /* This handles constructs like:
3694 typedef void OperatorFun (int fun); */
3695 make_C_tag (false);
3696 typdef = tignore;
3697 fvdef = fignore;
3698 break;
3700 /* FALLTHRU */
3701 case foperator:
3702 fvdef = fstartlist;
3703 break;
3704 case flistseen:
3705 fvdef = finlist;
3706 break;
3708 parlev++;
3709 break;
3710 case ')':
3711 if (inattribute)
3713 if (--attrparlev == 0)
3714 inattribute = false;
3715 break;
3717 if (definedef != dnone)
3718 break;
3719 if (objdef == ocatseen && parlev == 1)
3721 make_C_tag (true); /* an Objective C category */
3722 objdef = oignore;
3724 if (--parlev == 0)
3726 switch (fvdef)
3728 case fstartlist:
3729 case finlist:
3730 fvdef = flistseen;
3731 break;
3733 if (!instruct
3734 && (typdef == tend
3735 || typdef == ttypeseen))
3737 typdef = tignore;
3738 make_C_tag (false); /* a typedef */
3741 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3742 parlev = 0;
3743 break;
3744 case '{':
3745 if (definedef != dnone)
3746 break;
3747 if (typdef == ttypeseen)
3749 /* Whenever typdef is set to tinbody (currently only
3750 here), typdefbracelev should be set to bracelev. */
3751 typdef = tinbody;
3752 typdefbracelev = bracelev;
3754 switch (fvdef)
3756 case flistseen:
3757 if (cplpl && !class_qualify)
3759 /* Remove class and namespace qualifiers from the token,
3760 leaving only the method/member name. */
3761 char *cc, *uqname = token_name.buffer;
3762 char *tok_end = token_name.buffer + token_name.len;
3764 for (cc = token_name.buffer; cc < tok_end; cc++)
3766 if (*cc == ':' && cc[1] == ':')
3768 uqname = cc + 2;
3769 cc++;
3772 if (uqname > token_name.buffer)
3774 int uqlen = strlen (uqname);
3775 linebuffer_setlen (&token_name, uqlen);
3776 memmove (token_name.buffer, uqname, uqlen + 1);
3779 make_C_tag (true); /* a function */
3780 /* FALLTHRU */
3781 case fignore:
3782 fvdef = fvnone;
3783 break;
3784 case fvnone:
3785 switch (objdef)
3787 case otagseen:
3788 make_C_tag (true); /* an Objective C class */
3789 objdef = oignore;
3790 break;
3791 case omethodtag:
3792 case omethodparm:
3793 make_C_tag (true); /* an Objective C method */
3794 objdef = oinbody;
3795 break;
3796 default:
3797 /* Neutralize `extern "C" {' grot. */
3798 if (bracelev == 0 && structdef == snone && nestlev == 0
3799 && typdef == tnone)
3800 bracelev = -1;
3802 break;
3804 switch (structdef)
3806 case skeyseen: /* unnamed struct */
3807 pushclass_above (bracelev, NULL, 0);
3808 structdef = snone;
3809 break;
3810 case stagseen: /* named struct or enum */
3811 case scolonseen: /* a class */
3812 pushclass_above (bracelev,token.line+token.offset, token.length);
3813 structdef = snone;
3814 make_C_tag (false); /* a struct or enum */
3815 break;
3817 bracelev += 1;
3818 break;
3819 case '*':
3820 if (definedef != dnone)
3821 break;
3822 if (fvdef == fstartlist)
3824 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3825 token.valid = false;
3827 break;
3828 case '}':
3829 if (definedef != dnone)
3830 break;
3831 bracelev -= 1;
3832 if (!ignoreindent && lp == newlb.buffer + 1)
3834 if (bracelev != 0)
3835 token.valid = false; /* unexpected value, token unreliable */
3836 bracelev = 0; /* reset brace level if first column */
3837 parlev = 0; /* also reset paren level, just in case... */
3839 else if (bracelev < 0)
3841 token.valid = false; /* something gone amiss, token unreliable */
3842 bracelev = 0;
3844 if (bracelev == 0 && fvdef == vignore)
3845 fvdef = fvnone; /* end of function */
3846 popclass_above (bracelev);
3847 structdef = snone;
3848 /* Only if typdef == tinbody is typdefbracelev significant. */
3849 if (typdef == tinbody && bracelev <= typdefbracelev)
3851 assert (bracelev == typdefbracelev);
3852 typdef = tend;
3854 break;
3855 case '=':
3856 if (definedef != dnone)
3857 break;
3858 switch (fvdef)
3860 case foperator:
3861 case finlist:
3862 case fignore:
3863 case vignore:
3864 break;
3865 case fvnameseen:
3866 if ((members && bracelev == 1)
3867 || (globals && bracelev == 0 && (!fvextern || declarations)))
3868 make_C_tag (false); /* a variable */
3869 /* FALLTHRU */
3870 default:
3871 fvdef = vignore;
3873 break;
3874 case '<':
3875 if (cplpl
3876 && (structdef == stagseen || fvdef == fvnameseen))
3878 templatelev++;
3879 break;
3881 goto resetfvdef;
3882 case '>':
3883 if (templatelev > 0)
3885 templatelev--;
3886 break;
3888 goto resetfvdef;
3889 case '+':
3890 case '-':
3891 if (objdef == oinbody && bracelev == 0)
3893 objdef = omethodsign;
3894 break;
3896 /* FALLTHRU */
3897 resetfvdef:
3898 case '#': case '~': case '&': case '%': case '/':
3899 case '|': case '^': case '!': case '.': case '?':
3900 if (definedef != dnone)
3901 break;
3902 /* These surely cannot follow a function tag in C. */
3903 switch (fvdef)
3905 case foperator:
3906 case finlist:
3907 case fignore:
3908 case vignore:
3909 break;
3910 default:
3911 fvdef = fvnone;
3913 break;
3914 case '\0':
3915 if (objdef == otagseen)
3917 make_C_tag (true); /* an Objective C class */
3918 objdef = oignore;
3920 /* If a macro spans multiple lines don't reset its state. */
3921 if (quotednl)
3922 CNL_SAVE_DEFINEDEF ();
3923 else
3924 CNL ();
3925 break;
3926 } /* switch (c) */
3928 } /* while not eof */
3930 free (lbs[0].lb.buffer);
3931 free (lbs[1].lb.buffer);
3935 * Process either a C++ file or a C file depending on the setting
3936 * of a global flag.
3938 static void
3939 default_C_entries (FILE *inf)
3941 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: no language-extension flags are passed.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3951 /* Always do C++. */
3952 static void
3953 Cplusplus_entries (FILE *inf)
3955 C_entries (C_PLPL, inf);
3958 /* Always do Java. */
3959 static void
3960 Cjava_entries (FILE *inf)
3962 C_entries (C_JAVA, inf);
3965 /* Always do C*. */
3966 static void
3967 Cstar_entries (FILE *inf)
3969 C_entries (C_STAR, inf);
3972 /* Always do Yacc. */
3973 static void
3974 Yacc_entries (FILE *inf)
3976 C_entries (YACC, inf);
/* Useful macros.  */

/* Loop header: while FILE_POINTER may have more input, read the next
   line into LINE_BUFFER and point CHAR_POINTER at the start of its
   text, then run the statement that follows the macro.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  while (perhaps_more_input (file_pointer)				\
	 && (readline (&(line_buffer), file_pointer),			\
	     (char_pointer) = (line_buffer).buffer,			\
	     true))

/* True if CP points at the keyword KW followed by a character for
   which notinname holds; in that case CP is advanced past the keyword
   and past any spaces after it.  KW must be a literal string: the
   `"" kw' concatenation is a syntax error otherwise.  CP is evaluated
   more than once and is left untouched when there is no match.  */
#define LOOKING_AT(cp, kw)						\
  (assert ("" kw),							\
   (strneq ((cp), kw, sizeof (kw)-1)		/* cp points at kw */	\
    && notinname ((cp)[sizeof (kw)-1])		/* end of kw */		\
    && ((cp) = skip_spaces ((cp)+sizeof (kw)-1)))) /* skip spaces */

/* Similar to LOOKING_AT, but the comparison is done with strncaseeq,
   no notinname check is made on the character after the keyword, and
   CP is advanced only past the keyword itself: following spaces are
   NOT skipped.  */
#define LOOKING_AT_NOCASE(cp, kw)					\
  (assert ("" kw),							\
   (strncaseeq ((cp), kw, sizeof (kw)-1)	/* cp points at kw */	\
    && ((cp) += sizeof (kw)-1)))		/* step over kw */
4000 * Read a file, but do no processing. This is used to do regexp
4001 * matching on files that have no language defined.
4003 static void
4004 just_read_file (FILE *inf)
4006 while (perhaps_more_input (inf))
4007 readline (&lb, inf);
/* Fortran parsing */

static void F_takeprec (void);
static void F_getit (FILE *inf);
4016 static void
4017 F_takeprec (void)
4019 dbp = skip_spaces (dbp);
4020 if (*dbp != '*')
4021 return;
4022 dbp++;
4023 dbp = skip_spaces (dbp);
4024 if (strneq (dbp, "(*)", 3))
4026 dbp += 3;
4027 return;
4029 if (!c_isdigit (*dbp))
4031 --dbp; /* force failure */
4032 return;
4035 dbp++;
4036 while (c_isdigit (*dbp));
4039 static void
4040 F_getit (FILE *inf)
4042 register char *cp;
4044 dbp = skip_spaces (dbp);
4045 if (*dbp == '\0')
4047 readline (&lb, inf);
4048 dbp = lb.buffer;
4049 if (dbp[5] != '&')
4050 return;
4051 dbp += 6;
4052 dbp = skip_spaces (dbp);
4054 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4055 return;
4056 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4057 continue;
4058 make_tag (dbp, cp-dbp, true,
4059 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4063 static void
4064 Fortran_functions (FILE *inf)
4066 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4068 if (*dbp == '%')
4069 dbp++; /* Ratfor escape to fortran */
4070 dbp = skip_spaces (dbp);
4071 if (*dbp == '\0')
4072 continue;
4074 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4075 dbp = skip_spaces (dbp);
4077 if (LOOKING_AT_NOCASE (dbp, "pure"))
4078 dbp = skip_spaces (dbp);
4080 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4081 dbp = skip_spaces (dbp);
4083 switch (c_tolower (*dbp))
4085 case 'i':
4086 if (nocase_tail ("integer"))
4087 F_takeprec ();
4088 break;
4089 case 'r':
4090 if (nocase_tail ("real"))
4091 F_takeprec ();
4092 break;
4093 case 'l':
4094 if (nocase_tail ("logical"))
4095 F_takeprec ();
4096 break;
4097 case 'c':
4098 if (nocase_tail ("complex") || nocase_tail ("character"))
4099 F_takeprec ();
4100 break;
4101 case 'd':
4102 if (nocase_tail ("double"))
4104 dbp = skip_spaces (dbp);
4105 if (*dbp == '\0')
4106 continue;
4107 if (nocase_tail ("precision"))
4108 break;
4109 continue;
4111 break;
4113 dbp = skip_spaces (dbp);
4114 if (*dbp == '\0')
4115 continue;
4116 switch (c_tolower (*dbp))
4118 case 'f':
4119 if (nocase_tail ("function"))
4120 F_getit (inf);
4121 continue;
4122 case 's':
4123 if (nocase_tail ("subroutine"))
4124 F_getit (inf);
4125 continue;
4126 case 'e':
4127 if (nocase_tail ("entry"))
4128 F_getit (inf);
4129 continue;
4130 case 'b':
4131 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4133 dbp = skip_spaces (dbp);
4134 if (*dbp == '\0') /* assume un-named */
4135 make_tag ("blockdata", 9, true,
4136 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4137 else
4138 F_getit (inf); /* look for name */
4140 continue;
4147 * Ada parsing
4148 * Original code by
4149 * Philippe Waroquiers (1998)
4152 /* Once we are positioned after an "interesting" keyword, let's get
4153 the real tag value necessary. */
4154 static void
4155 Ada_getit (FILE *inf, const char *name_qualifier)
4157 register char *cp;
4158 char *name;
4159 char c;
4161 while (perhaps_more_input (inf))
4163 dbp = skip_spaces (dbp);
4164 if (*dbp == '\0'
4165 || (dbp[0] == '-' && dbp[1] == '-'))
4167 readline (&lb, inf);
4168 dbp = lb.buffer;
4170 switch (c_tolower (*dbp))
4172 case 'b':
4173 if (nocase_tail ("body"))
4175 /* Skipping body of procedure body or package body or ....
4176 resetting qualifier to body instead of spec. */
4177 name_qualifier = "/b";
4178 continue;
4180 break;
4181 case 't':
4182 /* Skipping type of task type or protected type ... */
4183 if (nocase_tail ("type"))
4184 continue;
4185 break;
4187 if (*dbp == '"')
4189 dbp += 1;
4190 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4191 continue;
4193 else
4195 dbp = skip_spaces (dbp);
4196 for (cp = dbp;
4197 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4198 cp++)
4199 continue;
4200 if (cp == dbp)
4201 return;
4203 c = *cp;
4204 *cp = '\0';
4205 name = concat (dbp, name_qualifier, "");
4206 *cp = c;
4207 make_tag (name, strlen (name), true,
4208 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4209 free (name);
4210 if (c == '"')
4211 dbp = cp + 1;
4212 return;
4216 static void
4217 Ada_funcs (FILE *inf)
4219 bool inquote = false;
4220 bool skip_till_semicolumn = false;
4222 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4224 while (*dbp != '\0')
4226 /* Skip a string i.e. "abcd". */
4227 if (inquote || (*dbp == '"'))
4229 dbp = strchr (dbp + !inquote, '"');
4230 if (dbp != NULL)
4232 inquote = false;
4233 dbp += 1;
4234 continue; /* advance char */
4236 else
4238 inquote = true;
4239 break; /* advance line */
4243 /* Skip comments. */
4244 if (dbp[0] == '-' && dbp[1] == '-')
4245 break; /* advance line */
4247 /* Skip character enclosed in single quote i.e. 'a'
4248 and skip single quote starting an attribute i.e. 'Image. */
4249 if (*dbp == '\'')
4251 dbp++ ;
4252 if (*dbp != '\0')
4253 dbp++;
4254 continue;
4257 if (skip_till_semicolumn)
4259 if (*dbp == ';')
4260 skip_till_semicolumn = false;
4261 dbp++;
4262 continue; /* advance char */
4265 /* Search for beginning of a token. */
4266 if (!begtoken (*dbp))
4268 dbp++;
4269 continue; /* advance char */
4272 /* We are at the beginning of a token. */
4273 switch (c_tolower (*dbp))
4275 case 'f':
4276 if (!packages_only && nocase_tail ("function"))
4277 Ada_getit (inf, "/f");
4278 else
4279 break; /* from switch */
4280 continue; /* advance char */
4281 case 'p':
4282 if (!packages_only && nocase_tail ("procedure"))
4283 Ada_getit (inf, "/p");
4284 else if (nocase_tail ("package"))
4285 Ada_getit (inf, "/s");
4286 else if (nocase_tail ("protected")) /* protected type */
4287 Ada_getit (inf, "/t");
4288 else
4289 break; /* from switch */
4290 continue; /* advance char */
4292 case 'u':
4293 if (typedefs && !packages_only && nocase_tail ("use"))
4295 /* when tagging types, avoid tagging use type Pack.Typename;
4296 for this, we will skip everything till a ; */
4297 skip_till_semicolumn = true;
4298 continue; /* advance char */
4301 case 't':
4302 if (!packages_only && nocase_tail ("task"))
4303 Ada_getit (inf, "/k");
4304 else if (typedefs && !packages_only && nocase_tail ("type"))
4306 Ada_getit (inf, "/t");
4307 while (*dbp != '\0')
4308 dbp += 1;
4310 else
4311 break; /* from switch */
4312 continue; /* advance char */
4315 /* Look for the end of the token. */
4316 while (!endtoken (*dbp))
4317 dbp++;
4319 } /* advance char */
4320 } /* advance line */
4325 * Unix and microcontroller assembly tag handling
4326 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4327 * Idea by Bob Weiner, Motorola Inc. (1994)
4329 static void
4330 Asm_labels (FILE *inf)
4332 register char *cp;
4334 LOOP_ON_INPUT_LINES (inf, lb, cp)
4336 /* If first char is alphabetic or one of [_.$], test for colon
4337 following identifier. */
4338 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4340 /* Read past label. */
4341 cp++;
4342 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4343 cp++;
4344 if (*cp == ':' || c_isspace (*cp))
4345 /* Found end of label, so copy it and add it to the table. */
4346 make_tag (lb.buffer, cp - lb.buffer, true,
4347 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4354 * Perl support
4355 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4356 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4357 * Perl variable names: /^(my|local).../
4358 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4359 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4360 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4362 static void
4363 Perl_functions (FILE *inf)
4365 char *package = savestr ("main"); /* current package name */
4366 register char *cp;
4368 LOOP_ON_INPUT_LINES (inf, lb, cp)
4370 cp = skip_spaces (cp);
4372 if (LOOKING_AT (cp, "package"))
4374 free (package);
4375 get_tag (cp, &package);
4377 else if (LOOKING_AT (cp, "sub"))
4379 char *pos, *sp;
4381 subr:
4382 sp = cp;
4383 while (!notinname (*cp))
4384 cp++;
4385 if (cp == sp)
4386 continue; /* nothing found */
4387 pos = strchr (sp, ':');
4388 if (pos && pos < cp && pos[1] == ':')
4389 /* The name is already qualified. */
4390 make_tag (sp, cp - sp, true,
4391 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4392 else
4393 /* Qualify it. */
4395 char savechar, *name;
4397 savechar = *cp;
4398 *cp = '\0';
4399 name = concat (package, "::", sp);
4400 *cp = savechar;
4401 make_tag (name, strlen (name), true,
4402 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4403 free (name);
4406 else if (LOOKING_AT (cp, "use constant")
4407 || LOOKING_AT (cp, "use constant::defer"))
4409 /* For hash style multi-constant like
4410 use constant { FOO => 123,
4411 BAR => 456 };
4412 only the first FOO is picked up. Parsing across the value
4413 expressions would be difficult in general, due to possible nested
4414 hashes, here-documents, etc. */
4415 if (*cp == '{')
4416 cp = skip_spaces (cp+1);
4417 goto subr;
4419 else if (globals) /* only if we are tagging global vars */
4421 /* Skip a qualifier, if any. */
4422 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4423 /* After "my" or "local", but before any following paren or space. */
4424 char *varstart = cp;
4426 if (qual /* should this be removed? If yes, how? */
4427 && (*cp == '$' || *cp == '@' || *cp == '%'))
4429 varstart += 1;
4431 cp++;
4432 while (c_isalnum (*cp) || *cp == '_');
4434 else if (qual)
4436 /* Should be examining a variable list at this point;
4437 could insist on seeing an open parenthesis. */
4438 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4439 cp++;
4441 else
4442 continue;
4444 make_tag (varstart, cp - varstart, false,
4445 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4448 free (package);
4453 * Python support
4454 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4455 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4456 * More ideas by seb bacon <seb@jamkit.com> (2002)
4458 static void
4459 Python_functions (FILE *inf)
4461 register char *cp;
4463 LOOP_ON_INPUT_LINES (inf, lb, cp)
4465 cp = skip_spaces (cp);
4466 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4468 char *name = cp;
4469 while (!notinname (*cp) && *cp != ':')
4470 cp++;
4471 make_tag (name, cp - name, true,
4472 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4479 * PHP support
4480 * Look for:
4481 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4482 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4483 * - /^[ \t]*define\(\"[^\"]+/
4484 * Only with --members:
4485 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4486 * Idea by Diez B. Roggisch (2001)
4488 static void
4489 PHP_functions (FILE *inf)
4491 char *cp, *name;
4492 bool search_identifier = false;
4494 LOOP_ON_INPUT_LINES (inf, lb, cp)
4496 cp = skip_spaces (cp);
4497 name = cp;
4498 if (search_identifier
4499 && *cp != '\0')
4501 while (!notinname (*cp))
4502 cp++;
4503 make_tag (name, cp - name, true,
4504 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4505 search_identifier = false;
4507 else if (LOOKING_AT (cp, "function"))
4509 if (*cp == '&')
4510 cp = skip_spaces (cp+1);
4511 if (*cp != '\0')
4513 name = cp;
4514 while (!notinname (*cp))
4515 cp++;
4516 make_tag (name, cp - name, true,
4517 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4519 else
4520 search_identifier = true;
4522 else if (LOOKING_AT (cp, "class"))
4524 if (*cp != '\0')
4526 name = cp;
4527 while (*cp != '\0' && !c_isspace (*cp))
4528 cp++;
4529 make_tag (name, cp - name, false,
4530 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4532 else
4533 search_identifier = true;
4535 else if (strneq (cp, "define", 6)
4536 && (cp = skip_spaces (cp+6))
4537 && *cp++ == '('
4538 && (*cp == '"' || *cp == '\''))
4540 char quote = *cp++;
4541 name = cp;
4542 while (*cp != quote && *cp != '\0')
4543 cp++;
4544 make_tag (name, cp - name, false,
4545 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4547 else if (members
4548 && LOOKING_AT (cp, "var")
4549 && *cp == '$')
4551 name = cp;
4552 while (!notinname (*cp))
4553 cp++;
4554 make_tag (name, cp - name, false,
4555 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4562 * Cobol tag functions
4563 * We could look for anything that could be a paragraph name.
4564 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4565 * Idea by Corny de Souza (1993)
4567 static void
4568 Cobol_paragraphs (FILE *inf)
4570 register char *bp, *ep;
4572 LOOP_ON_INPUT_LINES (inf, lb, bp)
4574 if (lb.len < 9)
4575 continue;
4576 bp += 8;
4578 /* If eoln, compiler option or comment ignore whole line. */
4579 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4580 continue;
4582 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4583 continue;
4584 if (*ep++ == '.')
4585 make_tag (bp, ep - bp, true,
4586 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4592 * Makefile support
4593 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4595 static void
4596 Makefile_targets (FILE *inf)
4598 register char *bp;
4600 LOOP_ON_INPUT_LINES (inf, lb, bp)
4602 if (*bp == '\t' || *bp == '#')
4603 continue;
4604 while (*bp != '\0' && *bp != '=' && *bp != ':')
4605 bp++;
4606 if (*bp == ':' || (globals && *bp == '='))
4608 /* We should detect if there is more than one tag, but we do not.
4609 We just skip initial and final spaces. */
4610 char * namestart = skip_spaces (lb.buffer);
4611 while (--bp > namestart)
4612 if (!notinname (*bp))
4613 break;
4614 make_tag (namestart, bp - namestart + 1, true,
4615 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4622 * Pascal parsing
4623 * Original code by Mosur K. Mohan (1989)
4625 * Locates tags for procedures & functions. Doesn't do any type- or
4626 * var-definitions. It does look for the keyword "extern" or
4627 * "forward" immediately following the procedure statement; if found,
4628 * the tag is skipped.
4630 static void
4631 Pascal_functions (FILE *inf)
4633 linebuffer tline; /* mostly copied from C_entries */
4634 long save_lcno;
4635 int save_lineno, namelen, taglen;
4636 char c, *name;
4638 bool /* each of these flags is true if: */
4639 incomment, /* point is inside a comment */
4640 inquote, /* point is inside '..' string */
4641 get_tagname, /* point is after PROCEDURE/FUNCTION
4642 keyword, so next item = potential tag */
4643 found_tag, /* point is after a potential tag */
4644 inparms, /* point is within parameter-list */
4645 verify_tag; /* point has passed the parm-list, so the
4646 next token will determine whether this
4647 is a FORWARD/EXTERN to be ignored, or
4648 whether it is a real tag */
4650 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4651 name = NULL; /* keep compiler quiet */
4652 dbp = lb.buffer;
4653 *dbp = '\0';
4654 linebuffer_init (&tline);
4656 incomment = inquote = false;
4657 found_tag = false; /* have a proc name; check if extern */
4658 get_tagname = false; /* found "procedure" keyword */
4659 inparms = false; /* found '(' after "proc" */
4660 verify_tag = false; /* check if "extern" is ahead */
4663 while (perhaps_more_input (inf)) /* long main loop to get next char */
4665 c = *dbp++;
4666 if (c == '\0') /* if end of line */
4668 readline (&lb, inf);
4669 dbp = lb.buffer;
4670 if (*dbp == '\0')
4671 continue;
4672 if (!((found_tag && verify_tag)
4673 || get_tagname))
4674 c = *dbp++; /* only if don't need *dbp pointing
4675 to the beginning of the name of
4676 the procedure or function */
4678 if (incomment)
4680 if (c == '}') /* within { } comments */
4681 incomment = false;
4682 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4684 dbp++;
4685 incomment = false;
4687 continue;
4689 else if (inquote)
4691 if (c == '\'')
4692 inquote = false;
4693 continue;
4695 else
4696 switch (c)
4698 case '\'':
4699 inquote = true; /* found first quote */
4700 continue;
4701 case '{': /* found open { comment */
4702 incomment = true;
4703 continue;
4704 case '(':
4705 if (*dbp == '*') /* found open (* comment */
4707 incomment = true;
4708 dbp++;
4710 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4711 inparms = true;
4712 continue;
4713 case ')': /* end of parms list */
4714 if (inparms)
4715 inparms = false;
4716 continue;
4717 case ';':
4718 if (found_tag && !inparms) /* end of proc or fn stmt */
4720 verify_tag = true;
4721 break;
4723 continue;
4725 if (found_tag && verify_tag && (*dbp != ' '))
4727 /* Check if this is an "extern" declaration. */
4728 if (*dbp == '\0')
4729 continue;
4730 if (c_tolower (*dbp) == 'e')
4732 if (nocase_tail ("extern")) /* superfluous, really! */
4734 found_tag = false;
4735 verify_tag = false;
4738 else if (c_tolower (*dbp) == 'f')
4740 if (nocase_tail ("forward")) /* check for forward reference */
4742 found_tag = false;
4743 verify_tag = false;
4746 if (found_tag && verify_tag) /* not external proc, so make tag */
4748 found_tag = false;
4749 verify_tag = false;
4750 make_tag (name, namelen, true,
4751 tline.buffer, taglen, save_lineno, save_lcno);
4752 continue;
4755 if (get_tagname) /* grab name of proc or fn */
4757 char *cp;
4759 if (*dbp == '\0')
4760 continue;
4762 /* Find block name. */
4763 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4764 continue;
4766 /* Save all values for later tagging. */
4767 linebuffer_setlen (&tline, lb.len);
4768 strcpy (tline.buffer, lb.buffer);
4769 save_lineno = lineno;
4770 save_lcno = linecharno;
4771 name = tline.buffer + (dbp - lb.buffer);
4772 namelen = cp - dbp;
4773 taglen = cp - lb.buffer + 1;
4775 dbp = cp; /* set dbp to e-o-token */
4776 get_tagname = false;
4777 found_tag = true;
4778 continue;
4780 /* And proceed to check for "extern". */
4782 else if (!incomment && !inquote && !found_tag)
4784 /* Check for proc/fn keywords. */
4785 switch (c_tolower (c))
4787 case 'p':
4788 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4789 get_tagname = true;
4790 continue;
4791 case 'f':
4792 if (nocase_tail ("unction"))
4793 get_tagname = true;
4794 continue;
4797 } /* while not eof */
4799 free (tline.buffer);
4804 * Lisp tag functions
4805 * look for (def or (DEF, quote or QUOTE
4808 static void L_getit (void);
4810 static void
4811 L_getit (void)
4813 if (*dbp == '\'') /* Skip prefix quote */
4814 dbp++;
4815 else if (*dbp == '(')
4817 dbp++;
4818 /* Try to skip "(quote " */
4819 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4820 /* Ok, then skip "(" before name in (defstruct (foo)) */
4821 dbp = skip_spaces (dbp);
4823 get_tag (dbp, NULL);
4826 static void
4827 Lisp_functions (FILE *inf)
4829 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4831 if (dbp[0] != '(')
4832 continue;
4834 /* "(defvar foo)" is a declaration rather than a definition. */
4835 if (! declarations)
4837 char *p = dbp + 1;
4838 if (LOOKING_AT (p, "defvar"))
4840 p = skip_name (p); /* past var name */
4841 p = skip_spaces (p);
4842 if (*p == ')')
4843 continue;
4847 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4848 dbp += 3;
4850 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4852 dbp = skip_non_spaces (dbp);
4853 dbp = skip_spaces (dbp);
4854 L_getit ();
4856 else
4858 /* Check for (foo::defmumble name-defined ... */
4860 dbp++;
4861 while (!notinname (*dbp) && *dbp != ':');
4862 if (*dbp == ':')
4865 dbp++;
4866 while (*dbp == ':');
4868 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4870 dbp = skip_non_spaces (dbp);
4871 dbp = skip_spaces (dbp);
4872 L_getit ();
4881 * Lua script language parsing
4882 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4884 * "function" and "local function" are tags if they start at column 1.
4886 static void
4887 Lua_functions (FILE *inf)
4889 register char *bp;
4891 LOOP_ON_INPUT_LINES (inf, lb, bp)
4893 if (bp[0] != 'f' && bp[0] != 'l')
4894 continue;
4896 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4898 if (LOOKING_AT (bp, "function"))
4899 get_tag (bp, NULL);
4905 * PostScript tags
4906 * Just look for lines where the first character is '/'
4907 * Also look at "defineps" for PSWrap
4908 * Ideas by:
4909 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4910 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4912 static void
4913 PS_functions (FILE *inf)
4915 register char *bp, *ep;
4917 LOOP_ON_INPUT_LINES (inf, lb, bp)
4919 if (bp[0] == '/')
4921 for (ep = bp+1;
4922 *ep != '\0' && *ep != ' ' && *ep != '{';
4923 ep++)
4924 continue;
4925 make_tag (bp, ep - bp, true,
4926 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4928 else if (LOOKING_AT (bp, "defineps"))
4929 get_tag (bp, NULL);
4935 * Forth tags
4936 * Ignore anything after \ followed by space or in ( )
4937 * Look for words defined by :
4938 * Look for constant, code, create, defer, value, and variable
4939 * OBP extensions: Look for buffer:, field,
4940 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4942 static void
4943 Forth_words (FILE *inf)
4945 register char *bp;
4947 LOOP_ON_INPUT_LINES (inf, lb, bp)
4948 while ((bp = skip_spaces (bp))[0] != '\0')
4949 if (bp[0] == '\\' && c_isspace (bp[1]))
4950 break; /* read next line */
4951 else if (bp[0] == '(' && c_isspace (bp[1]))
4952 do /* skip to ) or eol */
4953 bp++;
4954 while (*bp != ')' && *bp != '\0');
4955 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
4956 || LOOKING_AT_NOCASE (bp, "constant")
4957 || LOOKING_AT_NOCASE (bp, "code")
4958 || LOOKING_AT_NOCASE (bp, "create")
4959 || LOOKING_AT_NOCASE (bp, "defer")
4960 || LOOKING_AT_NOCASE (bp, "value")
4961 || LOOKING_AT_NOCASE (bp, "variable")
4962 || LOOKING_AT_NOCASE (bp, "buffer:")
4963 || LOOKING_AT_NOCASE (bp, "field"))
4964 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4965 else
4966 bp = skip_non_spaces (bp);
4971 * Scheme tag functions
4972 * look for (def... xyzzy
4973 * (def... (xyzzy
4974 * (def ... ((...(xyzzy ....
4975 * (set! xyzzy
4976 * Original code by Ken Haase (1985?)
4978 static void
4979 Scheme_functions (FILE *inf)
4981 register char *bp;
4983 LOOP_ON_INPUT_LINES (inf, lb, bp)
4985 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4987 bp = skip_non_spaces (bp+4);
4988 /* Skip over open parens and white space. Don't continue past
4989 '\0'. */
4990 while (*bp && notinname (*bp))
4991 bp++;
4992 get_tag (bp, NULL);
4994 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4995 get_tag (bp, NULL);
5000 /* Find tags in TeX and LaTeX input files. */
5002 /* TEX_toktab is a table of TeX control sequences that define tags.
5003 * Each entry records one such control sequence.
5005 * Original code from who knows whom.
5006 * Ideas by:
5007 * Stefan Monnier (2002)
5010 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5012 /* Default set of control sequences to put into TEX_toktab.
5013 The value of environment var TEXTAGS is prepended to this. */
5014 static const char *TEX_defenv = "\
5015 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5016 :part:appendix:entry:index:def\
5017 :newcommand:renewcommand:newenvironment:renewenvironment";
5019 static void TEX_decode_env (const char *, const char *);
5022 * TeX/LaTeX scanning loop.
5024 static void
5025 TeX_commands (FILE *inf)
5027 char *cp;
5028 linebuffer *key;
5030 char TEX_esc = '\0';
5031 char TEX_opgrp, TEX_clgrp;
5033 /* Initialize token table once from environment. */
5034 if (TEX_toktab == NULL)
5035 TEX_decode_env ("TEXTAGS", TEX_defenv);
5037 LOOP_ON_INPUT_LINES (inf, lb, cp)
5039 /* Look at each TEX keyword in line. */
5040 for (;;)
5042 /* Look for a TEX escape. */
5043 while (true)
5045 char c = *cp++;
5046 if (c == '\0' || c == '%')
5047 goto tex_next_line;
5049 /* Select either \ or ! as escape character, whichever comes
5050 first outside a comment. */
5051 if (!TEX_esc)
5052 switch (c)
5054 case '\\':
5055 TEX_esc = c;
5056 TEX_opgrp = '{';
5057 TEX_clgrp = '}';
5058 break;
5060 case '!':
5061 TEX_esc = c;
5062 TEX_opgrp = '<';
5063 TEX_clgrp = '>';
5064 break;
5067 if (c == TEX_esc)
5068 break;
5071 for (key = TEX_toktab; key->buffer != NULL; key++)
5072 if (strneq (cp, key->buffer, key->len))
5074 char *p;
5075 int namelen, linelen;
5076 bool opgrp = false;
5078 cp = skip_spaces (cp + key->len);
5079 if (*cp == TEX_opgrp)
5081 opgrp = true;
5082 cp++;
5084 for (p = cp;
5085 (!c_isspace (*p) && *p != '#' &&
5086 *p != TEX_opgrp && *p != TEX_clgrp);
5087 p++)
5088 continue;
5089 namelen = p - cp;
5090 linelen = lb.len;
5091 if (!opgrp || *p == TEX_clgrp)
5093 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5094 p++;
5095 linelen = p - lb.buffer + 1;
5097 make_tag (cp, namelen, true,
5098 lb.buffer, linelen, lineno, linecharno);
5099 goto tex_next_line; /* We only tag a line once */
5102 tex_next_line:
5107 /* Read environment and prepend it to the default string.
5108 Build token table. */
5109 static void
5110 TEX_decode_env (const char *evarname, const char *defenv)
5112 register const char *env, *p;
5113 int i, len;
5115 /* Append default string to environment. */
5116 env = getenv (evarname);
5117 if (!env)
5118 env = defenv;
5119 else
5120 env = concat (env, defenv, "");
5122 /* Allocate a token table */
5123 for (len = 1, p = env; (p = strchr (p, ':')); )
5124 if (*++p)
5125 len++;
5126 TEX_toktab = xnew (len, linebuffer);
5128 /* Unpack environment string into token table. Be careful about */
5129 /* zero-length strings (leading ':', "::" and trailing ':') */
5130 for (i = 0; *env != '\0';)
5132 p = strchr (env, ':');
5133 if (!p) /* End of environment string. */
5134 p = env + strlen (env);
5135 if (p - env > 0)
5136 { /* Only non-zero strings. */
5137 TEX_toktab[i].buffer = savenstr (env, p - env);
5138 TEX_toktab[i].len = p - env;
5139 i++;
5141 if (*p)
5142 env = p + 1;
5143 else
5145 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5146 TEX_toktab[i].len = 0;
5147 break;
5153 /* Texinfo support. Dave Love, Mar. 2000. */
5154 static void
5155 Texinfo_nodes (FILE *inf)
5157 char *cp, *start;
5158 LOOP_ON_INPUT_LINES (inf, lb, cp)
5159 if (LOOKING_AT (cp, "@node"))
5161 start = cp;
5162 while (*cp != '\0' && *cp != ',')
5163 cp++;
5164 make_tag (start, cp - start, true,
5165 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5171 * HTML support.
5172 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5173 * Contents of <a name=xxx> are tags with name xxx.
5175 * Francesco Potortì, 2002.
5177 static void
5178 HTML_labels (FILE *inf)
5180 bool getnext = false; /* next text outside of HTML tags is a tag */
5181 bool skiptag = false; /* skip to the end of the current HTML tag */
5182 bool intag = false; /* inside an html tag, looking for ID= */
5183 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5184 char *end;
5187 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5189 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5190 for (;;) /* loop on the same line */
5192 if (skiptag) /* skip HTML tag */
5194 while (*dbp != '\0' && *dbp != '>')
5195 dbp++;
5196 if (*dbp == '>')
5198 dbp += 1;
5199 skiptag = false;
5200 continue; /* look on the same line */
5202 break; /* go to next line */
5205 else if (intag) /* look for "name=" or "id=" */
5207 while (*dbp != '\0' && *dbp != '>'
5208 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5209 dbp++;
5210 if (*dbp == '\0')
5211 break; /* go to next line */
5212 if (*dbp == '>')
5214 dbp += 1;
5215 intag = false;
5216 continue; /* look on the same line */
5218 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5219 || LOOKING_AT_NOCASE (dbp, "id="))
5221 bool quoted = (dbp[0] == '"');
5223 if (quoted)
5224 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5225 continue;
5226 else
5227 for (end = dbp; *end != '\0' && intoken (*end); end++)
5228 continue;
5229 linebuffer_setlen (&token_name, end - dbp);
5230 memcpy (token_name.buffer, dbp, end - dbp);
5231 token_name.buffer[end - dbp] = '\0';
5233 dbp = end;
5234 intag = false; /* we found what we looked for */
5235 skiptag = true; /* skip to the end of the tag */
5236 getnext = true; /* then grab the text */
5237 continue; /* look on the same line */
5239 dbp += 1;
5242 else if (getnext) /* grab next tokens and tag them */
5244 dbp = skip_spaces (dbp);
5245 if (*dbp == '\0')
5246 break; /* go to next line */
5247 if (*dbp == '<')
5249 intag = true;
5250 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5251 continue; /* look on the same line */
5254 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5255 continue;
5256 make_tag (token_name.buffer, token_name.len, true,
5257 dbp, end - dbp, lineno, linecharno);
5258 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5259 getnext = false;
5260 break; /* go to next line */
5263 else /* look for an interesting HTML tag */
5265 while (*dbp != '\0' && *dbp != '<')
5266 dbp++;
5267 if (*dbp == '\0')
5268 break; /* go to next line */
5269 intag = true;
5270 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5272 inanchor = true;
5273 continue; /* look on the same line */
5275 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5276 || LOOKING_AT_NOCASE (dbp, "<h1>")
5277 || LOOKING_AT_NOCASE (dbp, "<h2>")
5278 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5280 intag = false;
5281 getnext = true;
5282 continue; /* look on the same line */
5284 dbp += 1;
5291 * Prolog support
5293 * Assumes that the predicate or rule starts at column 0.
5294 * Only the first clause of a predicate or rule is added.
5295 * Original code by Sunichirou Sugou (1989)
5296 * Rewritten by Anders Lindgren (1996)
5298 static size_t prolog_pr (char *, char *);
5299 static void prolog_skip_comment (linebuffer *, FILE *);
5300 static size_t prolog_atom (char *, size_t);
5302 static void
5303 Prolog_functions (FILE *inf)
5305 char *cp, *last;
5306 size_t len;
5307 size_t allocated;
5309 allocated = 0;
5310 len = 0;
5311 last = NULL;
5313 LOOP_ON_INPUT_LINES (inf, lb, cp)
5315 if (cp[0] == '\0') /* Empty line */
5316 continue;
5317 else if (c_isspace (cp[0])) /* Not a predicate */
5318 continue;
5319 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5320 prolog_skip_comment (&lb, inf);
5321 else if ((len = prolog_pr (cp, last)) > 0)
5323 /* Predicate or rule. Store the function name so that we
5324 only generate a tag for the first clause. */
5325 if (last == NULL)
5326 last = xnew (len + 1, char);
5327 else if (len + 1 > allocated)
5328 xrnew (last, len + 1, char);
5329 allocated = len + 1;
5330 memcpy (last, cp, len);
5331 last[len] = '\0';
5334 free (last);
5338 static void
5339 prolog_skip_comment (linebuffer *plb, FILE *inf)
5341 char *cp;
5345 for (cp = plb->buffer; *cp != '\0'; cp++)
5346 if (cp[0] == '*' && cp[1] == '/')
5347 return;
5348 readline (plb, inf);
5350 while (perhaps_more_input (inf));
5354 * A predicate or rule definition is added if it matches:
5355 * <beginning of line><Prolog Atom><whitespace>(
5356 * or <beginning of line><Prolog Atom><whitespace>:-
5358 * It is added to the tags database if it doesn't match the
5359 * name of the previous clause header.
5361 * Return the size of the name of the predicate or rule, or 0 if no
5362 * header was found.
5364 static size_t
5365 prolog_pr (char *s, char *last)
5367 /* Name of last clause. */
5369 size_t pos;
5370 size_t len;
5372 pos = prolog_atom (s, 0);
5373 if (! pos)
5374 return 0;
5376 len = pos;
5377 pos = skip_spaces (s + pos) - s;
5379 if ((s[pos] == '.'
5380 || (s[pos] == '(' && (pos += 1))
5381 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5382 && (last == NULL /* save only the first clause */
5383 || len != strlen (last)
5384 || !strneq (s, last, len)))
5386 make_tag (s, len, true, s, pos, lineno, linecharno);
5387 return len;
5389 else
5390 return 0;
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t origpos = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
      return pos - origpos;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted atom: scan up to the closing quote.  */
  pos++;
  for (;;)
    {
      switch (s[pos])
        {
        case '\'':
          pos++;
          if (s[pos] != '\'')
            return pos - origpos;       /* that was the closing quote */
          pos++;                        /* a doubled quote */
          break;

        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return 0;

        case '\\':
          if (s[pos + 1] == '\0')
            return 0;
          pos += 2;
          break;

        default:
          pos++;
          break;
        }
    }
}
5452 * Support for Erlang
5454 * Generates tags for functions, defines, and records.
5455 * Assumes that Erlang functions start at column 0.
5456 * Original code by Anders Lindgren (1996)
5458 static int erlang_func (char *, char *);
5459 static void erlang_attribute (char *);
5460 static int erlang_atom (char *);
5462 static void
5463 Erlang_functions (FILE *inf)
5465 char *cp, *last;
5466 int len;
5467 int allocated;
5469 allocated = 0;
5470 len = 0;
5471 last = NULL;
5473 LOOP_ON_INPUT_LINES (inf, lb, cp)
5475 if (cp[0] == '\0') /* Empty line */
5476 continue;
5477 else if (c_isspace (cp[0])) /* Not function nor attribute */
5478 continue;
5479 else if (cp[0] == '%') /* comment */
5480 continue;
5481 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5482 continue;
5483 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5485 erlang_attribute (cp);
5486 if (last != NULL)
5488 free (last);
5489 last = NULL;
5492 else if ((len = erlang_func (cp, last)) > 0)
5495 * Function. Store the function name so that we only
5496 * generates a tag for the first clause.
5498 if (last == NULL)
5499 last = xnew (len + 1, char);
5500 else if (len + 1 > allocated)
5501 xrnew (last, len + 1, char);
5502 allocated = len + 1;
5503 memcpy (last, cp, len);
5504 last[len] = '\0';
5507 free (last);
5512 * A function definition is added if it matches:
5513 * <beginning of line><Erlang Atom><whitespace>(
5515 * It is added to the tags database if it doesn't match the
5516 * name of the previous clause header.
5518 * Return the size of the name of the function, or 0 if no function
5519 * was found.
5521 static int
5522 erlang_func (char *s, char *last)
5524 /* Name of last clause. */
5526 int pos;
5527 int len;
5529 pos = erlang_atom (s);
5530 if (pos < 1)
5531 return 0;
5533 len = pos;
5534 pos = skip_spaces (s + pos) - s;
5536 /* Save only the first clause. */
5537 if (s[pos++] == '('
5538 && (last == NULL
5539 || len != (int)strlen (last)
5540 || !strneq (s, last, len)))
5542 make_tag (s, len, true, s, pos, lineno, linecharno);
5543 return len;
5546 return 0;
5551 * Handle attributes. Currently, tags are generated for defines
5552 * and records.
5554 * They are on the form:
5555 * -define(foo, bar).
5556 * -define(Foo(M, N), M+N).
5557 * -record(graph, {vtab = notable, cyclic = true}).
5559 static void
5560 erlang_attribute (char *s)
5562 char *cp = s;
5564 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5565 && *cp++ == '(')
5567 int len = erlang_atom (skip_spaces (cp));
5568 if (len > 0)
5569 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5571 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: scan to the closing quote; a backslash protects
         the character after it.  */
      for (pos = 1; s[pos] != '\''; pos++)
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\' && s[++pos] == '\0')
            return 0;
        }
      pos++;                    /* include the closing quote */
    }
  else if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (pos = 1; c_isalnum (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5604 static char *scan_separators (char *);
5605 static void add_regex (char *, language *);
5606 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string is left alone and reported
 * as unterminated.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  /* Nothing to scan.  Without this test the loop below would start
     one byte past the terminator of an empty string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)            */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)       */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)          */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)          */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)         */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote.  */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
5667 /* Look at the argument of --regex or --no-regex and do the right
5668 thing. Same for each line of a regexp file. */
5669 static void
5670 analyze_regex (char *regex_arg)
5672 if (regex_arg == NULL)
5674 free_regexps (); /* --no-regex: remove existing regexps */
5675 return;
5678 /* A real --regexp option or a line in a regexp file. */
5679 switch (regex_arg[0])
5681 /* Comments in regexp file or null arg to --regex. */
5682 case '\0':
5683 case ' ':
5684 case '\t':
5685 break;
5687 /* Read a regex file. This is recursive and may result in a
5688 loop, which will stop when the file descriptors are exhausted. */
5689 case '@':
5691 FILE *regexfp;
5692 linebuffer regexbuf;
5693 char *regexfile = regex_arg + 1;
5695 /* regexfile is a file containing regexps, one per line. */
5696 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5697 if (regexfp == NULL)
5698 pfatal (regexfile);
5699 linebuffer_init (&regexbuf);
5700 while (readline_internal (&regexbuf, regexfp, regexfile) > 0)
5701 analyze_regex (regexbuf.buffer);
5702 free (regexbuf.buffer);
5703 if (fclose (regexfp) != 0)
5704 pfatal (regexfile);
5706 break;
5708 /* Regexp to be used for a specific language only. */
5709 case '{':
5711 language *lang;
5712 char *lang_name = regex_arg + 1;
5713 char *cp;
5715 for (cp = lang_name; *cp != '}'; cp++)
5716 if (*cp == '\0')
5718 error ("unterminated language name in regex: %s", regex_arg);
5719 return;
5721 *cp++ = '\0';
5722 lang = get_language_from_langname (lang_name);
5723 if (lang == NULL)
5724 return;
5725 add_regex (cp, lang);
5727 break;
5729 /* Regexp to be used for any language. */
5730 default:
5731 add_regex (regex_arg, NULL);
5732 break;
5736 /* Separate the regexp pattern, compile it,
5737 and care for optional name and modifiers. */
5738 static void
5739 add_regex (char *regexp_pattern, language *lang)
5741 static struct re_pattern_buffer zeropattern;
5742 char sep, *pat, *name, *modifiers;
5743 char empty = '\0';
5744 const char *err;
5745 struct re_pattern_buffer *patbuf;
5746 regexp *rp;
5747 bool
5748 force_explicit_name = true, /* do not use implicit tag names */
5749 ignore_case = false, /* case is significant */
5750 multi_line = false, /* matches are done one line at a time */
5751 single_line = false; /* dot does not match newline */
5754 if (strlen (regexp_pattern) < 3)
5756 error ("null regexp");
5757 return;
5759 sep = regexp_pattern[0];
5760 name = scan_separators (regexp_pattern);
5761 if (name == NULL)
5763 error ("%s: unterminated regexp", regexp_pattern);
5764 return;
5766 if (name[1] == sep)
5768 error ("null name for regexp \"%s\"", regexp_pattern);
5769 return;
5771 modifiers = scan_separators (name);
5772 if (modifiers == NULL) /* no terminating separator --> no name */
5774 modifiers = name;
5775 name = &empty;
5777 else
5778 modifiers += 1; /* skip separator */
5780 /* Parse regex modifiers. */
5781 for (; modifiers[0] != '\0'; modifiers++)
5782 switch (modifiers[0])
5784 case 'N':
5785 if (modifiers == name)
5786 error ("forcing explicit tag name but no name, ignoring");
5787 force_explicit_name = true;
5788 break;
5789 case 'i':
5790 ignore_case = true;
5791 break;
5792 case 's':
5793 single_line = true;
5794 /* FALLTHRU */
5795 case 'm':
5796 multi_line = true;
5797 need_filebuf = true;
5798 break;
5799 default:
5800 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5801 break;
5804 patbuf = xnew (1, struct re_pattern_buffer);
5805 *patbuf = zeropattern;
5806 if (ignore_case)
5808 static char lc_trans[UCHAR_MAX + 1];
5809 int i;
5810 for (i = 0; i < UCHAR_MAX + 1; i++)
5811 lc_trans[i] = c_tolower (i);
5812 patbuf->translate = lc_trans; /* translation table to fold case */
5815 if (multi_line)
5816 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5817 else
5818 pat = regexp_pattern;
5820 if (single_line)
5821 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5822 else
5823 re_set_syntax (RE_SYNTAX_EMACS);
5825 err = re_compile_pattern (pat, strlen (pat), patbuf);
5826 if (multi_line)
5827 free (pat);
5828 if (err != NULL)
5830 error ("%s while compiling pattern", err);
5831 return;
5834 rp = p_head;
5835 p_head = xnew (1, regexp);
5836 p_head->pattern = savestr (regexp_pattern);
5837 p_head->p_next = rp;
5838 p_head->lang = lang;
5839 p_head->pat = patbuf;
5840 p_head->name = savestr (name);
5841 p_head->error_signaled = false;
5842 p_head->force_explicit_name = force_explicit_name;
5843 p_head->ignore_case = ignore_case;
5844 p_head->multi_line = multi_line;
5848 * Do the substitutions indicated by the regular expression and
5849 * arguments.
5851 static char *
5852 substitute (char *in, char *out, struct re_registers *regs)
5854 char *result, *t;
5855 int size, dig, diglen;
5857 result = NULL;
5858 size = strlen (out);
5860 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5861 if (out[size - 1] == '\\')
5862 fatal ("pattern error in \"%s\"", out);
5863 for (t = strchr (out, '\\');
5864 t != NULL;
5865 t = strchr (t + 2, '\\'))
5866 if (c_isdigit (t[1]))
5868 dig = t[1] - '0';
5869 diglen = regs->end[dig] - regs->start[dig];
5870 size += diglen - 2;
5872 else
5873 size -= 1;
5875 /* Allocate space and do the substitutions. */
5876 assert (size >= 0);
5877 result = xnew (size + 1, char);
5879 for (t = result; *out != '\0'; out++)
5880 if (*out == '\\' && c_isdigit (*++out))
5882 dig = *out - '0';
5883 diglen = regs->end[dig] - regs->start[dig];
5884 memcpy (t, in + regs->start[dig], diglen);
5885 t += diglen;
5887 else
5888 *t++ = *out;
5889 *t = '\0';
5891 assert (t <= result + size);
5892 assert (t - result == (int)strlen (result));
5894 return result;
5897 /* Deallocate all regexps. */
5898 static void
5899 free_regexps (void)
5901 regexp *rp;
5902 while (p_head != NULL)
5904 rp = p_head->p_next;
5905 free (p_head->pattern);
5906 free (p_head->name);
5907 free (p_head);
5908 p_head = rp;
5910 return;
5914 * Reads the whole file as a single string from `filebuf' and looks for
5915 * multi-line regular expressions, creating tags on matches.
5916 * readline already dealt with normal regexps.
5918 * Idea by Ben Wing <ben@666.com> (2002).
5920 static void
5921 regex_tag_multiline (void)
5923 char *buffer = filebuf.buffer;
5924 regexp *rp;
5925 char *name;
5927 for (rp = p_head; rp != NULL; rp = rp->p_next)
5929 int match = 0;
5931 if (!rp->multi_line)
5932 continue; /* skip normal regexps */
5934 /* Generic initializations before parsing file from memory. */
5935 lineno = 1; /* reset global line number */
5936 charno = 0; /* reset global char number */
5937 linecharno = 0; /* reset global char number of line start */
5939 /* Only use generic regexps or those for the current language. */
5940 if (rp->lang != NULL && rp->lang != curfdp->lang)
5941 continue;
5943 while (match >= 0 && match < filebuf.len)
5945 match = re_search (rp->pat, buffer, filebuf.len, charno,
5946 filebuf.len - match, &rp->regs);
5947 switch (match)
5949 case -2:
5950 /* Some error. */
5951 if (!rp->error_signaled)
5953 error ("regexp stack overflow while matching \"%s\"",
5954 rp->pattern);
5955 rp->error_signaled = true;
5957 break;
5958 case -1:
5959 /* No match. */
5960 break;
5961 default:
5962 if (match == rp->regs.end[0])
5964 if (!rp->error_signaled)
5966 error ("regexp matches the empty string: \"%s\"",
5967 rp->pattern);
5968 rp->error_signaled = true;
5970 match = -3; /* exit from while loop */
5971 break;
5974 /* Match occurred. Construct a tag. */
5975 while (charno < rp->regs.end[0])
5976 if (buffer[charno++] == '\n')
5977 lineno++, linecharno = charno;
5978 name = rp->name;
5979 if (name[0] == '\0')
5980 name = NULL;
5981 else /* make a named tag */
5982 name = substitute (buffer, rp->name, &rp->regs);
5983 if (rp->force_explicit_name)
5984 /* Force explicit tag name, if a name is there. */
5985 pfnote (name, true, buffer + linecharno,
5986 charno - linecharno + 1, lineno, linecharno);
5987 else
5988 make_tag (name, strlen (name), true, buffer + linecharno,
5989 charno - linecharno + 1, lineno, linecharno);
5990 break;
5997 static bool
5998 nocase_tail (const char *cp)
6000 int len = 0;
6002 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
6003 cp++, len++;
6004 if (*cp == '\0' && !intoken (dbp[len]))
6006 dbp += len;
6007 return true;
6009 return false;
6012 static void
6013 get_tag (register char *bp, char **namepp)
6015 register char *cp = bp;
6017 if (*bp != '\0')
6019 /* Go till you get to white space or a syntactic break */
6020 for (cp = bp + 1; !notinname (*cp); cp++)
6021 continue;
6022 make_tag (bp, cp - bp, true,
6023 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6026 if (namepp != NULL)
6027 *namepp = savenstr (bp, cp - bp);
6031 * Read a line of text from `stream' into `lbp', excluding the
6032 * newline or CR-NL, if any. Return the number of characters read from
6033 * `stream', which is the length of the line including the newline.
6035 * On DOS or Windows we do not count the CR character, if any before the
6036 * NL, in the returned length; this mirrors the behavior of Emacs on those
6037 * platforms (for text files, it translates CR-NL to NL as it reads in the
6038 * file).
6040 * If multi-line regular expressions are requested, each line read is
6041 * appended to `filebuf'.
6043 static long
6044 readline_internal (linebuffer *lbp, FILE *stream, char const *filename)
6046 char *buffer = lbp->buffer;
6047 char *p = lbp->buffer;
6048 char *pend;
6049 int chars_deleted;
6051 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6053 for (;;)
6055 register int c = getc (stream);
6056 if (p == pend)
6058 /* We're at the end of linebuffer: expand it. */
6059 lbp->size *= 2;
6060 xrnew (buffer, lbp->size, char);
6061 p += buffer - lbp->buffer;
6062 pend = buffer + lbp->size;
6063 lbp->buffer = buffer;
6065 if (c == EOF)
6067 if (ferror (stream))
6068 perror (filename);
6069 *p = '\0';
6070 chars_deleted = 0;
6071 break;
6073 if (c == '\n')
6075 if (p > buffer && p[-1] == '\r')
6077 p -= 1;
6078 chars_deleted = 2;
6080 else
6082 chars_deleted = 1;
6084 *p = '\0';
6085 break;
6087 *p++ = c;
6089 lbp->len = p - buffer;
6091 if (need_filebuf /* we need filebuf for multi-line regexps */
6092 && chars_deleted > 0) /* not at EOF */
6094 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6096 /* Expand filebuf. */
6097 filebuf.size *= 2;
6098 xrnew (filebuf.buffer, filebuf.size, char);
6100 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6101 filebuf.len += lbp->len;
6102 filebuf.buffer[filebuf.len++] = '\n';
6103 filebuf.buffer[filebuf.len] = '\0';
6106 return lbp->len + chars_deleted;
6110 * Like readline_internal, above, but in addition try to match the
6111 * input line against relevant regular expressions and manage #line
6112 * directives.
6114 static void
6115 readline (linebuffer *lbp, FILE *stream)
6117 long result;
6119 linecharno = charno; /* update global char number of line start */
6120 result = readline_internal (lbp, stream, infilename); /* read line */
6121 lineno += 1; /* increment global line number */
6122 charno += result; /* increment global char number */
6124 /* Honor #line directives. */
6125 if (!no_line_directive)
6127 static bool discard_until_line_directive;
6129 /* Check whether this is a #line directive. */
6130 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6132 unsigned int lno;
6133 int start = 0;
6135 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6136 && start > 0) /* double quote character found */
6138 char *endp = lbp->buffer + start;
6140 while ((endp = strchr (endp, '"')) != NULL
6141 && endp[-1] == '\\')
6142 endp++;
6143 if (endp != NULL)
6144 /* Ok, this is a real #line directive. Let's deal with it. */
6146 char *taggedabsname; /* absolute name of original file */
6147 char *taggedfname; /* name of original file as given */
6148 char *name; /* temp var */
6150 discard_until_line_directive = false; /* found it */
6151 name = lbp->buffer + start;
6152 *endp = '\0';
6153 canonicalize_filename (name);
6154 taggedabsname = absolute_filename (name, tagfiledir);
6155 if (filename_is_absolute (name)
6156 || filename_is_absolute (curfdp->infname))
6157 taggedfname = savestr (taggedabsname);
6158 else
6159 taggedfname = relative_filename (taggedabsname,tagfiledir);
6161 if (streq (curfdp->taggedfname, taggedfname))
6162 /* The #line directive is only a line number change. We
6163 deal with this afterwards. */
6164 free (taggedfname);
6165 else
6166 /* The tags following this #line directive should be
6167 attributed to taggedfname. In order to do this, set
6168 curfdp accordingly. */
6170 fdesc *fdp; /* file description pointer */
6172 /* Go look for a file description already set up for the
6173 file indicated in the #line directive. If there is
6174 one, use it from now until the next #line
6175 directive. */
6176 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6177 if (streq (fdp->infname, curfdp->infname)
6178 && streq (fdp->taggedfname, taggedfname))
6179 /* If we remove the second test above (after the &&)
6180 then all entries pertaining to the same file are
6181 coalesced in the tags file. If we use it, then
6182 entries pertaining to the same file but generated
6183 from different files (via #line directives) will
6184 go into separate sections in the tags file. These
6185 alternatives look equivalent. The first one
6186 destroys some apparently useless information. */
6188 curfdp = fdp;
6189 free (taggedfname);
6190 break;
6192 /* Else, if we already tagged the real file, skip all
6193 input lines until the next #line directive. */
6194 if (fdp == NULL) /* not found */
6195 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6196 if (streq (fdp->infabsname, taggedabsname))
6198 discard_until_line_directive = true;
6199 free (taggedfname);
6200 break;
6202 /* Else create a new file description and use that from
6203 now on, until the next #line directive. */
6204 if (fdp == NULL) /* not found */
6206 fdp = fdhead;
6207 fdhead = xnew (1, fdesc);
6208 *fdhead = *curfdp; /* copy curr. file description */
6209 fdhead->next = fdp;
6210 fdhead->infname = savestr (curfdp->infname);
6211 fdhead->infabsname = savestr (curfdp->infabsname);
6212 fdhead->infabsdir = savestr (curfdp->infabsdir);
6213 fdhead->taggedfname = taggedfname;
6214 fdhead->usecharno = false;
6215 fdhead->prop = NULL;
6216 fdhead->written = false;
6217 curfdp = fdhead;
6220 free (taggedabsname);
6221 lineno = lno - 1;
6222 readline (lbp, stream);
6223 return;
6224 } /* if a real #line directive */
6225 } /* if #line is followed by a number */
6226 } /* if line begins with "#line " */
6228 /* If we are here, no #line directive was found. */
6229 if (discard_until_line_directive)
6231 if (result > 0)
6233 /* Do a tail recursion on ourselves, thus discarding the contents
6234 of the line buffer. */
6235 readline (lbp, stream);
6236 return;
6238 /* End of file. */
6239 discard_until_line_directive = false;
6240 return;
6242 } /* if #line directives should be considered */
6245 int match;
6246 regexp *rp;
6247 char *name;
6249 /* Match against relevant regexps. */
6250 if (lbp->len > 0)
6251 for (rp = p_head; rp != NULL; rp = rp->p_next)
6253 /* Only use generic regexps or those for the current language.
6254 Also do not use multiline regexps, which is the job of
6255 regex_tag_multiline. */
6256 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6257 || rp->multi_line)
6258 continue;
6260 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6261 switch (match)
6263 case -2:
6264 /* Some error. */
6265 if (!rp->error_signaled)
6267 error ("regexp stack overflow while matching \"%s\"",
6268 rp->pattern);
6269 rp->error_signaled = true;
6271 break;
6272 case -1:
6273 /* No match. */
6274 break;
6275 case 0:
6276 /* Empty string matched. */
6277 if (!rp->error_signaled)
6279 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6280 rp->error_signaled = true;
6282 break;
6283 default:
6284 /* Match occurred. Construct a tag. */
6285 name = rp->name;
6286 if (name[0] == '\0')
6287 name = NULL;
6288 else /* make a named tag */
6289 name = substitute (lbp->buffer, rp->name, &rp->regs);
6290 if (rp->force_explicit_name)
6291 /* Force explicit tag name, if a name is there. */
6292 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6293 else
6294 make_tag (name, strlen (name), true,
6295 lbp->buffer, match, lineno, linecharno);
6296 break;
/*
 * Return a pointer to a newly allocated copy of the NUL-terminated
 * string CP.  The copy occupies strlen (CP) + 1 bytes and is made by
 * savenstr.
 */
static char *
savestr (const char *cp)
{
  size_t length = strlen (cp);

  return savenstr (cp, length);
}
6314 * Return a pointer to a space of size LEN+1 allocated with xnew where
6315 * the string CP has been copied for at most the first LEN characters.
6317 static char *
6318 savenstr (const char *cp, int len)
6320 char *dp = xnew (len + 1, char);
6321 dp[len] = '\0';
6322 return memcpy (dp, cp, len);
/* Return a pointer to the first character at or after CP for which
   c_isspace is false.  The end of the string is not a space, so the
   scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either a space (per c_isspace) or the end of the string.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
/* Return a pointer to the first character at or after CP that is not
   in the "name" class, i.e. for which notinname is true.  */
static char *
skip_name (char *cp)
{
  char *scan;

  /* '\0' is a notinname() so the scan stops there too.  */
  for (scan = cp; !notinname (*scan); scan++)
    continue;
  return scan;
}
/* Report an error and terminate the program with EXIT_FAILURE.
   S1 is passed to `error' as the printf-style format and S2 as its
   single argument, so S1 should contain at most one conversion.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
/* Print S1 followed by the message for the current `errno' (via
   perror), then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6368 static void
6369 suggest_asking_for_help (void)
6371 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6372 progname);
6373 exit (EXIT_FAILURE);
6376 /* Output a diagnostic with printf-style FORMAT and args. */
6377 static void
6378 error (const char *format, ...)
6380 va_list ap;
6381 va_start (ap, format);
6382 fprintf (stderr, "%s: ", progname);
6383 vfprintf (stderr, format, ap);
6384 fprintf (stderr, "\n");
6385 va_end (ap);
6388 /* Return a newly-allocated string whose contents
6389 concatenate those of s1, s2, s3. */
6390 static char *
6391 concat (const char *s1, const char *s2, const char *s3)
6393 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6394 char *result = xnew (len1 + len2 + len3 + 1, char);
6396 strcpy (result, s1);
6397 strcpy (result + len1, s2);
6398 strcpy (result + len1 + len2, s3);
6400 return result;
6404 /* Does the same work as the system V getcwd, but does not need to
6405 guess the buffer size in advance. */
6406 static char *
6407 etags_getcwd (void)
6409 int bufsize = 200;
6410 char *path = xnew (bufsize, char);
6412 while (getcwd (path, bufsize) == NULL)
6414 if (errno != ERANGE)
6415 pfatal ("getcwd");
6416 bufsize *= 2;
6417 free (path);
6418 path = xnew (bufsize, char);
6421 canonicalize_filename (path);
6422 return path;
6425 /* Return a newly allocated string containing a name of a temporary file. */
6426 static char *
6427 etags_mktmp (void)
6429 const char *tmpdir = getenv ("TMPDIR");
6430 const char *slash = "/";
6432 #if MSDOS || defined (DOS_NT)
6433 if (!tmpdir)
6434 tmpdir = getenv ("TEMP");
6435 if (!tmpdir)
6436 tmpdir = getenv ("TMP");
6437 if (!tmpdir)
6438 tmpdir = ".";
6439 if (tmpdir[strlen (tmpdir) - 1] == '/'
6440 || tmpdir[strlen (tmpdir) - 1] == '\\')
6441 slash = "";
6442 #else
6443 if (!tmpdir)
6444 tmpdir = "/tmp";
6445 if (tmpdir[strlen (tmpdir) - 1] == '/')
6446 slash = "";
6447 #endif
6449 char *templt = concat (tmpdir, slash, "etXXXXXX");
6450 int fd = mkostemp (templt, O_CLOEXEC);
6451 if (fd < 0 || close (fd) != 0)
6453 int temp_errno = errno;
6454 free (templt);
6455 errno = temp_errno;
6456 templt = NULL;
6459 #if defined (DOS_NT)
6460 /* The file name will be used in shell redirection, so it needs to have
6461 DOS-style backslashes, or else the Windows shell will barf. */
6462 char *p;
6463 for (p = templt; *p; p++)
6464 if (*p == '/')
6465 *p = '\\';
6466 #endif
6468 return templt;
6471 /* Return a newly allocated string containing the file name of FILE
6472 relative to the absolute directory DIR (which should end with a slash). */
6473 static char *
6474 relative_filename (char *file, char *dir)
6476 char *fp, *dp, *afn, *res;
6477 int i;
6479 /* Find the common root of file and dir (with a trailing slash). */
6480 afn = absolute_filename (file, cwd);
6481 fp = afn;
6482 dp = dir;
6483 while (*fp++ == *dp++)
6484 continue;
6485 fp--, dp--; /* back to the first differing char */
6486 #ifdef DOS_NT
6487 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6488 return afn;
6489 #endif
6490 do /* look at the equal chars until '/' */
6491 fp--, dp--;
6492 while (*fp != '/');
6494 /* Build a sequence of "../" strings for the resulting relative file name. */
6495 i = 0;
6496 while ((dp = strchr (dp + 1, '/')) != NULL)
6497 i += 1;
6498 res = xnew (3*i + strlen (fp + 1) + 1, char);
6499 char *z = res;
6500 while (i-- > 0)
6501 z = stpcpy (z, "../");
6503 /* Add the file name relative to the common root of file and dir. */
6504 strcpy (z, fp + 1);
6505 free (afn);
6507 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise it is appended
   to DIR.  "/dirname/.." and "/." substrings are then removed from
   the result in place.  Should the result end up empty, "/" is
   returned instead.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back from SLASHP to the start of the preceding
                 component, i.e. the nearest earlier position where an
                 absolute name begins.  The scan never steps before
                 RES, so no pointer outside the allocation is formed.  */
              bool found = false;

              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      found = true;
                      break;
                    }
                }
              if (!found)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) is the length of the tail after
                 "/.." plus one, so the terminating NUL moves too.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the length again includes the NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   If FILE contains no slash, the result is a copy of DIR.  Otherwise
   FILE is cut right after its last slash for the duration of the call
   to absolute_filename and restored afterwards, so FILE must be
   writable.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');
  char *result;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE after its directory part.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  result = absolute_filename (file, dir);
  last_slash[1] = saved;

  return result;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.

   A name is absolute if it starts with '/', or, under DOS_NT, if it
   starts with a letter followed by ":/".  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Canonicalize the file name FN in place: every run of directory
   separators is collapsed into a single '/'.  Under DOS_NT both '/'
   and '\\' count as separators, and an upper-case drive letter
   followed by ':' is downcased first.  The result is never longer
   than the input.  */
static void
canonicalize_filename (register char *fn)
{
  register char* src = fn;      /* next character to read */
  char *dst = fn;               /* next position to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          /* Leave SRC on the last separator of the run.  */
          while (src[1] == '/' || src[1] == '\\')
            src++;
          *dst = '/';
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          /* Leave SRC on the last slash of the run.  */
          while (src[1] == '/')
            src++;
          *dst = '/';
        }
      else
        *dst = *src;
      src++;
      dst++;
    }

#endif /* !DOS_NT */

  *dst = '\0';
}
6648 /* Initialize a linebuffer for use. */
6649 static void
6650 linebuffer_init (linebuffer *lbp)
6652 lbp->size = (DEBUG) ? 3 : 200;
6653 lbp->buffer = xnew (lbp->size, char);
6654 lbp->buffer[0] = '\0';
6655 lbp->len = 0;
6658 /* Set the minimum size of a string contained in a linebuffer. */
6659 static void
6660 linebuffer_setlen (linebuffer *lbp, int toksize)
6662 while (lbp->size <= toksize)
6664 lbp->size *= 2;
6665 xrnew (lbp->buffer, lbp->size, char);
6667 lbp->len = toksize;
/* Like malloc, but a NULL result from malloc is treated as memory
   exhaustion and reported through `fatal' instead of being returned
   to the caller.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
/* Like realloc, but a NULL result from realloc is treated as memory
   exhaustion and reported through `fatal' instead of being returned
   to the caller.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6690 * Local Variables:
6691 * indent-tabs-mode: t
6692 * tab-width: 8
6693 * fill-column: 79
6694 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6695 * c-file-style: "gnu"
6696 * End:
6699 /* etags.c ends here */