Cleanup etags.c to use locale-independent code
[emacs.git] / lib-src / etags.c
blobfacb462f67b8c906244a1627422939434f1e69f1
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
72 starting in 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #ifdef DEBUG
84 # undef DEBUG
85 # define DEBUG true
86 #else
87 # define DEBUG false
88 # define NDEBUG /* disable assert */
89 #endif
91 #include <config.h>
93 #ifndef _GNU_SOURCE
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
95 #endif
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #ifdef WIN32_NATIVE
100 # undef MSDOS
101 # undef WINDOWSNT
102 # define WINDOWSNT
103 #endif /* WIN32_NATIVE */
105 #ifdef MSDOS
106 # undef MSDOS
107 # define MSDOS true
108 # include <sys/param.h>
109 #else
110 # define MSDOS false
111 #endif /* MSDOS */
113 #ifdef WINDOWSNT
114 # include <direct.h>
115 # define MAXPATHLEN _MAX_PATH
116 # undef HAVE_NTGUI
117 # undef DOS_NT
118 # define DOS_NT
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
122 #include <limits.h>
123 #include <unistd.h>
124 #include <stdarg.h>
125 #include <stdlib.h>
126 #include <string.h>
127 #include <sysstdio.h>
128 #include <errno.h>
129 #include <fcntl.h>
130 #include <sys/types.h>
131 #include <sys/stat.h>
132 #include <binary-io.h>
133 #include <c-ctype.h>
134 #include <c-strcase.h>
136 #include <assert.h>
137 #ifdef NDEBUG
138 # undef assert /* some systems have a buggy assert.h */
139 # define assert(x) ((void) 0)
140 #endif
142 #include <getopt.h>
143 #include <regex.h>
145 /* Define CTAGS to make the program "ctags" compatible with the usual one.
146 Leave it undefined to make the program "etags", which makes emacs-style
147 tag tables and tags typedefs, #defines and struct/union/enum by default. */
148 #ifdef CTAGS
149 # undef CTAGS
150 # define CTAGS true
151 #else
152 # define CTAGS false
153 #endif
/* String equality predicates.  Each expands to a nonzero value when
   the two strings compare equal: streq and strneq compare with strcmp
   and strncmp, strcaseeq and strncaseeq with c_strcasecmp and
   c_strncasecmp.  N limits the comparison to the first N bytes.

   Both S and T must be non-null; passing a null pointer to any of the
   underlying comparison functions is undefined, so the assertion
   requires both pointers (it has no effect when assert is disabled).
   The arguments are evaluated more than once and therefore must not
   have side effects.  */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
/* Return true if C cannot be part of a name: the NUL byte, tab,
   newline, form feed, carriage return, space, or one of the
   characters ( ) , ; =.  Every other byte value yields false.
   Look at make_tag before modifying!  */
static bool
notinname (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\f': case '\r': case ' ':
    case '(': case ')': case ',': case ';': case '=':
      return true;

    default:
      return false;
    }
}
/* Return true if C can start a token: an unaccented Latin letter of
   either case, or one of the characters $ @ _ ~.  The accepted
   characters are spelled out one by one, so the result does not depend
   on the locale.  */
static bool
begtoken (unsigned char c)
{
  static char const starters[] =
    "$@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "abcdefghijklmnopqrstuvwxyz"
    "~";

  /* Exclude the terminating NUL of the literal from the search, since
     NUL cannot start a token.  */
  return memchr (starters, c, sizeof starters - 1) != NULL;
}
/* Return true if C can appear in the middle of a token: a decimal
   digit, an unaccented Latin letter of either case, or an underscore.
   The accepted characters are spelled out one by one, so the result
   does not depend on the locale.  */
static bool
intoken (unsigned char c)
{
  static char const members[] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "abcdefghijklmnopqrstuvwxyz";

  /* Exclude the terminating NUL of the literal from the search, since
     NUL is not a token character.  */
  return memchr (members, c, sizeof members - 1) != NULL;
}
/* Return true if C can end a token: the NUL byte, tab, newline,
   carriage return, space, or one of the punctuation characters listed
   below.  Letters, digits and the characters $ @ \ _ ` are not token
   terminators, and neither is form feed.  */
static bool
endtoken (unsigned char c)
{
  switch (c)
    {
    case '\0': case '\t': case '\n': case '\r': case ' ':
    case '!': case '"': case '#': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/': case ':': case ';': case '<': case '=':
    case '>': case '?': case '[': case ']': case '^': case '{':
    case '|': case '}': case '~':
      return true;

    default:
      return false;
    }
}
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a `Type *' obtained from xmalloc with room for N objects
 * of type Type.  xrnew resizes OP through xrealloc to hold N objects
 * of type Type and stores the result back into OP, so OP must be an
 * lvalue; it is evaluated twice.
 */
#define xnew(n, Type) \
  ((Type *) xmalloc (sizeof (Type) * (n)))
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc (op, sizeof (Type) * (n)))
235 typedef void Lang_function (FILE *);
/* Pairs a file name suffix with the command used to decompress files
   carrying that suffix.  */
typedef struct
{
  char const *suffix;		/* file name suffix for this compressor */
  char const *command;		/* takes one arg and decompresses to stdout */
} compressor;
243 typedef struct
245 const char *name; /* language name */
246 const char *help; /* detailed help for the language */
247 Lang_function *function; /* parse function */
248 const char **suffixes; /* name suffixes of this language's files */
249 const char **filenames; /* names of this language's files */
250 const char **interpreters; /* interpreters for this language */
251 bool metasource; /* source used to generate other sources */
252 } language;
254 typedef struct fdesc
256 struct fdesc *next; /* for the linked list */
257 char *infname; /* uncompressed input file name */
258 char *infabsname; /* absolute uncompressed input file name */
259 char *infabsdir; /* absolute dir of input file */
260 char *taggedfname; /* file name to write in tagfile */
261 language *lang; /* language of file */
262 char *prop; /* file properties to write in tagfile */
263 bool usecharno; /* etags tags shall contain char number */
264 bool written; /* entry written in the tags file */
265 } fdesc;
267 typedef struct node_st
268 { /* sorting structure */
269 struct node_st *left, *right; /* left and right sons */
270 fdesc *fdp; /* description of file to whom tag belongs */
271 char *name; /* tag name */
272 char *regex; /* search regexp */
273 bool valid; /* write this tag on the tag file */
274 bool is_func; /* function tag: use regexp in CTAGS mode */
275 bool been_warned; /* warning already given for duplicated tag */
276 int lno; /* line number tag is on */
277 long cno; /* character number line starts on */
278 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
294 /* Used to support mixing of --lang and file names. */
295 typedef struct
297 enum {
298 at_language, /* a language specification */
299 at_regexp, /* a regular expression */
300 at_filename, /* a file name */
301 at_stdin, /* read from stdin here */
302 at_end /* stop parsing the list */
303 } arg_type; /* argument type */
304 language *lang; /* language associated with the argument */
305 char *what; /* the argument itself */
306 } argument;
308 /* Structure defining a regular expression. */
309 typedef struct regexp
311 struct regexp *p_next; /* pointer to next in list */
312 language *lang; /* if set, use only for this language */
313 char *pattern; /* the regexp pattern */
314 char *name; /* tag name */
315 struct re_pattern_buffer *pat; /* the compiled pattern */
316 struct re_registers regs; /* re registers */
317 bool error_signaled; /* already signaled for this regexp */
318 bool force_explicit_name; /* do not allow implicit tag name */
319 bool ignore_case; /* ignore case when matching */
320 bool multi_line; /* do a multi-line match on the whole file */
321 } regexp;
324 /* Many compilers barf on this:
325 Lang_function Ada_funcs;
326 so let's write it this way */
327 static void Ada_funcs (FILE *);
328 static void Asm_labels (FILE *);
329 static void C_entries (int c_ext, FILE *);
330 static void default_C_entries (FILE *);
331 static void plain_C_entries (FILE *);
332 static void Cjava_entries (FILE *);
333 static void Cobol_paragraphs (FILE *);
334 static void Cplusplus_entries (FILE *);
335 static void Cstar_entries (FILE *);
336 static void Erlang_functions (FILE *);
337 static void Forth_words (FILE *);
338 static void Fortran_functions (FILE *);
339 static void HTML_labels (FILE *);
340 static void Lisp_functions (FILE *);
341 static void Lua_functions (FILE *);
342 static void Makefile_targets (FILE *);
343 static void Pascal_functions (FILE *);
344 static void Perl_functions (FILE *);
345 static void PHP_functions (FILE *);
346 static void PS_functions (FILE *);
347 static void Prolog_functions (FILE *);
348 static void Python_functions (FILE *);
349 static void Scheme_functions (FILE *);
350 static void TeX_commands (FILE *);
351 static void Texinfo_nodes (FILE *);
352 static void Yacc_entries (FILE *);
353 static void just_read_file (FILE *);
355 static language *get_language_from_langname (const char *);
356 static void readline (linebuffer *, FILE *);
357 static long readline_internal (linebuffer *, FILE *);
358 static bool nocase_tail (const char *);
359 static void get_tag (char *, char **);
361 static void analyze_regex (char *);
362 static void free_regexps (void);
363 static void regex_tag_multiline (void);
364 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
365 static _Noreturn void suggest_asking_for_help (void);
366 _Noreturn void fatal (const char *, const char *);
367 static _Noreturn void pfatal (const char *);
368 static void add_node (node *, node **);
370 static void process_file_name (char *, language *);
371 static void process_file (FILE *, char *, language *);
372 static void find_entries (FILE *);
373 static void free_tree (node *);
374 static void free_fdesc (fdesc *);
375 static void pfnote (char *, bool, char *, int, int, long);
376 static void invalidate_nodes (fdesc *, node **);
377 static void put_entries (node *);
379 static char *concat (const char *, const char *, const char *);
380 static char *skip_spaces (char *);
381 static char *skip_non_spaces (char *);
382 static char *skip_name (char *);
383 static char *savenstr (const char *, int);
384 static char *savestr (const char *);
385 static char *etags_getcwd (void);
386 static char *relative_filename (char *, char *);
387 static char *absolute_filename (char *, char *);
388 static char *absolute_dirname (char *, char *);
389 static bool filename_is_absolute (char *f);
390 static void canonicalize_filename (char *);
391 static char *etags_mktmp (void);
392 static void linebuffer_init (linebuffer *);
393 static void linebuffer_setlen (linebuffer *, int);
394 static void *xmalloc (size_t);
395 static void *xrealloc (void *, size_t);
398 static char searchar = '/'; /* use /.../ searches */
400 static char *tagfile; /* output file */
401 static char *progname; /* name this program was invoked with */
402 static char *cwd; /* current working directory */
403 static char *tagfiledir; /* directory of tagfile */
404 static FILE *tagf; /* ioptr for tags file */
405 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
407 static fdesc *fdhead; /* head of file description list */
408 static fdesc *curfdp; /* current file description */
409 static int lineno; /* line number of current line */
410 static long charno; /* current character number */
411 static long linecharno; /* charno of start of current line */
412 static char *dbp; /* pointer to start of current tag */
414 static const int invalidcharno = -1;
416 static node *nodehead; /* the head of the binary tree of tags */
417 static node *last_node; /* the last node created */
419 static linebuffer lb; /* the current line */
420 static linebuffer filebuf; /* a buffer containing the whole file */
421 static linebuffer token_name; /* a buffer containing a tag name */
423 static bool append_to_tagfile; /* -a: append to tags */
424 /* The next five default to true in C and derived languages. */
425 static bool typedefs; /* -t: create tags for C and Ada typedefs */
426 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
427 /* 0 struct/enum/union decls, and C++ */
428 /* member functions. */
429 static bool constantypedefs; /* -d: create tags for C #define, enum */
430 /* constants and variables. */
431 /* -D: opposite of -d. Default under ctags. */
432 static int globals; /* create tags for global variables */
433 static int members; /* create tags for C member variables */
434 static int declarations; /* --declarations: tag them and extern in C&Co*/
435 static int no_line_directive; /* ignore #line directives (undocumented) */
436 static int no_duplicates; /* no duplicate tags for ctags (undocumented) */
437 static bool update; /* -u: update tags */
438 static bool vgrind_style; /* -v: create vgrind style index output */
439 static bool no_warnings; /* -w: suppress warnings (undocumented) */
440 static bool cxref_style; /* -x: create cxref style output */
441 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
442 static bool ignoreindent; /* -I: ignore indentation in C */
443 static int packages_only; /* --packages-only: in Ada, only tag packages*/
445 /* STDIN is defined in LynxOS system headers */
446 #ifdef STDIN
447 # undef STDIN
448 #endif
450 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
451 static bool parsing_stdin; /* --parse-stdin used */
453 static regexp *p_head; /* list of all regexps */
454 static bool need_filebuf; /* some regexes are multi-line */
456 static struct option longopts[] =
458 { "append", no_argument, NULL, 'a' },
459 { "packages-only", no_argument, &packages_only, 1 },
460 { "c++", no_argument, NULL, 'C' },
461 { "declarations", no_argument, &declarations, 1 },
462 { "no-line-directive", no_argument, &no_line_directive, 1 },
463 { "no-duplicates", no_argument, &no_duplicates, 1 },
464 { "help", no_argument, NULL, 'h' },
465 { "help", no_argument, NULL, 'H' },
466 { "ignore-indentation", no_argument, NULL, 'I' },
467 { "language", required_argument, NULL, 'l' },
468 { "members", no_argument, &members, 1 },
469 { "no-members", no_argument, &members, 0 },
470 { "output", required_argument, NULL, 'o' },
471 { "regex", required_argument, NULL, 'r' },
472 { "no-regex", no_argument, NULL, 'R' },
473 { "ignore-case-regex", required_argument, NULL, 'c' },
474 { "parse-stdin", required_argument, NULL, STDIN },
475 { "version", no_argument, NULL, 'V' },
477 #if CTAGS /* Ctags options */
478 { "backward-search", no_argument, NULL, 'B' },
479 { "cxref", no_argument, NULL, 'x' },
480 { "defines", no_argument, NULL, 'd' },
481 { "globals", no_argument, &globals, 1 },
482 { "typedefs", no_argument, NULL, 't' },
483 { "typedefs-and-c++", no_argument, NULL, 'T' },
484 { "update", no_argument, NULL, 'u' },
485 { "vgrind", no_argument, NULL, 'v' },
486 { "no-warn", no_argument, NULL, 'w' },
488 #else /* Etags options */
489 { "no-defines", no_argument, NULL, 'D' },
490 { "no-globals", no_argument, &globals, 0 },
491 { "include", required_argument, NULL, 'i' },
492 #endif
493 { NULL }
496 static compressor compressors[] =
498 { "z", "gzip -d -c"},
499 { "Z", "gzip -d -c"},
500 { "gz", "gzip -d -c"},
501 { "GZ", "gzip -d -c"},
502 { "bz2", "bzip2 -d -c" },
503 { "xz", "xz -d -c" },
504 { NULL }
508 * Language stuff.
511 /* Ada code */
512 static const char *Ada_suffixes [] =
513 { "ads", "adb", "ada", NULL };
514 static const char Ada_help [] =
515 "In Ada code, functions, procedures, packages, tasks and types are\n\
516 tags. Use the `--packages-only' option to create tags for\n\
517 packages only.\n\
518 Ada tag names have suffixes indicating the type of entity:\n\
519 Entity type: Qualifier:\n\
520 ------------ ----------\n\
521 function /f\n\
522 procedure /p\n\
523 package spec /s\n\
524 package body /b\n\
525 type /t\n\
526 task /k\n\
527 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
528 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
529 will just search for any tag `bidule'.";
531 /* Assembly code */
532 static const char *Asm_suffixes [] =
533 { "a", /* Unix assembler */
534 "asm", /* Microcontroller assembly */
535 "def", /* BSO/Tasking definition includes */
536 "inc", /* Microcontroller include files */
537 "ins", /* Microcontroller include files */
538 "s", "sa", /* Unix assembler */
539 "S", /* cpp-processed Unix assembler */
540 "src", /* BSO/Tasking C compiler output */
541 NULL
543 static const char Asm_help [] =
544 "In assembler code, labels appearing at the beginning of a line,\n\
545 followed by a colon, are tags.";
548 /* Note that .c and .h can be considered C++, if the --c++ flag was
549 given, or if the `class' or `template' keywords are met inside the file.
550 That is why default_C_entries is called for these. */
551 static const char *default_C_suffixes [] =
552 { "c", "h", NULL };
553 #if CTAGS /* C help for Ctags */
554 static const char default_C_help [] =
555 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
556 Use -T to tag definitions of `struct', `union' and `enum'.\n\
557 Use -d to tag `#define' macro definitions and `enum' constants.\n\
558 Use --globals to tag global variables.\n\
559 You can tag function declarations and external variables by\n\
560 using `--declarations', and struct members by using `--members'.";
561 #else /* C help for Etags */
562 static const char default_C_help [] =
563 "In C code, any C function or typedef is a tag, and so are\n\
564 definitions of `struct', `union' and `enum'. `#define' macro\n\
565 definitions and `enum' constants are tags unless you specify\n\
566 `--no-defines'. Global variables are tags unless you specify\n\
567 `--no-globals' and so are struct members unless you specify\n\
568 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
569 `--no-members' can make the tags table file much smaller.\n\
570 You can tag function declarations and external variables by\n\
571 using `--declarations'.";
572 #endif /* C help for Ctags and Etags */
574 static const char *Cplusplus_suffixes [] =
575 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
576 "M", /* Objective C++ */
577 "pdb", /* PostScript with C syntax */
578 NULL };
579 static const char Cplusplus_help [] =
580 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
581 --help --lang=c --lang=c++ for full help.)\n\
582 In addition to C tags, member functions are also recognized. Member\n\
583 variables are recognized unless you use the `--no-members' option.\n\
584 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
585 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
586 `operator+'.";
/* Java: file name suffixes and detailed help text.  The help text is
   never written to, so it is const like the other languages' help
   strings.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol: file name suffixes and detailed help text.  The help text is
   never written to, so it is const like the other languages' help
   strings.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
601 static const char *Cstar_suffixes [] =
602 { "cs", "hs", NULL };
604 static const char *Erlang_suffixes [] =
605 { "erl", "hrl", NULL };
606 static const char Erlang_help [] =
607 "In Erlang code, the tags are the functions, records and macros\n\
608 defined in the file.";
/* Forth: file name suffixes and detailed help text.  The suffix table
   is file-local, like every other language's suffix table.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
616 static const char *Fortran_suffixes [] =
617 { "F", "f", "f90", "for", NULL };
618 static const char Fortran_help [] =
619 "In Fortran code, functions, subroutines and block data are tags.";
621 static const char *HTML_suffixes [] =
622 { "htm", "html", "shtml", NULL };
623 static const char HTML_help [] =
624 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
625 `h3' headers. Also, tags are `name=' in anchors and all\n\
626 occurrences of `id='.";
628 static const char *Lisp_suffixes [] =
629 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
630 static const char Lisp_help [] =
631 "In Lisp code, any function defined with `defun', any variable\n\
632 defined with `defvar' or `defconst', and in general the first\n\
633 argument of any expression that starts with `(def' in column zero\n\
634 is a tag.\n\
635 The `--declarations' option tags \"(defvar foo)\" constructs too.";
637 static const char *Lua_suffixes [] =
638 { "lua", "LUA", NULL };
639 static const char Lua_help [] =
640 "In Lua scripts, all functions are tags.";
642 static const char *Makefile_filenames [] =
643 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
644 static const char Makefile_help [] =
645 "In makefiles, targets are tags; additionally, variables are tags\n\
646 unless you specify `--no-globals'.";
648 static const char *Objc_suffixes [] =
649 { "lm", /* Objective lex file */
650 "m", /* Objective C file */
651 NULL };
652 static const char Objc_help [] =
653 "In Objective C code, tags include Objective C definitions for classes,\n\
654 class categories, methods and protocols. Tags for variables and\n\
655 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
656 (Use --help --lang=c --lang=objc --lang=java for full help.)";
658 static const char *Pascal_suffixes [] =
659 { "p", "pas", NULL };
660 static const char Pascal_help [] =
661 "In Pascal code, the tags are the functions and procedures defined\n\
662 in the file.";
663 /* " // this is for working around an Emacs highlighting bug... */
665 static const char *Perl_suffixes [] =
666 { "pl", "pm", NULL };
667 static const char *Perl_interpreters [] =
668 { "perl", "@PERL@", NULL };
669 static const char Perl_help [] =
670 "In Perl code, the tags are the packages, subroutines and variables\n\
671 defined by the `package', `sub', `my' and `local' keywords. Use\n\
672 `--globals' if you want to tag global variables. Tags for\n\
673 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
674 defined in the default package is `main::SUB'.";
676 static const char *PHP_suffixes [] =
677 { "php", "php3", "php4", NULL };
678 static const char PHP_help [] =
679 "In PHP code, tags are functions, classes and defines. Unless you use\n\
680 the `--no-members' option, vars are tags too.";
682 static const char *plain_C_suffixes [] =
683 { "pc", /* Pro*C file */
684 NULL };
686 static const char *PS_suffixes [] =
687 { "ps", "psw", NULL }; /* .psw is for PSWrap */
688 static const char PS_help [] =
689 "In PostScript code, the tags are the functions.";
691 static const char *Prolog_suffixes [] =
692 { "prolog", NULL };
693 static const char Prolog_help [] =
694 "In Prolog code, tags are predicates and rules at the beginning of\n\
695 line.";
697 static const char *Python_suffixes [] =
698 { "py", NULL };
699 static const char Python_help [] =
700 "In Python code, `def' or `class' at the beginning of a line\n\
701 generate a tag.";
703 /* Can't do the `SCM' or `scm' prefix with a version number. */
704 static const char *Scheme_suffixes [] =
705 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
706 static const char Scheme_help [] =
707 "In Scheme code, tags include anything defined with `def' or with a\n\
708 construct whose name starts with `def'. They also include\n\
709 variables set with `set!' at top level in the file.";
711 static const char *TeX_suffixes [] =
712 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
713 static const char TeX_help [] =
714 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
715 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
716 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
717 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
718 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
720 Other commands can be specified by setting the environment variable\n\
721 `TEXTAGS' to a colon-separated list like, for example,\n\
722 TEXTAGS=\"mycommand:myothercommand\".";
725 static const char *Texinfo_suffixes [] =
726 { "texi", "texinfo", "txi", NULL };
727 static const char Texinfo_help [] =
728 "for texinfo files, lines starting with @node are tagged.";
730 static const char *Yacc_suffixes [] =
731 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
732 static const char Yacc_help [] =
733 "In Bison or Yacc input files, each rule defines as a tag the\n\
734 nonterminal it constructs. The portions of the file that contain\n\
735 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
736 for full help).";
738 static const char auto_help [] =
739 "`auto' is not a real language, it indicates to use\n\
740 a default language for files base on file name suffix and file contents.";
742 static const char none_help [] =
743 "`none' is not a real language, it indicates to only do\n\
744 regexp processing on files.";
746 static const char no_lang_help [] =
747 "No detailed help available for this language.";
751 * Table of languages.
753 * It is ok for a given function to be listed under more than one
754 * name. I just didn't.
757 static language lang_names [] =
759 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
760 { "asm", Asm_help, Asm_labels, Asm_suffixes },
761 { "c", default_C_help, default_C_entries, default_C_suffixes },
762 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
763 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
764 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
765 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
766 { "forth", Forth_help, Forth_words, Forth_suffixes },
767 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
768 { "html", HTML_help, HTML_labels, HTML_suffixes },
769 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
770 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
771 { "lua", Lua_help, Lua_functions, Lua_suffixes },
772 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
773 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
774 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
775 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
776 { "php", PHP_help, PHP_functions, PHP_suffixes },
777 { "postscript",PS_help, PS_functions, PS_suffixes },
778 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
779 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
780 { "python", Python_help, Python_functions, Python_suffixes },
781 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
782 { "tex", TeX_help, TeX_commands, TeX_suffixes },
783 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
784 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
785 { "auto", auto_help }, /* default guessing scheme */
786 { "none", none_help, just_read_file }, /* regexp matching only */
787 { NULL } /* end of list */
/* Print to standard output one line per entry of lang_names: the
   language name, then its default file names (if any), then its file
   name suffixes (if any), each suffix preceded by a dot.  A fixed
   explanation of `auto', `none', language guessing and compressed
   files follows the list.
   NOTE(review): the numbering embedded in this listing skips a line
   inside the final string literal, so that literal may be missing
   text here -- compare against the pristine file before editing.  */
791 static void
792 print_language_names (void)
794 language *lang;
795 const char **name, **ext;
797 puts ("\nThese are the currently supported languages, along with the\n\
798 default file names and dot suffixes:");
/* The entry with a null name terminates lang_names.  */
799 for (lang = lang_names; lang->name != NULL; lang++)
801 printf (" %-*s", 10, lang->name);
802 if (lang->filenames != NULL)
803 for (name = lang->filenames; *name != NULL; name++)
804 printf (" %s", *name);
805 if (lang->suffixes != NULL)
806 for (ext = lang->suffixes; *ext != NULL; ext++)
807 printf (" .%s", *ext);
/* End the line for this language.  */
808 puts ("");
810 puts ("where `auto' means use default language for files based on file\n\
811 name suffix, and `none' means only do regexp processing on files.\n\
812 If no language is specified and no matching suffix is found,\n\
813 the first line of the file is read for a sharp-bang (#!) sequence\n\
814 followed by the name of an interpreter. If no such sequence is found,\n\
815 Fortran is tried first; if no tags are found, C is tried next.\n\
816 When parsing any C file, a \"class\" or \"template\" keyword\n\
817 switches to C++.");
818 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
820 For detailed help on a given language use, for example,\n\
821 etags --help --lang=ada.");
824 #ifndef EMACS_NAME
825 # define EMACS_NAME "standalone"
826 #endif
827 #ifndef VERSION
828 # define VERSION "17.38.1.4"
829 #endif
830 static _Noreturn void
831 print_version (void)
833 char emacs_copyright[] = COPYRIGHT;
835 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
836 puts (emacs_copyright);
837 puts ("This program is distributed under the terms in ETAGS.README");
839 exit (EXIT_SUCCESS);
/* Compile-time switch, false unless the build overrides it.  When true,
   print_help also describes the options it otherwise leaves out
   (--no-line-directive, --no-duplicates, --no-warn).  */
842 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
843 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
844 #endif
/* Print help on standard output and exit successfully; never returns.
   ARGBUFFER is the parsed argument list, terminated by an entry whose
   arg_type is at_end.  If it contains at_language entries, only the
   help text of each of those languages is printed (separated by empty
   lines).  Otherwise the usage line, the option descriptions (several
   of which differ depending on CTAGS), the language table from
   print_language_names and the bug-report address are printed.  */
846 static _Noreturn void
847 print_help (argument *argbuffer)
849 bool help_for_lang = false;
/* First pass: language-specific help, one block per at_language entry. */
851 for (; argbuffer->arg_type != at_end; argbuffer++)
852 if (argbuffer->arg_type == at_language)
854 if (help_for_lang)
855 puts ("");
856 puts (argbuffer->lang->help);
857 help_for_lang = true;
/* Language help was requested and printed: skip the general help. */
860 if (help_for_lang)
861 exit (EXIT_SUCCESS);
/* General help starts here. */
863 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
865 These are the options accepted by %s.\n", progname, progname);
866 puts ("You may use unambiguous abbreviations for the long option names.");
867 puts (" A - as file name means read names from stdin (one per line).\n\
868 Absolute names are stored in the output file as they are.\n\
869 Relative ones are stored relative to the output file's directory.\n");
871 puts ("-a, --append\n\
872 Append tag entries to existing tags file.");
874 puts ("--packages-only\n\
875 For Ada files, only generate tags for packages.");
877 if (CTAGS)
878 puts ("-B, --backward-search\n\
879 Write the search commands for the tag entries using '?', the\n\
880 backward-search command instead of '/', the forward-search command.");
882 /* This option is mostly obsolete, because etags can now automatically
883 detect C++. Retained for backward compatibility and for debugging and
884 experimentation. In principle, we could want to tag as C++ even
885 before any "class" or "template" keyword.
886 puts ("-C, --c++\n\
887 Treat files whose name suffix defaults to C language as C++ files.");
890 puts ("--declarations\n\
891 In C and derived languages, create tags for function declarations,");
892 if (CTAGS)
893 puts ("\tand create tags for extern variables if --globals is used.");
894 else
895 puts
896 ("\tand create tags for extern variables unless --no-globals is used.");
898 if (CTAGS)
899 puts ("-d, --defines\n\
900 Create tag entries for C #define constants and enum constants, too.");
901 else
902 puts ("-D, --no-defines\n\
903 Don't create tag entries for C #define constants and enum constants.\n\
904 This makes the tags file smaller.");
906 if (!CTAGS)
907 puts ("-i FILE, --include=FILE\n\
908 Include a note in tag file indicating that, when searching for\n\
909 a tag, one should also consult the tags file FILE after\n\
910 checking the current file.");
912 puts ("-l LANG, --language=LANG\n\
913 Force the following files to be considered as written in the\n\
914 named language up to the next --language=LANG option.");
916 if (CTAGS)
917 puts ("--globals\n\
918 Create tag entries for global variables in some languages.");
919 else
920 puts ("--no-globals\n\
921 Do not create tag entries for global variables in some\n\
922 languages. This makes the tags file smaller.");
924 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
925 puts ("--no-line-directive\n\
926 Ignore #line preprocessor directives in C and derived languages.");
928 if (CTAGS)
929 puts ("--members\n\
930 Create tag entries for members of structures in some languages.");
931 else
932 puts ("--no-members\n\
933 Do not create tag entries for members of structures\n\
934 in some languages.");
936 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
937 Make a tag for each line matching a regular expression pattern\n\
938 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
939 files only. REGEXFILE is a file containing one REGEXP per line.\n\
940 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
941 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
942 puts (" If TAGNAME/ is present, the tags created are named.\n\
943 For example Tcl named tags can be created with:\n\
944 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
945 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
946 `m' means to allow multi-line matches, `s' implies `m' and\n\
947 causes dot to match any character, including newline.");
949 puts ("-R, --no-regex\n\
950 Don't create tags from regexps for the following files.");
952 puts ("-I, --ignore-indentation\n\
953 In C and C++ do not assume that a closing brace in the first\n\
954 column is the final brace of a function or structure definition.");
956 puts ("-o FILE, --output=FILE\n\
957 Write the tags to FILE.");
959 puts ("--parse-stdin=NAME\n\
960 Read from standard input and record tags as belonging to file NAME.");
962 if (CTAGS)
964 puts ("-t, --typedefs\n\
965 Generate tag entries for C and Ada typedefs.");
966 puts ("-T, --typedefs-and-c++\n\
967 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
968 and C++ member functions.");
971 if (CTAGS)
972 puts ("-u, --update\n\
973 Update the tag entries for the given files, leaving tag\n\
974 entries for other files in place. Currently, this is\n\
975 implemented by deleting the existing entries for the given\n\
976 files and then rewriting the new entries at the end of the\n\
977 tags file. It is often faster to simply rebuild the entire\n\
978 tag file than to use this.");
980 if (CTAGS)
982 puts ("-v, --vgrind\n\
983 Print on the standard output an index of items intended for\n\
984 human consumption, similar to the output of vgrind. The index\n\
985 is sorted, and gives the page number of each item.");
987 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
988 puts ("-w, --no-duplicates\n\
989 Do not create duplicate tag entries, for compatibility with\n\
990 traditional ctags.");
992 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
993 puts ("-w, --no-warn\n\
994 Suppress warning messages about duplicate tag entries.");
996 puts ("-x, --cxref\n\
997 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
998 The output uses line numbers instead of page numbers, but\n\
999 beyond that the differences are cosmetic; try both to see\n\
1000 which you like.");
1003 puts ("-V, --version\n\
1004 Print the version of the program.\n\
1005 -h, --help\n\
1006 Print this help message.\n\
1007 Followed by one or more `--language' options prints detailed\n\
1008 help about tag generation for the specified languages.");
1010 print_language_names ();
1012 puts ("");
1013 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1015 exit (EXIT_SUCCESS);
1020 main (int argc, char **argv)
1022 int i;
1023 unsigned int nincluded_files;
1024 char **included_files;
1025 argument *argbuffer;
1026 int current_arg, file_count;
1027 linebuffer filename_lb;
1028 bool help_asked = false;
1029 ptrdiff_t len;
1030 char *optstring;
1031 int opt;
1033 progname = argv[0];
1034 nincluded_files = 0;
1035 included_files = xnew (argc, char *);
1036 current_arg = 0;
1037 file_count = 0;
1039 /* Allocate enough no matter what happens. Overkill, but each one
1040 is small. */
1041 argbuffer = xnew (argc, argument);
1044 * Always find typedefs and structure tags.
1045 * Also default to find macro constants, enum constants, struct
1046 * members and global variables. Do it for both etags and ctags.
1048 typedefs = typedefs_or_cplusplus = constantypedefs = true;
1049 globals = members = true;
1051 /* When the optstring begins with a '-' getopt_long does not rearrange the
1052 non-options arguments to be at the end, but leaves them alone. */
1053 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1054 (CTAGS) ? "BxdtTuvw" : "Di:",
1055 "");
1057 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1058 switch (opt)
1060 case 0:
1061 /* If getopt returns 0, then it has already processed a
1062 long-named option. We should do nothing. */
1063 break;
1065 case 1:
1066 /* This means that a file name has been seen. Record it. */
1067 argbuffer[current_arg].arg_type = at_filename;
1068 argbuffer[current_arg].what = optarg;
1069 len = strlen (optarg);
1070 if (whatlen_max < len)
1071 whatlen_max = len;
1072 ++current_arg;
1073 ++file_count;
1074 break;
1076 case STDIN:
1077 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1078 argbuffer[current_arg].arg_type = at_stdin;
1079 argbuffer[current_arg].what = optarg;
1080 len = strlen (optarg);
1081 if (whatlen_max < len)
1082 whatlen_max = len;
1083 ++current_arg;
1084 ++file_count;
1085 if (parsing_stdin)
1086 fatal ("cannot parse standard input more than once", (char *)NULL);
1087 parsing_stdin = true;
1088 break;
1090 /* Common options. */
1091 case 'a': append_to_tagfile = true; break;
1092 case 'C': cplusplus = true; break;
1093 case 'f': /* for compatibility with old makefiles */
1094 case 'o':
1095 if (tagfile)
1097 error ("-o option may only be given once.");
1098 suggest_asking_for_help ();
1099 /* NOTREACHED */
1101 tagfile = optarg;
1102 break;
1103 case 'I':
1104 case 'S': /* for backward compatibility */
1105 ignoreindent = true;
1106 break;
1107 case 'l':
1109 language *lang = get_language_from_langname (optarg);
1110 if (lang != NULL)
1112 argbuffer[current_arg].lang = lang;
1113 argbuffer[current_arg].arg_type = at_language;
1114 ++current_arg;
1117 break;
1118 case 'c':
1119 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1120 optarg = concat (optarg, "i", ""); /* memory leak here */
1121 /* FALLTHRU */
1122 case 'r':
1123 argbuffer[current_arg].arg_type = at_regexp;
1124 argbuffer[current_arg].what = optarg;
1125 len = strlen (optarg);
1126 if (whatlen_max < len)
1127 whatlen_max = len;
1128 ++current_arg;
1129 break;
1130 case 'R':
1131 argbuffer[current_arg].arg_type = at_regexp;
1132 argbuffer[current_arg].what = NULL;
1133 ++current_arg;
1134 break;
1135 case 'V':
1136 print_version ();
1137 break;
1138 case 'h':
1139 case 'H':
1140 help_asked = true;
1141 break;
1143 /* Etags options */
1144 case 'D': constantypedefs = false; break;
1145 case 'i': included_files[nincluded_files++] = optarg; break;
1147 /* Ctags options. */
1148 case 'B': searchar = '?'; break;
1149 case 'd': constantypedefs = true; break;
1150 case 't': typedefs = true; break;
1151 case 'T': typedefs = typedefs_or_cplusplus = true; break;
1152 case 'u': update = true; break;
1153 case 'v': vgrind_style = true; /*FALLTHRU*/
1154 case 'x': cxref_style = true; break;
1155 case 'w': no_warnings = true; break;
1156 default:
1157 suggest_asking_for_help ();
1158 /* NOTREACHED */
1161 /* No more options. Store the rest of arguments. */
1162 for (; optind < argc; optind++)
1164 argbuffer[current_arg].arg_type = at_filename;
1165 argbuffer[current_arg].what = argv[optind];
1166 len = strlen (argv[optind]);
1167 if (whatlen_max < len)
1168 whatlen_max = len;
1169 ++current_arg;
1170 ++file_count;
1173 argbuffer[current_arg].arg_type = at_end;
1175 if (help_asked)
1176 print_help (argbuffer);
1177 /* NOTREACHED */
1179 if (nincluded_files == 0 && file_count == 0)
1181 error ("no input files specified.");
1182 suggest_asking_for_help ();
1183 /* NOTREACHED */
1186 if (tagfile == NULL)
1187 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1188 cwd = etags_getcwd (); /* the current working directory */
1189 if (cwd[strlen (cwd) - 1] != '/')
1191 char *oldcwd = cwd;
1192 cwd = concat (oldcwd, "/", "");
1193 free (oldcwd);
1196 /* Compute base directory for relative file names. */
1197 if (streq (tagfile, "-")
1198 || strneq (tagfile, "/dev/", 5))
1199 tagfiledir = cwd; /* relative file names are relative to cwd */
1200 else
1202 canonicalize_filename (tagfile);
1203 tagfiledir = absolute_dirname (tagfile, cwd);
1206 linebuffer_init (&lb);
1207 linebuffer_init (&filename_lb);
1208 linebuffer_init (&filebuf);
1209 linebuffer_init (&token_name);
1211 if (!CTAGS)
1213 if (streq (tagfile, "-"))
1215 tagf = stdout;
1216 SET_BINARY (fileno (stdout));
1218 else
1219 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1220 if (tagf == NULL)
1221 pfatal (tagfile);
1225 * Loop through files finding functions.
1227 for (i = 0; i < current_arg; i++)
1229 static language *lang; /* non-NULL if language is forced */
1230 char *this_file;
1232 switch (argbuffer[i].arg_type)
1234 case at_language:
1235 lang = argbuffer[i].lang;
1236 break;
1237 case at_regexp:
1238 analyze_regex (argbuffer[i].what);
1239 break;
1240 case at_filename:
1241 this_file = argbuffer[i].what;
1242 /* Input file named "-" means read file names from stdin
1243 (one per line) and use them. */
1244 if (streq (this_file, "-"))
1246 if (parsing_stdin)
1247 fatal ("cannot parse standard input AND read file names from it",
1248 (char *)NULL);
1249 while (readline_internal (&filename_lb, stdin) > 0)
1250 process_file_name (filename_lb.buffer, lang);
1252 else
1253 process_file_name (this_file, lang);
1254 break;
1255 case at_stdin:
1256 this_file = argbuffer[i].what;
1257 process_file (stdin, this_file, lang);
1258 break;
1262 free_regexps ();
1263 free (lb.buffer);
1264 free (filebuf.buffer);
1265 free (token_name.buffer);
1267 if (!CTAGS || cxref_style)
1269 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1270 put_entries (nodehead);
1271 free_tree (nodehead);
1272 nodehead = NULL;
1273 if (!CTAGS)
1275 fdesc *fdp;
1277 /* Output file entries that have no tags. */
1278 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1279 if (!fdp->written)
1280 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1282 while (nincluded_files-- > 0)
1283 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1285 if (fclose (tagf) == EOF)
1286 pfatal (tagfile);
1289 exit (EXIT_SUCCESS);
1292 /* From here on, we are in (CTAGS && !cxref_style) */
1293 if (update)
1295 char *cmd =
1296 xmalloc (strlen (tagfile) + whatlen_max +
1297 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1298 for (i = 0; i < current_arg; ++i)
1300 switch (argbuffer[i].arg_type)
1302 case at_filename:
1303 case at_stdin:
1304 break;
1305 default:
1306 continue; /* the for loop */
1308 char *z = stpcpy (cmd, "mv ");
1309 z = stpcpy (z, tagfile);
1310 z = stpcpy (z, " OTAGS;fgrep -v '\t");
1311 z = stpcpy (z, argbuffer[i].what);
1312 z = stpcpy (z, "\t' OTAGS >");
1313 z = stpcpy (z, tagfile);
1314 strcpy (z, ";rm OTAGS");
1315 if (system (cmd) != EXIT_SUCCESS)
1316 fatal ("failed to execute shell command", (char *)NULL);
1318 free (cmd);
1319 append_to_tagfile = true;
1322 tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1323 if (tagf == NULL)
1324 pfatal (tagfile);
1325 put_entries (nodehead); /* write all the tags (CTAGS) */
1326 free_tree (nodehead);
1327 nodehead = NULL;
1328 if (fclose (tagf) == EOF)
1329 pfatal (tagfile);
1331 if (CTAGS)
1332 if (append_to_tagfile || update)
1334 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1335 /* Maybe these should be used:
1336 setenv ("LC_COLLATE", "C", 1);
1337 setenv ("LC_ALL", "C", 1); */
1338 char *z = stpcpy (cmd, "sort -u -o ");
1339 z = stpcpy (z, tagfile);
1340 *z++ = ' ';
1341 strcpy (z, tagfile);
1342 exit (system (cmd));
1344 return EXIT_SUCCESS;
1349 * Return a compressor given the file name. If EXTPTR is non-zero,
1350 * return a pointer into FILE where the compressor-specific
1351 * extension begins. If no compressor is found, NULL is returned
1352 * and EXTPTR is not significant.
1353 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1355 static compressor *
1356 get_compressor_from_suffix (char *file, char **extptr)
1358 compressor *compr;
1359 char *slash, *suffix;
1361 /* File has been processed by canonicalize_filename,
1362 so we don't need to consider backslashes on DOS_NT. */
1363 slash = strrchr (file, '/');
1364 suffix = strrchr (file, '.');
1365 if (suffix == NULL || suffix < slash)
1366 return NULL;
1367 if (extptr != NULL)
1368 *extptr = suffix;
1369 suffix += 1;
1370 /* Let those poor souls who live with DOS 8+3 file name limits get
1371 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1372 Only the first do loop is run if not MSDOS */
1375 for (compr = compressors; compr->suffix != NULL; compr++)
1376 if (streq (compr->suffix, suffix))
1377 return compr;
1378 if (!MSDOS)
1379 break; /* do it only once: not really a loop */
1380 if (extptr != NULL)
1381 *extptr = ++suffix;
1382 } while (*suffix != '\0');
1383 return NULL;
1389 * Return a language given the name.
1391 static language *
1392 get_language_from_langname (const char *name)
1394 language *lang;
1396 if (name == NULL)
1397 error ("empty language name");
1398 else
1400 for (lang = lang_names; lang->name != NULL; lang++)
1401 if (streq (name, lang->name))
1402 return lang;
1403 error ("unknown language \"%s\"", name);
1406 return NULL;
1411 * Return a language given the interpreter name.
1413 static language *
1414 get_language_from_interpreter (char *interpreter)
1416 language *lang;
1417 const char **iname;
1419 if (interpreter == NULL)
1420 return NULL;
1421 for (lang = lang_names; lang->name != NULL; lang++)
1422 if (lang->interpreters != NULL)
1423 for (iname = lang->interpreters; *iname != NULL; iname++)
1424 if (streq (*iname, interpreter))
1425 return lang;
1427 return NULL;
1433 * Return a language given the file name.
1435 static language *
1436 get_language_from_filename (char *file, int case_sensitive)
1438 language *lang;
1439 const char **name, **ext, *suffix;
1441 /* Try whole file name first. */
1442 for (lang = lang_names; lang->name != NULL; lang++)
1443 if (lang->filenames != NULL)
1444 for (name = lang->filenames; *name != NULL; name++)
1445 if ((case_sensitive)
1446 ? streq (*name, file)
1447 : strcaseeq (*name, file))
1448 return lang;
1450 /* If not found, try suffix after last dot. */
1451 suffix = strrchr (file, '.');
1452 if (suffix == NULL)
1453 return NULL;
1454 suffix += 1;
1455 for (lang = lang_names; lang->name != NULL; lang++)
1456 if (lang->suffixes != NULL)
1457 for (ext = lang->suffixes; *ext != NULL; ext++)
1458 if ((case_sensitive)
1459 ? streq (*ext, suffix)
1460 : strcaseeq (*ext, suffix))
1461 return lang;
1462 return NULL;
1467 * This routine is called on each file argument.
1469 static void
1470 process_file_name (char *file, language *lang)
1472 struct stat stat_buf;
1473 FILE *inf;
1474 fdesc *fdp;
1475 compressor *compr;
1476 char *compressed_name, *uncompressed_name;
1477 char *ext, *real_name, *tmp_name;
1478 int retval;
1480 canonicalize_filename (file);
1481 if (streq (file, tagfile) && !streq (tagfile, "-"))
1483 error ("skipping inclusion of %s in self.", file);
1484 return;
1486 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1488 compressed_name = NULL;
1489 real_name = uncompressed_name = savestr (file);
1491 else
1493 real_name = compressed_name = savestr (file);
1494 uncompressed_name = savenstr (file, ext - file);
1497 /* If the canonicalized uncompressed name
1498 has already been dealt with, skip it silently. */
1499 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1501 assert (fdp->infname != NULL);
1502 if (streq (uncompressed_name, fdp->infname))
1503 goto cleanup;
1506 if (stat (real_name, &stat_buf) != 0)
1508 /* Reset real_name and try with a different name. */
1509 real_name = NULL;
1510 if (compressed_name != NULL) /* try with the given suffix */
1512 if (stat (uncompressed_name, &stat_buf) == 0)
1513 real_name = uncompressed_name;
1515 else /* try all possible suffixes */
1517 for (compr = compressors; compr->suffix != NULL; compr++)
1519 compressed_name = concat (file, ".", compr->suffix);
1520 if (stat (compressed_name, &stat_buf) != 0)
1522 if (MSDOS)
1524 char *suf = compressed_name + strlen (file);
1525 size_t suflen = strlen (compr->suffix) + 1;
1526 for ( ; suf[1]; suf++, suflen--)
1528 memmove (suf, suf + 1, suflen);
1529 if (stat (compressed_name, &stat_buf) == 0)
1531 real_name = compressed_name;
1532 break;
1535 if (real_name != NULL)
1536 break;
1537 } /* MSDOS */
1538 free (compressed_name);
1539 compressed_name = NULL;
1541 else
1543 real_name = compressed_name;
1544 break;
1548 if (real_name == NULL)
1550 perror (file);
1551 goto cleanup;
1553 } /* try with a different name */
1555 if (!S_ISREG (stat_buf.st_mode))
1557 error ("skipping %s: it is not a regular file.", real_name);
1558 goto cleanup;
1560 if (real_name == compressed_name)
1562 tmp_name = etags_mktmp ();
1563 if (!tmp_name)
1564 inf = NULL;
1565 else
1567 char *cmd1 = concat (compr->command, " ", real_name);
1568 char *cmd = concat (cmd1, " > ", tmp_name);
1569 free (cmd1);
1570 if (system (cmd) == -1)
1571 inf = NULL;
1572 else
1573 inf = fopen (tmp_name, "r" FOPEN_BINARY);
1574 free (cmd);
1577 else
1578 inf = fopen (real_name, "r" FOPEN_BINARY);
1579 if (inf == NULL)
1581 perror (real_name);
1582 goto cleanup;
1585 process_file (inf, uncompressed_name, lang);
1587 retval = fclose (inf);
1588 if (real_name == compressed_name)
1590 remove (tmp_name);
1591 free (tmp_name);
1593 if (retval < 0)
1594 pfatal (file);
1596 cleanup:
1597 free (compressed_name);
1598 free (uncompressed_name);
1599 last_node = NULL;
1600 curfdp = NULL;
1601 return;
1604 static void
1605 process_file (FILE *fh, char *fn, language *lang)
1607 static const fdesc emptyfdesc;
1608 fdesc *fdp;
1610 /* Create a new input file description entry. */
1611 fdp = xnew (1, fdesc);
1612 *fdp = emptyfdesc;
1613 fdp->next = fdhead;
1614 fdp->infname = savestr (fn);
1615 fdp->lang = lang;
1616 fdp->infabsname = absolute_filename (fn, cwd);
1617 fdp->infabsdir = absolute_dirname (fn, cwd);
1618 if (filename_is_absolute (fn))
1620 /* An absolute file name. Canonicalize it. */
1621 fdp->taggedfname = absolute_filename (fn, NULL);
1623 else
1625 /* A file name relative to cwd. Make it relative
1626 to the directory of the tags file. */
1627 fdp->taggedfname = relative_filename (fn, tagfiledir);
1629 fdp->usecharno = true; /* use char position when making tags */
1630 fdp->prop = NULL;
1631 fdp->written = false; /* not written on tags file yet */
1633 fdhead = fdp;
1634 curfdp = fdhead; /* the current file description */
1636 find_entries (fh);
1638 /* If not Ctags, and if this is not metasource and if it contained no #line
1639 directives, we can write the tags and free all nodes pointing to
1640 curfdp. */
1641 if (!CTAGS
1642 && curfdp->usecharno /* no #line directives in this file */
1643 && !curfdp->lang->metasource)
1645 node *np, *prev;
1647 /* Look for the head of the sublist relative to this file. See add_node
1648 for the structure of the node tree. */
1649 prev = NULL;
1650 for (np = nodehead; np != NULL; prev = np, np = np->left)
1651 if (np->fdp == curfdp)
1652 break;
1654 /* If we generated tags for this file, write and delete them. */
1655 if (np != NULL)
1657 /* This is the head of the last sublist, if any. The following
1658 instructions depend on this being true. */
1659 assert (np->left == NULL);
1661 assert (fdhead == curfdp);
1662 assert (last_node->fdp == curfdp);
1663 put_entries (np); /* write tags for file curfdp->taggedfname */
1664 free_tree (np); /* remove the written nodes */
1665 if (prev == NULL)
1666 nodehead = NULL; /* no nodes left */
1667 else
1668 prev->left = NULL; /* delete the pointer to the sublist */
1674 * This routine opens the specified file and calls the function
1675 * which finds the function and type definitions.
1677 static void
1678 find_entries (FILE *inf)
1680 char *cp;
1681 language *lang = curfdp->lang;
1682 Lang_function *parser = NULL;
1684 /* If user specified a language, use it. */
1685 if (lang != NULL && lang->function != NULL)
1687 parser = lang->function;
1690 /* Else try to guess the language given the file name. */
1691 if (parser == NULL)
1693 lang = get_language_from_filename (curfdp->infname, true);
1694 if (lang != NULL && lang->function != NULL)
1696 curfdp->lang = lang;
1697 parser = lang->function;
1701 /* Else look for sharp-bang as the first two characters. */
1702 if (parser == NULL
1703 && readline_internal (&lb, inf) > 0
1704 && lb.len >= 2
1705 && lb.buffer[0] == '#'
1706 && lb.buffer[1] == '!')
1708 char *lp;
1710 /* Set lp to point at the first char after the last slash in the
1711 line or, if no slashes, at the first nonblank. Then set cp to
1712 the first successive blank and terminate the string. */
1713 lp = strrchr (lb.buffer+2, '/');
1714 if (lp != NULL)
1715 lp += 1;
1716 else
1717 lp = skip_spaces (lb.buffer + 2);
1718 cp = skip_non_spaces (lp);
1719 *cp = '\0';
1721 if (strlen (lp) > 0)
1723 lang = get_language_from_interpreter (lp);
1724 if (lang != NULL && lang->function != NULL)
1726 curfdp->lang = lang;
1727 parser = lang->function;
1732 rewind (inf);
1734 /* Else try to guess the language given the case insensitive file name. */
1735 if (parser == NULL)
1737 lang = get_language_from_filename (curfdp->infname, false);
1738 if (lang != NULL && lang->function != NULL)
1740 curfdp->lang = lang;
1741 parser = lang->function;
1745 /* Else try Fortran or C. */
1746 if (parser == NULL)
1748 node *old_last_node = last_node;
1750 curfdp->lang = get_language_from_langname ("fortran");
1751 find_entries (inf);
1753 if (old_last_node == last_node)
1754 /* No Fortran entries found. Try C. */
1756 rewind (inf);
1757 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1758 find_entries (inf);
1760 return;
1763 if (!no_line_directive
1764 && curfdp->lang != NULL && curfdp->lang->metasource)
1765 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1766 file, or anyway we parsed a file that is automatically generated from
1767 this one. If this is the case, the bingo.c file contained #line
1768 directives that generated tags pointing to this file. Let's delete
1769 them all before parsing this file, which is the real source. */
1771 fdesc **fdpp = &fdhead;
1772 while (*fdpp != NULL)
1773 if (*fdpp != curfdp
1774 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1775 /* We found one of those! We must delete both the file description
1776 and all tags referring to it. */
1778 fdesc *badfdp = *fdpp;
1780 /* Delete the tags referring to badfdp->taggedfname
1781 that were obtained from badfdp->infname. */
1782 invalidate_nodes (badfdp, &nodehead);
1784 *fdpp = badfdp->next; /* remove the bad description from the list */
1785 free_fdesc (badfdp);
1787 else
1788 fdpp = &(*fdpp)->next; /* advance the list pointer */
1791 assert (parser != NULL);
1793 /* Generic initializations before reading from file. */
1794 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1796 /* Generic initializations before parsing file with readline. */
1797 lineno = 0; /* reset global line number */
1798 charno = 0; /* reset global char number */
1799 linecharno = 0; /* reset global char number of line start */
1801 parser (inf);
1803 regex_tag_multiline ();
1808 * Check whether an implicitly named tag should be created,
1809 * then call `pfnote'.
1810 * NAME is a string that is internally copied by this function.
1812 * TAGS format specification
1813 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1814 * The following is explained in some more detail in etc/ETAGS.EBNF.
1816 * make_tag creates tags with "implicit tag names" (unnamed tags)
1817 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1818 * 1. NAME does not contain any of the characters in NONAM;
1819 * 2. LINESTART contains name as either a rightmost, or rightmost but
1820 * one character, substring;
1821 * 3. the character, if any, immediately before NAME in LINESTART must
1822 * be a character in NONAM;
1823 * 4. the character, if any, immediately after NAME in LINESTART must
1824 * also be a character in NONAM.
1826 * The implementation uses the notinname() macro, which recognizes the
1827 * characters stored in the string `nonam'.
1828 * etags.el needs to use the same characters that are in NONAM.
1830 static void
1831 make_tag (const char *name, /* tag name, or NULL if unnamed */
1832 int namelen, /* tag length */
1833 bool is_func, /* tag is a function */
1834 char *linestart, /* start of the line where tag is */
1835 int linelen, /* length of the line where tag is */
1836 int lno, /* line number */
1837 long int cno) /* character number */
1839 bool named = (name != NULL && namelen > 0);
1840 char *nname = NULL;
1842 if (!CTAGS && named) /* maybe set named to false */
1843 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1844 such that etags.el can guess a name from it. */
1846 int i;
1847 register const char *cp = name;
1849 for (i = 0; i < namelen; i++)
1850 if (notinname (*cp++))
1851 break;
1852 if (i == namelen) /* rule #1 */
1854 cp = linestart + linelen - namelen;
1855 if (notinname (linestart[linelen-1]))
1856 cp -= 1; /* rule #4 */
1857 if (cp >= linestart /* rule #2 */
1858 && (cp == linestart
1859 || notinname (cp[-1])) /* rule #3 */
1860 && strneq (name, cp, namelen)) /* rule #2 */
1861 named = false; /* use implicit tag name */
1865 if (named)
1866 nname = savenstr (name, namelen);
1868 pfnote (nname, is_func, linestart, linelen, lno, cno);
1871 /* Record a tag. */
1872 static void
1873 pfnote (char *name, bool is_func, char *linestart, int linelen, int lno,
1874 long int cno)
1875 /* tag name, or NULL if unnamed */
1876 /* tag is a function */
1877 /* start of the line where tag is */
1878 /* length of the line where tag is */
1879 /* line number */
1880 /* character number */
1882 register node *np;
1884 assert (name == NULL || name[0] != '\0');
1885 if (CTAGS && name == NULL)
1886 return;
1888 np = xnew (1, node);
1890 /* If ctags mode, change name "main" to M<thisfilename>. */
1891 if (CTAGS && !cxref_style && streq (name, "main"))
1893 char *fp = strrchr (curfdp->taggedfname, '/');
1894 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1895 fp = strrchr (np->name, '.');
1896 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1897 fp[0] = '\0';
1899 else
1900 np->name = name;
1901 np->valid = true;
1902 np->been_warned = false;
1903 np->fdp = curfdp;
1904 np->is_func = is_func;
1905 np->lno = lno;
1906 if (np->fdp->usecharno)
1907 /* Our char numbers are 0-base, because of C language tradition?
1908 ctags compatibility? old versions compatibility? I don't know.
1909 Anyway, since emacs's are 1-base we expect etags.el to take care
1910 of the difference. If we wanted to have 1-based numbers, we would
1911 uncomment the +1 below. */
1912 np->cno = cno /* + 1 */ ;
1913 else
1914 np->cno = invalidcharno;
1915 np->left = np->right = NULL;
1916 if (CTAGS && !cxref_style)
1918 if (strlen (linestart) < 50)
1919 np->regex = concat (linestart, "$", "");
1920 else
1921 np->regex = savenstr (linestart, 50);
1923 else
1924 np->regex = savenstr (linestart, linelen);
1926 add_node (np, &nodehead);
1930 * free_tree ()
1931 * recurse on left children, iterate on right children.
1933 static void
1934 free_tree (register node *np)
1936 while (np)
1938 register node *node_right = np->right;
1939 free_tree (np->left);
1940 free (np->name);
1941 free (np->regex);
1942 free (np);
1943 np = node_right;
1948 * free_fdesc ()
1949 * delete a file description
1951 static void
1952 free_fdesc (register fdesc *fdp)
1954 free (fdp->infname);
1955 free (fdp->infabsname);
1956 free (fdp->infabsdir);
1957 free (fdp->taggedfname);
1958 free (fdp->prop);
1959 free (fdp);
1963 * add_node ()
1964 * Adds a node to the tree of nodes. In etags mode, sort by file
1965 * name. In ctags mode, sort by tag name. Make no attempt at
1966 * balancing.
1968 * add_node is the only function allowed to add nodes, so it can
1969 * maintain state.
1971 static void
1972 add_node (node *np, node **cur_node_p)
1974 register int dif;
1975 register node *cur_node = *cur_node_p;
1977 if (cur_node == NULL)
1979 *cur_node_p = np;
1980 last_node = np;
1981 return;
1984 if (!CTAGS)
1985 /* Etags Mode */
1987 /* For each file name, tags are in a linked sublist on the right
1988 pointer. The first tags of different files are a linked list
1989 on the left pointer. last_node points to the end of the last
1990 used sublist. */
1991 if (last_node != NULL && last_node->fdp == np->fdp)
1993 /* Let's use the same sublist as the last added node. */
1994 assert (last_node->right == NULL);
1995 last_node->right = np;
1996 last_node = np;
1998 else if (cur_node->fdp == np->fdp)
2000 /* Scanning the list we found the head of a sublist which is
2001 good for us. Let's scan this sublist. */
2002 add_node (np, &cur_node->right);
2004 else
2005 /* The head of this sublist is not good for us. Let's try the
2006 next one. */
2007 add_node (np, &cur_node->left);
2008 } /* if ETAGS mode */
2010 else
2012 /* Ctags Mode */
2013 dif = strcmp (np->name, cur_node->name);
2016 * If this tag name matches an existing one, then
2017 * do not add the node, but maybe print a warning.
2019 if (no_duplicates && !dif)
2021 if (np->fdp == cur_node->fdp)
2023 if (!no_warnings)
2025 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2026 np->fdp->infname, lineno, np->name);
2027 fprintf (stderr, "Second entry ignored\n");
2030 else if (!cur_node->been_warned && !no_warnings)
2032 fprintf
2033 (stderr,
2034 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2035 np->fdp->infname, cur_node->fdp->infname, np->name);
2036 cur_node->been_warned = true;
2038 return;
2041 /* Actually add the node */
2042 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2043 } /* if CTAGS mode */
2047 * invalidate_nodes ()
2048 * Scan the node tree and invalidate all nodes pointing to the
2049 * given file description (CTAGS case) or free them (ETAGS case).
2051 static void
2052 invalidate_nodes (fdesc *badfdp, node **npp)
2054 node *np = *npp;
2056 if (np == NULL)
2057 return;
2059 if (CTAGS)
2061 if (np->left != NULL)
2062 invalidate_nodes (badfdp, &np->left);
2063 if (np->fdp == badfdp)
2064 np->valid = false;
2065 if (np->right != NULL)
2066 invalidate_nodes (badfdp, &np->right);
2068 else
2070 assert (np->fdp != NULL);
2071 if (np->fdp == badfdp)
2073 *npp = np->left; /* detach the sublist from the list */
2074 np->left = NULL; /* isolate it */
2075 free_tree (np); /* free it */
2076 invalidate_nodes (badfdp, npp);
2078 else
2079 invalidate_nodes (badfdp, &np->left);
2084 static int total_size_of_entries (node *);
2085 static int number_len (long) ATTRIBUTE_CONST;
/* Length of a non-negative number's decimal representation.
   NUM is the number; the result is the count of its decimal digits,
   which is 1 for zero.  */
static int
number_len (long int num)
{
  int len = 1;

  /* One more digit for every division by ten that leaves something.  */
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2098 * Return total number of characters that put_entries will output for
2099 * the nodes in the linked list at the right of the specified node.
2100 * This count is irrelevant with etags.el since emacs 19.34 at least,
2101 * but is still supplied for backward compatibility.
2103 static int
2104 total_size_of_entries (register node *np)
2106 register int total = 0;
2108 for (; np != NULL; np = np->right)
2109 if (np->valid)
2111 total += strlen (np->regex) + 1; /* pat\177 */
2112 if (np->name != NULL)
2113 total += strlen (np->name) + 1; /* name\001 */
2114 total += number_len ((long) np->lno) + 1; /* lno, */
2115 if (np->cno != invalidcharno) /* cno */
2116 total += number_len (np->cno);
2117 total += 1; /* newline */
2120 return total;
2123 static void
2124 put_entries (register node *np)
2126 register char *sp;
2127 static fdesc *fdp = NULL;
2129 if (np == NULL)
2130 return;
2132 /* Output subentries that precede this one */
2133 if (CTAGS)
2134 put_entries (np->left);
2136 /* Output this entry */
2137 if (np->valid)
2139 if (!CTAGS)
2141 /* Etags mode */
2142 if (fdp != np->fdp)
2144 fdp = np->fdp;
2145 fprintf (tagf, "\f\n%s,%d\n",
2146 fdp->taggedfname, total_size_of_entries (np));
2147 fdp->written = true;
2149 fputs (np->regex, tagf);
2150 fputc ('\177', tagf);
2151 if (np->name != NULL)
2153 fputs (np->name, tagf);
2154 fputc ('\001', tagf);
2156 fprintf (tagf, "%d,", np->lno);
2157 if (np->cno != invalidcharno)
2158 fprintf (tagf, "%ld", np->cno);
2159 fputs ("\n", tagf);
2161 else
2163 /* Ctags mode */
2164 if (np->name == NULL)
2165 error ("internal error: NULL name in ctags mode.");
2167 if (cxref_style)
2169 if (vgrind_style)
2170 fprintf (stdout, "%s %s %d\n",
2171 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2172 else
2173 fprintf (stdout, "%-16s %3d %-16s %s\n",
2174 np->name, np->lno, np->fdp->taggedfname, np->regex);
2176 else
2178 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2180 if (np->is_func)
2181 { /* function or #define macro with args */
2182 putc (searchar, tagf);
2183 putc ('^', tagf);
2185 for (sp = np->regex; *sp; sp++)
2187 if (*sp == '\\' || *sp == searchar)
2188 putc ('\\', tagf);
2189 putc (*sp, tagf);
2191 putc (searchar, tagf);
2193 else
2194 { /* anything else; text pattern inadequate */
2195 fprintf (tagf, "%d", np->lno);
2197 putc ('\n', tagf);
2200 } /* if this node contains a valid tag */
2202 /* Output subentries that follow this one */
2203 put_entries (np->right);
2204 if (!CTAGS)
2205 put_entries (np->left);
/* C extensions.  These are bit masks combined into the c_ext values
   that select the language variant being parsed.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables.
 *
 * Kind of keyword recognized in the input; st_none stands for a token
 * that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,		0,			st_C_ignore
for,		0,			st_C_ignore
while,		0,			st_C_ignore
switch,		0,			st_C_ignore
return,		0,			st_C_ignore
__attribute__,	0,			st_C_attribute
GTY,		0,			st_C_attribute
@interface,	0,			st_C_objprot
@protocol,	0,			st_C_objprot
@implementation,0,			st_C_objimpl
@end,		0,			st_C_objend
import,		(C_JAVA & ~C_PLPL),	st_C_ignore
package,	(C_JAVA & ~C_PLPL),	st_C_ignore
friend,		C_PLPL,			st_C_ignore
extends,	(C_JAVA & ~C_PLPL),	st_C_javastruct
implements,	(C_JAVA & ~C_PLPL),	st_C_javastruct
interface,	(C_JAVA & ~C_PLPL),	st_C_struct
class,		0,			st_C_class
namespace,	C_PLPL,			st_C_struct
domain,		C_STAR,			st_C_struct
union,		0,			st_C_struct
struct,		0,			st_C_struct
extern,		0,			st_C_extern
enum,		0,			st_C_enum
typedef,	0,			st_C_typedef
define,		0,			st_C_define
undef,		0,			st_C_define
operator,	C_PLPL,			st_C_operator
template,	0,			st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,		0,			st_C_gnumacro
SYSCALL,	0,			st_C_gnumacro
ENTRY,		0,			st_C_gnumacro
PSEUDO,		0,			st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,		0,			st_C_gnumacro
#DEFVAR_,	0,			st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2283 /*%<*/
2284 /* C code produced by gperf version 3.0.1 */
2285 /* Command-line: gperf -m 5 */
2286 /* Computed positions: -k'2-3' */
2288 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */

/* Hash function generated by gperf for the keyword table.
   STR is the candidate word and LEN its length.  The value is LEN plus
   the table entries selected by the second character and, for words
   longer than two characters, by the third one.  STR is read up to
   index 1 when LEN is 2 and up to index 2 otherwise, so LEN must be
   at least 2.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char) str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char) str[1]];
        break;
    }
  return hval;
}
2337 static struct C_stab_entry *
2338 in_word_set (register const char *str, register unsigned int len)
2340 enum
2342 TOTAL_KEYWORDS = 33,
2343 MIN_WORD_LENGTH = 2,
2344 MAX_WORD_LENGTH = 15,
2345 MIN_HASH_VALUE = 2,
2346 MAX_HASH_VALUE = 34
2349 static struct C_stab_entry wordlist[] =
2351 {""}, {""},
2352 {"if", 0, st_C_ignore},
2353 {"GTY", 0, st_C_attribute},
2354 {"@end", 0, st_C_objend},
2355 {"union", 0, st_C_struct},
2356 {"define", 0, st_C_define},
2357 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2358 {"template", 0, st_C_template},
2359 {"operator", C_PLPL, st_C_operator},
2360 {"@interface", 0, st_C_objprot},
2361 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2362 {"friend", C_PLPL, st_C_ignore},
2363 {"typedef", 0, st_C_typedef},
2364 {"return", 0, st_C_ignore},
2365 {"@implementation",0, st_C_objimpl},
2366 {"@protocol", 0, st_C_objprot},
2367 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2368 {"extern", 0, st_C_extern},
2369 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2370 {"struct", 0, st_C_struct},
2371 {"domain", C_STAR, st_C_struct},
2372 {"switch", 0, st_C_ignore},
2373 {"enum", 0, st_C_enum},
2374 {"for", 0, st_C_ignore},
2375 {"namespace", C_PLPL, st_C_struct},
2376 {"class", 0, st_C_class},
2377 {"while", 0, st_C_ignore},
2378 {"undef", 0, st_C_define},
2379 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2380 {"__attribute__", 0, st_C_attribute},
2381 {"SYSCALL", 0, st_C_gnumacro},
2382 {"ENTRY", 0, st_C_gnumacro},
2383 {"PSEUDO", 0, st_C_gnumacro},
2384 {"DEFUN", 0, st_C_gnumacro}
2387 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2389 int key = hash (str, len);
2391 if (key <= MAX_HASH_VALUE && key >= 0)
2393 const char *s = wordlist[key].name;
2395 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2396 return &wordlist[key];
2399 return 0;
2401 /*%>*/
2403 static enum sym_type
2404 C_symtype (char *str, int len, int c_ext)
2406 register struct C_stab_entry *se = in_word_set (str, len);
2408 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2409 return st_none;
2410 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;	/* looking at an __attribute__ construct */
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* when false, do not create a tag; the
				   token should be invalidated whenever
				   a state machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2528 * Variables and functions for dealing with nested structures.
2529 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2531 static void pushclass_above (int, char *, int);
2532 static void popclass_above (int);
2533 static void write_classname (linebuffer *, const char *qualifier);
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression using a variable named bracelev, which must
   be in scope where the macro is used.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2547 static void
2548 pushclass_above (int bracelev, char *str, int len)
2550 int nl;
2552 popclass_above (bracelev);
2553 nl = cstack.nl;
2554 if (nl >= cstack.size)
2556 int size = cstack.size *= 2;
2557 xrnew (cstack.cname, size, char *);
2558 xrnew (cstack.bracelev, size, int);
2560 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2561 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2562 cstack.bracelev[nl] = bracelev;
2563 cstack.nl = nl + 1;
2566 static void
2567 popclass_above (int bracelev)
2569 int nl;
2571 for (nl = cstack.nl - 1;
2572 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2573 nl--)
2575 free (cstack.cname[nl]);
2576 cstack.nl = nl;
2580 static void
2581 write_classname (linebuffer *cn, const char *qualifier)
2583 int i, len;
2584 int qlen = strlen (qualifier);
2586 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2588 len = 0;
2589 cn->len = 0;
2590 cn->buffer[0] = '\0';
2592 else
2594 len = strlen (cstack.cname[0]);
2595 linebuffer_setlen (cn, len);
2596 strcpy (cn->buffer, cstack.cname[0]);
2598 for (i = 1; i < cstack.nl; i++)
2600 char *s = cstack.cname[i];
2601 if (s == NULL)
2602 continue;
2603 linebuffer_setlen (cn, len + qlen + strlen (s));
2604 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
static bool consider_token (char *, int, int, int *, int, int, bool *);
static void make_C_tag (bool);
2613 * consider_token ()
2614 * checks to see if the current token is at the start of a
2615 * function or variable, or corresponds to a typedef, or
2616 * is a struct/union/enum tag, or #define, or an enum constant.
2618 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2619 * with args. C_EXTP points to which language we are looking at.
2621 * Globals
2622 * fvdef IN OUT
2623 * structdef IN OUT
2624 * definedef IN OUT
2625 * typdef IN OUT
2626 * objdef IN OUT
2629 static bool
2630 consider_token (char *str, int len, int c, int *c_extp,
2631 int bracelev, int parlev, bool *is_func_or_var)
2632 /* IN: token pointer */
2633 /* IN: token length */
2634 /* IN: first char after the token */
2635 /* IN, OUT: C extensions mask */
2636 /* IN: brace level */
2637 /* IN: parenthesis level */
2638 /* OUT: function or variable found */
2640 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2641 structtype is the type of the preceding struct-like keyword, and
2642 structbracelev is the brace level where it has been seen. */
2643 static enum sym_type structtype;
2644 static int structbracelev;
2645 static enum sym_type toktype;
2648 toktype = C_symtype (str, len, *c_extp);
2651 * Skip __attribute__
2653 if (toktype == st_C_attribute)
2655 inattribute = true;
2656 return false;
2660 * Advance the definedef state machine.
2662 switch (definedef)
2664 case dnone:
2665 /* We're not on a preprocessor line. */
2666 if (toktype == st_C_gnumacro)
2668 fvdef = fdefunkey;
2669 return false;
2671 break;
2672 case dsharpseen:
2673 if (toktype == st_C_define)
2675 definedef = ddefineseen;
2677 else
2679 definedef = dignorerest;
2681 return false;
2682 case ddefineseen:
2684 * Make a tag for any macro, unless it is a constant
2685 * and constantypedefs is false.
2687 definedef = dignorerest;
2688 *is_func_or_var = (c == '(');
2689 if (!*is_func_or_var && !constantypedefs)
2690 return false;
2691 else
2692 return true;
2693 case dignorerest:
2694 return false;
2695 default:
2696 error ("internal error: definedef value.");
2700 * Now typedefs
2702 switch (typdef)
2704 case tnone:
2705 if (toktype == st_C_typedef)
2707 if (typedefs)
2708 typdef = tkeyseen;
2709 fvextern = false;
2710 fvdef = fvnone;
2711 return false;
2713 break;
2714 case tkeyseen:
2715 switch (toktype)
2717 case st_none:
2718 case st_C_class:
2719 case st_C_struct:
2720 case st_C_enum:
2721 typdef = ttypeseen;
2723 break;
2724 case ttypeseen:
2725 if (structdef == snone && fvdef == fvnone)
2727 fvdef = fvnameseen;
2728 return true;
2730 break;
2731 case tend:
2732 switch (toktype)
2734 case st_C_class:
2735 case st_C_struct:
2736 case st_C_enum:
2737 return false;
2739 return true;
2742 switch (toktype)
2744 case st_C_javastruct:
2745 if (structdef == stagseen)
2746 structdef = scolonseen;
2747 return false;
2748 case st_C_template:
2749 case st_C_class:
2750 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2751 && bracelev == 0
2752 && definedef == dnone && structdef == snone
2753 && typdef == tnone && fvdef == fvnone)
2754 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2755 if (toktype == st_C_template)
2756 break;
2757 /* FALLTHRU */
2758 case st_C_struct:
2759 case st_C_enum:
2760 if (parlev == 0
2761 && fvdef != vignore
2762 && (typdef == tkeyseen
2763 || (typedefs_or_cplusplus && structdef == snone)))
2765 structdef = skeyseen;
2766 structtype = toktype;
2767 structbracelev = bracelev;
2768 if (fvdef == fvnameseen)
2769 fvdef = fvnone;
2771 return false;
2774 if (structdef == skeyseen)
2776 structdef = stagseen;
2777 return true;
2780 if (typdef != tnone)
2781 definedef = dnone;
2783 /* Detect Objective C constructs. */
2784 switch (objdef)
2786 case onone:
2787 switch (toktype)
2789 case st_C_objprot:
2790 objdef = oprotocol;
2791 return false;
2792 case st_C_objimpl:
2793 objdef = oimplementation;
2794 return false;
2796 break;
2797 case oimplementation:
2798 /* Save the class tag for functions or variables defined inside. */
2799 objtag = savenstr (str, len);
2800 objdef = oinbody;
2801 return false;
2802 case oprotocol:
2803 /* Save the class tag for categories. */
2804 objtag = savenstr (str, len);
2805 objdef = otagseen;
2806 *is_func_or_var = true;
2807 return true;
2808 case oparenseen:
2809 objdef = ocatseen;
2810 *is_func_or_var = true;
2811 return true;
2812 case oinbody:
2813 break;
2814 case omethodsign:
2815 if (parlev == 0)
2817 fvdef = fvnone;
2818 objdef = omethodtag;
2819 linebuffer_setlen (&token_name, len);
2820 memcpy (token_name.buffer, str, len);
2821 token_name.buffer[len] = '\0';
2822 return true;
2824 return false;
2825 case omethodcolon:
2826 if (parlev == 0)
2827 objdef = omethodparm;
2828 return false;
2829 case omethodparm:
2830 if (parlev == 0)
2832 int oldlen = token_name.len;
2833 fvdef = fvnone;
2834 objdef = omethodtag;
2835 linebuffer_setlen (&token_name, oldlen + len);
2836 memcpy (token_name.buffer + oldlen, str, len);
2837 token_name.buffer[oldlen + len] = '\0';
2838 return true;
2840 return false;
2841 case oignore:
2842 if (toktype == st_C_objend)
2844 /* Memory leakage here: the string pointed by objtag is
2845 never released, because many tests would be needed to
2846 avoid breaking on incorrect input code. The amount of
2847 memory leaked here is the sum of the lengths of the
2848 class tags.
2849 free (objtag); */
2850 objdef = onone;
2852 return false;
2855 /* A function, variable or enum constant? */
2856 switch (toktype)
2858 case st_C_extern:
2859 fvextern = true;
2860 switch (fvdef)
2862 case finlist:
2863 case flistseen:
2864 case fignore:
2865 case vignore:
2866 break;
2867 default:
2868 fvdef = fvnone;
2870 return false;
2871 case st_C_ignore:
2872 fvextern = false;
2873 fvdef = vignore;
2874 return false;
2875 case st_C_operator:
2876 fvdef = foperator;
2877 *is_func_or_var = true;
2878 return true;
2879 case st_none:
2880 if (constantypedefs
2881 && structdef == snone
2882 && structtype == st_C_enum && bracelev > structbracelev
2883 /* Don't tag tokens in expressions that assign values to enum
2884 constants. */
2885 && fvdef != vignore)
2886 return true; /* enum constant */
2887 switch (fvdef)
2889 case fdefunkey:
2890 if (bracelev > 0)
2891 break;
2892 fvdef = fdefunname; /* GNU macro */
2893 *is_func_or_var = true;
2894 return true;
2895 case fvnone:
2896 switch (typdef)
2898 case ttypeseen:
2899 return false;
2900 case tnone:
2901 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2902 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2904 fvdef = vignore;
2905 return false;
2907 break;
2909 /* FALLTHRU */
2910 case fvnameseen:
2911 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2913 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2914 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2915 fvdef = foperator;
2916 *is_func_or_var = true;
2917 return true;
2919 if (bracelev > 0 && !instruct)
2920 break;
2921 fvdef = fvnameseen; /* function or variable */
2922 *is_func_or_var = true;
2923 return true;
2925 break;
2928 return false;
2933 * C_entries often keeps pointers to tokens or lines which are older than
2934 * the line currently read. By keeping two line buffers, and switching
2935 * them at end of line, it is possible to use those pointers.
2937 static struct
2939 long linepos;
2940 linebuffer lb;
2941 } lbs[2];
/* The macros below expand to expressions that use the variables
   curndx, newndx and c_ext, which must be in scope where they are
   used.  */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)
/* Read the next input line into the current line buffer and point lp
   at its start, leaving definedef alone.  Uses the variables charno,
   inf, lp, quotednl, curndx and newndx of the place of use.  */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = false;							\
  newndx = curndx;							\
} while (0)

/* Same as CNL_SAVE_DEFINEDEF, then bring back the token saved in
   savetoken, if it is valid, and reset definedef to dnone.  */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = false;						\
    }									\
  definedef = dnone;							\
} while (0)
2976 static void
2977 make_C_tag (bool isfun)
2979 /* This function is never called when token.valid is false, but
2980 we must protect against invalid input or internal errors. */
2981 if (token.valid)
2982 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2983 token.offset+token.length+1, token.lineno, token.linepos);
2984 else if (DEBUG)
2985 { /* this branch is optimized away if !DEBUG */
2986 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2987 token_name.len + 17, isfun, token.line,
2988 token.offset+token.length+1, token.lineno, token.linepos);
2989 error ("INVALID TOKEN");
2992 token.valid = false;
2997 * C_entries ()
2998 * This routine finds functions, variables, typedefs,
2999 * #define's, enum constants and struct/union/enum definitions in
3000 * C syntax and adds them to the list.
3002 static void
3003 C_entries (int c_ext, FILE *inf)
3004 /* extension of C */
3005 /* input file */
3007 register char c; /* latest char read; '\0' for end of line */
3008 register char *lp; /* pointer one beyond the character `c' */
3009 int curndx, newndx; /* indices for current and new lb */
3010 register int tokoff; /* offset in line of start of current token */
3011 register int toklen; /* length of current token */
3012 const char *qualifier; /* string used to qualify names */
3013 int qlen; /* length of qualifier */
3014 int bracelev; /* current brace level */
3015 int bracketlev; /* current bracket level */
3016 int parlev; /* current parenthesis level */
3017 int attrparlev; /* __attribute__ parenthesis level */
3018 int templatelev; /* current template level */
3019 int typdefbracelev; /* bracelev where a typedef struct body begun */
3020 bool incomm, inquote, inchar, quotednl, midtoken;
3021 bool yacc_rules; /* in the rules part of a yacc file */
3022 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3025 linebuffer_init (&lbs[0].lb);
3026 linebuffer_init (&lbs[1].lb);
3027 if (cstack.size == 0)
3029 cstack.size = (DEBUG) ? 1 : 4;
3030 cstack.nl = 0;
3031 cstack.cname = xnew (cstack.size, char *);
3032 cstack.bracelev = xnew (cstack.size, int);
3035 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3036 curndx = newndx = 0;
3037 lp = curlb.buffer;
3038 *lp = 0;
3040 fvdef = fvnone; fvextern = false; typdef = tnone;
3041 structdef = snone; definedef = dnone; objdef = onone;
3042 yacc_rules = false;
3043 midtoken = inquote = inchar = incomm = quotednl = false;
3044 token.valid = savetoken.valid = false;
3045 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3046 if (cjava)
3047 { qualifier = "."; qlen = 1; }
3048 else
3049 { qualifier = "::"; qlen = 2; }
3052 while (!feof (inf))
3054 c = *lp++;
3055 if (c == '\\')
3057 /* If we are at the end of the line, the next character is a
3058 '\0'; do not skip it, because it is what tells us
3059 to read the next line. */
3060 if (*lp == '\0')
3062 quotednl = true;
3063 continue;
3065 lp++;
3066 c = ' ';
3068 else if (incomm)
3070 switch (c)
3072 case '*':
3073 if (*lp == '/')
3075 c = *lp++;
3076 incomm = false;
3078 break;
3079 case '\0':
3080 /* Newlines inside comments do not end macro definitions in
3081 traditional cpp. */
3082 CNL_SAVE_DEFINEDEF ();
3083 break;
3085 continue;
3087 else if (inquote)
3089 switch (c)
3091 case '"':
3092 inquote = false;
3093 break;
3094 case '\0':
3095 /* Newlines inside strings do not end macro definitions
3096 in traditional cpp, even though compilers don't
3097 usually accept them. */
3098 CNL_SAVE_DEFINEDEF ();
3099 break;
3101 continue;
3103 else if (inchar)
3105 switch (c)
3107 case '\0':
3108 /* Hmmm, something went wrong. */
3109 CNL ();
3110 /* FALLTHRU */
3111 case '\'':
3112 inchar = false;
3113 break;
3115 continue;
3117 else switch (c)
3119 case '"':
3120 inquote = true;
3121 if (bracketlev > 0)
3122 continue;
3123 if (inattribute)
3124 break;
3125 switch (fvdef)
3127 case fdefunkey:
3128 case fstartlist:
3129 case finlist:
3130 case fignore:
3131 case vignore:
3132 break;
3133 default:
3134 fvextern = false;
3135 fvdef = fvnone;
3137 continue;
3138 case '\'':
3139 inchar = true;
3140 if (bracketlev > 0)
3141 continue;
3142 if (inattribute)
3143 break;
3144 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3146 fvextern = false;
3147 fvdef = fvnone;
3149 continue;
3150 case '/':
3151 if (*lp == '*')
3153 incomm = true;
3154 lp++;
3155 c = ' ';
3156 if (bracketlev > 0)
3157 continue;
3159 else if (/* cplpl && */ *lp == '/')
3161 c = '\0';
3163 break;
3164 case '%':
3165 if ((c_ext & YACC) && *lp == '%')
3167 /* Entering or exiting rules section in yacc file. */
3168 lp++;
3169 definedef = dnone; fvdef = fvnone; fvextern = false;
3170 typdef = tnone; structdef = snone;
3171 midtoken = inquote = inchar = incomm = quotednl = false;
3172 bracelev = 0;
3173 yacc_rules = !yacc_rules;
3174 continue;
3176 else
3177 break;
3178 case '#':
3179 if (definedef == dnone)
3181 char *cp;
3182 bool cpptoken = true;
3184 /* Look back on this line. If all blanks, or nonblanks
3185 followed by an end of comment, this is a preprocessor
3186 token. */
3187 for (cp = newlb.buffer; cp < lp-1; cp++)
3188 if (!c_isspace (*cp))
3190 if (*cp == '*' && cp[1] == '/')
3192 cp++;
3193 cpptoken = true;
3195 else
3196 cpptoken = false;
3198 if (cpptoken)
3200 definedef = dsharpseen;
3201 /* This is needed for tagging enum values: when there are
3202 preprocessor conditionals inside the enum, we need to
3203 reset the value of fvdef so that the next enum value is
3204 tagged even though the one before it did not end in a
3205 comma. */
3206 if (fvdef == vignore && instruct && parlev == 0)
3208 if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3209 fvdef = fvnone;
3212 } /* if (definedef == dnone) */
3213 continue;
3214 case '[':
3215 bracketlev++;
3216 continue;
3217 default:
3218 if (bracketlev > 0)
3220 if (c == ']')
3221 --bracketlev;
3222 else if (c == '\0')
3223 CNL_SAVE_DEFINEDEF ();
3224 continue;
3226 break;
3227 } /* switch (c) */
3230 /* Consider token only if some involved conditions are satisfied. */
3231 if (typdef != tignore
3232 && definedef != dignorerest
3233 && fvdef != finlist
3234 && templatelev == 0
3235 && (definedef != dnone
3236 || structdef != scolonseen)
3237 && !inattribute)
3239 if (midtoken)
3241 if (endtoken (c))
3243 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3244 /* This handles :: in the middle,
3245 but not at the beginning of an identifier.
3246 Also, space-separated :: is not recognized. */
3248 if (c_ext & C_AUTO) /* automatic detection of C++ */
3249 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3250 lp += 2;
3251 toklen += 2;
3252 c = lp[-1];
3253 goto still_in_token;
3255 else
3257 bool funorvar = false;
3259 if (yacc_rules
3260 || consider_token (newlb.buffer + tokoff, toklen, c,
3261 &c_ext, bracelev, parlev,
3262 &funorvar))
3264 if (fvdef == foperator)
3266 char *oldlp = lp;
3267 lp = skip_spaces (lp-1);
3268 if (*lp != '\0')
3269 lp += 1;
3270 while (*lp != '\0'
3271 && !c_isspace (*lp) && *lp != '(')
3272 lp += 1;
3273 c = *lp++;
3274 toklen += lp - oldlp;
3276 token.named = false;
3277 if (!plainc
3278 && nestlev > 0 && definedef == dnone)
3279 /* in struct body */
3281 int len;
3282 write_classname (&token_name, qualifier);
3283 len = token_name.len;
3284 linebuffer_setlen (&token_name, len+qlen+toklen);
3285 sprintf (token_name.buffer + len, "%s%.*s",
3286 qualifier, toklen, newlb.buffer + tokoff);
3287 token.named = true;
3289 else if (objdef == ocatseen)
3290 /* Objective C category */
3292 int len = strlen (objtag) + 2 + toklen;
3293 linebuffer_setlen (&token_name, len);
3294 sprintf (token_name.buffer, "%s(%.*s)",
3295 objtag, toklen, newlb.buffer + tokoff);
3296 token.named = true;
3298 else if (objdef == omethodtag
3299 || objdef == omethodparm)
3300 /* Objective C method */
3302 token.named = true;
3304 else if (fvdef == fdefunname)
3305 /* GNU DEFUN and similar macros */
3307 bool defun = (newlb.buffer[tokoff] == 'F');
3308 int off = tokoff;
3309 int len = toklen;
3311 /* Rewrite the tag so that emacs lisp DEFUNs
3312 can be found by their elisp name */
3313 if (defun)
3315 off += 1;
3316 len -= 1;
3318 linebuffer_setlen (&token_name, len);
3319 memcpy (token_name.buffer,
3320 newlb.buffer + off, len);
3321 token_name.buffer[len] = '\0';
3322 if (defun)
3323 while (--len >= 0)
3324 if (token_name.buffer[len] == '_')
3325 token_name.buffer[len] = '-';
3326 token.named = defun;
3328 else
3330 linebuffer_setlen (&token_name, toklen);
3331 memcpy (token_name.buffer,
3332 newlb.buffer + tokoff, toklen);
3333 token_name.buffer[toklen] = '\0';
3334 /* Name macros and members. */
3335 token.named = (structdef == stagseen
3336 || typdef == ttypeseen
3337 || typdef == tend
3338 || (funorvar
3339 && definedef == dignorerest)
3340 || (funorvar
3341 && definedef == dnone
3342 && structdef == snone
3343 && bracelev > 0));
3345 token.lineno = lineno;
3346 token.offset = tokoff;
3347 token.length = toklen;
3348 token.line = newlb.buffer;
3349 token.linepos = newlinepos;
3350 token.valid = true;
3352 if (definedef == dnone
3353 && (fvdef == fvnameseen
3354 || fvdef == foperator
3355 || structdef == stagseen
3356 || typdef == tend
3357 || typdef == ttypeseen
3358 || objdef != onone))
3360 if (current_lb_is_new)
3361 switch_line_buffers ();
3363 else if (definedef != dnone
3364 || fvdef == fdefunname
3365 || instruct)
3366 make_C_tag (funorvar);
3368 else /* not yacc and consider_token failed */
3370 if (inattribute && fvdef == fignore)
3372 /* We have just met __attribute__ after a
3373 function parameter list: do not tag the
3374 function again. */
3375 fvdef = fvnone;
3378 midtoken = false;
3380 } /* if (endtoken (c)) */
3381 else if (intoken (c))
3382 still_in_token:
3384 toklen++;
3385 continue;
3387 } /* if (midtoken) */
3388 else if (begtoken (c))
3390 switch (definedef)
3392 case dnone:
3393 switch (fvdef)
3395 case fstartlist:
3396 /* This prevents tagging fb in
3397 void (__attribute__((noreturn)) *fb) (void);
3398 Fixing this is not easy and not very important. */
3399 fvdef = finlist;
3400 continue;
3401 case flistseen:
3402 if (plainc || declarations)
3404 make_C_tag (true); /* a function */
3405 fvdef = fignore;
3407 break;
3409 if (structdef == stagseen && !cjava)
3411 popclass_above (bracelev);
3412 structdef = snone;
3414 break;
3415 case dsharpseen:
3416 savetoken = token;
3417 break;
3419 if (!yacc_rules || lp == newlb.buffer + 1)
3421 tokoff = lp - 1 - newlb.buffer;
3422 toklen = 1;
3423 midtoken = true;
3425 continue;
3426 } /* if (begtoken) */
3427 } /* if must look at token */
3430 /* Detect end of line, colon, comma, semicolon and various braces
3431 after having handled a token.*/
3432 switch (c)
3434 case ':':
3435 if (inattribute)
3436 break;
3437 if (yacc_rules && token.offset == 0 && token.valid)
3439 make_C_tag (false); /* a yacc function */
3440 break;
3442 if (definedef != dnone)
3443 break;
3444 switch (objdef)
3446 case otagseen:
3447 objdef = oignore;
3448 make_C_tag (true); /* an Objective C class */
3449 break;
3450 case omethodtag:
3451 case omethodparm:
3452 objdef = omethodcolon;
3453 int toklen = token_name.len;
3454 linebuffer_setlen (&token_name, toklen + 1);
3455 strcpy (token_name.buffer + toklen, ":");
3456 break;
3458 if (structdef == stagseen)
3460 structdef = scolonseen;
3461 break;
3463 /* Should be useless, but may be work as a safety net. */
3464 if (cplpl && fvdef == flistseen)
3466 make_C_tag (true); /* a function */
3467 fvdef = fignore;
3468 break;
3470 break;
3471 case ';':
3472 if (definedef != dnone || inattribute)
3473 break;
3474 switch (typdef)
3476 case tend:
3477 case ttypeseen:
3478 make_C_tag (false); /* a typedef */
3479 typdef = tnone;
3480 fvdef = fvnone;
3481 break;
3482 case tnone:
3483 case tinbody:
3484 case tignore:
3485 switch (fvdef)
3487 case fignore:
3488 if (typdef == tignore || cplpl)
3489 fvdef = fvnone;
3490 break;
3491 case fvnameseen:
3492 if ((globals && bracelev == 0 && (!fvextern || declarations))
3493 || (members && instruct))
3494 make_C_tag (false); /* a variable */
3495 fvextern = false;
3496 fvdef = fvnone;
3497 token.valid = false;
3498 break;
3499 case flistseen:
3500 if ((declarations
3501 && (cplpl || !instruct)
3502 && (typdef == tnone || (typdef != tignore && instruct)))
3503 || (members
3504 && plainc && instruct))
3505 make_C_tag (true); /* a function */
3506 /* FALLTHRU */
3507 default:
3508 fvextern = false;
3509 fvdef = fvnone;
3510 if (declarations
3511 && cplpl && structdef == stagseen)
3512 make_C_tag (false); /* forward declaration */
3513 else
3514 token.valid = false;
3515 } /* switch (fvdef) */
3516 /* FALLTHRU */
3517 default:
3518 if (!instruct)
3519 typdef = tnone;
3521 if (structdef == stagseen)
3522 structdef = snone;
3523 break;
3524 case ',':
3525 if (definedef != dnone || inattribute)
3526 break;
3527 switch (objdef)
3529 case omethodtag:
3530 case omethodparm:
3531 make_C_tag (true); /* an Objective C method */
3532 objdef = oinbody;
3533 break;
3535 switch (fvdef)
3537 case fdefunkey:
3538 case foperator:
3539 case fstartlist:
3540 case finlist:
3541 case fignore:
3542 break;
3543 case vignore:
3544 if (instruct && parlev == 0)
3545 fvdef = fvnone;
3546 break;
3547 case fdefunname:
3548 fvdef = fignore;
3549 break;
3550 case fvnameseen:
3551 if (parlev == 0
3552 && ((globals
3553 && bracelev == 0
3554 && templatelev == 0
3555 && (!fvextern || declarations))
3556 || (members && instruct)))
3557 make_C_tag (false); /* a variable */
3558 break;
3559 case flistseen:
3560 if ((declarations && typdef == tnone && !instruct)
3561 || (members && typdef != tignore && instruct))
3563 make_C_tag (true); /* a function */
3564 fvdef = fvnameseen;
3566 else if (!declarations)
3567 fvdef = fvnone;
3568 token.valid = false;
3569 break;
3570 default:
3571 fvdef = fvnone;
3573 if (structdef == stagseen)
3574 structdef = snone;
3575 break;
3576 case ']':
3577 if (definedef != dnone || inattribute)
3578 break;
3579 if (structdef == stagseen)
3580 structdef = snone;
3581 switch (typdef)
3583 case ttypeseen:
3584 case tend:
3585 typdef = tignore;
3586 make_C_tag (false); /* a typedef */
3587 break;
3588 case tnone:
3589 case tinbody:
3590 switch (fvdef)
3592 case foperator:
3593 case finlist:
3594 case fignore:
3595 case vignore:
3596 break;
3597 case fvnameseen:
3598 if ((members && bracelev == 1)
3599 || (globals && bracelev == 0
3600 && (!fvextern || declarations)))
3601 make_C_tag (false); /* a variable */
3602 /* FALLTHRU */
3603 default:
3604 fvdef = fvnone;
3606 break;
3608 break;
3609 case '(':
3610 if (inattribute)
3612 attrparlev++;
3613 break;
3615 if (definedef != dnone)
3616 break;
3617 if (objdef == otagseen && parlev == 0)
3618 objdef = oparenseen;
3619 switch (fvdef)
3621 case fvnameseen:
3622 if (typdef == ttypeseen
3623 && *lp != '*'
3624 && !instruct)
3626 /* This handles constructs like:
3627 typedef void OperatorFun (int fun); */
3628 make_C_tag (false);
3629 typdef = tignore;
3630 fvdef = fignore;
3631 break;
3633 /* FALLTHRU */
3634 case foperator:
3635 fvdef = fstartlist;
3636 break;
3637 case flistseen:
3638 fvdef = finlist;
3639 break;
3641 parlev++;
3642 break;
3643 case ')':
3644 if (inattribute)
3646 if (--attrparlev == 0)
3647 inattribute = false;
3648 break;
3650 if (definedef != dnone)
3651 break;
3652 if (objdef == ocatseen && parlev == 1)
3654 make_C_tag (true); /* an Objective C category */
3655 objdef = oignore;
3657 if (--parlev == 0)
3659 switch (fvdef)
3661 case fstartlist:
3662 case finlist:
3663 fvdef = flistseen;
3664 break;
3666 if (!instruct
3667 && (typdef == tend
3668 || typdef == ttypeseen))
3670 typdef = tignore;
3671 make_C_tag (false); /* a typedef */
3674 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3675 parlev = 0;
3676 break;
3677 case '{':
3678 if (definedef != dnone)
3679 break;
3680 if (typdef == ttypeseen)
3682 /* Whenever typdef is set to tinbody (currently only
3683 here), typdefbracelev should be set to bracelev. */
3684 typdef = tinbody;
3685 typdefbracelev = bracelev;
3687 switch (fvdef)
3689 case flistseen:
3690 make_C_tag (true); /* a function */
3691 /* FALLTHRU */
3692 case fignore:
3693 fvdef = fvnone;
3694 break;
3695 case fvnone:
3696 switch (objdef)
3698 case otagseen:
3699 make_C_tag (true); /* an Objective C class */
3700 objdef = oignore;
3701 break;
3702 case omethodtag:
3703 case omethodparm:
3704 make_C_tag (true); /* an Objective C method */
3705 objdef = oinbody;
3706 break;
3707 default:
3708 /* Neutralize `extern "C" {' grot. */
3709 if (bracelev == 0 && structdef == snone && nestlev == 0
3710 && typdef == tnone)
3711 bracelev = -1;
3713 break;
3715 switch (structdef)
3717 case skeyseen: /* unnamed struct */
3718 pushclass_above (bracelev, NULL, 0);
3719 structdef = snone;
3720 break;
3721 case stagseen: /* named struct or enum */
3722 case scolonseen: /* a class */
3723 pushclass_above (bracelev,token.line+token.offset, token.length);
3724 structdef = snone;
3725 make_C_tag (false); /* a struct or enum */
3726 break;
3728 bracelev += 1;
3729 break;
3730 case '*':
3731 if (definedef != dnone)
3732 break;
3733 if (fvdef == fstartlist)
3735 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3736 token.valid = false;
3738 break;
3739 case '}':
3740 if (definedef != dnone)
3741 break;
3742 bracelev -= 1;
3743 if (!ignoreindent && lp == newlb.buffer + 1)
3745 if (bracelev != 0)
3746 token.valid = false; /* unexpected value, token unreliable */
3747 bracelev = 0; /* reset brace level if first column */
3748 parlev = 0; /* also reset paren level, just in case... */
3750 else if (bracelev < 0)
3752 token.valid = false; /* something gone amiss, token unreliable */
3753 bracelev = 0;
3755 if (bracelev == 0 && fvdef == vignore)
3756 fvdef = fvnone; /* end of function */
3757 popclass_above (bracelev);
3758 structdef = snone;
3759 /* Only if typdef == tinbody is typdefbracelev significant. */
3760 if (typdef == tinbody && bracelev <= typdefbracelev)
3762 assert (bracelev == typdefbracelev);
3763 typdef = tend;
3765 break;
3766 case '=':
3767 if (definedef != dnone)
3768 break;
3769 switch (fvdef)
3771 case foperator:
3772 case finlist:
3773 case fignore:
3774 case vignore:
3775 break;
3776 case fvnameseen:
3777 if ((members && bracelev == 1)
3778 || (globals && bracelev == 0 && (!fvextern || declarations)))
3779 make_C_tag (false); /* a variable */
3780 /* FALLTHRU */
3781 default:
3782 fvdef = vignore;
3784 break;
3785 case '<':
3786 if (cplpl
3787 && (structdef == stagseen || fvdef == fvnameseen))
3789 templatelev++;
3790 break;
3792 goto resetfvdef;
3793 case '>':
3794 if (templatelev > 0)
3796 templatelev--;
3797 break;
3799 goto resetfvdef;
3800 case '+':
3801 case '-':
3802 if (objdef == oinbody && bracelev == 0)
3804 objdef = omethodsign;
3805 break;
3807 /* FALLTHRU */
3808 resetfvdef:
3809 case '#': case '~': case '&': case '%': case '/':
3810 case '|': case '^': case '!': case '.': case '?':
3811 if (definedef != dnone)
3812 break;
3813 /* These surely cannot follow a function tag in C. */
3814 switch (fvdef)
3816 case foperator:
3817 case finlist:
3818 case fignore:
3819 case vignore:
3820 break;
3821 default:
3822 fvdef = fvnone;
3824 break;
3825 case '\0':
3826 if (objdef == otagseen)
3828 make_C_tag (true); /* an Objective C class */
3829 objdef = oignore;
3831 /* If a macro spans multiple lines don't reset its state. */
3832 if (quotednl)
3833 CNL_SAVE_DEFINEDEF ();
3834 else
3835 CNL ();
3836 break;
3837 } /* switch (c) */
3839 } /* while not eof */
3841 free (lbs[0].lb.buffer);
3842 free (lbs[1].lb.buffer);
3846 * Process either a C++ file or a C file depending on the setting
3847 * of a global flag.
3849 static void
3850 default_C_entries (FILE *inf)
3852 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Parse INF as plain C: no language-extension flag is passed.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3862 /* Always do C++. */
3863 static void
3864 Cplusplus_entries (FILE *inf)
3866 C_entries (C_PLPL, inf);
3869 /* Always do Java. */
3870 static void
3871 Cjava_entries (FILE *inf)
3873 C_entries (C_JAVA, inf);
3876 /* Always do C*. */
3877 static void
3878 Cstar_entries (FILE *inf)
3880 C_entries (C_STAR, inf);
3883 /* Always do Yacc. */
3884 static void
3885 Yacc_entries (FILE *inf)
3887 C_entries (YACC, inf);
/* Useful macros.  */

/* Loop header that walks FILE_POINTER one line at a time.  Before each
   iteration a line is read into LINE_BUFFER with readline and
   CHAR_POINTER is set to the start of that line's text.  The loop ends
   once feof reports end of file.  The statement that follows the macro
   is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (; /* nothing to initialize */                                    \
       !feof (file_pointer) /* stop at end of file */                   \
       && /* per-iteration setup; the comma expression is true */       \
       (readline (&line_buffer, file_pointer),                          \
        char_pointer = line_buffer.buffer,                              \
        true);                                                          \
       /* nothing to step */ )
/* True if CP points at the keyword KW (which must be a string literal)
   and the character right after it satisfies notinname.  On a match,
   CP is moved past the keyword and past any spaces that follow it.  */
#define LOOKING_AT(cp, kw)                                               \
  ((assert ("" kw), true)  /* fails to compile unless KW is a literal */ \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* CP starts with KW */  \
   && notinname ((cp)[sizeof (kw) - 1])         /* KW ends here */       \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))) /* consume KW */
/* Case-insensitive relative of LOOKING_AT.  Unlike LOOKING_AT it does
   not test the character after KW with notinname, and on a match it
   only moves CP just past the keyword: following spaces are NOT
   skipped.  KW must be a string literal.  */
#define LOOKING_AT_NOCASE(cp, kw)                                        \
  ((assert ("" kw), true)  /* fails to compile unless KW is a literal */ \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* CP starts with KW */  \
   && ((cp) += sizeof (kw) - 1))                /* step over KW */
3914 * Read a file, but do no processing. This is used to do regexp
3915 * matching on files that have no language defined.
3917 static void
3918 just_read_file (FILE *inf)
3920 while (!feof (inf))
3921 readline (&lb, inf);
3925 /* Fortran parsing */
3927 static void F_takeprec (void);
3928 static void F_getit (FILE *);
3930 static void
3931 F_takeprec (void)
3933 dbp = skip_spaces (dbp);
3934 if (*dbp != '*')
3935 return;
3936 dbp++;
3937 dbp = skip_spaces (dbp);
3938 if (strneq (dbp, "(*)", 3))
3940 dbp += 3;
3941 return;
3943 if (!c_isdigit (*dbp))
3945 --dbp; /* force failure */
3946 return;
3949 dbp++;
3950 while (c_isdigit (*dbp));
3953 static void
3954 F_getit (FILE *inf)
3956 register char *cp;
3958 dbp = skip_spaces (dbp);
3959 if (*dbp == '\0')
3961 readline (&lb, inf);
3962 dbp = lb.buffer;
3963 if (dbp[5] != '&')
3964 return;
3965 dbp += 6;
3966 dbp = skip_spaces (dbp);
3968 if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3969 return;
3970 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3971 continue;
3972 make_tag (dbp, cp-dbp, true,
3973 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3977 static void
3978 Fortran_functions (FILE *inf)
3980 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3982 if (*dbp == '%')
3983 dbp++; /* Ratfor escape to fortran */
3984 dbp = skip_spaces (dbp);
3985 if (*dbp == '\0')
3986 continue;
3988 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3989 dbp = skip_spaces (dbp);
3991 if (LOOKING_AT_NOCASE (dbp, "pure"))
3992 dbp = skip_spaces (dbp);
3994 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3995 dbp = skip_spaces (dbp);
3997 switch (c_tolower (*dbp))
3999 case 'i':
4000 if (nocase_tail ("integer"))
4001 F_takeprec ();
4002 break;
4003 case 'r':
4004 if (nocase_tail ("real"))
4005 F_takeprec ();
4006 break;
4007 case 'l':
4008 if (nocase_tail ("logical"))
4009 F_takeprec ();
4010 break;
4011 case 'c':
4012 if (nocase_tail ("complex") || nocase_tail ("character"))
4013 F_takeprec ();
4014 break;
4015 case 'd':
4016 if (nocase_tail ("double"))
4018 dbp = skip_spaces (dbp);
4019 if (*dbp == '\0')
4020 continue;
4021 if (nocase_tail ("precision"))
4022 break;
4023 continue;
4025 break;
4027 dbp = skip_spaces (dbp);
4028 if (*dbp == '\0')
4029 continue;
4030 switch (c_tolower (*dbp))
4032 case 'f':
4033 if (nocase_tail ("function"))
4034 F_getit (inf);
4035 continue;
4036 case 's':
4037 if (nocase_tail ("subroutine"))
4038 F_getit (inf);
4039 continue;
4040 case 'e':
4041 if (nocase_tail ("entry"))
4042 F_getit (inf);
4043 continue;
4044 case 'b':
4045 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4047 dbp = skip_spaces (dbp);
4048 if (*dbp == '\0') /* assume un-named */
4049 make_tag ("blockdata", 9, true,
4050 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4051 else
4052 F_getit (inf); /* look for name */
4054 continue;
4061 * Ada parsing
4062 * Original code by
4063 * Philippe Waroquiers (1998)
4066 /* Once we are positioned after an "interesting" keyword, let's get
4067 the real tag value necessary. */
4068 static void
4069 Ada_getit (FILE *inf, const char *name_qualifier)
4071 register char *cp;
4072 char *name;
4073 char c;
4075 while (!feof (inf))
4077 dbp = skip_spaces (dbp);
4078 if (*dbp == '\0'
4079 || (dbp[0] == '-' && dbp[1] == '-'))
4081 readline (&lb, inf);
4082 dbp = lb.buffer;
4084 switch (c_tolower (*dbp))
4086 case 'b':
4087 if (nocase_tail ("body"))
4089 /* Skipping body of procedure body or package body or ....
4090 resetting qualifier to body instead of spec. */
4091 name_qualifier = "/b";
4092 continue;
4094 break;
4095 case 't':
4096 /* Skipping type of task type or protected type ... */
4097 if (nocase_tail ("type"))
4098 continue;
4099 break;
4101 if (*dbp == '"')
4103 dbp += 1;
4104 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4105 continue;
4107 else
4109 dbp = skip_spaces (dbp);
4110 for (cp = dbp;
4111 c_isalnum (*cp) || *cp == '_' || *cp == '.';
4112 cp++)
4113 continue;
4114 if (cp == dbp)
4115 return;
4117 c = *cp;
4118 *cp = '\0';
4119 name = concat (dbp, name_qualifier, "");
4120 *cp = c;
4121 make_tag (name, strlen (name), true,
4122 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4123 free (name);
4124 if (c == '"')
4125 dbp = cp + 1;
4126 return;
4130 static void
4131 Ada_funcs (FILE *inf)
4133 bool inquote = false;
4134 bool skip_till_semicolumn = false;
4136 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4138 while (*dbp != '\0')
4140 /* Skip a string i.e. "abcd". */
4141 if (inquote || (*dbp == '"'))
4143 dbp = strchr (dbp + !inquote, '"');
4144 if (dbp != NULL)
4146 inquote = false;
4147 dbp += 1;
4148 continue; /* advance char */
4150 else
4152 inquote = true;
4153 break; /* advance line */
4157 /* Skip comments. */
4158 if (dbp[0] == '-' && dbp[1] == '-')
4159 break; /* advance line */
4161 /* Skip character enclosed in single quote i.e. 'a'
4162 and skip single quote starting an attribute i.e. 'Image. */
4163 if (*dbp == '\'')
4165 dbp++ ;
4166 if (*dbp != '\0')
4167 dbp++;
4168 continue;
4171 if (skip_till_semicolumn)
4173 if (*dbp == ';')
4174 skip_till_semicolumn = false;
4175 dbp++;
4176 continue; /* advance char */
4179 /* Search for beginning of a token. */
4180 if (!begtoken (*dbp))
4182 dbp++;
4183 continue; /* advance char */
4186 /* We are at the beginning of a token. */
4187 switch (c_tolower (*dbp))
4189 case 'f':
4190 if (!packages_only && nocase_tail ("function"))
4191 Ada_getit (inf, "/f");
4192 else
4193 break; /* from switch */
4194 continue; /* advance char */
4195 case 'p':
4196 if (!packages_only && nocase_tail ("procedure"))
4197 Ada_getit (inf, "/p");
4198 else if (nocase_tail ("package"))
4199 Ada_getit (inf, "/s");
4200 else if (nocase_tail ("protected")) /* protected type */
4201 Ada_getit (inf, "/t");
4202 else
4203 break; /* from switch */
4204 continue; /* advance char */
4206 case 'u':
4207 if (typedefs && !packages_only && nocase_tail ("use"))
4209 /* when tagging types, avoid tagging use type Pack.Typename;
4210 for this, we will skip everything till a ; */
4211 skip_till_semicolumn = true;
4212 continue; /* advance char */
4215 case 't':
4216 if (!packages_only && nocase_tail ("task"))
4217 Ada_getit (inf, "/k");
4218 else if (typedefs && !packages_only && nocase_tail ("type"))
4220 Ada_getit (inf, "/t");
4221 while (*dbp != '\0')
4222 dbp += 1;
4224 else
4225 break; /* from switch */
4226 continue; /* advance char */
4229 /* Look for the end of the token. */
4230 while (!endtoken (*dbp))
4231 dbp++;
4233 } /* advance char */
4234 } /* advance line */
4239 * Unix and microcontroller assembly tag handling
4240 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4241 * Idea by Bob Weiner, Motorola Inc. (1994)
4243 static void
4244 Asm_labels (FILE *inf)
4246 register char *cp;
4248 LOOP_ON_INPUT_LINES (inf, lb, cp)
4250 /* If first char is alphabetic or one of [_.$], test for colon
4251 following identifier. */
4252 if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4254 /* Read past label. */
4255 cp++;
4256 while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4257 cp++;
4258 if (*cp == ':' || c_isspace (*cp))
4259 /* Found end of label, so copy it and add it to the table. */
4260 make_tag (lb.buffer, cp - lb.buffer, true,
4261 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4268 * Perl support
4269 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4270 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4271 * Perl variable names: /^(my|local).../
4272 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4273 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4274 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4276 static void
4277 Perl_functions (FILE *inf)
4279 char *package = savestr ("main"); /* current package name */
4280 register char *cp;
4282 LOOP_ON_INPUT_LINES (inf, lb, cp)
4284 cp = skip_spaces (cp);
4286 if (LOOKING_AT (cp, "package"))
4288 free (package);
4289 get_tag (cp, &package);
4291 else if (LOOKING_AT (cp, "sub"))
4293 char *pos, *sp;
4295 subr:
4296 sp = cp;
4297 while (!notinname (*cp))
4298 cp++;
4299 if (cp == sp)
4300 continue; /* nothing found */
4301 if ((pos = strchr (sp, ':')) != NULL
4302 && pos < cp && pos[1] == ':')
4303 /* The name is already qualified. */
4304 make_tag (sp, cp - sp, true,
4305 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4306 else
4307 /* Qualify it. */
4309 char savechar, *name;
4311 savechar = *cp;
4312 *cp = '\0';
4313 name = concat (package, "::", sp);
4314 *cp = savechar;
4315 make_tag (name, strlen (name), true,
4316 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4317 free (name);
4320 else if (LOOKING_AT (cp, "use constant")
4321 || LOOKING_AT (cp, "use constant::defer"))
4323 /* For hash style multi-constant like
4324 use constant { FOO => 123,
4325 BAR => 456 };
4326 only the first FOO is picked up. Parsing across the value
4327 expressions would be difficult in general, due to possible nested
4328 hashes, here-documents, etc. */
4329 if (*cp == '{')
4330 cp = skip_spaces (cp+1);
4331 goto subr;
4333 else if (globals) /* only if we are tagging global vars */
4335 /* Skip a qualifier, if any. */
4336 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4337 /* After "my" or "local", but before any following paren or space. */
4338 char *varstart = cp;
4340 if (qual /* should this be removed? If yes, how? */
4341 && (*cp == '$' || *cp == '@' || *cp == '%'))
4343 varstart += 1;
4345 cp++;
4346 while (c_isalnum (*cp) || *cp == '_');
4348 else if (qual)
4350 /* Should be examining a variable list at this point;
4351 could insist on seeing an open parenthesis. */
4352 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4353 cp++;
4355 else
4356 continue;
4358 make_tag (varstart, cp - varstart, false,
4359 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4362 free (package);
4367 * Python support
4368 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4369 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4370 * More ideas by seb bacon <seb@jamkit.com> (2002)
4372 static void
4373 Python_functions (FILE *inf)
4375 register char *cp;
4377 LOOP_ON_INPUT_LINES (inf, lb, cp)
4379 cp = skip_spaces (cp);
4380 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4382 char *name = cp;
4383 while (!notinname (*cp) && *cp != ':')
4384 cp++;
4385 make_tag (name, cp - name, true,
4386 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4393 * PHP support
4394 * Look for:
4395 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4396 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4397 * - /^[ \t]*define\(\"[^\"]+/
4398 * Only with --members:
4399 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4400 * Idea by Diez B. Roggisch (2001)
4402 static void
4403 PHP_functions (FILE *inf)
4405 char *cp, *name;
4406 bool search_identifier = false;
4408 LOOP_ON_INPUT_LINES (inf, lb, cp)
4410 cp = skip_spaces (cp);
4411 name = cp;
4412 if (search_identifier
4413 && *cp != '\0')
4415 while (!notinname (*cp))
4416 cp++;
4417 make_tag (name, cp - name, true,
4418 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4419 search_identifier = false;
4421 else if (LOOKING_AT (cp, "function"))
4423 if (*cp == '&')
4424 cp = skip_spaces (cp+1);
4425 if (*cp != '\0')
4427 name = cp;
4428 while (!notinname (*cp))
4429 cp++;
4430 make_tag (name, cp - name, true,
4431 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4433 else
4434 search_identifier = true;
4436 else if (LOOKING_AT (cp, "class"))
4438 if (*cp != '\0')
4440 name = cp;
4441 while (*cp != '\0' && !c_isspace (*cp))
4442 cp++;
4443 make_tag (name, cp - name, false,
4444 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446 else
4447 search_identifier = true;
4449 else if (strneq (cp, "define", 6)
4450 && (cp = skip_spaces (cp+6))
4451 && *cp++ == '('
4452 && (*cp == '"' || *cp == '\''))
4454 char quote = *cp++;
4455 name = cp;
4456 while (*cp != quote && *cp != '\0')
4457 cp++;
4458 make_tag (name, cp - name, false,
4459 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4461 else if (members
4462 && LOOKING_AT (cp, "var")
4463 && *cp == '$')
4465 name = cp;
4466 while (!notinname (*cp))
4467 cp++;
4468 make_tag (name, cp - name, false,
4469 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4476 * Cobol tag functions
4477 * We could look for anything that could be a paragraph name.
4478 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4479 * Idea by Corny de Souza (1993)
4481 static void
4482 Cobol_paragraphs (FILE *inf)
4484 register char *bp, *ep;
4486 LOOP_ON_INPUT_LINES (inf, lb, bp)
4488 if (lb.len < 9)
4489 continue;
4490 bp += 8;
4492 /* If eoln, compiler option or comment ignore whole line. */
4493 if (bp[-1] != ' ' || !c_isalnum (bp[0]))
4494 continue;
4496 for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
4497 continue;
4498 if (*ep++ == '.')
4499 make_tag (bp, ep - bp, true,
4500 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4506 * Makefile support
4507 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4509 static void
4510 Makefile_targets (FILE *inf)
4512 register char *bp;
4514 LOOP_ON_INPUT_LINES (inf, lb, bp)
4516 if (*bp == '\t' || *bp == '#')
4517 continue;
4518 while (*bp != '\0' && *bp != '=' && *bp != ':')
4519 bp++;
4520 if (*bp == ':' || (globals && *bp == '='))
4522 /* We should detect if there is more than one tag, but we do not.
4523 We just skip initial and final spaces. */
4524 char * namestart = skip_spaces (lb.buffer);
4525 while (--bp > namestart)
4526 if (!notinname (*bp))
4527 break;
4528 make_tag (namestart, bp - namestart + 1, true,
4529 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4536 * Pascal parsing
4537 * Original code by Mosur K. Mohan (1989)
4539 * Locates tags for procedures & functions. Doesn't do any type- or
4540 * var-definitions. It does look for the keyword "extern" or
4541 * "forward" immediately following the procedure statement; if found,
4542 * the tag is skipped.
4544 static void
4545 Pascal_functions (FILE *inf)
4547 linebuffer tline; /* mostly copied from C_entries */
4548 long save_lcno;
4549 int save_lineno, namelen, taglen;
4550 char c, *name;
4552 bool /* each of these flags is true if: */
4553 incomment, /* point is inside a comment */
4554 inquote, /* point is inside '..' string */
4555 get_tagname, /* point is after PROCEDURE/FUNCTION
4556 keyword, so next item = potential tag */
4557 found_tag, /* point is after a potential tag */
4558 inparms, /* point is within parameter-list */
4559 verify_tag; /* point has passed the parm-list, so the
4560 next token will determine whether this
4561 is a FORWARD/EXTERN to be ignored, or
4562 whether it is a real tag */
4564 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4565 name = NULL; /* keep compiler quiet */
4566 dbp = lb.buffer;
4567 *dbp = '\0';
4568 linebuffer_init (&tline);
4570 incomment = inquote = false;
4571 found_tag = false; /* have a proc name; check if extern */
4572 get_tagname = false; /* found "procedure" keyword */
4573 inparms = false; /* found '(' after "proc" */
4574 verify_tag = false; /* check if "extern" is ahead */
4577 while (!feof (inf)) /* long main loop to get next char */
4579 c = *dbp++;
4580 if (c == '\0') /* if end of line */
4582 readline (&lb, inf);
4583 dbp = lb.buffer;
4584 if (*dbp == '\0')
4585 continue;
4586 if (!((found_tag && verify_tag)
4587 || get_tagname))
4588 c = *dbp++; /* only if don't need *dbp pointing
4589 to the beginning of the name of
4590 the procedure or function */
4592 if (incomment)
4594 if (c == '}') /* within { } comments */
4595 incomment = false;
4596 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4598 dbp++;
4599 incomment = false;
4601 continue;
4603 else if (inquote)
4605 if (c == '\'')
4606 inquote = false;
4607 continue;
4609 else
4610 switch (c)
4612 case '\'':
4613 inquote = true; /* found first quote */
4614 continue;
4615 case '{': /* found open { comment */
4616 incomment = true;
4617 continue;
4618 case '(':
4619 if (*dbp == '*') /* found open (* comment */
4621 incomment = true;
4622 dbp++;
4624 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4625 inparms = true;
4626 continue;
4627 case ')': /* end of parms list */
4628 if (inparms)
4629 inparms = false;
4630 continue;
4631 case ';':
4632 if (found_tag && !inparms) /* end of proc or fn stmt */
4634 verify_tag = true;
4635 break;
4637 continue;
4639 if (found_tag && verify_tag && (*dbp != ' '))
4641 /* Check if this is an "extern" declaration. */
4642 if (*dbp == '\0')
4643 continue;
4644 if (c_tolower (*dbp) == 'e')
4646 if (nocase_tail ("extern")) /* superfluous, really! */
4648 found_tag = false;
4649 verify_tag = false;
4652 else if (c_tolower (*dbp) == 'f')
4654 if (nocase_tail ("forward")) /* check for forward reference */
4656 found_tag = false;
4657 verify_tag = false;
4660 if (found_tag && verify_tag) /* not external proc, so make tag */
4662 found_tag = false;
4663 verify_tag = false;
4664 make_tag (name, namelen, true,
4665 tline.buffer, taglen, save_lineno, save_lcno);
4666 continue;
4669 if (get_tagname) /* grab name of proc or fn */
4671 char *cp;
4673 if (*dbp == '\0')
4674 continue;
4676 /* Find block name. */
4677 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4678 continue;
4680 /* Save all values for later tagging. */
4681 linebuffer_setlen (&tline, lb.len);
4682 strcpy (tline.buffer, lb.buffer);
4683 save_lineno = lineno;
4684 save_lcno = linecharno;
4685 name = tline.buffer + (dbp - lb.buffer);
4686 namelen = cp - dbp;
4687 taglen = cp - lb.buffer + 1;
4689 dbp = cp; /* set dbp to e-o-token */
4690 get_tagname = false;
4691 found_tag = true;
4692 continue;
4694 /* And proceed to check for "extern". */
4696 else if (!incomment && !inquote && !found_tag)
4698 /* Check for proc/fn keywords. */
4699 switch (c_tolower (c))
4701 case 'p':
4702 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4703 get_tagname = true;
4704 continue;
4705 case 'f':
4706 if (nocase_tail ("unction"))
4707 get_tagname = true;
4708 continue;
4711 } /* while not eof */
4713 free (tline.buffer);
4718 * Lisp tag functions
4719 * look for (def or (DEF, quote or QUOTE
4722 static void L_getit (void);
4724 static void
4725 L_getit (void)
4727 if (*dbp == '\'') /* Skip prefix quote */
4728 dbp++;
4729 else if (*dbp == '(')
4731 dbp++;
4732 /* Try to skip "(quote " */
4733 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4734 /* Ok, then skip "(" before name in (defstruct (foo)) */
4735 dbp = skip_spaces (dbp);
4737 get_tag (dbp, NULL);
4740 static void
4741 Lisp_functions (FILE *inf)
4743 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4745 if (dbp[0] != '(')
4746 continue;
4748 /* "(defvar foo)" is a declaration rather than a definition. */
4749 if (! declarations)
4751 char *p = dbp + 1;
4752 if (LOOKING_AT (p, "defvar"))
4754 p = skip_name (p); /* past var name */
4755 p = skip_spaces (p);
4756 if (*p == ')')
4757 continue;
4761 if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
4762 dbp += 3;
4764 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4766 dbp = skip_non_spaces (dbp);
4767 dbp = skip_spaces (dbp);
4768 L_getit ();
4770 else
4772 /* Check for (foo::defmumble name-defined ... */
4774 dbp++;
4775 while (!notinname (*dbp) && *dbp != ':');
4776 if (*dbp == ':')
4779 dbp++;
4780 while (*dbp == ':');
4782 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4784 dbp = skip_non_spaces (dbp);
4785 dbp = skip_spaces (dbp);
4786 L_getit ();
4795 * Lua script language parsing
4796 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4798 * "function" and "local function" are tags if they start at column 1.
4800 static void
4801 Lua_functions (FILE *inf)
4803 register char *bp;
4805 LOOP_ON_INPUT_LINES (inf, lb, bp)
4807 if (bp[0] != 'f' && bp[0] != 'l')
4808 continue;
4810 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4812 if (LOOKING_AT (bp, "function"))
4813 get_tag (bp, NULL);
4819 * PostScript tags
4820 * Just look for lines where the first character is '/'
4821 * Also look at "defineps" for PSWrap
4822 * Ideas by:
4823 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4824 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4826 static void
4827 PS_functions (FILE *inf)
4829 register char *bp, *ep;
4831 LOOP_ON_INPUT_LINES (inf, lb, bp)
4833 if (bp[0] == '/')
4835 for (ep = bp+1;
4836 *ep != '\0' && *ep != ' ' && *ep != '{';
4837 ep++)
4838 continue;
4839 make_tag (bp, ep - bp, true,
4840 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4842 else if (LOOKING_AT (bp, "defineps"))
4843 get_tag (bp, NULL);
4849 * Forth tags
4850 * Ignore anything after \ followed by space or in ( )
4851 * Look for words defined by :
4852 * Look for constant, code, create, defer, value, and variable
4853 * OBP extensions: Look for buffer:, field,
4854 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4856 static void
4857 Forth_words (FILE *inf)
4859 register char *bp;
4861 LOOP_ON_INPUT_LINES (inf, lb, bp)
4862 while ((bp = skip_spaces (bp))[0] != '\0')
4863 if (bp[0] == '\\' && c_isspace (bp[1]))
4864 break; /* read next line */
4865 else if (bp[0] == '(' && c_isspace (bp[1]))
4866 do /* skip to ) or eol */
4867 bp++;
4868 while (*bp != ')' && *bp != '\0');
4869 else if ((bp[0] == ':' && c_isspace (bp[1]) && bp++)
4870 || LOOKING_AT_NOCASE (bp, "constant")
4871 || LOOKING_AT_NOCASE (bp, "code")
4872 || LOOKING_AT_NOCASE (bp, "create")
4873 || LOOKING_AT_NOCASE (bp, "defer")
4874 || LOOKING_AT_NOCASE (bp, "value")
4875 || LOOKING_AT_NOCASE (bp, "variable")
4876 || LOOKING_AT_NOCASE (bp, "buffer:")
4877 || LOOKING_AT_NOCASE (bp, "field"))
4878 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4879 else
4880 bp = skip_non_spaces (bp);
4885 * Scheme tag functions
4886 * look for (def... xyzzy
4887 * (def... (xyzzy
4888 * (def ... ((...(xyzzy ....
4889 * (set! xyzzy
4890 * Original code by Ken Haase (1985?)
4892 static void
4893 Scheme_functions (FILE *inf)
4895 register char *bp;
4897 LOOP_ON_INPUT_LINES (inf, lb, bp)
4899 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4901 bp = skip_non_spaces (bp+4);
4902 /* Skip over open parens and white space. Don't continue past
4903 '\0'. */
4904 while (*bp && notinname (*bp))
4905 bp++;
4906 get_tag (bp, NULL);
4908 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4909 get_tag (bp, NULL);
4914 /* Find tags in TeX and LaTeX input files. */
4916 /* TEX_toktab is a table of TeX control sequences that define tags.
4917 * Each entry records one such control sequence.
4919 * Original code from who knows whom.
4920 * Ideas by:
4921 * Stefan Monnier (2002)
4924 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4926 /* Default set of control sequences to put into TEX_toktab.
4927 The value of environment var TEXTAGS is prepended to this. */
4928 static const char *TEX_defenv = "\
4929 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4930 :part:appendix:entry:index:def\
4931 :newcommand:renewcommand:newenvironment:renewenvironment";
4933 static void TEX_mode (FILE *);
4934 static void TEX_decode_env (const char *, const char *);
4936 static char TEX_esc = '\\';
4937 static char TEX_opgrp = '{';
4938 static char TEX_clgrp = '}';
4941 * TeX/LaTeX scanning loop.
4943 static void
4944 TeX_commands (FILE *inf)
4946 char *cp;
4947 linebuffer *key;
4949 /* Select either \ or ! as escape character. */
4950 TEX_mode (inf);
4952 /* Initialize token table once from environment. */
4953 if (TEX_toktab == NULL)
4954 TEX_decode_env ("TEXTAGS", TEX_defenv);
4956 LOOP_ON_INPUT_LINES (inf, lb, cp)
4958 /* Look at each TEX keyword in line. */
4959 for (;;)
4961 /* Look for a TEX escape. */
4962 while (*cp++ != TEX_esc)
4963 if (cp[-1] == '\0' || cp[-1] == '%')
4964 goto tex_next_line;
4966 for (key = TEX_toktab; key->buffer != NULL; key++)
4967 if (strneq (cp, key->buffer, key->len))
4969 char *p;
4970 int namelen, linelen;
4971 bool opgrp = false;
4973 cp = skip_spaces (cp + key->len);
4974 if (*cp == TEX_opgrp)
4976 opgrp = true;
4977 cp++;
4979 for (p = cp;
4980 (!c_isspace (*p) && *p != '#' &&
4981 *p != TEX_opgrp && *p != TEX_clgrp);
4982 p++)
4983 continue;
4984 namelen = p - cp;
4985 linelen = lb.len;
4986 if (!opgrp || *p == TEX_clgrp)
4988 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4989 p++;
4990 linelen = p - lb.buffer + 1;
4992 make_tag (cp, namelen, true,
4993 lb.buffer, linelen, lineno, linecharno);
4994 goto tex_next_line; /* We only tag a line once */
4997 tex_next_line:
5002 #define TEX_LESC '\\'
5003 #define TEX_SESC '!'
5005 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5006 chars accordingly. */
5007 static void
5008 TEX_mode (FILE *inf)
5010 int c;
5012 while ((c = getc (inf)) != EOF)
5014 /* Skip to next line if we hit the TeX comment char. */
5015 if (c == '%')
5016 while (c != '\n' && c != EOF)
5017 c = getc (inf);
5018 else if (c == TEX_LESC || c == TEX_SESC )
5019 break;
5022 if (c == TEX_LESC)
5024 TEX_esc = TEX_LESC;
5025 TEX_opgrp = '{';
5026 TEX_clgrp = '}';
5028 else
5030 TEX_esc = TEX_SESC;
5031 TEX_opgrp = '<';
5032 TEX_clgrp = '>';
5034 rewind (inf);
5037 /* Read environment and prepend it to the default string.
5038 Build token table. */
5039 static void
5040 TEX_decode_env (const char *evarname, const char *defenv)
5042 register const char *env, *p;
5043 int i, len;
5045 /* Append default string to environment. */
5046 env = getenv (evarname);
5047 if (!env)
5048 env = defenv;
5049 else
5050 env = concat (env, defenv, "");
5052 /* Allocate a token table */
5053 for (len = 1, p = env; p;)
5054 if ((p = strchr (p, ':')) && *++p != '\0')
5055 len++;
5056 TEX_toktab = xnew (len, linebuffer);
5058 /* Unpack environment string into token table. Be careful about */
5059 /* zero-length strings (leading ':', "::" and trailing ':') */
5060 for (i = 0; *env != '\0';)
5062 p = strchr (env, ':');
5063 if (!p) /* End of environment string. */
5064 p = env + strlen (env);
5065 if (p - env > 0)
5066 { /* Only non-zero strings. */
5067 TEX_toktab[i].buffer = savenstr (env, p - env);
5068 TEX_toktab[i].len = p - env;
5069 i++;
5071 if (*p)
5072 env = p + 1;
5073 else
5075 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5076 TEX_toktab[i].len = 0;
5077 break;
5083 /* Texinfo support. Dave Love, Mar. 2000. */
5084 static void
5085 Texinfo_nodes (FILE *inf)
5087 char *cp, *start;
5088 LOOP_ON_INPUT_LINES (inf, lb, cp)
5089 if (LOOKING_AT (cp, "@node"))
5091 start = cp;
5092 while (*cp != '\0' && *cp != ',')
5093 cp++;
5094 make_tag (start, cp - start, true,
5095 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5101 * HTML support.
5102 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5103 * Contents of <a name=xxx> are tags with name xxx.
5105 * Francesco Potortì, 2002.
5107 static void
5108 HTML_labels (FILE *inf)
5110 bool getnext = false; /* next text outside of HTML tags is a tag */
5111 bool skiptag = false; /* skip to the end of the current HTML tag */
5112 bool intag = false; /* inside an html tag, looking for ID= */
5113 bool inanchor = false; /* when INTAG, is an anchor, look for NAME= */
5114 char *end;
5117 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5119 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5120 for (;;) /* loop on the same line */
5122 if (skiptag) /* skip HTML tag */
5124 while (*dbp != '\0' && *dbp != '>')
5125 dbp++;
5126 if (*dbp == '>')
5128 dbp += 1;
5129 skiptag = false;
5130 continue; /* look on the same line */
5132 break; /* go to next line */
5135 else if (intag) /* look for "name=" or "id=" */
5137 while (*dbp != '\0' && *dbp != '>'
5138 && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
5139 dbp++;
5140 if (*dbp == '\0')
5141 break; /* go to next line */
5142 if (*dbp == '>')
5144 dbp += 1;
5145 intag = false;
5146 continue; /* look on the same line */
5148 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5149 || LOOKING_AT_NOCASE (dbp, "id="))
5151 bool quoted = (dbp[0] == '"');
5153 if (quoted)
5154 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5155 continue;
5156 else
5157 for (end = dbp; *end != '\0' && intoken (*end); end++)
5158 continue;
5159 linebuffer_setlen (&token_name, end - dbp);
5160 memcpy (token_name.buffer, dbp, end - dbp);
5161 token_name.buffer[end - dbp] = '\0';
5163 dbp = end;
5164 intag = false; /* we found what we looked for */
5165 skiptag = true; /* skip to the end of the tag */
5166 getnext = true; /* then grab the text */
5167 continue; /* look on the same line */
5169 dbp += 1;
5172 else if (getnext) /* grab next tokens and tag them */
5174 dbp = skip_spaces (dbp);
5175 if (*dbp == '\0')
5176 break; /* go to next line */
5177 if (*dbp == '<')
5179 intag = true;
5180 inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
5181 continue; /* look on the same line */
5184 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5185 continue;
5186 make_tag (token_name.buffer, token_name.len, true,
5187 dbp, end - dbp, lineno, linecharno);
5188 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5189 getnext = false;
5190 break; /* go to next line */
5193 else /* look for an interesting HTML tag */
5195 while (*dbp != '\0' && *dbp != '<')
5196 dbp++;
5197 if (*dbp == '\0')
5198 break; /* go to next line */
5199 intag = true;
5200 if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
5202 inanchor = true;
5203 continue; /* look on the same line */
5205 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5206 || LOOKING_AT_NOCASE (dbp, "<h1>")
5207 || LOOKING_AT_NOCASE (dbp, "<h2>")
5208 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5210 intag = false;
5211 getnext = true;
5212 continue; /* look on the same line */
5214 dbp += 1;
5221 * Prolog support
5223 * Assumes that the predicate or rule starts at column 0.
5224 * Only the first clause of a predicate or rule is added.
5225 * Original code by Sunichirou Sugou (1989)
5226 * Rewritten by Anders Lindgren (1996)
5228 static size_t prolog_pr (char *, char *);
5229 static void prolog_skip_comment (linebuffer *, FILE *);
5230 static size_t prolog_atom (char *, size_t);
5232 static void
5233 Prolog_functions (FILE *inf)
5235 char *cp, *last;
5236 size_t len;
5237 size_t allocated;
5239 allocated = 0;
5240 len = 0;
5241 last = NULL;
5243 LOOP_ON_INPUT_LINES (inf, lb, cp)
5245 if (cp[0] == '\0') /* Empty line */
5246 continue;
5247 else if (c_isspace (cp[0])) /* Not a predicate */
5248 continue;
5249 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5250 prolog_skip_comment (&lb, inf);
5251 else if ((len = prolog_pr (cp, last)) > 0)
5253 /* Predicate or rule. Store the function name so that we
5254 only generate a tag for the first clause. */
5255 if (last == NULL)
5256 last = xnew (len + 1, char);
5257 else if (len + 1 > allocated)
5258 xrnew (last, len + 1, char);
5259 allocated = len + 1;
5260 memcpy (last, cp, len);
5261 last[len] = '\0';
5264 free (last);
5268 static void
5269 prolog_skip_comment (linebuffer *plb, FILE *inf)
5271 char *cp;
5275 for (cp = plb->buffer; *cp != '\0'; cp++)
5276 if (cp[0] == '*' && cp[1] == '/')
5277 return;
5278 readline (plb, inf);
5280 while (!feof (inf));
5284 * A predicate or rule definition is added if it matches:
5285 * <beginning of line><Prolog Atom><whitespace>(
5286 * or <beginning of line><Prolog Atom><whitespace>:-
5288 * It is added to the tags database if it doesn't match the
5289 * name of the previous clause header.
5291 * Return the size of the name of the predicate or rule, or 0 if no
5292 * header was found.
5294 static size_t
5295 prolog_pr (char *s, char *last)
5297 /* Name of last clause. */
5299 size_t pos;
5300 size_t len;
5302 pos = prolog_atom (s, 0);
5303 if (! pos)
5304 return 0;
5306 len = pos;
5307 pos = skip_spaces (s + pos) - s;
5309 if ((s[pos] == '.'
5310 || (s[pos] == '(' && (pos += 1))
5311 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5312 && (last == NULL /* save only the first clause */
5313 || len != strlen (last)
5314 || !strneq (s, last, len)))
5316 make_tag (s, len, true, s, pos, lineno, linecharno);
5317 return len;
5319 else
5320 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in it at which the atom starts.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (*p == '\'')
    {
      p++;
      for (;;)
        {
          if (*p == '\0')
            /* Multiline quoted atoms are ignored.  */
            return 0;

          if (*p == '\\')
            {
              /* A backslash must be followed by something.  */
              if (p[1] == '\0')
                return 0;
              p += 2;
            }
          else if (*p == '\'')
            {
              p++;
              if (*p != '\'')
                break;          /* that was the closing quote */
              p++;              /* a doubled quote stands for itself */
            }
          else
            p++;
        }
      return p - start;
    }

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      for (p++; c_isalnum (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  return 0;
}
5382 * Support for Erlang
5384 * Generates tags for functions, defines, and records.
5385 * Assumes that Erlang functions start at column 0.
5386 * Original code by Anders Lindgren (1996)
5388 static int erlang_func (char *, char *);
5389 static void erlang_attribute (char *);
5390 static int erlang_atom (char *);
5392 static void
5393 Erlang_functions (FILE *inf)
5395 char *cp, *last;
5396 int len;
5397 int allocated;
5399 allocated = 0;
5400 len = 0;
5401 last = NULL;
5403 LOOP_ON_INPUT_LINES (inf, lb, cp)
5405 if (cp[0] == '\0') /* Empty line */
5406 continue;
5407 else if (c_isspace (cp[0])) /* Not function nor attribute */
5408 continue;
5409 else if (cp[0] == '%') /* comment */
5410 continue;
5411 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5412 continue;
5413 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5415 erlang_attribute (cp);
5416 if (last != NULL)
5418 free (last);
5419 last = NULL;
5422 else if ((len = erlang_func (cp, last)) > 0)
5425 * Function. Store the function name so that we only
5426 * generates a tag for the first clause.
5428 if (last == NULL)
5429 last = xnew (len + 1, char);
5430 else if (len + 1 > allocated)
5431 xrnew (last, len + 1, char);
5432 allocated = len + 1;
5433 memcpy (last, cp, len);
5434 last[len] = '\0';
5437 free (last);
5442 * A function definition is added if it matches:
5443 * <beginning of line><Erlang Atom><whitespace>(
5445 * It is added to the tags database if it doesn't match the
5446 * name of the previous clause header.
5448 * Return the size of the name of the function, or 0 if no function
5449 * was found.
5451 static int
5452 erlang_func (char *s, char *last)
5454 /* Name of last clause. */
5456 int pos;
5457 int len;
5459 pos = erlang_atom (s);
5460 if (pos < 1)
5461 return 0;
5463 len = pos;
5464 pos = skip_spaces (s + pos) - s;
5466 /* Save only the first clause. */
5467 if (s[pos++] == '('
5468 && (last == NULL
5469 || len != (int)strlen (last)
5470 || !strneq (s, last, len)))
5472 make_tag (s, len, true, s, pos, lineno, linecharno);
5473 return len;
5476 return 0;
5481 * Handle attributes. Currently, tags are generated for defines
5482 * and records.
5484 * They are on the form:
5485 * -define(foo, bar).
5486 * -define(Foo(M, N), M+N).
5487 * -record(graph, {vtab = notable, cyclic = true}).
5489 static void
5490 erlang_attribute (char *s)
5492 char *cp = s;
5494 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5495 && *cp++ == '(')
5497 int len = erlang_atom (skip_spaces (cp));
5498 if (len > 0)
5499 make_tag (cp, len, true, s, cp + len - s, lineno, linecharno);
5501 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (s[0] == '\'')
    {
      /* Quoted atom: runs up to the next unescaped quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* A backslash hides the character after it.  */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (c_isalpha (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      pos = 1;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  return 0;
}
5534 static char *scan_separators (char *);
5535 static void add_regex (char *, language *);
5536 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];     /* the first character is the separator */
  char *dst = name;             /* where the next decoded char goes */
  char *src;                    /* the character being examined */
  bool escaped = false;         /* previous character was a backslash */

  for (src = name + 1; *src != '\0'; src++)
    {
      const char ch = *src;

      if (!escaped)
        {
          if (ch == '\\')
            escaped = true;
          else if (ch == sep)
            break;              /* unquoted separator: done */
          else
            *dst++ = ch;
          continue;
        }

      escaped = false;
      switch (ch)
        {
        case 'a': *dst++ = '\007'; break; /* BEL (bell) */
        case 'b': *dst++ = '\b'; break;   /* BS (back space) */
        case 'd': *dst++ = 0177; break;   /* DEL (delete) */
        case 'e': *dst++ = 033; break;    /* ESC (escape) */
        case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
        case 'n': *dst++ = '\n'; break;   /* NL (new line) */
        case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
        case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
        case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
        default:
          /* A quoted separator stands for itself; anything else that
             is quoted keeps its backslash.  */
          if (ch != sep)
            *dst++ = '\\';
          *dst++ = ch;
          break;
        }
    }

  /* Terminate copied string.  DST always trails SRC, so this cannot
     overwrite the character SRC points to.  */
  *dst = '\0';

  /* Anything but the separator here means an unterminated regexp.  */
  return *src == sep ? src : NULL;
}
5597 /* Look at the argument of --regex or --no-regex and do the right
5598 thing. Same for each line of a regexp file. */
5599 static void
5600 analyze_regex (char *regex_arg)
5602 if (regex_arg == NULL)
5604 free_regexps (); /* --no-regex: remove existing regexps */
5605 return;
5608 /* A real --regexp option or a line in a regexp file. */
5609 switch (regex_arg[0])
5611 /* Comments in regexp file or null arg to --regex. */
5612 case '\0':
5613 case ' ':
5614 case '\t':
5615 break;
5617 /* Read a regex file. This is recursive and may result in a
5618 loop, which will stop when the file descriptors are exhausted. */
5619 case '@':
5621 FILE *regexfp;
5622 linebuffer regexbuf;
5623 char *regexfile = regex_arg + 1;
5625 /* regexfile is a file containing regexps, one per line. */
5626 regexfp = fopen (regexfile, "r" FOPEN_BINARY);
5627 if (regexfp == NULL)
5628 pfatal (regexfile);
5629 linebuffer_init (&regexbuf);
5630 while (readline_internal (&regexbuf, regexfp) > 0)
5631 analyze_regex (regexbuf.buffer);
5632 free (regexbuf.buffer);
5633 fclose (regexfp);
5635 break;
5637 /* Regexp to be used for a specific language only. */
5638 case '{':
5640 language *lang;
5641 char *lang_name = regex_arg + 1;
5642 char *cp;
5644 for (cp = lang_name; *cp != '}'; cp++)
5645 if (*cp == '\0')
5647 error ("unterminated language name in regex: %s", regex_arg);
5648 return;
5650 *cp++ = '\0';
5651 lang = get_language_from_langname (lang_name);
5652 if (lang == NULL)
5653 return;
5654 add_regex (cp, lang);
5656 break;
5658 /* Regexp to be used for any language. */
5659 default:
5660 add_regex (regex_arg, NULL);
5661 break;
5665 /* Separate the regexp pattern, compile it,
5666 and care for optional name and modifiers. */
5667 static void
5668 add_regex (char *regexp_pattern, language *lang)
5670 static struct re_pattern_buffer zeropattern;
5671 char sep, *pat, *name, *modifiers;
5672 char empty = '\0';
5673 const char *err;
5674 struct re_pattern_buffer *patbuf;
5675 regexp *rp;
5676 bool
5677 force_explicit_name = true, /* do not use implicit tag names */
5678 ignore_case = false, /* case is significant */
5679 multi_line = false, /* matches are done one line at a time */
5680 single_line = false; /* dot does not match newline */
5683 if (strlen (regexp_pattern) < 3)
5685 error ("null regexp");
5686 return;
5688 sep = regexp_pattern[0];
5689 name = scan_separators (regexp_pattern);
5690 if (name == NULL)
5692 error ("%s: unterminated regexp", regexp_pattern);
5693 return;
5695 if (name[1] == sep)
5697 error ("null name for regexp \"%s\"", regexp_pattern);
5698 return;
5700 modifiers = scan_separators (name);
5701 if (modifiers == NULL) /* no terminating separator --> no name */
5703 modifiers = name;
5704 name = &empty;
5706 else
5707 modifiers += 1; /* skip separator */
5709 /* Parse regex modifiers. */
5710 for (; modifiers[0] != '\0'; modifiers++)
5711 switch (modifiers[0])
5713 case 'N':
5714 if (modifiers == name)
5715 error ("forcing explicit tag name but no name, ignoring");
5716 force_explicit_name = true;
5717 break;
5718 case 'i':
5719 ignore_case = true;
5720 break;
5721 case 's':
5722 single_line = true;
5723 /* FALLTHRU */
5724 case 'm':
5725 multi_line = true;
5726 need_filebuf = true;
5727 break;
5728 default:
5729 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5730 break;
5733 patbuf = xnew (1, struct re_pattern_buffer);
5734 *patbuf = zeropattern;
5735 if (ignore_case)
5737 static char lc_trans[UCHAR_MAX + 1];
5738 int i;
5739 for (i = 0; i < UCHAR_MAX + 1; i++)
5740 lc_trans[i] = c_tolower (i);
5741 patbuf->translate = lc_trans; /* translation table to fold case */
5744 if (multi_line)
5745 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5746 else
5747 pat = regexp_pattern;
5749 if (single_line)
5750 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5751 else
5752 re_set_syntax (RE_SYNTAX_EMACS);
5754 err = re_compile_pattern (pat, strlen (pat), patbuf);
5755 if (multi_line)
5756 free (pat);
5757 if (err != NULL)
5759 error ("%s while compiling pattern", err);
5760 return;
5763 rp = p_head;
5764 p_head = xnew (1, regexp);
5765 p_head->pattern = savestr (regexp_pattern);
5766 p_head->p_next = rp;
5767 p_head->lang = lang;
5768 p_head->pat = patbuf;
5769 p_head->name = savestr (name);
5770 p_head->error_signaled = false;
5771 p_head->force_explicit_name = force_explicit_name;
5772 p_head->ignore_case = ignore_case;
5773 p_head->multi_line = multi_line;
5777 * Do the substitutions indicated by the regular expression and
5778 * arguments.
5780 static char *
5781 substitute (char *in, char *out, struct re_registers *regs)
5783 char *result, *t;
5784 int size, dig, diglen;
5786 result = NULL;
5787 size = strlen (out);
5789 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5790 if (out[size - 1] == '\\')
5791 fatal ("pattern error in \"%s\"", out);
5792 for (t = strchr (out, '\\');
5793 t != NULL;
5794 t = strchr (t + 2, '\\'))
5795 if (c_isdigit (t[1]))
5797 dig = t[1] - '0';
5798 diglen = regs->end[dig] - regs->start[dig];
5799 size += diglen - 2;
5801 else
5802 size -= 1;
5804 /* Allocate space and do the substitutions. */
5805 assert (size >= 0);
5806 result = xnew (size + 1, char);
5808 for (t = result; *out != '\0'; out++)
5809 if (*out == '\\' && c_isdigit (*++out))
5811 dig = *out - '0';
5812 diglen = regs->end[dig] - regs->start[dig];
5813 memcpy (t, in + regs->start[dig], diglen);
5814 t += diglen;
5816 else
5817 *t++ = *out;
5818 *t = '\0';
5820 assert (t <= result + size);
5821 assert (t - result == (int)strlen (result));
5823 return result;
5826 /* Deallocate all regexps. */
5827 static void
5828 free_regexps (void)
5830 regexp *rp;
5831 while (p_head != NULL)
5833 rp = p_head->p_next;
5834 free (p_head->pattern);
5835 free (p_head->name);
5836 free (p_head);
5837 p_head = rp;
5839 return;
5843 * Reads the whole file as a single string from `filebuf' and looks for
5844 * multi-line regular expressions, creating tags on matches.
5845 * readline already dealt with normal regexps.
5847 * Idea by Ben Wing <ben@666.com> (2002).
5849 static void
5850 regex_tag_multiline (void)
5852 char *buffer = filebuf.buffer;
5853 regexp *rp;
5854 char *name;
5856 for (rp = p_head; rp != NULL; rp = rp->p_next)
5858 int match = 0;
5860 if (!rp->multi_line)
5861 continue; /* skip normal regexps */
5863 /* Generic initializations before parsing file from memory. */
5864 lineno = 1; /* reset global line number */
5865 charno = 0; /* reset global char number */
5866 linecharno = 0; /* reset global char number of line start */
5868 /* Only use generic regexps or those for the current language. */
5869 if (rp->lang != NULL && rp->lang != curfdp->lang)
5870 continue;
5872 while (match >= 0 && match < filebuf.len)
5874 match = re_search (rp->pat, buffer, filebuf.len, charno,
5875 filebuf.len - match, &rp->regs);
5876 switch (match)
5878 case -2:
5879 /* Some error. */
5880 if (!rp->error_signaled)
5882 error ("regexp stack overflow while matching \"%s\"",
5883 rp->pattern);
5884 rp->error_signaled = true;
5886 break;
5887 case -1:
5888 /* No match. */
5889 break;
5890 default:
5891 if (match == rp->regs.end[0])
5893 if (!rp->error_signaled)
5895 error ("regexp matches the empty string: \"%s\"",
5896 rp->pattern);
5897 rp->error_signaled = true;
5899 match = -3; /* exit from while loop */
5900 break;
5903 /* Match occurred. Construct a tag. */
5904 while (charno < rp->regs.end[0])
5905 if (buffer[charno++] == '\n')
5906 lineno++, linecharno = charno;
5907 name = rp->name;
5908 if (name[0] == '\0')
5909 name = NULL;
5910 else /* make a named tag */
5911 name = substitute (buffer, rp->name, &rp->regs);
5912 if (rp->force_explicit_name)
5913 /* Force explicit tag name, if a name is there. */
5914 pfnote (name, true, buffer + linecharno,
5915 charno - linecharno + 1, lineno, linecharno);
5916 else
5917 make_tag (name, strlen (name), true, buffer + linecharno,
5918 charno - linecharno + 1, lineno, linecharno);
5919 break;
5926 static bool
5927 nocase_tail (const char *cp)
5929 int len = 0;
5931 while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
5932 cp++, len++;
5933 if (*cp == '\0' && !intoken (dbp[len]))
5935 dbp += len;
5936 return true;
5938 return false;
5941 static void
5942 get_tag (register char *bp, char **namepp)
5944 register char *cp = bp;
5946 if (*bp != '\0')
5948 /* Go till you get to white space or a syntactic break */
5949 for (cp = bp + 1; !notinname (*cp); cp++)
5950 continue;
5951 make_tag (bp, cp - bp, true,
5952 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5955 if (namepp != NULL)
5956 *namepp = savenstr (bp, cp - bp);
5960 * Read a line of text from `stream' into `lbp', excluding the
5961 * newline or CR-NL, if any. Return the number of characters read from
5962 * `stream', which is the length of the line including the newline.
5964 * On DOS or Windows we do not count the CR character, if any before the
5965 * NL, in the returned length; this mirrors the behavior of Emacs on those
5966 * platforms (for text files, it translates CR-NL to NL as it reads in the
5967 * file).
5969 * If multi-line regular expressions are requested, each line read is
5970 * appended to `filebuf'.
5972 static long
5973 readline_internal (linebuffer *lbp, register FILE *stream)
5975 char *buffer = lbp->buffer;
5976 register char *p = lbp->buffer;
5977 register char *pend;
5978 int chars_deleted;
5980 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5982 for (;;)
5984 register int c = getc (stream);
5985 if (p == pend)
5987 /* We're at the end of linebuffer: expand it. */
5988 lbp->size *= 2;
5989 xrnew (buffer, lbp->size, char);
5990 p += buffer - lbp->buffer;
5991 pend = buffer + lbp->size;
5992 lbp->buffer = buffer;
5994 if (c == EOF)
5996 *p = '\0';
5997 chars_deleted = 0;
5998 break;
6000 if (c == '\n')
6002 if (p > buffer && p[-1] == '\r')
6004 p -= 1;
6005 #ifdef DOS_NT
6006 /* Assume CRLF->LF translation will be performed by Emacs
6007 when loading this file, so CRs won't appear in the buffer.
6008 It would be cleaner to compensate within Emacs;
6009 however, Emacs does not know how many CRs were deleted
6010 before any given point in the file. */
6011 chars_deleted = 1;
6012 #else
6013 chars_deleted = 2;
6014 #endif
6016 else
6018 chars_deleted = 1;
6020 *p = '\0';
6021 break;
6023 *p++ = c;
6025 lbp->len = p - buffer;
6027 if (need_filebuf /* we need filebuf for multi-line regexps */
6028 && chars_deleted > 0) /* not at EOF */
6030 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6032 /* Expand filebuf. */
6033 filebuf.size *= 2;
6034 xrnew (filebuf.buffer, filebuf.size, char);
6036 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6037 filebuf.len += lbp->len;
6038 filebuf.buffer[filebuf.len++] = '\n';
6039 filebuf.buffer[filebuf.len] = '\0';
6042 return lbp->len + chars_deleted;
6046 * Like readline_internal, above, but in addition try to match the
6047 * input line against relevant regular expressions and manage #line
6048 * directives.
6050 static void
6051 readline (linebuffer *lbp, FILE *stream)
6053 long result;
6055 linecharno = charno; /* update global char number of line start */
6056 result = readline_internal (lbp, stream); /* read line */
6057 lineno += 1; /* increment global line number */
6058 charno += result; /* increment global char number */
6060 /* Honor #line directives. */
6061 if (!no_line_directive)
6063 static bool discard_until_line_directive;
6065 /* Check whether this is a #line directive. */
6066 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6068 unsigned int lno;
6069 int start = 0;
6071 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6072 && start > 0) /* double quote character found */
6074 char *endp = lbp->buffer + start;
6076 while ((endp = strchr (endp, '"')) != NULL
6077 && endp[-1] == '\\')
6078 endp++;
6079 if (endp != NULL)
6080 /* Ok, this is a real #line directive. Let's deal with it. */
6082 char *taggedabsname; /* absolute name of original file */
6083 char *taggedfname; /* name of original file as given */
6084 char *name; /* temp var */
6086 discard_until_line_directive = false; /* found it */
6087 name = lbp->buffer + start;
6088 *endp = '\0';
6089 canonicalize_filename (name);
6090 taggedabsname = absolute_filename (name, tagfiledir);
6091 if (filename_is_absolute (name)
6092 || filename_is_absolute (curfdp->infname))
6093 taggedfname = savestr (taggedabsname);
6094 else
6095 taggedfname = relative_filename (taggedabsname,tagfiledir);
6097 if (streq (curfdp->taggedfname, taggedfname))
6098 /* The #line directive is only a line number change. We
6099 deal with this afterwards. */
6100 free (taggedfname);
6101 else
6102 /* The tags following this #line directive should be
6103 attributed to taggedfname. In order to do this, set
6104 curfdp accordingly. */
6106 fdesc *fdp; /* file description pointer */
6108 /* Go look for a file description already set up for the
6109 file indicated in the #line directive. If there is
6110 one, use it from now until the next #line
6111 directive. */
6112 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6113 if (streq (fdp->infname, curfdp->infname)
6114 && streq (fdp->taggedfname, taggedfname))
6115 /* If we remove the second test above (after the &&)
6116 then all entries pertaining to the same file are
6117 coalesced in the tags file. If we use it, then
6118 entries pertaining to the same file but generated
6119 from different files (via #line directives) will
6120 go into separate sections in the tags file. These
6121 alternatives look equivalent. The first one
6122 destroys some apparently useless information. */
6124 curfdp = fdp;
6125 free (taggedfname);
6126 break;
6128 /* Else, if we already tagged the real file, skip all
6129 input lines until the next #line directive. */
6130 if (fdp == NULL) /* not found */
6131 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6132 if (streq (fdp->infabsname, taggedabsname))
6134 discard_until_line_directive = true;
6135 free (taggedfname);
6136 break;
6138 /* Else create a new file description and use that from
6139 now on, until the next #line directive. */
6140 if (fdp == NULL) /* not found */
6142 fdp = fdhead;
6143 fdhead = xnew (1, fdesc);
6144 *fdhead = *curfdp; /* copy curr. file description */
6145 fdhead->next = fdp;
6146 fdhead->infname = savestr (curfdp->infname);
6147 fdhead->infabsname = savestr (curfdp->infabsname);
6148 fdhead->infabsdir = savestr (curfdp->infabsdir);
6149 fdhead->taggedfname = taggedfname;
6150 fdhead->usecharno = false;
6151 fdhead->prop = NULL;
6152 fdhead->written = false;
6153 curfdp = fdhead;
6156 free (taggedabsname);
6157 lineno = lno - 1;
6158 readline (lbp, stream);
6159 return;
6160 } /* if a real #line directive */
6161 } /* if #line is followed by a number */
6162 } /* if line begins with "#line " */
6164 /* If we are here, no #line directive was found. */
6165 if (discard_until_line_directive)
6167 if (result > 0)
6169 /* Do a tail recursion on ourselves, thus discarding the contents
6170 of the line buffer. */
6171 readline (lbp, stream);
6172 return;
6174 /* End of file. */
6175 discard_until_line_directive = false;
6176 return;
6178 } /* if #line directives should be considered */
6181 int match;
6182 regexp *rp;
6183 char *name;
6185 /* Match against relevant regexps. */
6186 if (lbp->len > 0)
6187 for (rp = p_head; rp != NULL; rp = rp->p_next)
6189 /* Only use generic regexps or those for the current language.
6190 Also do not use multiline regexps, which is the job of
6191 regex_tag_multiline. */
6192 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6193 || rp->multi_line)
6194 continue;
6196 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6197 switch (match)
6199 case -2:
6200 /* Some error. */
6201 if (!rp->error_signaled)
6203 error ("regexp stack overflow while matching \"%s\"",
6204 rp->pattern);
6205 rp->error_signaled = true;
6207 break;
6208 case -1:
6209 /* No match. */
6210 break;
6211 case 0:
6212 /* Empty string matched. */
6213 if (!rp->error_signaled)
6215 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6216 rp->error_signaled = true;
6218 break;
6219 default:
6220 /* Match occurred. Construct a tag. */
6221 name = rp->name;
6222 if (name[0] == '\0')
6223 name = NULL;
6224 else /* make a named tag */
6225 name = substitute (lbp->buffer, rp->name, &rp->regs);
6226 if (rp->force_explicit_name)
6227 /* Force explicit tag name, if a name is there. */
6228 pfnote (name, true, lbp->buffer, match, lineno, linecharno);
6229 else
6230 make_tag (name, strlen (name), true,
6231 lbp->buffer, match, lineno, linecharno);
6232 break;
/*
 * Duplicate the NUL-terminated string CP: the copy lives in freshly
 * allocated storage of strlen (CP) + 1 bytes obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  size_t whole = strlen (cp);

  return savenstr (cp, whole);
}
6250 * Return a pointer to a space of size LEN+1 allocated with xnew where
6251 * the string CP has been copied for at most the first LEN characters.
6253 static char *
6254 savenstr (const char *cp, int len)
6256 char *dp = xnew (len + 1, char);
6257 dp[len] = '\0';
6258 return memcpy (dp, cp, len);
/* Advance CP past any run of space characters and return the new
   position.  The end of the string does not count as a space.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP up to the next space character or the end of the string,
   whichever comes first, and return the new position.  */
static char *
skip_non_spaces (char *cp)
{
  for (; !(*cp == '\0' || c_isspace (*cp)); cp++)
    continue;
  return cp;
}
/* Advance CP past every character of the "name" class and return the
   new position.  */
static char *
skip_name (char *cp)
{
  for (;;)
    {
      /* '\0' is a notinname() so the scan ends there too.  */
      if (notinname (*cp))
        return cp;
      cp++;
    }
}
/* Report a diagnostic through error, using S1 as the format and S2 as
   its argument, then terminate the program with a failure status.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Describe the current errno on stderr via perror, prefixed by S1,
   then terminate the program with a failure status.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6304 static void
6305 suggest_asking_for_help (void)
6307 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6308 progname);
6309 exit (EXIT_FAILURE);
6312 /* Output a diagnostic with printf-style FORMAT and args. */
6313 static void
6314 error (const char *format, ...)
6316 va_list ap;
6317 va_start (ap, format);
6318 fprintf (stderr, "%s: ", progname);
6319 vfprintf (stderr, format, ap);
6320 fprintf (stderr, "\n");
6321 va_end (ap);
6324 /* Return a newly-allocated string whose contents
6325 concatenate those of s1, s2, s3. */
6326 static char *
6327 concat (const char *s1, const char *s2, const char *s3)
6329 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6330 char *result = xnew (len1 + len2 + len3 + 1, char);
6332 strcpy (result, s1);
6333 strcpy (result + len1, s2);
6334 strcpy (result + len1 + len2, s3);
6336 return result;
6340 /* Does the same work as the system V getcwd, but does not need to
6341 guess the buffer size in advance. */
6342 static char *
6343 etags_getcwd (void)
6345 int bufsize = 200;
6346 char *path = xnew (bufsize, char);
6348 while (getcwd (path, bufsize) == NULL)
6350 if (errno != ERANGE)
6351 pfatal ("getcwd");
6352 bufsize *= 2;
6353 free (path);
6354 path = xnew (bufsize, char);
6357 canonicalize_filename (path);
6358 return path;
6361 /* Return a newly allocated string containing a name of a temporary file. */
6362 static char *
6363 etags_mktmp (void)
6365 const char *tmpdir = getenv ("TMPDIR");
6366 const char *slash = "/";
6368 #if MSDOS || defined (DOS_NT)
6369 if (!tmpdir)
6370 tmpdir = getenv ("TEMP");
6371 if (!tmpdir)
6372 tmpdir = getenv ("TMP");
6373 if (!tmpdir)
6374 tmpdir = ".";
6375 if (tmpdir[strlen (tmpdir) - 1] == '/'
6376 || tmpdir[strlen (tmpdir) - 1] == '\\')
6377 slash = "";
6378 #else
6379 if (!tmpdir)
6380 tmpdir = "/tmp";
6381 if (tmpdir[strlen (tmpdir) - 1] == '/')
6382 slash = "";
6383 #endif
6385 char *templt = concat (tmpdir, slash, "etXXXXXX");
6386 int fd = mkostemp (templt, O_CLOEXEC);
6387 if (fd < 0)
6389 free (templt);
6390 templt = NULL;
6392 else
6393 close (fd);
6395 #if defined (DOS_NT)
6396 /* The file name will be used in shell redirection, so it needs to have
6397 DOS-style backslashes, or else the Windows shell will barf. */
6398 char *p;
6399 for (p = templt; *p; p++)
6400 if (*p == '/')
6401 *p = '\\';
6402 #endif
6403 return templt;
6406 /* Return a newly allocated string containing the file name of FILE
6407 relative to the absolute directory DIR (which should end with a slash). */
6408 static char *
6409 relative_filename (char *file, char *dir)
6411 char *fp, *dp, *afn, *res;
6412 int i;
6414 /* Find the common root of file and dir (with a trailing slash). */
6415 afn = absolute_filename (file, cwd);
6416 fp = afn;
6417 dp = dir;
6418 while (*fp++ == *dp++)
6419 continue;
6420 fp--, dp--; /* back to the first differing char */
6421 #ifdef DOS_NT
6422 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6423 return afn;
6424 #endif
6425 do /* look at the equal chars until '/' */
6426 fp--, dp--;
6427 while (*fp != '/');
6429 /* Build a sequence of "../" strings for the resulting relative file name. */
6430 i = 0;
6431 while ((dp = strchr (dp + 1, '/')) != NULL)
6432 i += 1;
6433 res = xnew (3*i + strlen (fp + 1) + 1, char);
6434 char *z = res;
6435 while (i-- > 0)
6436 z = stpcpy (z, "../");
6438 /* Add the file name relative to the common root of file and dir. */
6439 strcpy (z, fp + 1);
6440 free (afn);
6442 return res;
/* Return a newly allocated string holding the absolute file name of
   FILE.  A FILE that is already absolute is copied; otherwise it is
   appended to DIR (which should end with a slash).  "/dirname/.." and
   "/." components are then squeezed out of the result in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SEP always points
     at a slash, or at the NUL that ended up where a slash used to be.  */
  char *sep = strchr (res, '/');
  while (sep != NULL && sep[0] != '\0')
    {
      bool is_dotdot = (sep[1] == '.' && sep[2] == '.'
                        && (sep[3] == '/' || sep[3] == '\0'));
      bool is_dot = (sep[1] == '.'
                     && (sep[2] == '/' || sep[2] == '\0'));

      if (is_dotdot)
        {
          /* Walk back to the start of the preceding component.  */
          char *dst = sep;
          do
            dst--;
          while (dst >= res && !filename_is_absolute (dst));
          if (dst < res)
            dst = sep;          /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (dst[0] != '/')
            dst = sep;
#endif
          /* Pull the tail, NUL included, over the removed part and
             look again from the spot where it landed.  */
          memmove (dst, sep + 3, strlen (sep + 2));
          sep = dst;
        }
      else if (is_dot)
        /* Drop "/." and re-examine the same position.  */
        memmove (sep, sep + 2, strlen (sep + 1));
      else
        sep = strchr (sep + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* Just a safety net: should never happen.  */
  free (res);
  return savestr ("/");
}
/* Return a newly allocated string holding the absolute name of the
   directory in which FILE resides, resolved against DIR (which should
   end with a slash).  A FILE without any slash yields a copy of DIR.
   FILE is cut short right after its last slash for the duration of the
   call and restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  char *after = last_slash + 1;
  char saved = *after;

  *after = '\0';
  char *res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
/* Tell whether FN is an absolute file name: it begins with a slash or,
   on DOS_NT builds, with a drive letter followed by ":/".  FN must have
   been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
/* Canonicalize the file name FN in place.  Every run of separators is
   reduced to one forward slash.  On DOS_NT builds an upper-case drive
   letter is downcased first and backslashes count as separators.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next char to read */
  register char *dst = fn;      /* next char to write */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Turn backslashes into forward slashes, so that the loop below
     collapses mixed runs of both into a single forward slash.  */
  for (char *p = fn; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#endif

  /* Collapse multiple slashes into a single slash.  */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          *dst++ = '/';
          do
            src++;
          while (*src == '/');
        }
      else
        *dst++ = *src++;
    }

  *dst = '\0';
}
6583 /* Initialize a linebuffer for use. */
6584 static void
6585 linebuffer_init (linebuffer *lbp)
6587 lbp->size = (DEBUG) ? 3 : 200;
6588 lbp->buffer = xnew (lbp->size, char);
6589 lbp->buffer[0] = '\0';
6590 lbp->len = 0;
6593 /* Set the minimum size of a string contained in a linebuffer. */
6594 static void
6595 linebuffer_setlen (linebuffer *lbp, int toksize)
6597 while (lbp->size <= toksize)
6599 lbp->size *= 2;
6600 xrnew (lbp->buffer, lbp->size, char);
6602 lbp->len = toksize;
/* Allocate SIZE bytes with malloc and return them.  A NULL result from
   malloc is reported through fatal.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
/* Resize the block PTR to SIZE bytes with realloc and return the
   result.  A NULL result from realloc is reported through fatal.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */