* lisp/isearch.el (isearch-quote-char): Comment out converting unibyte
[emacs.git] / lib-src / etags.c
blobaa8c773e357ec26073e9d0c5112f62dec996e6bc
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
32 Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
/* Revision identification string.  The leading "@(#)" is the marker
   that the what(1) utility looks for.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
/* Truth values used throughout this file.  */
#define TRUE	1
#define FALSE	0

/* Normalize DEBUG so that it is always defined, as TRUE or FALSE, and
   can therefore be tested with `#if DEBUG'.  When debugging is off,
   NDEBUG is defined as well so that assert is disabled.  */
#if defined DEBUG
# undef DEBUG
# define DEBUG TRUE
#else
# define DEBUG FALSE
# define NDEBUG			/* disable assert */
#endif
94 #include <config.h>
96 #ifndef _GNU_SOURCE
97 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
98 #endif
100 /* WIN32_NATIVE is for XEmacs.
101 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
102 #ifdef WIN32_NATIVE
103 # undef MSDOS
104 # undef WINDOWSNT
105 # define WINDOWSNT
106 #endif /* WIN32_NATIVE */
108 #ifdef MSDOS
109 # undef MSDOS
110 # define MSDOS TRUE
111 # include <fcntl.h>
112 # include <sys/param.h>
113 # include <io.h>
114 #else
115 # define MSDOS FALSE
116 #endif /* MSDOS */
118 #ifdef WINDOWSNT
119 # include <fcntl.h>
120 # include <direct.h>
121 # include <io.h>
122 # define MAXPATHLEN _MAX_PATH
123 # undef HAVE_NTGUI
124 # undef DOS_NT
125 # define DOS_NT
126 #endif /* WINDOWSNT */
128 #include <unistd.h>
129 #include <stdarg.h>
130 #include <stdlib.h>
131 #include <string.h>
132 #include <stdio.h>
133 #include <ctype.h>
134 #include <errno.h>
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <c-strcase.h>
139 #include <assert.h>
140 #ifdef NDEBUG
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
143 #endif
145 #include <getopt.h>
146 #include <regex.h>
/* Build-time choice of program personality.  Defining CTAGS produces
   "ctags", compatible with the usual one.  Leaving it undefined
   produces "etags", which makes emacs-style tag tables and tags
   typedefs, #defines and struct/union/enum by default.  Either way
   CTAGS ends up defined, as TRUE or FALSE, so it can be used both in
   `#if' and in ordinary expressions.  */
#if defined CTAGS
# undef CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
#endif
/* String equality predicates.  Each macro first asserts that BOTH
   operands are non-null (the check disappears when assert is
   disabled) and then yields nonzero if the strings compare equal.
   The `case' variants compare with c_strcasecmp / c_strncasecmp; the
   `n' variants look at no more than N characters.  With assertions
   enabled the arguments are evaluated more than once, so they must
   not have side effects.  */
#define streq(s,t)	  (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)	  (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
/* Size of the character classification tables below.  */
#define CHARS 256
/* Reduce X to an index in the range 0 .. CHARS-1, so that a possibly
   negative `char' value can safely index a table or be handed to the
   <ctype.h> functions.  */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Table-driven character classes; the tables are filled in by init.  */
#define	iswhite(c)	(_wht[CHAR (c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR (c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR (c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR (c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR (c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that first pass their argument through CHAR.  */
#define ISALNUM(c)	isalnum (CHAR (c))
#define ISALPHA(c)	isalpha (CHAR (c))
#define ISDIGIT(c)	isdigit (CHAR (c))
#define ISLOWER(c)	islower (CHAR (c))

#define lowcase(c)	tolower (CHAR (c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a pointer to room for N objects of type Type.  xrnew
 * resizes the storage designated by the lvalue OldPointer to N
 * objects of type Type and stores the new address back into it.
 * When DEBUG is set the tracing allocators declared in chkmalloc.h
 * are used, otherwise xmalloc and xrealloc.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#endif
/* Booleans are plain ints here.  Several of the flag variables have
   their address stored in the `flag' member of the getopt long
   options table, which getopt declares as `int *'.  */
#define bool int

/* Type of a language parser: it scans the given input stream.  */
typedef void Lang_function (FILE *);
/* Description of one supported compression format.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;
207 typedef struct
209 const char *name; /* language name */
210 const char *help; /* detailed help for the language */
211 Lang_function *function; /* parse function */
212 const char **suffixes; /* name suffixes of this language's files */
213 const char **filenames; /* names of this language's files */
214 const char **interpreters; /* interpreters for this language */
215 bool metasource; /* source used to generate other sources */
216 } language;
218 typedef struct fdesc
220 struct fdesc *next; /* for the linked list */
221 char *infname; /* uncompressed input file name */
222 char *infabsname; /* absolute uncompressed input file name */
223 char *infabsdir; /* absolute dir of input file */
224 char *taggedfname; /* file name to write in tagfile */
225 language *lang; /* language of file */
226 char *prop; /* file properties to write in tagfile */
227 bool usecharno; /* etags tags shall contain char number */
228 bool written; /* entry written in the tags file */
229 } fdesc;
231 typedef struct node_st
232 { /* sorting structure */
233 struct node_st *left, *right; /* left and right sons */
234 fdesc *fdp; /* description of file to whom tag belongs */
235 char *name; /* tag name */
236 char *regex; /* search regexp */
237 bool valid; /* write this tag on the tag file */
238 bool is_func; /* function tag: use regexp in CTAGS mode */
239 bool been_warned; /* warning already given for duplicated tag */
240 int lno; /* line number tag is on */
241 long cno; /* character number line starts on */
242 } node;
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads
 * a line from a stream into a linebuffer and works regardless of the
 * length of the line.
 */
typedef struct
{
  /* Size of BUFFER.  */
  long size;
  /* Length of the string in BUFFER after readline reads it.  */
  int len;
  /* The text itself.  */
  char *buffer;
} linebuffer;
258 /* Used to support mixing of --lang and file names. */
259 typedef struct
261 enum {
262 at_language, /* a language specification */
263 at_regexp, /* a regular expression */
264 at_filename, /* a file name */
265 at_stdin, /* read from stdin here */
266 at_end /* stop parsing the list */
267 } arg_type; /* argument type */
268 language *lang; /* language associated with the argument */
269 char *what; /* the argument itself */
270 } argument;
272 /* Structure defining a regular expression. */
273 typedef struct regexp
275 struct regexp *p_next; /* pointer to next in list */
276 language *lang; /* if set, use only for this language */
277 char *pattern; /* the regexp pattern */
278 char *name; /* tag name */
279 struct re_pattern_buffer *pat; /* the compiled pattern */
280 struct re_registers regs; /* re registers */
281 bool error_signaled; /* already signaled for this regexp */
282 bool force_explicit_name; /* do not allow implicit tag name */
283 bool ignore_case; /* ignore case when matching */
284 bool multi_line; /* do a multi-line match on the whole file */
285 } regexp;
/* Prototypes of the language parsers.  They are spelled out in full
   rather than declared as, e.g.,
     Lang_function Ada_funcs;
   because many compilers barf on that form.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
319 static language *get_language_from_langname (const char *);
320 static void readline (linebuffer *, FILE *);
321 static long readline_internal (linebuffer *, FILE *);
322 static bool nocase_tail (const char *);
323 static void get_tag (char *, char **);
325 static void analyse_regex (char *);
326 static void free_regexps (void);
327 static void regex_tag_multiline (void);
328 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
329 static _Noreturn void suggest_asking_for_help (void);
330 _Noreturn void fatal (const char *, const char *);
331 static _Noreturn void pfatal (const char *);
332 static void add_node (node *, node **);
334 static void init (void);
335 static void process_file_name (char *, language *);
336 static void process_file (FILE *, char *, language *);
337 static void find_entries (FILE *);
338 static void free_tree (node *);
339 static void free_fdesc (fdesc *);
340 static void pfnote (char *, bool, char *, int, int, long);
341 static void invalidate_nodes (fdesc *, node **);
342 static void put_entries (node *);
344 static char *concat (const char *, const char *, const char *);
345 static char *skip_spaces (char *);
346 static char *skip_non_spaces (char *);
347 static char *skip_name (char *);
348 static char *savenstr (const char *, int);
349 static char *savestr (const char *);
350 static char *etags_strchr (const char *, int);
351 static char *etags_strrchr (const char *, int);
352 static char *etags_getcwd (void);
353 static char *relative_filename (char *, char *);
354 static char *absolute_filename (char *, char *);
355 static char *absolute_dirname (char *, char *);
356 static bool filename_is_absolute (char *f);
357 static void canonicalize_filename (char *);
358 static void linebuffer_init (linebuffer *);
359 static void linebuffer_setlen (linebuffer *, int);
360 static void *xmalloc (size_t);
361 static void *xrealloc (char *, size_t);
364 static char searchar = '/'; /* use /.../ searches */
366 static char *tagfile; /* output file */
367 static char *progname; /* name this program was invoked with */
368 static char *cwd; /* current working directory */
369 static char *tagfiledir; /* directory of tagfile */
370 static FILE *tagf; /* ioptr for tags file */
371 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
373 static fdesc *fdhead; /* head of file description list */
374 static fdesc *curfdp; /* current file description */
375 static int lineno; /* line number of current line */
376 static long charno; /* current character number */
377 static long linecharno; /* charno of start of current line */
378 static char *dbp; /* pointer to start of current tag */
380 static const int invalidcharno = -1;
382 static node *nodehead; /* the head of the binary tree of tags */
383 static node *last_node; /* the last node created */
385 static linebuffer lb; /* the current line */
386 static linebuffer filebuf; /* a buffer containing the whole file */
387 static linebuffer token_name; /* a buffer containing a tag name */
389 /* boolean "functions" (see init) */
390 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
391 static const char
392 /* white chars */
393 *white = " \f\t\n\r\v",
394 /* not in a name */
395 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
396 /* token ending chars */
397 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
398 /* token starting chars */
399 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
400 /* valid in-token chars */
401 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
403 static bool append_to_tagfile; /* -a: append to tags */
404 /* The next five default to TRUE in C and derived languages. */
405 static bool typedefs; /* -t: create tags for C and Ada typedefs */
406 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
407 /* 0 struct/enum/union decls, and C++ */
408 /* member functions. */
409 static bool constantypedefs; /* -d: create tags for C #define, enum */
410 /* constants and variables. */
411 /* -D: opposite of -d. Default under ctags. */
412 static bool globals; /* create tags for global variables */
413 static bool members; /* create tags for C member variables */
414 static bool declarations; /* --declarations: tag them and extern in C&Co*/
415 static bool no_line_directive; /* ignore #line directives (undocumented) */
416 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
417 static bool update; /* -u: update tags */
418 static bool vgrind_style; /* -v: create vgrind style index output */
419 static bool no_warnings; /* -w: suppress warnings (undocumented) */
420 static bool cxref_style; /* -x: create cxref style output */
421 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
422 static bool ignoreindent; /* -I: ignore indentation in C */
423 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
425 /* STDIN is defined in LynxOS system headers */
426 #ifdef STDIN
427 # undef STDIN
428 #endif
430 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
431 static bool parsing_stdin; /* --parse-stdin used */
433 static regexp *p_head; /* list of all regexps */
434 static bool need_filebuf; /* some regexes are multi-line */
436 static struct option longopts[] =
438 { "append", no_argument, NULL, 'a' },
439 { "packages-only", no_argument, &packages_only, TRUE },
440 { "c++", no_argument, NULL, 'C' },
441 { "declarations", no_argument, &declarations, TRUE },
442 { "no-line-directive", no_argument, &no_line_directive, TRUE },
443 { "no-duplicates", no_argument, &no_duplicates, TRUE },
444 { "help", no_argument, NULL, 'h' },
445 { "help", no_argument, NULL, 'H' },
446 { "ignore-indentation", no_argument, NULL, 'I' },
447 { "language", required_argument, NULL, 'l' },
448 { "members", no_argument, &members, TRUE },
449 { "no-members", no_argument, &members, FALSE },
450 { "output", required_argument, NULL, 'o' },
451 { "regex", required_argument, NULL, 'r' },
452 { "no-regex", no_argument, NULL, 'R' },
453 { "ignore-case-regex", required_argument, NULL, 'c' },
454 { "parse-stdin", required_argument, NULL, STDIN },
455 { "version", no_argument, NULL, 'V' },
457 #if CTAGS /* Ctags options */
458 { "backward-search", no_argument, NULL, 'B' },
459 { "cxref", no_argument, NULL, 'x' },
460 { "defines", no_argument, NULL, 'd' },
461 { "globals", no_argument, &globals, TRUE },
462 { "typedefs", no_argument, NULL, 't' },
463 { "typedefs-and-c++", no_argument, NULL, 'T' },
464 { "update", no_argument, NULL, 'u' },
465 { "vgrind", no_argument, NULL, 'v' },
466 { "no-warn", no_argument, NULL, 'w' },
468 #else /* Etags options */
469 { "no-defines", no_argument, NULL, 'D' },
470 { "no-globals", no_argument, &globals, FALSE },
471 { "include", required_argument, NULL, 'i' },
472 #endif
473 { NULL }
476 static compressor compressors[] =
478 { "z", "gzip -d -c"},
479 { "Z", "gzip -d -c"},
480 { "gz", "gzip -d -c"},
481 { "GZ", "gzip -d -c"},
482 { "bz2", "bzip2 -d -c" },
483 { "xz", "xz -d -c" },
484 { NULL }
/*
 * Language stuff.
 */

/* Ada code: file name suffixes and --help text.  */
static const char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };
static const char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
/* Assembly code: file name suffixes and --help text.  */
static const char *Asm_suffixes [] =
{
  "a",		/* Unix assembler */
  "asm",	/* Microcontroller assembly */
  "def",	/* BSO/Tasking definition includes */
  "inc",	/* Microcontroller include files */
  "ins",	/* Microcontroller include files */
  "s", "sa",	/* Unix assembler */
  "S",		/* cpp-processed Unix assembler */
  "src",	/* BSO/Tasking C compiler output */
  NULL
};
static const char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
/* Files named .c or .h may turn out to be C++, either because the
   --c++ flag was given or because the `class' or `template' keywords
   are met inside the file.  That is why default_C_entries is called
   for these.  */
static const char *default_C_suffixes [] = { "c", "h", NULL };
#if CTAGS				/* C help for Ctags */
static const char default_C_help [] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#else					/* C help for Etags */
static const char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'.";
#endif	/* C help for Ctags and Etags */
/* C++: file name suffixes and --help text.  */
static const char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
  "M",			/* Objective C++ */
  "pdb",		/* PostScript with C syntax */
  NULL
};
static const char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
/* Java: file name suffixes and --help text.  The help string is
   read-only, like the help strings of the other languages.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol: file name suffixes and --help text.  The help string is
   read-only, like the help strings of the other languages.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
/* C*: file name suffixes.  */
static const char *Cstar_suffixes [] = { "cs", "hs", NULL };
/* Erlang: file name suffixes and --help text.  */
static const char *Erlang_suffixes [] = { "erl", "hrl", NULL };
static const char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
/* Forth: file name suffixes and --help text.  The suffix table has
   internal linkage, like the tables of the other languages.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran: file name suffixes and --help text.  */
static const char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";
/* HTML: file name suffixes and --help text.  */
static const char *HTML_suffixes [] = { "htm", "html", "shtml", NULL };
static const char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
/* Lisp: file name suffixes and --help text.  */
static const char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
static const char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.\n"
  "The `--declarations' option tags \"(defvar foo)\" constructs too.";
/* Lua: file name suffixes and --help text.  */
static const char *Lua_suffixes [] = { "lua", "LUA", NULL };
static const char Lua_help [] = "In Lua scripts, all functions are tags.";
/* Makefiles are recognized by whole file name rather than by suffix.  */
static const char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
static const char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";
/* Objective C: file name suffixes and --help text.  */
static const char *Objc_suffixes [] =
{
  "lm",			/* Objective lex file */
  "m",			/* Objective C file */
  NULL
};
static const char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
/* Pascal: file name suffixes and --help text.  */
static const char *Pascal_suffixes [] = { "p", "pas", NULL };
static const char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl: file name suffixes, interpreter names and --help text.  */
static const char *Perl_suffixes [] = { "pl", "pm", NULL };
static const char *Perl_interpreters [] = { "perl", "@PERL@", NULL };
static const char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
/* PHP: file name suffixes and --help text.  */
static const char *PHP_suffixes [] = { "php", "php3", "php4", NULL };
static const char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";
/* Suffixes of files handed to plain_C_entries.  */
static const char *plain_C_suffixes [] =
{
  "pc",			/* Pro*C file */
  NULL
};
/* PostScript: file name suffixes (.psw is for PSWrap) and --help text.  */
static const char *PS_suffixes [] = { "ps", "psw", NULL };
static const char PS_help [] =
  "In PostScript code, the tags are the functions.";
/* Prolog: file name suffixes and --help text.  */
static const char *Prolog_suffixes [] = { "prolog", NULL };
static const char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
/* Python: file name suffixes and --help text.  */
static const char *Python_suffixes [] = { "py", NULL };
static const char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
/* Scheme: file name suffixes and --help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static const char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
static const char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
/* TeX and LaTeX: file name suffixes and --help text.  */
static const char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
static const char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
/* Texinfo: file name suffixes and --help text.  */
static const char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";
/* Yacc and Bison: file name suffixes (.ym is Objective yacc file) and
   --help text.  */
static const char *Yacc_suffixes [] =
{
  "y", "y++", "ym", "yxx", "yy", NULL
};
static const char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
/* Help text for the pseudo-language `auto'.  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
/* Help text for the pseudo-language `none'.  */
static const char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text for languages that have no detailed help of their own.  */
static const char no_lang_help [] =
  "No detailed help available for this language.";
731 * Table of languages.
733 * It is ok for a given function to be listed under more than one
734 * name. I just didn't.
737 static language lang_names [] =
739 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
740 { "asm", Asm_help, Asm_labels, Asm_suffixes },
741 { "c", default_C_help, default_C_entries, default_C_suffixes },
742 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
743 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
744 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
745 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
746 { "forth", Forth_help, Forth_words, Forth_suffixes },
747 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
748 { "html", HTML_help, HTML_labels, HTML_suffixes },
749 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
750 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
751 { "lua", Lua_help, Lua_functions, Lua_suffixes },
752 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
753 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
754 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
755 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
756 { "php", PHP_help, PHP_functions, PHP_suffixes },
757 { "postscript",PS_help, PS_functions, PS_suffixes },
758 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
759 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
760 { "python", Python_help, Python_functions, Python_suffixes },
761 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
762 { "tex", TeX_help, TeX_commands, TeX_suffixes },
763 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
764 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
765 { "auto", auto_help }, /* default guessing scheme */
766 { "none", none_help, just_read_file }, /* regexp matching only */
767 { NULL } /* end of list */
771 static void
772 print_language_names (void)
774 language *lang;
775 const char **name, **ext;
777 puts ("\nThese are the currently supported languages, along with the\n\
778 default file names and dot suffixes:");
779 for (lang = lang_names; lang->name != NULL; lang++)
781 printf (" %-*s", 10, lang->name);
782 if (lang->filenames != NULL)
783 for (name = lang->filenames; *name != NULL; name++)
784 printf (" %s", *name);
785 if (lang->suffixes != NULL)
786 for (ext = lang->suffixes; *ext != NULL; ext++)
787 printf (" .%s", *ext);
788 puts ("");
790 puts ("where `auto' means use default language for files based on file\n\
791 name suffix, and `none' means only do regexp processing on files.\n\
792 If no language is specified and no matching suffix is found,\n\
793 the first line of the file is read for a sharp-bang (#!) sequence\n\
794 followed by the name of an interpreter. If no such sequence is found,\n\
795 Fortran is tried first; if no tags are found, C is tried next.\n\
796 When parsing any C file, a \"class\" or \"template\" keyword\n\
797 switches to C++.");
798 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
800 For detailed help on a given language use, for example,\n\
801 etags --help --lang=ada.");
804 #ifndef EMACS_NAME
805 # define EMACS_NAME "standalone"
806 #endif
807 #ifndef VERSION
808 # define VERSION "17.38.1.4"
809 #endif
810 static _Noreturn void
811 print_version (void)
813 char emacs_copyright[] = COPYRIGHT;
815 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
816 puts (emacs_copyright);
817 puts ("This program is distributed under the terms in ETAGS.README");
819 exit (EXIT_SUCCESS);
/* Unless the build overrides it, print_help leaves out the entries
   that are guarded by this macro.  */
#if !defined PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif
/* Print help and exit successfully; never returns.

   ARGBUFFER is the argument array built by main, terminated by an
   entry whose arg_type is at_end.  If it contains at_language
   entries, only the help text of each such language is printed
   (separated by blank lines).  Otherwise the general usage message
   and the option list are printed, followed by the language table
   from print_language_names and the bug-report address.  Which
   options are listed depends on CTAGS and on
   PRINT_UNDOCUMENTED_OPTIONS_HELP.  */
826 static _Noreturn void
827 print_help (argument *argbuffer)
829 bool help_for_lang = FALSE;
/* First pass: language-specific help for every --language argument.  */
831 for (; argbuffer->arg_type != at_end; argbuffer++)
832 if (argbuffer->arg_type == at_language)
834 if (help_for_lang)
835 puts ("");
836 puts (argbuffer->lang->help);
837 help_for_lang = TRUE;
/* Language help was requested and printed: skip the general help.  */
840 if (help_for_lang)
841 exit (EXIT_SUCCESS);
/* General help: usage line, then one entry per option.  */
843 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
845 These are the options accepted by %s.\n", progname, progname);
846 puts ("You may use unambiguous abbreviations for the long option names.");
847 puts (" A - as file name means read names from stdin (one per line).\n\
848 Absolute names are stored in the output file as they are.\n\
849 Relative ones are stored relative to the output file's directory.\n");
851 puts ("-a, --append\n\
852 Append tag entries to existing tags file.");
854 puts ("--packages-only\n\
855 For Ada files, only generate tags for packages.");
857 if (CTAGS)
858 puts ("-B, --backward-search\n\
859 Write the search commands for the tag entries using '?', the\n\
860 backward-search command instead of '/', the forward-search command.");
862 /* This option is mostly obsolete, because etags can now automatically
863 detect C++. Retained for backward compatibility and for debugging and
864 experimentation. In principle, we could want to tag as C++ even
865 before any "class" or "template" keyword.
866 puts ("-C, --c++\n\
867 Treat files whose name suffix defaults to C language as C++ files.");
870 puts ("--declarations\n\
871 In C and derived languages, create tags for function declarations,");
872 if (CTAGS)
873 puts ("\tand create tags for extern variables if --globals is used.");
874 else
875 puts
876 ("\tand create tags for extern variables unless --no-globals is used.");
878 if (CTAGS)
879 puts ("-d, --defines\n\
880 Create tag entries for C #define constants and enum constants, too.");
881 else
882 puts ("-D, --no-defines\n\
883 Don't create tag entries for C #define constants and enum constants.\n\
884 This makes the tags file smaller.");
886 if (!CTAGS)
887 puts ("-i FILE, --include=FILE\n\
888 Include a note in tag file indicating that, when searching for\n\
889 a tag, one should also consult the tags file FILE after\n\
890 checking the current file.");
892 puts ("-l LANG, --language=LANG\n\
893 Force the following files to be considered as written in the\n\
894 named language up to the next --language=LANG option.");
896 if (CTAGS)
897 puts ("--globals\n\
898 Create tag entries for global variables in some languages.");
899 else
900 puts ("--no-globals\n\
901 Do not create tag entries for global variables in some\n\
902 languages. This makes the tags file smaller.");
904 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
905 puts ("--no-line-directive\n\
906 Ignore #line preprocessor directives in C and derived languages.");
908 if (CTAGS)
909 puts ("--members\n\
910 Create tag entries for members of structures in some languages.");
911 else
912 puts ("--no-members\n\
913 Do not create tag entries for members of structures\n\
914 in some languages.");
916 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
917 Make a tag for each line matching a regular expression pattern\n\
918 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
919 files only. REGEXFILE is a file containing one REGEXP per line.\n\
920 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
921 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
922 puts (" If TAGNAME/ is present, the tags created are named.\n\
923 For example Tcl named tags can be created with:\n\
924 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
925 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
926 `m' means to allow multi-line matches, `s' implies `m' and\n\
927 causes dot to match any character, including newline.");
929 puts ("-R, --no-regex\n\
930 Don't create tags from regexps for the following files.");
932 puts ("-I, --ignore-indentation\n\
933 In C and C++ do not assume that a closing brace in the first\n\
934 column is the final brace of a function or structure definition.");
936 puts ("-o FILE, --output=FILE\n\
937 Write the tags to FILE.");
939 puts ("--parse-stdin=NAME\n\
940 Read from standard input and record tags as belonging to file NAME.");
942 if (CTAGS)
944 puts ("-t, --typedefs\n\
945 Generate tag entries for C and Ada typedefs.");
946 puts ("-T, --typedefs-and-c++\n\
947 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
948 and C++ member functions.");
951 if (CTAGS)
952 puts ("-u, --update\n\
953 Update the tag entries for the given files, leaving tag\n\
954 entries for other files in place. Currently, this is\n\
955 implemented by deleting the existing entries for the given\n\
956 files and then rewriting the new entries at the end of the\n\
957 tags file. It is often faster to simply rebuild the entire\n\
958 tag file than to use this.");
960 if (CTAGS)
962 puts ("-v, --vgrind\n\
963 Print on the standard output an index of items intended for\n\
964 human consumption, similar to the output of vgrind. The index\n\
965 is sorted, and gives the page number of each item.");
967 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
968 puts ("-w, --no-duplicates\n\
969 Do not create duplicate tag entries, for compatibility with\n\
970 traditional ctags.");
972 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
973 puts ("-w, --no-warn\n\
974 Suppress warning messages about duplicate tag entries.");
976 puts ("-x, --cxref\n\
977 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
978 The output uses line numbers instead of page numbers, but\n\
979 beyond that the differences are cosmetic; try both to see\n\
980 which you like.");
983 puts ("-V, --version\n\
984 Print the version of the program.\n\
985 -h, --help\n\
986 Print this help message.\n\
987 Followed by one or more `--language' options prints detailed\n\
988 help about tag generation for the specified languages.");
990 print_language_names ();
992 puts ("");
993 puts ("Report bugs to bug-gnu-emacs@gnu.org");
995 exit (EXIT_SUCCESS);
1000 main (int argc, char **argv)
1002 int i;
1003 unsigned int nincluded_files;
1004 char **included_files;
1005 argument *argbuffer;
1006 int current_arg, file_count;
1007 linebuffer filename_lb;
1008 bool help_asked = FALSE;
1009 ptrdiff_t len;
1010 char *optstring;
1011 int opt;
1014 #ifdef DOS_NT
1015 _fmode = O_BINARY; /* all of files are treated as binary files */
1016 #endif /* DOS_NT */
1018 progname = argv[0];
1019 nincluded_files = 0;
1020 included_files = xnew (argc, char *);
1021 current_arg = 0;
1022 file_count = 0;
1024 /* Allocate enough no matter what happens. Overkill, but each one
1025 is small. */
1026 argbuffer = xnew (argc, argument);
1029 * Always find typedefs and structure tags.
1030 * Also default to find macro constants, enum constants, struct
1031 * members and global variables. Do it for both etags and ctags.
1033 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1034 globals = members = TRUE;
1036 /* When the optstring begins with a '-' getopt_long does not rearrange the
1037 non-options arguments to be at the end, but leaves them alone. */
1038 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1039 (CTAGS) ? "BxdtTuvw" : "Di:",
1040 "");
1042 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1043 switch (opt)
1045 case 0:
1046 /* If getopt returns 0, then it has already processed a
1047 long-named option. We should do nothing. */
1048 break;
1050 case 1:
1051 /* This means that a file name has been seen. Record it. */
1052 argbuffer[current_arg].arg_type = at_filename;
1053 argbuffer[current_arg].what = optarg;
1054 len = strlen (optarg);
1055 if (whatlen_max < len)
1056 whatlen_max = len;
1057 ++current_arg;
1058 ++file_count;
1059 break;
1061 case STDIN:
1062 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1063 argbuffer[current_arg].arg_type = at_stdin;
1064 argbuffer[current_arg].what = optarg;
1065 len = strlen (optarg);
1066 if (whatlen_max < len)
1067 whatlen_max = len;
1068 ++current_arg;
1069 ++file_count;
1070 if (parsing_stdin)
1071 fatal ("cannot parse standard input more than once", (char *)NULL);
1072 parsing_stdin = TRUE;
1073 break;
1075 /* Common options. */
1076 case 'a': append_to_tagfile = TRUE; break;
1077 case 'C': cplusplus = TRUE; break;
1078 case 'f': /* for compatibility with old makefiles */
1079 case 'o':
1080 if (tagfile)
1082 error ("-o option may only be given once.");
1083 suggest_asking_for_help ();
1084 /* NOTREACHED */
1086 tagfile = optarg;
1087 break;
1088 case 'I':
1089 case 'S': /* for backward compatibility */
1090 ignoreindent = TRUE;
1091 break;
1092 case 'l':
1094 language *lang = get_language_from_langname (optarg);
1095 if (lang != NULL)
1097 argbuffer[current_arg].lang = lang;
1098 argbuffer[current_arg].arg_type = at_language;
1099 ++current_arg;
1102 break;
1103 case 'c':
1104 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105 optarg = concat (optarg, "i", ""); /* memory leak here */
1106 /* FALLTHRU */
1107 case 'r':
1108 argbuffer[current_arg].arg_type = at_regexp;
1109 argbuffer[current_arg].what = optarg;
1110 len = strlen (optarg);
1111 if (whatlen_max < len)
1112 whatlen_max = len;
1113 ++current_arg;
1114 break;
1115 case 'R':
1116 argbuffer[current_arg].arg_type = at_regexp;
1117 argbuffer[current_arg].what = NULL;
1118 ++current_arg;
1119 break;
1120 case 'V':
1121 print_version ();
1122 break;
1123 case 'h':
1124 case 'H':
1125 help_asked = TRUE;
1126 break;
1128 /* Etags options */
1129 case 'D': constantypedefs = FALSE; break;
1130 case 'i': included_files[nincluded_files++] = optarg; break;
1132 /* Ctags options. */
1133 case 'B': searchar = '?'; break;
1134 case 'd': constantypedefs = TRUE; break;
1135 case 't': typedefs = TRUE; break;
1136 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1137 case 'u': update = TRUE; break;
1138 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1139 case 'x': cxref_style = TRUE; break;
1140 case 'w': no_warnings = TRUE; break;
1141 default:
1142 suggest_asking_for_help ();
1143 /* NOTREACHED */
1146 /* No more options. Store the rest of arguments. */
1147 for (; optind < argc; optind++)
1149 argbuffer[current_arg].arg_type = at_filename;
1150 argbuffer[current_arg].what = argv[optind];
1151 len = strlen (argv[optind]);
1152 if (whatlen_max < len)
1153 whatlen_max = len;
1154 ++current_arg;
1155 ++file_count;
1158 argbuffer[current_arg].arg_type = at_end;
1160 if (help_asked)
1161 print_help (argbuffer);
1162 /* NOTREACHED */
1164 if (nincluded_files == 0 && file_count == 0)
1166 error ("no input files specified.");
1167 suggest_asking_for_help ();
1168 /* NOTREACHED */
1171 if (tagfile == NULL)
1172 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1173 cwd = etags_getcwd (); /* the current working directory */
1174 if (cwd[strlen (cwd) - 1] != '/')
1176 char *oldcwd = cwd;
1177 cwd = concat (oldcwd, "/", "");
1178 free (oldcwd);
1181 /* Compute base directory for relative file names. */
1182 if (streq (tagfile, "-")
1183 || strneq (tagfile, "/dev/", 5))
1184 tagfiledir = cwd; /* relative file names are relative to cwd */
1185 else
1187 canonicalize_filename (tagfile);
1188 tagfiledir = absolute_dirname (tagfile, cwd);
1191 init (); /* set up boolean "functions" */
1193 linebuffer_init (&lb);
1194 linebuffer_init (&filename_lb);
1195 linebuffer_init (&filebuf);
1196 linebuffer_init (&token_name);
1198 if (!CTAGS)
1200 if (streq (tagfile, "-"))
1202 tagf = stdout;
1203 #ifdef DOS_NT
1204 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205 doesn't take effect until after `stdout' is already open). */
1206 if (!isatty (fileno (stdout)))
1207 setmode (fileno (stdout), O_BINARY);
1208 #endif /* DOS_NT */
1210 else
1211 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1212 if (tagf == NULL)
1213 pfatal (tagfile);
1217 * Loop through files finding functions.
1219 for (i = 0; i < current_arg; i++)
1221 static language *lang; /* non-NULL if language is forced */
1222 char *this_file;
1224 switch (argbuffer[i].arg_type)
1226 case at_language:
1227 lang = argbuffer[i].lang;
1228 break;
1229 case at_regexp:
1230 analyse_regex (argbuffer[i].what);
1231 break;
1232 case at_filename:
1233 this_file = argbuffer[i].what;
1234 /* Input file named "-" means read file names from stdin
1235 (one per line) and use them. */
1236 if (streq (this_file, "-"))
1238 if (parsing_stdin)
1239 fatal ("cannot parse standard input AND read file names from it",
1240 (char *)NULL);
1241 while (readline_internal (&filename_lb, stdin) > 0)
1242 process_file_name (filename_lb.buffer, lang);
1244 else
1245 process_file_name (this_file, lang);
1246 break;
1247 case at_stdin:
1248 this_file = argbuffer[i].what;
1249 process_file (stdin, this_file, lang);
1250 break;
1254 free_regexps ();
1255 free (lb.buffer);
1256 free (filebuf.buffer);
1257 free (token_name.buffer);
1259 if (!CTAGS || cxref_style)
1261 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262 put_entries (nodehead);
1263 free_tree (nodehead);
1264 nodehead = NULL;
1265 if (!CTAGS)
1267 fdesc *fdp;
1269 /* Output file entries that have no tags. */
1270 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1271 if (!fdp->written)
1272 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1274 while (nincluded_files-- > 0)
1275 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1277 if (fclose (tagf) == EOF)
1278 pfatal (tagfile);
1281 exit (EXIT_SUCCESS);
1284 /* From here on, we are in (CTAGS && !cxref_style) */
1285 if (update)
1287 char *cmd =
1288 xmalloc (strlen (tagfile) + whatlen_max +
1289 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290 for (i = 0; i < current_arg; ++i)
1292 switch (argbuffer[i].arg_type)
1294 case at_filename:
1295 case at_stdin:
1296 break;
1297 default:
1298 continue; /* the for loop */
1300 strcpy (cmd, "mv ");
1301 strcat (cmd, tagfile);
1302 strcat (cmd, " OTAGS;fgrep -v '\t");
1303 strcat (cmd, argbuffer[i].what);
1304 strcat (cmd, "\t' OTAGS >");
1305 strcat (cmd, tagfile);
1306 strcat (cmd, ";rm OTAGS");
1307 if (system (cmd) != EXIT_SUCCESS)
1308 fatal ("failed to execute shell command", (char *)NULL);
1310 free (cmd);
1311 append_to_tagfile = TRUE;
1314 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1315 if (tagf == NULL)
1316 pfatal (tagfile);
1317 put_entries (nodehead); /* write all the tags (CTAGS) */
1318 free_tree (nodehead);
1319 nodehead = NULL;
1320 if (fclose (tagf) == EOF)
1321 pfatal (tagfile);
1323 if (CTAGS)
1324 if (append_to_tagfile || update)
1326 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1327 /* Maybe these should be used:
1328 setenv ("LC_COLLATE", "C", 1);
1329 setenv ("LC_ALL", "C", 1); */
1330 strcpy (cmd, "sort -u -o ");
1331 strcat (cmd, tagfile);
1332 strcat (cmd, " ");
1333 strcat (cmd, tagfile);
1334 exit (system (cmd));
1336 return EXIT_SUCCESS;
1341 * Return a compressor given the file name. If EXTPTR is non-zero,
1342 * return a pointer into FILE where the compressor-specific
1343 * extension begins. If no compressor is found, NULL is returned
1344 * and EXTPTR is not significant.
1345 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1347 static compressor *
1348 get_compressor_from_suffix (char *file, char **extptr)
1350 compressor *compr;
1351 char *slash, *suffix;
1353 /* File has been processed by canonicalize_filename,
1354 so we don't need to consider backslashes on DOS_NT. */
1355 slash = etags_strrchr (file, '/');
1356 suffix = etags_strrchr (file, '.');
1357 if (suffix == NULL || suffix < slash)
1358 return NULL;
1359 if (extptr != NULL)
1360 *extptr = suffix;
1361 suffix += 1;
1362 /* Let those poor souls who live with DOS 8+3 file name limits get
1363 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364 Only the first do loop is run if not MSDOS */
1367 for (compr = compressors; compr->suffix != NULL; compr++)
1368 if (streq (compr->suffix, suffix))
1369 return compr;
1370 if (!MSDOS)
1371 break; /* do it only once: not really a loop */
1372 if (extptr != NULL)
1373 *extptr = ++suffix;
1374 } while (*suffix != '\0');
1375 return NULL;
1381 * Return a language given the name.
1383 static language *
1384 get_language_from_langname (const char *name)
1386 language *lang;
1388 if (name == NULL)
1389 error ("empty language name");
1390 else
1392 for (lang = lang_names; lang->name != NULL; lang++)
1393 if (streq (name, lang->name))
1394 return lang;
1395 error ("unknown language \"%s\"", name);
1398 return NULL;
1403 * Return a language given the interpreter name.
1405 static language *
1406 get_language_from_interpreter (char *interpreter)
1408 language *lang;
1409 const char **iname;
1411 if (interpreter == NULL)
1412 return NULL;
1413 for (lang = lang_names; lang->name != NULL; lang++)
1414 if (lang->interpreters != NULL)
1415 for (iname = lang->interpreters; *iname != NULL; iname++)
1416 if (streq (*iname, interpreter))
1417 return lang;
1419 return NULL;
1425 * Return a language given the file name.
1427 static language *
1428 get_language_from_filename (char *file, int case_sensitive)
1430 language *lang;
1431 const char **name, **ext, *suffix;
1433 /* Try whole file name first. */
1434 for (lang = lang_names; lang->name != NULL; lang++)
1435 if (lang->filenames != NULL)
1436 for (name = lang->filenames; *name != NULL; name++)
1437 if ((case_sensitive)
1438 ? streq (*name, file)
1439 : strcaseeq (*name, file))
1440 return lang;
1442 /* If not found, try suffix after last dot. */
1443 suffix = etags_strrchr (file, '.');
1444 if (suffix == NULL)
1445 return NULL;
1446 suffix += 1;
1447 for (lang = lang_names; lang->name != NULL; lang++)
1448 if (lang->suffixes != NULL)
1449 for (ext = lang->suffixes; *ext != NULL; ext++)
1450 if ((case_sensitive)
1451 ? streq (*ext, suffix)
1452 : strcaseeq (*ext, suffix))
1453 return lang;
1454 return NULL;
1459 * This routine is called on each file argument.
1461 static void
1462 process_file_name (char *file, language *lang)
1464 struct stat stat_buf;
1465 FILE *inf;
1466 fdesc *fdp;
1467 compressor *compr;
1468 char *compressed_name, *uncompressed_name;
1469 char *ext, *real_name;
1470 int retval;
1472 canonicalize_filename (file);
1473 if (streq (file, tagfile) && !streq (tagfile, "-"))
1475 error ("skipping inclusion of %s in self.", file);
1476 return;
1478 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1480 compressed_name = NULL;
1481 real_name = uncompressed_name = savestr (file);
1483 else
1485 real_name = compressed_name = savestr (file);
1486 uncompressed_name = savenstr (file, ext - file);
1489 /* If the canonicalized uncompressed name
1490 has already been dealt with, skip it silently. */
1491 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1493 assert (fdp->infname != NULL);
1494 if (streq (uncompressed_name, fdp->infname))
1495 goto cleanup;
1498 if (stat (real_name, &stat_buf) != 0)
1500 /* Reset real_name and try with a different name. */
1501 real_name = NULL;
1502 if (compressed_name != NULL) /* try with the given suffix */
1504 if (stat (uncompressed_name, &stat_buf) == 0)
1505 real_name = uncompressed_name;
1507 else /* try all possible suffixes */
1509 for (compr = compressors; compr->suffix != NULL; compr++)
1511 compressed_name = concat (file, ".", compr->suffix);
1512 if (stat (compressed_name, &stat_buf) != 0)
1514 if (MSDOS)
1516 char *suf = compressed_name + strlen (file);
1517 size_t suflen = strlen (compr->suffix) + 1;
1518 for ( ; suf[1]; suf++, suflen--)
1520 memmove (suf, suf + 1, suflen);
1521 if (stat (compressed_name, &stat_buf) == 0)
1523 real_name = compressed_name;
1524 break;
1527 if (real_name != NULL)
1528 break;
1529 } /* MSDOS */
1530 free (compressed_name);
1531 compressed_name = NULL;
1533 else
1535 real_name = compressed_name;
1536 break;
1540 if (real_name == NULL)
1542 perror (file);
1543 goto cleanup;
1545 } /* try with a different name */
1547 if (!S_ISREG (stat_buf.st_mode))
1549 error ("skipping %s: it is not a regular file.", real_name);
1550 goto cleanup;
1552 if (real_name == compressed_name)
1554 char *cmd = concat (compr->command, " ", real_name);
1555 inf = (FILE *) popen (cmd, "r");
1556 free (cmd);
1558 else
1559 inf = fopen (real_name, "r");
1560 if (inf == NULL)
1562 perror (real_name);
1563 goto cleanup;
1566 process_file (inf, uncompressed_name, lang);
1568 if (real_name == compressed_name)
1569 retval = pclose (inf);
1570 else
1571 retval = fclose (inf);
1572 if (retval < 0)
1573 pfatal (file);
1575 cleanup:
1576 free (compressed_name);
1577 free (uncompressed_name);
1578 last_node = NULL;
1579 curfdp = NULL;
1580 return;
1583 static void
1584 process_file (FILE *fh, char *fn, language *lang)
1586 static const fdesc emptyfdesc;
1587 fdesc *fdp;
1589 /* Create a new input file description entry. */
1590 fdp = xnew (1, fdesc);
1591 *fdp = emptyfdesc;
1592 fdp->next = fdhead;
1593 fdp->infname = savestr (fn);
1594 fdp->lang = lang;
1595 fdp->infabsname = absolute_filename (fn, cwd);
1596 fdp->infabsdir = absolute_dirname (fn, cwd);
1597 if (filename_is_absolute (fn))
1599 /* An absolute file name. Canonicalize it. */
1600 fdp->taggedfname = absolute_filename (fn, NULL);
1602 else
1604 /* A file name relative to cwd. Make it relative
1605 to the directory of the tags file. */
1606 fdp->taggedfname = relative_filename (fn, tagfiledir);
1608 fdp->usecharno = TRUE; /* use char position when making tags */
1609 fdp->prop = NULL;
1610 fdp->written = FALSE; /* not written on tags file yet */
1612 fdhead = fdp;
1613 curfdp = fdhead; /* the current file description */
1615 find_entries (fh);
1617 /* If not Ctags, and if this is not metasource and if it contained no #line
1618 directives, we can write the tags and free all nodes pointing to
1619 curfdp. */
1620 if (!CTAGS
1621 && curfdp->usecharno /* no #line directives in this file */
1622 && !curfdp->lang->metasource)
1624 node *np, *prev;
1626 /* Look for the head of the sublist relative to this file. See add_node
1627 for the structure of the node tree. */
1628 prev = NULL;
1629 for (np = nodehead; np != NULL; prev = np, np = np->left)
1630 if (np->fdp == curfdp)
1631 break;
1633 /* If we generated tags for this file, write and delete them. */
1634 if (np != NULL)
1636 /* This is the head of the last sublist, if any. The following
1637 instructions depend on this being true. */
1638 assert (np->left == NULL);
1640 assert (fdhead == curfdp);
1641 assert (last_node->fdp == curfdp);
1642 put_entries (np); /* write tags for file curfdp->taggedfname */
1643 free_tree (np); /* remove the written nodes */
1644 if (prev == NULL)
1645 nodehead = NULL; /* no nodes left */
1646 else
1647 prev->left = NULL; /* delete the pointer to the sublist */
1653 * This routine sets up the boolean pseudo-functions which work
1654 * by setting boolean flags dependent upon the corresponding character.
1655 * Every char which is NOT in that string is not a white char. Therefore,
1656 * all of the array "_wht" is set to FALSE, and then the elements
1657 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1658 * of a char is TRUE if it is the string "white", else FALSE.
1660 static void
1661 init (void)
1663 register const char *sp;
1664 register int i;
1666 for (i = 0; i < CHARS; i++)
1667 iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1668 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1669 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1670 notinname ('\0') = notinname ('\n');
1671 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1672 begtoken ('\0') = begtoken ('\n');
1673 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1674 intoken ('\0') = intoken ('\n');
1675 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1676 endtoken ('\0') = endtoken ('\n');
1680 * This routine opens the specified file and calls the function
1681 * which finds the function and type definitions.
1683 static void
1684 find_entries (FILE *inf)
1686 char *cp;
1687 language *lang = curfdp->lang;
1688 Lang_function *parser = NULL;
1690 /* If user specified a language, use it. */
1691 if (lang != NULL && lang->function != NULL)
1693 parser = lang->function;
1696 /* Else try to guess the language given the file name. */
1697 if (parser == NULL)
1699 lang = get_language_from_filename (curfdp->infname, TRUE);
1700 if (lang != NULL && lang->function != NULL)
1702 curfdp->lang = lang;
1703 parser = lang->function;
1707 /* Else look for sharp-bang as the first two characters. */
1708 if (parser == NULL
1709 && readline_internal (&lb, inf) > 0
1710 && lb.len >= 2
1711 && lb.buffer[0] == '#'
1712 && lb.buffer[1] == '!')
1714 char *lp;
1716 /* Set lp to point at the first char after the last slash in the
1717 line or, if no slashes, at the first nonblank. Then set cp to
1718 the first successive blank and terminate the string. */
1719 lp = etags_strrchr (lb.buffer+2, '/');
1720 if (lp != NULL)
1721 lp += 1;
1722 else
1723 lp = skip_spaces (lb.buffer + 2);
1724 cp = skip_non_spaces (lp);
1725 *cp = '\0';
1727 if (strlen (lp) > 0)
1729 lang = get_language_from_interpreter (lp);
1730 if (lang != NULL && lang->function != NULL)
1732 curfdp->lang = lang;
1733 parser = lang->function;
1738 /* We rewind here, even if inf may be a pipe. We fail if the
1739 length of the first line is longer than the pipe block size,
1740 which is unlikely. */
1741 rewind (inf);
1743 /* Else try to guess the language given the case insensitive file name. */
1744 if (parser == NULL)
1746 lang = get_language_from_filename (curfdp->infname, FALSE);
1747 if (lang != NULL && lang->function != NULL)
1749 curfdp->lang = lang;
1750 parser = lang->function;
1754 /* Else try Fortran or C. */
1755 if (parser == NULL)
1757 node *old_last_node = last_node;
1759 curfdp->lang = get_language_from_langname ("fortran");
1760 find_entries (inf);
1762 if (old_last_node == last_node)
1763 /* No Fortran entries found. Try C. */
1765 /* We do not tag if rewind fails.
1766 Only the file name will be recorded in the tags file. */
1767 rewind (inf);
1768 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1769 find_entries (inf);
1771 return;
1774 if (!no_line_directive
1775 && curfdp->lang != NULL && curfdp->lang->metasource)
1776 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777 file, or anyway we parsed a file that is automatically generated from
1778 this one. If this is the case, the bingo.c file contained #line
1779 directives that generated tags pointing to this file. Let's delete
1780 them all before parsing this file, which is the real source. */
1782 fdesc **fdpp = &fdhead;
1783 while (*fdpp != NULL)
1784 if (*fdpp != curfdp
1785 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1786 /* We found one of those! We must delete both the file description
1787 and all tags referring to it. */
1789 fdesc *badfdp = *fdpp;
1791 /* Delete the tags referring to badfdp->taggedfname
1792 that were obtained from badfdp->infname. */
1793 invalidate_nodes (badfdp, &nodehead);
1795 *fdpp = badfdp->next; /* remove the bad description from the list */
1796 free_fdesc (badfdp);
1798 else
1799 fdpp = &(*fdpp)->next; /* advance the list pointer */
1802 assert (parser != NULL);
1804 /* Generic initializations before reading from file. */
1805 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1807 /* Generic initializations before parsing file with readline. */
1808 lineno = 0; /* reset global line number */
1809 charno = 0; /* reset global char number */
1810 linecharno = 0; /* reset global char number of line start */
1812 parser (inf);
1814 regex_tag_multiline ();
1819 * Check whether an implicitly named tag should be created,
1820 * then call `pfnote'.
1821 * NAME is a string that is internally copied by this function.
1823 * TAGS format specification
1824 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825 * The following is explained in some more detail in etc/ETAGS.EBNF.
1827 * make_tag creates tags with "implicit tag names" (unnamed tags)
1828 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829 * 1. NAME does not contain any of the characters in NONAM;
1830 * 2. LINESTART contains name as either a rightmost, or rightmost but
1831 * one character, substring;
1832 * 3. the character, if any, immediately before NAME in LINESTART must
1833 * be a character in NONAM;
1834 * 4. the character, if any, immediately after NAME in LINESTART must
1835 * also be a character in NONAM.
1837 * The implementation uses the notinname() macro, which recognizes the
1838 * characters stored in the string `nonam'.
1839 * etags.el needs to use the same characters that are in NONAM.
1841 static void
1842 make_tag (const char *name, /* tag name, or NULL if unnamed */
1843 int namelen, /* tag length */
1844 int is_func, /* tag is a function */
1845 char *linestart, /* start of the line where tag is */
1846 int linelen, /* length of the line where tag is */
1847 int lno, /* line number */
1848 long int cno) /* character number */
1850 bool named = (name != NULL && namelen > 0);
1851 char *nname = NULL;
1853 if (!CTAGS && named) /* maybe set named to false */
1854 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855 such that etags.el can guess a name from it. */
1857 int i;
1858 register const char *cp = name;
1860 for (i = 0; i < namelen; i++)
1861 if (notinname (*cp++))
1862 break;
1863 if (i == namelen) /* rule #1 */
1865 cp = linestart + linelen - namelen;
1866 if (notinname (linestart[linelen-1]))
1867 cp -= 1; /* rule #4 */
1868 if (cp >= linestart /* rule #2 */
1869 && (cp == linestart
1870 || notinname (cp[-1])) /* rule #3 */
1871 && strneq (name, cp, namelen)) /* rule #2 */
1872 named = FALSE; /* use implicit tag name */
1876 if (named)
1877 nname = savenstr (name, namelen);
1879 pfnote (nname, is_func, linestart, linelen, lno, cno);
1882 /* Record a tag. */
1883 static void
1884 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1885 /* tag name, or NULL if unnamed */
1886 /* tag is a function */
1887 /* start of the line where tag is */
1888 /* length of the line where tag is */
1889 /* line number */
1890 /* character number */
1892 register node *np;
1894 assert (name == NULL || name[0] != '\0');
1895 if (CTAGS && name == NULL)
1896 return;
1898 np = xnew (1, node);
1900 /* If ctags mode, change name "main" to M<thisfilename>. */
1901 if (CTAGS && !cxref_style && streq (name, "main"))
1903 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1904 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1905 fp = etags_strrchr (np->name, '.');
1906 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1907 fp[0] = '\0';
1909 else
1910 np->name = name;
1911 np->valid = TRUE;
1912 np->been_warned = FALSE;
1913 np->fdp = curfdp;
1914 np->is_func = is_func;
1915 np->lno = lno;
1916 if (np->fdp->usecharno)
1917 /* Our char numbers are 0-base, because of C language tradition?
1918 ctags compatibility? old versions compatibility? I don't know.
1919 Anyway, since emacs's are 1-base we expect etags.el to take care
1920 of the difference. If we wanted to have 1-based numbers, we would
1921 uncomment the +1 below. */
1922 np->cno = cno /* + 1 */ ;
1923 else
1924 np->cno = invalidcharno;
1925 np->left = np->right = NULL;
1926 if (CTAGS && !cxref_style)
1928 if (strlen (linestart) < 50)
1929 np->regex = concat (linestart, "$", "");
1930 else
1931 np->regex = savenstr (linestart, 50);
1933 else
1934 np->regex = savenstr (linestart, linelen);
1936 add_node (np, &nodehead);
1940 * free_tree ()
1941 * recurse on left children, iterate on right children.
1943 static void
1944 free_tree (register node *np)
1946 while (np)
1948 register node *node_right = np->right;
1949 free_tree (np->left);
1950 free (np->name);
1951 free (np->regex);
1952 free (np);
1953 np = node_right;
1958 * free_fdesc ()
1959 * delete a file description
1961 static void
1962 free_fdesc (register fdesc *fdp)
1964 free (fdp->infname);
1965 free (fdp->infabsname);
1966 free (fdp->infabsdir);
1967 free (fdp->taggedfname);
1968 free (fdp->prop);
1969 free (fdp);
1973 * add_node ()
1974 * Adds a node to the tree of nodes. In etags mode, sort by file
1975 * name. In ctags mode, sort by tag name. Make no attempt at
1976 * balancing.
1978 * add_node is the only function allowed to add nodes, so it can
1979 * maintain state.
1981 static void
1982 add_node (node *np, node **cur_node_p)
1984 register int dif;
1985 register node *cur_node = *cur_node_p;
1987 if (cur_node == NULL)
1989 *cur_node_p = np;
1990 last_node = np;
1991 return;
1994 if (!CTAGS)
1995 /* Etags Mode */
1997 /* For each file name, tags are in a linked sublist on the right
1998 pointer. The first tags of different files are a linked list
1999 on the left pointer. last_node points to the end of the last
2000 used sublist. */
2001 if (last_node != NULL && last_node->fdp == np->fdp)
2003 /* Let's use the same sublist as the last added node. */
2004 assert (last_node->right == NULL);
2005 last_node->right = np;
2006 last_node = np;
2008 else if (cur_node->fdp == np->fdp)
2010 /* Scanning the list we found the head of a sublist which is
2011 good for us. Let's scan this sublist. */
2012 add_node (np, &cur_node->right);
2014 else
2015 /* The head of this sublist is not good for us. Let's try the
2016 next one. */
2017 add_node (np, &cur_node->left);
2018 } /* if ETAGS mode */
2020 else
2022 /* Ctags Mode */
2023 dif = strcmp (np->name, cur_node->name);
2026 * If this tag name matches an existing one, then
2027 * do not add the node, but maybe print a warning.
2029 if (no_duplicates && !dif)
2031 if (np->fdp == cur_node->fdp)
2033 if (!no_warnings)
2035 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2036 np->fdp->infname, lineno, np->name);
2037 fprintf (stderr, "Second entry ignored\n");
2040 else if (!cur_node->been_warned && !no_warnings)
2042 fprintf
2043 (stderr,
2044 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045 np->fdp->infname, cur_node->fdp->infname, np->name);
2046 cur_node->been_warned = TRUE;
2048 return;
2051 /* Actually add the node */
2052 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2053 } /* if CTAGS mode */
2057 * invalidate_nodes ()
2058 * Scan the node tree and invalidate all nodes pointing to the
2059 * given file description (CTAGS case) or free them (ETAGS case).
2061 static void
2062 invalidate_nodes (fdesc *badfdp, node **npp)
2064 node *np = *npp;
2066 if (np == NULL)
2067 return;
2069 if (CTAGS)
2071 if (np->left != NULL)
2072 invalidate_nodes (badfdp, &np->left);
2073 if (np->fdp == badfdp)
2074 np->valid = FALSE;
2075 if (np->right != NULL)
2076 invalidate_nodes (badfdp, &np->right);
2078 else
2080 assert (np->fdp != NULL);
2081 if (np->fdp == badfdp)
2083 *npp = np->left; /* detach the sublist from the list */
2084 np->left = NULL; /* isolate it */
2085 free_tree (np); /* free it */
2086 invalidate_nodes (badfdp, npp);
2088 else
2089 invalidate_nodes (badfdp, &np->left);
2094 static int total_size_of_entries (node *);
2095 static int number_len (long) ATTRIBUTE_CONST;
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  Zero (and any negative value) yields 1.  */
static int
number_len (long int num)
{
  int digits = 1;

  /* Every division by ten that still leaves a positive value accounts
     for one more digit.  */
  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2108 * Return total number of characters that put_entries will output for
2109 * the nodes in the linked list at the right of the specified node.
2110 * This count is irrelevant with etags.el since emacs 19.34 at least,
2111 * but is still supplied for backward compatibility.
2113 static int
2114 total_size_of_entries (register node *np)
2116 register int total = 0;
2118 for (; np != NULL; np = np->right)
2119 if (np->valid)
2121 total += strlen (np->regex) + 1; /* pat\177 */
2122 if (np->name != NULL)
2123 total += strlen (np->name) + 1; /* name\001 */
2124 total += number_len ((long) np->lno) + 1; /* lno, */
2125 if (np->cno != invalidcharno) /* cno */
2126 total += number_len (np->cno);
2127 total += 1; /* newline */
2130 return total;
2133 static void
2134 put_entries (register node *np)
2136 register char *sp;
2137 static fdesc *fdp = NULL;
2139 if (np == NULL)
2140 return;
2142 /* Output subentries that precede this one */
2143 if (CTAGS)
2144 put_entries (np->left);
2146 /* Output this entry */
2147 if (np->valid)
2149 if (!CTAGS)
2151 /* Etags mode */
2152 if (fdp != np->fdp)
2154 fdp = np->fdp;
2155 fprintf (tagf, "\f\n%s,%d\n",
2156 fdp->taggedfname, total_size_of_entries (np));
2157 fdp->written = TRUE;
2159 fputs (np->regex, tagf);
2160 fputc ('\177', tagf);
2161 if (np->name != NULL)
2163 fputs (np->name, tagf);
2164 fputc ('\001', tagf);
2166 fprintf (tagf, "%d,", np->lno);
2167 if (np->cno != invalidcharno)
2168 fprintf (tagf, "%ld", np->cno);
2169 fputs ("\n", tagf);
2171 else
2173 /* Ctags mode */
2174 if (np->name == NULL)
2175 error ("internal error: NULL name in ctags mode.");
2177 if (cxref_style)
2179 if (vgrind_style)
2180 fprintf (stdout, "%s %s %d\n",
2181 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2182 else
2183 fprintf (stdout, "%-16s %3d %-16s %s\n",
2184 np->name, np->lno, np->fdp->taggedfname, np->regex);
2186 else
2188 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2190 if (np->is_func)
2191 { /* function or #define macro with args */
2192 putc (searchar, tagf);
2193 putc ('^', tagf);
2195 for (sp = np->regex; *sp; sp++)
2197 if (*sp == '\\' || *sp == searchar)
2198 putc ('\\', tagf);
2199 putc (*sp, tagf);
2201 putc (searchar, tagf);
2203 else
2204 { /* anything else; text pattern inadequate */
2205 fprintf (tagf, "%d", np->lno);
2207 putc ('\n', tagf);
2210 } /* if this node contains a valid tag */
2212 /* Output subentries that follow this one */
2213 put_entries (np->right);
2214 if (!CTAGS)
2215 put_entries (np->left);
/* C extensions.
   The bits covered by C_EXT select the dialect.  Note that C_STAR and
   C_JAVA both contain the C_PLPL bit; keyword entries meant for Java but
   not for plain C++ therefore use (C_JAVA & ~C_PLPL) as their mask.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 * Each keyword known to the C-family tagger is classified as one of
 * these types; st_none is returned for anything that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2243 /* Feed stuff between (but not including) %[ and %] lines to:
2244 gperf -m 5
2246 %compare-strncmp
2247 %enum
2248 %struct-type
2249 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2251 if, 0, st_C_ignore
2252 for, 0, st_C_ignore
2253 while, 0, st_C_ignore
2254 switch, 0, st_C_ignore
2255 return, 0, st_C_ignore
2256 __attribute__, 0, st_C_attribute
2257 GTY, 0, st_C_attribute
2258 @interface, 0, st_C_objprot
2259 @protocol, 0, st_C_objprot
2260 @implementation,0, st_C_objimpl
2261 @end, 0, st_C_objend
2262 import, (C_JAVA & ~C_PLPL), st_C_ignore
2263 package, (C_JAVA & ~C_PLPL), st_C_ignore
2264 friend, C_PLPL, st_C_ignore
2265 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2266 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2267 interface, (C_JAVA & ~C_PLPL), st_C_struct
2268 class, 0, st_C_class
2269 namespace, C_PLPL, st_C_struct
2270 domain, C_STAR, st_C_struct
2271 union, 0, st_C_struct
2272 struct, 0, st_C_struct
2273 extern, 0, st_C_extern
2274 enum, 0, st_C_enum
2275 typedef, 0, st_C_typedef
2276 define, 0, st_C_define
2277 undef, 0, st_C_define
2278 operator, C_PLPL, st_C_operator
2279 template, 0, st_C_template
2280 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2281 DEFUN, 0, st_C_gnumacro
2282 SYSCALL, 0, st_C_gnumacro
2283 ENTRY, 0, st_C_gnumacro
2284 PSEUDO, 0, st_C_gnumacro
2285 # These are defined inside C functions, so currently they are not met.
2286 # EXFUN used in glibc, DEFVAR_* in emacs.
2287 #EXFUN, 0, st_C_gnumacro
2288 #DEFVAR_, 0, st_C_gnumacro
2290 and replace lines between %< and %> with its output, then:
2291 - remove the #if characterset check
2292 - make in_word_set static and not inline. */
2293 /*%<*/
2294 /* C code produced by gperf version 3.0.1 */
2295 /* Command-line: gperf -m 5 */
2296 /* Computed positions: -k'2-3' */
/* NOTE: everything up to the closing marker is gperf output; regenerate it
   from the keyword list rather than editing it by hand.  */
/* One keyword table entry: the keyword, the language mask it is restricted
   to (0 means no restriction, see C_symtype), and its symbol type.  */
2298 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2299 /* maximum key range = 33, duplicates = 0 */
/* Hash STR, a keyword candidate of LEN characters: the result is LEN plus
   the association value of the second character and, unless LEN is exactly
   2, of the third character as well.  The only visible caller,
   in_word_set, calls this only for LEN within
   [MIN_WORD_LENGTH, MAX_WORD_LENGTH], so str[1] (and str[2] when LEN > 2)
   lie inside the token.  */
2301 static int
2302 hash (const char *str, int len)
/* Association values indexed by character code; 35 (greater than the
   largest valid hash value, 34) marks characters that occur in no keyword
   at the hashed positions.  */
2304 static char const asso_values[] =
2306 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2307 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2308 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2309 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2310 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2311 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2312 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2313 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2314 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2315 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2316 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2317 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2318 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2319 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2320 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2321 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2322 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2323 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2324 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2325 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2326 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2327 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2328 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2329 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2330 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2331 35, 35, 35, 35, 35, 35
2333 int hval = len;
/* The switch is on the length: any length other than 2 adds the third
   character and then falls through to add the second one.  */
2335 switch (hval)
2337 default:
2338 hval += asso_values[(unsigned char) str[2]];
2339 /*FALLTHROUGH*/
2340 case 2:
2341 hval += asso_values[(unsigned char) str[1]];
2342 break;
2344 return hval;
/* Look up the LEN characters starting at STR in the keyword table.
   Returns a pointer to the matching table entry, or 0 when the text is
   not a keyword.  Only the first LEN characters of STR are examined, so
   STR need not be NUL-terminated at LEN.  */
2347 static struct C_stab_entry *
2348 in_word_set (register const char *str, register unsigned int len)
2350 enum
2352 TOTAL_KEYWORDS = 33,
2353 MIN_WORD_LENGTH = 2,
2354 MAX_WORD_LENGTH = 15,
2355 MIN_HASH_VALUE = 2,
2356 MAX_HASH_VALUE = 34
/* Table indexed by hash value.  The two leading empty entries fill the
   hash values 0 and 1, which are below MIN_HASH_VALUE.  */
2359 static struct C_stab_entry wordlist[] =
2361 {""}, {""},
2362 {"if", 0, st_C_ignore},
2363 {"GTY", 0, st_C_attribute},
2364 {"@end", 0, st_C_objend},
2365 {"union", 0, st_C_struct},
2366 {"define", 0, st_C_define},
2367 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2368 {"template", 0, st_C_template},
2369 {"operator", C_PLPL, st_C_operator},
2370 {"@interface", 0, st_C_objprot},
2371 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2372 {"friend", C_PLPL, st_C_ignore},
2373 {"typedef", 0, st_C_typedef},
2374 {"return", 0, st_C_ignore},
2375 {"@implementation",0, st_C_objimpl},
2376 {"@protocol", 0, st_C_objprot},
2377 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2378 {"extern", 0, st_C_extern},
2379 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2380 {"struct", 0, st_C_struct},
2381 {"domain", C_STAR, st_C_struct},
2382 {"switch", 0, st_C_ignore},
2383 {"enum", 0, st_C_enum},
2384 {"for", 0, st_C_ignore},
2385 {"namespace", C_PLPL, st_C_struct},
2386 {"class", 0, st_C_class},
2387 {"while", 0, st_C_ignore},
2388 {"undef", 0, st_C_define},
2389 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2390 {"__attribute__", 0, st_C_attribute},
2391 {"SYSCALL", 0, st_C_gnumacro},
2392 {"ENTRY", 0, st_C_gnumacro},
2393 {"PSEUDO", 0, st_C_gnumacro},
2394 {"DEFUN", 0, st_C_gnumacro}
/* Texts whose length no keyword has cannot match and are rejected
   before hashing.  */
2397 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2399 int key = hash (str, len);
2401 if (key <= MAX_HASH_VALUE && key >= 0)
2403 const char *s = wordlist[key].name;
/* The hash only selects a candidate: confirm it by comparing all LEN
   characters and checking that the keyword ends exactly there.  */
2405 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2406 return &wordlist[key];
2409 return 0;
2411 /*%>*/
2413 static enum sym_type
2414 C_symtype (char *str, int len, int c_ext)
2416 register struct C_stab_entry *se = in_word_set (str, len);
2418 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2419 return st_none;
2420 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;	/* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 * Until a class name has been seen it points to a placeholder string.
 */
static const char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In
    this case the token contained here is the pattern that will be used to
    create a tag.
  */
  bool valid;			/* a tag is made from this token only when
				   this is set; the token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2538 * Variables and functions for dealing with nested structures.
2539 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2541 static void pushclass_above (int, char *, int);
2542 static void popclass_above (int);
2543 static void write_classname (linebuffer *, const char *qualifier);
2545 static struct {
2546 char **cname; /* nested class names */
2547 int *bracelev; /* nested class brace level */
2548 int nl; /* class nesting level (elements used) */
2549 int size; /* length of the array */
2550 } cstack; /* stack for nested declaration tags */
2551 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2552 #define nestlev (cstack.nl)
2553 /* After struct keyword or in struct body, not inside a nested function. */
2554 #define instruct (structdef == snone && nestlev > 0 \
2555 && bracelev == cstack.bracelev[nestlev-1] + 1)
2557 static void
2558 pushclass_above (int bracelev, char *str, int len)
2560 int nl;
2562 popclass_above (bracelev);
2563 nl = cstack.nl;
2564 if (nl >= cstack.size)
2566 int size = cstack.size *= 2;
2567 xrnew (cstack.cname, size, char *);
2568 xrnew (cstack.bracelev, size, int);
2570 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2571 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2572 cstack.bracelev[nl] = bracelev;
2573 cstack.nl = nl + 1;
2576 static void
2577 popclass_above (int bracelev)
2579 int nl;
2581 for (nl = cstack.nl - 1;
2582 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2583 nl--)
2585 free (cstack.cname[nl]);
2586 cstack.nl = nl;
2590 static void
2591 write_classname (linebuffer *cn, const char *qualifier)
2593 int i, len;
2594 int qlen = strlen (qualifier);
2596 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2598 len = 0;
2599 cn->len = 0;
2600 cn->buffer[0] = '\0';
2602 else
2604 len = strlen (cstack.cname[0]);
2605 linebuffer_setlen (cn, len);
2606 strcpy (cn->buffer, cstack.cname[0]);
2608 for (i = 1; i < cstack.nl; i++)
2610 char *s = cstack.cname[i];
2611 if (s == NULL)
2612 continue;
2613 linebuffer_setlen (cn, len + qlen + strlen (s));
2614 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2619 static bool consider_token (char *, int, int, int *, int, int, bool *);
2620 static void make_C_tag (bool);
2623 * consider_token ()
2624 * checks to see if the current token is at the start of a
2625 * function or variable, or corresponds to a typedef, or
2626 * is a struct/union/enum tag, or #define, or an enum constant.
2628 * *IS_FUNC gets TRUE if the token is a function or #define macro
2629 * with args. C_EXTP points to which language we are looking at.
2631 * Globals
2632 * fvdef IN OUT
2633 * structdef IN OUT
2634 * definedef IN OUT
2635 * typdef IN OUT
2636 * objdef IN OUT
/* Examine one token and advance the definedef, typdef, structdef, objdef
   and fvdef state machines accordingly.  Returns TRUE when the token is a
   candidate for a tag, FALSE otherwise.  On some of the TRUE paths
   *IS_FUNC_OR_VAR is also set; on the others it is left as the caller
   initialized it.  */
2639 static bool
2640 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2641 /* IN: token pointer */
2642 /* IN: token length */
2643 /* IN: first char after the token */
2644 /* IN, OUT: C extensions mask */
2645 /* IN: brace level */
2646 /* IN: parenthesis level */
2647 /* OUT: function or variable found */
2649 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2650 structtype is the type of the preceding struct-like keyword, and
2651 structbracelev is the brace level where it has been seen. */
/* These are static so that they keep their values from one call to the
   next.  toktype is static too, although it is reassigned on every call. */
2652 static enum sym_type structtype;
2653 static int structbracelev;
2654 static enum sym_type toktype;
/* Classify the token as a keyword of the current language, or st_none. */
2657 toktype = C_symtype (str, len, *c_extp);
2660 * Skip __attribute__
/* Only the flag is set here; nothing in this function clears it. */
2662 if (toktype == st_C_attribute)
2664 inattribute = TRUE;
2665 return FALSE;
2669 * Advance the definedef state machine.
2671 switch (definedef)
2673 case dnone:
2674 /* We're not on a preprocessor line. */
2675 if (toktype == st_C_gnumacro)
2677 fvdef = fdefunkey;
2678 return FALSE;
2680 break;
2681 case dsharpseen:
/* First token after '#': only define/undef (both st_C_define) are of
   interest, any other directive makes the rest of the line ignored. */
2682 if (toktype == st_C_define)
2684 definedef = ddefineseen;
2686 else
2688 definedef = dignorerest;
2690 return FALSE;
2691 case ddefineseen:
2693 * Make a tag for any macro, unless it is a constant
2694 * and constantypedefs is FALSE.
/* This token is the macro name; it counts as a function when the very
   next character is an open parenthesis. */
2696 definedef = dignorerest;
2697 *is_func_or_var = (c == '(');
2698 if (!*is_func_or_var && !constantypedefs)
2699 return FALSE;
2700 else
2701 return TRUE;
2702 case dignorerest:
2703 return FALSE;
2704 default:
2705 error ("internal error: definedef value.");
2709 * Now typedefs
2711 switch (typdef)
2713 case tnone:
2714 if (toktype == st_C_typedef)
/* The keyword itself is never tagged.  The typedef machine is started
   only when `typedefs' is set, but the function/variable machine is
   reset in either case. */
2716 if (typedefs)
2717 typdef = tkeyseen;
2718 fvextern = FALSE;
2719 fvdef = fvnone;
2720 return FALSE;
2722 break;
2723 case tkeyseen:
/* A plain identifier or a struct-like keyword right after `typedef'
   is taken as the start of the type being aliased. */
2724 switch (toktype)
2726 case st_none:
2727 case st_C_class:
2728 case st_C_struct:
2729 case st_C_enum:
2730 typdef = ttypeseen;
2732 break;
2733 case ttypeseen:
2734 if (structdef == snone && fvdef == fvnone)
2736 fvdef = fvnameseen;
2737 return TRUE;
2739 break;
2740 case tend:
/* Struct-like keywords are not candidates here; any other token is. */
2741 switch (toktype)
2743 case st_C_class:
2744 case st_C_struct:
2745 case st_C_enum:
2746 return FALSE;
2748 return TRUE;
/* Struct-like keywords (and Java's extends/implements). */
2751 switch (toktype)
2753 case st_C_javastruct:
2754 if (structdef == stagseen)
2755 structdef = scolonseen;
2756 return FALSE;
2757 case st_C_template:
2758 case st_C_class:
2759 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2760 && bracelev == 0
2761 && definedef == dnone && structdef == snone
2762 && typdef == tnone && fvdef == fvnone)
2763 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
/* `template' only takes part in the language detection above; `class'
   goes on to be handled like struct and enum. */
2764 if (toktype == st_C_template)
2765 break;
2766 /* FALLTHRU */
2767 case st_C_struct:
2768 case st_C_enum:
2769 if (parlev == 0
2770 && fvdef != vignore
2771 && (typdef == tkeyseen
2772 || (typedefs_or_cplusplus && structdef == snone)))
/* Remember which keyword was seen and at which brace level; the enum
   constant test further down relies on both. */
2774 structdef = skeyseen;
2775 structtype = toktype;
2776 structbracelev = bracelev;
2777 if (fvdef == fvnameseen)
2778 fvdef = fvnone;
2780 return FALSE;
/* The token following a struct-like keyword is the struct tag. */
2783 if (structdef == skeyseen)
2785 structdef = stagseen;
2786 return TRUE;
2789 if (typdef != tnone)
2790 definedef = dnone;
2792 /* Detect Objective C constructs. */
2793 switch (objdef)
2795 case onone:
2796 switch (toktype)
2798 case st_C_objprot:
2799 objdef = oprotocol;
2800 return FALSE;
2801 case st_C_objimpl:
2802 objdef = oimplementation;
2803 return FALSE;
2805 break;
2806 case oimplementation:
2807 /* Save the class tag for functions or variables defined inside. */
2808 objtag = savenstr (str, len);
2809 objdef = oinbody;
2810 return FALSE;
2811 case oprotocol:
2812 /* Save the class tag for categories. */
2813 objtag = savenstr (str, len);
2814 objdef = otagseen;
2815 *is_func_or_var = TRUE;
2816 return TRUE;
2817 case oparenseen:
2818 objdef = ocatseen;
2819 *is_func_or_var = TRUE;
2820 return TRUE;
2821 case oinbody:
2822 break;
2823 case omethodsign:
/* First component of a method name: it replaces whatever token_name
   held before. */
2824 if (parlev == 0)
2826 fvdef = fvnone;
2827 objdef = omethodtag;
2828 linebuffer_setlen (&token_name, len);
2829 memcpy (token_name.buffer, str, len);
2830 token_name.buffer[len] = '\0';
2831 return TRUE;
2833 return FALSE;
2834 case omethodcolon:
2835 if (parlev == 0)
2836 objdef = omethodparm;
2837 return FALSE;
2838 case omethodparm:
/* A further component of the method name: it is appended to what
   token_name already holds. */
2839 if (parlev == 0)
2841 int oldlen = token_name.len;
2842 fvdef = fvnone;
2843 objdef = omethodtag;
2844 linebuffer_setlen (&token_name, oldlen + len);
2845 memcpy (token_name.buffer + oldlen, str, len);
2846 token_name.buffer[oldlen + len] = '\0';
2847 return TRUE;
2849 return FALSE;
2850 case oignore:
2851 if (toktype == st_C_objend)
2853 /* Memory leakage here: the string pointed by objtag is
2854 never released, because many tests would be needed to
2855 avoid breaking on incorrect input code. The amount of
2856 memory leaked here is the sum of the lengths of the
2857 class tags.
2858 free (objtag); */
2859 objdef = onone;
2861 return FALSE;
2864 /* A function, variable or enum constant? */
2865 switch (toktype)
2867 case st_C_extern:
2868 fvextern = TRUE;
/* Inside or after a parameter list, and in the ignore states, fvdef
   is left alone; otherwise it is reset. */
2869 switch (fvdef)
2871 case finlist:
2872 case flistseen:
2873 case fignore:
2874 case vignore:
2875 break;
2876 default:
2877 fvdef = fvnone;
2879 return FALSE;
2880 case st_C_ignore:
2881 fvextern = FALSE;
2882 fvdef = vignore;
2883 return FALSE;
2884 case st_C_operator:
2885 fvdef = foperator;
2886 *is_func_or_var = TRUE;
2887 return TRUE;
2888 case st_none:
/* A plain identifier deeper than the brace level at which the last
   struct-like keyword, an `enum', was seen. */
2889 if (constantypedefs
2890 && structdef == snone
2891 && structtype == st_C_enum && bracelev > structbracelev)
2892 return TRUE; /* enum constant */
2893 switch (fvdef)
2895 case fdefunkey:
/* The name following a DEFUN-like keyword is taken only at brace
   level 0. */
2896 if (bracelev > 0)
2897 break;
2898 fvdef = fdefunname; /* GNU macro */
2899 *is_func_or_var = TRUE;
2900 return TRUE;
2901 case fvnone:
2902 switch (typdef)
2904 case ttypeseen:
2905 return FALSE;
2906 case tnone:
/* asm and __asm__ are treated like variables to be ignored. */
2907 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2908 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2910 fvdef = vignore;
2911 return FALSE;
2913 break;
2915 /* FALLTHRU */
2916 case fvnameseen:
/* A name ending in "::operator" starts a C++ operator definition,
   which also settles automatic language detection. */
2917 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2919 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2920 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2921 fvdef = foperator;
2922 *is_func_or_var = TRUE;
2923 return TRUE;
/* Inside braces, names are candidates only directly in a struct
   body (see the instruct macro). */
2925 if (bracelev > 0 && !instruct)
2926 break;
2927 fvdef = fvnameseen; /* function or variable */
2928 *is_func_or_var = TRUE;
2929 return TRUE;
2931 break;
2934 return FALSE;
2939 * C_entries often keeps pointers to tokens or lines which are older than
2940 * the line currently read. By keeping two line buffers, and switching
2941 * them at end of line, it is possible to use those pointers.
2943 static struct
2945 long linepos;
2946 linebuffer lb;
2947 } lbs[2];
2949 #define current_lb_is_new (newndx == curndx)
2950 #define switch_line_buffers() (curndx = 1 - curndx)
2952 #define curlb (lbs[curndx].lb)
2953 #define newlb (lbs[newndx].lb)
2954 #define curlinepos (lbs[curndx].linepos)
2955 #define newlinepos (lbs[newndx].linepos)
2957 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2958 #define cplpl (c_ext & C_PLPL)
2959 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2961 #define CNL_SAVE_DEFINEDEF() \
2962 do { \
2963 curlinepos = charno; \
2964 readline (&curlb, inf); \
2965 lp = curlb.buffer; \
2966 quotednl = FALSE; \
2967 newndx = curndx; \
2968 } while (0)
2970 #define CNL() \
2971 do { \
2972 CNL_SAVE_DEFINEDEF(); \
2973 if (savetoken.valid) \
2975 token = savetoken; \
2976 savetoken.valid = FALSE; \
2978 definedef = dnone; \
2979 } while (0)
2982 static void
2983 make_C_tag (int isfun)
2985 /* This function is never called when token.valid is FALSE, but
2986 we must protect against invalid input or internal errors. */
2987 if (token.valid)
2988 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2989 token.offset+token.length+1, token.lineno, token.linepos);
2990 else if (DEBUG)
2991 { /* this branch is optimized away if !DEBUG */
2992 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2993 token_name.len + 17, isfun, token.line,
2994 token.offset+token.length+1, token.lineno, token.linepos);
2995 error ("INVALID TOKEN");
2998 token.valid = FALSE;
3003 * C_entries ()
3004 * This routine finds functions, variables, typedefs,
3005 * #define's, enum constants and struct/union/enum definitions in
3006 * C syntax and adds them to the list.
3008 static void
3009 C_entries (int c_ext, FILE *inf)
3010 /* extension of C */
3011 /* input file */
3013 register char c; /* latest char read; '\0' for end of line */
3014 register char *lp; /* pointer one beyond the character `c' */
3015 int curndx, newndx; /* indices for current and new lb */
3016 register int tokoff; /* offset in line of start of current token */
3017 register int toklen; /* length of current token */
3018 const char *qualifier; /* string used to qualify names */
3019 int qlen; /* length of qualifier */
3020 int bracelev; /* current brace level */
3021 int bracketlev; /* current bracket level */
3022 int parlev; /* current parenthesis level */
3023 int attrparlev; /* __attribute__ parenthesis level */
3024 int templatelev; /* current template level */
3025 int typdefbracelev; /* bracelev where a typedef struct body begun */
3026 bool incomm, inquote, inchar, quotednl, midtoken;
3027 bool yacc_rules; /* in the rules part of a yacc file */
3028 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3031 linebuffer_init (&lbs[0].lb);
3032 linebuffer_init (&lbs[1].lb);
3033 if (cstack.size == 0)
3035 cstack.size = (DEBUG) ? 1 : 4;
3036 cstack.nl = 0;
3037 cstack.cname = xnew (cstack.size, char *);
3038 cstack.bracelev = xnew (cstack.size, int);
3041 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3042 curndx = newndx = 0;
3043 lp = curlb.buffer;
3044 *lp = 0;
3046 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3047 structdef = snone; definedef = dnone; objdef = onone;
3048 yacc_rules = FALSE;
3049 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3050 token.valid = savetoken.valid = FALSE;
3051 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3052 if (cjava)
3053 { qualifier = "."; qlen = 1; }
3054 else
3055 { qualifier = "::"; qlen = 2; }
3058 while (!feof (inf))
3060 c = *lp++;
3061 if (c == '\\')
3063 /* If we are at the end of the line, the next character is a
3064 '\0'; do not skip it, because it is what tells us
3065 to read the next line. */
3066 if (*lp == '\0')
3068 quotednl = TRUE;
3069 continue;
3071 lp++;
3072 c = ' ';
3074 else if (incomm)
3076 switch (c)
3078 case '*':
3079 if (*lp == '/')
3081 c = *lp++;
3082 incomm = FALSE;
3084 break;
3085 case '\0':
3086 /* Newlines inside comments do not end macro definitions in
3087 traditional cpp. */
3088 CNL_SAVE_DEFINEDEF ();
3089 break;
3091 continue;
3093 else if (inquote)
3095 switch (c)
3097 case '"':
3098 inquote = FALSE;
3099 break;
3100 case '\0':
3101 /* Newlines inside strings do not end macro definitions
3102 in traditional cpp, even though compilers don't
3103 usually accept them. */
3104 CNL_SAVE_DEFINEDEF ();
3105 break;
3107 continue;
3109 else if (inchar)
3111 switch (c)
3113 case '\0':
3114 /* Hmmm, something went wrong. */
3115 CNL ();
3116 /* FALLTHRU */
3117 case '\'':
3118 inchar = FALSE;
3119 break;
3121 continue;
3123 else switch (c)
3125 case '"':
3126 inquote = TRUE;
3127 if (bracketlev > 0)
3128 continue;
3129 if (inattribute)
3130 break;
3131 switch (fvdef)
3133 case fdefunkey:
3134 case fstartlist:
3135 case finlist:
3136 case fignore:
3137 case vignore:
3138 break;
3139 default:
3140 fvextern = FALSE;
3141 fvdef = fvnone;
3143 continue;
3144 case '\'':
3145 inchar = TRUE;
3146 if (bracketlev > 0)
3147 continue;
3148 if (inattribute)
3149 break;
3150 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3152 fvextern = FALSE;
3153 fvdef = fvnone;
3155 continue;
3156 case '/':
3157 if (*lp == '*')
3159 incomm = TRUE;
3160 lp++;
3161 c = ' ';
3162 if (bracketlev > 0)
3163 continue;
3165 else if (/* cplpl && */ *lp == '/')
3167 c = '\0';
3169 break;
3170 case '%':
3171 if ((c_ext & YACC) && *lp == '%')
3173 /* Entering or exiting rules section in yacc file. */
3174 lp++;
3175 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3176 typdef = tnone; structdef = snone;
3177 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3178 bracelev = 0;
3179 yacc_rules = !yacc_rules;
3180 continue;
3182 else
3183 break;
3184 case '#':
3185 if (definedef == dnone)
3187 char *cp;
3188 bool cpptoken = TRUE;
3190 /* Look back on this line. If all blanks, or nonblanks
3191 followed by an end of comment, this is a preprocessor
3192 token. */
3193 for (cp = newlb.buffer; cp < lp-1; cp++)
3194 if (!iswhite (*cp))
3196 if (*cp == '*' && cp[1] == '/')
3198 cp++;
3199 cpptoken = TRUE;
3201 else
3202 cpptoken = FALSE;
3204 if (cpptoken)
3205 definedef = dsharpseen;
3206 } /* if (definedef == dnone) */
3207 continue;
3208 case '[':
3209 bracketlev++;
3210 continue;
3211 default:
3212 if (bracketlev > 0)
3214 if (c == ']')
3215 --bracketlev;
3216 else if (c == '\0')
3217 CNL_SAVE_DEFINEDEF ();
3218 continue;
3220 break;
3221 } /* switch (c) */
3224 /* Consider token only if some involved conditions are satisfied. */
3225 if (typdef != tignore
3226 && definedef != dignorerest
3227 && fvdef != finlist
3228 && templatelev == 0
3229 && (definedef != dnone
3230 || structdef != scolonseen)
3231 && !inattribute)
3233 if (midtoken)
3235 if (endtoken (c))
3237 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3238 /* This handles :: in the middle,
3239 but not at the beginning of an identifier.
3240 Also, space-separated :: is not recognized. */
3242 if (c_ext & C_AUTO) /* automatic detection of C++ */
3243 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3244 lp += 2;
3245 toklen += 2;
3246 c = lp[-1];
3247 goto still_in_token;
3249 else
3251 bool funorvar = FALSE;
3253 if (yacc_rules
3254 || consider_token (newlb.buffer + tokoff, toklen, c,
3255 &c_ext, bracelev, parlev,
3256 &funorvar))
3258 if (fvdef == foperator)
3260 char *oldlp = lp;
3261 lp = skip_spaces (lp-1);
3262 if (*lp != '\0')
3263 lp += 1;
3264 while (*lp != '\0'
3265 && !iswhite (*lp) && *lp != '(')
3266 lp += 1;
3267 c = *lp++;
3268 toklen += lp - oldlp;
3270 token.named = FALSE;
3271 if (!plainc
3272 && nestlev > 0 && definedef == dnone)
3273 /* in struct body */
3275 int len;
3276 write_classname (&token_name, qualifier);
3277 len = token_name.len;
3278 linebuffer_setlen (&token_name, len+qlen+toklen);
3279 sprintf (token_name.buffer + len, "%s%.*s",
3280 qualifier, toklen, newlb.buffer + tokoff);
3281 token.named = TRUE;
3283 else if (objdef == ocatseen)
3284 /* Objective C category */
3286 int len = strlen (objtag) + 2 + toklen;
3287 linebuffer_setlen (&token_name, len);
3288 sprintf (token_name.buffer, "%s(%.*s)",
3289 objtag, toklen, newlb.buffer + tokoff);
3290 token.named = TRUE;
3292 else if (objdef == omethodtag
3293 || objdef == omethodparm)
3294 /* Objective C method */
3296 token.named = TRUE;
3298 else if (fvdef == fdefunname)
3299 /* GNU DEFUN and similar macros */
3301 bool defun = (newlb.buffer[tokoff] == 'F');
3302 int off = tokoff;
3303 int len = toklen;
3305 /* Rewrite the tag so that emacs lisp DEFUNs
3306 can be found by their elisp name */
3307 if (defun)
3309 off += 1;
3310 len -= 1;
3312 linebuffer_setlen (&token_name, len);
3313 memcpy (token_name.buffer,
3314 newlb.buffer + off, len);
3315 token_name.buffer[len] = '\0';
3316 if (defun)
3317 while (--len >= 0)
3318 if (token_name.buffer[len] == '_')
3319 token_name.buffer[len] = '-';
3320 token.named = defun;
3322 else
3324 linebuffer_setlen (&token_name, toklen);
3325 memcpy (token_name.buffer,
3326 newlb.buffer + tokoff, toklen);
3327 token_name.buffer[toklen] = '\0';
3328 /* Name macros and members. */
3329 token.named = (structdef == stagseen
3330 || typdef == ttypeseen
3331 || typdef == tend
3332 || (funorvar
3333 && definedef == dignorerest)
3334 || (funorvar
3335 && definedef == dnone
3336 && structdef == snone
3337 && bracelev > 0));
3339 token.lineno = lineno;
3340 token.offset = tokoff;
3341 token.length = toklen;
3342 token.line = newlb.buffer;
3343 token.linepos = newlinepos;
3344 token.valid = TRUE;
3346 if (definedef == dnone
3347 && (fvdef == fvnameseen
3348 || fvdef == foperator
3349 || structdef == stagseen
3350 || typdef == tend
3351 || typdef == ttypeseen
3352 || objdef != onone))
3354 if (current_lb_is_new)
3355 switch_line_buffers ();
3357 else if (definedef != dnone
3358 || fvdef == fdefunname
3359 || instruct)
3360 make_C_tag (funorvar);
3362 else /* not yacc and consider_token failed */
3364 if (inattribute && fvdef == fignore)
3366 /* We have just met __attribute__ after a
3367 function parameter list: do not tag the
3368 function again. */
3369 fvdef = fvnone;
3372 midtoken = FALSE;
3374 } /* if (endtoken (c)) */
3375 else if (intoken (c))
3376 still_in_token:
3378 toklen++;
3379 continue;
3381 } /* if (midtoken) */
3382 else if (begtoken (c))
3384 switch (definedef)
3386 case dnone:
3387 switch (fvdef)
3389 case fstartlist:
3390 /* This prevents tagging fb in
3391 void (__attribute__((noreturn)) *fb) (void);
3392 Fixing this is not easy and not very important. */
3393 fvdef = finlist;
3394 continue;
3395 case flistseen:
3396 if (plainc || declarations)
3398 make_C_tag (TRUE); /* a function */
3399 fvdef = fignore;
3401 break;
3403 if (structdef == stagseen && !cjava)
3405 popclass_above (bracelev);
3406 structdef = snone;
3408 break;
3409 case dsharpseen:
3410 savetoken = token;
3411 break;
3413 if (!yacc_rules || lp == newlb.buffer + 1)
3415 tokoff = lp - 1 - newlb.buffer;
3416 toklen = 1;
3417 midtoken = TRUE;
3419 continue;
3420 } /* if (begtoken) */
3421 } /* if must look at token */
3424 /* Detect end of line, colon, comma, semicolon and various braces
3425 after having handled a token.*/
3426 switch (c)
3428 case ':':
3429 if (inattribute)
3430 break;
3431 if (yacc_rules && token.offset == 0 && token.valid)
3433 make_C_tag (FALSE); /* a yacc function */
3434 break;
3436 if (definedef != dnone)
3437 break;
3438 switch (objdef)
3440 case otagseen:
3441 objdef = oignore;
3442 make_C_tag (TRUE); /* an Objective C class */
3443 break;
3444 case omethodtag:
3445 case omethodparm:
3446 objdef = omethodcolon;
3447 linebuffer_setlen (&token_name, token_name.len + 1);
3448 strcat (token_name.buffer, ":");
3449 break;
3451 if (structdef == stagseen)
3453 structdef = scolonseen;
3454 break;
3456 /* Should be useless, but may be work as a safety net. */
3457 if (cplpl && fvdef == flistseen)
3459 make_C_tag (TRUE); /* a function */
3460 fvdef = fignore;
3461 break;
3463 break;
3464 case ';':
3465 if (definedef != dnone || inattribute)
3466 break;
3467 switch (typdef)
3469 case tend:
3470 case ttypeseen:
3471 make_C_tag (FALSE); /* a typedef */
3472 typdef = tnone;
3473 fvdef = fvnone;
3474 break;
3475 case tnone:
3476 case tinbody:
3477 case tignore:
3478 switch (fvdef)
3480 case fignore:
3481 if (typdef == tignore || cplpl)
3482 fvdef = fvnone;
3483 break;
3484 case fvnameseen:
3485 if ((globals && bracelev == 0 && (!fvextern || declarations))
3486 || (members && instruct))
3487 make_C_tag (FALSE); /* a variable */
3488 fvextern = FALSE;
3489 fvdef = fvnone;
3490 token.valid = FALSE;
3491 break;
3492 case flistseen:
3493 if ((declarations
3494 && (cplpl || !instruct)
3495 && (typdef == tnone || (typdef != tignore && instruct)))
3496 || (members
3497 && plainc && instruct))
3498 make_C_tag (TRUE); /* a function */
3499 /* FALLTHRU */
3500 default:
3501 fvextern = FALSE;
3502 fvdef = fvnone;
3503 if (declarations
3504 && cplpl && structdef == stagseen)
3505 make_C_tag (FALSE); /* forward declaration */
3506 else
3507 token.valid = FALSE;
3508 } /* switch (fvdef) */
3509 /* FALLTHRU */
3510 default:
3511 if (!instruct)
3512 typdef = tnone;
3514 if (structdef == stagseen)
3515 structdef = snone;
3516 break;
3517 case ',':
3518 if (definedef != dnone || inattribute)
3519 break;
3520 switch (objdef)
3522 case omethodtag:
3523 case omethodparm:
3524 make_C_tag (TRUE); /* an Objective C method */
3525 objdef = oinbody;
3526 break;
3528 switch (fvdef)
3530 case fdefunkey:
3531 case foperator:
3532 case fstartlist:
3533 case finlist:
3534 case fignore:
3535 case vignore:
3536 break;
3537 case fdefunname:
3538 fvdef = fignore;
3539 break;
3540 case fvnameseen:
3541 if (parlev == 0
3542 && ((globals
3543 && bracelev == 0
3544 && templatelev == 0
3545 && (!fvextern || declarations))
3546 || (members && instruct)))
3547 make_C_tag (FALSE); /* a variable */
3548 break;
3549 case flistseen:
3550 if ((declarations && typdef == tnone && !instruct)
3551 || (members && typdef != tignore && instruct))
3553 make_C_tag (TRUE); /* a function */
3554 fvdef = fvnameseen;
3556 else if (!declarations)
3557 fvdef = fvnone;
3558 token.valid = FALSE;
3559 break;
3560 default:
3561 fvdef = fvnone;
3563 if (structdef == stagseen)
3564 structdef = snone;
3565 break;
3566 case ']':
3567 if (definedef != dnone || inattribute)
3568 break;
3569 if (structdef == stagseen)
3570 structdef = snone;
3571 switch (typdef)
3573 case ttypeseen:
3574 case tend:
3575 typdef = tignore;
3576 make_C_tag (FALSE); /* a typedef */
3577 break;
3578 case tnone:
3579 case tinbody:
3580 switch (fvdef)
3582 case foperator:
3583 case finlist:
3584 case fignore:
3585 case vignore:
3586 break;
3587 case fvnameseen:
3588 if ((members && bracelev == 1)
3589 || (globals && bracelev == 0
3590 && (!fvextern || declarations)))
3591 make_C_tag (FALSE); /* a variable */
3592 /* FALLTHRU */
3593 default:
3594 fvdef = fvnone;
3596 break;
3598 break;
3599 case '(':
3600 if (inattribute)
3602 attrparlev++;
3603 break;
3605 if (definedef != dnone)
3606 break;
3607 if (objdef == otagseen && parlev == 0)
3608 objdef = oparenseen;
3609 switch (fvdef)
3611 case fvnameseen:
3612 if (typdef == ttypeseen
3613 && *lp != '*'
3614 && !instruct)
3616 /* This handles constructs like:
3617 typedef void OperatorFun (int fun); */
3618 make_C_tag (FALSE);
3619 typdef = tignore;
3620 fvdef = fignore;
3621 break;
3623 /* FALLTHRU */
3624 case foperator:
3625 fvdef = fstartlist;
3626 break;
3627 case flistseen:
3628 fvdef = finlist;
3629 break;
3631 parlev++;
3632 break;
3633 case ')':
3634 if (inattribute)
3636 if (--attrparlev == 0)
3637 inattribute = FALSE;
3638 break;
3640 if (definedef != dnone)
3641 break;
3642 if (objdef == ocatseen && parlev == 1)
3644 make_C_tag (TRUE); /* an Objective C category */
3645 objdef = oignore;
3647 if (--parlev == 0)
3649 switch (fvdef)
3651 case fstartlist:
3652 case finlist:
3653 fvdef = flistseen;
3654 break;
3656 if (!instruct
3657 && (typdef == tend
3658 || typdef == ttypeseen))
3660 typdef = tignore;
3661 make_C_tag (FALSE); /* a typedef */
3664 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3665 parlev = 0;
3666 break;
3667 case '{':
3668 if (definedef != dnone)
3669 break;
3670 if (typdef == ttypeseen)
3672 /* Whenever typdef is set to tinbody (currently only
3673 here), typdefbracelev should be set to bracelev. */
3674 typdef = tinbody;
3675 typdefbracelev = bracelev;
3677 switch (fvdef)
3679 case flistseen:
3680 make_C_tag (TRUE); /* a function */
3681 /* FALLTHRU */
3682 case fignore:
3683 fvdef = fvnone;
3684 break;
3685 case fvnone:
3686 switch (objdef)
3688 case otagseen:
3689 make_C_tag (TRUE); /* an Objective C class */
3690 objdef = oignore;
3691 break;
3692 case omethodtag:
3693 case omethodparm:
3694 make_C_tag (TRUE); /* an Objective C method */
3695 objdef = oinbody;
3696 break;
3697 default:
3698 /* Neutralize `extern "C" {' grot. */
3699 if (bracelev == 0 && structdef == snone && nestlev == 0
3700 && typdef == tnone)
3701 bracelev = -1;
3703 break;
3705 switch (structdef)
3707 case skeyseen: /* unnamed struct */
3708 pushclass_above (bracelev, NULL, 0);
3709 structdef = snone;
3710 break;
3711 case stagseen: /* named struct or enum */
3712 case scolonseen: /* a class */
3713 pushclass_above (bracelev,token.line+token.offset, token.length);
3714 structdef = snone;
3715 make_C_tag (FALSE); /* a struct or enum */
3716 break;
3718 bracelev += 1;
3719 break;
3720 case '*':
3721 if (definedef != dnone)
3722 break;
3723 if (fvdef == fstartlist)
3725 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3726 token.valid = FALSE;
3728 break;
3729 case '}':
3730 if (definedef != dnone)
3731 break;
3732 bracelev -= 1;
3733 if (!ignoreindent && lp == newlb.buffer + 1)
3735 if (bracelev != 0)
3736 token.valid = FALSE; /* unexpected value, token unreliable */
3737 bracelev = 0; /* reset brace level if first column */
3738 parlev = 0; /* also reset paren level, just in case... */
3740 else if (bracelev < 0)
3742 token.valid = FALSE; /* something gone amiss, token unreliable */
3743 bracelev = 0;
3745 if (bracelev == 0 && fvdef == vignore)
3746 fvdef = fvnone; /* end of function */
3747 popclass_above (bracelev);
3748 structdef = snone;
3749 /* Only if typdef == tinbody is typdefbracelev significant. */
3750 if (typdef == tinbody && bracelev <= typdefbracelev)
3752 assert (bracelev == typdefbracelev);
3753 typdef = tend;
3755 break;
3756 case '=':
3757 if (definedef != dnone)
3758 break;
3759 switch (fvdef)
3761 case foperator:
3762 case finlist:
3763 case fignore:
3764 case vignore:
3765 break;
3766 case fvnameseen:
3767 if ((members && bracelev == 1)
3768 || (globals && bracelev == 0 && (!fvextern || declarations)))
3769 make_C_tag (FALSE); /* a variable */
3770 /* FALLTHRU */
3771 default:
3772 fvdef = vignore;
3774 break;
3775 case '<':
3776 if (cplpl
3777 && (structdef == stagseen || fvdef == fvnameseen))
3779 templatelev++;
3780 break;
3782 goto resetfvdef;
3783 case '>':
3784 if (templatelev > 0)
3786 templatelev--;
3787 break;
3789 goto resetfvdef;
3790 case '+':
3791 case '-':
3792 if (objdef == oinbody && bracelev == 0)
3794 objdef = omethodsign;
3795 break;
3797 /* FALLTHRU */
3798 resetfvdef:
3799 case '#': case '~': case '&': case '%': case '/':
3800 case '|': case '^': case '!': case '.': case '?':
3801 if (definedef != dnone)
3802 break;
3803 /* These surely cannot follow a function tag in C. */
3804 switch (fvdef)
3806 case foperator:
3807 case finlist:
3808 case fignore:
3809 case vignore:
3810 break;
3811 default:
3812 fvdef = fvnone;
3814 break;
3815 case '\0':
3816 if (objdef == otagseen)
3818 make_C_tag (TRUE); /* an Objective C class */
3819 objdef = oignore;
3821 /* If a macro spans multiple lines don't reset its state. */
3822 if (quotednl)
3823 CNL_SAVE_DEFINEDEF ();
3824 else
3825 CNL ();
3826 break;
3827 } /* switch (c) */
3829 } /* while not eof */
3831 free (lbs[0].lb.buffer);
3832 free (lbs[1].lb.buffer);
3836 * Process either a C++ file or a C file depending on the setting
3837 * of a global flag.
3839 static void
3840 default_C_entries (FILE *inf)
3842 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: C_entries is called with no extension flags.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3852 /* Always do C++. */
3853 static void
3854 Cplusplus_entries (FILE *inf)
3856 C_entries (C_PLPL, inf);
3859 /* Always do Java. */
3860 static void
3861 Cjava_entries (FILE *inf)
3863 C_entries (C_JAVA, inf);
3866 /* Always do C*. */
3867 static void
3868 Cstar_entries (FILE *inf)
3870 C_entries (C_STAR, inf);
3873 /* Always do Yacc. */
3874 static void
3875 Yacc_entries (FILE *inf)
3877 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read one
   line into LINE_BUFFER and point CHAR_POINTER at the start of that
   line, then run the statement that follows the macro invocation.
   LINE_BUFFER and CHAR_POINTER must be lvalues; all three arguments
   are evaluated again on every iteration.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )

/* True if CP points at keyword KW followed by a character for which
   notinname holds; in that case CP is advanced past the keyword and
   any following spaces.  CP must be an lvalue and is evaluated more
   than once, so it must not have side effects.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT, but compares without regard to case, does
   not use notinname to check what follows the keyword, and does not
   skip spaces: on a match CP is left just after the keyword.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), TRUE) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* skip the keyword */
3904 * Read a file, but do no processing. This is used to do regexp
3905 * matching on files that have no language defined.
3907 static void
3908 just_read_file (FILE *inf)
3910 while (!feof (inf))
3911 readline (&lb, inf);
3915 /* Fortran parsing */
3917 static void F_takeprec (void);
3918 static void F_getit (FILE *);
3920 static void
3921 F_takeprec (void)
3923 dbp = skip_spaces (dbp);
3924 if (*dbp != '*')
3925 return;
3926 dbp++;
3927 dbp = skip_spaces (dbp);
3928 if (strneq (dbp, "(*)", 3))
3930 dbp += 3;
3931 return;
3933 if (!ISDIGIT (*dbp))
3935 --dbp; /* force failure */
3936 return;
3939 dbp++;
3940 while (ISDIGIT (*dbp));
3943 static void
3944 F_getit (FILE *inf)
3946 register char *cp;
3948 dbp = skip_spaces (dbp);
3949 if (*dbp == '\0')
3951 readline (&lb, inf);
3952 dbp = lb.buffer;
3953 if (dbp[5] != '&')
3954 return;
3955 dbp += 6;
3956 dbp = skip_spaces (dbp);
3958 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3959 return;
3960 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3961 continue;
3962 make_tag (dbp, cp-dbp, TRUE,
3963 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3967 static void
3968 Fortran_functions (FILE *inf)
3970 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3972 if (*dbp == '%')
3973 dbp++; /* Ratfor escape to fortran */
3974 dbp = skip_spaces (dbp);
3975 if (*dbp == '\0')
3976 continue;
3978 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3979 dbp = skip_spaces (dbp);
3981 if (LOOKING_AT_NOCASE (dbp, "pure"))
3982 dbp = skip_spaces (dbp);
3984 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3985 dbp = skip_spaces (dbp);
3987 switch (lowcase (*dbp))
3989 case 'i':
3990 if (nocase_tail ("integer"))
3991 F_takeprec ();
3992 break;
3993 case 'r':
3994 if (nocase_tail ("real"))
3995 F_takeprec ();
3996 break;
3997 case 'l':
3998 if (nocase_tail ("logical"))
3999 F_takeprec ();
4000 break;
4001 case 'c':
4002 if (nocase_tail ("complex") || nocase_tail ("character"))
4003 F_takeprec ();
4004 break;
4005 case 'd':
4006 if (nocase_tail ("double"))
4008 dbp = skip_spaces (dbp);
4009 if (*dbp == '\0')
4010 continue;
4011 if (nocase_tail ("precision"))
4012 break;
4013 continue;
4015 break;
4017 dbp = skip_spaces (dbp);
4018 if (*dbp == '\0')
4019 continue;
4020 switch (lowcase (*dbp))
4022 case 'f':
4023 if (nocase_tail ("function"))
4024 F_getit (inf);
4025 continue;
4026 case 's':
4027 if (nocase_tail ("subroutine"))
4028 F_getit (inf);
4029 continue;
4030 case 'e':
4031 if (nocase_tail ("entry"))
4032 F_getit (inf);
4033 continue;
4034 case 'b':
4035 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4037 dbp = skip_spaces (dbp);
4038 if (*dbp == '\0') /* assume un-named */
4039 make_tag ("blockdata", 9, TRUE,
4040 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4041 else
4042 F_getit (inf); /* look for name */
4044 continue;
4051 * Ada parsing
4052 * Original code by
4053 * Philippe Waroquiers (1998)
4056 /* Once we are positioned after an "interesting" keyword, let's get
4057 the real tag value necessary. */
4058 static void
4059 Ada_getit (FILE *inf, const char *name_qualifier)
4061 register char *cp;
4062 char *name;
4063 char c;
4065 while (!feof (inf))
4067 dbp = skip_spaces (dbp);
4068 if (*dbp == '\0'
4069 || (dbp[0] == '-' && dbp[1] == '-'))
4071 readline (&lb, inf);
4072 dbp = lb.buffer;
4074 switch (lowcase (*dbp))
4076 case 'b':
4077 if (nocase_tail ("body"))
4079 /* Skipping body of procedure body or package body or ....
4080 resetting qualifier to body instead of spec. */
4081 name_qualifier = "/b";
4082 continue;
4084 break;
4085 case 't':
4086 /* Skipping type of task type or protected type ... */
4087 if (nocase_tail ("type"))
4088 continue;
4089 break;
4091 if (*dbp == '"')
4093 dbp += 1;
4094 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4095 continue;
4097 else
4099 dbp = skip_spaces (dbp);
4100 for (cp = dbp;
4101 (*cp != '\0'
4102 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4103 cp++)
4104 continue;
4105 if (cp == dbp)
4106 return;
4108 c = *cp;
4109 *cp = '\0';
4110 name = concat (dbp, name_qualifier, "");
4111 *cp = c;
4112 make_tag (name, strlen (name), TRUE,
4113 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4114 free (name);
4115 if (c == '"')
4116 dbp = cp + 1;
4117 return;
4121 static void
4122 Ada_funcs (FILE *inf)
4124 bool inquote = FALSE;
4125 bool skip_till_semicolumn = FALSE;
4127 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4129 while (*dbp != '\0')
4131 /* Skip a string i.e. "abcd". */
4132 if (inquote || (*dbp == '"'))
4134 dbp = etags_strchr (dbp + !inquote, '"');
4135 if (dbp != NULL)
4137 inquote = FALSE;
4138 dbp += 1;
4139 continue; /* advance char */
4141 else
4143 inquote = TRUE;
4144 break; /* advance line */
4148 /* Skip comments. */
4149 if (dbp[0] == '-' && dbp[1] == '-')
4150 break; /* advance line */
4152 /* Skip character enclosed in single quote i.e. 'a'
4153 and skip single quote starting an attribute i.e. 'Image. */
4154 if (*dbp == '\'')
4156 dbp++ ;
4157 if (*dbp != '\0')
4158 dbp++;
4159 continue;
4162 if (skip_till_semicolumn)
4164 if (*dbp == ';')
4165 skip_till_semicolumn = FALSE;
4166 dbp++;
4167 continue; /* advance char */
4170 /* Search for beginning of a token. */
4171 if (!begtoken (*dbp))
4173 dbp++;
4174 continue; /* advance char */
4177 /* We are at the beginning of a token. */
4178 switch (lowcase (*dbp))
4180 case 'f':
4181 if (!packages_only && nocase_tail ("function"))
4182 Ada_getit (inf, "/f");
4183 else
4184 break; /* from switch */
4185 continue; /* advance char */
4186 case 'p':
4187 if (!packages_only && nocase_tail ("procedure"))
4188 Ada_getit (inf, "/p");
4189 else if (nocase_tail ("package"))
4190 Ada_getit (inf, "/s");
4191 else if (nocase_tail ("protected")) /* protected type */
4192 Ada_getit (inf, "/t");
4193 else
4194 break; /* from switch */
4195 continue; /* advance char */
4197 case 'u':
4198 if (typedefs && !packages_only && nocase_tail ("use"))
4200 /* when tagging types, avoid tagging use type Pack.Typename;
4201 for this, we will skip everything till a ; */
4202 skip_till_semicolumn = TRUE;
4203 continue; /* advance char */
4206 case 't':
4207 if (!packages_only && nocase_tail ("task"))
4208 Ada_getit (inf, "/k");
4209 else if (typedefs && !packages_only && nocase_tail ("type"))
4211 Ada_getit (inf, "/t");
4212 while (*dbp != '\0')
4213 dbp += 1;
4215 else
4216 break; /* from switch */
4217 continue; /* advance char */
4220 /* Look for the end of the token. */
4221 while (!endtoken (*dbp))
4222 dbp++;
4224 } /* advance char */
4225 } /* advance line */
4230 * Unix and microcontroller assembly tag handling
4231 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4232 * Idea by Bob Weiner, Motorola Inc. (1994)
4234 static void
4235 Asm_labels (FILE *inf)
4237 register char *cp;
4239 LOOP_ON_INPUT_LINES (inf, lb, cp)
4241 /* If first char is alphabetic or one of [_.$], test for colon
4242 following identifier. */
4243 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4245 /* Read past label. */
4246 cp++;
4247 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4248 cp++;
4249 if (*cp == ':' || iswhite (*cp))
4250 /* Found end of label, so copy it and add it to the table. */
4251 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4252 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4259 * Perl support
4260 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4261 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4262 * Perl variable names: /^(my|local).../
4263 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4264 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4265 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4267 static void
4268 Perl_functions (FILE *inf)
4270 char *package = savestr ("main"); /* current package name */
4271 register char *cp;
4273 LOOP_ON_INPUT_LINES (inf, lb, cp)
4275 cp = skip_spaces (cp);
4277 if (LOOKING_AT (cp, "package"))
4279 free (package);
4280 get_tag (cp, &package);
4282 else if (LOOKING_AT (cp, "sub"))
4284 char *pos, *sp;
4286 subr:
4287 sp = cp;
4288 while (!notinname (*cp))
4289 cp++;
4290 if (cp == sp)
4291 continue; /* nothing found */
4292 if ((pos = etags_strchr (sp, ':')) != NULL
4293 && pos < cp && pos[1] == ':')
4294 /* The name is already qualified. */
4295 make_tag (sp, cp - sp, TRUE,
4296 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4297 else
4298 /* Qualify it. */
4300 char savechar, *name;
4302 savechar = *cp;
4303 *cp = '\0';
4304 name = concat (package, "::", sp);
4305 *cp = savechar;
4306 make_tag (name, strlen (name), TRUE,
4307 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308 free (name);
4311 else if (LOOKING_AT (cp, "use constant")
4312 || LOOKING_AT (cp, "use constant::defer"))
4314 /* For hash style multi-constant like
4315 use constant { FOO => 123,
4316 BAR => 456 };
4317 only the first FOO is picked up. Parsing across the value
4318 expressions would be difficult in general, due to possible nested
4319 hashes, here-documents, etc. */
4320 if (*cp == '{')
4321 cp = skip_spaces (cp+1);
4322 goto subr;
4324 else if (globals) /* only if we are tagging global vars */
4326 /* Skip a qualifier, if any. */
4327 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4328 /* After "my" or "local", but before any following paren or space. */
4329 char *varstart = cp;
4331 if (qual /* should this be removed? If yes, how? */
4332 && (*cp == '$' || *cp == '@' || *cp == '%'))
4334 varstart += 1;
4336 cp++;
4337 while (ISALNUM (*cp) || *cp == '_');
4339 else if (qual)
4341 /* Should be examining a variable list at this point;
4342 could insist on seeing an open parenthesis. */
4343 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4344 cp++;
4346 else
4347 continue;
4349 make_tag (varstart, cp - varstart, FALSE,
4350 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4353 free (package);
4358 * Python support
4359 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4360 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4361 * More ideas by seb bacon <seb@jamkit.com> (2002)
4363 static void
4364 Python_functions (FILE *inf)
4366 register char *cp;
4368 LOOP_ON_INPUT_LINES (inf, lb, cp)
4370 cp = skip_spaces (cp);
4371 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4373 char *name = cp;
4374 while (!notinname (*cp) && *cp != ':')
4375 cp++;
4376 make_tag (name, cp - name, TRUE,
4377 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4384 * PHP support
4385 * Look for:
4386 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4387 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4388 * - /^[ \t]*define\(\"[^\"]+/
4389 * Only with --members:
4390 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4391 * Idea by Diez B. Roggisch (2001)
4393 static void
4394 PHP_functions (FILE *inf)
4396 register char *cp, *name;
4397 bool search_identifier = FALSE;
4399 LOOP_ON_INPUT_LINES (inf, lb, cp)
4401 cp = skip_spaces (cp);
4402 name = cp;
4403 if (search_identifier
4404 && *cp != '\0')
4406 while (!notinname (*cp))
4407 cp++;
4408 make_tag (name, cp - name, TRUE,
4409 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4410 search_identifier = FALSE;
4412 else if (LOOKING_AT (cp, "function"))
4414 if (*cp == '&')
4415 cp = skip_spaces (cp+1);
4416 if (*cp != '\0')
4418 name = cp;
4419 while (!notinname (*cp))
4420 cp++;
4421 make_tag (name, cp - name, TRUE,
4422 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4424 else
4425 search_identifier = TRUE;
4427 else if (LOOKING_AT (cp, "class"))
4429 if (*cp != '\0')
4431 name = cp;
4432 while (*cp != '\0' && !iswhite (*cp))
4433 cp++;
4434 make_tag (name, cp - name, FALSE,
4435 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4437 else
4438 search_identifier = TRUE;
4440 else if (strneq (cp, "define", 6)
4441 && (cp = skip_spaces (cp+6))
4442 && *cp++ == '('
4443 && (*cp == '"' || *cp == '\''))
4445 char quote = *cp++;
4446 name = cp;
4447 while (*cp != quote && *cp != '\0')
4448 cp++;
4449 make_tag (name, cp - name, FALSE,
4450 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4452 else if (members
4453 && LOOKING_AT (cp, "var")
4454 && *cp == '$')
4456 name = cp;
4457 while (!notinname (*cp))
4458 cp++;
4459 make_tag (name, cp - name, FALSE,
4460 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4467 * Cobol tag functions
4468 * We could look for anything that could be a paragraph name.
4469 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4470 * Idea by Corny de Souza (1993)
4472 static void
4473 Cobol_paragraphs (FILE *inf)
4475 register char *bp, *ep;
4477 LOOP_ON_INPUT_LINES (inf, lb, bp)
4479 if (lb.len < 9)
4480 continue;
4481 bp += 8;
4483 /* If eoln, compiler option or comment ignore whole line. */
4484 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4485 continue;
4487 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4488 continue;
4489 if (*ep++ == '.')
4490 make_tag (bp, ep - bp, TRUE,
4491 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4497 * Makefile support
4498 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4500 static void
4501 Makefile_targets (FILE *inf)
4503 register char *bp;
4505 LOOP_ON_INPUT_LINES (inf, lb, bp)
4507 if (*bp == '\t' || *bp == '#')
4508 continue;
4509 while (*bp != '\0' && *bp != '=' && *bp != ':')
4510 bp++;
4511 if (*bp == ':' || (globals && *bp == '='))
4513 /* We should detect if there is more than one tag, but we do not.
4514 We just skip initial and final spaces. */
4515 char * namestart = skip_spaces (lb.buffer);
4516 while (--bp > namestart)
4517 if (!notinname (*bp))
4518 break;
4519 make_tag (namestart, bp - namestart + 1, TRUE,
4520 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4527 * Pascal parsing
4528 * Original code by Mosur K. Mohan (1989)
4530 * Locates tags for procedures & functions. Doesn't do any type- or
4531 * var-definitions. It does look for the keyword "extern" or
4532 * "forward" immediately following the procedure statement; if found,
4533 * the tag is skipped.
4535 static void
4536 Pascal_functions (FILE *inf)
4538 linebuffer tline; /* mostly copied from C_entries */
4539 long save_lcno;
4540 int save_lineno, namelen, taglen;
4541 char c, *name;
4543 bool /* each of these flags is TRUE if: */
4544 incomment, /* point is inside a comment */
4545 inquote, /* point is inside '..' string */
4546 get_tagname, /* point is after PROCEDURE/FUNCTION
4547 keyword, so next item = potential tag */
4548 found_tag, /* point is after a potential tag */
4549 inparms, /* point is within parameter-list */
4550 verify_tag; /* point has passed the parm-list, so the
4551 next token will determine whether this
4552 is a FORWARD/EXTERN to be ignored, or
4553 whether it is a real tag */
4555 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4556 name = NULL; /* keep compiler quiet */
4557 dbp = lb.buffer;
4558 *dbp = '\0';
4559 linebuffer_init (&tline);
4561 incomment = inquote = FALSE;
4562 found_tag = FALSE; /* have a proc name; check if extern */
4563 get_tagname = FALSE; /* found "procedure" keyword */
4564 inparms = FALSE; /* found '(' after "proc" */
4565 verify_tag = FALSE; /* check if "extern" is ahead */
4568 while (!feof (inf)) /* long main loop to get next char */
4570 c = *dbp++;
4571 if (c == '\0') /* if end of line */
4573 readline (&lb, inf);
4574 dbp = lb.buffer;
4575 if (*dbp == '\0')
4576 continue;
4577 if (!((found_tag && verify_tag)
4578 || get_tagname))
4579 c = *dbp++; /* only if don't need *dbp pointing
4580 to the beginning of the name of
4581 the procedure or function */
4583 if (incomment)
4585 if (c == '}') /* within { } comments */
4586 incomment = FALSE;
4587 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4589 dbp++;
4590 incomment = FALSE;
4592 continue;
4594 else if (inquote)
4596 if (c == '\'')
4597 inquote = FALSE;
4598 continue;
4600 else
4601 switch (c)
4603 case '\'':
4604 inquote = TRUE; /* found first quote */
4605 continue;
4606 case '{': /* found open { comment */
4607 incomment = TRUE;
4608 continue;
4609 case '(':
4610 if (*dbp == '*') /* found open (* comment */
4612 incomment = TRUE;
4613 dbp++;
4615 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4616 inparms = TRUE;
4617 continue;
4618 case ')': /* end of parms list */
4619 if (inparms)
4620 inparms = FALSE;
4621 continue;
4622 case ';':
4623 if (found_tag && !inparms) /* end of proc or fn stmt */
4625 verify_tag = TRUE;
4626 break;
4628 continue;
4630 if (found_tag && verify_tag && (*dbp != ' '))
4632 /* Check if this is an "extern" declaration. */
4633 if (*dbp == '\0')
4634 continue;
4635 if (lowcase (*dbp) == 'e')
4637 if (nocase_tail ("extern")) /* superfluous, really! */
4639 found_tag = FALSE;
4640 verify_tag = FALSE;
4643 else if (lowcase (*dbp) == 'f')
4645 if (nocase_tail ("forward")) /* check for forward reference */
4647 found_tag = FALSE;
4648 verify_tag = FALSE;
4651 if (found_tag && verify_tag) /* not external proc, so make tag */
4653 found_tag = FALSE;
4654 verify_tag = FALSE;
4655 make_tag (name, namelen, TRUE,
4656 tline.buffer, taglen, save_lineno, save_lcno);
4657 continue;
4660 if (get_tagname) /* grab name of proc or fn */
4662 char *cp;
4664 if (*dbp == '\0')
4665 continue;
4667 /* Find block name. */
4668 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4669 continue;
4671 /* Save all values for later tagging. */
4672 linebuffer_setlen (&tline, lb.len);
4673 strcpy (tline.buffer, lb.buffer);
4674 save_lineno = lineno;
4675 save_lcno = linecharno;
4676 name = tline.buffer + (dbp - lb.buffer);
4677 namelen = cp - dbp;
4678 taglen = cp - lb.buffer + 1;
4680 dbp = cp; /* set dbp to e-o-token */
4681 get_tagname = FALSE;
4682 found_tag = TRUE;
4683 continue;
4685 /* And proceed to check for "extern". */
4687 else if (!incomment && !inquote && !found_tag)
4689 /* Check for proc/fn keywords. */
4690 switch (lowcase (c))
4692 case 'p':
4693 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4694 get_tagname = TRUE;
4695 continue;
4696 case 'f':
4697 if (nocase_tail ("unction"))
4698 get_tagname = TRUE;
4699 continue;
4702 } /* while not eof */
4704 free (tline.buffer);
4709 * Lisp tag functions
4710 * look for (def or (DEF, quote or QUOTE
4713 static void L_getit (void);
4715 static void
4716 L_getit (void)
4718 if (*dbp == '\'') /* Skip prefix quote */
4719 dbp++;
4720 else if (*dbp == '(')
4722 dbp++;
4723 /* Try to skip "(quote " */
4724 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4725 /* Ok, then skip "(" before name in (defstruct (foo)) */
4726 dbp = skip_spaces (dbp);
4728 get_tag (dbp, NULL);
4731 static void
4732 Lisp_functions (FILE *inf)
4734 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4736 if (dbp[0] != '(')
4737 continue;
4739 /* "(defvar foo)" is a declaration rather than a definition. */
4740 if (! declarations)
4742 char *p = dbp + 1;
4743 if (LOOKING_AT (p, "defvar"))
4745 p = skip_name (p); /* past var name */
4746 p = skip_spaces (p);
4747 if (*p == ')')
4748 continue;
4752 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4754 dbp = skip_non_spaces (dbp);
4755 dbp = skip_spaces (dbp);
4756 L_getit ();
4758 else
4760 /* Check for (foo::defmumble name-defined ... */
4762 dbp++;
4763 while (!notinname (*dbp) && *dbp != ':');
4764 if (*dbp == ':')
4767 dbp++;
4768 while (*dbp == ':');
4770 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4772 dbp = skip_non_spaces (dbp);
4773 dbp = skip_spaces (dbp);
4774 L_getit ();
4783 * Lua script language parsing
4784 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4786 * "function" and "local function" are tags if they start at column 1.
4788 static void
4789 Lua_functions (FILE *inf)
4791 register char *bp;
4793 LOOP_ON_INPUT_LINES (inf, lb, bp)
4795 if (bp[0] != 'f' && bp[0] != 'l')
4796 continue;
4798 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4800 if (LOOKING_AT (bp, "function"))
4801 get_tag (bp, NULL);
4807 * PostScript tags
4808 * Just look for lines where the first character is '/'
4809 * Also look at "defineps" for PSWrap
4810 * Ideas by:
4811 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4812 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4814 static void
4815 PS_functions (FILE *inf)
4817 register char *bp, *ep;
4819 LOOP_ON_INPUT_LINES (inf, lb, bp)
4821 if (bp[0] == '/')
4823 for (ep = bp+1;
4824 *ep != '\0' && *ep != ' ' && *ep != '{';
4825 ep++)
4826 continue;
4827 make_tag (bp, ep - bp, TRUE,
4828 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4830 else if (LOOKING_AT (bp, "defineps"))
4831 get_tag (bp, NULL);
4837 * Forth tags
4838 * Ignore anything after \ followed by space or in ( )
4839 * Look for words defined by :
4840 * Look for constant, code, create, defer, value, and variable
4841 * OBP extensions: Look for buffer:, field,
4842 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4844 static void
4845 Forth_words (FILE *inf)
4847 register char *bp;
4849 LOOP_ON_INPUT_LINES (inf, lb, bp)
4850 while ((bp = skip_spaces (bp))[0] != '\0')
4851 if (bp[0] == '\\' && iswhite (bp[1]))
4852 break; /* read next line */
4853 else if (bp[0] == '(' && iswhite (bp[1]))
4854 do /* skip to ) or eol */
4855 bp++;
4856 while (*bp != ')' && *bp != '\0');
4857 else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4858 || LOOKING_AT_NOCASE (bp, "constant")
4859 || LOOKING_AT_NOCASE (bp, "code")
4860 || LOOKING_AT_NOCASE (bp, "create")
4861 || LOOKING_AT_NOCASE (bp, "defer")
4862 || LOOKING_AT_NOCASE (bp, "value")
4863 || LOOKING_AT_NOCASE (bp, "variable")
4864 || LOOKING_AT_NOCASE (bp, "buffer:")
4865 || LOOKING_AT_NOCASE (bp, "field"))
4866 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4867 else
4868 bp = skip_non_spaces (bp);
4873 * Scheme tag functions
4874 * look for (def... xyzzy
4875 * (def... (xyzzy
4876 * (def ... ((...(xyzzy ....
4877 * (set! xyzzy
4878 * Original code by Ken Haase (1985?)
4880 static void
4881 Scheme_functions (FILE *inf)
4883 register char *bp;
4885 LOOP_ON_INPUT_LINES (inf, lb, bp)
4887 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4889 bp = skip_non_spaces (bp+4);
4890 /* Skip over open parens and white space. Don't continue past
4891 '\0'. */
4892 while (*bp && notinname (*bp))
4893 bp++;
4894 get_tag (bp, NULL);
4896 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4897 get_tag (bp, NULL);
4902 /* Find tags in TeX and LaTeX input files. */
4904 /* TEX_toktab is a table of TeX control sequences that define tags.
4905 * Each entry records one such control sequence.
4907 * Original code from who knows whom.
4908 * Ideas by:
4909 * Stefan Monnier (2002)
4912 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4914 /* Default set of control sequences to put into TEX_toktab.
4915 The value of environment var TEXTAGS is prepended to this. */
4916 static const char *TEX_defenv = "\
4917 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4918 :part:appendix:entry:index:def\
4919 :newcommand:renewcommand:newenvironment:renewenvironment";
4921 static void TEX_mode (FILE *);
4922 static void TEX_decode_env (const char *, const char *);
4924 static char TEX_esc = '\\';
4925 static char TEX_opgrp = '{';
4926 static char TEX_clgrp = '}';
4929 * TeX/LaTeX scanning loop.
4931 static void
4932 TeX_commands (FILE *inf)
4934 char *cp;
4935 linebuffer *key;
4937 /* Select either \ or ! as escape character. */
4938 TEX_mode (inf);
4940 /* Initialize token table once from environment. */
4941 if (TEX_toktab == NULL)
4942 TEX_decode_env ("TEXTAGS", TEX_defenv);
4944 LOOP_ON_INPUT_LINES (inf, lb, cp)
4946 /* Look at each TEX keyword in line. */
4947 for (;;)
4949 /* Look for a TEX escape. */
4950 while (*cp++ != TEX_esc)
4951 if (cp[-1] == '\0' || cp[-1] == '%')
4952 goto tex_next_line;
4954 for (key = TEX_toktab; key->buffer != NULL; key++)
4955 if (strneq (cp, key->buffer, key->len))
4957 register char *p;
4958 int namelen, linelen;
4959 bool opgrp = FALSE;
4961 cp = skip_spaces (cp + key->len);
4962 if (*cp == TEX_opgrp)
4964 opgrp = TRUE;
4965 cp++;
4967 for (p = cp;
4968 (!iswhite (*p) && *p != '#' &&
4969 *p != TEX_opgrp && *p != TEX_clgrp);
4970 p++)
4971 continue;
4972 namelen = p - cp;
4973 linelen = lb.len;
4974 if (!opgrp || *p == TEX_clgrp)
4976 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4977 p++;
4978 linelen = p - lb.buffer + 1;
4980 make_tag (cp, namelen, TRUE,
4981 lb.buffer, linelen, lineno, linecharno);
4982 goto tex_next_line; /* We only tag a line once */
4985 tex_next_line:
4990 #define TEX_LESC '\\'
4991 #define TEX_SESC '!'
4993 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4994 chars accordingly. */
4995 static void
4996 TEX_mode (FILE *inf)
4998 int c;
5000 while ((c = getc (inf)) != EOF)
5002 /* Skip to next line if we hit the TeX comment char. */
5003 if (c == '%')
5004 while (c != '\n' && c != EOF)
5005 c = getc (inf);
5006 else if (c == TEX_LESC || c == TEX_SESC )
5007 break;
5010 if (c == TEX_LESC)
5012 TEX_esc = TEX_LESC;
5013 TEX_opgrp = '{';
5014 TEX_clgrp = '}';
5016 else
5018 TEX_esc = TEX_SESC;
5019 TEX_opgrp = '<';
5020 TEX_clgrp = '>';
5022 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5023 No attempt is made to correct the situation. */
5024 rewind (inf);
5027 /* Read environment and prepend it to the default string.
5028 Build token table. */
5029 static void
5030 TEX_decode_env (const char *evarname, const char *defenv)
5032 register const char *env, *p;
5033 int i, len;
5035 /* Append default string to environment. */
5036 env = getenv (evarname);
5037 if (!env)
5038 env = defenv;
5039 else
5040 env = concat (env, defenv, "");
5042 /* Allocate a token table */
5043 for (len = 1, p = env; p;)
5044 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5045 len++;
5046 TEX_toktab = xnew (len, linebuffer);
5048 /* Unpack environment string into token table. Be careful about */
5049 /* zero-length strings (leading ':', "::" and trailing ':') */
5050 for (i = 0; *env != '\0';)
5052 p = etags_strchr (env, ':');
5053 if (!p) /* End of environment string. */
5054 p = env + strlen (env);
5055 if (p - env > 0)
5056 { /* Only non-zero strings. */
5057 TEX_toktab[i].buffer = savenstr (env, p - env);
5058 TEX_toktab[i].len = p - env;
5059 i++;
5061 if (*p)
5062 env = p + 1;
5063 else
5065 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5066 TEX_toktab[i].len = 0;
5067 break;
5073 /* Texinfo support. Dave Love, Mar. 2000. */
5074 static void
5075 Texinfo_nodes (FILE *inf)
5077 char *cp, *start;
5078 LOOP_ON_INPUT_LINES (inf, lb, cp)
5079 if (LOOKING_AT (cp, "@node"))
5081 start = cp;
5082 while (*cp != '\0' && *cp != ',')
5083 cp++;
5084 make_tag (start, cp - start, TRUE,
5085 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5091 * HTML support.
5092 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5093 * Contents of <a name=xxx> are tags with name xxx.
5095 * Francesco Potortì, 2002.
5097 static void
5098 HTML_labels (FILE *inf)
5100 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5101 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5102 bool intag = FALSE; /* inside an html tag, looking for ID= */
5103 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5104 char *end;
5107 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5109 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5110 for (;;) /* loop on the same line */
5112 if (skiptag) /* skip HTML tag */
5114 while (*dbp != '\0' && *dbp != '>')
5115 dbp++;
5116 if (*dbp == '>')
5118 dbp += 1;
5119 skiptag = FALSE;
5120 continue; /* look on the same line */
5122 break; /* go to next line */
5125 else if (intag) /* look for "name=" or "id=" */
5127 while (*dbp != '\0' && *dbp != '>'
5128 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5129 dbp++;
5130 if (*dbp == '\0')
5131 break; /* go to next line */
5132 if (*dbp == '>')
5134 dbp += 1;
5135 intag = FALSE;
5136 continue; /* look on the same line */
5138 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5139 || LOOKING_AT_NOCASE (dbp, "id="))
5141 bool quoted = (dbp[0] == '"');
5143 if (quoted)
5144 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5145 continue;
5146 else
5147 for (end = dbp; *end != '\0' && intoken (*end); end++)
5148 continue;
5149 linebuffer_setlen (&token_name, end - dbp);
5150 memcpy (token_name.buffer, dbp, end - dbp);
5151 token_name.buffer[end - dbp] = '\0';
5153 dbp = end;
5154 intag = FALSE; /* we found what we looked for */
5155 skiptag = TRUE; /* skip to the end of the tag */
5156 getnext = TRUE; /* then grab the text */
5157 continue; /* look on the same line */
5159 dbp += 1;
5162 else if (getnext) /* grab next tokens and tag them */
5164 dbp = skip_spaces (dbp);
5165 if (*dbp == '\0')
5166 break; /* go to next line */
5167 if (*dbp == '<')
5169 intag = TRUE;
5170 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5171 continue; /* look on the same line */
5174 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5175 continue;
5176 make_tag (token_name.buffer, token_name.len, TRUE,
5177 dbp, end - dbp, lineno, linecharno);
5178 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5179 getnext = FALSE;
5180 break; /* go to next line */
5183 else /* look for an interesting HTML tag */
5185 while (*dbp != '\0' && *dbp != '<')
5186 dbp++;
5187 if (*dbp == '\0')
5188 break; /* go to next line */
5189 intag = TRUE;
5190 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5192 inanchor = TRUE;
5193 continue; /* look on the same line */
5195 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5196 || LOOKING_AT_NOCASE (dbp, "<h1>")
5197 || LOOKING_AT_NOCASE (dbp, "<h2>")
5198 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5200 intag = FALSE;
5201 getnext = TRUE;
5202 continue; /* look on the same line */
5204 dbp += 1;
5211 * Prolog support
5213 * Assumes that the predicate or rule starts at column 0.
5214 * Only the first clause of a predicate or rule is added.
5215 * Original code by Sunichirou Sugou (1989)
5216 * Rewritten by Anders Lindgren (1996)
5218 static size_t prolog_pr (char *, char *);
5219 static void prolog_skip_comment (linebuffer *, FILE *);
5220 static size_t prolog_atom (char *, size_t);
5222 static void
5223 Prolog_functions (FILE *inf)
5225 char *cp, *last;
5226 size_t len;
5227 size_t allocated;
5229 allocated = 0;
5230 len = 0;
5231 last = NULL;
5233 LOOP_ON_INPUT_LINES (inf, lb, cp)
5235 if (cp[0] == '\0') /* Empty line */
5236 continue;
5237 else if (iswhite (cp[0])) /* Not a predicate */
5238 continue;
5239 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5240 prolog_skip_comment (&lb, inf);
5241 else if ((len = prolog_pr (cp, last)) > 0)
5243 /* Predicate or rule. Store the function name so that we
5244 only generate a tag for the first clause. */
5245 if (last == NULL)
5246 last = xnew (len + 1, char);
5247 else if (len + 1 > allocated)
5248 xrnew (last, len + 1, char);
5249 allocated = len + 1;
5250 memcpy (last, cp, len);
5251 last[len] = '\0';
5254 free (last);
5258 static void
5259 prolog_skip_comment (linebuffer *plb, FILE *inf)
5261 char *cp;
5265 for (cp = plb->buffer; *cp != '\0'; cp++)
5266 if (cp[0] == '*' && cp[1] == '/')
5267 return;
5268 readline (plb, inf);
5270 while (!feof (inf));
5274 * A predicate or rule definition is added if it matches:
5275 * <beginning of line><Prolog Atom><whitespace>(
5276 * or <beginning of line><Prolog Atom><whitespace>:-
5278 * It is added to the tags database if it doesn't match the
5279 * name of the previous clause header.
5281 * Return the size of the name of the predicate or rule, or 0 if no
5282 * header was found.
5284 static size_t
5285 prolog_pr (char *s, char *last)
5287 /* Name of last clause. */
5289 size_t pos;
5290 size_t len;
5292 pos = prolog_atom (s, 0);
5293 if (! pos)
5294 return 0;
5296 len = pos;
5297 pos = skip_spaces (s + pos) - s;
5299 if ((s[pos] == '.'
5300 || (s[pos] == '(' && (pos += 1))
5301 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5302 && (last == NULL /* save only the first clause */
5303 || len != strlen (last)
5304 || !strneq (s, last, len)))
5306 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5307 return len;
5309 else
5310 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  size_t origpos;

  origpos = pos;

  if (ISLOWER (s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      pos++;
      while (ISALNUM (s[pos]) || (s[pos] == '_'))
        {
          pos++;
        }
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      for (;;)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return 0;
          else if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return 0;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return 0;
}
5372 * Support for Erlang
5374 * Generates tags for functions, defines, and records.
5375 * Assumes that Erlang functions start at column 0.
5376 * Original code by Anders Lindgren (1996)
5378 static int erlang_func (char *, char *);
5379 static void erlang_attribute (char *);
5380 static int erlang_atom (char *);
5382 static void
5383 Erlang_functions (FILE *inf)
5385 char *cp, *last;
5386 int len;
5387 int allocated;
5389 allocated = 0;
5390 len = 0;
5391 last = NULL;
5393 LOOP_ON_INPUT_LINES (inf, lb, cp)
5395 if (cp[0] == '\0') /* Empty line */
5396 continue;
5397 else if (iswhite (cp[0])) /* Not function nor attribute */
5398 continue;
5399 else if (cp[0] == '%') /* comment */
5400 continue;
5401 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5402 continue;
5403 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5405 erlang_attribute (cp);
5406 if (last != NULL)
5408 free (last);
5409 last = NULL;
5412 else if ((len = erlang_func (cp, last)) > 0)
5415 * Function. Store the function name so that we only
5416 * generates a tag for the first clause.
5418 if (last == NULL)
5419 last = xnew (len + 1, char);
5420 else if (len + 1 > allocated)
5421 xrnew (last, len + 1, char);
5422 allocated = len + 1;
5423 memcpy (last, cp, len);
5424 last[len] = '\0';
5427 free (last);
5432 * A function definition is added if it matches:
5433 * <beginning of line><Erlang Atom><whitespace>(
5435 * It is added to the tags database if it doesn't match the
5436 * name of the previous clause header.
5438 * Return the size of the name of the function, or 0 if no function
5439 * was found.
5441 static int
5442 erlang_func (char *s, char *last)
5444 /* Name of last clause. */
5446 int pos;
5447 int len;
5449 pos = erlang_atom (s);
5450 if (pos < 1)
5451 return 0;
5453 len = pos;
5454 pos = skip_spaces (s + pos) - s;
5456 /* Save only the first clause. */
5457 if (s[pos++] == '('
5458 && (last == NULL
5459 || len != (int)strlen (last)
5460 || !strneq (s, last, len)))
5462 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5463 return len;
5466 return 0;
5471 * Handle attributes. Currently, tags are generated for defines
5472 * and records.
5474 * They are on the form:
5475 * -define(foo, bar).
5476 * -define(Foo(M, N), M+N).
5477 * -record(graph, {vtab = notable, cyclic = true}).
5479 static void
5480 erlang_attribute (char *s)
5482 char *cp = s;
5484 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5485 && *cp++ == '(')
5487 int len = erlang_atom (skip_spaces (cp));
5488 if (len > 0)
5489 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5491 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the atom is a quoted one that is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
    }
  else if (s[pos] == '\'')
    {
      /* Quoted atom: a backslash protects the following character.  */
      for (pos++; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      pos++;                    /* include the closing quote */
    }

  return pos;
}
5524 static char *scan_separators (char *);
5525 static void add_regex (char *, language *);
5526 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string has no separator at all and
 * is reported as unterminated without being read any further.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: stepping over the "separator" would read past
     the end of the string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character.  A backslash at the
             very end of the string quotes nothing and leaves the
             regexp unterminated.  */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5587 /* Look at the argument of --regex or --no-regex and do the right
5588 thing. Same for each line of a regexp file. */
5589 static void
5590 analyse_regex (char *regex_arg)
5592 if (regex_arg == NULL)
5594 free_regexps (); /* --no-regex: remove existing regexps */
5595 return;
5598 /* A real --regexp option or a line in a regexp file. */
5599 switch (regex_arg[0])
5601 /* Comments in regexp file or null arg to --regex. */
5602 case '\0':
5603 case ' ':
5604 case '\t':
5605 break;
5607 /* Read a regex file. This is recursive and may result in a
5608 loop, which will stop when the file descriptors are exhausted. */
5609 case '@':
5611 FILE *regexfp;
5612 linebuffer regexbuf;
5613 char *regexfile = regex_arg + 1;
5615 /* regexfile is a file containing regexps, one per line. */
5616 regexfp = fopen (regexfile, "r");
5617 if (regexfp == NULL)
5618 pfatal (regexfile);
5619 linebuffer_init (&regexbuf);
5620 while (readline_internal (&regexbuf, regexfp) > 0)
5621 analyse_regex (regexbuf.buffer);
5622 free (regexbuf.buffer);
5623 fclose (regexfp);
5625 break;
5627 /* Regexp to be used for a specific language only. */
5628 case '{':
5630 language *lang;
5631 char *lang_name = regex_arg + 1;
5632 char *cp;
5634 for (cp = lang_name; *cp != '}'; cp++)
5635 if (*cp == '\0')
5637 error ("unterminated language name in regex: %s", regex_arg);
5638 return;
5640 *cp++ = '\0';
5641 lang = get_language_from_langname (lang_name);
5642 if (lang == NULL)
5643 return;
5644 add_regex (cp, lang);
5646 break;
5648 /* Regexp to be used for any language. */
5649 default:
5650 add_regex (regex_arg, NULL);
5651 break;
5655 /* Separate the regexp pattern, compile it,
5656 and care for optional name and modifiers. */
5657 static void
5658 add_regex (char *regexp_pattern, language *lang)
5660 static struct re_pattern_buffer zeropattern;
5661 char sep, *pat, *name, *modifiers;
5662 char empty = '\0';
5663 const char *err;
5664 struct re_pattern_buffer *patbuf;
5665 regexp *rp;
5666 bool
5667 force_explicit_name = TRUE, /* do not use implicit tag names */
5668 ignore_case = FALSE, /* case is significant */
5669 multi_line = FALSE, /* matches are done one line at a time */
5670 single_line = FALSE; /* dot does not match newline */
5673 if (strlen (regexp_pattern) < 3)
5675 error ("null regexp");
5676 return;
5678 sep = regexp_pattern[0];
5679 name = scan_separators (regexp_pattern);
5680 if (name == NULL)
5682 error ("%s: unterminated regexp", regexp_pattern);
5683 return;
5685 if (name[1] == sep)
5687 error ("null name for regexp \"%s\"", regexp_pattern);
5688 return;
5690 modifiers = scan_separators (name);
5691 if (modifiers == NULL) /* no terminating separator --> no name */
5693 modifiers = name;
5694 name = &empty;
5696 else
5697 modifiers += 1; /* skip separator */
5699 /* Parse regex modifiers. */
5700 for (; modifiers[0] != '\0'; modifiers++)
5701 switch (modifiers[0])
5703 case 'N':
5704 if (modifiers == name)
5705 error ("forcing explicit tag name but no name, ignoring");
5706 force_explicit_name = TRUE;
5707 break;
5708 case 'i':
5709 ignore_case = TRUE;
5710 break;
5711 case 's':
5712 single_line = TRUE;
5713 /* FALLTHRU */
5714 case 'm':
5715 multi_line = TRUE;
5716 need_filebuf = TRUE;
5717 break;
5718 default:
5719 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5720 break;
5723 patbuf = xnew (1, struct re_pattern_buffer);
5724 *patbuf = zeropattern;
5725 if (ignore_case)
5727 static char lc_trans[CHARS];
5728 int i;
5729 for (i = 0; i < CHARS; i++)
5730 lc_trans[i] = lowcase (i);
5731 patbuf->translate = lc_trans; /* translation table to fold case */
5734 if (multi_line)
5735 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5736 else
5737 pat = regexp_pattern;
5739 if (single_line)
5740 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5741 else
5742 re_set_syntax (RE_SYNTAX_EMACS);
5744 err = re_compile_pattern (pat, strlen (pat), patbuf);
5745 if (multi_line)
5746 free (pat);
5747 if (err != NULL)
5749 error ("%s while compiling pattern", err);
5750 return;
5753 rp = p_head;
5754 p_head = xnew (1, regexp);
5755 p_head->pattern = savestr (regexp_pattern);
5756 p_head->p_next = rp;
5757 p_head->lang = lang;
5758 p_head->pat = patbuf;
5759 p_head->name = savestr (name);
5760 p_head->error_signaled = FALSE;
5761 p_head->force_explicit_name = force_explicit_name;
5762 p_head->ignore_case = ignore_case;
5763 p_head->multi_line = multi_line;
5767 * Do the substitutions indicated by the regular expression and
5768 * arguments.
5770 static char *
5771 substitute (char *in, char *out, struct re_registers *regs)
5773 char *result, *t;
5774 int size, dig, diglen;
5776 result = NULL;
5777 size = strlen (out);
5779 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5780 if (out[size - 1] == '\\')
5781 fatal ("pattern error in \"%s\"", out);
5782 for (t = etags_strchr (out, '\\');
5783 t != NULL;
5784 t = etags_strchr (t + 2, '\\'))
5785 if (ISDIGIT (t[1]))
5787 dig = t[1] - '0';
5788 diglen = regs->end[dig] - regs->start[dig];
5789 size += diglen - 2;
5791 else
5792 size -= 1;
5794 /* Allocate space and do the substitutions. */
5795 assert (size >= 0);
5796 result = xnew (size + 1, char);
5798 for (t = result; *out != '\0'; out++)
5799 if (*out == '\\' && ISDIGIT (*++out))
5801 dig = *out - '0';
5802 diglen = regs->end[dig] - regs->start[dig];
5803 memcpy (t, in + regs->start[dig], diglen);
5804 t += diglen;
5806 else
5807 *t++ = *out;
5808 *t = '\0';
5810 assert (t <= result + size);
5811 assert (t - result == (int)strlen (result));
5813 return result;
5816 /* Deallocate all regexps. */
5817 static void
5818 free_regexps (void)
5820 regexp *rp;
5821 while (p_head != NULL)
5823 rp = p_head->p_next;
5824 free (p_head->pattern);
5825 free (p_head->name);
5826 free (p_head);
5827 p_head = rp;
5829 return;
5833 * Reads the whole file as a single string from `filebuf' and looks for
5834 * multi-line regular expressions, creating tags on matches.
5835 * readline already dealt with normal regexps.
5837 * Idea by Ben Wing <ben@666.com> (2002).
5839 static void
5840 regex_tag_multiline (void)
5842 char *buffer = filebuf.buffer;
5843 regexp *rp;
5844 char *name;
5846 for (rp = p_head; rp != NULL; rp = rp->p_next)
5848 int match = 0;
5850 if (!rp->multi_line)
5851 continue; /* skip normal regexps */
5853 /* Generic initializations before parsing file from memory. */
5854 lineno = 1; /* reset global line number */
5855 charno = 0; /* reset global char number */
5856 linecharno = 0; /* reset global char number of line start */
5858 /* Only use generic regexps or those for the current language. */
5859 if (rp->lang != NULL && rp->lang != curfdp->lang)
5860 continue;
5862 while (match >= 0 && match < filebuf.len)
5864 match = re_search (rp->pat, buffer, filebuf.len, charno,
5865 filebuf.len - match, &rp->regs);
5866 switch (match)
5868 case -2:
5869 /* Some error. */
5870 if (!rp->error_signaled)
5872 error ("regexp stack overflow while matching \"%s\"",
5873 rp->pattern);
5874 rp->error_signaled = TRUE;
5876 break;
5877 case -1:
5878 /* No match. */
5879 break;
5880 default:
5881 if (match == rp->regs.end[0])
5883 if (!rp->error_signaled)
5885 error ("regexp matches the empty string: \"%s\"",
5886 rp->pattern);
5887 rp->error_signaled = TRUE;
5889 match = -3; /* exit from while loop */
5890 break;
5893 /* Match occurred. Construct a tag. */
5894 while (charno < rp->regs.end[0])
5895 if (buffer[charno++] == '\n')
5896 lineno++, linecharno = charno;
5897 name = rp->name;
5898 if (name[0] == '\0')
5899 name = NULL;
5900 else /* make a named tag */
5901 name = substitute (buffer, rp->name, &rp->regs);
5902 if (rp->force_explicit_name)
5903 /* Force explicit tag name, if a name is there. */
5904 pfnote (name, TRUE, buffer + linecharno,
5905 charno - linecharno + 1, lineno, linecharno);
5906 else
5907 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5908 charno - linecharno + 1, lineno, linecharno);
5909 break;
5916 static bool
5917 nocase_tail (const char *cp)
5919 register int len = 0;
5921 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5922 cp++, len++;
5923 if (*cp == '\0' && !intoken (dbp[len]))
5925 dbp += len;
5926 return TRUE;
5928 return FALSE;
5931 static void
5932 get_tag (register char *bp, char **namepp)
5934 register char *cp = bp;
5936 if (*bp != '\0')
5938 /* Go till you get to white space or a syntactic break */
5939 for (cp = bp + 1; !notinname (*cp); cp++)
5940 continue;
5941 make_tag (bp, cp - bp, TRUE,
5942 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5945 if (namepp != NULL)
5946 *namepp = savenstr (bp, cp - bp);
5950 * Read a line of text from `stream' into `lbp', excluding the
5951 * newline or CR-NL, if any. Return the number of characters read from
5952 * `stream', which is the length of the line including the newline.
5954 * On DOS or Windows we do not count the CR character, if any before the
5955 * NL, in the returned length; this mirrors the behavior of Emacs on those
5956 * platforms (for text files, it translates CR-NL to NL as it reads in the
5957 * file).
5959 * If multi-line regular expressions are requested, each line read is
5960 * appended to `filebuf'.
5962 static long
5963 readline_internal (linebuffer *lbp, register FILE *stream)
5965 char *buffer = lbp->buffer;
5966 register char *p = lbp->buffer;
5967 register char *pend;
5968 int chars_deleted;
5970 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5972 for (;;)
5974 register int c = getc (stream);
5975 if (p == pend)
5977 /* We're at the end of linebuffer: expand it. */
5978 lbp->size *= 2;
5979 xrnew (buffer, lbp->size, char);
5980 p += buffer - lbp->buffer;
5981 pend = buffer + lbp->size;
5982 lbp->buffer = buffer;
5984 if (c == EOF)
5986 *p = '\0';
5987 chars_deleted = 0;
5988 break;
5990 if (c == '\n')
5992 if (p > buffer && p[-1] == '\r')
5994 p -= 1;
5995 #ifdef DOS_NT
5996 /* Assume CRLF->LF translation will be performed by Emacs
5997 when loading this file, so CRs won't appear in the buffer.
5998 It would be cleaner to compensate within Emacs;
5999 however, Emacs does not know how many CRs were deleted
6000 before any given point in the file. */
6001 chars_deleted = 1;
6002 #else
6003 chars_deleted = 2;
6004 #endif
6006 else
6008 chars_deleted = 1;
6010 *p = '\0';
6011 break;
6013 *p++ = c;
6015 lbp->len = p - buffer;
6017 if (need_filebuf /* we need filebuf for multi-line regexps */
6018 && chars_deleted > 0) /* not at EOF */
6020 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6022 /* Expand filebuf. */
6023 filebuf.size *= 2;
6024 xrnew (filebuf.buffer, filebuf.size, char);
6026 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6027 filebuf.len += lbp->len;
6028 filebuf.buffer[filebuf.len++] = '\n';
6029 filebuf.buffer[filebuf.len] = '\0';
6032 return lbp->len + chars_deleted;
6036 * Like readline_internal, above, but in addition try to match the
6037 * input line against relevant regular expressions and manage #line
6038 * directives.
6040 static void
6041 readline (linebuffer *lbp, FILE *stream)
6043 long result;
6045 linecharno = charno; /* update global char number of line start */
6046 result = readline_internal (lbp, stream); /* read line */
6047 lineno += 1; /* increment global line number */
6048 charno += result; /* increment global char number */
6050 /* Honor #line directives. */
6051 if (!no_line_directive)
6053 static bool discard_until_line_directive;
6055 /* Check whether this is a #line directive. */
6056 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6058 unsigned int lno;
6059 int start = 0;
6061 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6062 && start > 0) /* double quote character found */
6064 char *endp = lbp->buffer + start;
6066 while ((endp = etags_strchr (endp, '"')) != NULL
6067 && endp[-1] == '\\')
6068 endp++;
6069 if (endp != NULL)
6070 /* Ok, this is a real #line directive. Let's deal with it. */
6072 char *taggedabsname; /* absolute name of original file */
6073 char *taggedfname; /* name of original file as given */
6074 char *name; /* temp var */
6076 discard_until_line_directive = FALSE; /* found it */
6077 name = lbp->buffer + start;
6078 *endp = '\0';
6079 canonicalize_filename (name);
6080 taggedabsname = absolute_filename (name, tagfiledir);
6081 if (filename_is_absolute (name)
6082 || filename_is_absolute (curfdp->infname))
6083 taggedfname = savestr (taggedabsname);
6084 else
6085 taggedfname = relative_filename (taggedabsname,tagfiledir);
6087 if (streq (curfdp->taggedfname, taggedfname))
6088 /* The #line directive is only a line number change. We
6089 deal with this afterwards. */
6090 free (taggedfname);
6091 else
6092 /* The tags following this #line directive should be
6093 attributed to taggedfname. In order to do this, set
6094 curfdp accordingly. */
6096 fdesc *fdp; /* file description pointer */
6098 /* Go look for a file description already set up for the
6099 file indicated in the #line directive. If there is
6100 one, use it from now until the next #line
6101 directive. */
6102 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6103 if (streq (fdp->infname, curfdp->infname)
6104 && streq (fdp->taggedfname, taggedfname))
6105 /* If we remove the second test above (after the &&)
6106 then all entries pertaining to the same file are
6107 coalesced in the tags file. If we use it, then
6108 entries pertaining to the same file but generated
6109 from different files (via #line directives) will
6110 go into separate sections in the tags file. These
6111 alternatives look equivalent. The first one
6112 destroys some apparently useless information. */
6114 curfdp = fdp;
6115 free (taggedfname);
6116 break;
6118 /* Else, if we already tagged the real file, skip all
6119 input lines until the next #line directive. */
6120 if (fdp == NULL) /* not found */
6121 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6122 if (streq (fdp->infabsname, taggedabsname))
6124 discard_until_line_directive = TRUE;
6125 free (taggedfname);
6126 break;
6128 /* Else create a new file description and use that from
6129 now on, until the next #line directive. */
6130 if (fdp == NULL) /* not found */
6132 fdp = fdhead;
6133 fdhead = xnew (1, fdesc);
6134 *fdhead = *curfdp; /* copy curr. file description */
6135 fdhead->next = fdp;
6136 fdhead->infname = savestr (curfdp->infname);
6137 fdhead->infabsname = savestr (curfdp->infabsname);
6138 fdhead->infabsdir = savestr (curfdp->infabsdir);
6139 fdhead->taggedfname = taggedfname;
6140 fdhead->usecharno = FALSE;
6141 fdhead->prop = NULL;
6142 fdhead->written = FALSE;
6143 curfdp = fdhead;
6146 free (taggedabsname);
6147 lineno = lno - 1;
6148 readline (lbp, stream);
6149 return;
6150 } /* if a real #line directive */
6151 } /* if #line is followed by a number */
6152 } /* if line begins with "#line " */
6154 /* If we are here, no #line directive was found. */
6155 if (discard_until_line_directive)
6157 if (result > 0)
6159 /* Do a tail recursion on ourselves, thus discarding the contents
6160 of the line buffer. */
6161 readline (lbp, stream);
6162 return;
6164 /* End of file. */
6165 discard_until_line_directive = FALSE;
6166 return;
6168 } /* if #line directives should be considered */
6171 int match;
6172 regexp *rp;
6173 char *name;
6175 /* Match against relevant regexps. */
6176 if (lbp->len > 0)
6177 for (rp = p_head; rp != NULL; rp = rp->p_next)
6179 /* Only use generic regexps or those for the current language.
6180 Also do not use multiline regexps, which is the job of
6181 regex_tag_multiline. */
6182 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6183 || rp->multi_line)
6184 continue;
6186 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6187 switch (match)
6189 case -2:
6190 /* Some error. */
6191 if (!rp->error_signaled)
6193 error ("regexp stack overflow while matching \"%s\"",
6194 rp->pattern);
6195 rp->error_signaled = TRUE;
6197 break;
6198 case -1:
6199 /* No match. */
6200 break;
6201 case 0:
6202 /* Empty string matched. */
6203 if (!rp->error_signaled)
6205 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6206 rp->error_signaled = TRUE;
6208 break;
6209 default:
6210 /* Match occurred. Construct a tag. */
6211 name = rp->name;
6212 if (name[0] == '\0')
6213 name = NULL;
6214 else /* make a named tag */
6215 name = substitute (lbp->buffer, rp->name, &rp->regs);
6216 if (rp->force_explicit_name)
6217 /* Force explicit tag name, if a name is there. */
6218 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6219 else
6220 make_tag (name, strlen (name), TRUE,
6221 lbp->buffer, match, lineno, linecharno);
6222 break;
/*
 * Duplicate the NUL-terminated string CP into freshly allocated
 * storage of strlen(cp)+1 bytes and return the copy.
 */
static char *
savestr (const char *cp)
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6240 * Return a pointer to a space of size LEN+1 allocated with xnew where
6241 * the string CP has been copied for at most the first LEN characters.
6243 static char *
6244 savenstr (const char *cp, int len)
6246 register char *dp;
6248 dp = xnew (len + 1, char);
6249 memcpy (dp, cp, len);
6250 dp[len] = '\0';
6251 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is part of the
 * string, so searching for '\0' returns a pointer to the terminator.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  /* Like strrchr, compare against C converted to char; otherwise a
     value above CHAR_MAX could never match where char is signed.  */
  const char target = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == target)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is part of the
 * string, so searching for '\0' returns a pointer to the terminator.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  /* Like strchr, compare against C converted to char; otherwise a
     value above CHAR_MAX could never match where char is signed.  */
  const char target = (char) c;

  for (;; sp++)
    {
      if (*sp == target)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
/* Advance CP past every character for which iswhite is true and
   return the resulting pointer.  The end of the string is not
   considered white space.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP up to the first white-space character (per iswhite) or
   the end of the string, whichever comes first, and return the
   resulting pointer.  */
static char *
skip_non_spaces (char *cp)
{
  for (; !(*cp == '\0' || iswhite (*cp)); cp++)
    continue;
  return cp;
}
/* Advance CP past every character of the "name" class and return
   the resulting pointer.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
/* Report a diagnostic through error, using S1 as the format and S2
   as its single argument, then terminate the program with
   EXIT_FAILURE.  Does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print S1 together with the message for the current errno (via
   perror), then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6334 static void
6335 suggest_asking_for_help (void)
6337 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6338 progname);
6339 exit (EXIT_FAILURE);
6342 /* Output a diagnostic with printf-style FORMAT and args. */
6343 static void
6344 error (const char *format, ...)
6346 va_list ap;
6347 va_start (ap, format);
6348 fprintf (stderr, "%s: ", progname);
6349 vfprintf (stderr, format, ap);
6350 fprintf (stderr, "\n");
6351 va_end (ap);
6354 /* Return a newly-allocated string whose contents
6355 concatenate those of s1, s2, s3. */
6356 static char *
6357 concat (const char *s1, const char *s2, const char *s3)
6359 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6360 char *result = xnew (len1 + len2 + len3 + 1, char);
6362 strcpy (result, s1);
6363 strcpy (result + len1, s2);
6364 strcpy (result + len1 + len2, s3);
6365 result[len1 + len2 + len3] = '\0';
6367 return result;
6371 /* Does the same work as the system V getcwd, but does not need to
6372 guess the buffer size in advance. */
6373 static char *
6374 etags_getcwd (void)
6376 int bufsize = 200;
6377 char *path = xnew (bufsize, char);
6379 while (getcwd (path, bufsize) == NULL)
6381 if (errno != ERANGE)
6382 pfatal ("getcwd");
6383 bufsize *= 2;
6384 free (path);
6385 path = xnew (bufsize, char);
6388 canonicalize_filename (path);
6389 return path;
6392 /* Return a newly allocated string containing the file name of FILE
6393 relative to the absolute directory DIR (which should end with a slash). */
6394 static char *
6395 relative_filename (char *file, char *dir)
6397 char *fp, *dp, *afn, *res;
6398 int i;
6400 /* Find the common root of file and dir (with a trailing slash). */
6401 afn = absolute_filename (file, cwd);
6402 fp = afn;
6403 dp = dir;
6404 while (*fp++ == *dp++)
6405 continue;
6406 fp--, dp--; /* back to the first differing char */
6407 #ifdef DOS_NT
6408 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6409 return afn;
6410 #endif
6411 do /* look at the equal chars until '/' */
6412 fp--, dp--;
6413 while (*fp != '/');
6415 /* Build a sequence of "../" strings for the resulting relative file name. */
6416 i = 0;
6417 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6418 i += 1;
6419 res = xnew (3*i + strlen (fp + 1) + 1, char);
6420 res[0] = '\0';
6421 while (i-- > 0)
6422 strcat (res, "../");
6424 /* Add the file name relative to the common root of file and dir. */
6425 strcat (res, fp + 1);
6426 free (afn);
6428 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  "/dirname/.." and "/." components are then removed in
   place.  Should nothing be left, "/" is returned.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backward from SLASHP for the start of the
                 preceding component, never stepping before RES.  */
              char *prev = NULL;

              for (cp = slashp; cp > res; )
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      prev = cp;
                      break;
                    }
                }
              /* With no preceding component the absolute name begins
                 with "/..": just drop the "/..".  */
              cp = (prev != NULL) ? prev : slashp;
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              if (cp[0] != '/')
                cp = slashp;
#endif
              /* Copy the tail, terminator included, over the removed part.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop a "/." component.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute file name
   of the directory in which FILE resides, given DIR (which should end
   with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the
     call, then put the overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter, a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is '/' normally and '\\' under
   DOS_NT; each run of separators is rewritten as one '/'.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  register char *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c) isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash.  SRC reads the
     original text while DST, never ahead of SRC, writes the result.  */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6557 /* Initialize a linebuffer for use. */
6558 static void
6559 linebuffer_init (linebuffer *lbp)
6561 lbp->size = (DEBUG) ? 3 : 200;
6562 lbp->buffer = xnew (lbp->size, char);
6563 lbp->buffer[0] = '\0';
6564 lbp->len = 0;
6567 /* Set the minimum size of a string contained in a linebuffer. */
6568 static void
6569 linebuffer_setlen (linebuffer *lbp, int toksize)
6571 while (lbp->size <= toksize)
6573 lbp->size *= 2;
6574 xrnew (lbp->buffer, lbp->size, char);
6576 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
/* Like realloc but get fatal error if the request cannot be
   satisfied.  */
static void *
xrealloc (char *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6599 * Local Variables:
6600 * indent-tabs-mode: t
6601 * tab-width: 8
6602 * fill-column: 79
6603 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6604 * c-file-style: "gnu"
6605 * End:
6608 /* etags.c ends here */