lisp/progmodes/python.el: Updated Copyright years.
[emacs.git] / lib-src / etags.c
blob7d2a5a90999b5f17a5bd6b72dac4f134ab3567c0
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2012
32 Free Software Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #define TRUE 1
84 #define FALSE 0
86 #ifdef DEBUG
87 # undef DEBUG
88 # define DEBUG TRUE
89 #else
90 # define DEBUG FALSE
91 # define NDEBUG /* disable assert */
92 #endif
/* Include the build configuration header only when HAVE_CONFIG_H is
   defined; without it the file is compiled standalone.  */
94 #ifdef HAVE_CONFIG_H
95 # include <config.h>
96 #endif /* HAVE_CONFIG_H */
98 #ifndef _GNU_SOURCE
99 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
100 #endif
102 /* WIN32_NATIVE is for XEmacs.
103 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
104 #ifdef WIN32_NATIVE
105 # undef MSDOS
106 # undef WINDOWSNT
107 # define WINDOWSNT
108 #endif /* WIN32_NATIVE */
110 #ifdef MSDOS
111 # undef MSDOS
112 # define MSDOS TRUE
113 # include <fcntl.h>
114 # include <sys/param.h>
115 # include <io.h>
116 # ifndef HAVE_CONFIG_H
117 # define DOS_NT
118 # include <sys/config.h>
119 # endif
120 #else
121 # define MSDOS FALSE
122 #endif /* MSDOS */
124 #ifdef WINDOWSNT
125 # include <fcntl.h>
126 # include <direct.h>
127 # include <io.h>
128 # define MAXPATHLEN _MAX_PATH
129 # undef HAVE_NTGUI
130 # undef DOS_NT
131 # define DOS_NT
132 # ifndef HAVE_GETCWD
133 # define HAVE_GETCWD
134 # endif /* undef HAVE_GETCWD */
135 #else /* not WINDOWSNT */
136 #endif /* !WINDOWSNT */
/* <unistd.h> is included unconditionally.  When HAVE_UNISTD_H is not
   defined, getcwd is additionally declared by hand, provided
   HAVE_GETCWD is defined and this is not a WINDOWSNT build.  */
138 #include <unistd.h>
139 #ifndef HAVE_UNISTD_H
140 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
141 extern char *getcwd (char *buf, size_t size);
142 # endif
143 #endif /* !HAVE_UNISTD_H */
145 #include <stdarg.h>
146 #include <stdlib.h>
147 #include <string.h>
148 #include <stdio.h>
149 #include <ctype.h>
150 #include <errno.h>
151 #include <sys/types.h>
152 #include <sys/stat.h>
154 #include <assert.h>
155 #ifdef NDEBUG
156 # undef assert /* some systems have a buggy assert.h */
157 # define assert(x) ((void) 0)
158 #endif
160 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
161 # define NO_LONG_OPTIONS TRUE
162 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
163 extern char *optarg;
164 extern int optind, opterr;
165 #else
166 # define NO_LONG_OPTIONS FALSE
167 # include <getopt.h>
168 #endif /* NO_LONG_OPTIONS */
170 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
171 # ifdef __CYGWIN__ /* compiling on Cygwin */
172 !!! NOTICE !!!
173 the regex.h distributed with Cygwin is not compatible with etags, alas!
174 If you want regular expression support, you should delete this notice and
175 arrange to use the GNU regex.h and regex.c.
176 # endif
177 #endif
178 #include <regex.h>
180 /* Define CTAGS to make the program "ctags" compatible with the usual one.
181 Leave it undefined to make the program "etags", which makes emacs-style
182 tag tables and tags typedefs, #defines and struct/union/enum by default. */
183 #ifdef CTAGS
184 # undef CTAGS
185 # define CTAGS TRUE
186 #else
187 # define CTAGS FALSE
188 #endif
/* String equality helpers.  Each one yields nonzero when the two
   strings compare equal; the `n' variants compare at most N characters.
   streq/strneq use strcmp/strncmp; strcaseeq/strncaseeq go through
   etags_strcasecmp/etags_strncasecmp (presumably case-insensitive
   comparisons, defined elsewhere in this file).

   Both arguments must be non-NULL.  All four assertions use `&&'
   because the comparison dereferences both pointers, so a single NULL
   operand must already trip the check.  The assertions are only active
   when assert is enabled (i.e. NDEBUG is not defined).  Note that the
   arguments are evaluated more than once in that case, so they must
   not have side effects.  */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* Character classification helpers.  CHAR masks its argument down to
   the range 0 .. CHARS-1, so a possibly negative `char' value becomes
   a valid index into the lookup tables below and a valid argument for
   the <ctype.h> functions.  */
195 #define CHARS 256 /* number of distinct 8-bit character values (2^8) */
196 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
197 #define iswhite(c) (_wht[CHAR (c)]) /* c is white (see white) */
198 #define notinname(c) (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
199 #define begtoken(c) (_btk[CHAR (c)]) /* c can start token (see begtk) */
200 #define intoken(c) (_itk[CHAR (c)]) /* c can be in token (see midtk) */
201 #define endtoken(c) (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
/* Wrappers around the <ctype.h> functions that first mask the argument
   with CHAR.  */
203 #define ISALNUM(c) isalnum (CHAR (c))
204 #define ISALPHA(c) isalpha (CHAR (c))
205 #define ISDIGIT(c) isdigit (CHAR (c))
206 #define ISLOWER(c) islower (CHAR (c))
208 #define lowcase(c) tolower (CHAR (c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 */
/* xnew (n, Type) evaluates to a `Type *' pointing to storage for N
   objects of type Type.  xrnew (op, n, Type) resizes the storage OP
   points to so it holds N objects and assigns the new pointer back to
   OP.  When DEBUG is nonzero the requests go through trace_malloc and
   trace_realloc, which also receive the calling file and line;
   otherwise they go through xmalloc and xrealloc.
   NOTE(review): the product (n) * sizeof (Type) is not checked for
   overflow here.  */
217 #if DEBUG
218 # include "chkmalloc.h"
219 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
220 (n) * sizeof (Type)))
221 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
222 (char *) (op), (n) * sizeof (Type)))
223 #else
224 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
225 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
226 (char *) (op), (n) * sizeof (Type)))
227 #endif
229 #define bool int
231 typedef void Lang_function (FILE *);
233 typedef struct
235 const char *suffix; /* file name suffix for this compressor */
236 const char *command; /* takes one arg and decompresses to stdout */
237 } compressor;
239 typedef struct
241 const char *name; /* language name */
242 const char *help; /* detailed help for the language */
243 Lang_function *function; /* parse function */
244 const char **suffixes; /* name suffixes of this language's files */
245 const char **filenames; /* names of this language's files */
246 const char **interpreters; /* interpreters for this language */
247 bool metasource; /* source used to generate other sources */
248 } language;
250 typedef struct fdesc
252 struct fdesc *next; /* for the linked list */
253 char *infname; /* uncompressed input file name */
254 char *infabsname; /* absolute uncompressed input file name */
255 char *infabsdir; /* absolute dir of input file */
256 char *taggedfname; /* file name to write in tagfile */
257 language *lang; /* language of file */
258 char *prop; /* file properties to write in tagfile */
259 bool usecharno; /* etags tags shall contain char number */
260 bool written; /* entry written in the tags file */
261 } fdesc;
263 typedef struct node_st
264 { /* sorting structure */
265 struct node_st *left, *right; /* left and right sons */
266 fdesc *fdp; /* description of file to whom tag belongs */
267 char *name; /* tag name */
268 char *regex; /* search regexp */
269 bool valid; /* write this tag on the tag file */
270 bool is_func; /* function tag: use regexp in CTAGS mode */
271 bool been_warned; /* warning already given for duplicated tag */
272 int lno; /* line number tag is on */
273 long cno; /* character number line starts on */
274 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
/* Holds one line of text read from a stream.  */
283 typedef struct
285 long size; /* size of BUFFER */
286 int len; /* length of the string in BUFFER after a line is read */
287 char *buffer; /* the text of the line */
288 } linebuffer;
290 /* Used to support mixing of --lang and file names. */
291 typedef struct
293 enum {
294 at_language, /* a language specification */
295 at_regexp, /* a regular expression */
296 at_filename, /* a file name */
297 at_stdin, /* read from stdin here */
298 at_end /* stop parsing the list */
299 } arg_type; /* argument type */
300 language *lang; /* language associated with the argument */
301 char *what; /* the argument itself */
302 } argument;
304 /* Structure defining a regular expression. */
305 typedef struct regexp
307 struct regexp *p_next; /* pointer to next in list */
308 language *lang; /* if set, use only for this language */
309 char *pattern; /* the regexp pattern */
310 char *name; /* tag name */
311 struct re_pattern_buffer *pat; /* the compiled pattern */
312 struct re_registers regs; /* re registers */
313 bool error_signaled; /* already signaled for this regexp */
314 bool force_explicit_name; /* do not allow implicit tag name */
315 bool ignore_case; /* ignore case when matching */
316 bool multi_line; /* do a multi-line match on the whole file */
317 } regexp;
320 /* Many compilers barf on this:
321 Lang_function Ada_funcs;
322 so let's write it this way */
323 static void Ada_funcs (FILE *);
324 static void Asm_labels (FILE *);
325 static void C_entries (int c_ext, FILE *);
326 static void default_C_entries (FILE *);
327 static void plain_C_entries (FILE *);
328 static void Cjava_entries (FILE *);
329 static void Cobol_paragraphs (FILE *);
330 static void Cplusplus_entries (FILE *);
331 static void Cstar_entries (FILE *);
332 static void Erlang_functions (FILE *);
333 static void Forth_words (FILE *);
334 static void Fortran_functions (FILE *);
335 static void HTML_labels (FILE *);
336 static void Lisp_functions (FILE *);
337 static void Lua_functions (FILE *);
338 static void Makefile_targets (FILE *);
339 static void Pascal_functions (FILE *);
340 static void Perl_functions (FILE *);
341 static void PHP_functions (FILE *);
342 static void PS_functions (FILE *);
343 static void Prolog_functions (FILE *);
344 static void Python_functions (FILE *);
345 static void Scheme_functions (FILE *);
346 static void TeX_commands (FILE *);
347 static void Texinfo_nodes (FILE *);
348 static void Yacc_entries (FILE *);
349 static void just_read_file (FILE *);
351 static void print_language_names (void);
352 static void print_version (void);
353 static void print_help (argument *);
354 int main (int, char **);
356 static compressor *get_compressor_from_suffix (char *, char **);
357 static language *get_language_from_langname (const char *);
358 static language *get_language_from_interpreter (char *);
359 static language *get_language_from_filename (char *, bool);
360 static void readline (linebuffer *, FILE *);
361 static long readline_internal (linebuffer *, FILE *);
362 static bool nocase_tail (const char *);
363 static void get_tag (char *, char **);
365 static void analyse_regex (char *);
366 static void free_regexps (void);
367 static void regex_tag_multiline (void);
368 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
369 static void suggest_asking_for_help (void) NO_RETURN;
370 void fatal (const char *, const char *) NO_RETURN;
371 static void pfatal (const char *) NO_RETURN;
372 static void add_node (node *, node **);
374 static void init (void);
375 static void process_file_name (char *, language *);
376 static void process_file (FILE *, char *, language *);
377 static void find_entries (FILE *);
378 static void free_tree (node *);
379 static void free_fdesc (fdesc *);
380 static void pfnote (char *, bool, char *, int, int, long);
381 static void make_tag (const char *, int, bool, char *, int, int, long);
382 static void invalidate_nodes (fdesc *, node **);
383 static void put_entries (node *);
385 static char *concat (const char *, const char *, const char *);
386 static char *skip_spaces (char *);
387 static char *skip_non_spaces (char *);
388 static char *savenstr (const char *, int);
389 static char *savestr (const char *);
390 static char *etags_strchr (const char *, int);
391 static char *etags_strrchr (const char *, int);
392 static int etags_strcasecmp (const char *, const char *);
393 static int etags_strncasecmp (const char *, const char *, int);
394 static char *etags_getcwd (void);
395 static char *relative_filename (char *, char *);
396 static char *absolute_filename (char *, char *);
397 static char *absolute_dirname (char *, char *);
398 static bool filename_is_absolute (char *f);
399 static void canonicalize_filename (char *);
400 static void linebuffer_init (linebuffer *);
401 static void linebuffer_setlen (linebuffer *, int);
402 static void *xmalloc (size_t);
403 static void *xrealloc (char *, size_t);
406 static char searchar = '/'; /* use /.../ searches */
408 static char *tagfile; /* output file */
409 static char *progname; /* name this program was invoked with */
410 static char *cwd; /* current working directory */
411 static char *tagfiledir; /* directory of tagfile */
412 static FILE *tagf; /* ioptr for tags file */
413 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
415 static fdesc *fdhead; /* head of file description list */
416 static fdesc *curfdp; /* current file description */
417 static int lineno; /* line number of current line */
418 static long charno; /* current character number */
419 static long linecharno; /* charno of start of current line */
420 static char *dbp; /* pointer to start of current tag */
422 static const int invalidcharno = -1;
424 static node *nodehead; /* the head of the binary tree of tags */
425 static node *last_node; /* the last node created */
427 static linebuffer lb; /* the current line */
428 static linebuffer filebuf; /* a buffer containing the whole file */
429 static linebuffer token_name; /* a buffer containing a tag name */
431 /* boolean "functions" (see init) */
432 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
433 static const char
434 /* white chars */
435 *white = " \f\t\n\r\v",
436 /* not in a name */
437 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
438 /* token ending chars */
439 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
440 /* token starting chars */
441 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
442 /* valid in-token chars */
443 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
445 static bool append_to_tagfile; /* -a: append to tags */
446 /* The next five default to TRUE in C and derived languages. */
447 static bool typedefs; /* -t: create tags for C and Ada typedefs */
448 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
449 /* 0 struct/enum/union decls, and C++ */
450 /* member functions. */
451 static bool constantypedefs; /* -d: create tags for C #define, enum */
452 /* constants and variables. */
453 /* -D: opposite of -d. Default under ctags. */
454 static bool globals; /* create tags for global variables */
455 static bool members; /* create tags for C member variables */
456 static bool declarations; /* --declarations: tag them and extern in C&Co*/
457 static bool no_line_directive; /* ignore #line directives (undocumented) */
458 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
459 static bool update; /* -u: update tags */
460 static bool vgrind_style; /* -v: create vgrind style index output */
461 static bool no_warnings; /* -w: suppress warnings (undocumented) */
462 static bool cxref_style; /* -x: create cxref style output */
463 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
464 static bool ignoreindent; /* -I: ignore indentation in C */
465 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
467 /* STDIN is defined in LynxOS system headers */
468 #ifdef STDIN
469 # undef STDIN
470 #endif
/* Option code for the long-only option --parse-stdin.  The value
   0x1001 lies outside the range of the single-character codes used by
   the other entries of longopts.  */
472 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
473 static bool parsing_stdin; /* --parse-stdin used */
475 static regexp *p_head; /* list of all regexps */
476 static bool need_filebuf; /* some regexes are multi-line */
/* Long option table handed to getopt_long.  Each entry is
   { name, has_arg, flag, val }.  Entries whose flag is NULL make
   getopt_long return VAL (a short option character, or STDIN); entries
   with a non-NULL flag store VAL into the pointed-to variable instead.
   The flag pointers work because `bool' is defined as `int' in this
   file, matching the `int *' that struct option expects.
   The first group is shared; the rest depends on whether the program
   is built as ctags (CTAGS nonzero) or etags.  The table ends with an
   entry whose name is NULL.  */
478 static struct option longopts[] =
480 { "append", no_argument, NULL, 'a' },
481 { "packages-only", no_argument, &packages_only, TRUE },
482 { "c++", no_argument, NULL, 'C' },
483 { "declarations", no_argument, &declarations, TRUE },
484 { "no-line-directive", no_argument, &no_line_directive, TRUE },
485 { "no-duplicates", no_argument, &no_duplicates, TRUE },
486 { "help", no_argument, NULL, 'h' },
/* NOTE(review): "help" is listed twice with different codes ('h' and
   'H').  A lookup by name presumably always finds the first entry, so
   the 'H' entry looks unreachable as a long option -- confirm before
   removing it.  */
487 { "help", no_argument, NULL, 'H' },
488 { "ignore-indentation", no_argument, NULL, 'I' },
489 { "language", required_argument, NULL, 'l' },
490 { "members", no_argument, &members, TRUE },
491 { "no-members", no_argument, &members, FALSE },
492 { "output", required_argument, NULL, 'o' },
493 { "regex", required_argument, NULL, 'r' },
494 { "no-regex", no_argument, NULL, 'R' },
495 { "ignore-case-regex", required_argument, NULL, 'c' },
496 { "parse-stdin", required_argument, NULL, STDIN },
497 { "version", no_argument, NULL, 'V' },
499 #if CTAGS /* Ctags options */
500 { "backward-search", no_argument, NULL, 'B' },
501 { "cxref", no_argument, NULL, 'x' },
502 { "defines", no_argument, NULL, 'd' },
503 { "globals", no_argument, &globals, TRUE },
504 { "typedefs", no_argument, NULL, 't' },
505 { "typedefs-and-c++", no_argument, NULL, 'T' },
506 { "update", no_argument, NULL, 'u' },
507 { "vgrind", no_argument, NULL, 'v' },
508 { "no-warn", no_argument, NULL, 'w' },
510 #else /* Etags options */
511 { "no-defines", no_argument, NULL, 'D' },
512 { "no-globals", no_argument, &globals, FALSE },
513 { "include", required_argument, NULL, 'i' },
514 #endif
515 { NULL }
/* Table of known compressed-file suffixes.  Each entry pairs a file
   name suffix with the command that decompresses such a file to
   stdout.  The table ends with an entry whose suffix is NULL.  */
518 static compressor compressors[] =
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { "xz", "xz -d -c" },
526 { NULL }
530 * Language stuff.
533 /* Ada code */
534 static const char *Ada_suffixes [] =
535 { "ads", "adb", "ada", NULL };
536 static const char Ada_help [] =
537 "In Ada code, functions, procedures, packages, tasks and types are\n\
538 tags. Use the `--packages-only' option to create tags for\n\
539 packages only.\n\
540 Ada tag names have suffixes indicating the type of entity:\n\
541 Entity type: Qualifier:\n\
542 ------------ ----------\n\
543 function /f\n\
544 procedure /p\n\
545 package spec /s\n\
546 package body /b\n\
547 type /t\n\
548 task /k\n\
549 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
550 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
551 will just search for any tag `bidule'.";
553 /* Assembly code */
554 static const char *Asm_suffixes [] =
555 { "a", /* Unix assembler */
556 "asm", /* Microcontroller assembly */
557 "def", /* BSO/Tasking definition includes */
558 "inc", /* Microcontroller include files */
559 "ins", /* Microcontroller include files */
560 "s", "sa", /* Unix assembler */
561 "S", /* cpp-processed Unix assembler */
562 "src", /* BSO/Tasking C compiler output */
563 NULL
565 static const char Asm_help [] =
566 "In assembler code, labels appearing at the beginning of a line,\n\
567 followed by a colon, are tags.";
570 /* Note that .c and .h can be considered C++, if the --c++ flag was
571 given, or if the `class' or `template' keywords are met inside the file.
572 That is why default_C_entries is called for these. */
573 static const char *default_C_suffixes [] =
574 { "c", "h", NULL };
575 #if CTAGS /* C help for Ctags */
576 static const char default_C_help [] =
577 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
578 Use -T to tag definitions of `struct', `union' and `enum'.\n\
579 Use -d to tag `#define' macro definitions and `enum' constants.\n\
580 Use --globals to tag global variables.\n\
581 You can tag function declarations and external variables by\n\
582 using `--declarations', and struct members by using `--members'.";
583 #else /* C help for Etags */
584 static const char default_C_help [] =
585 "In C code, any C function or typedef is a tag, and so are\n\
586 definitions of `struct', `union' and `enum'. `#define' macro\n\
587 definitions and `enum' constants are tags unless you specify\n\
588 `--no-defines'. Global variables are tags unless you specify\n\
589 `--no-globals' and so are struct members unless you specify\n\
590 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
591 `--no-members' can make the tags table file much smaller.\n\
592 You can tag function declarations and external variables by\n\
593 using `--declarations'.";
594 #endif /* C help for Ctags and Etags */
596 static const char *Cplusplus_suffixes [] =
597 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
598 "M", /* Objective C++ */
599 "pdb", /* PostScript with C syntax */
600 NULL };
601 static const char Cplusplus_help [] =
602 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
603 --help --lang=c --lang=c++ for full help.)\n\
604 In addition to C tags, member functions are also recognized. Member\n\
605 variables are recognized unless you use the `--no-members' option.\n\
606 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
607 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
608 `operator+'.";
/* Java: file name suffixes and detailed help text.  The help text is
   never modified, so it is declared const like the help strings of the
   other languages.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol: file name suffixes and detailed help text.  The help text is
   never modified, so it is declared const like the help strings of the
   other languages.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
623 static const char *Cstar_suffixes [] =
624 { "cs", "hs", NULL };
626 static const char *Erlang_suffixes [] =
627 { "erl", "hrl", NULL };
628 static const char Erlang_help [] =
629 "In Erlang code, the tags are the functions, records and macros\n\
630 defined in the file.";
/* Forth: file name suffixes and detailed help text.  The suffix table
   is only used within this file, so it gets internal linkage like the
   suffix tables of the other languages.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
638 static const char *Fortran_suffixes [] =
639 { "F", "f", "f90", "for", NULL };
640 static const char Fortran_help [] =
641 "In Fortran code, functions, subroutines and block data are tags.";
643 static const char *HTML_suffixes [] =
644 { "htm", "html", "shtml", NULL };
645 static const char HTML_help [] =
646 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
647 `h3' headers. Also, tags are `name=' in anchors and all\n\
648 occurrences of `id='.";
650 static const char *Lisp_suffixes [] =
651 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
652 static const char Lisp_help [] =
653 "In Lisp code, any function defined with `defun', any variable\n\
654 defined with `defvar' or `defconst', and in general the first\n\
655 argument of any expression that starts with `(def' in column zero\n\
656 is a tag.";
658 static const char *Lua_suffixes [] =
659 { "lua", "LUA", NULL };
660 static const char Lua_help [] =
661 "In Lua scripts, all functions are tags.";
663 static const char *Makefile_filenames [] =
664 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
665 static const char Makefile_help [] =
666 "In makefiles, targets are tags; additionally, variables are tags\n\
667 unless you specify `--no-globals'.";
669 static const char *Objc_suffixes [] =
670 { "lm", /* Objective lex file */
671 "m", /* Objective C file */
672 NULL };
673 static const char Objc_help [] =
674 "In Objective C code, tags include Objective C definitions for classes,\n\
675 class categories, methods and protocols. Tags for variables and\n\
676 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
677 (Use --help --lang=c --lang=objc --lang=java for full help.)";
679 static const char *Pascal_suffixes [] =
680 { "p", "pas", NULL };
681 static const char Pascal_help [] =
682 "In Pascal code, the tags are the functions and procedures defined\n\
683 in the file.";
684 /* " // this is for working around an Emacs highlighting bug... */
686 static const char *Perl_suffixes [] =
687 { "pl", "pm", NULL };
688 static const char *Perl_interpreters [] =
689 { "perl", "@PERL@", NULL };
690 static const char Perl_help [] =
691 "In Perl code, the tags are the packages, subroutines and variables\n\
692 defined by the `package', `sub', `my' and `local' keywords. Use\n\
693 `--globals' if you want to tag global variables. Tags for\n\
694 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
695 defined in the default package is `main::SUB'.";
697 static const char *PHP_suffixes [] =
698 { "php", "php3", "php4", NULL };
699 static const char PHP_help [] =
700 "In PHP code, tags are functions, classes and defines. Unless you use\n\
701 the `--no-members' option, vars are tags too.";
703 static const char *plain_C_suffixes [] =
704 { "pc", /* Pro*C file */
705 NULL };
707 static const char *PS_suffixes [] =
708 { "ps", "psw", NULL }; /* .psw is for PSWrap */
709 static const char PS_help [] =
710 "In PostScript code, the tags are the functions.";
712 static const char *Prolog_suffixes [] =
713 { "prolog", NULL };
714 static const char Prolog_help [] =
715 "In Prolog code, tags are predicates and rules at the beginning of\n\
716 line.";
718 static const char *Python_suffixes [] =
719 { "py", NULL };
720 static const char Python_help [] =
721 "In Python code, `def' or `class' at the beginning of a line\n\
722 generate a tag.";
724 /* Can't do the `SCM' or `scm' prefix with a version number. */
725 static const char *Scheme_suffixes [] =
726 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
727 static const char Scheme_help [] =
728 "In Scheme code, tags include anything defined with `def' or with a\n\
729 construct whose name starts with `def'. They also include\n\
730 variables set with `set!' at top level in the file.";
732 static const char *TeX_suffixes [] =
733 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
734 static const char TeX_help [] =
735 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
736 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
737 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
738 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
739 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
741 Other commands can be specified by setting the environment variable\n\
742 `TEXTAGS' to a colon-separated list like, for example,\n\
743 TEXTAGS=\"mycommand:myothercommand\".";
746 static const char *Texinfo_suffixes [] =
747 { "texi", "texinfo", "txi", NULL };
748 static const char Texinfo_help [] =
749 "for texinfo files, lines starting with @node are tagged.";
751 static const char *Yacc_suffixes [] =
752 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
753 static const char Yacc_help [] =
754 "In Bison or Yacc input files, each rule defines as a tag the\n\
755 nonterminal it constructs. The portions of the file that contain\n\
756 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
757 for full help).";
/* Help text for the pseudo-language `auto'.  The wording fixes the
   typo "base on" -> "based on" in the text shown to the user.  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
763 static const char none_help [] =
764 "`none' is not a real language, it indicates to only do\n\
765 regexp processing on files.";
767 static const char no_lang_help [] =
768 "No detailed help available for this language.";
772 * Table of languages.
774 * It is ok for a given function to be listed under more than one
775 * name. I just didn't.
/* One entry per supported language.  Initializers are positional and
   follow the field order of `language': name, help, function,
   suffixes, filenames, interpreters, metasource.  Trailing fields left
   out of an entry are zero-initialized (NULL / FALSE).  The table ends
   with an entry whose name is NULL.  */
778 static language lang_names [] =
780 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
781 { "asm", Asm_help, Asm_labels, Asm_suffixes },
782 { "c", default_C_help, default_C_entries, default_C_suffixes },
783 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
784 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
785 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
786 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
787 { "forth", Forth_help, Forth_words, Forth_suffixes },
788 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
789 { "html", HTML_help, HTML_labels, HTML_suffixes },
790 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
791 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
792 { "lua", Lua_help, Lua_functions, Lua_suffixes },
/* Makefiles are recognized by file name, not by suffix.  */
793 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
794 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
795 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
/* Perl is the only entry that also lists interpreter names.  */
796 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
797 { "php", PHP_help, PHP_functions, PHP_suffixes },
798 { "postscript",PS_help, PS_functions, PS_suffixes },
799 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
800 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
801 { "python", Python_help, Python_functions, Python_suffixes },
802 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
803 { "tex", TeX_help, TeX_commands, TeX_suffixes },
804 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
/* Yacc is the only entry with the metasource flag set.  */
805 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
806 { "auto", auto_help }, /* default guessing scheme */
807 { "none", none_help, just_read_file }, /* regexp matching only */
808 { NULL } /* end of list */
812 static void
813 print_language_names (void)
815 language *lang;
816 const char **name, **ext;
818 puts ("\nThese are the currently supported languages, along with the\n\
819 default file names and dot suffixes:");
820 for (lang = lang_names; lang->name != NULL; lang++)
822 printf (" %-*s", 10, lang->name);
823 if (lang->filenames != NULL)
824 for (name = lang->filenames; *name != NULL; name++)
825 printf (" %s", *name);
826 if (lang->suffixes != NULL)
827 for (ext = lang->suffixes; *ext != NULL; ext++)
828 printf (" .%s", *ext);
829 puts ("");
831 puts ("where `auto' means use default language for files based on file\n\
832 name suffix, and `none' means only do regexp processing on files.\n\
833 If no language is specified and no matching suffix is found,\n\
834 the first line of the file is read for a sharp-bang (#!) sequence\n\
835 followed by the name of an interpreter. If no such sequence is found,\n\
836 Fortran is tried first; if no tags are found, C is tried next.\n\
837 When parsing any C file, a \"class\" or \"template\" keyword\n\
838 switches to C++.");
839 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
841 For detailed help on a given language use, for example,\n\
842 etags --help --lang=ada.");
845 #ifndef EMACS_NAME
846 # define EMACS_NAME "standalone"
847 #endif
848 #ifndef VERSION
849 # define VERSION "17.38.1.4"
850 #endif
851 static void
852 print_version (void)
854 /* Makes it easier to update automatically. */
855 char emacs_copyright[] = "Copyright (C) 2012 Free Software Foundation, Inc.";
857 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
858 puts (emacs_copyright);
859 puts ("This program is distributed under the terms in ETAGS.README");
861 exit (EXIT_SUCCESS);
864 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
865 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
866 #endif
868 static void
869 print_help (argument *argbuffer)
871 bool help_for_lang = FALSE;
873 for (; argbuffer->arg_type != at_end; argbuffer++)
874 if (argbuffer->arg_type == at_language)
876 if (help_for_lang)
877 puts ("");
878 puts (argbuffer->lang->help);
879 help_for_lang = TRUE;
882 if (help_for_lang)
883 exit (EXIT_SUCCESS);
885 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
887 These are the options accepted by %s.\n", progname, progname);
888 if (NO_LONG_OPTIONS)
889 puts ("WARNING: long option names do not work with this executable,\n\
890 as it is not linked with GNU getopt.");
891 else
892 puts ("You may use unambiguous abbreviations for the long option names.");
893 puts (" A - as file name means read names from stdin (one per line).\n\
894 Absolute names are stored in the output file as they are.\n\
895 Relative ones are stored relative to the output file's directory.\n");
897 puts ("-a, --append\n\
898 Append tag entries to existing tags file.");
900 puts ("--packages-only\n\
901 For Ada files, only generate tags for packages.");
903 if (CTAGS)
904 puts ("-B, --backward-search\n\
905 Write the search commands for the tag entries using '?', the\n\
906 backward-search command instead of '/', the forward-search command.");
908 /* This option is mostly obsolete, because etags can now automatically
909 detect C++. Retained for backward compatibility and for debugging and
910 experimentation. In principle, we could want to tag as C++ even
911 before any "class" or "template" keyword.
912 puts ("-C, --c++\n\
913 Treat files whose name suffix defaults to C language as C++ files.");
916 puts ("--declarations\n\
917 In C and derived languages, create tags for function declarations,");
918 if (CTAGS)
919 puts ("\tand create tags for extern variables if --globals is used.");
920 else
921 puts
922 ("\tand create tags for extern variables unless --no-globals is used.");
924 if (CTAGS)
925 puts ("-d, --defines\n\
926 Create tag entries for C #define constants and enum constants, too.");
927 else
928 puts ("-D, --no-defines\n\
929 Don't create tag entries for C #define constants and enum constants.\n\
930 This makes the tags file smaller.");
932 if (!CTAGS)
933 puts ("-i FILE, --include=FILE\n\
934 Include a note in tag file indicating that, when searching for\n\
935 a tag, one should also consult the tags file FILE after\n\
936 checking the current file.");
938 puts ("-l LANG, --language=LANG\n\
939 Force the following files to be considered as written in the\n\
940 named language up to the next --language=LANG option.");
942 if (CTAGS)
943 puts ("--globals\n\
944 Create tag entries for global variables in some languages.");
945 else
946 puts ("--no-globals\n\
947 Do not create tag entries for global variables in some\n\
948 languages. This makes the tags file smaller.");
950 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
951 puts ("--no-line-directive\n\
952 Ignore #line preprocessor directives in C and derived languages.");
954 if (CTAGS)
955 puts ("--members\n\
956 Create tag entries for members of structures in some languages.");
957 else
958 puts ("--no-members\n\
959 Do not create tag entries for members of structures\n\
960 in some languages.");
962 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
963 Make a tag for each line matching a regular expression pattern\n\
964 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
965 files only. REGEXFILE is a file containing one REGEXP per line.\n\
966 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
967 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
968 puts (" If TAGNAME/ is present, the tags created are named.\n\
969 For example Tcl named tags can be created with:\n\
970 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
971 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
972 `m' means to allow multi-line matches, `s' implies `m' and\n\
973 causes dot to match any character, including newline.");
975 puts ("-R, --no-regex\n\
976 Don't create tags from regexps for the following files.");
978 puts ("-I, --ignore-indentation\n\
979 In C and C++ do not assume that a closing brace in the first\n\
980 column is the final brace of a function or structure definition.");
982 puts ("-o FILE, --output=FILE\n\
983 Write the tags to FILE.");
985 puts ("--parse-stdin=NAME\n\
986 Read from standard input and record tags as belonging to file NAME.");
988 if (CTAGS)
990 puts ("-t, --typedefs\n\
991 Generate tag entries for C and Ada typedefs.");
992 puts ("-T, --typedefs-and-c++\n\
993 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
994 and C++ member functions.");
997 if (CTAGS)
998 puts ("-u, --update\n\
999 Update the tag entries for the given files, leaving tag\n\
1000 entries for other files in place. Currently, this is\n\
1001 implemented by deleting the existing entries for the given\n\
1002 files and then rewriting the new entries at the end of the\n\
1003 tags file. It is often faster to simply rebuild the entire\n\
1004 tag file than to use this.");
1006 if (CTAGS)
1008 puts ("-v, --vgrind\n\
1009 Print on the standard output an index of items intended for\n\
1010 human consumption, similar to the output of vgrind. The index\n\
1011 is sorted, and gives the page number of each item.");
1013 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1014 puts ("-w, --no-duplicates\n\
1015 Do not create duplicate tag entries, for compatibility with\n\
1016 traditional ctags.");
1018 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1019 puts ("-w, --no-warn\n\
1020 Suppress warning messages about duplicate tag entries.");
1022 puts ("-x, --cxref\n\
1023 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1024 The output uses line numbers instead of page numbers, but\n\
1025 beyond that the differences are cosmetic; try both to see\n\
1026 which you like.");
1029 puts ("-V, --version\n\
1030 Print the version of the program.\n\
1031 -h, --help\n\
1032 Print this help message.\n\
1033 Followed by one or more `--language' options prints detailed\n\
1034 help about tag generation for the specified languages.");
1036 print_language_names ();
1038 puts ("");
1039 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1041 exit (EXIT_SUCCESS);
1046 main (int argc, char **argv)
1048 int i;
1049 unsigned int nincluded_files;
1050 char **included_files;
1051 argument *argbuffer;
1052 int current_arg, file_count;
1053 linebuffer filename_lb;
1054 bool help_asked = FALSE;
1055 ptrdiff_t len;
1056 char *optstring;
1057 int opt;
1060 #ifdef DOS_NT
1061 _fmode = O_BINARY; /* all of files are treated as binary files */
1062 #endif /* DOS_NT */
1064 progname = argv[0];
1065 nincluded_files = 0;
1066 included_files = xnew (argc, char *);
1067 current_arg = 0;
1068 file_count = 0;
1070 /* Allocate enough no matter what happens. Overkill, but each one
1071 is small. */
1072 argbuffer = xnew (argc, argument);
1075 * Always find typedefs and structure tags.
1076 * Also default to find macro constants, enum constants, struct
1077 * members and global variables. Do it for both etags and ctags.
1079 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1080 globals = members = TRUE;
1082 /* When the optstring begins with a '-' getopt_long does not rearrange the
1083 non-options arguments to be at the end, but leaves them alone. */
1084 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1085 "ac:Cf:Il:o:r:RSVhH",
1086 (CTAGS) ? "BxdtTuvw" : "Di:");
1088 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1089 switch (opt)
1091 case 0:
1092 /* If getopt returns 0, then it has already processed a
1093 long-named option. We should do nothing. */
1094 break;
1096 case 1:
1097 /* This means that a file name has been seen. Record it. */
1098 argbuffer[current_arg].arg_type = at_filename;
1099 argbuffer[current_arg].what = optarg;
1100 len = strlen (optarg);
1101 if (whatlen_max < len)
1102 whatlen_max = len;
1103 ++current_arg;
1104 ++file_count;
1105 break;
1107 case STDIN:
1108 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1109 argbuffer[current_arg].arg_type = at_stdin;
1110 argbuffer[current_arg].what = optarg;
1111 len = strlen (optarg);
1112 if (whatlen_max < len)
1113 whatlen_max = len;
1114 ++current_arg;
1115 ++file_count;
1116 if (parsing_stdin)
1117 fatal ("cannot parse standard input more than once", (char *)NULL);
1118 parsing_stdin = TRUE;
1119 break;
1121 /* Common options. */
1122 case 'a': append_to_tagfile = TRUE; break;
1123 case 'C': cplusplus = TRUE; break;
1124 case 'f': /* for compatibility with old makefiles */
1125 case 'o':
1126 if (tagfile)
1128 error ("-o option may only be given once.");
1129 suggest_asking_for_help ();
1130 /* NOTREACHED */
1132 tagfile = optarg;
1133 break;
1134 case 'I':
1135 case 'S': /* for backward compatibility */
1136 ignoreindent = TRUE;
1137 break;
1138 case 'l':
1140 language *lang = get_language_from_langname (optarg);
1141 if (lang != NULL)
1143 argbuffer[current_arg].lang = lang;
1144 argbuffer[current_arg].arg_type = at_language;
1145 ++current_arg;
1148 break;
1149 case 'c':
1150 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1151 optarg = concat (optarg, "i", ""); /* memory leak here */
1152 /* FALLTHRU */
1153 case 'r':
1154 argbuffer[current_arg].arg_type = at_regexp;
1155 argbuffer[current_arg].what = optarg;
1156 len = strlen (optarg);
1157 if (whatlen_max < len)
1158 whatlen_max = len;
1159 ++current_arg;
1160 break;
1161 case 'R':
1162 argbuffer[current_arg].arg_type = at_regexp;
1163 argbuffer[current_arg].what = NULL;
1164 ++current_arg;
1165 break;
1166 case 'V':
1167 print_version ();
1168 break;
1169 case 'h':
1170 case 'H':
1171 help_asked = TRUE;
1172 break;
1174 /* Etags options */
1175 case 'D': constantypedefs = FALSE; break;
1176 case 'i': included_files[nincluded_files++] = optarg; break;
1178 /* Ctags options. */
1179 case 'B': searchar = '?'; break;
1180 case 'd': constantypedefs = TRUE; break;
1181 case 't': typedefs = TRUE; break;
1182 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1183 case 'u': update = TRUE; break;
1184 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1185 case 'x': cxref_style = TRUE; break;
1186 case 'w': no_warnings = TRUE; break;
1187 default:
1188 suggest_asking_for_help ();
1189 /* NOTREACHED */
1192 /* No more options. Store the rest of arguments. */
1193 for (; optind < argc; optind++)
1195 argbuffer[current_arg].arg_type = at_filename;
1196 argbuffer[current_arg].what = argv[optind];
1197 len = strlen (argv[optind]);
1198 if (whatlen_max < len)
1199 whatlen_max = len;
1200 ++current_arg;
1201 ++file_count;
1204 argbuffer[current_arg].arg_type = at_end;
1206 if (help_asked)
1207 print_help (argbuffer);
1208 /* NOTREACHED */
1210 if (nincluded_files == 0 && file_count == 0)
1212 error ("no input files specified.");
1213 suggest_asking_for_help ();
1214 /* NOTREACHED */
1217 if (tagfile == NULL)
1218 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1219 cwd = etags_getcwd (); /* the current working directory */
1220 if (cwd[strlen (cwd) - 1] != '/')
1222 char *oldcwd = cwd;
1223 cwd = concat (oldcwd, "/", "");
1224 free (oldcwd);
1227 /* Compute base directory for relative file names. */
1228 if (streq (tagfile, "-")
1229 || strneq (tagfile, "/dev/", 5))
1230 tagfiledir = cwd; /* relative file names are relative to cwd */
1231 else
1233 canonicalize_filename (tagfile);
1234 tagfiledir = absolute_dirname (tagfile, cwd);
1237 init (); /* set up boolean "functions" */
1239 linebuffer_init (&lb);
1240 linebuffer_init (&filename_lb);
1241 linebuffer_init (&filebuf);
1242 linebuffer_init (&token_name);
1244 if (!CTAGS)
1246 if (streq (tagfile, "-"))
1248 tagf = stdout;
1249 #ifdef DOS_NT
1250 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1251 doesn't take effect until after `stdout' is already open). */
1252 if (!isatty (fileno (stdout)))
1253 setmode (fileno (stdout), O_BINARY);
1254 #endif /* DOS_NT */
1256 else
1257 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1258 if (tagf == NULL)
1259 pfatal (tagfile);
1263 * Loop through files finding functions.
1265 for (i = 0; i < current_arg; i++)
1267 static language *lang; /* non-NULL if language is forced */
1268 char *this_file;
1270 switch (argbuffer[i].arg_type)
1272 case at_language:
1273 lang = argbuffer[i].lang;
1274 break;
1275 case at_regexp:
1276 analyse_regex (argbuffer[i].what);
1277 break;
1278 case at_filename:
1279 this_file = argbuffer[i].what;
1280 /* Input file named "-" means read file names from stdin
1281 (one per line) and use them. */
1282 if (streq (this_file, "-"))
1284 if (parsing_stdin)
1285 fatal ("cannot parse standard input AND read file names from it",
1286 (char *)NULL);
1287 while (readline_internal (&filename_lb, stdin) > 0)
1288 process_file_name (filename_lb.buffer, lang);
1290 else
1291 process_file_name (this_file, lang);
1292 break;
1293 case at_stdin:
1294 this_file = argbuffer[i].what;
1295 process_file (stdin, this_file, lang);
1296 break;
1300 free_regexps ();
1301 free (lb.buffer);
1302 free (filebuf.buffer);
1303 free (token_name.buffer);
1305 if (!CTAGS || cxref_style)
1307 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1308 put_entries (nodehead);
1309 free_tree (nodehead);
1310 nodehead = NULL;
1311 if (!CTAGS)
1313 fdesc *fdp;
1315 /* Output file entries that have no tags. */
1316 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1317 if (!fdp->written)
1318 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1320 while (nincluded_files-- > 0)
1321 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1323 if (fclose (tagf) == EOF)
1324 pfatal (tagfile);
1327 exit (EXIT_SUCCESS);
1330 /* From here on, we are in (CTAGS && !cxref_style) */
1331 if (update)
1333 char *cmd =
1334 xmalloc (strlen (tagfile) + whatlen_max +
1335 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1336 for (i = 0; i < current_arg; ++i)
1338 switch (argbuffer[i].arg_type)
1340 case at_filename:
1341 case at_stdin:
1342 break;
1343 default:
1344 continue; /* the for loop */
1346 strcpy (cmd, "mv ");
1347 strcat (cmd, tagfile);
1348 strcat (cmd, " OTAGS;fgrep -v '\t");
1349 strcat (cmd, argbuffer[i].what);
1350 strcat (cmd, "\t' OTAGS >");
1351 strcat (cmd, tagfile);
1352 strcat (cmd, ";rm OTAGS");
1353 if (system (cmd) != EXIT_SUCCESS)
1354 fatal ("failed to execute shell command", (char *)NULL);
1356 free (cmd);
1357 append_to_tagfile = TRUE;
1360 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1361 if (tagf == NULL)
1362 pfatal (tagfile);
1363 put_entries (nodehead); /* write all the tags (CTAGS) */
1364 free_tree (nodehead);
1365 nodehead = NULL;
1366 if (fclose (tagf) == EOF)
1367 pfatal (tagfile);
1369 if (CTAGS)
1370 if (append_to_tagfile || update)
1372 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1373 /* Maybe these should be used:
1374 setenv ("LC_COLLATE", "C", 1);
1375 setenv ("LC_ALL", "C", 1); */
1376 strcpy (cmd, "sort -u -o ");
1377 strcat (cmd, tagfile);
1378 strcat (cmd, " ");
1379 strcat (cmd, tagfile);
1380 exit (system (cmd));
1382 return EXIT_SUCCESS;
1387 * Return a compressor given the file name. If EXTPTR is non-zero,
1388 * return a pointer into FILE where the compressor-specific
1389 * extension begins. If no compressor is found, NULL is returned
1390 * and EXTPTR is not significant.
1391 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1393 static compressor *
1394 get_compressor_from_suffix (char *file, char **extptr)
1396 compressor *compr;
1397 char *slash, *suffix;
1399 /* File has been processed by canonicalize_filename,
1400 so we don't need to consider backslashes on DOS_NT. */
1401 slash = etags_strrchr (file, '/');
1402 suffix = etags_strrchr (file, '.');
1403 if (suffix == NULL || suffix < slash)
1404 return NULL;
1405 if (extptr != NULL)
1406 *extptr = suffix;
1407 suffix += 1;
1408 /* Let those poor souls who live with DOS 8+3 file name limits get
1409 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1410 Only the first do loop is run if not MSDOS */
1413 for (compr = compressors; compr->suffix != NULL; compr++)
1414 if (streq (compr->suffix, suffix))
1415 return compr;
1416 if (!MSDOS)
1417 break; /* do it only once: not really a loop */
1418 if (extptr != NULL)
1419 *extptr = ++suffix;
1420 } while (*suffix != '\0');
1421 return NULL;
1427 * Return a language given the name.
1429 static language *
1430 get_language_from_langname (const char *name)
1432 language *lang;
1434 if (name == NULL)
1435 error ("empty language name");
1436 else
1438 for (lang = lang_names; lang->name != NULL; lang++)
1439 if (streq (name, lang->name))
1440 return lang;
1441 error ("unknown language \"%s\"", name);
1444 return NULL;
1449 * Return a language given the interpreter name.
1451 static language *
1452 get_language_from_interpreter (char *interpreter)
1454 language *lang;
1455 const char **iname;
1457 if (interpreter == NULL)
1458 return NULL;
1459 for (lang = lang_names; lang->name != NULL; lang++)
1460 if (lang->interpreters != NULL)
1461 for (iname = lang->interpreters; *iname != NULL; iname++)
1462 if (streq (*iname, interpreter))
1463 return lang;
1465 return NULL;
1471 * Return a language given the file name.
1473 static language *
1474 get_language_from_filename (char *file, int case_sensitive)
1476 language *lang;
1477 const char **name, **ext, *suffix;
1479 /* Try whole file name first. */
1480 for (lang = lang_names; lang->name != NULL; lang++)
1481 if (lang->filenames != NULL)
1482 for (name = lang->filenames; *name != NULL; name++)
1483 if ((case_sensitive)
1484 ? streq (*name, file)
1485 : strcaseeq (*name, file))
1486 return lang;
1488 /* If not found, try suffix after last dot. */
1489 suffix = etags_strrchr (file, '.');
1490 if (suffix == NULL)
1491 return NULL;
1492 suffix += 1;
1493 for (lang = lang_names; lang->name != NULL; lang++)
1494 if (lang->suffixes != NULL)
1495 for (ext = lang->suffixes; *ext != NULL; ext++)
1496 if ((case_sensitive)
1497 ? streq (*ext, suffix)
1498 : strcaseeq (*ext, suffix))
1499 return lang;
1500 return NULL;
1505 * This routine is called on each file argument.
1507 static void
1508 process_file_name (char *file, language *lang)
1510 struct stat stat_buf;
1511 FILE *inf;
1512 fdesc *fdp;
1513 compressor *compr;
1514 char *compressed_name, *uncompressed_name;
1515 char *ext, *real_name;
1516 int retval;
1518 canonicalize_filename (file);
1519 if (streq (file, tagfile) && !streq (tagfile, "-"))
1521 error ("skipping inclusion of %s in self.", file);
1522 return;
1524 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1526 compressed_name = NULL;
1527 real_name = uncompressed_name = savestr (file);
1529 else
1531 real_name = compressed_name = savestr (file);
1532 uncompressed_name = savenstr (file, ext - file);
1535 /* If the canonicalized uncompressed name
1536 has already been dealt with, skip it silently. */
1537 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1539 assert (fdp->infname != NULL);
1540 if (streq (uncompressed_name, fdp->infname))
1541 goto cleanup;
1544 if (stat (real_name, &stat_buf) != 0)
1546 /* Reset real_name and try with a different name. */
1547 real_name = NULL;
1548 if (compressed_name != NULL) /* try with the given suffix */
1550 if (stat (uncompressed_name, &stat_buf) == 0)
1551 real_name = uncompressed_name;
1553 else /* try all possible suffixes */
1555 for (compr = compressors; compr->suffix != NULL; compr++)
1557 compressed_name = concat (file, ".", compr->suffix);
1558 if (stat (compressed_name, &stat_buf) != 0)
1560 if (MSDOS)
1562 char *suf = compressed_name + strlen (file);
1563 size_t suflen = strlen (compr->suffix) + 1;
1564 for ( ; suf[1]; suf++, suflen--)
1566 memmove (suf, suf + 1, suflen);
1567 if (stat (compressed_name, &stat_buf) == 0)
1569 real_name = compressed_name;
1570 break;
1573 if (real_name != NULL)
1574 break;
1575 } /* MSDOS */
1576 free (compressed_name);
1577 compressed_name = NULL;
1579 else
1581 real_name = compressed_name;
1582 break;
1586 if (real_name == NULL)
1588 perror (file);
1589 goto cleanup;
1591 } /* try with a different name */
1593 if (!S_ISREG (stat_buf.st_mode))
1595 error ("skipping %s: it is not a regular file.", real_name);
1596 goto cleanup;
1598 if (real_name == compressed_name)
1600 char *cmd = concat (compr->command, " ", real_name);
1601 inf = (FILE *) popen (cmd, "r");
1602 free (cmd);
1604 else
1605 inf = fopen (real_name, "r");
1606 if (inf == NULL)
1608 perror (real_name);
1609 goto cleanup;
1612 process_file (inf, uncompressed_name, lang);
1614 if (real_name == compressed_name)
1615 retval = pclose (inf);
1616 else
1617 retval = fclose (inf);
1618 if (retval < 0)
1619 pfatal (file);
1621 cleanup:
1622 free (compressed_name);
1623 free (uncompressed_name);
1624 last_node = NULL;
1625 curfdp = NULL;
1626 return;
1629 static void
1630 process_file (FILE *fh, char *fn, language *lang)
1632 static const fdesc emptyfdesc;
1633 fdesc *fdp;
1635 /* Create a new input file description entry. */
1636 fdp = xnew (1, fdesc);
1637 *fdp = emptyfdesc;
1638 fdp->next = fdhead;
1639 fdp->infname = savestr (fn);
1640 fdp->lang = lang;
1641 fdp->infabsname = absolute_filename (fn, cwd);
1642 fdp->infabsdir = absolute_dirname (fn, cwd);
1643 if (filename_is_absolute (fn))
1645 /* An absolute file name. Canonicalize it. */
1646 fdp->taggedfname = absolute_filename (fn, NULL);
1648 else
1650 /* A file name relative to cwd. Make it relative
1651 to the directory of the tags file. */
1652 fdp->taggedfname = relative_filename (fn, tagfiledir);
1654 fdp->usecharno = TRUE; /* use char position when making tags */
1655 fdp->prop = NULL;
1656 fdp->written = FALSE; /* not written on tags file yet */
1658 fdhead = fdp;
1659 curfdp = fdhead; /* the current file description */
1661 find_entries (fh);
1663 /* If not Ctags, and if this is not metasource and if it contained no #line
1664 directives, we can write the tags and free all nodes pointing to
1665 curfdp. */
1666 if (!CTAGS
1667 && curfdp->usecharno /* no #line directives in this file */
1668 && !curfdp->lang->metasource)
1670 node *np, *prev;
1672 /* Look for the head of the sublist relative to this file. See add_node
1673 for the structure of the node tree. */
1674 prev = NULL;
1675 for (np = nodehead; np != NULL; prev = np, np = np->left)
1676 if (np->fdp == curfdp)
1677 break;
1679 /* If we generated tags for this file, write and delete them. */
1680 if (np != NULL)
1682 /* This is the head of the last sublist, if any. The following
1683 instructions depend on this being true. */
1684 assert (np->left == NULL);
1686 assert (fdhead == curfdp);
1687 assert (last_node->fdp == curfdp);
1688 put_entries (np); /* write tags for file curfdp->taggedfname */
1689 free_tree (np); /* remove the written nodes */
1690 if (prev == NULL)
1691 nodehead = NULL; /* no nodes left */
1692 else
1693 prev->left = NULL; /* delete the pointer to the sublist */
1699 * This routine sets up the boolean pseudo-functions which work
1700 * by setting boolean flags dependent upon the corresponding character.
1701 * Every char which is NOT in that string is not a white char. Therefore,
1702 * all of the array "_wht" is set to FALSE, and then the elements
1703 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1704 * of a char is TRUE if it is the string "white", else FALSE.
1706 static void
1707 init (void)
1709 register const char *sp;
1710 register int i;
1712 for (i = 0; i < CHARS; i++)
1713 iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1714 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1715 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1716 notinname ('\0') = notinname ('\n');
1717 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1718 begtoken ('\0') = begtoken ('\n');
1719 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1720 intoken ('\0') = intoken ('\n');
1721 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1722 endtoken ('\0') = endtoken ('\n');
1726 * This routine opens the specified file and calls the function
1727 * which finds the function and type definitions.
1729 static void
1730 find_entries (FILE *inf)
1732 char *cp;
1733 language *lang = curfdp->lang;
1734 Lang_function *parser = NULL;
1736 /* If user specified a language, use it. */
1737 if (lang != NULL && lang->function != NULL)
1739 parser = lang->function;
1742 /* Else try to guess the language given the file name. */
1743 if (parser == NULL)
1745 lang = get_language_from_filename (curfdp->infname, TRUE);
1746 if (lang != NULL && lang->function != NULL)
1748 curfdp->lang = lang;
1749 parser = lang->function;
1753 /* Else look for sharp-bang as the first two characters. */
1754 if (parser == NULL
1755 && readline_internal (&lb, inf) > 0
1756 && lb.len >= 2
1757 && lb.buffer[0] == '#'
1758 && lb.buffer[1] == '!')
1760 char *lp;
1762 /* Set lp to point at the first char after the last slash in the
1763 line or, if no slashes, at the first nonblank. Then set cp to
1764 the first successive blank and terminate the string. */
1765 lp = etags_strrchr (lb.buffer+2, '/');
1766 if (lp != NULL)
1767 lp += 1;
1768 else
1769 lp = skip_spaces (lb.buffer + 2);
1770 cp = skip_non_spaces (lp);
1771 *cp = '\0';
1773 if (strlen (lp) > 0)
1775 lang = get_language_from_interpreter (lp);
1776 if (lang != NULL && lang->function != NULL)
1778 curfdp->lang = lang;
1779 parser = lang->function;
1784 /* We rewind here, even if inf may be a pipe. We fail if the
1785 length of the first line is longer than the pipe block size,
1786 which is unlikely. */
1787 rewind (inf);
1789 /* Else try to guess the language given the case insensitive file name. */
1790 if (parser == NULL)
1792 lang = get_language_from_filename (curfdp->infname, FALSE);
1793 if (lang != NULL && lang->function != NULL)
1795 curfdp->lang = lang;
1796 parser = lang->function;
1800 /* Else try Fortran or C. */
1801 if (parser == NULL)
1803 node *old_last_node = last_node;
1805 curfdp->lang = get_language_from_langname ("fortran");
1806 find_entries (inf);
1808 if (old_last_node == last_node)
1809 /* No Fortran entries found. Try C. */
1811 /* We do not tag if rewind fails.
1812 Only the file name will be recorded in the tags file. */
1813 rewind (inf);
1814 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1815 find_entries (inf);
1817 return;
1820 if (!no_line_directive
1821 && curfdp->lang != NULL && curfdp->lang->metasource)
1822 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1823 file, or anyway we parsed a file that is automatically generated from
1824 this one. If this is the case, the bingo.c file contained #line
1825 directives that generated tags pointing to this file. Let's delete
1826 them all before parsing this file, which is the real source. */
1828 fdesc **fdpp = &fdhead;
1829 while (*fdpp != NULL)
1830 if (*fdpp != curfdp
1831 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1832 /* We found one of those! We must delete both the file description
1833 and all tags referring to it. */
1835 fdesc *badfdp = *fdpp;
1837 /* Delete the tags referring to badfdp->taggedfname
1838 that were obtained from badfdp->infname. */
1839 invalidate_nodes (badfdp, &nodehead);
1841 *fdpp = badfdp->next; /* remove the bad description from the list */
1842 free_fdesc (badfdp);
1844 else
1845 fdpp = &(*fdpp)->next; /* advance the list pointer */
1848 assert (parser != NULL);
1850 /* Generic initializations before reading from file. */
1851 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1853 /* Generic initializations before parsing file with readline. */
1854 lineno = 0; /* reset global line number */
1855 charno = 0; /* reset global char number */
1856 linecharno = 0; /* reset global char number of line start */
1858 parser (inf);
1860 regex_tag_multiline ();
1865 * Check whether an implicitly named tag should be created,
1866 * then call `pfnote'.
1867 * NAME is a string that is internally copied by this function.
1869 * TAGS format specification
1870 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1871 * The following is explained in some more detail in etc/ETAGS.EBNF.
1873 * make_tag creates tags with "implicit tag names" (unnamed tags)
1874 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1875 * 1. NAME does not contain any of the characters in NONAM;
1876 * 2. LINESTART contains name as either a rightmost, or rightmost but
1877 * one character, substring;
1878 * 3. the character, if any, immediately before NAME in LINESTART must
1879 * be a character in NONAM;
1880 * 4. the character, if any, immediately after NAME in LINESTART must
1881 * also be a character in NONAM.
1883 * The implementation uses the notinname() macro, which recognizes the
1884 * characters stored in the string `nonam'.
1885 * etags.el needs to use the same characters that are in NONAM.
1887 static void
1888 make_tag (const char *name, /* tag name, or NULL if unnamed */
1889 int namelen, /* tag length */
1890 int is_func, /* tag is a function */
1891 char *linestart, /* start of the line where tag is */
1892 int linelen, /* length of the line where tag is */
1893 int lno, /* line number */
1894 long int cno) /* character number */
1896 bool named = (name != NULL && namelen > 0);
1897 char *nname = NULL;
1899 if (!CTAGS && named) /* maybe set named to false */
1900 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1901 such that etags.el can guess a name from it. */
1903 int i;
1904 register const char *cp = name;
1906 for (i = 0; i < namelen; i++)
1907 if (notinname (*cp++))
1908 break;
1909 if (i == namelen) /* rule #1 */
1911 cp = linestart + linelen - namelen;
1912 if (notinname (linestart[linelen-1]))
1913 cp -= 1; /* rule #4 */
1914 if (cp >= linestart /* rule #2 */
1915 && (cp == linestart
1916 || notinname (cp[-1])) /* rule #3 */
1917 && strneq (name, cp, namelen)) /* rule #2 */
1918 named = FALSE; /* use implicit tag name */
1922 if (named)
1923 nname = savenstr (name, namelen);
1925 pfnote (nname, is_func, linestart, linelen, lno, cno);
1928 /* Record a tag. */
1929 static void
1930 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1931 /* tag name, or NULL if unnamed */
1932 /* tag is a function */
1933 /* start of the line where tag is */
1934 /* length of the line where tag is */
1935 /* line number */
1936 /* character number */
1938 register node *np;
1940 assert (name == NULL || name[0] != '\0');
1941 if (CTAGS && name == NULL)
1942 return;
1944 np = xnew (1, node);
1946 /* If ctags mode, change name "main" to M<thisfilename>. */
1947 if (CTAGS && !cxref_style && streq (name, "main"))
1949 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1950 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1951 fp = etags_strrchr (np->name, '.');
1952 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1953 fp[0] = '\0';
1955 else
1956 np->name = name;
1957 np->valid = TRUE;
1958 np->been_warned = FALSE;
1959 np->fdp = curfdp;
1960 np->is_func = is_func;
1961 np->lno = lno;
1962 if (np->fdp->usecharno)
1963 /* Our char numbers are 0-base, because of C language tradition?
1964 ctags compatibility? old versions compatibility? I don't know.
1965 Anyway, since emacs's are 1-base we expect etags.el to take care
1966 of the difference. If we wanted to have 1-based numbers, we would
1967 uncomment the +1 below. */
1968 np->cno = cno /* + 1 */ ;
1969 else
1970 np->cno = invalidcharno;
1971 np->left = np->right = NULL;
1972 if (CTAGS && !cxref_style)
1974 if (strlen (linestart) < 50)
1975 np->regex = concat (linestart, "$", "");
1976 else
1977 np->regex = savenstr (linestart, 50);
1979 else
1980 np->regex = savenstr (linestart, linelen);
1982 add_node (np, &nodehead);
1986 * free_tree ()
1987 * recurse on left children, iterate on right children.
1989 static void
1990 free_tree (register node *np)
1992 while (np)
1994 register node *node_right = np->right;
1995 free_tree (np->left);
1996 free (np->name);
1997 free (np->regex);
1998 free (np);
1999 np = node_right;
2004 * free_fdesc ()
2005 * delete a file description
2007 static void
2008 free_fdesc (register fdesc *fdp)
2010 free (fdp->infname);
2011 free (fdp->infabsname);
2012 free (fdp->infabsdir);
2013 free (fdp->taggedfname);
2014 free (fdp->prop);
2015 free (fdp);
2019 * add_node ()
2020 * Adds a node to the tree of nodes. In etags mode, sort by file
2021 * name. In ctags mode, sort by tag name. Make no attempt at
2022 * balancing.
2024 * add_node is the only function allowed to add nodes, so it can
2025 * maintain state.
2027 static void
2028 add_node (node *np, node **cur_node_p)
2030 register int dif;
2031 register node *cur_node = *cur_node_p;
2033 if (cur_node == NULL)
2035 *cur_node_p = np;
2036 last_node = np;
2037 return;
2040 if (!CTAGS)
2041 /* Etags Mode */
2043 /* For each file name, tags are in a linked sublist on the right
2044 pointer. The first tags of different files are a linked list
2045 on the left pointer. last_node points to the end of the last
2046 used sublist. */
2047 if (last_node != NULL && last_node->fdp == np->fdp)
2049 /* Let's use the same sublist as the last added node. */
2050 assert (last_node->right == NULL);
2051 last_node->right = np;
2052 last_node = np;
2054 else if (cur_node->fdp == np->fdp)
2056 /* Scanning the list we found the head of a sublist which is
2057 good for us. Let's scan this sublist. */
2058 add_node (np, &cur_node->right);
2060 else
2061 /* The head of this sublist is not good for us. Let's try the
2062 next one. */
2063 add_node (np, &cur_node->left);
2064 } /* if ETAGS mode */
2066 else
2068 /* Ctags Mode */
2069 dif = strcmp (np->name, cur_node->name);
2072 * If this tag name matches an existing one, then
2073 * do not add the node, but maybe print a warning.
2075 if (no_duplicates && !dif)
2077 if (np->fdp == cur_node->fdp)
2079 if (!no_warnings)
2081 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2082 np->fdp->infname, lineno, np->name);
2083 fprintf (stderr, "Second entry ignored\n");
2086 else if (!cur_node->been_warned && !no_warnings)
2088 fprintf
2089 (stderr,
2090 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2091 np->fdp->infname, cur_node->fdp->infname, np->name);
2092 cur_node->been_warned = TRUE;
2094 return;
2097 /* Actually add the node */
2098 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2099 } /* if CTAGS mode */
2103 * invalidate_nodes ()
2104 * Scan the node tree and invalidate all nodes pointing to the
2105 * given file description (CTAGS case) or free them (ETAGS case).
2107 static void
2108 invalidate_nodes (fdesc *badfdp, node **npp)
2110 node *np = *npp;
2112 if (np == NULL)
2113 return;
2115 if (CTAGS)
2117 if (np->left != NULL)
2118 invalidate_nodes (badfdp, &np->left);
2119 if (np->fdp == badfdp)
2120 np->valid = FALSE;
2121 if (np->right != NULL)
2122 invalidate_nodes (badfdp, &np->right);
2124 else
2126 assert (np->fdp != NULL);
2127 if (np->fdp == badfdp)
2129 *npp = np->left; /* detach the sublist from the list */
2130 np->left = NULL; /* isolate it */
2131 free_tree (np); /* free it */
2132 invalidate_nodes (badfdp, npp);
2134 else
2135 invalidate_nodes (badfdp, &np->left);
2140 static int total_size_of_entries (node *);
2141 static int number_len (long);
/* Length of a non-negative number's decimal representation.
   Any value below 10 (including a negative one) yields 1.  */
static int
number_len (long int num)
{
  int digits;

  /* One digit to start with, one more for each factor of ten.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2154 * Return total number of characters that put_entries will output for
2155 * the nodes in the linked list at the right of the specified node.
2156 * This count is irrelevant with etags.el since emacs 19.34 at least,
2157 * but is still supplied for backward compatibility.
2159 static int
2160 total_size_of_entries (register node *np)
2162 register int total = 0;
2164 for (; np != NULL; np = np->right)
2165 if (np->valid)
2167 total += strlen (np->regex) + 1; /* pat\177 */
2168 if (np->name != NULL)
2169 total += strlen (np->name) + 1; /* name\001 */
2170 total += number_len ((long) np->lno) + 1; /* lno, */
2171 if (np->cno != invalidcharno) /* cno */
2172 total += number_len (np->cno);
2173 total += 1; /* newline */
2176 return total;
2179 static void
2180 put_entries (register node *np)
2182 register char *sp;
2183 static fdesc *fdp = NULL;
2185 if (np == NULL)
2186 return;
2188 /* Output subentries that precede this one */
2189 if (CTAGS)
2190 put_entries (np->left);
2192 /* Output this entry */
2193 if (np->valid)
2195 if (!CTAGS)
2197 /* Etags mode */
2198 if (fdp != np->fdp)
2200 fdp = np->fdp;
2201 fprintf (tagf, "\f\n%s,%d\n",
2202 fdp->taggedfname, total_size_of_entries (np));
2203 fdp->written = TRUE;
2205 fputs (np->regex, tagf);
2206 fputc ('\177', tagf);
2207 if (np->name != NULL)
2209 fputs (np->name, tagf);
2210 fputc ('\001', tagf);
2212 fprintf (tagf, "%d,", np->lno);
2213 if (np->cno != invalidcharno)
2214 fprintf (tagf, "%ld", np->cno);
2215 fputs ("\n", tagf);
2217 else
2219 /* Ctags mode */
2220 if (np->name == NULL)
2221 error ("internal error: NULL name in ctags mode.");
2223 if (cxref_style)
2225 if (vgrind_style)
2226 fprintf (stdout, "%s %s %d\n",
2227 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2228 else
2229 fprintf (stdout, "%-16s %3d %-16s %s\n",
2230 np->name, np->lno, np->fdp->taggedfname, np->regex);
2232 else
2234 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2236 if (np->is_func)
2237 { /* function or #define macro with args */
2238 putc (searchar, tagf);
2239 putc ('^', tagf);
2241 for (sp = np->regex; *sp; sp++)
2243 if (*sp == '\\' || *sp == searchar)
2244 putc ('\\', tagf);
2245 putc (*sp, tagf);
2247 putc (searchar, tagf);
2249 else
2250 { /* anything else; text pattern inadequate */
2251 fprintf (tagf, "%d", np->lno);
2253 putc ('\n', tagf);
2256 } /* if this node contains a valid tag */
2258 /* Output subentries that follow this one */
2259 put_entries (np->right);
2260 if (!CTAGS)
2261 put_entries (np->left);
/* C extensions: bit flags describing which C dialect is being parsed.
   C_STAR and C_JAVA both include the C_PLPL bit.  */
#define C_EXT	0x00fff		/* mask covering the C extension bits */
#define C_PLAIN 0x00000		/* plain C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* Java */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables.
 *
 * Classification of the keywords listed in the gperf word list; any
 * other token is st_none.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2289 static unsigned int hash (const char *, unsigned int);
2290 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2291 static enum sym_type C_symtype (char *, int, int);
2293 /* Feed stuff between (but not including) %[ and %] lines to:
2294 gperf -m 5
2296 %compare-strncmp
2297 %enum
2298 %struct-type
2299 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2301 if, 0, st_C_ignore
2302 for, 0, st_C_ignore
2303 while, 0, st_C_ignore
2304 switch, 0, st_C_ignore
2305 return, 0, st_C_ignore
2306 __attribute__, 0, st_C_attribute
2307 GTY, 0, st_C_attribute
2308 @interface, 0, st_C_objprot
2309 @protocol, 0, st_C_objprot
2310 @implementation,0, st_C_objimpl
2311 @end, 0, st_C_objend
2312 import, (C_JAVA & ~C_PLPL), st_C_ignore
2313 package, (C_JAVA & ~C_PLPL), st_C_ignore
2314 friend, C_PLPL, st_C_ignore
2315 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2316 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2317 interface, (C_JAVA & ~C_PLPL), st_C_struct
2318 class, 0, st_C_class
2319 namespace, C_PLPL, st_C_struct
2320 domain, C_STAR, st_C_struct
2321 union, 0, st_C_struct
2322 struct, 0, st_C_struct
2323 extern, 0, st_C_extern
2324 enum, 0, st_C_enum
2325 typedef, 0, st_C_typedef
2326 define, 0, st_C_define
2327 undef, 0, st_C_define
2328 operator, C_PLPL, st_C_operator
2329 template, 0, st_C_template
2330 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2331 DEFUN, 0, st_C_gnumacro
2332 SYSCALL, 0, st_C_gnumacro
2333 ENTRY, 0, st_C_gnumacro
2334 PSEUDO, 0, st_C_gnumacro
2335 # These are defined inside C functions, so currently they are not met.
2336 # EXFUN used in glibc, DEFVAR_* in emacs.
2337 #EXFUN, 0, st_C_gnumacro
2338 #DEFVAR_, 0, st_C_gnumacro
2340 and replace lines between %< and %> with its output, then:
2341 - remove the #if characterset check
2342 - make in_word_set static and not inline. */
2343 /*%<*/
2344 /* C code produced by gperf version 3.0.1 */
2345 /* Command-line: gperf -m 5 */
2346 /* Computed positions: -k'2-3' */
2348 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2349 /* maximum key range = 33, duplicates = 0 */
/* Hash function generated by gperf (key positions 2 and 3): the word
   length plus the association values of its second character and, for
   any length other than 2, its third character.  STR must therefore
   hold at least two characters, three when LEN is not 2.  */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only two-character words skip the third-character contribution.  */
  if (len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2397 static struct C_stab_entry *
2398 in_word_set (register const char *str, register unsigned int len)
2400 enum
2402 TOTAL_KEYWORDS = 33,
2403 MIN_WORD_LENGTH = 2,
2404 MAX_WORD_LENGTH = 15,
2405 MIN_HASH_VALUE = 2,
2406 MAX_HASH_VALUE = 34
2409 static struct C_stab_entry wordlist[] =
2411 {""}, {""},
2412 {"if", 0, st_C_ignore},
2413 {"GTY", 0, st_C_attribute},
2414 {"@end", 0, st_C_objend},
2415 {"union", 0, st_C_struct},
2416 {"define", 0, st_C_define},
2417 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2418 {"template", 0, st_C_template},
2419 {"operator", C_PLPL, st_C_operator},
2420 {"@interface", 0, st_C_objprot},
2421 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2422 {"friend", C_PLPL, st_C_ignore},
2423 {"typedef", 0, st_C_typedef},
2424 {"return", 0, st_C_ignore},
2425 {"@implementation",0, st_C_objimpl},
2426 {"@protocol", 0, st_C_objprot},
2427 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2428 {"extern", 0, st_C_extern},
2429 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2430 {"struct", 0, st_C_struct},
2431 {"domain", C_STAR, st_C_struct},
2432 {"switch", 0, st_C_ignore},
2433 {"enum", 0, st_C_enum},
2434 {"for", 0, st_C_ignore},
2435 {"namespace", C_PLPL, st_C_struct},
2436 {"class", 0, st_C_class},
2437 {"while", 0, st_C_ignore},
2438 {"undef", 0, st_C_define},
2439 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2440 {"__attribute__", 0, st_C_attribute},
2441 {"SYSCALL", 0, st_C_gnumacro},
2442 {"ENTRY", 0, st_C_gnumacro},
2443 {"PSEUDO", 0, st_C_gnumacro},
2444 {"DEFUN", 0, st_C_gnumacro}
2447 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2449 register int key = hash (str, len);
2451 if (key <= MAX_HASH_VALUE && key >= 0)
2453 register const char *s = wordlist[key].name;
2455 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2456 return &wordlist[key];
2459 return 0;
2461 /*%>*/
2463 static enum sym_type
2464 C_symtype (char *str, int len, int c_ext)
2466 register struct C_stab_entry *se = in_word_set (str, len);
2468 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2469 return st_none;
2470 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;	/* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */

  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.
    In this case the token contained here is the pattern that will be
    used to create a tag.
  */

  bool valid;			/* when false, do not create a tag; the
				   token should be invalidated whenever a
				   state machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2588 * Variables and functions for dealing with nested structures.
2589 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2591 static void pushclass_above (int, char *, int);
2592 static void popclass_above (int);
2593 static void write_classname (linebuffer *, const char *qualifier);
2595 static struct {
2596 char **cname; /* nested class names */
2597 int *bracelev; /* nested class brace level */
2598 int nl; /* class nesting level (elements used) */
2599 int size; /* length of the array */
2600 } cstack; /* stack for nested declaration tags */
2601 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2602 #define nestlev (cstack.nl)
2603 /* After struct keyword or in struct body, not inside a nested function. */
2604 #define instruct (structdef == snone && nestlev > 0 \
2605 && bracelev == cstack.bracelev[nestlev-1] + 1)
2607 static void
2608 pushclass_above (int bracelev, char *str, int len)
2610 int nl;
2612 popclass_above (bracelev);
2613 nl = cstack.nl;
2614 if (nl >= cstack.size)
2616 int size = cstack.size *= 2;
2617 xrnew (cstack.cname, size, char *);
2618 xrnew (cstack.bracelev, size, int);
2620 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2621 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2622 cstack.bracelev[nl] = bracelev;
2623 cstack.nl = nl + 1;
2626 static void
2627 popclass_above (int bracelev)
2629 int nl;
2631 for (nl = cstack.nl - 1;
2632 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2633 nl--)
2635 free (cstack.cname[nl]);
2636 cstack.nl = nl;
2640 static void
2641 write_classname (linebuffer *cn, const char *qualifier)
2643 int i, len;
2644 int qlen = strlen (qualifier);
2646 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2648 len = 0;
2649 cn->len = 0;
2650 cn->buffer[0] = '\0';
2652 else
2654 len = strlen (cstack.cname[0]);
2655 linebuffer_setlen (cn, len);
2656 strcpy (cn->buffer, cstack.cname[0]);
2658 for (i = 1; i < cstack.nl; i++)
2660 char *s;
2661 int slen;
2663 s = cstack.cname[i];
2664 if (s == NULL)
2665 continue;
2666 slen = strlen (s);
2667 len += slen + qlen;
2668 linebuffer_setlen (cn, len);
2669 strncat (cn->buffer, qualifier, qlen);
2670 strncat (cn->buffer, s, slen);
2675 static bool consider_token (char *, int, int, int *, int, int, bool *);
2676 static void make_C_tag (bool);
2679 * consider_token ()
2680 * checks to see if the current token is at the start of a
2681 * function or variable, or corresponds to a typedef, or
2682 * is a struct/union/enum tag, or #define, or an enum constant.
2684 * *IS_FUNC gets TRUE if the token is a function or #define macro
2685 * with args. C_EXTP points to which language we are looking at.
2687 * Globals
2688 * fvdef IN OUT
2689 * structdef IN OUT
2690 * definedef IN OUT
2691 * typdef IN OUT
2692 * objdef IN OUT
2695 static bool
2696 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2697 /* IN: token pointer */
2698 /* IN: token length */
2699 /* IN: first char after the token */
2700 /* IN, OUT: C extensions mask */
2701 /* IN: brace level */
2702 /* IN: parenthesis level */
2703 /* OUT: function or variable found */
2705 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2706 structtype is the type of the preceding struct-like keyword, and
2707 structbracelev is the brace level where it has been seen. */
2708 static enum sym_type structtype;
2709 static int structbracelev;
2710 static enum sym_type toktype;
2713 toktype = C_symtype (str, len, *c_extp);
2716 * Skip __attribute__
2718 if (toktype == st_C_attribute)
2720 inattribute = TRUE;
2721 return FALSE;
2725 * Advance the definedef state machine.
2727 switch (definedef)
2729 case dnone:
2730 /* We're not on a preprocessor line. */
2731 if (toktype == st_C_gnumacro)
2733 fvdef = fdefunkey;
2734 return FALSE;
2736 break;
2737 case dsharpseen:
2738 if (toktype == st_C_define)
2740 definedef = ddefineseen;
2742 else
2744 definedef = dignorerest;
2746 return FALSE;
2747 case ddefineseen:
2749 * Make a tag for any macro, unless it is a constant
2750 * and constantypedefs is FALSE.
2752 definedef = dignorerest;
2753 *is_func_or_var = (c == '(');
2754 if (!*is_func_or_var && !constantypedefs)
2755 return FALSE;
2756 else
2757 return TRUE;
2758 case dignorerest:
2759 return FALSE;
2760 default:
2761 error ("internal error: definedef value.");
2765 * Now typedefs
2767 switch (typdef)
2769 case tnone:
2770 if (toktype == st_C_typedef)
2772 if (typedefs)
2773 typdef = tkeyseen;
2774 fvextern = FALSE;
2775 fvdef = fvnone;
2776 return FALSE;
2778 break;
2779 case tkeyseen:
2780 switch (toktype)
2782 case st_none:
2783 case st_C_class:
2784 case st_C_struct:
2785 case st_C_enum:
2786 typdef = ttypeseen;
2788 break;
2789 case ttypeseen:
2790 if (structdef == snone && fvdef == fvnone)
2792 fvdef = fvnameseen;
2793 return TRUE;
2795 break;
2796 case tend:
2797 switch (toktype)
2799 case st_C_class:
2800 case st_C_struct:
2801 case st_C_enum:
2802 return FALSE;
2804 return TRUE;
2807 switch (toktype)
2809 case st_C_javastruct:
2810 if (structdef == stagseen)
2811 structdef = scolonseen;
2812 return FALSE;
2813 case st_C_template:
2814 case st_C_class:
2815 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2816 && bracelev == 0
2817 && definedef == dnone && structdef == snone
2818 && typdef == tnone && fvdef == fvnone)
2819 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2820 if (toktype == st_C_template)
2821 break;
2822 /* FALLTHRU */
2823 case st_C_struct:
2824 case st_C_enum:
2825 if (parlev == 0
2826 && fvdef != vignore
2827 && (typdef == tkeyseen
2828 || (typedefs_or_cplusplus && structdef == snone)))
2830 structdef = skeyseen;
2831 structtype = toktype;
2832 structbracelev = bracelev;
2833 if (fvdef == fvnameseen)
2834 fvdef = fvnone;
2836 return FALSE;
2839 if (structdef == skeyseen)
2841 structdef = stagseen;
2842 return TRUE;
2845 if (typdef != tnone)
2846 definedef = dnone;
2848 /* Detect Objective C constructs. */
2849 switch (objdef)
2851 case onone:
2852 switch (toktype)
2854 case st_C_objprot:
2855 objdef = oprotocol;
2856 return FALSE;
2857 case st_C_objimpl:
2858 objdef = oimplementation;
2859 return FALSE;
2861 break;
2862 case oimplementation:
2863 /* Save the class tag for functions or variables defined inside. */
2864 objtag = savenstr (str, len);
2865 objdef = oinbody;
2866 return FALSE;
2867 case oprotocol:
2868 /* Save the class tag for categories. */
2869 objtag = savenstr (str, len);
2870 objdef = otagseen;
2871 *is_func_or_var = TRUE;
2872 return TRUE;
2873 case oparenseen:
2874 objdef = ocatseen;
2875 *is_func_or_var = TRUE;
2876 return TRUE;
2877 case oinbody:
2878 break;
2879 case omethodsign:
2880 if (parlev == 0)
2882 fvdef = fvnone;
2883 objdef = omethodtag;
2884 linebuffer_setlen (&token_name, len);
2885 strncpy (token_name.buffer, str, len);
2886 token_name.buffer[len] = '\0';
2887 return TRUE;
2889 return FALSE;
2890 case omethodcolon:
2891 if (parlev == 0)
2892 objdef = omethodparm;
2893 return FALSE;
2894 case omethodparm:
2895 if (parlev == 0)
2897 fvdef = fvnone;
2898 objdef = omethodtag;
2899 linebuffer_setlen (&token_name, token_name.len + len);
2900 strncat (token_name.buffer, str, len);
2901 return TRUE;
2903 return FALSE;
2904 case oignore:
2905 if (toktype == st_C_objend)
2907 /* Memory leakage here: the string pointed by objtag is
2908 never released, because many tests would be needed to
2909 avoid breaking on incorrect input code. The amount of
2910 memory leaked here is the sum of the lengths of the
2911 class tags.
2912 free (objtag); */
2913 objdef = onone;
2915 return FALSE;
2918 /* A function, variable or enum constant? */
2919 switch (toktype)
2921 case st_C_extern:
2922 fvextern = TRUE;
2923 switch (fvdef)
2925 case finlist:
2926 case flistseen:
2927 case fignore:
2928 case vignore:
2929 break;
2930 default:
2931 fvdef = fvnone;
2933 return FALSE;
2934 case st_C_ignore:
2935 fvextern = FALSE;
2936 fvdef = vignore;
2937 return FALSE;
2938 case st_C_operator:
2939 fvdef = foperator;
2940 *is_func_or_var = TRUE;
2941 return TRUE;
2942 case st_none:
2943 if (constantypedefs
2944 && structdef == snone
2945 && structtype == st_C_enum && bracelev > structbracelev)
2946 return TRUE; /* enum constant */
2947 switch (fvdef)
2949 case fdefunkey:
2950 if (bracelev > 0)
2951 break;
2952 fvdef = fdefunname; /* GNU macro */
2953 *is_func_or_var = TRUE;
2954 return TRUE;
2955 case fvnone:
2956 switch (typdef)
2958 case ttypeseen:
2959 return FALSE;
2960 case tnone:
2961 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2962 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2964 fvdef = vignore;
2965 return FALSE;
2967 break;
2969 /* FALLTHRU */
2970 case fvnameseen:
2971 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2973 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2974 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2975 fvdef = foperator;
2976 *is_func_or_var = TRUE;
2977 return TRUE;
2979 if (bracelev > 0 && !instruct)
2980 break;
2981 fvdef = fvnameseen; /* function or variable */
2982 *is_func_or_var = TRUE;
2983 return TRUE;
2985 break;
2988 return FALSE;
2993 * C_entries often keeps pointers to tokens or lines which are older than
2994 * the line currently read. By keeping two line buffers, and switching
2995 * them at end of line, it is possible to use those pointers.
2997 static struct
2999 long linepos;
3000 linebuffer lb;
3001 } lbs[2];
3003 #define current_lb_is_new (newndx == curndx)
3004 #define switch_line_buffers() (curndx = 1 - curndx)
3006 #define curlb (lbs[curndx].lb)
3007 #define newlb (lbs[newndx].lb)
3008 #define curlinepos (lbs[curndx].linepos)
3009 #define newlinepos (lbs[newndx].linepos)
3011 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3012 #define cplpl (c_ext & C_PLPL)
3013 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3015 #define CNL_SAVE_DEFINEDEF() \
3016 do { \
3017 curlinepos = charno; \
3018 readline (&curlb, inf); \
3019 lp = curlb.buffer; \
3020 quotednl = FALSE; \
3021 newndx = curndx; \
3022 } while (0)
3024 #define CNL() \
3025 do { \
3026 CNL_SAVE_DEFINEDEF(); \
3027 if (savetoken.valid) \
3029 token = savetoken; \
3030 savetoken.valid = FALSE; \
3032 definedef = dnone; \
3033 } while (0)
3036 static void
3037 make_C_tag (int isfun)
3039 /* This function is never called when token.valid is FALSE, but
3040 we must protect against invalid input or internal errors. */
3041 if (token.valid)
3042 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3043 token.offset+token.length+1, token.lineno, token.linepos);
3044 else if (DEBUG)
3045 { /* this branch is optimized away if !DEBUG */
3046 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3047 token_name.len + 17, isfun, token.line,
3048 token.offset+token.length+1, token.lineno, token.linepos);
3049 error ("INVALID TOKEN");
3052 token.valid = FALSE;
3057 * C_entries ()
3058 * This routine finds functions, variables, typedefs,
3059 * #define's, enum constants and struct/union/enum definitions in
3060 * C syntax and adds them to the list.
3062 static void
3063 C_entries (int c_ext, FILE *inf)
3064 /* extension of C */
3065 /* input file */
3067 register char c; /* latest char read; '\0' for end of line */
3068 register char *lp; /* pointer one beyond the character `c' */
3069 int curndx, newndx; /* indices for current and new lb */
3070 register int tokoff; /* offset in line of start of current token */
3071 register int toklen; /* length of current token */
3072 const char *qualifier; /* string used to qualify names */
3073 int qlen; /* length of qualifier */
3074 int bracelev; /* current brace level */
3075 int bracketlev; /* current bracket level */
3076 int parlev; /* current parenthesis level */
3077 int attrparlev; /* __attribute__ parenthesis level */
3078 int templatelev; /* current template level */
3079 int typdefbracelev; /* bracelev where a typedef struct body begun */
3080 bool incomm, inquote, inchar, quotednl, midtoken;
3081 bool yacc_rules; /* in the rules part of a yacc file */
3082 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3085 linebuffer_init (&lbs[0].lb);
3086 linebuffer_init (&lbs[1].lb);
3087 if (cstack.size == 0)
3089 cstack.size = (DEBUG) ? 1 : 4;
3090 cstack.nl = 0;
3091 cstack.cname = xnew (cstack.size, char *);
3092 cstack.bracelev = xnew (cstack.size, int);
3095 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3096 curndx = newndx = 0;
3097 lp = curlb.buffer;
3098 *lp = 0;
3100 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3101 structdef = snone; definedef = dnone; objdef = onone;
3102 yacc_rules = FALSE;
3103 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3104 token.valid = savetoken.valid = FALSE;
3105 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3106 if (cjava)
3107 { qualifier = "."; qlen = 1; }
3108 else
3109 { qualifier = "::"; qlen = 2; }
3112 while (!feof (inf))
3114 c = *lp++;
3115 if (c == '\\')
3117 /* If we are at the end of the line, the next character is a
3118 '\0'; do not skip it, because it is what tells us
3119 to read the next line. */
3120 if (*lp == '\0')
3122 quotednl = TRUE;
3123 continue;
3125 lp++;
3126 c = ' ';
3128 else if (incomm)
3130 switch (c)
3132 case '*':
3133 if (*lp == '/')
3135 c = *lp++;
3136 incomm = FALSE;
3138 break;
3139 case '\0':
3140 /* Newlines inside comments do not end macro definitions in
3141 traditional cpp. */
3142 CNL_SAVE_DEFINEDEF ();
3143 break;
3145 continue;
3147 else if (inquote)
3149 switch (c)
3151 case '"':
3152 inquote = FALSE;
3153 break;
3154 case '\0':
3155 /* Newlines inside strings do not end macro definitions
3156 in traditional cpp, even though compilers don't
3157 usually accept them. */
3158 CNL_SAVE_DEFINEDEF ();
3159 break;
3161 continue;
3163 else if (inchar)
3165 switch (c)
3167 case '\0':
3168 /* Hmmm, something went wrong. */
3169 CNL ();
3170 /* FALLTHRU */
3171 case '\'':
3172 inchar = FALSE;
3173 break;
3175 continue;
3177 else switch (c)
3179 case '"':
3180 inquote = TRUE;
3181 if (bracketlev > 0)
3182 continue;
3183 if (inattribute)
3184 break;
3185 switch (fvdef)
3187 case fdefunkey:
3188 case fstartlist:
3189 case finlist:
3190 case fignore:
3191 case vignore:
3192 break;
3193 default:
3194 fvextern = FALSE;
3195 fvdef = fvnone;
3197 continue;
3198 case '\'':
3199 inchar = TRUE;
3200 if (bracketlev > 0)
3201 continue;
3202 if (inattribute)
3203 break;
3204 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3206 fvextern = FALSE;
3207 fvdef = fvnone;
3209 continue;
3210 case '/':
3211 if (*lp == '*')
3213 incomm = TRUE;
3214 lp++;
3215 c = ' ';
3216 if (bracketlev > 0)
3217 continue;
3219 else if (/* cplpl && */ *lp == '/')
3221 c = '\0';
3223 break;
3224 case '%':
3225 if ((c_ext & YACC) && *lp == '%')
3227 /* Entering or exiting rules section in yacc file. */
3228 lp++;
3229 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3230 typdef = tnone; structdef = snone;
3231 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3232 bracelev = 0;
3233 yacc_rules = !yacc_rules;
3234 continue;
3236 else
3237 break;
3238 case '#':
3239 if (definedef == dnone)
3241 char *cp;
3242 bool cpptoken = TRUE;
3244 /* Look back on this line. If all blanks, or nonblanks
3245 followed by an end of comment, this is a preprocessor
3246 token. */
3247 for (cp = newlb.buffer; cp < lp-1; cp++)
3248 if (!iswhite (*cp))
3250 if (*cp == '*' && cp[1] == '/')
3252 cp++;
3253 cpptoken = TRUE;
3255 else
3256 cpptoken = FALSE;
3258 if (cpptoken)
3259 definedef = dsharpseen;
3260 } /* if (definedef == dnone) */
3261 continue;
3262 case '[':
3263 bracketlev++;
3264 continue;
3265 default:
3266 if (bracketlev > 0)
3268 if (c == ']')
3269 --bracketlev;
3270 else if (c == '\0')
3271 CNL_SAVE_DEFINEDEF ();
3272 continue;
3274 break;
3275 } /* switch (c) */
3278 /* Consider token only if some involved conditions are satisfied. */
3279 if (typdef != tignore
3280 && definedef != dignorerest
3281 && fvdef != finlist
3282 && templatelev == 0
3283 && (definedef != dnone
3284 || structdef != scolonseen)
3285 && !inattribute)
3287 if (midtoken)
3289 if (endtoken (c))
3291 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3292 /* This handles :: in the middle,
3293 but not at the beginning of an identifier.
3294 Also, space-separated :: is not recognized. */
3296 if (c_ext & C_AUTO) /* automatic detection of C++ */
3297 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3298 lp += 2;
3299 toklen += 2;
3300 c = lp[-1];
3301 goto still_in_token;
3303 else
3305 bool funorvar = FALSE;
3307 if (yacc_rules
3308 || consider_token (newlb.buffer + tokoff, toklen, c,
3309 &c_ext, bracelev, parlev,
3310 &funorvar))
3312 if (fvdef == foperator)
3314 char *oldlp = lp;
3315 lp = skip_spaces (lp-1);
3316 if (*lp != '\0')
3317 lp += 1;
3318 while (*lp != '\0'
3319 && !iswhite (*lp) && *lp != '(')
3320 lp += 1;
3321 c = *lp++;
3322 toklen += lp - oldlp;
3324 token.named = FALSE;
3325 if (!plainc
3326 && nestlev > 0 && definedef == dnone)
3327 /* in struct body */
3329 write_classname (&token_name, qualifier);
3330 linebuffer_setlen (&token_name,
3331 token_name.len+qlen+toklen);
3332 strcat (token_name.buffer, qualifier);
3333 strncat (token_name.buffer,
3334 newlb.buffer + tokoff, toklen);
3335 token.named = TRUE;
3337 else if (objdef == ocatseen)
3338 /* Objective C category */
3340 int len = strlen (objtag) + 2 + toklen;
3341 linebuffer_setlen (&token_name, len);
3342 strcpy (token_name.buffer, objtag);
3343 strcat (token_name.buffer, "(");
3344 strncat (token_name.buffer,
3345 newlb.buffer + tokoff, toklen);
3346 strcat (token_name.buffer, ")");
3347 token.named = TRUE;
3349 else if (objdef == omethodtag
3350 || objdef == omethodparm)
3351 /* Objective C method */
3353 token.named = TRUE;
3355 else if (fvdef == fdefunname)
3356 /* GNU DEFUN and similar macros */
3358 bool defun = (newlb.buffer[tokoff] == 'F');
3359 int off = tokoff;
3360 int len = toklen;
3362 /* Rewrite the tag so that emacs lisp DEFUNs
3363 can be found by their elisp name */
3364 if (defun)
3366 off += 1;
3367 len -= 1;
3369 linebuffer_setlen (&token_name, len);
3370 strncpy (token_name.buffer,
3371 newlb.buffer + off, len);
3372 token_name.buffer[len] = '\0';
3373 if (defun)
3374 while (--len >= 0)
3375 if (token_name.buffer[len] == '_')
3376 token_name.buffer[len] = '-';
3377 token.named = defun;
3379 else
3381 linebuffer_setlen (&token_name, toklen);
3382 strncpy (token_name.buffer,
3383 newlb.buffer + tokoff, toklen);
3384 token_name.buffer[toklen] = '\0';
3385 /* Name macros and members. */
3386 token.named = (structdef == stagseen
3387 || typdef == ttypeseen
3388 || typdef == tend
3389 || (funorvar
3390 && definedef == dignorerest)
3391 || (funorvar
3392 && definedef == dnone
3393 && structdef == snone
3394 && bracelev > 0));
3396 token.lineno = lineno;
3397 token.offset = tokoff;
3398 token.length = toklen;
3399 token.line = newlb.buffer;
3400 token.linepos = newlinepos;
3401 token.valid = TRUE;
3403 if (definedef == dnone
3404 && (fvdef == fvnameseen
3405 || fvdef == foperator
3406 || structdef == stagseen
3407 || typdef == tend
3408 || typdef == ttypeseen
3409 || objdef != onone))
3411 if (current_lb_is_new)
3412 switch_line_buffers ();
3414 else if (definedef != dnone
3415 || fvdef == fdefunname
3416 || instruct)
3417 make_C_tag (funorvar);
3419 else /* not yacc and consider_token failed */
3421 if (inattribute && fvdef == fignore)
3423 /* We have just met __attribute__ after a
3424 function parameter list: do not tag the
3425 function again. */
3426 fvdef = fvnone;
3429 midtoken = FALSE;
3431 } /* if (endtoken (c)) */
3432 else if (intoken (c))
3433 still_in_token:
3435 toklen++;
3436 continue;
3438 } /* if (midtoken) */
3439 else if (begtoken (c))
3441 switch (definedef)
3443 case dnone:
3444 switch (fvdef)
3446 case fstartlist:
3447 /* This prevents tagging fb in
3448 void (__attribute__((noreturn)) *fb) (void);
3449 Fixing this is not easy and not very important. */
3450 fvdef = finlist;
3451 continue;
3452 case flistseen:
3453 if (plainc || declarations)
3455 make_C_tag (TRUE); /* a function */
3456 fvdef = fignore;
3458 break;
3460 if (structdef == stagseen && !cjava)
3462 popclass_above (bracelev);
3463 structdef = snone;
3465 break;
3466 case dsharpseen:
3467 savetoken = token;
3468 break;
3470 if (!yacc_rules || lp == newlb.buffer + 1)
3472 tokoff = lp - 1 - newlb.buffer;
3473 toklen = 1;
3474 midtoken = TRUE;
3476 continue;
3477 } /* if (begtoken) */
3478 } /* if must look at token */
3481 /* Detect end of line, colon, comma, semicolon and various braces
3482 after having handled a token.*/
3483 switch (c)
3485 case ':':
3486 if (inattribute)
3487 break;
3488 if (yacc_rules && token.offset == 0 && token.valid)
3490 make_C_tag (FALSE); /* a yacc function */
3491 break;
3493 if (definedef != dnone)
3494 break;
3495 switch (objdef)
3497 case otagseen:
3498 objdef = oignore;
3499 make_C_tag (TRUE); /* an Objective C class */
3500 break;
3501 case omethodtag:
3502 case omethodparm:
3503 objdef = omethodcolon;
3504 linebuffer_setlen (&token_name, token_name.len + 1);
3505 strcat (token_name.buffer, ":");
3506 break;
3508 if (structdef == stagseen)
3510 structdef = scolonseen;
3511 break;
3513 /* Should be useless, but may be work as a safety net. */
3514 if (cplpl && fvdef == flistseen)
3516 make_C_tag (TRUE); /* a function */
3517 fvdef = fignore;
3518 break;
3520 break;
3521 case ';':
3522 if (definedef != dnone || inattribute)
3523 break;
3524 switch (typdef)
3526 case tend:
3527 case ttypeseen:
3528 make_C_tag (FALSE); /* a typedef */
3529 typdef = tnone;
3530 fvdef = fvnone;
3531 break;
3532 case tnone:
3533 case tinbody:
3534 case tignore:
3535 switch (fvdef)
3537 case fignore:
3538 if (typdef == tignore || cplpl)
3539 fvdef = fvnone;
3540 break;
3541 case fvnameseen:
3542 if ((globals && bracelev == 0 && (!fvextern || declarations))
3543 || (members && instruct))
3544 make_C_tag (FALSE); /* a variable */
3545 fvextern = FALSE;
3546 fvdef = fvnone;
3547 token.valid = FALSE;
3548 break;
3549 case flistseen:
3550 if ((declarations
3551 && (cplpl || !instruct)
3552 && (typdef == tnone || (typdef != tignore && instruct)))
3553 || (members
3554 && plainc && instruct))
3555 make_C_tag (TRUE); /* a function */
3556 /* FALLTHRU */
3557 default:
3558 fvextern = FALSE;
3559 fvdef = fvnone;
3560 if (declarations
3561 && cplpl && structdef == stagseen)
3562 make_C_tag (FALSE); /* forward declaration */
3563 else
3564 token.valid = FALSE;
3565 } /* switch (fvdef) */
3566 /* FALLTHRU */
3567 default:
3568 if (!instruct)
3569 typdef = tnone;
3571 if (structdef == stagseen)
3572 structdef = snone;
3573 break;
3574 case ',':
3575 if (definedef != dnone || inattribute)
3576 break;
3577 switch (objdef)
3579 case omethodtag:
3580 case omethodparm:
3581 make_C_tag (TRUE); /* an Objective C method */
3582 objdef = oinbody;
3583 break;
3585 switch (fvdef)
3587 case fdefunkey:
3588 case foperator:
3589 case fstartlist:
3590 case finlist:
3591 case fignore:
3592 case vignore:
3593 break;
3594 case fdefunname:
3595 fvdef = fignore;
3596 break;
3597 case fvnameseen:
3598 if (parlev == 0
3599 && ((globals
3600 && bracelev == 0
3601 && templatelev == 0
3602 && (!fvextern || declarations))
3603 || (members && instruct)))
3604 make_C_tag (FALSE); /* a variable */
3605 break;
3606 case flistseen:
3607 if ((declarations && typdef == tnone && !instruct)
3608 || (members && typdef != tignore && instruct))
3610 make_C_tag (TRUE); /* a function */
3611 fvdef = fvnameseen;
3613 else if (!declarations)
3614 fvdef = fvnone;
3615 token.valid = FALSE;
3616 break;
3617 default:
3618 fvdef = fvnone;
3620 if (structdef == stagseen)
3621 structdef = snone;
3622 break;
3623 case ']':
3624 if (definedef != dnone || inattribute)
3625 break;
3626 if (structdef == stagseen)
3627 structdef = snone;
3628 switch (typdef)
3630 case ttypeseen:
3631 case tend:
3632 typdef = tignore;
3633 make_C_tag (FALSE); /* a typedef */
3634 break;
3635 case tnone:
3636 case tinbody:
3637 switch (fvdef)
3639 case foperator:
3640 case finlist:
3641 case fignore:
3642 case vignore:
3643 break;
3644 case fvnameseen:
3645 if ((members && bracelev == 1)
3646 || (globals && bracelev == 0
3647 && (!fvextern || declarations)))
3648 make_C_tag (FALSE); /* a variable */
3649 /* FALLTHRU */
3650 default:
3651 fvdef = fvnone;
3653 break;
3655 break;
3656 case '(':
3657 if (inattribute)
3659 attrparlev++;
3660 break;
3662 if (definedef != dnone)
3663 break;
3664 if (objdef == otagseen && parlev == 0)
3665 objdef = oparenseen;
3666 switch (fvdef)
3668 case fvnameseen:
3669 if (typdef == ttypeseen
3670 && *lp != '*'
3671 && !instruct)
3673 /* This handles constructs like:
3674 typedef void OperatorFun (int fun); */
3675 make_C_tag (FALSE);
3676 typdef = tignore;
3677 fvdef = fignore;
3678 break;
3680 /* FALLTHRU */
3681 case foperator:
3682 fvdef = fstartlist;
3683 break;
3684 case flistseen:
3685 fvdef = finlist;
3686 break;
3688 parlev++;
3689 break;
3690 case ')':
3691 if (inattribute)
3693 if (--attrparlev == 0)
3694 inattribute = FALSE;
3695 break;
3697 if (definedef != dnone)
3698 break;
3699 if (objdef == ocatseen && parlev == 1)
3701 make_C_tag (TRUE); /* an Objective C category */
3702 objdef = oignore;
3704 if (--parlev == 0)
3706 switch (fvdef)
3708 case fstartlist:
3709 case finlist:
3710 fvdef = flistseen;
3711 break;
3713 if (!instruct
3714 && (typdef == tend
3715 || typdef == ttypeseen))
3717 typdef = tignore;
3718 make_C_tag (FALSE); /* a typedef */
3721 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3722 parlev = 0;
3723 break;
3724 case '{':
3725 if (definedef != dnone)
3726 break;
3727 if (typdef == ttypeseen)
3729 /* Whenever typdef is set to tinbody (currently only
3730 here), typdefbracelev should be set to bracelev. */
3731 typdef = tinbody;
3732 typdefbracelev = bracelev;
3734 switch (fvdef)
3736 case flistseen:
3737 make_C_tag (TRUE); /* a function */
3738 /* FALLTHRU */
3739 case fignore:
3740 fvdef = fvnone;
3741 break;
3742 case fvnone:
3743 switch (objdef)
3745 case otagseen:
3746 make_C_tag (TRUE); /* an Objective C class */
3747 objdef = oignore;
3748 break;
3749 case omethodtag:
3750 case omethodparm:
3751 make_C_tag (TRUE); /* an Objective C method */
3752 objdef = oinbody;
3753 break;
3754 default:
3755 /* Neutralize `extern "C" {' grot. */
3756 if (bracelev == 0 && structdef == snone && nestlev == 0
3757 && typdef == tnone)
3758 bracelev = -1;
3760 break;
3762 switch (structdef)
3764 case skeyseen: /* unnamed struct */
3765 pushclass_above (bracelev, NULL, 0);
3766 structdef = snone;
3767 break;
3768 case stagseen: /* named struct or enum */
3769 case scolonseen: /* a class */
3770 pushclass_above (bracelev,token.line+token.offset, token.length);
3771 structdef = snone;
3772 make_C_tag (FALSE); /* a struct or enum */
3773 break;
3775 bracelev += 1;
3776 break;
3777 case '*':
3778 if (definedef != dnone)
3779 break;
3780 if (fvdef == fstartlist)
3782 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3783 token.valid = FALSE;
3785 break;
3786 case '}':
3787 if (definedef != dnone)
3788 break;
3789 bracelev -= 1;
3790 if (!ignoreindent && lp == newlb.buffer + 1)
3792 if (bracelev != 0)
3793 token.valid = FALSE; /* unexpected value, token unreliable */
3794 bracelev = 0; /* reset brace level if first column */
3795 parlev = 0; /* also reset paren level, just in case... */
3797 else if (bracelev < 0)
3799 token.valid = FALSE; /* something gone amiss, token unreliable */
3800 bracelev = 0;
3802 if (bracelev == 0 && fvdef == vignore)
3803 fvdef = fvnone; /* end of function */
3804 popclass_above (bracelev);
3805 structdef = snone;
3806 /* Only if typdef == tinbody is typdefbracelev significant. */
3807 if (typdef == tinbody && bracelev <= typdefbracelev)
3809 assert (bracelev == typdefbracelev);
3810 typdef = tend;
3812 break;
3813 case '=':
3814 if (definedef != dnone)
3815 break;
3816 switch (fvdef)
3818 case foperator:
3819 case finlist:
3820 case fignore:
3821 case vignore:
3822 break;
3823 case fvnameseen:
3824 if ((members && bracelev == 1)
3825 || (globals && bracelev == 0 && (!fvextern || declarations)))
3826 make_C_tag (FALSE); /* a variable */
3827 /* FALLTHRU */
3828 default:
3829 fvdef = vignore;
3831 break;
3832 case '<':
3833 if (cplpl
3834 && (structdef == stagseen || fvdef == fvnameseen))
3836 templatelev++;
3837 break;
3839 goto resetfvdef;
3840 case '>':
3841 if (templatelev > 0)
3843 templatelev--;
3844 break;
3846 goto resetfvdef;
3847 case '+':
3848 case '-':
3849 if (objdef == oinbody && bracelev == 0)
3851 objdef = omethodsign;
3852 break;
3854 /* FALLTHRU */
3855 resetfvdef:
3856 case '#': case '~': case '&': case '%': case '/':
3857 case '|': case '^': case '!': case '.': case '?':
3858 if (definedef != dnone)
3859 break;
3860 /* These surely cannot follow a function tag in C. */
3861 switch (fvdef)
3863 case foperator:
3864 case finlist:
3865 case fignore:
3866 case vignore:
3867 break;
3868 default:
3869 fvdef = fvnone;
3871 break;
3872 case '\0':
3873 if (objdef == otagseen)
3875 make_C_tag (TRUE); /* an Objective C class */
3876 objdef = oignore;
3878 /* If a macro spans multiple lines don't reset its state. */
3879 if (quotednl)
3880 CNL_SAVE_DEFINEDEF ();
3881 else
3882 CNL ();
3883 break;
3884 } /* switch (c) */
3886 } /* while not eof */
3888 free (lbs[0].lb.buffer);
3889 free (lbs[1].lb.buffer);
3893 * Process either a C++ file or a C file depending on the setting
3894 * of a global flag.
3896 static void
3897 default_C_entries (FILE *inf)
3899 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Language entry point: parse INF as plain C, i.e. with no C
   extension flags passed to C_entries.  */
static void
plain_C_entries (FILE *inf)
{
  C_entries (0, inf);
}
3909 /* Always do C++. */
3910 static void
3911 Cplusplus_entries (FILE *inf)
3913 C_entries (C_PLPL, inf);
3916 /* Always do Java. */
3917 static void
3918 Cjava_entries (FILE *inf)
3920 C_entries (C_JAVA, inf);
3923 /* Always do C*. */
3924 static void
3925 Cstar_entries (FILE *inf)
3927 C_entries (C_STAR, inf);
3930 /* Always do Yacc. */
3931 static void
3932 Yacc_entries (FILE *inf)
3934 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read the next
   line into LINE_BUFFER with readline and point CHAR_POINTER at the
   start of that line, then run the statement that follows the macro.
   Every macro parameter is parenthesized so that arguments which are
   expressions rather than plain identifiers expand correctly.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&(line_buffer), (file_pointer)),			\
	   (char_pointer) = (line_buffer).buffer,			\
	   TRUE);							\
      )

/* True if CP points at the keyword KW followed by a character that
   cannot be part of a name.  On a match CP is advanced past the keyword
   and any spaces after it; CP must therefore be a modifiable lvalue.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof (kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof (kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT but ignores case, does not use notinname, and
   does not skip spaces: on a match CP is advanced just past KW.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert ("" kw), TRUE) /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof (kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof (kw)-1))			/* skip the keyword */
3961 * Read a file, but do no processing. This is used to do regexp
3962 * matching on files that have no language defined.
3964 static void
3965 just_read_file (FILE *inf)
3967 while (!feof (inf))
3968 readline (&lb, inf);
3972 /* Fortran parsing */
3974 static void F_takeprec (void);
3975 static void F_getit (FILE *);
3977 static void
3978 F_takeprec (void)
3980 dbp = skip_spaces (dbp);
3981 if (*dbp != '*')
3982 return;
3983 dbp++;
3984 dbp = skip_spaces (dbp);
3985 if (strneq (dbp, "(*)", 3))
3987 dbp += 3;
3988 return;
3990 if (!ISDIGIT (*dbp))
3992 --dbp; /* force failure */
3993 return;
3996 dbp++;
3997 while (ISDIGIT (*dbp));
4000 static void
4001 F_getit (FILE *inf)
4003 register char *cp;
4005 dbp = skip_spaces (dbp);
4006 if (*dbp == '\0')
4008 readline (&lb, inf);
4009 dbp = lb.buffer;
4010 if (dbp[5] != '&')
4011 return;
4012 dbp += 6;
4013 dbp = skip_spaces (dbp);
4015 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4016 return;
4017 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4018 continue;
4019 make_tag (dbp, cp-dbp, TRUE,
4020 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4024 static void
4025 Fortran_functions (FILE *inf)
4027 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4029 if (*dbp == '%')
4030 dbp++; /* Ratfor escape to fortran */
4031 dbp = skip_spaces (dbp);
4032 if (*dbp == '\0')
4033 continue;
4035 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4036 dbp = skip_spaces (dbp);
4038 if (LOOKING_AT_NOCASE (dbp, "pure"))
4039 dbp = skip_spaces (dbp);
4041 if (LOOKING_AT_NOCASE (dbp, "elemental"))
4042 dbp = skip_spaces (dbp);
4044 switch (lowcase (*dbp))
4046 case 'i':
4047 if (nocase_tail ("integer"))
4048 F_takeprec ();
4049 break;
4050 case 'r':
4051 if (nocase_tail ("real"))
4052 F_takeprec ();
4053 break;
4054 case 'l':
4055 if (nocase_tail ("logical"))
4056 F_takeprec ();
4057 break;
4058 case 'c':
4059 if (nocase_tail ("complex") || nocase_tail ("character"))
4060 F_takeprec ();
4061 break;
4062 case 'd':
4063 if (nocase_tail ("double"))
4065 dbp = skip_spaces (dbp);
4066 if (*dbp == '\0')
4067 continue;
4068 if (nocase_tail ("precision"))
4069 break;
4070 continue;
4072 break;
4074 dbp = skip_spaces (dbp);
4075 if (*dbp == '\0')
4076 continue;
4077 switch (lowcase (*dbp))
4079 case 'f':
4080 if (nocase_tail ("function"))
4081 F_getit (inf);
4082 continue;
4083 case 's':
4084 if (nocase_tail ("subroutine"))
4085 F_getit (inf);
4086 continue;
4087 case 'e':
4088 if (nocase_tail ("entry"))
4089 F_getit (inf);
4090 continue;
4091 case 'b':
4092 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4094 dbp = skip_spaces (dbp);
4095 if (*dbp == '\0') /* assume un-named */
4096 make_tag ("blockdata", 9, TRUE,
4097 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4098 else
4099 F_getit (inf); /* look for name */
4101 continue;
4108 * Ada parsing
4109 * Original code by
4110 * Philippe Waroquiers (1998)
4113 /* Once we are positioned after an "interesting" keyword, let's get
4114 the real tag value necessary. */
4115 static void
4116 Ada_getit (FILE *inf, const char *name_qualifier)
4118 register char *cp;
4119 char *name;
4120 char c;
4122 while (!feof (inf))
4124 dbp = skip_spaces (dbp);
4125 if (*dbp == '\0'
4126 || (dbp[0] == '-' && dbp[1] == '-'))
4128 readline (&lb, inf);
4129 dbp = lb.buffer;
4131 switch (lowcase (*dbp))
4133 case 'b':
4134 if (nocase_tail ("body"))
4136 /* Skipping body of procedure body or package body or ....
4137 resetting qualifier to body instead of spec. */
4138 name_qualifier = "/b";
4139 continue;
4141 break;
4142 case 't':
4143 /* Skipping type of task type or protected type ... */
4144 if (nocase_tail ("type"))
4145 continue;
4146 break;
4148 if (*dbp == '"')
4150 dbp += 1;
4151 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4152 continue;
4154 else
4156 dbp = skip_spaces (dbp);
4157 for (cp = dbp;
4158 (*cp != '\0'
4159 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4160 cp++)
4161 continue;
4162 if (cp == dbp)
4163 return;
4165 c = *cp;
4166 *cp = '\0';
4167 name = concat (dbp, name_qualifier, "");
4168 *cp = c;
4169 make_tag (name, strlen (name), TRUE,
4170 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4171 free (name);
4172 if (c == '"')
4173 dbp = cp + 1;
4174 return;
4178 static void
4179 Ada_funcs (FILE *inf)
4181 bool inquote = FALSE;
4182 bool skip_till_semicolumn = FALSE;
4184 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4186 while (*dbp != '\0')
4188 /* Skip a string i.e. "abcd". */
4189 if (inquote || (*dbp == '"'))
4191 dbp = etags_strchr (dbp + !inquote, '"');
4192 if (dbp != NULL)
4194 inquote = FALSE;
4195 dbp += 1;
4196 continue; /* advance char */
4198 else
4200 inquote = TRUE;
4201 break; /* advance line */
4205 /* Skip comments. */
4206 if (dbp[0] == '-' && dbp[1] == '-')
4207 break; /* advance line */
4209 /* Skip character enclosed in single quote i.e. 'a'
4210 and skip single quote starting an attribute i.e. 'Image. */
4211 if (*dbp == '\'')
4213 dbp++ ;
4214 if (*dbp != '\0')
4215 dbp++;
4216 continue;
4219 if (skip_till_semicolumn)
4221 if (*dbp == ';')
4222 skip_till_semicolumn = FALSE;
4223 dbp++;
4224 continue; /* advance char */
4227 /* Search for beginning of a token. */
4228 if (!begtoken (*dbp))
4230 dbp++;
4231 continue; /* advance char */
4234 /* We are at the beginning of a token. */
4235 switch (lowcase (*dbp))
4237 case 'f':
4238 if (!packages_only && nocase_tail ("function"))
4239 Ada_getit (inf, "/f");
4240 else
4241 break; /* from switch */
4242 continue; /* advance char */
4243 case 'p':
4244 if (!packages_only && nocase_tail ("procedure"))
4245 Ada_getit (inf, "/p");
4246 else if (nocase_tail ("package"))
4247 Ada_getit (inf, "/s");
4248 else if (nocase_tail ("protected")) /* protected type */
4249 Ada_getit (inf, "/t");
4250 else
4251 break; /* from switch */
4252 continue; /* advance char */
4254 case 'u':
4255 if (typedefs && !packages_only && nocase_tail ("use"))
4257 /* when tagging types, avoid tagging use type Pack.Typename;
4258 for this, we will skip everything till a ; */
4259 skip_till_semicolumn = TRUE;
4260 continue; /* advance char */
4263 case 't':
4264 if (!packages_only && nocase_tail ("task"))
4265 Ada_getit (inf, "/k");
4266 else if (typedefs && !packages_only && nocase_tail ("type"))
4268 Ada_getit (inf, "/t");
4269 while (*dbp != '\0')
4270 dbp += 1;
4272 else
4273 break; /* from switch */
4274 continue; /* advance char */
4277 /* Look for the end of the token. */
4278 while (!endtoken (*dbp))
4279 dbp++;
4281 } /* advance char */
4282 } /* advance line */
4287 * Unix and microcontroller assembly tag handling
4288 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4289 * Idea by Bob Weiner, Motorola Inc. (1994)
4291 static void
4292 Asm_labels (FILE *inf)
4294 register char *cp;
4296 LOOP_ON_INPUT_LINES (inf, lb, cp)
4298 /* If first char is alphabetic or one of [_.$], test for colon
4299 following identifier. */
4300 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4302 /* Read past label. */
4303 cp++;
4304 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4305 cp++;
4306 if (*cp == ':' || iswhite (*cp))
4307 /* Found end of label, so copy it and add it to the table. */
4308 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4309 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4316 * Perl support
4317 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4318 * Perl variable names: /^(my|local).../
4319 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4320 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4321 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4323 static void
4324 Perl_functions (FILE *inf)
4326 char *package = savestr ("main"); /* current package name */
4327 register char *cp;
4329 LOOP_ON_INPUT_LINES (inf, lb, cp)
4331 cp = skip_spaces (cp);
4333 if (LOOKING_AT (cp, "package"))
4335 free (package);
4336 get_tag (cp, &package);
4338 else if (LOOKING_AT (cp, "sub"))
4340 char *pos;
4341 char *sp = cp;
4343 while (!notinname (*cp))
4344 cp++;
4345 if (cp == sp)
4346 continue; /* nothing found */
4347 if ((pos = etags_strchr (sp, ':')) != NULL
4348 && pos < cp && pos[1] == ':')
4349 /* The name is already qualified. */
4350 make_tag (sp, cp - sp, TRUE,
4351 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4352 else
4353 /* Qualify it. */
4355 char savechar, *name;
4357 savechar = *cp;
4358 *cp = '\0';
4359 name = concat (package, "::", sp);
4360 *cp = savechar;
4361 make_tag (name, strlen (name), TRUE,
4362 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4363 free (name);
4366 else if (globals) /* only if we are tagging global vars */
4368 /* Skip a qualifier, if any. */
4369 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4370 /* After "my" or "local", but before any following paren or space. */
4371 char *varstart = cp;
4373 if (qual /* should this be removed? If yes, how? */
4374 && (*cp == '$' || *cp == '@' || *cp == '%'))
4376 varstart += 1;
4378 cp++;
4379 while (ISALNUM (*cp) || *cp == '_');
4381 else if (qual)
4383 /* Should be examining a variable list at this point;
4384 could insist on seeing an open parenthesis. */
4385 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4386 cp++;
4388 else
4389 continue;
4391 make_tag (varstart, cp - varstart, FALSE,
4392 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4395 free (package);
4400 * Python support
4401 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4402 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4403 * More ideas by seb bacon <seb@jamkit.com> (2002)
4405 static void
4406 Python_functions (FILE *inf)
4408 register char *cp;
4410 LOOP_ON_INPUT_LINES (inf, lb, cp)
4412 cp = skip_spaces (cp);
4413 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4415 char *name = cp;
4416 while (!notinname (*cp) && *cp != ':')
4417 cp++;
4418 make_tag (name, cp - name, TRUE,
4419 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4426 * PHP support
4427 * Look for:
4428 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4429 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4430 * - /^[ \t]*define\(\"[^\"]+/
4431 * Only with --members:
4432 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4433 * Idea by Diez B. Roggisch (2001)
4435 static void
4436 PHP_functions (FILE *inf)
4438 register char *cp, *name;
4439 bool search_identifier = FALSE;
4441 LOOP_ON_INPUT_LINES (inf, lb, cp)
4443 cp = skip_spaces (cp);
4444 name = cp;
4445 if (search_identifier
4446 && *cp != '\0')
4448 while (!notinname (*cp))
4449 cp++;
4450 make_tag (name, cp - name, TRUE,
4451 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4452 search_identifier = FALSE;
4454 else if (LOOKING_AT (cp, "function"))
4456 if (*cp == '&')
4457 cp = skip_spaces (cp+1);
4458 if (*cp != '\0')
4460 name = cp;
4461 while (!notinname (*cp))
4462 cp++;
4463 make_tag (name, cp - name, TRUE,
4464 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4466 else
4467 search_identifier = TRUE;
4469 else if (LOOKING_AT (cp, "class"))
4471 if (*cp != '\0')
4473 name = cp;
4474 while (*cp != '\0' && !iswhite (*cp))
4475 cp++;
4476 make_tag (name, cp - name, FALSE,
4477 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4479 else
4480 search_identifier = TRUE;
4482 else if (strneq (cp, "define", 6)
4483 && (cp = skip_spaces (cp+6))
4484 && *cp++ == '('
4485 && (*cp == '"' || *cp == '\''))
4487 char quote = *cp++;
4488 name = cp;
4489 while (*cp != quote && *cp != '\0')
4490 cp++;
4491 make_tag (name, cp - name, FALSE,
4492 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494 else if (members
4495 && LOOKING_AT (cp, "var")
4496 && *cp == '$')
4498 name = cp;
4499 while (!notinname (*cp))
4500 cp++;
4501 make_tag (name, cp - name, FALSE,
4502 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4509 * Cobol tag functions
4510 * We could look for anything that could be a paragraph name.
4511 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4512 * Idea by Corny de Souza (1993)
4514 static void
4515 Cobol_paragraphs (FILE *inf)
4517 register char *bp, *ep;
4519 LOOP_ON_INPUT_LINES (inf, lb, bp)
4521 if (lb.len < 9)
4522 continue;
4523 bp += 8;
4525 /* If eoln, compiler option or comment ignore whole line. */
4526 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4527 continue;
4529 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4530 continue;
4531 if (*ep++ == '.')
4532 make_tag (bp, ep - bp, TRUE,
4533 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4539 * Makefile support
4540 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4542 static void
4543 Makefile_targets (FILE *inf)
4545 register char *bp;
4547 LOOP_ON_INPUT_LINES (inf, lb, bp)
4549 if (*bp == '\t' || *bp == '#')
4550 continue;
4551 while (*bp != '\0' && *bp != '=' && *bp != ':')
4552 bp++;
4553 if (*bp == ':' || (globals && *bp == '='))
4555 /* We should detect if there is more than one tag, but we do not.
4556 We just skip initial and final spaces. */
4557 char * namestart = skip_spaces (lb.buffer);
4558 while (--bp > namestart)
4559 if (!notinname (*bp))
4560 break;
4561 make_tag (namestart, bp - namestart + 1, TRUE,
4562 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4569 * Pascal parsing
4570 * Original code by Mosur K. Mohan (1989)
4572 * Locates tags for procedures & functions. Doesn't do any type- or
4573 * var-definitions. It does look for the keyword "extern" or
4574 * "forward" immediately following the procedure statement; if found,
4575 * the tag is skipped.
4577 static void
4578 Pascal_functions (FILE *inf)
4580 linebuffer tline; /* mostly copied from C_entries */
4581 long save_lcno;
4582 int save_lineno, namelen, taglen;
4583 char c, *name;
4585 bool /* each of these flags is TRUE if: */
4586 incomment, /* point is inside a comment */
4587 inquote, /* point is inside '..' string */
4588 get_tagname, /* point is after PROCEDURE/FUNCTION
4589 keyword, so next item = potential tag */
4590 found_tag, /* point is after a potential tag */
4591 inparms, /* point is within parameter-list */
4592 verify_tag; /* point has passed the parm-list, so the
4593 next token will determine whether this
4594 is a FORWARD/EXTERN to be ignored, or
4595 whether it is a real tag */
4597 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4598 name = NULL; /* keep compiler quiet */
4599 dbp = lb.buffer;
4600 *dbp = '\0';
4601 linebuffer_init (&tline);
4603 incomment = inquote = FALSE;
4604 found_tag = FALSE; /* have a proc name; check if extern */
4605 get_tagname = FALSE; /* found "procedure" keyword */
4606 inparms = FALSE; /* found '(' after "proc" */
4607 verify_tag = FALSE; /* check if "extern" is ahead */
4610 while (!feof (inf)) /* long main loop to get next char */
4612 c = *dbp++;
4613 if (c == '\0') /* if end of line */
4615 readline (&lb, inf);
4616 dbp = lb.buffer;
4617 if (*dbp == '\0')
4618 continue;
4619 if (!((found_tag && verify_tag)
4620 || get_tagname))
4621 c = *dbp++; /* only if don't need *dbp pointing
4622 to the beginning of the name of
4623 the procedure or function */
4625 if (incomment)
4627 if (c == '}') /* within { } comments */
4628 incomment = FALSE;
4629 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4631 dbp++;
4632 incomment = FALSE;
4634 continue;
4636 else if (inquote)
4638 if (c == '\'')
4639 inquote = FALSE;
4640 continue;
4642 else
4643 switch (c)
4645 case '\'':
4646 inquote = TRUE; /* found first quote */
4647 continue;
4648 case '{': /* found open { comment */
4649 incomment = TRUE;
4650 continue;
4651 case '(':
4652 if (*dbp == '*') /* found open (* comment */
4654 incomment = TRUE;
4655 dbp++;
4657 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4658 inparms = TRUE;
4659 continue;
4660 case ')': /* end of parms list */
4661 if (inparms)
4662 inparms = FALSE;
4663 continue;
4664 case ';':
4665 if (found_tag && !inparms) /* end of proc or fn stmt */
4667 verify_tag = TRUE;
4668 break;
4670 continue;
4672 if (found_tag && verify_tag && (*dbp != ' '))
4674 /* Check if this is an "extern" declaration. */
4675 if (*dbp == '\0')
4676 continue;
4677 if (lowcase (*dbp == 'e'))
4679 if (nocase_tail ("extern")) /* superfluous, really! */
4681 found_tag = FALSE;
4682 verify_tag = FALSE;
4685 else if (lowcase (*dbp) == 'f')
4687 if (nocase_tail ("forward")) /* check for forward reference */
4689 found_tag = FALSE;
4690 verify_tag = FALSE;
4693 if (found_tag && verify_tag) /* not external proc, so make tag */
4695 found_tag = FALSE;
4696 verify_tag = FALSE;
4697 make_tag (name, namelen, TRUE,
4698 tline.buffer, taglen, save_lineno, save_lcno);
4699 continue;
4702 if (get_tagname) /* grab name of proc or fn */
4704 char *cp;
4706 if (*dbp == '\0')
4707 continue;
4709 /* Find block name. */
4710 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4711 continue;
4713 /* Save all values for later tagging. */
4714 linebuffer_setlen (&tline, lb.len);
4715 strcpy (tline.buffer, lb.buffer);
4716 save_lineno = lineno;
4717 save_lcno = linecharno;
4718 name = tline.buffer + (dbp - lb.buffer);
4719 namelen = cp - dbp;
4720 taglen = cp - lb.buffer + 1;
4722 dbp = cp; /* set dbp to e-o-token */
4723 get_tagname = FALSE;
4724 found_tag = TRUE;
4725 continue;
4727 /* And proceed to check for "extern". */
4729 else if (!incomment && !inquote && !found_tag)
4731 /* Check for proc/fn keywords. */
4732 switch (lowcase (c))
4734 case 'p':
4735 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4736 get_tagname = TRUE;
4737 continue;
4738 case 'f':
4739 if (nocase_tail ("unction"))
4740 get_tagname = TRUE;
4741 continue;
4744 } /* while not eof */
4746 free (tline.buffer);
4751 * Lisp tag functions
4752 * look for (def or (DEF, quote or QUOTE
4755 static void L_getit (void);
4757 static void
4758 L_getit (void)
4760 if (*dbp == '\'') /* Skip prefix quote */
4761 dbp++;
4762 else if (*dbp == '(')
4764 dbp++;
4765 /* Try to skip "(quote " */
4766 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4767 /* Ok, then skip "(" before name in (defstruct (foo)) */
4768 dbp = skip_spaces (dbp);
4770 get_tag (dbp, NULL);
4773 static void
4774 Lisp_functions (FILE *inf)
4776 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4778 if (dbp[0] != '(')
4779 continue;
4781 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4783 dbp = skip_non_spaces (dbp);
4784 dbp = skip_spaces (dbp);
4785 L_getit ();
4787 else
4789 /* Check for (foo::defmumble name-defined ... */
4791 dbp++;
4792 while (!notinname (*dbp) && *dbp != ':');
4793 if (*dbp == ':')
4796 dbp++;
4797 while (*dbp == ':');
4799 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4801 dbp = skip_non_spaces (dbp);
4802 dbp = skip_spaces (dbp);
4803 L_getit ();
4812 * Lua script language parsing
4813 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4815 * "function" and "local function" are tags if they start at column 1.
4817 static void
4818 Lua_functions (FILE *inf)
4820 register char *bp;
4822 LOOP_ON_INPUT_LINES (inf, lb, bp)
4824 if (bp[0] != 'f' && bp[0] != 'l')
4825 continue;
4827 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4829 if (LOOKING_AT (bp, "function"))
4830 get_tag (bp, NULL);
4836 * PostScript tags
4837 * Just look for lines where the first character is '/'
4838 * Also look at "defineps" for PSWrap
4839 * Ideas by:
4840 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4841 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4843 static void
4844 PS_functions (FILE *inf)
4846 register char *bp, *ep;
4848 LOOP_ON_INPUT_LINES (inf, lb, bp)
4850 if (bp[0] == '/')
4852 for (ep = bp+1;
4853 *ep != '\0' && *ep != ' ' && *ep != '{';
4854 ep++)
4855 continue;
4856 make_tag (bp, ep - bp, TRUE,
4857 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4859 else if (LOOKING_AT (bp, "defineps"))
4860 get_tag (bp, NULL);
4866 * Forth tags
4867 * Ignore anything after \ followed by space or in ( )
4868 * Look for words defined by :
4869 * Look for constant, code, create, defer, value, and variable
4870 * OBP extensions: Look for buffer:, field,
4871 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4873 static void
4874 Forth_words (FILE *inf)
4876 register char *bp;
4878 LOOP_ON_INPUT_LINES (inf, lb, bp)
4879 while ((bp = skip_spaces (bp))[0] != '\0')
4880 if (bp[0] == '\\' && iswhite (bp[1]))
4881 break; /* read next line */
4882 else if (bp[0] == '(' && iswhite (bp[1]))
4883 do /* skip to ) or eol */
4884 bp++;
4885 while (*bp != ')' && *bp != '\0');
4886 else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4887 || LOOKING_AT_NOCASE (bp, "constant")
4888 || LOOKING_AT_NOCASE (bp, "code")
4889 || LOOKING_AT_NOCASE (bp, "create")
4890 || LOOKING_AT_NOCASE (bp, "defer")
4891 || LOOKING_AT_NOCASE (bp, "value")
4892 || LOOKING_AT_NOCASE (bp, "variable")
4893 || LOOKING_AT_NOCASE (bp, "buffer:")
4894 || LOOKING_AT_NOCASE (bp, "field"))
4895 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4896 else
4897 bp = skip_non_spaces (bp);
4902 * Scheme tag functions
4903 * look for (def... xyzzy
4904 * (def... (xyzzy
4905 * (def ... ((...(xyzzy ....
4906 * (set! xyzzy
4907 * Original code by Ken Haase (1985?)
4909 static void
4910 Scheme_functions (FILE *inf)
4912 register char *bp;
4914 LOOP_ON_INPUT_LINES (inf, lb, bp)
4916 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4918 bp = skip_non_spaces (bp+4);
4919 /* Skip over open parens and white space. Don't continue past
4920 '\0'. */
4921 while (*bp && notinname (*bp))
4922 bp++;
4923 get_tag (bp, NULL);
4925 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4926 get_tag (bp, NULL);
4931 /* Find tags in TeX and LaTeX input files. */
4933 /* TEX_toktab is a table of TeX control sequences that define tags.
4934 * Each entry records one such control sequence.
4936 * Original code from who knows whom.
4937 * Ideas by:
4938 * Stefan Monnier (2002)
4941 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4943 /* Default set of control sequences to put into TEX_toktab.
4944 The value of environment var TEXTAGS is prepended to this. */
4945 static const char *TEX_defenv = "\
4946 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4947 :part:appendix:entry:index:def\
4948 :newcommand:renewcommand:newenvironment:renewenvironment";
4950 static void TEX_mode (FILE *);
4951 static void TEX_decode_env (const char *, const char *);
4953 static char TEX_esc = '\\';
4954 static char TEX_opgrp = '{';
4955 static char TEX_clgrp = '}';
4958 * TeX/LaTeX scanning loop.
4960 static void
4961 TeX_commands (FILE *inf)
4963 char *cp;
4964 linebuffer *key;
4966 /* Select either \ or ! as escape character. */
4967 TEX_mode (inf);
4969 /* Initialize token table once from environment. */
4970 if (TEX_toktab == NULL)
4971 TEX_decode_env ("TEXTAGS", TEX_defenv);
4973 LOOP_ON_INPUT_LINES (inf, lb, cp)
4975 /* Look at each TEX keyword in line. */
4976 for (;;)
4978 /* Look for a TEX escape. */
4979 while (*cp++ != TEX_esc)
4980 if (cp[-1] == '\0' || cp[-1] == '%')
4981 goto tex_next_line;
4983 for (key = TEX_toktab; key->buffer != NULL; key++)
4984 if (strneq (cp, key->buffer, key->len))
4986 register char *p;
4987 int namelen, linelen;
4988 bool opgrp = FALSE;
4990 cp = skip_spaces (cp + key->len);
4991 if (*cp == TEX_opgrp)
4993 opgrp = TRUE;
4994 cp++;
4996 for (p = cp;
4997 (!iswhite (*p) && *p != '#' &&
4998 *p != TEX_opgrp && *p != TEX_clgrp);
4999 p++)
5000 continue;
5001 namelen = p - cp;
5002 linelen = lb.len;
5003 if (!opgrp || *p == TEX_clgrp)
5005 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5006 p++;
5007 linelen = p - lb.buffer + 1;
5009 make_tag (cp, namelen, TRUE,
5010 lb.buffer, linelen, lineno, linecharno);
5011 goto tex_next_line; /* We only tag a line once */
5014 tex_next_line:
5019 #define TEX_LESC '\\'
5020 #define TEX_SESC '!'
5022 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5023 chars accordingly. */
5024 static void
5025 TEX_mode (FILE *inf)
5027 int c;
5029 while ((c = getc (inf)) != EOF)
5031 /* Skip to next line if we hit the TeX comment char. */
5032 if (c == '%')
5033 while (c != '\n' && c != EOF)
5034 c = getc (inf);
5035 else if (c == TEX_LESC || c == TEX_SESC )
5036 break;
5039 if (c == TEX_LESC)
5041 TEX_esc = TEX_LESC;
5042 TEX_opgrp = '{';
5043 TEX_clgrp = '}';
5045 else
5047 TEX_esc = TEX_SESC;
5048 TEX_opgrp = '<';
5049 TEX_clgrp = '>';
5051 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5052 No attempt is made to correct the situation. */
5053 rewind (inf);
5056 /* Read environment and prepend it to the default string.
5057 Build token table. */
5058 static void
5059 TEX_decode_env (const char *evarname, const char *defenv)
5061 register const char *env, *p;
5062 int i, len;
5064 /* Append default string to environment. */
5065 env = getenv (evarname);
5066 if (!env)
5067 env = defenv;
5068 else
5069 env = concat (env, defenv, "");
5071 /* Allocate a token table */
5072 for (len = 1, p = env; p;)
5073 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5074 len++;
5075 TEX_toktab = xnew (len, linebuffer);
5077 /* Unpack environment string into token table. Be careful about */
5078 /* zero-length strings (leading ':', "::" and trailing ':') */
5079 for (i = 0; *env != '\0';)
5081 p = etags_strchr (env, ':');
5082 if (!p) /* End of environment string. */
5083 p = env + strlen (env);
5084 if (p - env > 0)
5085 { /* Only non-zero strings. */
5086 TEX_toktab[i].buffer = savenstr (env, p - env);
5087 TEX_toktab[i].len = p - env;
5088 i++;
5090 if (*p)
5091 env = p + 1;
5092 else
5094 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5095 TEX_toktab[i].len = 0;
5096 break;
5102 /* Texinfo support. Dave Love, Mar. 2000. */
5103 static void
5104 Texinfo_nodes (FILE *inf)
5106 char *cp, *start;
5107 LOOP_ON_INPUT_LINES (inf, lb, cp)
5108 if (LOOKING_AT (cp, "@node"))
5110 start = cp;
5111 while (*cp != '\0' && *cp != ',')
5112 cp++;
5113 make_tag (start, cp - start, TRUE,
5114 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5120 * HTML support.
5121 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5122 * Contents of <a name=xxx> are tags with name xxx.
5124 * Francesco Potortì, 2002.
5126 static void
5127 HTML_labels (FILE *inf)
5129 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5130 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5131 bool intag = FALSE; /* inside an html tag, looking for ID= */
5132 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5133 char *end;
5136 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5138 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5139 for (;;) /* loop on the same line */
5141 if (skiptag) /* skip HTML tag */
5143 while (*dbp != '\0' && *dbp != '>')
5144 dbp++;
5145 if (*dbp == '>')
5147 dbp += 1;
5148 skiptag = FALSE;
5149 continue; /* look on the same line */
5151 break; /* go to next line */
5154 else if (intag) /* look for "name=" or "id=" */
5156 while (*dbp != '\0' && *dbp != '>'
5157 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5158 dbp++;
5159 if (*dbp == '\0')
5160 break; /* go to next line */
5161 if (*dbp == '>')
5163 dbp += 1;
5164 intag = FALSE;
5165 continue; /* look on the same line */
5167 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5168 || LOOKING_AT_NOCASE (dbp, "id="))
5170 bool quoted = (dbp[0] == '"');
5172 if (quoted)
5173 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5174 continue;
5175 else
5176 for (end = dbp; *end != '\0' && intoken (*end); end++)
5177 continue;
5178 linebuffer_setlen (&token_name, end - dbp);
5179 strncpy (token_name.buffer, dbp, end - dbp);
5180 token_name.buffer[end - dbp] = '\0';
5182 dbp = end;
5183 intag = FALSE; /* we found what we looked for */
5184 skiptag = TRUE; /* skip to the end of the tag */
5185 getnext = TRUE; /* then grab the text */
5186 continue; /* look on the same line */
5188 dbp += 1;
5191 else if (getnext) /* grab next tokens and tag them */
5193 dbp = skip_spaces (dbp);
5194 if (*dbp == '\0')
5195 break; /* go to next line */
5196 if (*dbp == '<')
5198 intag = TRUE;
5199 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5200 continue; /* look on the same line */
5203 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5204 continue;
5205 make_tag (token_name.buffer, token_name.len, TRUE,
5206 dbp, end - dbp, lineno, linecharno);
5207 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5208 getnext = FALSE;
5209 break; /* go to next line */
5212 else /* look for an interesting HTML tag */
5214 while (*dbp != '\0' && *dbp != '<')
5215 dbp++;
5216 if (*dbp == '\0')
5217 break; /* go to next line */
5218 intag = TRUE;
5219 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5221 inanchor = TRUE;
5222 continue; /* look on the same line */
5224 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5225 || LOOKING_AT_NOCASE (dbp, "<h1>")
5226 || LOOKING_AT_NOCASE (dbp, "<h2>")
5227 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5229 intag = FALSE;
5230 getnext = TRUE;
5231 continue; /* look on the same line */
5233 dbp += 1;
5240 * Prolog support
5242 * Assumes that the predicate or rule starts at column 0.
5243 * Only the first clause of a predicate or rule is added.
5244 * Original code by Sunichirou Sugou (1989)
5245 * Rewritten by Anders Lindgren (1996)
5247 static size_t prolog_pr (char *, char *);
5248 static void prolog_skip_comment (linebuffer *, FILE *);
5249 static size_t prolog_atom (char *, size_t);
5251 static void
5252 Prolog_functions (FILE *inf)
5254 char *cp, *last;
5255 size_t len;
5256 size_t allocated;
5258 allocated = 0;
5259 len = 0;
5260 last = NULL;
5262 LOOP_ON_INPUT_LINES (inf, lb, cp)
5264 if (cp[0] == '\0') /* Empty line */
5265 continue;
5266 else if (iswhite (cp[0])) /* Not a predicate */
5267 continue;
5268 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5269 prolog_skip_comment (&lb, inf);
5270 else if ((len = prolog_pr (cp, last)) > 0)
5272 /* Predicate or rule. Store the function name so that we
5273 only generate a tag for the first clause. */
5274 if (last == NULL)
5275 last = xnew (len + 1, char);
5276 else if (len + 1 > allocated)
5277 xrnew (last, len + 1, char);
5278 allocated = len + 1;
5279 strncpy (last, cp, len);
5280 last[len] = '\0';
5283 free (last);
5287 static void
5288 prolog_skip_comment (linebuffer *plb, FILE *inf)
5290 char *cp;
5294 for (cp = plb->buffer; *cp != '\0'; cp++)
5295 if (cp[0] == '*' && cp[1] == '/')
5296 return;
5297 readline (plb, inf);
5299 while (!feof (inf));
5303 * A predicate or rule definition is added if it matches:
5304 * <beginning of line><Prolog Atom><whitespace>(
5305 * or <beginning of line><Prolog Atom><whitespace>:-
5307 * It is added to the tags database if it doesn't match the
5308 * name of the previous clause header.
5310 * Return the size of the name of the predicate or rule, or 0 if no
5311 * header was found.
5313 static size_t
5314 prolog_pr (char *s, char *last)
5316 /* Name of last clause. */
5318 size_t pos;
5319 size_t len;
5321 pos = prolog_atom (s, 0);
5322 if (! pos)
5323 return 0;
5325 len = pos;
5326 pos = skip_spaces (s + pos) - s;
5328 if ((s[pos] == '.'
5329 || (s[pos] == '(' && (pos += 1))
5330 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5331 && (last == NULL /* save only the first clause */
5332 || len != strlen (last)
5333 || !strneq (s, last, len)))
5335 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5336 return len;
5338 else
5339 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the line and POS the offset in S where the atom starts.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      pos++;
      for (;;)
	switch (s[pos])
	  {
	  case '\0':
	    /* Multiline quoted atoms are ignored. */
	    return 0;

	  case '\\':
	    if (s[pos + 1] == '\0')
	      return 0;
	    pos += 2;
	    break;

	  case '\'':
	    if (s[pos + 1] != '\'')
	      return pos + 1 - start; /* closing quote included */
	    pos += 2;		/* A double quote */
	    break;

	  default:
	    pos++;
	    break;
	  }
    }

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
	continue;
      return pos - start;
    }

  return 0;
}
5401 * Support for Erlang
5403 * Generates tags for functions, defines, and records.
5404 * Assumes that Erlang functions start at column 0.
5405 * Original code by Anders Lindgren (1996)
5407 static int erlang_func (char *, char *);
5408 static void erlang_attribute (char *);
5409 static int erlang_atom (char *);
5411 static void
5412 Erlang_functions (FILE *inf)
5414 char *cp, *last;
5415 int len;
5416 int allocated;
5418 allocated = 0;
5419 len = 0;
5420 last = NULL;
5422 LOOP_ON_INPUT_LINES (inf, lb, cp)
5424 if (cp[0] == '\0') /* Empty line */
5425 continue;
5426 else if (iswhite (cp[0])) /* Not function nor attribute */
5427 continue;
5428 else if (cp[0] == '%') /* comment */
5429 continue;
5430 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5431 continue;
5432 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5434 erlang_attribute (cp);
5435 if (last != NULL)
5437 free (last);
5438 last = NULL;
5441 else if ((len = erlang_func (cp, last)) > 0)
5444 * Function. Store the function name so that we only
5445 * generates a tag for the first clause.
5447 if (last == NULL)
5448 last = xnew (len + 1, char);
5449 else if (len + 1 > allocated)
5450 xrnew (last, len + 1, char);
5451 allocated = len + 1;
5452 strncpy (last, cp, len);
5453 last[len] = '\0';
5456 free (last);
5461 * A function definition is added if it matches:
5462 * <beginning of line><Erlang Atom><whitespace>(
5464 * It is added to the tags database if it doesn't match the
5465 * name of the previous clause header.
5467 * Return the size of the name of the function, or 0 if no function
5468 * was found.
5470 static int
5471 erlang_func (char *s, char *last)
5473 /* Name of last clause. */
5475 int pos;
5476 int len;
5478 pos = erlang_atom (s);
5479 if (pos < 1)
5480 return 0;
5482 len = pos;
5483 pos = skip_spaces (s + pos) - s;
5485 /* Save only the first clause. */
5486 if (s[pos++] == '('
5487 && (last == NULL
5488 || len != (int)strlen (last)
5489 || !strneq (s, last, len)))
5491 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5492 return len;
5495 return 0;
5500 * Handle attributes. Currently, tags are generated for defines
5501 * and records.
5503 * They are on the form:
5504 * -define(foo, bar).
5505 * -define(Foo(M, N), M+N).
5506 * -record(graph, {vtab = notable, cyclic = true}).
5508 static void
5509 erlang_attribute (char *s)
5511 char *cp = s;
5513 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5514 && *cp++ == '(')
5516 int len = erlang_atom (skip_spaces (cp));
5517 if (len > 0)
5518 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5520 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start an
 * atom or a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (s[0] == '\'')
    {
      for (pos = 1; s[pos] != '\''; pos++)
	{
	  if (s[pos] == '\\')	/* a backslash quotes the next char */
	    pos++;
	  if (s[pos] == '\0')	/* multiline quoted atoms are ignored */
	    return 0;
	}
      return pos + 1;		/* closing quote included */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
	continue;
      return pos;
    }

  return 0;
}
5553 static char *scan_separators (char *);
5554 static void add_regex (char *, language *);
5555 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;		/* where the next decoded char goes */
  char *src;			/* next char to examine */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
	{
	  src++;
	  if (*src == '\0')
	    break;		/* dangling backslash: unterminated */

	  switch (*src)
	    {
	    case 'a': *dst++ = '\007'; break; /* BEL (bell)		 */
	    case 'b': *dst++ = '\b'; break;   /* BS (back space)	 */
	    case 'd': *dst++ = 0177; break;   /* DEL (delete)		 */
	    case 'e': *dst++ = 033; break;    /* ESC (escape)		 */
	    case 'f': *dst++ = '\f'; break;   /* FF (form feed)	 */
	    case 'n': *dst++ = '\n'; break;   /* NL (new line)		 */
	    case 'r': *dst++ = '\r'; break;   /* CR (carriage return)	 */
	    case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab)	 */
	    case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)	 */
	    default:
	      /* A quoted separator stands for itself; for anything
		 else that is quoted, preserve the quote.  */
	      if (*src != sep)
		*dst++ = '\\';
	      *dst++ = *src;
	      break;
	    }
	}
      else if (*src == sep)
	break;
      else
	*dst++ = *src;
    }

  /* Signal an unterminated regexp with NULL.  */
  result = (*src == sep) ? src : 0;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5616 /* Look at the argument of --regex or --no-regex and do the right
5617 thing. Same for each line of a regexp file. */
5618 static void
5619 analyse_regex (char *regex_arg)
5621 if (regex_arg == NULL)
5623 free_regexps (); /* --no-regex: remove existing regexps */
5624 return;
5627 /* A real --regexp option or a line in a regexp file. */
5628 switch (regex_arg[0])
5630 /* Comments in regexp file or null arg to --regex. */
5631 case '\0':
5632 case ' ':
5633 case '\t':
5634 break;
5636 /* Read a regex file. This is recursive and may result in a
5637 loop, which will stop when the file descriptors are exhausted. */
5638 case '@':
5640 FILE *regexfp;
5641 linebuffer regexbuf;
5642 char *regexfile = regex_arg + 1;
5644 /* regexfile is a file containing regexps, one per line. */
5645 regexfp = fopen (regexfile, "r");
5646 if (regexfp == NULL)
5648 pfatal (regexfile);
5649 return;
5651 linebuffer_init (&regexbuf);
5652 while (readline_internal (&regexbuf, regexfp) > 0)
5653 analyse_regex (regexbuf.buffer);
5654 free (regexbuf.buffer);
5655 fclose (regexfp);
5657 break;
5659 /* Regexp to be used for a specific language only. */
5660 case '{':
5662 language *lang;
5663 char *lang_name = regex_arg + 1;
5664 char *cp;
5666 for (cp = lang_name; *cp != '}'; cp++)
5667 if (*cp == '\0')
5669 error ("unterminated language name in regex: %s", regex_arg);
5670 return;
5672 *cp++ = '\0';
5673 lang = get_language_from_langname (lang_name);
5674 if (lang == NULL)
5675 return;
5676 add_regex (cp, lang);
5678 break;
5680 /* Regexp to be used for any language. */
5681 default:
5682 add_regex (regex_arg, NULL);
5683 break;
5687 /* Separate the regexp pattern, compile it,
5688 and care for optional name and modifiers. */
5689 static void
5690 add_regex (char *regexp_pattern, language *lang)
5692 static struct re_pattern_buffer zeropattern;
5693 char sep, *pat, *name, *modifiers;
5694 char empty = '\0';
5695 const char *err;
5696 struct re_pattern_buffer *patbuf;
5697 regexp *rp;
5698 bool
5699 force_explicit_name = TRUE, /* do not use implicit tag names */
5700 ignore_case = FALSE, /* case is significant */
5701 multi_line = FALSE, /* matches are done one line at a time */
5702 single_line = FALSE; /* dot does not match newline */
5705 if (strlen (regexp_pattern) < 3)
5707 error ("null regexp");
5708 return;
5710 sep = regexp_pattern[0];
5711 name = scan_separators (regexp_pattern);
5712 if (name == NULL)
5714 error ("%s: unterminated regexp", regexp_pattern);
5715 return;
5717 if (name[1] == sep)
5719 error ("null name for regexp \"%s\"", regexp_pattern);
5720 return;
5722 modifiers = scan_separators (name);
5723 if (modifiers == NULL) /* no terminating separator --> no name */
5725 modifiers = name;
5726 name = &empty;
5728 else
5729 modifiers += 1; /* skip separator */
5731 /* Parse regex modifiers. */
5732 for (; modifiers[0] != '\0'; modifiers++)
5733 switch (modifiers[0])
5735 case 'N':
5736 if (modifiers == name)
5737 error ("forcing explicit tag name but no name, ignoring");
5738 force_explicit_name = TRUE;
5739 break;
5740 case 'i':
5741 ignore_case = TRUE;
5742 break;
5743 case 's':
5744 single_line = TRUE;
5745 /* FALLTHRU */
5746 case 'm':
5747 multi_line = TRUE;
5748 need_filebuf = TRUE;
5749 break;
5750 default:
5751 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5752 break;
5755 patbuf = xnew (1, struct re_pattern_buffer);
5756 *patbuf = zeropattern;
5757 if (ignore_case)
5759 static char lc_trans[CHARS];
5760 int i;
5761 for (i = 0; i < CHARS; i++)
5762 lc_trans[i] = lowcase (i);
5763 patbuf->translate = lc_trans; /* translation table to fold case */
5766 if (multi_line)
5767 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5768 else
5769 pat = regexp_pattern;
5771 if (single_line)
5772 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5773 else
5774 re_set_syntax (RE_SYNTAX_EMACS);
5776 err = re_compile_pattern (pat, strlen (pat), patbuf);
5777 if (multi_line)
5778 free (pat);
5779 if (err != NULL)
5781 error ("%s while compiling pattern", err);
5782 return;
5785 rp = p_head;
5786 p_head = xnew (1, regexp);
5787 p_head->pattern = savestr (regexp_pattern);
5788 p_head->p_next = rp;
5789 p_head->lang = lang;
5790 p_head->pat = patbuf;
5791 p_head->name = savestr (name);
5792 p_head->error_signaled = FALSE;
5793 p_head->force_explicit_name = force_explicit_name;
5794 p_head->ignore_case = ignore_case;
5795 p_head->multi_line = multi_line;
5799 * Do the substitutions indicated by the regular expression and
5800 * arguments.
5802 static char *
5803 substitute (char *in, char *out, struct re_registers *regs)
5805 char *result, *t;
5806 int size, dig, diglen;
5808 result = NULL;
5809 size = strlen (out);
5811 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5812 if (out[size - 1] == '\\')
5813 fatal ("pattern error in \"%s\"", out);
5814 for (t = etags_strchr (out, '\\');
5815 t != NULL;
5816 t = etags_strchr (t + 2, '\\'))
5817 if (ISDIGIT (t[1]))
5819 dig = t[1] - '0';
5820 diglen = regs->end[dig] - regs->start[dig];
5821 size += diglen - 2;
5823 else
5824 size -= 1;
5826 /* Allocate space and do the substitutions. */
5827 assert (size >= 0);
5828 result = xnew (size + 1, char);
5830 for (t = result; *out != '\0'; out++)
5831 if (*out == '\\' && ISDIGIT (*++out))
5833 dig = *out - '0';
5834 diglen = regs->end[dig] - regs->start[dig];
5835 strncpy (t, in + regs->start[dig], diglen);
5836 t += diglen;
5838 else
5839 *t++ = *out;
5840 *t = '\0';
5842 assert (t <= result + size);
5843 assert (t - result == (int)strlen (result));
5845 return result;
5848 /* Deallocate all regexps. */
5849 static void
5850 free_regexps (void)
5852 regexp *rp;
5853 while (p_head != NULL)
5855 rp = p_head->p_next;
5856 free (p_head->pattern);
5857 free (p_head->name);
5858 free (p_head);
5859 p_head = rp;
5861 return;
5865 * Reads the whole file as a single string from `filebuf' and looks for
5866 * multi-line regular expressions, creating tags on matches.
5867 * readline already dealt with normal regexps.
5869 * Idea by Ben Wing <ben@666.com> (2002).
5871 static void
5872 regex_tag_multiline (void)
5874 char *buffer = filebuf.buffer;
5875 regexp *rp;
5876 char *name;
5878 for (rp = p_head; rp != NULL; rp = rp->p_next)
5880 int match = 0;
5882 if (!rp->multi_line)
5883 continue; /* skip normal regexps */
5885 /* Generic initializations before parsing file from memory. */
5886 lineno = 1; /* reset global line number */
5887 charno = 0; /* reset global char number */
5888 linecharno = 0; /* reset global char number of line start */
5890 /* Only use generic regexps or those for the current language. */
5891 if (rp->lang != NULL && rp->lang != curfdp->lang)
5892 continue;
5894 while (match >= 0 && match < filebuf.len)
5896 match = re_search (rp->pat, buffer, filebuf.len, charno,
5897 filebuf.len - match, &rp->regs);
5898 switch (match)
5900 case -2:
5901 /* Some error. */
5902 if (!rp->error_signaled)
5904 error ("regexp stack overflow while matching \"%s\"",
5905 rp->pattern);
5906 rp->error_signaled = TRUE;
5908 break;
5909 case -1:
5910 /* No match. */
5911 break;
5912 default:
5913 if (match == rp->regs.end[0])
5915 if (!rp->error_signaled)
5917 error ("regexp matches the empty string: \"%s\"",
5918 rp->pattern);
5919 rp->error_signaled = TRUE;
5921 match = -3; /* exit from while loop */
5922 break;
5925 /* Match occurred. Construct a tag. */
5926 while (charno < rp->regs.end[0])
5927 if (buffer[charno++] == '\n')
5928 lineno++, linecharno = charno;
5929 name = rp->name;
5930 if (name[0] == '\0')
5931 name = NULL;
5932 else /* make a named tag */
5933 name = substitute (buffer, rp->name, &rp->regs);
5934 if (rp->force_explicit_name)
5935 /* Force explicit tag name, if a name is there. */
5936 pfnote (name, TRUE, buffer + linecharno,
5937 charno - linecharno + 1, lineno, linecharno);
5938 else
5939 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5940 charno - linecharno + 1, lineno, linecharno);
5941 break;
5948 static bool
5949 nocase_tail (const char *cp)
5951 register int len = 0;
5953 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5954 cp++, len++;
5955 if (*cp == '\0' && !intoken (dbp[len]))
5957 dbp += len;
5958 return TRUE;
5960 return FALSE;
5963 static void
5964 get_tag (register char *bp, char **namepp)
5966 register char *cp = bp;
5968 if (*bp != '\0')
5970 /* Go till you get to white space or a syntactic break */
5971 for (cp = bp + 1; !notinname (*cp); cp++)
5972 continue;
5973 make_tag (bp, cp - bp, TRUE,
5974 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5977 if (namepp != NULL)
5978 *namepp = savenstr (bp, cp - bp);
5982 * Read a line of text from `stream' into `lbp', excluding the
5983 * newline or CR-NL, if any. Return the number of characters read from
5984 * `stream', which is the length of the line including the newline.
5986 * On DOS or Windows we do not count the CR character, if any before the
5987 * NL, in the returned length; this mirrors the behavior of Emacs on those
5988 * platforms (for text files, it translates CR-NL to NL as it reads in the
5989 * file).
5991 * If multi-line regular expressions are requested, each line read is
5992 * appended to `filebuf'.
5994 static long
5995 readline_internal (linebuffer *lbp, register FILE *stream)
5997 char *buffer = lbp->buffer;
5998 register char *p = lbp->buffer;
5999 register char *pend;
6000 int chars_deleted;
6002 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6004 for (;;)
6006 register int c = getc (stream);
6007 if (p == pend)
6009 /* We're at the end of linebuffer: expand it. */
6010 lbp->size *= 2;
6011 xrnew (buffer, lbp->size, char);
6012 p += buffer - lbp->buffer;
6013 pend = buffer + lbp->size;
6014 lbp->buffer = buffer;
6016 if (c == EOF)
6018 *p = '\0';
6019 chars_deleted = 0;
6020 break;
6022 if (c == '\n')
6024 if (p > buffer && p[-1] == '\r')
6026 p -= 1;
6027 #ifdef DOS_NT
6028 /* Assume CRLF->LF translation will be performed by Emacs
6029 when loading this file, so CRs won't appear in the buffer.
6030 It would be cleaner to compensate within Emacs;
6031 however, Emacs does not know how many CRs were deleted
6032 before any given point in the file. */
6033 chars_deleted = 1;
6034 #else
6035 chars_deleted = 2;
6036 #endif
6038 else
6040 chars_deleted = 1;
6042 *p = '\0';
6043 break;
6045 *p++ = c;
6047 lbp->len = p - buffer;
6049 if (need_filebuf /* we need filebuf for multi-line regexps */
6050 && chars_deleted > 0) /* not at EOF */
6052 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6054 /* Expand filebuf. */
6055 filebuf.size *= 2;
6056 xrnew (filebuf.buffer, filebuf.size, char);
6058 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6059 filebuf.len += lbp->len;
6060 filebuf.buffer[filebuf.len++] = '\n';
6061 filebuf.buffer[filebuf.len] = '\0';
6064 return lbp->len + chars_deleted;
6068 * Like readline_internal, above, but in addition try to match the
6069 * input line against relevant regular expressions and manage #line
6070 * directives.
6072 static void
6073 readline (linebuffer *lbp, FILE *stream)
6075 long result;
6077 linecharno = charno; /* update global char number of line start */
6078 result = readline_internal (lbp, stream); /* read line */
6079 lineno += 1; /* increment global line number */
6080 charno += result; /* increment global char number */
6082 /* Honor #line directives. */
6083 if (!no_line_directive)
6085 static bool discard_until_line_directive;
6087 /* Check whether this is a #line directive. */
6088 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6090 unsigned int lno;
6091 int start = 0;
6093 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6094 && start > 0) /* double quote character found */
6096 char *endp = lbp->buffer + start;
6098 while ((endp = etags_strchr (endp, '"')) != NULL
6099 && endp[-1] == '\\')
6100 endp++;
6101 if (endp != NULL)
6102 /* Ok, this is a real #line directive. Let's deal with it. */
6104 char *taggedabsname; /* absolute name of original file */
6105 char *taggedfname; /* name of original file as given */
6106 char *name; /* temp var */
6108 discard_until_line_directive = FALSE; /* found it */
6109 name = lbp->buffer + start;
6110 *endp = '\0';
6111 canonicalize_filename (name);
6112 taggedabsname = absolute_filename (name, tagfiledir);
6113 if (filename_is_absolute (name)
6114 || filename_is_absolute (curfdp->infname))
6115 taggedfname = savestr (taggedabsname);
6116 else
6117 taggedfname = relative_filename (taggedabsname,tagfiledir);
6119 if (streq (curfdp->taggedfname, taggedfname))
6120 /* The #line directive is only a line number change. We
6121 deal with this afterwards. */
6122 free (taggedfname);
6123 else
6124 /* The tags following this #line directive should be
6125 attributed to taggedfname. In order to do this, set
6126 curfdp accordingly. */
6128 fdesc *fdp; /* file description pointer */
6130 /* Go look for a file description already set up for the
6131 file indicated in the #line directive. If there is
6132 one, use it from now until the next #line
6133 directive. */
6134 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6135 if (streq (fdp->infname, curfdp->infname)
6136 && streq (fdp->taggedfname, taggedfname))
6137 /* If we remove the second test above (after the &&)
6138 then all entries pertaining to the same file are
6139 coalesced in the tags file. If we use it, then
6140 entries pertaining to the same file but generated
6141 from different files (via #line directives) will
6142 go into separate sections in the tags file. These
6143 alternatives look equivalent. The first one
6144 destroys some apparently useless information. */
6146 curfdp = fdp;
6147 free (taggedfname);
6148 break;
6150 /* Else, if we already tagged the real file, skip all
6151 input lines until the next #line directive. */
6152 if (fdp == NULL) /* not found */
6153 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6154 if (streq (fdp->infabsname, taggedabsname))
6156 discard_until_line_directive = TRUE;
6157 free (taggedfname);
6158 break;
6160 /* Else create a new file description and use that from
6161 now on, until the next #line directive. */
6162 if (fdp == NULL) /* not found */
6164 fdp = fdhead;
6165 fdhead = xnew (1, fdesc);
6166 *fdhead = *curfdp; /* copy curr. file description */
6167 fdhead->next = fdp;
6168 fdhead->infname = savestr (curfdp->infname);
6169 fdhead->infabsname = savestr (curfdp->infabsname);
6170 fdhead->infabsdir = savestr (curfdp->infabsdir);
6171 fdhead->taggedfname = taggedfname;
6172 fdhead->usecharno = FALSE;
6173 fdhead->prop = NULL;
6174 fdhead->written = FALSE;
6175 curfdp = fdhead;
6178 free (taggedabsname);
6179 lineno = lno - 1;
6180 readline (lbp, stream);
6181 return;
6182 } /* if a real #line directive */
6183 } /* if #line is followed by a number */
6184 } /* if line begins with "#line " */
6186 /* If we are here, no #line directive was found. */
6187 if (discard_until_line_directive)
6189 if (result > 0)
6191 /* Do a tail recursion on ourselves, thus discarding the contents
6192 of the line buffer. */
6193 readline (lbp, stream);
6194 return;
6196 /* End of file. */
6197 discard_until_line_directive = FALSE;
6198 return;
6200 } /* if #line directives should be considered */
6203 int match;
6204 regexp *rp;
6205 char *name;
6207 /* Match against relevant regexps. */
6208 if (lbp->len > 0)
6209 for (rp = p_head; rp != NULL; rp = rp->p_next)
6211 /* Only use generic regexps or those for the current language.
6212 Also do not use multiline regexps, which is the job of
6213 regex_tag_multiline. */
6214 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6215 || rp->multi_line)
6216 continue;
6218 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6219 switch (match)
6221 case -2:
6222 /* Some error. */
6223 if (!rp->error_signaled)
6225 error ("regexp stack overflow while matching \"%s\"",
6226 rp->pattern);
6227 rp->error_signaled = TRUE;
6229 break;
6230 case -1:
6231 /* No match. */
6232 break;
6233 case 0:
6234 /* Empty string matched. */
6235 if (!rp->error_signaled)
6237 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6238 rp->error_signaled = TRUE;
6240 break;
6241 default:
6242 /* Match occurred. Construct a tag. */
6243 name = rp->name;
6244 if (name[0] == '\0')
6245 name = NULL;
6246 else /* make a named tag */
6247 name = substitute (lbp->buffer, rp->name, &rp->regs);
6248 if (rp->force_explicit_name)
6249 /* Force explicit tag name, if a name is there. */
6250 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6251 else
6252 make_tag (name, strlen (name), TRUE,
6253 lbp->buffer, match, lineno, linecharno);
6254 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6272 * Return a pointer to a space of size LEN+1 allocated with xnew where
6273 * the string CP has been copied for at most the first LEN characters.
6275 static char *
6276 savenstr (const char *cp, int len)
6278 register char *dp;
6280 dp = xnew (len + 1, char);
6281 strncpy (dp, cp, len);
6282 dp[len] = '\0';
6283 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As in POSIX, C is converted to char before comparing, and the
 * terminating NUL counts as part of the string.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *r = NULL;
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As in POSIX, C is converted to char before comparing, and the
 * terminating NUL counts as part of the string.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 *
 * Same as BSD's strcasecmp, included for portability.
 *
 * Two characters are compared after tolower only when both are
 * alphabetic; otherwise they are compared as they are.  The result is
 * zero when the strings match, else the difference at the first
 * position where they differ.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (;; s1++, s2++)
    {
      /* Cast to unsigned char: the <ctype.h> functions are undefined
         for negative arguments other than EOF.  */
      const int both_alpha = (isalpha ((unsigned char) *s1)
                              && isalpha ((unsigned char) *s2));
      const int diff = (both_alpha
                        ? (tolower ((unsigned char) *s1)
                           - tolower ((unsigned char) *s2))
                        : *s1 - *s2);

      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Same as BSD's strncasecmp, included for portability.
 *
 * At most N characters are examined; if they all match (or N is not
 * positive) the result is zero, even when S1 ends exactly after N
 * characters and S2 goes on.  Two characters are compared after tolower
 * only when both are alphabetic; otherwise they are compared as they are.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  for (; n > 0; s1++, s2++, n--)
    {
      /* Cast to unsigned char: the <ctype.h> functions are undefined
         for negative arguments other than EOF.  */
      const int both_alpha = (isalpha ((unsigned char) *s1)
                              && isalpha ((unsigned char) *s2));
      const int diff = (both_alpha
                        ? (tolower ((unsigned char) *s1)
                           - tolower ((unsigned char) *s2))
                        : *s1 - *s2);

      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  /* The first N characters were all equal.  */
  return 0;
}
/* Skip spaces (end of string is not space), return new pointer.
   A character counts as a space when iswhite accepts it.  */
static char *
skip_spaces (char *cp)
{
  while (iswhite (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer.
   The result points at the first character accepted by iswhite, or at
   the terminating NUL if there is none.  */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !iswhite (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  S1 is passed to error as the format
   string and S2 as its only argument; the process then exits with
   EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Report the current errno through perror, using S1 as the prefix,
   and exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6398 static void
6399 suggest_asking_for_help (void)
6401 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6402 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6403 exit (EXIT_FAILURE);
6406 /* Output a diagnostic with printf-style FORMAT and args. */
6407 static void
6408 error (const char *format, ...)
6410 va_list ap;
6411 va_start (ap, format);
6412 fprintf (stderr, "%s: ", progname);
6413 vfprintf (stderr, format, ap);
6414 fprintf (stderr, "\n");
6415 va_end (ap);
6418 /* Return a newly-allocated string whose contents
6419 concatenate those of s1, s2, s3. */
6420 static char *
6421 concat (const char *s1, const char *s2, const char *s3)
6423 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6424 char *result = xnew (len1 + len2 + len3 + 1, char);
6426 strcpy (result, s1);
6427 strcpy (result + len1, s2);
6428 strcpy (result + len1 + len2, s3);
6429 result[len1 + len2 + len3] = '\0';
6431 return result;
6435 /* Does the same work as the system V getcwd, but does not need to
6436 guess the buffer size in advance. */
6437 static char *
6438 etags_getcwd (void)
6440 #ifdef HAVE_GETCWD
6441 int bufsize = 200;
6442 char *path = xnew (bufsize, char);
6444 while (getcwd (path, bufsize) == NULL)
6446 if (errno != ERANGE)
6447 pfatal ("getcwd");
6448 bufsize *= 2;
6449 free (path);
6450 path = xnew (bufsize, char);
6453 canonicalize_filename (path);
6454 return path;
6456 #else /* not HAVE_GETCWD */
6457 #if MSDOS
6459 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6461 getwd (path);
6463 for (p = path; *p != '\0'; p++)
6464 if (*p == '\\')
6465 *p = '/';
6466 else
6467 *p = lowcase (*p);
6469 return strdup (path);
6470 #else /* not MSDOS */
6471 linebuffer path;
6472 FILE *pipe;
6474 linebuffer_init (&path);
6475 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6476 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6477 pfatal ("pwd");
6478 pclose (pipe);
6480 return path.buffer;
6481 #endif /* not MSDOS */
6482 #endif /* not HAVE_GETCWD */
6485 /* Return a newly allocated string containing the file name of FILE
6486 relative to the absolute directory DIR (which should end with a slash). */
6487 static char *
6488 relative_filename (char *file, char *dir)
6490 char *fp, *dp, *afn, *res;
6491 int i;
6493 /* Find the common root of file and dir (with a trailing slash). */
6494 afn = absolute_filename (file, cwd);
6495 fp = afn;
6496 dp = dir;
6497 while (*fp++ == *dp++)
6498 continue;
6499 fp--, dp--; /* back to the first differing char */
6500 #ifdef DOS_NT
6501 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6502 return afn;
6503 #endif
6504 do /* look at the equal chars until '/' */
6505 fp--, dp--;
6506 while (*fp != '/');
6508 /* Build a sequence of "../" strings for the resulting relative file name. */
6509 i = 0;
6510 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6511 i += 1;
6512 res = xnew (3*i + strlen (fp + 1) + 1, char);
6513 res[0] = '\0';
6514 while (i-- > 0)
6515 strcat (res, "../");
6517 /* Add the file name relative to the common root of file and dir. */
6518 strcat (res, fp + 1);
6519 free (afn);
6521 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as it is, otherwise DIR and FILE are
   concatenated.  Then every "/." component is removed and every
   "/dirname/.." pair is collapsed; a ".." directly under the root is
   simply dropped.  If nothing remains, "/" is returned.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the previous
                 component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) is the length of the tail after
                 "/.." plus its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." and move the tail, NUL included, over it.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   If FILE contains no slash, a copy of DIR is returned.  Otherwise
   FILE is cut right after its last slash for the duration of the call
   to absolute_filename and restored afterwards, so FILE must be
   writable.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);

  /* Temporarily end FILE after the last slash.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
6607 /* Whether the argument string is an absolute file name. The argument
6608 string must have been canonicalized with canonicalize_filename. */
6609 static bool
6610 filename_is_absolute (char *fn)
6612 return (fn[0] == '/'
6613 #ifdef DOS_NT
6614 || (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
6615 #endif
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.

   The separator is '/' unless DOS_NT is defined, in which case it is
   '\\' and an upper-case drive letter before ':' is lowercased first.
   Each run of separators becomes one '/'.  */
static void
canonicalize_filename (register char *fn)
{
  register char* cp;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c) isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash.  CP reads ahead
     while FN marks where the next character is written.  */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == sep)
      {
        *fn = '/';
        while (cp[1] == sep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6650 /* Initialize a linebuffer for use. */
6651 static void
6652 linebuffer_init (linebuffer *lbp)
6654 lbp->size = (DEBUG) ? 3 : 200;
6655 lbp->buffer = xnew (lbp->size, char);
6656 lbp->buffer[0] = '\0';
6657 lbp->len = 0;
6660 /* Set the minimum size of a string contained in a linebuffer. */
6661 static void
6662 linebuffer_setlen (linebuffer *lbp, int toksize)
6664 while (lbp->size <= toksize)
6666 lbp->size *= 2;
6667 xrnew (lbp->buffer, lbp->size, char);
6669 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.
   A request for zero bytes is served as a request for one byte, because
   malloc may return NULL for a zero size and that is not exhaustion.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
/* Like realloc but get fatal error if memory is exhausted.
   A request for zero bytes is served as a request for one byte, because
   realloc may return NULL for a zero size and that is not exhaustion.  */
static void *
xrealloc (char *ptr, size_t size)
{
  void *result = realloc (ptr, size != 0 ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */