Large simplification in (vc-deduce-fileset) logic.
[emacs.git] / lib-src / etags.c
blob2fe0c6314fdb0381db2228a2e3c35dbc3189610f
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
84 char pot_etags_version[] = "@(#) pot revision number is 17.38";
86 #define TRUE 1
87 #define FALSE 0
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv __P((const char *));
164 extern char *strcpy __P((char *, const char *));
165 extern char *strncpy __P((char *, const char *, unsigned long));
166 extern char *strcat __P((char *, const char *));
167 extern char *strncat __P((char *, const char *, unsigned long));
168 extern int strcmp __P((const char *, const char *));
169 extern int strncmp __P((const char *, const char *, unsigned long));
170 extern int system __P((const char *));
171 extern unsigned long strlen __P((const char *));
172 extern void *malloc __P((unsigned long));
173 extern void *realloc __P((void *, unsigned long));
174 extern void exit __P((int));
175 extern void free __P((void *));
176 extern void *memmove __P((void *, const void *, unsigned long));
177 # ifdef VMS
178 # define EXIT_SUCCESS 1
179 # define EXIT_FAILURE 0
180 # else /* no VMS */
181 # define EXIT_SUCCESS 0
182 # define EXIT_FAILURE 1
183 # endif
184 # endif
185 #endif /* !WINDOWSNT */
187 #ifdef HAVE_UNISTD_H
188 # include <unistd.h>
189 #else
190 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
191 extern char *getcwd (char *buf, size_t size);
192 # endif
193 #endif /* HAVE_UNISTD_H */
195 #include <stdio.h>
196 #include <ctype.h>
197 #include <errno.h>
198 #ifndef errno
199 extern int errno;
200 #endif
201 #include <sys/types.h>
202 #include <sys/stat.h>
204 #include <assert.h>
205 #ifdef NDEBUG
206 # undef assert /* some systems have a buggy assert.h */
207 # define assert(x) ((void) 0)
208 #endif
210 #if !defined (S_ISREG) && defined (S_IFREG)
211 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
212 #endif
214 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
215 # define NO_LONG_OPTIONS TRUE
216 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
217 extern char *optarg;
218 extern int optind, opterr;
219 #else
220 # define NO_LONG_OPTIONS FALSE
221 # include <getopt.h>
222 #endif /* NO_LONG_OPTIONS */
224 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
225 # ifdef __CYGWIN__ /* compiling on Cygwin */
226 !!! NOTICE !!!
227 the regex.h distributed with Cygwin is not compatible with etags, alas!
228 If you want regular expression support, you should delete this notice and
229 arrange to use the GNU regex.h and regex.c.
230 # endif
231 #endif
232 #include <regex.h>
234 /* Define CTAGS to make the program "ctags" compatible with the usual one.
235 Leave it undefined to make the program "etags", which makes emacs-style
236 tag tables and tags typedefs, #defines and struct/union/enum by default. */
237 #ifdef CTAGS
238 # undef CTAGS
239 # define CTAGS TRUE
240 #else
241 # define CTAGS FALSE
242 #endif
/*
 * String comparison predicates: each yields non-zero when S and T compare
 * equal (the `n' variants look at no more than N characters, the `case'
 * variants ignore case).
 *
 * Both arguments are handed straight to the comparison function, so BOTH
 * must be non-NULL; the assertion therefore uses `&&' in every variant.
 * The assertion is only active in DEBUG builds, because NDEBUG is defined
 * otherwise.  S and T are evaluated twice when assertions are enabled.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* Character classification helpers.  CHAR() reduces its argument to the
   range 0..CHARS-1 so that it can safely index the CHARS-sized lookup
   tables and be passed to the <ctype.h> functions.  */
249 #define CHARS 256 /* 2^8: number of values left after CHAR() masks to the low 8 bits */
250 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
251 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
252 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
253 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
254 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
255 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
/* <ctype.h> wrappers that apply CHAR() to the argument first.  */
257 #define ISALNUM(c) isalnum (CHAR(c))
258 #define ISALPHA(c) isalpha (CHAR(c))
259 #define ISDIGIT(c) isdigit (CHAR(c))
260 #define ISLOWER(c) islower (CHAR(c))
/* Case conversion through tolower/toupper, again on the masked value.  */
262 #define lowcase(c) tolower (CHAR(c))
263 #define upcase(c) toupper (CHAR(c))
267 * xnew, xrnew -- allocate, reallocate storage
269 * SYNOPSIS: Type *xnew (int n, Type);
270 * void xrnew (OldPointer, int n, Type);
/* Typed allocation helpers.  xnew (N, Type) yields a `Type *' to storage
   for N objects of Type.  xrnew (OP, N, Type) resizes the storage OP
   points to and assigns the result back to OP, so OP must be an lvalue
   and is evaluated twice.  When DEBUG is true the calls go through
   trace_malloc/trace_realloc with the caller's file and line
   (NOTE(review): presumably declared in chkmalloc.h -- confirm);
   otherwise they go through xmalloc/xrealloc.
   N * sizeof (Type) is not checked for overflow here.  */
272 #if DEBUG
273 # include "chkmalloc.h"
274 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
275 (n) * sizeof (Type)))
276 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
277 (char *) (op), (n) * sizeof (Type)))
278 #else
279 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
280 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
281 (char *) (op), (n) * sizeof (Type)))
282 #endif
284 #define bool int
286 typedef void Lang_function __P((FILE *));
288 typedef struct
290 char *suffix; /* file name suffix for this compressor */
291 char *command; /* takes one arg and decompresses to stdout */
292 } compressor;
294 typedef struct
296 char *name; /* language name */
297 char *help; /* detailed help for the language */
298 Lang_function *function; /* parse function */
299 char **suffixes; /* name suffixes of this language's files */
300 char **filenames; /* names of this language's files */
301 char **interpreters; /* interpreters for this language */
302 bool metasource; /* source used to generate other sources */
303 } language;
305 typedef struct fdesc
307 struct fdesc *next; /* for the linked list */
308 char *infname; /* uncompressed input file name */
309 char *infabsname; /* absolute uncompressed input file name */
310 char *infabsdir; /* absolute dir of input file */
311 char *taggedfname; /* file name to write in tagfile */
312 language *lang; /* language of file */
313 char *prop; /* file properties to write in tagfile */
314 bool usecharno; /* etags tags shall contain char number */
315 bool written; /* entry written in the tags file */
316 } fdesc;
318 typedef struct node_st
319 { /* sorting structure */
320 struct node_st *left, *right; /* left and right sons */
321 fdesc *fdp; /* description of file to whom tag belongs */
322 char *name; /* tag name */
323 char *regex; /* search regexp */
324 bool valid; /* write this tag on the tag file */
325 bool is_func; /* function tag: use regexp in CTAGS mode */
326 bool been_warned; /* warning already given for duplicated tag */
327 int lno; /* line number tag is on */
328 long cno; /* character number line starts on */
329 } node;
332 * A `linebuffer' is a structure which holds a line of text.
333 * `readline_internal' reads a line from a stream into a linebuffer
334 * and works regardless of the length of the line.
335 * SIZE is the size of BUFFER, LEN is the length of the string in
336 * BUFFER after readline reads it.
338 typedef struct
340 long size; /* size of BUFFER */
341 int len; /* length of the string in BUFFER after a line is read */
342 char *buffer; /* storage holding the line of text */
343 } linebuffer;
345 /* Used to support mixing of --lang and file names. */
346 typedef struct
348 enum {
349 at_language, /* a language specification */
350 at_regexp, /* a regular expression */
351 at_filename, /* a file name */
352 at_stdin, /* read from stdin here */
353 at_end /* stop parsing the list */
354 } arg_type; /* argument type */
355 language *lang; /* language associated with the argument */
356 char *what; /* the argument itself */
357 } argument;
359 /* Structure defining a regular expression. */
/* One node of the linked list of regexps (see p_head); nodes are chained
   through p_next.  */
360 typedef struct regexp
362 struct regexp *p_next; /* pointer to next in list */
363 language *lang; /* if set, use only for this language */
364 char *pattern; /* the regexp pattern */
365 char *name; /* tag name */
366 struct re_pattern_buffer *pat; /* the compiled pattern */
367 struct re_registers regs; /* re registers */
368 bool error_signaled; /* already signaled for this regexp */
369 bool force_explicit_name; /* do not allow implicit tag name */
370 bool ignore_case; /* ignore case when matching */
371 bool multi_line; /* do a multi-line match on the whole file */
372 } regexp;
375 /* Many compilers barf on this:
376 Lang_function Ada_funcs;
377 so let's write it this way */
378 static void Ada_funcs __P((FILE *));
379 static void Asm_labels __P((FILE *));
380 static void C_entries __P((int c_ext, FILE *));
381 static void default_C_entries __P((FILE *));
382 static void plain_C_entries __P((FILE *));
383 static void Cjava_entries __P((FILE *));
384 static void Cobol_paragraphs __P((FILE *));
385 static void Cplusplus_entries __P((FILE *));
386 static void Cstar_entries __P((FILE *));
387 static void Erlang_functions __P((FILE *));
388 static void Forth_words __P((FILE *));
389 static void Fortran_functions __P((FILE *));
390 static void HTML_labels __P((FILE *));
391 static void Lisp_functions __P((FILE *));
392 static void Lua_functions __P((FILE *));
393 static void Makefile_targets __P((FILE *));
394 static void Pascal_functions __P((FILE *));
395 static void Perl_functions __P((FILE *));
396 static void PHP_functions __P((FILE *));
397 static void PS_functions __P((FILE *));
398 static void Prolog_functions __P((FILE *));
399 static void Python_functions __P((FILE *));
400 static void Scheme_functions __P((FILE *));
401 static void TeX_commands __P((FILE *));
402 static void Texinfo_nodes __P((FILE *));
403 static void Yacc_entries __P((FILE *));
404 static void just_read_file __P((FILE *));
406 static void print_language_names __P((void));
407 static void print_version __P((void));
408 static void print_help __P((argument *));
409 int main __P((int, char **));
411 static compressor *get_compressor_from_suffix __P((char *, char **));
412 static language *get_language_from_langname __P((const char *));
413 static language *get_language_from_interpreter __P((char *));
414 static language *get_language_from_filename __P((char *, bool));
415 static void readline __P((linebuffer *, FILE *));
416 static long readline_internal __P((linebuffer *, FILE *));
417 static bool nocase_tail __P((char *));
418 static void get_tag __P((char *, char **));
420 static void analyse_regex __P((char *));
421 static void free_regexps __P((void));
422 static void regex_tag_multiline __P((void));
423 static void error __P((const char *, const char *));
424 static void suggest_asking_for_help __P((void));
425 void fatal __P((char *, char *));
426 static void pfatal __P((char *));
427 static void add_node __P((node *, node **));
429 static void init __P((void));
430 static void process_file_name __P((char *, language *));
431 static void process_file __P((FILE *, char *, language *));
432 static void find_entries __P((FILE *));
433 static void free_tree __P((node *));
434 static void free_fdesc __P((fdesc *));
435 static void pfnote __P((char *, bool, char *, int, int, long));
436 static void make_tag __P((char *, int, bool, char *, int, int, long));
437 static void invalidate_nodes __P((fdesc *, node **));
438 static void put_entries __P((node *));
440 static char *concat __P((char *, char *, char *));
441 static char *skip_spaces __P((char *));
442 static char *skip_non_spaces __P((char *));
443 static char *savenstr __P((char *, int));
444 static char *savestr __P((char *));
445 static char *etags_strchr __P((const char *, int));
446 static char *etags_strrchr __P((const char *, int));
447 static int etags_strcasecmp __P((const char *, const char *));
448 static int etags_strncasecmp __P((const char *, const char *, int));
449 static char *etags_getcwd __P((void));
450 static char *relative_filename __P((char *, char *));
451 static char *absolute_filename __P((char *, char *));
452 static char *absolute_dirname __P((char *, char *));
453 static bool filename_is_absolute __P((char *f));
454 static void canonicalize_filename __P((char *));
455 static void linebuffer_init __P((linebuffer *));
456 static void linebuffer_setlen __P((linebuffer *, int));
457 static PTR xmalloc __P((unsigned int));
458 static PTR xrealloc __P((char *, unsigned int));
461 static char searchar = '/'; /* use /.../ searches */
463 static char *tagfile; /* output file */
464 static char *progname; /* name this program was invoked with */
465 static char *cwd; /* current working directory */
466 static char *tagfiledir; /* directory of tagfile */
467 static FILE *tagf; /* ioptr for tags file */
469 static fdesc *fdhead; /* head of file description list */
470 static fdesc *curfdp; /* current file description */
471 static int lineno; /* line number of current line */
472 static long charno; /* current character number */
473 static long linecharno; /* charno of start of current line */
474 static char *dbp; /* pointer to start of current tag */
476 static const int invalidcharno = -1;
478 static node *nodehead; /* the head of the binary tree of tags */
479 static node *last_node; /* the last node created */
481 static linebuffer lb; /* the current line */
482 static linebuffer filebuf; /* a buffer containing the whole file */
483 static linebuffer token_name; /* a buffer containing a tag name */
485 /* boolean "functions" (see init) */
486 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
487 static char
488 /* white chars */
489 *white = " \f\t\n\r\v",
490 /* not in a name */
491 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
492 /* token ending chars */
493 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
494 /* token starting chars */
495 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
496 /* valid in-token chars */
497 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
499 static bool append_to_tagfile; /* -a: append to tags */
500 /* The next five default to TRUE in C and derived languages. */
501 static bool typedefs; /* -t: create tags for C and Ada typedefs */
502 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
503 /* 0 struct/enum/union decls, and C++ */
504 /* member functions. */
505 static bool constantypedefs; /* -d: create tags for C #define, enum */
506 /* constants and variables. */
507 /* -D: opposite of -d. Default under ctags. */
508 static bool globals; /* create tags for global variables */
509 static bool members; /* create tags for C member variables */
510 static bool declarations; /* --declarations: tag them and extern in C&Co*/
511 static bool no_line_directive; /* ignore #line directives (undocumented) */
512 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
513 static bool update; /* -u: update tags */
514 static bool vgrind_style; /* -v: create vgrind style index output */
515 static bool no_warnings; /* -w: suppress warnings (undocumented) */
516 static bool cxref_style; /* -x: create cxref style output */
517 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
518 static bool ignoreindent; /* -I: ignore indentation in C */
519 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
521 /* STDIN is defined in LynxOS system headers */
522 #ifdef STDIN
523 # undef STDIN
524 #endif
526 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
527 static bool parsing_stdin; /* --parse-stdin used */
529 static regexp *p_head; /* list of all regexps */
530 static bool need_filebuf; /* some regexes are multi-line */
/* Long option table handed to getopt_long.  Each entry is
   { name, has_arg, flag, val }.  Entries with a NULL flag are reported
   through the value in the last field (a short-option letter, or STDIN
   for --parse-stdin).  Entries with a non-NULL flag name the boolean
   variable that receives the last field (TRUE or FALSE), per the
   getopt_long contract.  The table is terminated by the { NULL } entry,
   and the CTAGS setting selects which of the two option groups near the
   end is compiled in.  */
532 static struct option longopts[] =
534 { "append", no_argument, NULL, 'a' },
535 { "packages-only", no_argument, &packages_only, TRUE },
536 { "c++", no_argument, NULL, 'C' },
537 { "declarations", no_argument, &declarations, TRUE },
538 { "no-line-directive", no_argument, &no_line_directive, TRUE },
539 { "no-duplicates", no_argument, &no_duplicates, TRUE },
/* NOTE(review): "help" is listed twice, mapping to both 'h' and 'H'.
   Presumably only the first entry can ever be matched by name, which
   would make the 'H' entry dead -- confirm before relying on either.  */
540 { "help", no_argument, NULL, 'h' },
541 { "help", no_argument, NULL, 'H' },
542 { "ignore-indentation", no_argument, NULL, 'I' },
543 { "language", required_argument, NULL, 'l' },
544 { "members", no_argument, &members, TRUE },
545 { "no-members", no_argument, &members, FALSE },
546 { "output", required_argument, NULL, 'o' },
547 { "regex", required_argument, NULL, 'r' },
548 { "no-regex", no_argument, NULL, 'R' },
549 { "ignore-case-regex", required_argument, NULL, 'c' },
550 { "parse-stdin", required_argument, NULL, STDIN },
551 { "version", no_argument, NULL, 'V' },
553 #if CTAGS /* Ctags options */
554 { "backward-search", no_argument, NULL, 'B' },
555 { "cxref", no_argument, NULL, 'x' },
556 { "defines", no_argument, NULL, 'd' },
557 { "globals", no_argument, &globals, TRUE },
558 { "typedefs", no_argument, NULL, 't' },
559 { "typedefs-and-c++", no_argument, NULL, 'T' },
560 { "update", no_argument, NULL, 'u' },
561 { "vgrind", no_argument, NULL, 'v' },
562 { "no-warn", no_argument, NULL, 'w' },
564 #else /* Etags options */
565 { "no-defines", no_argument, NULL, 'D' },
566 { "no-globals", no_argument, &globals, FALSE },
567 { "include", required_argument, NULL, 'i' },
568 #endif
569 { NULL }
/* Known compressed-file suffixes.  Each entry is { suffix, command }:
   the file name suffix identifying the compressor and the command that,
   given one argument, decompresses to stdout.  The entry with a NULL
   suffix ends the table.  */
572 static compressor compressors[] =
574 { "z", "gzip -d -c"},
575 { "Z", "gzip -d -c"},
576 { "gz", "gzip -d -c"},
577 { "GZ", "gzip -d -c"},
578 { "bz2", "bzip2 -d -c" },
579 { NULL }
583 * Language stuff.
586 /* Ada code */
587 static char *Ada_suffixes [] =
588 { "ads", "adb", "ada", NULL };
589 static char Ada_help [] =
590 "In Ada code, functions, procedures, packages, tasks and types are\n\
591 tags. Use the `--packages-only' option to create tags for\n\
592 packages only.\n\
593 Ada tag names have suffixes indicating the type of entity:\n\
594 Entity type: Qualifier:\n\
595 ------------ ----------\n\
596 function /f\n\
597 procedure /p\n\
598 package spec /s\n\
599 package body /b\n\
600 type /t\n\
601 task /k\n\
602 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
603 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
604 will just search for any tag `bidule'.";
606 /* Assembly code */
607 static char *Asm_suffixes [] =
608 { "a", /* Unix assembler */
609 "asm", /* Microcontroller assembly */
610 "def", /* BSO/Tasking definition includes */
611 "inc", /* Microcontroller include files */
612 "ins", /* Microcontroller include files */
613 "s", "sa", /* Unix assembler */
614 "S", /* cpp-processed Unix assembler */
615 "src", /* BSO/Tasking C compiler output */
616 NULL
618 static char Asm_help [] =
619 "In assembler code, labels appearing at the beginning of a line,\n\
620 followed by a colon, are tags.";
623 /* Note that .c and .h can be considered C++, if the --c++ flag was
624 given, or if the `class' or `template' keywords are met inside the file.
625 That is why default_C_entries is called for these. */
626 static char *default_C_suffixes [] =
627 { "c", "h", NULL };
628 #if CTAGS /* C help for Ctags */
629 static char default_C_help [] =
630 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
631 Use -T to tag definitions of `struct', `union' and `enum'.\n\
632 Use -d to tag `#define' macro definitions and `enum' constants.\n\
633 Use --globals to tag global variables.\n\
634 You can tag function declarations and external variables by\n\
635 using `--declarations', and struct members by using `--members'.";
636 #else /* C help for Etags */
637 static char default_C_help [] =
638 "In C code, any C function or typedef is a tag, and so are\n\
639 definitions of `struct', `union' and `enum'. `#define' macro\n\
640 definitions and `enum' constants are tags unless you specify\n\
641 `--no-defines'. Global variables are tags unless you specify\n\
642 `--no-globals' and so are struct members unless you specify\n\
643 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
644 `--no-members' can make the tags table file much smaller.\n\
645 You can tag function declarations and external variables by\n\
646 using `--declarations'.";
647 #endif /* C help for Ctags and Etags */
649 static char *Cplusplus_suffixes [] =
650 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
651 "M", /* Objective C++ */
652 "pdb", /* Postscript with C syntax */
653 NULL };
654 static char Cplusplus_help [] =
655 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
656 --help --lang=c --lang=c++ for full help.)\n\
657 In addition to C tags, member functions are also recognized. Member\n\
658 variables are recognized unless you use the `--no-members' option.\n\
659 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
660 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
661 `operator+'.";
663 static char *Cjava_suffixes [] =
664 { "java", NULL };
665 static char Cjava_help [] =
666 "In Java code, all the tags constructs of C and C++ code are\n\
667 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
670 static char *Cobol_suffixes [] =
671 { "COB", "cob", NULL };
672 static char Cobol_help [] =
673 "In Cobol code, tags are paragraph names; that is, any word\n\
674 starting in column 8 and followed by a period.";
676 static char *Cstar_suffixes [] =
677 { "cs", "hs", NULL };
679 static char *Erlang_suffixes [] =
680 { "erl", "hrl", NULL };
681 static char Erlang_help [] =
682 "In Erlang code, the tags are the functions, records and macros\n\
683 defined in the file.";
/* Forth code: file name suffixes.  Declared static like every other
   per-language suffix table, since it is only referenced from the
   language table in this file.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
687 static char Forth_help [] =
688 "In Forth code, tags are words defined by `:',\n\
689 constant, code, create, defer, value, variable, buffer:, field.";
691 static char *Fortran_suffixes [] =
692 { "F", "f", "f90", "for", NULL };
693 static char Fortran_help [] =
694 "In Fortran code, functions, subroutines and block data are tags.";
696 static char *HTML_suffixes [] =
697 { "htm", "html", "shtml", NULL };
698 static char HTML_help [] =
699 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
700 `h3' headers. Also, tags are `name=' in anchors and all\n\
701 occurrences of `id='.";
703 static char *Lisp_suffixes [] =
704 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
705 static char Lisp_help [] =
706 "In Lisp code, any function defined with `defun', any variable\n\
707 defined with `defvar' or `defconst', and in general the first\n\
708 argument of any expression that starts with `(def' in column zero\n\
709 is a tag.";
711 static char *Lua_suffixes [] =
712 { "lua", "LUA", NULL };
713 static char Lua_help [] =
714 "In Lua scripts, all functions are tags.";
716 static char *Makefile_filenames [] =
717 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
718 static char Makefile_help [] =
719 "In makefiles, targets are tags; additionally, variables are tags\n\
720 unless you specify `--no-globals'.";
722 static char *Objc_suffixes [] =
723 { "lm", /* Objective lex file */
724 "m", /* Objective C file */
725 NULL };
726 static char Objc_help [] =
727 "In Objective C code, tags include Objective C definitions for classes,\n\
728 class categories, methods and protocols. Tags for variables and\n\
729 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
730 (Use --help --lang=c --lang=objc --lang=java for full help.)";
732 static char *Pascal_suffixes [] =
733 { "p", "pas", NULL };
734 static char Pascal_help [] =
735 "In Pascal code, the tags are the functions and procedures defined\n\
736 in the file.";
737 /* " // this is for working around an Emacs highlighting bug... */
739 static char *Perl_suffixes [] =
740 { "pl", "pm", NULL };
741 static char *Perl_interpreters [] =
742 { "perl", "@PERL@", NULL };
743 static char Perl_help [] =
744 "In Perl code, the tags are the packages, subroutines and variables\n\
745 defined by the `package', `sub', `my' and `local' keywords. Use\n\
746 `--globals' if you want to tag global variables. Tags for\n\
747 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
748 defined in the default package is `main::SUB'.";
750 static char *PHP_suffixes [] =
751 { "php", "php3", "php4", NULL };
752 static char PHP_help [] =
753 "In PHP code, tags are functions, classes and defines. Unless you use\n\
754 the `--no-members' option, vars are tags too.";
756 static char *plain_C_suffixes [] =
757 { "pc", /* Pro*C file */
758 NULL };
760 static char *PS_suffixes [] =
761 { "ps", "psw", NULL }; /* .psw is for PSWrap */
762 static char PS_help [] =
763 "In PostScript code, the tags are the functions.";
765 static char *Prolog_suffixes [] =
766 { "prolog", NULL };
767 static char Prolog_help [] =
768 "In Prolog code, tags are predicates and rules at the beginning of\n\
769 line.";
771 static char *Python_suffixes [] =
772 { "py", NULL };
773 static char Python_help [] =
774 "In Python code, `def' or `class' at the beginning of a line\n\
775 generate a tag.";
777 /* Can't do the `SCM' or `scm' prefix with a version number. */
778 static char *Scheme_suffixes [] =
779 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
780 static char Scheme_help [] =
781 "In Scheme code, tags include anything defined with `def' or with a\n\
782 construct whose name starts with `def'. They also include\n\
783 variables set with `set!' at top level in the file.";
785 static char *TeX_suffixes [] =
786 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
787 static char TeX_help [] =
788 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
789 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
790 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
791 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
792 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
794 Other commands can be specified by setting the environment variable\n\
795 `TEXTAGS' to a colon-separated list like, for example,\n\
796 TEXTAGS=\"mycommand:myothercommand\".";
799 static char *Texinfo_suffixes [] =
800 { "texi", "texinfo", "txi", NULL };
801 static char Texinfo_help [] =
802 "for texinfo files, lines starting with @node are tagged.";
804 static char *Yacc_suffixes [] =
805 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
806 static char Yacc_help [] =
807 "In Bison or Yacc input files, each rule defines as a tag the\n\
808 nonterminal it constructs. The portions of the file that contain\n\
809 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
810 for full help).";
/* Help text shown for the pseudo-language `auto'.  The continuation line
   must start in column zero so no stray spaces end up in the message.  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
816 static char none_help [] =
817 "`none' is not a real language, it indicates to only do\n\
818 regexp processing on files.";
820 static char no_lang_help [] =
821 "No detailed help available for this language.";
825 * Table of languages.
827 * It is ok for a given function to be listed under more than one
828 * name. I just didn't.
831 static language lang_names [] =
833 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
834 { "asm", Asm_help, Asm_labels, Asm_suffixes },
835 { "c", default_C_help, default_C_entries, default_C_suffixes },
836 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
837 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
838 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
839 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
840 { "forth", Forth_help, Forth_words, Forth_suffixes },
841 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
842 { "html", HTML_help, HTML_labels, HTML_suffixes },
843 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
844 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
845 { "lua", Lua_help, Lua_functions, Lua_suffixes },
846 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
847 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
848 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
849 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
850 { "php", PHP_help, PHP_functions, PHP_suffixes },
851 { "postscript",PS_help, PS_functions, PS_suffixes },
852 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
853 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
854 { "python", Python_help, Python_functions, Python_suffixes },
855 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
856 { "tex", TeX_help, TeX_commands, TeX_suffixes },
857 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
858 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
859 { "auto", auto_help }, /* default guessing scheme */
860 { "none", none_help, just_read_file }, /* regexp matching only */
861 { NULL } /* end of list */
865 static void
866 print_language_names ()
868 language *lang;
869 char **name, **ext;
871 puts ("\nThese are the currently supported languages, along with the\n\
872 default file names and dot suffixes:");
873 for (lang = lang_names; lang->name != NULL; lang++)
875 printf (" %-*s", 10, lang->name);
876 if (lang->filenames != NULL)
877 for (name = lang->filenames; *name != NULL; name++)
878 printf (" %s", *name);
879 if (lang->suffixes != NULL)
880 for (ext = lang->suffixes; *ext != NULL; ext++)
881 printf (" .%s", *ext);
882 puts ("");
884 puts ("where `auto' means use default language for files based on file\n\
885 name suffix, and `none' means only do regexp processing on files.\n\
886 If no language is specified and no matching suffix is found,\n\
887 the first line of the file is read for a sharp-bang (#!) sequence\n\
888 followed by the name of an interpreter. If no such sequence is found,\n\
889 Fortran is tried first; if no tags are found, C is tried next.\n\
890 When parsing any C file, a \"class\" or \"template\" keyword\n\
891 switches to C++.");
892 puts ("Compressed files are supported using gzip and bzip2.\n\
894 For detailed help on a given language use, for example,\n\
895 etags --help --lang=ada.");
898 #ifndef EMACS_NAME
899 # define EMACS_NAME "standalone"
900 #endif
901 #ifndef VERSION
902 # define VERSION "17.38"
903 #endif
904 static void
905 print_version ()
907 /* Makes it easier to update automatically. */
908 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
910 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
911 puts (emacs_copyright);
912 puts ("This program is distributed under the terms in ETAGS.README");
914 exit (EXIT_SUCCESS);
917 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
918 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
919 #endif
921 static void
922 print_help (argbuffer)
923 argument *argbuffer;
925 bool help_for_lang = FALSE;
927 for (; argbuffer->arg_type != at_end; argbuffer++)
928 if (argbuffer->arg_type == at_language)
930 if (help_for_lang)
931 puts ("");
932 puts (argbuffer->lang->help);
933 help_for_lang = TRUE;
936 if (help_for_lang)
937 exit (EXIT_SUCCESS);
939 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
941 These are the options accepted by %s.\n", progname, progname);
942 if (NO_LONG_OPTIONS)
943 puts ("WARNING: long option names do not work with this executable,\n\
944 as it is not linked with GNU getopt.");
945 else
946 puts ("You may use unambiguous abbreviations for the long option names.");
947 puts (" A - as file name means read names from stdin (one per line).\n\
948 Absolute names are stored in the output file as they are.\n\
949 Relative ones are stored relative to the output file's directory.\n");
951 puts ("-a, --append\n\
952 Append tag entries to existing tags file.");
954 puts ("--packages-only\n\
955 For Ada files, only generate tags for packages.");
957 if (CTAGS)
958 puts ("-B, --backward-search\n\
959 Write the search commands for the tag entries using '?', the\n\
960 backward-search command instead of '/', the forward-search command.");
962 /* This option is mostly obsolete, because etags can now automatically
963 detect C++. Retained for backward compatibility and for debugging and
964 experimentation. In principle, we could want to tag as C++ even
965 before any "class" or "template" keyword.
966 puts ("-C, --c++\n\
967 Treat files whose name suffix defaults to C language as C++ files.");
970 puts ("--declarations\n\
971 In C and derived languages, create tags for function declarations,");
972 if (CTAGS)
973 puts ("\tand create tags for extern variables if --globals is used.");
974 else
975 puts
976 ("\tand create tags for extern variables unless --no-globals is used.");
978 if (CTAGS)
979 puts ("-d, --defines\n\
980 Create tag entries for C #define constants and enum constants, too.");
981 else
982 puts ("-D, --no-defines\n\
983 Don't create tag entries for C #define constants and enum constants.\n\
984 This makes the tags file smaller.");
986 if (!CTAGS)
987 puts ("-i FILE, --include=FILE\n\
988 Include a note in tag file indicating that, when searching for\n\
989 a tag, one should also consult the tags file FILE after\n\
990 checking the current file.");
992 puts ("-l LANG, --language=LANG\n\
993 Force the following files to be considered as written in the\n\
994 named language up to the next --language=LANG option.");
996 if (CTAGS)
997 puts ("--globals\n\
998 Create tag entries for global variables in some languages.");
999 else
1000 puts ("--no-globals\n\
1001 Do not create tag entries for global variables in some\n\
1002 languages. This makes the tags file smaller.");
1004 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1005 puts ("--no-line-directive\n\
1006 Ignore #line preprocessor directives in C and derived languages.");
1008 if (CTAGS)
1009 puts ("--members\n\
1010 Create tag entries for members of structures in some languages.");
1011 else
1012 puts ("--no-members\n\
1013 Do not create tag entries for members of structures\n\
1014 in some languages.");
1016 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1017 Make a tag for each line matching a regular expression pattern\n\
1018 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1019 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1020 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1021 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1022 puts (" If TAGNAME/ is present, the tags created are named.\n\
1023 For example Tcl named tags can be created with:\n\
1024 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1025 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1026 `m' means to allow multi-line matches, `s' implies `m' and\n\
1027 causes dot to match any character, including newline.");
1029 puts ("-R, --no-regex\n\
1030 Don't create tags from regexps for the following files.");
1032 puts ("-I, --ignore-indentation\n\
1033 In C and C++ do not assume that a closing brace in the first\n\
1034 column is the final brace of a function or structure definition.");
1036 puts ("-o FILE, --output=FILE\n\
1037 Write the tags to FILE.");
1039 puts ("--parse-stdin=NAME\n\
1040 Read from standard input and record tags as belonging to file NAME.");
1042 if (CTAGS)
1044 puts ("-t, --typedefs\n\
1045 Generate tag entries for C and Ada typedefs.");
1046 puts ("-T, --typedefs-and-c++\n\
1047 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1048 and C++ member functions.");
1051 if (CTAGS)
1052 puts ("-u, --update\n\
1053 Update the tag entries for the given files, leaving tag\n\
1054 entries for other files in place. Currently, this is\n\
1055 implemented by deleting the existing entries for the given\n\
1056 files and then rewriting the new entries at the end of the\n\
1057 tags file. It is often faster to simply rebuild the entire\n\
1058 tag file than to use this.");
1060 if (CTAGS)
1062 puts ("-v, --vgrind\n\
1063 Print on the standard output an index of items intended for\n\
1064 human consumption, similar to the output of vgrind. The index\n\
1065 is sorted, and gives the page number of each item.");
1067 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1068 puts ("-w, --no-duplicates\n\
1069 Do not create duplicate tag entries, for compatibility with\n\
1070 traditional ctags.");
1072 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1073 puts ("-w, --no-warn\n\
1074 Suppress warning messages about duplicate tag entries.");
1076 puts ("-x, --cxref\n\
1077 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1078 The output uses line numbers instead of page numbers, but\n\
1079 beyond that the differences are cosmetic; try both to see\n\
1080 which you like.");
1083 puts ("-V, --version\n\
1084 Print the version of the program.\n\
1085 -h, --help\n\
1086 Print this help message.\n\
1087 Followed by one or more `--language' options prints detailed\n\
1088 help about tag generation for the specified languages.");
1090 print_language_names ();
1092 puts ("");
1093 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1095 exit (EXIT_SUCCESS);
1099 #ifdef VMS /* VMS specific functions */
1101 #define EOS '\0'
1103 /* This is a BUG! ANY arbitrary limit is a BUG!
1104 Won't someone please fix this? */
1105 #define MAX_FILE_SPEC_LEN 255
1106 typedef struct {
1107 short curlen;
1108 char body[MAX_FILE_SPEC_LEN + 1];
1109 } vspec;
1112 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1113 returning in each successive call the next file name matching the input
1114 spec. The function expects that each in_spec passed
1115 to it will be processed to completion; in particular, up to and
1116 including the call following that in which the last matching name
1117 is returned, the function ignores the value of in_spec, and will
1118 only start processing a new spec with the following call.
1119 If an error occurs, on return out_spec contains the value
1120 of in_spec when the error occurred.
1122 With each successive file name returned in out_spec, the
1123 function's return value is one. When there are no more matching
1124 names the function returns zero. If on the first call no file
1125 matches in_spec, or there is any other error, -1 is returned.
1128 #include <rmsdef.h>
1129 #include <descrip.h>
1130 #define OUTSIZE MAX_FILE_SPEC_LEN
1131 static short
1132 fn_exp (out, in)
1133 vspec *out;
1134 char *in;
1136 static long context = 0;
1137 static struct dsc$descriptor_s o;
1138 static struct dsc$descriptor_s i;
1139 static bool pass1 = TRUE;
1140 long status;
1141 short retval;
1143 if (pass1)
1145 pass1 = FALSE;
1146 o.dsc$a_pointer = (char *) out;
1147 o.dsc$w_length = (short)OUTSIZE;
1148 i.dsc$a_pointer = in;
1149 i.dsc$w_length = (short)strlen(in);
1150 i.dsc$b_dtype = DSC$K_DTYPE_T;
1151 i.dsc$b_class = DSC$K_CLASS_S;
1152 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1153 o.dsc$b_class = DSC$K_CLASS_VS;
1155 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1157 out->body[out->curlen] = EOS;
1158 return 1;
1160 else if (status == RMS$_NMF)
1161 retval = 0;
1162 else
1164 strcpy(out->body, in);
1165 retval = -1;
1167 lib$find_file_end(&context);
1168 pass1 = TRUE;
1169 return retval;
1173 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1174 name of each file specified by the provided arg expanding wildcards.
1176 static char *
1177 gfnames (arg, p_error)
1178 char *arg;
1179 bool *p_error;
1181 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1183 switch (fn_exp (&filename, arg))
1185 case 1:
1186 *p_error = FALSE;
1187 return filename.body;
1188 case 0:
1189 *p_error = FALSE;
1190 return NULL;
1191 default:
1192 *p_error = TRUE;
1193 return filename.body;
/* NOTE(review): the comment below says newer VMS versions provide
   `system', yet this stub is compiled when OLD is NOT defined; the
   condition looks inverted -- confirm before changing it. */
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for system(): report that it is unavailable and return a
   failure status, so callers that test or forward the result do not
   read an indeterminate value. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1205 #define VERSION_DELIM ';'
1206 char *massage_name (s)
1207 char *s;
1209 char *start = s;
1211 for ( ; *s; s++)
1212 if (*s == VERSION_DELIM)
1214 *s = EOS;
1215 break;
1217 else
1218 *s = lowcase (*s);
1219 return start;
1221 #endif /* VMS */
1225 main (argc, argv)
1226 int argc;
1227 char *argv[];
1229 int i;
1230 unsigned int nincluded_files;
1231 char **included_files;
1232 argument *argbuffer;
1233 int current_arg, file_count;
1234 linebuffer filename_lb;
1235 bool help_asked = FALSE;
1236 #ifdef VMS
1237 bool got_err;
1238 #endif
1239 char *optstring;
1240 int opt;
1243 #ifdef DOS_NT
1244 _fmode = O_BINARY; /* all of files are treated as binary files */
1245 #endif /* DOS_NT */
1247 progname = argv[0];
1248 nincluded_files = 0;
1249 included_files = xnew (argc, char *);
1250 current_arg = 0;
1251 file_count = 0;
1253 /* Allocate enough no matter what happens. Overkill, but each one
1254 is small. */
1255 argbuffer = xnew (argc, argument);
1258 * Always find typedefs and structure tags.
1259 * Also default to find macro constants, enum constants, struct
1260 * members and global variables. Do it for both etags and ctags.
1262 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1263 globals = members = TRUE;
1265 /* When the optstring begins with a '-' getopt_long does not rearrange the
1266 non-options arguments to be at the end, but leaves them alone. */
1267 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1268 "ac:Cf:Il:o:r:RSVhH",
1269 (CTAGS) ? "BxdtTuvw" : "Di:");
1271 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1272 switch (opt)
1274 case 0:
1275 /* If getopt returns 0, then it has already processed a
1276 long-named option. We should do nothing. */
1277 break;
1279 case 1:
1280 /* This means that a file name has been seen. Record it. */
1281 argbuffer[current_arg].arg_type = at_filename;
1282 argbuffer[current_arg].what = optarg;
1283 ++current_arg;
1284 ++file_count;
1285 break;
1287 case STDIN:
1288 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1289 argbuffer[current_arg].arg_type = at_stdin;
1290 argbuffer[current_arg].what = optarg;
1291 ++current_arg;
1292 ++file_count;
1293 if (parsing_stdin)
1294 fatal ("cannot parse standard input more than once", (char *)NULL);
1295 parsing_stdin = TRUE;
1296 break;
1298 /* Common options. */
1299 case 'a': append_to_tagfile = TRUE; break;
1300 case 'C': cplusplus = TRUE; break;
1301 case 'f': /* for compatibility with old makefiles */
1302 case 'o':
1303 if (tagfile)
1305 error ("-o option may only be given once.", (char *)NULL);
1306 suggest_asking_for_help ();
1307 /* NOTREACHED */
1309 tagfile = optarg;
1310 break;
1311 case 'I':
1312 case 'S': /* for backward compatibility */
1313 ignoreindent = TRUE;
1314 break;
1315 case 'l':
1317 language *lang = get_language_from_langname (optarg);
1318 if (lang != NULL)
1320 argbuffer[current_arg].lang = lang;
1321 argbuffer[current_arg].arg_type = at_language;
1322 ++current_arg;
1325 break;
1326 case 'c':
1327 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1328 optarg = concat (optarg, "i", ""); /* memory leak here */
1329 /* FALLTHRU */
1330 case 'r':
1331 argbuffer[current_arg].arg_type = at_regexp;
1332 argbuffer[current_arg].what = optarg;
1333 ++current_arg;
1334 break;
1335 case 'R':
1336 argbuffer[current_arg].arg_type = at_regexp;
1337 argbuffer[current_arg].what = NULL;
1338 ++current_arg;
1339 break;
1340 case 'V':
1341 print_version ();
1342 break;
1343 case 'h':
1344 case 'H':
1345 help_asked = TRUE;
1346 break;
1348 /* Etags options */
1349 case 'D': constantypedefs = FALSE; break;
1350 case 'i': included_files[nincluded_files++] = optarg; break;
1352 /* Ctags options. */
1353 case 'B': searchar = '?'; break;
1354 case 'd': constantypedefs = TRUE; break;
1355 case 't': typedefs = TRUE; break;
1356 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1357 case 'u': update = TRUE; break;
1358 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1359 case 'x': cxref_style = TRUE; break;
1360 case 'w': no_warnings = TRUE; break;
1361 default:
1362 suggest_asking_for_help ();
1363 /* NOTREACHED */
1366 /* No more options. Store the rest of arguments. */
1367 for (; optind < argc; optind++)
1369 argbuffer[current_arg].arg_type = at_filename;
1370 argbuffer[current_arg].what = argv[optind];
1371 ++current_arg;
1372 ++file_count;
1375 argbuffer[current_arg].arg_type = at_end;
1377 if (help_asked)
1378 print_help (argbuffer);
1379 /* NOTREACHED */
1381 if (nincluded_files == 0 && file_count == 0)
1383 error ("no input files specified.", (char *)NULL);
1384 suggest_asking_for_help ();
1385 /* NOTREACHED */
1388 if (tagfile == NULL)
1389 tagfile = CTAGS ? "tags" : "TAGS";
1390 cwd = etags_getcwd (); /* the current working directory */
1391 if (cwd[strlen (cwd) - 1] != '/')
1393 char *oldcwd = cwd;
1394 cwd = concat (oldcwd, "/", "");
1395 free (oldcwd);
1397 /* Relative file names are made relative to the current directory. */
1398 if (streq (tagfile, "-")
1399 || strneq (tagfile, "/dev/", 5))
1400 tagfiledir = cwd;
1401 else
1402 tagfiledir = absolute_dirname (tagfile, cwd);
1404 init (); /* set up boolean "functions" */
1406 linebuffer_init (&lb);
1407 linebuffer_init (&filename_lb);
1408 linebuffer_init (&filebuf);
1409 linebuffer_init (&token_name);
1411 if (!CTAGS)
1413 if (streq (tagfile, "-"))
1415 tagf = stdout;
1416 #ifdef DOS_NT
1417 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1418 doesn't take effect until after `stdout' is already open). */
1419 if (!isatty (fileno (stdout)))
1420 setmode (fileno (stdout), O_BINARY);
1421 #endif /* DOS_NT */
1423 else
1424 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1425 if (tagf == NULL)
1426 pfatal (tagfile);
1430 * Loop through files finding functions.
1432 for (i = 0; i < current_arg; i++)
1434 static language *lang; /* non-NULL if language is forced */
1435 char *this_file;
1437 switch (argbuffer[i].arg_type)
1439 case at_language:
1440 lang = argbuffer[i].lang;
1441 break;
1442 case at_regexp:
1443 analyse_regex (argbuffer[i].what);
1444 break;
1445 case at_filename:
1446 #ifdef VMS
1447 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1449 if (got_err)
1451 error ("can't find file %s\n", this_file);
1452 argc--, argv++;
1454 else
1456 this_file = massage_name (this_file);
1458 #else
1459 this_file = argbuffer[i].what;
1460 #endif
1461 /* Input file named "-" means read file names from stdin
1462 (one per line) and use them. */
1463 if (streq (this_file, "-"))
1465 if (parsing_stdin)
1466 fatal ("cannot parse standard input AND read file names from it",
1467 (char *)NULL);
1468 while (readline_internal (&filename_lb, stdin) > 0)
1469 process_file_name (filename_lb.buffer, lang);
1471 else
1472 process_file_name (this_file, lang);
1473 #ifdef VMS
1475 #endif
1476 break;
1477 case at_stdin:
1478 this_file = argbuffer[i].what;
1479 process_file (stdin, this_file, lang);
1480 break;
1484 free_regexps ();
1485 free (lb.buffer);
1486 free (filebuf.buffer);
1487 free (token_name.buffer);
1489 if (!CTAGS || cxref_style)
1491 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1492 put_entries (nodehead);
1493 free_tree (nodehead);
1494 nodehead = NULL;
1495 if (!CTAGS)
1497 fdesc *fdp;
1499 /* Output file entries that have no tags. */
1500 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1501 if (!fdp->written)
1502 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1504 while (nincluded_files-- > 0)
1505 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1507 if (fclose (tagf) == EOF)
1508 pfatal (tagfile);
1511 exit (EXIT_SUCCESS);
1514 /* From here on, we are in (CTAGS && !cxref_style) */
1515 if (update)
1517 char cmd[BUFSIZ];
1518 for (i = 0; i < current_arg; ++i)
1520 switch (argbuffer[i].arg_type)
1522 case at_filename:
1523 case at_stdin:
1524 break;
1525 default:
1526 continue; /* the for loop */
1528 sprintf (cmd,
1529 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1530 tagfile, argbuffer[i].what, tagfile);
1531 if (system (cmd) != EXIT_SUCCESS)
1532 fatal ("failed to execute shell command", (char *)NULL);
1534 append_to_tagfile = TRUE;
1537 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1538 if (tagf == NULL)
1539 pfatal (tagfile);
1540 put_entries (nodehead); /* write all the tags (CTAGS) */
1541 free_tree (nodehead);
1542 nodehead = NULL;
1543 if (fclose (tagf) == EOF)
1544 pfatal (tagfile);
1546 if (CTAGS)
1547 if (append_to_tagfile || update)
1549 char cmd[2*BUFSIZ+20];
1550 /* Maybe these should be used:
1551 setenv ("LC_COLLATE", "C", 1);
1552 setenv ("LC_ALL", "C", 1); */
1553 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1554 exit (system (cmd));
1556 return EXIT_SUCCESS;
1561 * Return a compressor given the file name. If EXTPTR is non-zero,
1562 * return a pointer into FILE where the compressor-specific
1563 * extension begins. If no compressor is found, NULL is returned
1564 * and EXTPTR is not significant.
1565 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1567 static compressor *
1568 get_compressor_from_suffix (file, extptr)
1569 char *file;
1570 char **extptr;
1572 compressor *compr;
1573 char *slash, *suffix;
1575 /* This relies on FN to be after canonicalize_filename,
1576 so we don't need to consider backslashes on DOS_NT. */
1577 slash = etags_strrchr (file, '/');
1578 suffix = etags_strrchr (file, '.');
1579 if (suffix == NULL || suffix < slash)
1580 return NULL;
1581 if (extptr != NULL)
1582 *extptr = suffix;
1583 suffix += 1;
1584 /* Let those poor souls who live with DOS 8+3 file name limits get
1585 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1586 Only the first do loop is run if not MSDOS */
1589 for (compr = compressors; compr->suffix != NULL; compr++)
1590 if (streq (compr->suffix, suffix))
1591 return compr;
1592 if (!MSDOS)
1593 break; /* do it only once: not really a loop */
1594 if (extptr != NULL)
1595 *extptr = ++suffix;
1596 } while (*suffix != '\0');
1597 return NULL;
1603 * Return a language given the name.
1605 static language *
1606 get_language_from_langname (name)
1607 const char *name;
1609 language *lang;
1611 if (name == NULL)
1612 error ("empty language name", (char *)NULL);
1613 else
1615 for (lang = lang_names; lang->name != NULL; lang++)
1616 if (streq (name, lang->name))
1617 return lang;
1618 error ("unknown language \"%s\"", name);
1621 return NULL;
1626 * Return a language given the interpreter name.
1628 static language *
1629 get_language_from_interpreter (interpreter)
1630 char *interpreter;
1632 language *lang;
1633 char **iname;
1635 if (interpreter == NULL)
1636 return NULL;
1637 for (lang = lang_names; lang->name != NULL; lang++)
1638 if (lang->interpreters != NULL)
1639 for (iname = lang->interpreters; *iname != NULL; iname++)
1640 if (streq (*iname, interpreter))
1641 return lang;
1643 return NULL;
1649 * Return a language given the file name.
1651 static language *
1652 get_language_from_filename (file, case_sensitive)
1653 char *file;
1654 bool case_sensitive;
1656 language *lang;
1657 char **name, **ext, *suffix;
1659 /* Try whole file name first. */
1660 for (lang = lang_names; lang->name != NULL; lang++)
1661 if (lang->filenames != NULL)
1662 for (name = lang->filenames; *name != NULL; name++)
1663 if ((case_sensitive)
1664 ? streq (*name, file)
1665 : strcaseeq (*name, file))
1666 return lang;
1668 /* If not found, try suffix after last dot. */
1669 suffix = etags_strrchr (file, '.');
1670 if (suffix == NULL)
1671 return NULL;
1672 suffix += 1;
1673 for (lang = lang_names; lang->name != NULL; lang++)
1674 if (lang->suffixes != NULL)
1675 for (ext = lang->suffixes; *ext != NULL; ext++)
1676 if ((case_sensitive)
1677 ? streq (*ext, suffix)
1678 : strcaseeq (*ext, suffix))
1679 return lang;
1680 return NULL;
1685 * This routine is called on each file argument.
1687 static void
1688 process_file_name (file, lang)
1689 char *file;
1690 language *lang;
1692 struct stat stat_buf;
1693 FILE *inf;
1694 fdesc *fdp;
1695 compressor *compr;
1696 char *compressed_name, *uncompressed_name;
1697 char *ext, *real_name;
1698 int retval;
1700 canonicalize_filename (file);
1701 if (streq (file, tagfile) && !streq (tagfile, "-"))
1703 error ("skipping inclusion of %s in self.", file);
1704 return;
1706 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1708 compressed_name = NULL;
1709 real_name = uncompressed_name = savestr (file);
1711 else
1713 real_name = compressed_name = savestr (file);
1714 uncompressed_name = savenstr (file, ext - file);
1717 /* If the canonicalized uncompressed name
1718 has already been dealt with, skip it silently. */
1719 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1721 assert (fdp->infname != NULL);
1722 if (streq (uncompressed_name, fdp->infname))
1723 goto cleanup;
1726 if (stat (real_name, &stat_buf) != 0)
1728 /* Reset real_name and try with a different name. */
1729 real_name = NULL;
1730 if (compressed_name != NULL) /* try with the given suffix */
1732 if (stat (uncompressed_name, &stat_buf) == 0)
1733 real_name = uncompressed_name;
1735 else /* try all possible suffixes */
1737 for (compr = compressors; compr->suffix != NULL; compr++)
1739 compressed_name = concat (file, ".", compr->suffix);
1740 if (stat (compressed_name, &stat_buf) != 0)
1742 if (MSDOS)
1744 char *suf = compressed_name + strlen (file);
1745 size_t suflen = strlen (compr->suffix) + 1;
1746 for ( ; suf[1]; suf++, suflen--)
1748 memmove (suf, suf + 1, suflen);
1749 if (stat (compressed_name, &stat_buf) == 0)
1751 real_name = compressed_name;
1752 break;
1755 if (real_name != NULL)
1756 break;
1757 } /* MSDOS */
1758 free (compressed_name);
1759 compressed_name = NULL;
1761 else
1763 real_name = compressed_name;
1764 break;
1768 if (real_name == NULL)
1770 perror (file);
1771 goto cleanup;
1773 } /* try with a different name */
1775 if (!S_ISREG (stat_buf.st_mode))
1777 error ("skipping %s: it is not a regular file.", real_name);
1778 goto cleanup;
1780 if (real_name == compressed_name)
1782 char *cmd = concat (compr->command, " ", real_name);
1783 inf = (FILE *) popen (cmd, "r");
1784 free (cmd);
1786 else
1787 inf = fopen (real_name, "r");
1788 if (inf == NULL)
1790 perror (real_name);
1791 goto cleanup;
1794 process_file (inf, uncompressed_name, lang);
1796 if (real_name == compressed_name)
1797 retval = pclose (inf);
1798 else
1799 retval = fclose (inf);
1800 if (retval < 0)
1801 pfatal (file);
1803 cleanup:
1804 if (compressed_name) free (compressed_name);
1805 if (uncompressed_name) free (uncompressed_name);
1806 last_node = NULL;
1807 curfdp = NULL;
1808 return;
1811 static void
1812 process_file (fh, fn, lang)
1813 FILE *fh;
1814 char *fn;
1815 language *lang;
1817 static const fdesc emptyfdesc;
1818 fdesc *fdp;
1820 /* Create a new input file description entry. */
1821 fdp = xnew (1, fdesc);
1822 *fdp = emptyfdesc;
1823 fdp->next = fdhead;
1824 fdp->infname = savestr (fn);
1825 fdp->lang = lang;
1826 fdp->infabsname = absolute_filename (fn, cwd);
1827 fdp->infabsdir = absolute_dirname (fn, cwd);
1828 if (filename_is_absolute (fn))
1830 /* An absolute file name. Canonicalize it. */
1831 fdp->taggedfname = absolute_filename (fn, NULL);
1833 else
1835 /* A file name relative to cwd. Make it relative
1836 to the directory of the tags file. */
1837 fdp->taggedfname = relative_filename (fn, tagfiledir);
1839 fdp->usecharno = TRUE; /* use char position when making tags */
1840 fdp->prop = NULL;
1841 fdp->written = FALSE; /* not written on tags file yet */
1843 fdhead = fdp;
1844 curfdp = fdhead; /* the current file description */
1846 find_entries (fh);
1848 /* If not Ctags, and if this is not metasource and if it contained no #line
1849 directives, we can write the tags and free all nodes pointing to
1850 curfdp. */
1851 if (!CTAGS
1852 && curfdp->usecharno /* no #line directives in this file */
1853 && !curfdp->lang->metasource)
1855 node *np, *prev;
1857 /* Look for the head of the sublist relative to this file. See add_node
1858 for the structure of the node tree. */
1859 prev = NULL;
1860 for (np = nodehead; np != NULL; prev = np, np = np->left)
1861 if (np->fdp == curfdp)
1862 break;
1864 /* If we generated tags for this file, write and delete them. */
1865 if (np != NULL)
1867 /* This is the head of the last sublist, if any. The following
1868 instructions depend on this being true. */
1869 assert (np->left == NULL);
1871 assert (fdhead == curfdp);
1872 assert (last_node->fdp == curfdp);
1873 put_entries (np); /* write tags for file curfdp->taggedfname */
1874 free_tree (np); /* remove the written nodes */
1875 if (prev == NULL)
1876 nodehead = NULL; /* no nodes left */
1877 else
1878 prev->left = NULL; /* delete the pointer to the sublist */
1884 * This routine sets up the boolean pseudo-functions which work
1885 * by setting boolean flags dependent upon the corresponding character.
1886 * Every char which is NOT in that string is not a white char. Therefore,
1887 * all of the array "_wht" is set to FALSE, and then the elements
1888 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1889 * of a char is TRUE if it is the string "white", else FALSE.
1891 static void
1892 init ()
1894 register char *sp;
1895 register int i;
1897 for (i = 0; i < CHARS; i++)
1898 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1899 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1900 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1901 notinname('\0') = notinname('\n');
1902 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1903 begtoken('\0') = begtoken('\n');
1904 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1905 intoken('\0') = intoken('\n');
1906 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1907 endtoken('\0') = endtoken('\n');
1911 * This routine opens the specified file and calls the function
1912 * which finds the function and type definitions.
1914 static void
1915 find_entries (inf)
1916 FILE *inf;
1918 char *cp;
1919 language *lang = curfdp->lang;
1920 Lang_function *parser = NULL;
1922 /* If user specified a language, use it. */
1923 if (lang != NULL && lang->function != NULL)
1925 parser = lang->function;
1928 /* Else try to guess the language given the file name. */
1929 if (parser == NULL)
1931 lang = get_language_from_filename (curfdp->infname, TRUE);
1932 if (lang != NULL && lang->function != NULL)
1934 curfdp->lang = lang;
1935 parser = lang->function;
1939 /* Else look for sharp-bang as the first two characters. */
1940 if (parser == NULL
1941 && readline_internal (&lb, inf) > 0
1942 && lb.len >= 2
1943 && lb.buffer[0] == '#'
1944 && lb.buffer[1] == '!')
1946 char *lp;
1948 /* Set lp to point at the first char after the last slash in the
1949 line or, if no slashes, at the first nonblank. Then set cp to
1950 the first successive blank and terminate the string. */
1951 lp = etags_strrchr (lb.buffer+2, '/');
1952 if (lp != NULL)
1953 lp += 1;
1954 else
1955 lp = skip_spaces (lb.buffer + 2);
1956 cp = skip_non_spaces (lp);
1957 *cp = '\0';
1959 if (strlen (lp) > 0)
1961 lang = get_language_from_interpreter (lp);
1962 if (lang != NULL && lang->function != NULL)
1964 curfdp->lang = lang;
1965 parser = lang->function;
1970 /* We rewind here, even if inf may be a pipe. We fail if the
1971 length of the first line is longer than the pipe block size,
1972 which is unlikely. */
1973 rewind (inf);
1975 /* Else try to guess the language given the case insensitive file name. */
1976 if (parser == NULL)
1978 lang = get_language_from_filename (curfdp->infname, FALSE);
1979 if (lang != NULL && lang->function != NULL)
1981 curfdp->lang = lang;
1982 parser = lang->function;
1986 /* Else try Fortran or C. */
1987 if (parser == NULL)
1989 node *old_last_node = last_node;
1991 curfdp->lang = get_language_from_langname ("fortran");
1992 find_entries (inf);
1994 if (old_last_node == last_node)
1995 /* No Fortran entries found. Try C. */
1997 /* We do not tag if rewind fails.
1998 Only the file name will be recorded in the tags file. */
1999 rewind (inf);
2000 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
2001 find_entries (inf);
2003 return;
2006 if (!no_line_directive
2007 && curfdp->lang != NULL && curfdp->lang->metasource)
2008 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
2009 file, or anyway we parsed a file that is automatically generated from
2010 this one. If this is the case, the bingo.c file contained #line
2011 directives that generated tags pointing to this file. Let's delete
2012 them all before parsing this file, which is the real source. */
2014 fdesc **fdpp = &fdhead;
2015 while (*fdpp != NULL)
2016 if (*fdpp != curfdp
2017 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2018 /* We found one of those! We must delete both the file description
2019 and all tags referring to it. */
2021 fdesc *badfdp = *fdpp;
2023 /* Delete the tags referring to badfdp->taggedfname
2024 that were obtained from badfdp->infname. */
2025 invalidate_nodes (badfdp, &nodehead);
2027 *fdpp = badfdp->next; /* remove the bad description from the list */
2028 free_fdesc (badfdp);
2030 else
2031 fdpp = &(*fdpp)->next; /* advance the list pointer */
2034 assert (parser != NULL);
2036 /* Generic initialisations before reading from file. */
2037 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2039 /* Generic initialisations before parsing file with readline. */
2040 lineno = 0; /* reset global line number */
2041 charno = 0; /* reset global char number */
2042 linecharno = 0; /* reset global char number of line start */
2044 parser (inf);
2046 regex_tag_multiline ();
2051 * Check whether an implicitly named tag should be created,
2052 * then call `pfnote'.
2053 * NAME is a string that is internally copied by this function.
2055 * TAGS format specification
2056 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2057 * The following is explained in some more detail in etc/ETAGS.EBNF.
2059 * make_tag creates tags with "implicit tag names" (unnamed tags)
2060 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2061 * 1. NAME does not contain any of the characters in NONAM;
2062 * 2. LINESTART contains name as either a rightmost, or rightmost but
2063 * one character, substring;
2064 * 3. the character, if any, immediately before NAME in LINESTART must
2065 * be a character in NONAM;
2066 * 4. the character, if any, immediately after NAME in LINESTART must
2067 * also be a character in NONAM.
2069 * The implementation uses the notinname() macro, which recognises the
2070 * characters stored in the string `nonam'.
2071 * etags.el needs to use the same characters that are in NONAM.
2073 static void
2074 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2075 char *name; /* tag name, or NULL if unnamed */
2076 int namelen; /* tag length */
2077 bool is_func; /* tag is a function */
2078 char *linestart; /* start of the line where tag is */
2079 int linelen; /* length of the line where tag is */
2080 int lno; /* line number */
2081 long cno; /* character number */
2083 bool named = (name != NULL && namelen > 0);
2085 if (!CTAGS && named) /* maybe set named to false */
2086 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2087 such that etags.el can guess a name from it. */
2089 int i;
2090 register char *cp = name;
2092 for (i = 0; i < namelen; i++)
2093 if (notinname (*cp++))
2094 break;
2095 if (i == namelen) /* rule #1 */
2097 cp = linestart + linelen - namelen;
2098 if (notinname (linestart[linelen-1]))
2099 cp -= 1; /* rule #4 */
2100 if (cp >= linestart /* rule #2 */
2101 && (cp == linestart
2102 || notinname (cp[-1])) /* rule #3 */
2103 && strneq (name, cp, namelen)) /* rule #2 */
2104 named = FALSE; /* use implicit tag name */
2108 if (named)
2109 name = savenstr (name, namelen);
2110 else
2111 name = NULL;
2112 pfnote (name, is_func, linestart, linelen, lno, cno);
2115 /* Record a tag. */
2116 static void
2117 pfnote (name, is_func, linestart, linelen, lno, cno)
2118 char *name; /* tag name, or NULL if unnamed */
2119 bool is_func; /* tag is a function */
2120 char *linestart; /* start of the line where tag is */
2121 int linelen; /* length of the line where tag is */
2122 int lno; /* line number */
2123 long cno; /* character number */
2125 register node *np;
2127 assert (name == NULL || name[0] != '\0');
2128 if (CTAGS && name == NULL)
2129 return;
2131 np = xnew (1, node);
2133 /* If ctags mode, change name "main" to M<thisfilename>. */
2134 if (CTAGS && !cxref_style && streq (name, "main"))
2136 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2137 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2138 fp = etags_strrchr (np->name, '.');
2139 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2140 fp[0] = '\0';
2142 else
2143 np->name = name;
2144 np->valid = TRUE;
2145 np->been_warned = FALSE;
2146 np->fdp = curfdp;
2147 np->is_func = is_func;
2148 np->lno = lno;
2149 if (np->fdp->usecharno)
2150 /* Our char numbers are 0-base, because of C language tradition?
2151 ctags compatibility? old versions compatibility? I don't know.
2152 Anyway, since emacs's are 1-base we expect etags.el to take care
2153 of the difference. If we wanted to have 1-based numbers, we would
2154 uncomment the +1 below. */
2155 np->cno = cno /* + 1 */ ;
2156 else
2157 np->cno = invalidcharno;
2158 np->left = np->right = NULL;
2159 if (CTAGS && !cxref_style)
2161 if (strlen (linestart) < 50)
2162 np->regex = concat (linestart, "$", "");
2163 else
2164 np->regex = savenstr (linestart, 50);
2166 else
2167 np->regex = savenstr (linestart, linelen);
2169 add_node (np, &nodehead);
2173 * free_tree ()
2174 * recurse on left children, iterate on right children.
2176 static void
2177 free_tree (np)
2178 register node *np;
2180 while (np)
2182 register node *node_right = np->right;
2183 free_tree (np->left);
2184 if (np->name != NULL)
2185 free (np->name);
2186 free (np->regex);
2187 free (np);
2188 np = node_right;
2193 * free_fdesc ()
2194 * delete a file description
2196 static void
2197 free_fdesc (fdp)
2198 register fdesc *fdp;
2200 if (fdp->infname != NULL) free (fdp->infname);
2201 if (fdp->infabsname != NULL) free (fdp->infabsname);
2202 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2203 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2204 if (fdp->prop != NULL) free (fdp->prop);
2205 free (fdp);
2209 * add_node ()
2210 * Adds a node to the tree of nodes. In etags mode, sort by file
2211 * name. In ctags mode, sort by tag name. Make no attempt at
2212 * balancing.
2214 * add_node is the only function allowed to add nodes, so it can
2215 * maintain state.
2217 static void
2218 add_node (np, cur_node_p)
2219 node *np, **cur_node_p;
2221 register int dif;
2222 register node *cur_node = *cur_node_p;
2224 if (cur_node == NULL)
2226 *cur_node_p = np;
2227 last_node = np;
2228 return;
2231 if (!CTAGS)
2232 /* Etags Mode */
2234 /* For each file name, tags are in a linked sublist on the right
2235 pointer. The first tags of different files are a linked list
2236 on the left pointer. last_node points to the end of the last
2237 used sublist. */
2238 if (last_node != NULL && last_node->fdp == np->fdp)
2240 /* Let's use the same sublist as the last added node. */
2241 assert (last_node->right == NULL);
2242 last_node->right = np;
2243 last_node = np;
2245 else if (cur_node->fdp == np->fdp)
2247 /* Scanning the list we found the head of a sublist which is
2248 good for us. Let's scan this sublist. */
2249 add_node (np, &cur_node->right);
2251 else
2252 /* The head of this sublist is not good for us. Let's try the
2253 next one. */
2254 add_node (np, &cur_node->left);
2255 } /* if ETAGS mode */
2257 else
2259 /* Ctags Mode */
2260 dif = strcmp (np->name, cur_node->name);
2263 * If this tag name matches an existing one, then
2264 * do not add the node, but maybe print a warning.
2266 if (no_duplicates && !dif)
2268 if (np->fdp == cur_node->fdp)
2270 if (!no_warnings)
2272 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2273 np->fdp->infname, lineno, np->name);
2274 fprintf (stderr, "Second entry ignored\n");
2277 else if (!cur_node->been_warned && !no_warnings)
2279 fprintf
2280 (stderr,
2281 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2282 np->fdp->infname, cur_node->fdp->infname, np->name);
2283 cur_node->been_warned = TRUE;
2285 return;
2288 /* Actually add the node */
2289 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2290 } /* if CTAGS mode */
2294 * invalidate_nodes ()
2295 * Scan the node tree and invalidate all nodes pointing to the
2296 * given file description (CTAGS case) or free them (ETAGS case).
2298 static void
2299 invalidate_nodes (badfdp, npp)
2300 fdesc *badfdp;
2301 node **npp;
2303 node *np = *npp;
2305 if (np == NULL)
2306 return;
2308 if (CTAGS)
2310 if (np->left != NULL)
2311 invalidate_nodes (badfdp, &np->left);
2312 if (np->fdp == badfdp)
2313 np->valid = FALSE;
2314 if (np->right != NULL)
2315 invalidate_nodes (badfdp, &np->right);
2317 else
2319 assert (np->fdp != NULL);
2320 if (np->fdp == badfdp)
2322 *npp = np->left; /* detach the sublist from the list */
2323 np->left = NULL; /* isolate it */
2324 free_tree (np); /* free it */
2325 invalidate_nodes (badfdp, npp);
2327 else
2328 invalidate_nodes (badfdp, &np->left);
2333 static int total_size_of_entries __P((node *));
2334 static int number_len __P((long));
/* Length of a non-negative number's decimal representation.
   Any NUM below 10 (negative values included) gives 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2348 * Return total number of characters that put_entries will output for
2349 * the nodes in the linked list at the right of the specified node.
2350 * This count is irrelevant with etags.el since emacs 19.34 at least,
2351 * but is still supplied for backward compatibility.
2353 static int
2354 total_size_of_entries (np)
2355 register node *np;
2357 register int total = 0;
2359 for (; np != NULL; np = np->right)
2360 if (np->valid)
2362 total += strlen (np->regex) + 1; /* pat\177 */
2363 if (np->name != NULL)
2364 total += strlen (np->name) + 1; /* name\001 */
2365 total += number_len ((long) np->lno) + 1; /* lno, */
2366 if (np->cno != invalidcharno) /* cno */
2367 total += number_len (np->cno);
2368 total += 1; /* newline */
2371 return total;
2374 static void
2375 put_entries (np)
2376 register node *np;
2378 register char *sp;
2379 static fdesc *fdp = NULL;
2381 if (np == NULL)
2382 return;
2384 /* Output subentries that precede this one */
2385 if (CTAGS)
2386 put_entries (np->left);
2388 /* Output this entry */
2389 if (np->valid)
2391 if (!CTAGS)
2393 /* Etags mode */
2394 if (fdp != np->fdp)
2396 fdp = np->fdp;
2397 fprintf (tagf, "\f\n%s,%d\n",
2398 fdp->taggedfname, total_size_of_entries (np));
2399 fdp->written = TRUE;
2401 fputs (np->regex, tagf);
2402 fputc ('\177', tagf);
2403 if (np->name != NULL)
2405 fputs (np->name, tagf);
2406 fputc ('\001', tagf);
2408 fprintf (tagf, "%d,", np->lno);
2409 if (np->cno != invalidcharno)
2410 fprintf (tagf, "%ld", np->cno);
2411 fputs ("\n", tagf);
2413 else
2415 /* Ctags mode */
2416 if (np->name == NULL)
2417 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2419 if (cxref_style)
2421 if (vgrind_style)
2422 fprintf (stdout, "%s %s %d\n",
2423 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2424 else
2425 fprintf (stdout, "%-16s %3d %-16s %s\n",
2426 np->name, np->lno, np->fdp->taggedfname, np->regex);
2428 else
2430 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2432 if (np->is_func)
2433 { /* function or #define macro with args */
2434 putc (searchar, tagf);
2435 putc ('^', tagf);
2437 for (sp = np->regex; *sp; sp++)
2439 if (*sp == '\\' || *sp == searchar)
2440 putc ('\\', tagf);
2441 putc (*sp, tagf);
2443 putc (searchar, tagf);
2445 else
2446 { /* anything else; text pattern inadequate */
2447 fprintf (tagf, "%d", np->lno);
2449 putc ('\n', tagf);
2452 } /* if this node contains a valid tag */
2454 /* Output subentries that follow this one */
2455 put_entries (np->right);
2456 if (!CTAGS)
2457 put_entries (np->left);
/* C extensions.  The low bits select a C dialect (every dialect other
   than plain C includes the C_PLPL bit); C_AUTO and YACC are separate
   flags outside the C_EXT mask.  */
#define C_EXT   0x00fff         /* mask of the dialect bits */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Kind of keyword a token can be; st_none is what C_symtype returns
 * for a token that is not a keyword of the language being parsed.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,                  /* define, undef */
  st_C_typedef
};
2485 static unsigned int hash __P((const char *, unsigned int));
2486 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2487 static enum sym_type C_symtype __P((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2539 /*%<*/
2540 /* C code produced by gperf version 3.0.1 */
2541 /* Command-line: gperf -m 5 */
2542 /* Computed positions: -k'2-3' */
2544 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2545 /* maximum key range = 33, duplicates = 0 */
/* gperf hash function for the C keyword table: LEN plus the
   association value of the second character of STR and, unless LEN is
   exactly 2, plus that of the third character too.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only a length of exactly 2 skips the third character. */
  if (len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2602 static struct C_stab_entry *
2603 in_word_set (str, len)
2604 register const char *str;
2605 register unsigned int len;
2607 enum
2609 TOTAL_KEYWORDS = 33,
2610 MIN_WORD_LENGTH = 2,
2611 MAX_WORD_LENGTH = 15,
2612 MIN_HASH_VALUE = 2,
2613 MAX_HASH_VALUE = 34
2616 static struct C_stab_entry wordlist[] =
2618 {""}, {""},
2619 {"if", 0, st_C_ignore},
2620 {"GTY", 0, st_C_attribute},
2621 {"@end", 0, st_C_objend},
2622 {"union", 0, st_C_struct},
2623 {"define", 0, st_C_define},
2624 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2625 {"template", 0, st_C_template},
2626 {"operator", C_PLPL, st_C_operator},
2627 {"@interface", 0, st_C_objprot},
2628 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2629 {"friend", C_PLPL, st_C_ignore},
2630 {"typedef", 0, st_C_typedef},
2631 {"return", 0, st_C_ignore},
2632 {"@implementation",0, st_C_objimpl},
2633 {"@protocol", 0, st_C_objprot},
2634 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2635 {"extern", 0, st_C_extern},
2636 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2637 {"struct", 0, st_C_struct},
2638 {"domain", C_STAR, st_C_struct},
2639 {"switch", 0, st_C_ignore},
2640 {"enum", 0, st_C_enum},
2641 {"for", 0, st_C_ignore},
2642 {"namespace", C_PLPL, st_C_struct},
2643 {"class", 0, st_C_class},
2644 {"while", 0, st_C_ignore},
2645 {"undef", 0, st_C_define},
2646 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2647 {"__attribute__", 0, st_C_attribute},
2648 {"SYSCALL", 0, st_C_gnumacro},
2649 {"ENTRY", 0, st_C_gnumacro},
2650 {"PSEUDO", 0, st_C_gnumacro},
2651 {"DEFUN", 0, st_C_gnumacro}
2654 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2656 register int key = hash (str, len);
2658 if (key <= MAX_HASH_VALUE && key >= 0)
2660 register const char *s = wordlist[key].name;
2662 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2663 return &wordlist[key];
2666 return 0;
2668 /*%>*/
2670 static enum sym_type
2671 C_symtype (str, len, c_ext)
2672 char *str;
2673 int len;
2674 int c_ext;
2676 register struct C_stab_entry *se = in_word_set (str, len);
2678 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2679 return st_none;
2680 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementations seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* Where the token text is. */
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */

  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* do not create a tag; the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2798 * Variables and functions for dealing with nested structures.
2799 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2801 static void pushclass_above __P((int, char *, int));
2802 static void popclass_above __P((int));
2803 static void write_classname __P((linebuffer *, char *qualifier));
2805 static struct {
2806 char **cname; /* nested class names */
2807 int *bracelev; /* nested class brace level */
2808 int nl; /* class nesting level (elements used) */
2809 int size; /* length of the array */
2810 } cstack; /* stack for nested declaration tags */
2811 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2812 #define nestlev (cstack.nl)
2813 /* After struct keyword or in struct body, not inside a nested function. */
2814 #define instruct (structdef == snone && nestlev > 0 \
2815 && bracelev == cstack.bracelev[nestlev-1] + 1)
2817 static void
2818 pushclass_above (bracelev, str, len)
2819 int bracelev;
2820 char *str;
2821 int len;
2823 int nl;
2825 popclass_above (bracelev);
2826 nl = cstack.nl;
2827 if (nl >= cstack.size)
2829 int size = cstack.size *= 2;
2830 xrnew (cstack.cname, size, char *);
2831 xrnew (cstack.bracelev, size, int);
2833 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2834 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2835 cstack.bracelev[nl] = bracelev;
2836 cstack.nl = nl + 1;
2839 static void
2840 popclass_above (bracelev)
2841 int bracelev;
2843 int nl;
2845 for (nl = cstack.nl - 1;
2846 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2847 nl--)
2849 if (cstack.cname[nl] != NULL)
2850 free (cstack.cname[nl]);
2851 cstack.nl = nl;
2855 static void
2856 write_classname (cn, qualifier)
2857 linebuffer *cn;
2858 char *qualifier;
2860 int i, len;
2861 int qlen = strlen (qualifier);
2863 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2865 len = 0;
2866 cn->len = 0;
2867 cn->buffer[0] = '\0';
2869 else
2871 len = strlen (cstack.cname[0]);
2872 linebuffer_setlen (cn, len);
2873 strcpy (cn->buffer, cstack.cname[0]);
2875 for (i = 1; i < cstack.nl; i++)
2877 char *s;
2878 int slen;
2880 s = cstack.cname[i];
2881 if (s == NULL)
2882 continue;
2883 slen = strlen (s);
2884 len += slen + qlen;
2885 linebuffer_setlen (cn, len);
2886 strncat (cn->buffer, qualifier, qlen);
2887 strncat (cn->buffer, s, slen);
2892 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2893 static void make_C_tag __P((bool));
2896 * consider_token ()
2897 * checks to see if the current token is at the start of a
2898 * function or variable, or corresponds to a typedef, or
2899 * is a struct/union/enum tag, or #define, or an enum constant.
2901 * *IS_FUNC gets TRUE if the token is a function or #define macro
2902 * with args. C_EXTP points to which language we are looking at.
2904 * Globals
2905 * fvdef IN OUT
2906 * structdef IN OUT
2907 * definedef IN OUT
2908 * typdef IN OUT
2909 * objdef IN OUT
2912 static bool
2913 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2914 register char *str; /* IN: token pointer */
2915 register int len; /* IN: token length */
2916 register int c; /* IN: first char after the token */
2917 int *c_extp; /* IN, OUT: C extensions mask */
2918 int bracelev; /* IN: brace level */
2919 int parlev; /* IN: parenthesis level */
2920 bool *is_func_or_var; /* OUT: function or variable found */
2922 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2923 structtype is the type of the preceding struct-like keyword, and
2924 structbracelev is the brace level where it has been seen. */
2925 static enum sym_type structtype;
2926 static int structbracelev;
2927 static enum sym_type toktype;
2930 toktype = C_symtype (str, len, *c_extp);
2933 * Skip __attribute__
2935 if (toktype == st_C_attribute)
2937 inattribute = TRUE;
2938 return FALSE;
2942 * Advance the definedef state machine.
2944 switch (definedef)
2946 case dnone:
2947 /* We're not on a preprocessor line. */
2948 if (toktype == st_C_gnumacro)
2950 fvdef = fdefunkey;
2951 return FALSE;
2953 break;
2954 case dsharpseen:
2955 if (toktype == st_C_define)
2957 definedef = ddefineseen;
2959 else
2961 definedef = dignorerest;
2963 return FALSE;
2964 case ddefineseen:
2966 * Make a tag for any macro, unless it is a constant
2967 * and constantypedefs is FALSE.
2969 definedef = dignorerest;
2970 *is_func_or_var = (c == '(');
2971 if (!*is_func_or_var && !constantypedefs)
2972 return FALSE;
2973 else
2974 return TRUE;
2975 case dignorerest:
2976 return FALSE;
2977 default:
2978 error ("internal error: definedef value.", (char *)NULL);
2982 * Now typedefs
2984 switch (typdef)
2986 case tnone:
2987 if (toktype == st_C_typedef)
2989 if (typedefs)
2990 typdef = tkeyseen;
2991 fvextern = FALSE;
2992 fvdef = fvnone;
2993 return FALSE;
2995 break;
2996 case tkeyseen:
2997 switch (toktype)
2999 case st_none:
3000 case st_C_class:
3001 case st_C_struct:
3002 case st_C_enum:
3003 typdef = ttypeseen;
3005 break;
3006 case ttypeseen:
3007 if (structdef == snone && fvdef == fvnone)
3009 fvdef = fvnameseen;
3010 return TRUE;
3012 break;
3013 case tend:
3014 switch (toktype)
3016 case st_C_class:
3017 case st_C_struct:
3018 case st_C_enum:
3019 return FALSE;
3021 return TRUE;
3024 switch (toktype)
3026 case st_C_javastruct:
3027 if (structdef == stagseen)
3028 structdef = scolonseen;
3029 return FALSE;
3030 case st_C_template:
3031 case st_C_class:
3032 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3033 && bracelev == 0
3034 && definedef == dnone && structdef == snone
3035 && typdef == tnone && fvdef == fvnone)
3036 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3037 if (toktype == st_C_template)
3038 break;
3039 /* FALLTHRU */
3040 case st_C_struct:
3041 case st_C_enum:
3042 if (parlev == 0
3043 && fvdef != vignore
3044 && (typdef == tkeyseen
3045 || (typedefs_or_cplusplus && structdef == snone)))
3047 structdef = skeyseen;
3048 structtype = toktype;
3049 structbracelev = bracelev;
3050 if (fvdef == fvnameseen)
3051 fvdef = fvnone;
3053 return FALSE;
3056 if (structdef == skeyseen)
3058 structdef = stagseen;
3059 return TRUE;
3062 if (typdef != tnone)
3063 definedef = dnone;
3065 /* Detect Objective C constructs. */
3066 switch (objdef)
3068 case onone:
3069 switch (toktype)
3071 case st_C_objprot:
3072 objdef = oprotocol;
3073 return FALSE;
3074 case st_C_objimpl:
3075 objdef = oimplementation;
3076 return FALSE;
3078 break;
3079 case oimplementation:
3080 /* Save the class tag for functions or variables defined inside. */
3081 objtag = savenstr (str, len);
3082 objdef = oinbody;
3083 return FALSE;
3084 case oprotocol:
3085 /* Save the class tag for categories. */
3086 objtag = savenstr (str, len);
3087 objdef = otagseen;
3088 *is_func_or_var = TRUE;
3089 return TRUE;
3090 case oparenseen:
3091 objdef = ocatseen;
3092 *is_func_or_var = TRUE;
3093 return TRUE;
3094 case oinbody:
3095 break;
3096 case omethodsign:
3097 if (parlev == 0)
3099 fvdef = fvnone;
3100 objdef = omethodtag;
3101 linebuffer_setlen (&token_name, len);
3102 strncpy (token_name.buffer, str, len);
3103 token_name.buffer[len] = '\0';
3104 return TRUE;
3106 return FALSE;
3107 case omethodcolon:
3108 if (parlev == 0)
3109 objdef = omethodparm;
3110 return FALSE;
3111 case omethodparm:
3112 if (parlev == 0)
3114 fvdef = fvnone;
3115 objdef = omethodtag;
3116 linebuffer_setlen (&token_name, token_name.len + len);
3117 strncat (token_name.buffer, str, len);
3118 return TRUE;
3120 return FALSE;
3121 case oignore:
3122 if (toktype == st_C_objend)
3124 /* Memory leakage here: the string pointed by objtag is
3125 never released, because many tests would be needed to
3126 avoid breaking on incorrect input code. The amount of
3127 memory leaked here is the sum of the lengths of the
3128 class tags.
3129 free (objtag); */
3130 objdef = onone;
3132 return FALSE;
3135 /* A function, variable or enum constant? */
3136 switch (toktype)
3138 case st_C_extern:
3139 fvextern = TRUE;
3140 switch (fvdef)
3142 case finlist:
3143 case flistseen:
3144 case fignore:
3145 case vignore:
3146 break;
3147 default:
3148 fvdef = fvnone;
3150 return FALSE;
3151 case st_C_ignore:
3152 fvextern = FALSE;
3153 fvdef = vignore;
3154 return FALSE;
3155 case st_C_operator:
3156 fvdef = foperator;
3157 *is_func_or_var = TRUE;
3158 return TRUE;
3159 case st_none:
3160 if (constantypedefs
3161 && structdef == snone
3162 && structtype == st_C_enum && bracelev > structbracelev)
3163 return TRUE; /* enum constant */
3164 switch (fvdef)
3166 case fdefunkey:
3167 if (bracelev > 0)
3168 break;
3169 fvdef = fdefunname; /* GNU macro */
3170 *is_func_or_var = TRUE;
3171 return TRUE;
3172 case fvnone:
3173 switch (typdef)
3175 case ttypeseen:
3176 return FALSE;
3177 case tnone:
3178 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3179 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3181 fvdef = vignore;
3182 return FALSE;
3184 break;
3186 /* FALLTHRU */
3187 case fvnameseen:
3188 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3190 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3191 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3192 fvdef = foperator;
3193 *is_func_or_var = TRUE;
3194 return TRUE;
3196 if (bracelev > 0 && !instruct)
3197 break;
3198 fvdef = fvnameseen; /* function or variable */
3199 *is_func_or_var = TRUE;
3200 return TRUE;
3202 break;
3205 return FALSE;
3210 * C_entries often keeps pointers to tokens or lines which are older than
3211 * the line currently read. By keeping two line buffers, and switching
3212 * them at end of line, it is possible to use those pointers.
3214 static struct
3216 long linepos;
3217 linebuffer lb;
3218 } lbs[2];
3220 #define current_lb_is_new (newndx == curndx)
3221 #define switch_line_buffers() (curndx = 1 - curndx)
3223 #define curlb (lbs[curndx].lb)
3224 #define newlb (lbs[newndx].lb)
3225 #define curlinepos (lbs[curndx].linepos)
3226 #define newlinepos (lbs[newndx].linepos)
3228 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3229 #define cplpl (c_ext & C_PLPL)
3230 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3232 #define CNL_SAVE_DEFINEDEF() \
3233 do { \
3234 curlinepos = charno; \
3235 readline (&curlb, inf); \
3236 lp = curlb.buffer; \
3237 quotednl = FALSE; \
3238 newndx = curndx; \
3239 } while (0)
3241 #define CNL() \
3242 do { \
3243 CNL_SAVE_DEFINEDEF(); \
3244 if (savetoken.valid) \
3246 token = savetoken; \
3247 savetoken.valid = FALSE; \
3249 definedef = dnone; \
3250 } while (0)
3253 static void
3254 make_C_tag (isfun)
3255 bool isfun;
3257 /* This function is never called when token.valid is FALSE, but
3258 we must protect against invalid input or internal errors. */
3259 if (token.valid)
3260 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3261 token.offset+token.length+1, token.lineno, token.linepos);
3262 else if (DEBUG)
3263 { /* this branch is optimised away if !DEBUG */
3264 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3265 token_name.len + 17, isfun, token.line,
3266 token.offset+token.length+1, token.lineno, token.linepos);
3267 error ("INVALID TOKEN", NULL);
3270 token.valid = FALSE;
/*
 * C_entries ()
 *	This routine finds functions, variables, typedefs,
 *	#define's, enum constants and struct/union/enum definitions in
 *	C syntax and adds them to the list.
 */
/* Scan INF as C-like source, one character at a time, and emit tags
   through make_C_tag.
   C_EXT holds the dialect flags; it is tested below against C_AUTO,
   C_PLPL, C_JAVA and YACC.  It is a local copy and may be changed here:
   when C_AUTO is set and a `::' is met inside an identifier, it is
   switched to C_PLPL.
   The function resets the file-level parser state (fvdef, typdef,
   structdef, definedef, objdef, token) on entry and frees the two line
   buffers before returning. */
3280 static void
3281 C_entries (c_ext, inf)
3282 int c_ext; /* extension of C */
3283 FILE *inf; /* input file */
3285 register char c; /* latest char read; '\0' for end of line */
3286 register char *lp; /* pointer one beyond the character `c' */
3287 int curndx, newndx; /* indices for current and new lb */
3288 register int tokoff; /* offset in line of start of current token */
3289 register int toklen; /* length of current token */
3290 char *qualifier; /* string used to qualify names */
3291 int qlen; /* length of qualifier */
3292 int bracelev; /* current brace level */
3293 int bracketlev; /* current bracket level */
3294 int parlev; /* current parenthesis level */
3295 int attrparlev; /* __attribute__ parenthesis level */
3296 int templatelev; /* current template level */
3297 int typdefbracelev; /* bracelev where a typedef struct body begun */
3298 bool incomm, inquote, inchar, quotednl, midtoken;
3299 bool yacc_rules; /* in the rules part of a yacc file */
3300 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Prepare both line buffers and, the first time through, the class stack. */
3303 linebuffer_init (&lbs[0].lb);
3304 linebuffer_init (&lbs[1].lb);
3305 if (cstack.size == 0)
3307 cstack.size = (DEBUG) ? 1 : 4;
3308 cstack.nl = 0;
3309 cstack.cname = xnew (cstack.size, char *);
3310 cstack.bracelev = xnew (cstack.size, int);
3313 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3314 curndx = newndx = 0;
3315 lp = curlb.buffer;
/* Start on an empty line so that the first iteration reads real input. */
3316 *lp = 0;
/* Reset every piece of parser state before scanning. */
3318 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3319 structdef = snone; definedef = dnone; objdef = onone;
3320 yacc_rules = FALSE;
3321 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3322 token.valid = savetoken.valid = FALSE;
3323 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Java qualifies member names with `.', everything else with `::'. */
3324 if (cjava)
3325 { qualifier = "."; qlen = 1; }
3326 else
3327 { qualifier = "::"; qlen = 2; }
/* Main loop: one character per iteration.  A '\0' stands for end of
   line and leads to reading the next line (CNL / CNL_SAVE_DEFINEDEF). */
3330 while (!feof (inf))
3332 c = *lp++;
3333 if (c == '\\')
3335 /* If we are at the end of the line, the next character is a
3336 '\0'; do not skip it, because it is what tells us
3337 to read the next line. */
3338 if (*lp == '\0')
3340 quotednl = TRUE;
3341 continue;
3343 lp++;
3344 c = ' ';
/* Inside a block comment: only its terminator and end of line matter. */
3346 else if (incomm)
3348 switch (c)
3350 case '*':
3351 if (*lp == '/')
3353 c = *lp++;
3354 incomm = FALSE;
3356 break;
3357 case '\0':
3358 /* Newlines inside comments do not end macro definitions in
3359 traditional cpp. */
3360 CNL_SAVE_DEFINEDEF ();
3361 break;
3363 continue;
/* Inside a string literal. */
3365 else if (inquote)
3367 switch (c)
3369 case '"':
3370 inquote = FALSE;
3371 break;
3372 case '\0':
3373 /* Newlines inside strings do not end macro definitions
3374 in traditional cpp, even though compilers don't
3375 usually accept them. */
3376 CNL_SAVE_DEFINEDEF ();
3377 break;
3379 continue;
/* Inside a character constant. */
3381 else if (inchar)
3383 switch (c)
3385 case '\0':
3386 /* Hmmm, something went wrong. */
3387 CNL ();
3388 /* FALLTHRU */
3389 case '\'':
3390 inchar = FALSE;
3391 break;
3393 continue;
/* Inside square brackets: skip everything up to the matching `]'. */
3395 else if (bracketlev > 0)
3397 switch (c)
3399 case ']':
3400 if (--bracketlev > 0)
3401 continue;
3402 break;
3403 case '\0':
3404 CNL_SAVE_DEFINEDEF ();
3405 break;
3407 continue;
/* Ordinary text: detect the start of strings, character constants,
   comments, yacc section marks, preprocessor lines and brackets. */
3409 else switch (c)
3411 case '"':
3412 inquote = TRUE;
3413 if (inattribute)
3414 break;
3415 switch (fvdef)
3417 case fdefunkey:
3418 case fstartlist:
3419 case finlist:
3420 case fignore:
3421 case vignore:
3422 break;
3423 default:
3424 fvextern = FALSE;
3425 fvdef = fvnone;
3427 continue;
3428 case '\'':
3429 inchar = TRUE;
3430 if (inattribute)
3431 break;
3432 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3434 fvextern = FALSE;
3435 fvdef = fvnone;
3437 continue;
3438 case '/':
3439 if (*lp == '*')
3441 incomm = TRUE;
3442 lp++;
3443 c = ' ';
/* A `//' comment is handled by pretending the line ends here. */
3445 else if (/* cplpl && */ *lp == '/')
3447 c = '\0';
3449 break;
3450 case '%':
3451 if ((c_ext & YACC) && *lp == '%')
3453 /* Entering or exiting rules section in yacc file. */
3454 lp++;
3455 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3456 typdef = tnone; structdef = snone;
3457 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3458 bracelev = 0;
3459 yacc_rules = !yacc_rules;
3460 continue;
3462 else
3463 break;
3464 case '#':
3465 if (definedef == dnone)
3467 char *cp;
3468 bool cpptoken = TRUE;
3470 /* Look back on this line. If all blanks, or nonblanks
3471 followed by an end of comment, this is a preprocessor
3472 token. */
3473 for (cp = newlb.buffer; cp < lp-1; cp++)
3474 if (!iswhite (*cp))
3476 if (*cp == '*' && *(cp+1) == '/')
3478 cp++;
3479 cpptoken = TRUE;
3481 else
3482 cpptoken = FALSE;
3484 if (cpptoken)
3485 definedef = dsharpseen;
3486 } /* if (definedef == dnone) */
3487 continue;
3488 case '[':
3489 bracketlev++;
3490 continue;
3491 } /* switch (c) */
3494 /* Consider token only if some involved conditions are satisfied. */
3495 if (typdef != tignore
3496 && definedef != dignorerest
3497 && fvdef != finlist
3498 && templatelev == 0
3499 && (definedef != dnone
3500 || structdef != scolonseen)
3501 && !inattribute)
3503 if (midtoken)
3505 if (endtoken (c))
3507 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3508 /* This handles :: in the middle,
3509 but not at the beginning of an identifier.
3510 Also, space-separated :: is not recognised. */
3512 if (c_ext & C_AUTO) /* automatic detection of C++ */
3513 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3514 lp += 2;
3515 toklen += 2;
3516 c = lp[-1];
3517 goto still_in_token;
3519 else
3521 bool funorvar = FALSE;
/* A complete token has been read: let consider_token classify it
   (every token counts inside yacc rules). */
3523 if (yacc_rules
3524 || consider_token (newlb.buffer + tokoff, toklen, c,
3525 &c_ext, bracelev, parlev,
3526 &funorvar))
/* For `operator', extend the token over the operator symbol itself. */
3528 if (fvdef == foperator)
3530 char *oldlp = lp;
3531 lp = skip_spaces (lp-1);
3532 if (*lp != '\0')
3533 lp += 1;
3534 while (*lp != '\0'
3535 && !iswhite (*lp) && *lp != '(')
3536 lp += 1;
3537 c = *lp++;
3538 toklen += lp - oldlp;
/* Build the tag name in token_name; its form depends on the context. */
3540 token.named = FALSE;
3541 if (!plainc
3542 && nestlev > 0 && definedef == dnone)
3543 /* in struct body */
3545 write_classname (&token_name, qualifier);
3546 linebuffer_setlen (&token_name,
3547 token_name.len+qlen+toklen);
3548 strcat (token_name.buffer, qualifier);
3549 strncat (token_name.buffer,
3550 newlb.buffer + tokoff, toklen);
3551 token.named = TRUE;
3553 else if (objdef == ocatseen)
3554 /* Objective C category */
3556 int len = strlen (objtag) + 2 + toklen;
3557 linebuffer_setlen (&token_name, len);
3558 strcpy (token_name.buffer, objtag);
3559 strcat (token_name.buffer, "(");
3560 strncat (token_name.buffer,
3561 newlb.buffer + tokoff, toklen);
3562 strcat (token_name.buffer, ")");
3563 token.named = TRUE;
3565 else if (objdef == omethodtag
3566 || objdef == omethodparm)
3567 /* Objective C method */
3569 token.named = TRUE;
3571 else if (fvdef == fdefunname)
3572 /* GNU DEFUN and similar macros */
3574 bool defun = (newlb.buffer[tokoff] == 'F');
3575 int off = tokoff;
3576 int len = toklen;
3578 /* Rewrite the tag so that emacs lisp DEFUNs
3579 can be found by their elisp name */
3580 if (defun)
3582 off += 1;
3583 len -= 1;
3585 linebuffer_setlen (&token_name, len);
3586 strncpy (token_name.buffer,
3587 newlb.buffer + off, len);
3588 token_name.buffer[len] = '\0';
3589 if (defun)
3590 while (--len >= 0)
3591 if (token_name.buffer[len] == '_')
3592 token_name.buffer[len] = '-';
3593 token.named = defun;
3595 else
3597 linebuffer_setlen (&token_name, toklen);
3598 strncpy (token_name.buffer,
3599 newlb.buffer + tokoff, toklen);
3600 token_name.buffer[toklen] = '\0';
3601 /* Name macros and members. */
3602 token.named = (structdef == stagseen
3603 || typdef == ttypeseen
3604 || typdef == tend
3605 || (funorvar
3606 && definedef == dignorerest)
3607 || (funorvar
3608 && definedef == dnone
3609 && structdef == snone
3610 && bracelev > 0));
/* Record where the token was found. */
3612 token.lineno = lineno;
3613 token.offset = tokoff;
3614 token.length = toklen;
3615 token.line = newlb.buffer;
3616 token.linepos = newlinepos;
3617 token.valid = TRUE;
/* Either keep the token for a later decision, switching line buffers
   so that its line is not overwritten, or tag it right away. */
3619 if (definedef == dnone
3620 && (fvdef == fvnameseen
3621 || fvdef == foperator
3622 || structdef == stagseen
3623 || typdef == tend
3624 || typdef == ttypeseen
3625 || objdef != onone))
3627 if (current_lb_is_new)
3628 switch_line_buffers ();
3630 else if (definedef != dnone
3631 || fvdef == fdefunname
3632 || instruct)
3633 make_C_tag (funorvar);
3635 else /* not yacc and consider_token failed */
3637 if (inattribute && fvdef == fignore)
3639 /* We have just met __attribute__ after a
3640 function parameter list: do not tag the
3641 function again. */
3642 fvdef = fvnone;
3645 midtoken = FALSE;
3647 } /* if (endtoken (c)) */
3648 else if (intoken (c))
3649 still_in_token:
3651 toklen++;
3652 continue;
3654 } /* if (midtoken) */
3655 else if (begtoken (c))
3657 switch (definedef)
3659 case dnone:
3660 switch (fvdef)
3662 case fstartlist:
3663 /* This prevents tagging fb in
3664 void (__attribute__((noreturn)) *fb) (void);
3665 Fixing this is not easy and not very important. */
3666 fvdef = finlist;
3667 continue;
3668 case flistseen:
3669 if (plainc || declarations)
3671 make_C_tag (TRUE); /* a function */
3672 fvdef = fignore;
3674 break;
3676 if (structdef == stagseen && !cjava)
3678 popclass_above (bracelev);
3679 structdef = snone;
3681 break;
3682 case dsharpseen:
3683 savetoken = token;
3684 break;
/* Start a new token here; inside yacc rules only at the first column. */
3686 if (!yacc_rules || lp == newlb.buffer + 1)
3688 tokoff = lp - 1 - newlb.buffer;
3689 toklen = 1;
3690 midtoken = TRUE;
3692 continue;
3693 } /* if (begtoken) */
3694 } /* if must look at token */
3697 /* Detect end of line, colon, comma, semicolon and various braces
3698 after having handled a token.*/
3699 switch (c)
3701 case ':':
3702 if (inattribute)
3703 break;
3704 if (yacc_rules && token.offset == 0 && token.valid)
3706 make_C_tag (FALSE); /* a yacc function */
3707 break;
3709 if (definedef != dnone)
3710 break;
3711 switch (objdef)
3713 case otagseen:
3714 objdef = oignore;
3715 make_C_tag (TRUE); /* an Objective C class */
3716 break;
3717 case omethodtag:
3718 case omethodparm:
3719 objdef = omethodcolon;
3720 linebuffer_setlen (&token_name, token_name.len + 1);
3721 strcat (token_name.buffer, ":");
3722 break;
3724 if (structdef == stagseen)
3726 structdef = scolonseen;
3727 break;
3729 /* Should be useless, but may work as a safety net. */
3730 if (cplpl && fvdef == flistseen)
3732 make_C_tag (TRUE); /* a function */
3733 fvdef = fignore;
3734 break;
3736 break;
3737 case ';':
3738 if (definedef != dnone || inattribute)
3739 break;
3740 switch (typdef)
3742 case tend:
3743 case ttypeseen:
3744 make_C_tag (FALSE); /* a typedef */
3745 typdef = tnone;
3746 fvdef = fvnone;
3747 break;
3748 case tnone:
3749 case tinbody:
3750 case tignore:
3751 switch (fvdef)
3753 case fignore:
3754 if (typdef == tignore || cplpl)
3755 fvdef = fvnone;
3756 break;
3757 case fvnameseen:
3758 if ((globals && bracelev == 0 && (!fvextern || declarations))
3759 || (members && instruct))
3760 make_C_tag (FALSE); /* a variable */
3761 fvextern = FALSE;
3762 fvdef = fvnone;
3763 token.valid = FALSE;
3764 break;
3765 case flistseen:
3766 if ((declarations
3767 && (cplpl || !instruct)
3768 && (typdef == tnone || (typdef != tignore && instruct)))
3769 || (members
3770 && plainc && instruct))
3771 make_C_tag (TRUE); /* a function */
3772 /* FALLTHRU */
3773 default:
3774 fvextern = FALSE;
3775 fvdef = fvnone;
3776 if (declarations
3777 && cplpl && structdef == stagseen)
3778 make_C_tag (FALSE); /* forward declaration */
3779 else
3780 token.valid = FALSE;
3781 } /* switch (fvdef) */
3782 /* FALLTHRU */
3783 default:
3784 if (!instruct)
3785 typdef = tnone;
3787 if (structdef == stagseen)
3788 structdef = snone;
3789 break;
3790 case ',':
3791 if (definedef != dnone || inattribute)
3792 break;
3793 switch (objdef)
3795 case omethodtag:
3796 case omethodparm:
3797 make_C_tag (TRUE); /* an Objective C method */
3798 objdef = oinbody;
3799 break;
3801 switch (fvdef)
3803 case fdefunkey:
3804 case foperator:
3805 case fstartlist:
3806 case finlist:
3807 case fignore:
3808 case vignore:
3809 break;
3810 case fdefunname:
3811 fvdef = fignore;
3812 break;
3813 case fvnameseen:
3814 if (parlev == 0
3815 && ((globals
3816 && bracelev == 0
3817 && templatelev == 0
3818 && (!fvextern || declarations))
3819 || (members && instruct)))
3820 make_C_tag (FALSE); /* a variable */
3821 break;
3822 case flistseen:
3823 if ((declarations && typdef == tnone && !instruct)
3824 || (members && typdef != tignore && instruct))
3826 make_C_tag (TRUE); /* a function */
3827 fvdef = fvnameseen;
3829 else if (!declarations)
3830 fvdef = fvnone;
3831 token.valid = FALSE;
3832 break;
3833 default:
3834 fvdef = fvnone;
3836 if (structdef == stagseen)
3837 structdef = snone;
3838 break;
3839 case ']':
3840 if (definedef != dnone || inattribute)
3841 break;
3842 if (structdef == stagseen)
3843 structdef = snone;
3844 switch (typdef)
3846 case ttypeseen:
3847 case tend:
3848 typdef = tignore;
3849 make_C_tag (FALSE); /* a typedef */
3850 break;
3851 case tnone:
3852 case tinbody:
3853 switch (fvdef)
3855 case foperator:
3856 case finlist:
3857 case fignore:
3858 case vignore:
3859 break;
3860 case fvnameseen:
3861 if ((members && bracelev == 1)
3862 || (globals && bracelev == 0
3863 && (!fvextern || declarations)))
3864 make_C_tag (FALSE); /* a variable */
3865 /* FALLTHRU */
3866 default:
3867 fvdef = fvnone;
3869 break;
3871 break;
3872 case '(':
3873 if (inattribute)
3875 attrparlev++;
3876 break;
3878 if (definedef != dnone)
3879 break;
3880 if (objdef == otagseen && parlev == 0)
3881 objdef = oparenseen;
3882 switch (fvdef)
3884 case fvnameseen:
3885 if (typdef == ttypeseen
3886 && *lp != '*'
3887 && !instruct)
3889 /* This handles constructs like:
3890 typedef void OperatorFun (int fun); */
3891 make_C_tag (FALSE);
3892 typdef = tignore;
3893 fvdef = fignore;
3894 break;
3896 /* FALLTHRU */
3897 case foperator:
3898 fvdef = fstartlist;
3899 break;
3900 case flistseen:
3901 fvdef = finlist;
3902 break;
3904 parlev++;
3905 break;
3906 case ')':
3907 if (inattribute)
3909 if (--attrparlev == 0)
3910 inattribute = FALSE;
3911 break;
3913 if (definedef != dnone)
3914 break;
3915 if (objdef == ocatseen && parlev == 1)
3917 make_C_tag (TRUE); /* an Objective C category */
3918 objdef = oignore;
3920 if (--parlev == 0)
3922 switch (fvdef)
3924 case fstartlist:
3925 case finlist:
3926 fvdef = flistseen;
3927 break;
3929 if (!instruct
3930 && (typdef == tend
3931 || typdef == ttypeseen))
3933 typdef = tignore;
3934 make_C_tag (FALSE); /* a typedef */
3937 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3938 parlev = 0;
3939 break;
3940 case '{':
3941 if (definedef != dnone)
3942 break;
3943 if (typdef == ttypeseen)
3945 /* Whenever typdef is set to tinbody (currently only
3946 here), typdefbracelev should be set to bracelev. */
3947 typdef = tinbody;
3948 typdefbracelev = bracelev;
3950 switch (fvdef)
3952 case flistseen:
3953 make_C_tag (TRUE); /* a function */
3954 /* FALLTHRU */
3955 case fignore:
3956 fvdef = fvnone;
3957 break;
3958 case fvnone:
3959 switch (objdef)
3961 case otagseen:
3962 make_C_tag (TRUE); /* an Objective C class */
3963 objdef = oignore;
3964 break;
3965 case omethodtag:
3966 case omethodparm:
3967 make_C_tag (TRUE); /* an Objective C method */
3968 objdef = oinbody;
3969 break;
3970 default:
3971 /* Neutralize `extern "C" {' grot. */
3972 if (bracelev == 0 && structdef == snone && nestlev == 0
3973 && typdef == tnone)
3974 bracelev = -1;
3976 break;
3978 switch (structdef)
3980 case skeyseen: /* unnamed struct */
3981 pushclass_above (bracelev, NULL, 0);
3982 structdef = snone;
3983 break;
3984 case stagseen: /* named struct or enum */
3985 case scolonseen: /* a class */
3986 pushclass_above (bracelev,token.line+token.offset, token.length);
3987 structdef = snone;
3988 make_C_tag (FALSE); /* a struct or enum */
3989 break;
3991 bracelev += 1;
3992 break;
3993 case '*':
3994 if (definedef != dnone)
3995 break;
3996 if (fvdef == fstartlist)
3998 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3999 token.valid = FALSE;
4001 break;
4002 case '}':
4003 if (definedef != dnone)
4004 break;
4005 bracelev -= 1;
4006 if (!ignoreindent && lp == newlb.buffer + 1)
4008 if (bracelev != 0)
4009 token.valid = FALSE; /* unexpected value, token unreliable */
4010 bracelev = 0; /* reset brace level if first column */
4011 parlev = 0; /* also reset paren level, just in case... */
4013 else if (bracelev < 0)
4015 token.valid = FALSE; /* something gone amiss, token unreliable */
4016 bracelev = 0;
4018 if (bracelev == 0 && fvdef == vignore)
4019 fvdef = fvnone; /* end of function */
4020 popclass_above (bracelev);
4021 structdef = snone;
4022 /* Only if typdef == tinbody is typdefbracelev significant. */
4023 if (typdef == tinbody && bracelev <= typdefbracelev)
4025 assert (bracelev == typdefbracelev);
4026 typdef = tend;
4028 break;
4029 case '=':
4030 if (definedef != dnone)
4031 break;
4032 switch (fvdef)
4034 case foperator:
4035 case finlist:
4036 case fignore:
4037 case vignore:
4038 break;
4039 case fvnameseen:
4040 if ((members && bracelev == 1)
4041 || (globals && bracelev == 0 && (!fvextern || declarations)))
4042 make_C_tag (FALSE); /* a variable */
4043 /* FALLTHRU */
4044 default:
4045 fvdef = vignore;
4047 break;
4048 case '<':
4049 if (cplpl
4050 && (structdef == stagseen || fvdef == fvnameseen))
4052 templatelev++;
4053 break;
4055 goto resetfvdef;
4056 case '>':
4057 if (templatelev > 0)
4059 templatelev--;
4060 break;
4062 goto resetfvdef;
4063 case '+':
4064 case '-':
4065 if (objdef == oinbody && bracelev == 0)
4067 objdef = omethodsign;
4068 break;
4070 /* FALLTHRU */
4071 resetfvdef:
4072 case '#': case '~': case '&': case '%': case '/':
4073 case '|': case '^': case '!': case '.': case '?':
4074 if (definedef != dnone)
4075 break;
4076 /* These surely cannot follow a function tag in C. */
4077 switch (fvdef)
4079 case foperator:
4080 case finlist:
4081 case fignore:
4082 case vignore:
4083 break;
4084 default:
4085 fvdef = fvnone;
4087 break;
4088 case '\0':
4089 if (objdef == otagseen)
4091 make_C_tag (TRUE); /* an Objective C class */
4092 objdef = oignore;
4094 /* If a macro spans multiple lines don't reset its state. */
4095 if (quotednl)
4096 CNL_SAVE_DEFINEDEF ();
4097 else
4098 CNL ();
4099 break;
4100 } /* switch (c) */
4102 } /* while not eof */
/* Release the storage of both line buffers. */
4104 free (lbs[0].lb.buffer);
4105 free (lbs[1].lb.buffer);
4109 * Process either a C++ file or a C file depending on the setting
4110 * of a global flag.
4112 static void
4113 default_C_entries (inf)
4114 FILE *inf;
4116 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Parse INF as plain C: no dialect flag is handed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;

  C_entries (dialect, inf);
}
4127 /* Always do C++. */
4128 static void
4129 Cplusplus_entries (inf)
4130 FILE *inf;
4132 C_entries (C_PLPL, inf);
4135 /* Always do Java. */
4136 static void
4137 Cjava_entries (inf)
4138 FILE *inf;
4140 C_entries (C_JAVA, inf);
4143 /* Always do C*. */
4144 static void
4145 Cstar_entries (inf)
4146 FILE *inf;
4148 C_entries (C_STAR, inf);
4151 /* Always do Yacc. */
4152 static void
4153 Yacc_entries (inf)
4154 FILE *inf;
4156 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read one line
   into LINE_BUFFER and point CHAR_POINTER at its first character.
   The statement that follows the macro is the loop body. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&(line_buffer), (file_pointer)),			\
	   (char_pointer) = (line_buffer).buffer,			\
	   TRUE);							\
      )

/* True if CP points at keyword KW followed by a character that cannot
   be part of a name; on success CP is moved past KW and the spaces
   that follow it.  KW must be a string literal. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof(kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof(kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))	/* skip spaces */

/* Like LOOKING_AT, but compares without regard to case, does not
   check what follows KW, and only moves CP past KW itself. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert("" kw), TRUE) /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof(kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof(kw)-1))			/* step over kw */
4183 * Read a file, but do no processing. This is used to do regexp
4184 * matching on files that have no language defined.
4186 static void
4187 just_read_file (inf)
4188 FILE *inf;
4190 register char *dummy;
4192 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4193 continue;
4197 /* Fortran parsing */
4199 static void F_takeprec __P((void));
4200 static void F_getit __P((FILE *));
4202 static void
4203 F_takeprec ()
4205 dbp = skip_spaces (dbp);
4206 if (*dbp != '*')
4207 return;
4208 dbp++;
4209 dbp = skip_spaces (dbp);
4210 if (strneq (dbp, "(*)", 3))
4212 dbp += 3;
4213 return;
4215 if (!ISDIGIT (*dbp))
4217 --dbp; /* force failure */
4218 return;
4221 dbp++;
4222 while (ISDIGIT (*dbp));
4225 static void
4226 F_getit (inf)
4227 FILE *inf;
4229 register char *cp;
4231 dbp = skip_spaces (dbp);
4232 if (*dbp == '\0')
4234 readline (&lb, inf);
4235 dbp = lb.buffer;
4236 if (dbp[5] != '&')
4237 return;
4238 dbp += 6;
4239 dbp = skip_spaces (dbp);
4241 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4242 return;
4243 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4244 continue;
4245 make_tag (dbp, cp-dbp, TRUE,
4246 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4250 static void
4251 Fortran_functions (inf)
4252 FILE *inf;
4254 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4256 if (*dbp == '%')
4257 dbp++; /* Ratfor escape to fortran */
4258 dbp = skip_spaces (dbp);
4259 if (*dbp == '\0')
4260 continue;
4261 switch (lowcase (*dbp))
4263 case 'i':
4264 if (nocase_tail ("integer"))
4265 F_takeprec ();
4266 break;
4267 case 'r':
4268 if (nocase_tail ("real"))
4269 F_takeprec ();
4270 break;
4271 case 'l':
4272 if (nocase_tail ("logical"))
4273 F_takeprec ();
4274 break;
4275 case 'c':
4276 if (nocase_tail ("complex") || nocase_tail ("character"))
4277 F_takeprec ();
4278 break;
4279 case 'd':
4280 if (nocase_tail ("double"))
4282 dbp = skip_spaces (dbp);
4283 if (*dbp == '\0')
4284 continue;
4285 if (nocase_tail ("precision"))
4286 break;
4287 continue;
4289 break;
4291 dbp = skip_spaces (dbp);
4292 if (*dbp == '\0')
4293 continue;
4294 switch (lowcase (*dbp))
4296 case 'f':
4297 if (nocase_tail ("function"))
4298 F_getit (inf);
4299 continue;
4300 case 's':
4301 if (nocase_tail ("subroutine"))
4302 F_getit (inf);
4303 continue;
4304 case 'e':
4305 if (nocase_tail ("entry"))
4306 F_getit (inf);
4307 continue;
4308 case 'b':
4309 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4311 dbp = skip_spaces (dbp);
4312 if (*dbp == '\0') /* assume un-named */
4313 make_tag ("blockdata", 9, TRUE,
4314 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4315 else
4316 F_getit (inf); /* look for name */
4318 continue;
4325 * Ada parsing
4326 * Original code by
4327 * Philippe Waroquiers (1998)
4330 static void Ada_getit __P((FILE *, char *));
4332 /* Once we are positioned after an "interesting" keyword, let's get
4333 the real tag value necessary. */
4334 static void
4335 Ada_getit (inf, name_qualifier)
4336 FILE *inf;
4337 char *name_qualifier;
4339 register char *cp;
4340 char *name;
4341 char c;
4343 while (!feof (inf))
4345 dbp = skip_spaces (dbp);
4346 if (*dbp == '\0'
4347 || (dbp[0] == '-' && dbp[1] == '-'))
4349 readline (&lb, inf);
4350 dbp = lb.buffer;
4352 switch (lowcase(*dbp))
4354 case 'b':
4355 if (nocase_tail ("body"))
4357 /* Skipping body of procedure body or package body or ....
4358 resetting qualifier to body instead of spec. */
4359 name_qualifier = "/b";
4360 continue;
4362 break;
4363 case 't':
4364 /* Skipping type of task type or protected type ... */
4365 if (nocase_tail ("type"))
4366 continue;
4367 break;
4369 if (*dbp == '"')
4371 dbp += 1;
4372 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4373 continue;
4375 else
4377 dbp = skip_spaces (dbp);
4378 for (cp = dbp;
4379 (*cp != '\0'
4380 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4381 cp++)
4382 continue;
4383 if (cp == dbp)
4384 return;
4386 c = *cp;
4387 *cp = '\0';
4388 name = concat (dbp, name_qualifier, "");
4389 *cp = c;
4390 make_tag (name, strlen (name), TRUE,
4391 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4392 free (name);
4393 if (c == '"')
4394 dbp = cp + 1;
4395 return;
4399 static void
4400 Ada_funcs (inf)
4401 FILE *inf;
4403 bool inquote = FALSE;
4404 bool skip_till_semicolumn = FALSE;
4406 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4408 while (*dbp != '\0')
4410 /* Skip a string i.e. "abcd". */
4411 if (inquote || (*dbp == '"'))
4413 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4414 if (dbp != NULL)
4416 inquote = FALSE;
4417 dbp += 1;
4418 continue; /* advance char */
4420 else
4422 inquote = TRUE;
4423 break; /* advance line */
4427 /* Skip comments. */
4428 if (dbp[0] == '-' && dbp[1] == '-')
4429 break; /* advance line */
4431 /* Skip character enclosed in single quote i.e. 'a'
4432 and skip single quote starting an attribute i.e. 'Image. */
4433 if (*dbp == '\'')
4435 dbp++ ;
4436 if (*dbp != '\0')
4437 dbp++;
4438 continue;
4441 if (skip_till_semicolumn)
4443 if (*dbp == ';')
4444 skip_till_semicolumn = FALSE;
4445 dbp++;
4446 continue; /* advance char */
4449 /* Search for beginning of a token. */
4450 if (!begtoken (*dbp))
4452 dbp++;
4453 continue; /* advance char */
4456 /* We are at the beginning of a token. */
4457 switch (lowcase(*dbp))
4459 case 'f':
4460 if (!packages_only && nocase_tail ("function"))
4461 Ada_getit (inf, "/f");
4462 else
4463 break; /* from switch */
4464 continue; /* advance char */
4465 case 'p':
4466 if (!packages_only && nocase_tail ("procedure"))
4467 Ada_getit (inf, "/p");
4468 else if (nocase_tail ("package"))
4469 Ada_getit (inf, "/s");
4470 else if (nocase_tail ("protected")) /* protected type */
4471 Ada_getit (inf, "/t");
4472 else
4473 break; /* from switch */
4474 continue; /* advance char */
4476 case 'u':
4477 if (typedefs && !packages_only && nocase_tail ("use"))
4479 /* when tagging types, avoid tagging use type Pack.Typename;
4480 for this, we will skip everything till a ; */
4481 skip_till_semicolumn = TRUE;
4482 continue; /* advance char */
4485 case 't':
4486 if (!packages_only && nocase_tail ("task"))
4487 Ada_getit (inf, "/k");
4488 else if (typedefs && !packages_only && nocase_tail ("type"))
4490 Ada_getit (inf, "/t");
4491 while (*dbp != '\0')
4492 dbp += 1;
4494 else
4495 break; /* from switch */
4496 continue; /* advance char */
4499 /* Look for the end of the token. */
4500 while (!endtoken (*dbp))
4501 dbp++;
4503 } /* advance char */
4504 } /* advance line */
4509 * Unix and microcontroller assembly tag handling
4510 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4511 * Idea by Bob Weiner, Motorola Inc. (1994)
4513 static void
4514 Asm_labels (inf)
4515 FILE *inf;
4517 register char *cp;
4519 LOOP_ON_INPUT_LINES (inf, lb, cp)
4521 /* If first char is alphabetic or one of [_.$], test for colon
4522 following identifier. */
4523 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4525 /* Read past label. */
4526 cp++;
4527 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4528 cp++;
4529 if (*cp == ':' || iswhite (*cp))
4530 /* Found end of label, so copy it and add it to the table. */
4531 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4532 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4539 * Perl support
4540 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4541 * Perl variable names: /^(my|local).../
4542 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4543 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4544 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4546 static void
4547 Perl_functions (inf)
4548 FILE *inf;
4550 char *package = savestr ("main"); /* current package name */
4551 register char *cp;
4553 LOOP_ON_INPUT_LINES (inf, lb, cp)
4555 cp = skip_spaces (cp);
4557 if (LOOKING_AT (cp, "package"))
4559 free (package);
4560 get_tag (cp, &package);
4562 else if (LOOKING_AT (cp, "sub"))
4564 char *pos;
4565 char *sp = cp;
4567 while (!notinname (*cp))
4568 cp++;
4569 if (cp == sp)
4570 continue; /* nothing found */
4571 if ((pos = etags_strchr (sp, ':')) != NULL
4572 && pos < cp && pos[1] == ':')
4573 /* The name is already qualified. */
4574 make_tag (sp, cp - sp, TRUE,
4575 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4576 else
4577 /* Qualify it. */
4579 char savechar, *name;
4581 savechar = *cp;
4582 *cp = '\0';
4583 name = concat (package, "::", sp);
4584 *cp = savechar;
4585 make_tag (name, strlen(name), TRUE,
4586 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4587 free (name);
4590 else if (globals) /* only if we are tagging global vars */
4592 /* Skip a qualifier, if any. */
4593 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4594 /* After "my" or "local", but before any following paren or space. */
4595 char *varstart = cp;
4597 if (qual /* should this be removed? If yes, how? */
4598 && (*cp == '$' || *cp == '@' || *cp == '%'))
4600 varstart += 1;
4602 cp++;
4603 while (ISALNUM (*cp) || *cp == '_');
4605 else if (qual)
4607 /* Should be examining a variable list at this point;
4608 could insist on seeing an open parenthesis. */
4609 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4610 cp++;
4612 else
4613 continue;
4615 make_tag (varstart, cp - varstart, FALSE,
4616 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4619 free (package);
4624 * Python support
4625 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4626 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4627 * More ideas by seb bacon <seb@jamkit.com> (2002)
4629 static void
4630 Python_functions (inf)
4631 FILE *inf;
4633 register char *cp;
4635 LOOP_ON_INPUT_LINES (inf, lb, cp)
4637 cp = skip_spaces (cp);
4638 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4640 char *name = cp;
4641 while (!notinname (*cp) && *cp != ':')
4642 cp++;
4643 make_tag (name, cp - name, TRUE,
4644 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4651 * PHP support
4652 * Look for:
4653 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4654 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4655 * - /^[ \t]*define\(\"[^\"]+/
4656 * Only with --members:
4657 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4658 * Idea by Diez B. Roggisch (2001)
4660 static void
4661 PHP_functions (inf)
4662 FILE *inf;
4664 register char *cp, *name;
4665 bool search_identifier = FALSE;
4667 LOOP_ON_INPUT_LINES (inf, lb, cp)
4669 cp = skip_spaces (cp);
4670 name = cp;
4671 if (search_identifier
4672 && *cp != '\0')
4674 while (!notinname (*cp))
4675 cp++;
4676 make_tag (name, cp - name, TRUE,
4677 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4678 search_identifier = FALSE;
4680 else if (LOOKING_AT (cp, "function"))
4682 if(*cp == '&')
4683 cp = skip_spaces (cp+1);
4684 if(*cp != '\0')
4686 name = cp;
4687 while (!notinname (*cp))
4688 cp++;
4689 make_tag (name, cp - name, TRUE,
4690 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4692 else
4693 search_identifier = TRUE;
4695 else if (LOOKING_AT (cp, "class"))
4697 if (*cp != '\0')
4699 name = cp;
4700 while (*cp != '\0' && !iswhite (*cp))
4701 cp++;
4702 make_tag (name, cp - name, FALSE,
4703 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4705 else
4706 search_identifier = TRUE;
4708 else if (strneq (cp, "define", 6)
4709 && (cp = skip_spaces (cp+6))
4710 && *cp++ == '('
4711 && (*cp == '"' || *cp == '\''))
4713 char quote = *cp++;
4714 name = cp;
4715 while (*cp != quote && *cp != '\0')
4716 cp++;
4717 make_tag (name, cp - name, FALSE,
4718 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4720 else if (members
4721 && LOOKING_AT (cp, "var")
4722 && *cp == '$')
4724 name = cp;
4725 while (!notinname(*cp))
4726 cp++;
4727 make_tag (name, cp - name, FALSE,
4728 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4735 * Cobol tag functions
4736 * We could look for anything that could be a paragraph name.
4737 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4738 * Idea by Corny de Souza (1993)
4740 static void
4741 Cobol_paragraphs (inf)
4742 FILE *inf;
4744 register char *bp, *ep;
4746 LOOP_ON_INPUT_LINES (inf, lb, bp)
4748 if (lb.len < 9)
4749 continue;
4750 bp += 8;
4752 /* If eoln, compiler option or comment ignore whole line. */
4753 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4754 continue;
4756 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4757 continue;
4758 if (*ep++ == '.')
4759 make_tag (bp, ep - bp, TRUE,
4760 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4766 * Makefile support
4767 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4769 static void
4770 Makefile_targets (inf)
4771 FILE *inf;
4773 register char *bp;
4775 LOOP_ON_INPUT_LINES (inf, lb, bp)
4777 if (*bp == '\t' || *bp == '#')
4778 continue;
4779 while (*bp != '\0' && *bp != '=' && *bp != ':')
4780 bp++;
4781 if (*bp == ':' || (globals && *bp == '='))
4783 /* We should detect if there is more than one tag, but we do not.
4784 We just skip initial and final spaces. */
4785 char * namestart = skip_spaces (lb.buffer);
4786 while (--bp > namestart)
4787 if (!notinname (*bp))
4788 break;
4789 make_tag (namestart, bp - namestart + 1, TRUE,
4790 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4797 * Pascal parsing
4798 * Original code by Mosur K. Mohan (1989)
4800 * Locates tags for procedures & functions. Doesn't do any type- or
4801 * var-definitions. It does look for the keyword "extern" or
4802 * "forward" immediately following the procedure statement; if found,
4803 * the tag is skipped.
4805 static void
4806 Pascal_functions (inf)
4807 FILE *inf;
4809 linebuffer tline; /* mostly copied from C_entries */
4810 long save_lcno;
4811 int save_lineno, namelen, taglen;
4812 char c, *name;
4814 bool /* each of these flags is TRUE if: */
4815 incomment, /* point is inside a comment */
4816 inquote, /* point is inside '..' string */
4817 get_tagname, /* point is after PROCEDURE/FUNCTION
4818 keyword, so next item = potential tag */
4819 found_tag, /* point is after a potential tag */
4820 inparms, /* point is within parameter-list */
4821 verify_tag; /* point has passed the parm-list, so the
4822 next token will determine whether this
4823 is a FORWARD/EXTERN to be ignored, or
4824 whether it is a real tag */
4826 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4827 name = NULL; /* keep compiler quiet */
4828 dbp = lb.buffer;
4829 *dbp = '\0';
4830 linebuffer_init (&tline);
4832 incomment = inquote = FALSE;
4833 found_tag = FALSE; /* have a proc name; check if extern */
4834 get_tagname = FALSE; /* found "procedure" keyword */
4835 inparms = FALSE; /* found '(' after "proc" */
4836 verify_tag = FALSE; /* check if "extern" is ahead */
4839 while (!feof (inf)) /* long main loop to get next char */
4841 c = *dbp++;
4842 if (c == '\0') /* if end of line */
4844 readline (&lb, inf);
4845 dbp = lb.buffer;
4846 if (*dbp == '\0')
4847 continue;
4848 if (!((found_tag && verify_tag)
4849 || get_tagname))
4850 c = *dbp++; /* only if don't need *dbp pointing
4851 to the beginning of the name of
4852 the procedure or function */
4854 if (incomment)
4856 if (c == '}') /* within { } comments */
4857 incomment = FALSE;
4858 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4860 dbp++;
4861 incomment = FALSE;
4863 continue;
4865 else if (inquote)
4867 if (c == '\'')
4868 inquote = FALSE;
4869 continue;
4871 else
4872 switch (c)
4874 case '\'':
4875 inquote = TRUE; /* found first quote */
4876 continue;
4877 case '{': /* found open { comment */
4878 incomment = TRUE;
4879 continue;
4880 case '(':
4881 if (*dbp == '*') /* found open (* comment */
4883 incomment = TRUE;
4884 dbp++;
4886 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4887 inparms = TRUE;
4888 continue;
4889 case ')': /* end of parms list */
4890 if (inparms)
4891 inparms = FALSE;
4892 continue;
4893 case ';':
4894 if (found_tag && !inparms) /* end of proc or fn stmt */
4896 verify_tag = TRUE;
4897 break;
4899 continue;
4901 if (found_tag && verify_tag && (*dbp != ' '))
4903 /* Check if this is an "extern" declaration. */
4904 if (*dbp == '\0')
4905 continue;
4906 if (lowcase (*dbp == 'e'))
4908 if (nocase_tail ("extern")) /* superfluous, really! */
4910 found_tag = FALSE;
4911 verify_tag = FALSE;
4914 else if (lowcase (*dbp) == 'f')
4916 if (nocase_tail ("forward")) /* check for forward reference */
4918 found_tag = FALSE;
4919 verify_tag = FALSE;
4922 if (found_tag && verify_tag) /* not external proc, so make tag */
4924 found_tag = FALSE;
4925 verify_tag = FALSE;
4926 make_tag (name, namelen, TRUE,
4927 tline.buffer, taglen, save_lineno, save_lcno);
4928 continue;
4931 if (get_tagname) /* grab name of proc or fn */
4933 char *cp;
4935 if (*dbp == '\0')
4936 continue;
4938 /* Find block name. */
4939 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4940 continue;
4942 /* Save all values for later tagging. */
4943 linebuffer_setlen (&tline, lb.len);
4944 strcpy (tline.buffer, lb.buffer);
4945 save_lineno = lineno;
4946 save_lcno = linecharno;
4947 name = tline.buffer + (dbp - lb.buffer);
4948 namelen = cp - dbp;
4949 taglen = cp - lb.buffer + 1;
4951 dbp = cp; /* set dbp to e-o-token */
4952 get_tagname = FALSE;
4953 found_tag = TRUE;
4954 continue;
4956 /* And proceed to check for "extern". */
4958 else if (!incomment && !inquote && !found_tag)
4960 /* Check for proc/fn keywords. */
4961 switch (lowcase (c))
4963 case 'p':
4964 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4965 get_tagname = TRUE;
4966 continue;
4967 case 'f':
4968 if (nocase_tail ("unction"))
4969 get_tagname = TRUE;
4970 continue;
4973 } /* while not eof */
4975 free (tline.buffer);
4980 * Lisp tag functions
4981 * look for (def or (DEF, quote or QUOTE
4984 static void L_getit __P((void));
4986 static void
4987 L_getit ()
4989 if (*dbp == '\'') /* Skip prefix quote */
4990 dbp++;
4991 else if (*dbp == '(')
4993 dbp++;
4994 /* Try to skip "(quote " */
4995 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4996 /* Ok, then skip "(" before name in (defstruct (foo)) */
4997 dbp = skip_spaces (dbp);
4999 get_tag (dbp, NULL);
5002 static void
5003 Lisp_functions (inf)
5004 FILE *inf;
5006 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5008 if (dbp[0] != '(')
5009 continue;
5011 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5013 dbp = skip_non_spaces (dbp);
5014 dbp = skip_spaces (dbp);
5015 L_getit ();
5017 else
5019 /* Check for (foo::defmumble name-defined ... */
5021 dbp++;
5022 while (!notinname (*dbp) && *dbp != ':');
5023 if (*dbp == ':')
5026 dbp++;
5027 while (*dbp == ':');
5029 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5031 dbp = skip_non_spaces (dbp);
5032 dbp = skip_spaces (dbp);
5033 L_getit ();
5042 * Lua script language parsing
5043 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5045 * "function" and "local function" are tags if they start at column 1.
5047 static void
5048 Lua_functions (inf)
5049 FILE *inf;
5051 register char *bp;
5053 LOOP_ON_INPUT_LINES (inf, lb, bp)
5055 if (bp[0] != 'f' && bp[0] != 'l')
5056 continue;
5058 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5060 if (LOOKING_AT (bp, "function"))
5061 get_tag (bp, NULL);
5067 * Postscript tags
5068 * Just look for lines where the first character is '/'
5069 * Also look at "defineps" for PSWrap
5070 * Ideas by:
5071 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5072 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5074 static void
5075 PS_functions (inf)
5076 FILE *inf;
5078 register char *bp, *ep;
5080 LOOP_ON_INPUT_LINES (inf, lb, bp)
5082 if (bp[0] == '/')
5084 for (ep = bp+1;
5085 *ep != '\0' && *ep != ' ' && *ep != '{';
5086 ep++)
5087 continue;
5088 make_tag (bp, ep - bp, TRUE,
5089 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5091 else if (LOOKING_AT (bp, "defineps"))
5092 get_tag (bp, NULL);
5098 * Forth tags
5099 * Ignore anything after \ followed by space or in ( )
5100 * Look for words defined by :
5101 * Look for constant, code, create, defer, value, and variable
5102 * OBP extensions: Look for buffer:, field,
5103 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5105 static void
5106 Forth_words (inf)
5107 FILE *inf;
5109 register char *bp;
5111 LOOP_ON_INPUT_LINES (inf, lb, bp)
5112 while ((bp = skip_spaces (bp))[0] != '\0')
5113 if (bp[0] == '\\' && iswhite(bp[1]))
5114 break; /* read next line */
5115 else if (bp[0] == '(' && iswhite(bp[1]))
5116 do /* skip to ) or eol */
5117 bp++;
5118 while (*bp != ')' && *bp != '\0');
5119 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5120 || LOOKING_AT_NOCASE (bp, "constant")
5121 || LOOKING_AT_NOCASE (bp, "code")
5122 || LOOKING_AT_NOCASE (bp, "create")
5123 || LOOKING_AT_NOCASE (bp, "defer")
5124 || LOOKING_AT_NOCASE (bp, "value")
5125 || LOOKING_AT_NOCASE (bp, "variable")
5126 || LOOKING_AT_NOCASE (bp, "buffer:")
5127 || LOOKING_AT_NOCASE (bp, "field"))
5128 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5129 else
5130 bp = skip_non_spaces (bp);
5135 * Scheme tag functions
5136 * look for (def... xyzzy
5137 * (def... (xyzzy
5138 * (def ... ((...(xyzzy ....
5139 * (set! xyzzy
5140 * Original code by Ken Haase (1985?)
5142 static void
5143 Scheme_functions (inf)
5144 FILE *inf;
5146 register char *bp;
5148 LOOP_ON_INPUT_LINES (inf, lb, bp)
5150 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5152 bp = skip_non_spaces (bp+4);
5153 /* Skip over open parens and white space */
5154 while (notinname (*bp))
5155 bp++;
5156 get_tag (bp, NULL);
5158 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5159 get_tag (bp, NULL);
5164 /* Find tags in TeX and LaTeX input files. */
5166 /* TEX_toktab is a table of TeX control sequences that define tags.
5167 * Each entry records one such control sequence.
5169 * Original code from who knows whom.
5170 * Ideas by:
5171 * Stefan Monnier (2002)
5174 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5176 /* Default set of control sequences to put into TEX_toktab.
5177 The value of environment var TEXTAGS is prepended to this. */
5178 static char *TEX_defenv = "\
5179 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5180 :part:appendix:entry:index:def\
5181 :newcommand:renewcommand:newenvironment:renewenvironment";
5183 static void TEX_mode __P((FILE *));
5184 static void TEX_decode_env __P((char *, char *));
5186 static char TEX_esc = '\\';
5187 static char TEX_opgrp = '{';
5188 static char TEX_clgrp = '}';
5191 * TeX/LaTeX scanning loop.
5193 static void
5194 TeX_commands (inf)
5195 FILE *inf;
5197 char *cp;
5198 linebuffer *key;
5200 /* Select either \ or ! as escape character. */
5201 TEX_mode (inf);
5203 /* Initialize token table once from environment. */
5204 if (TEX_toktab == NULL)
5205 TEX_decode_env ("TEXTAGS", TEX_defenv);
5207 LOOP_ON_INPUT_LINES (inf, lb, cp)
5209 /* Look at each TEX keyword in line. */
5210 for (;;)
5212 /* Look for a TEX escape. */
5213 while (*cp++ != TEX_esc)
5214 if (cp[-1] == '\0' || cp[-1] == '%')
5215 goto tex_next_line;
5217 for (key = TEX_toktab; key->buffer != NULL; key++)
5218 if (strneq (cp, key->buffer, key->len))
5220 register char *p;
5221 int namelen, linelen;
5222 bool opgrp = FALSE;
5224 cp = skip_spaces (cp + key->len);
5225 if (*cp == TEX_opgrp)
5227 opgrp = TRUE;
5228 cp++;
5230 for (p = cp;
5231 (!iswhite (*p) && *p != '#' &&
5232 *p != TEX_opgrp && *p != TEX_clgrp);
5233 p++)
5234 continue;
5235 namelen = p - cp;
5236 linelen = lb.len;
5237 if (!opgrp || *p == TEX_clgrp)
5239 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5240 p++;
5241 linelen = p - lb.buffer + 1;
5243 make_tag (cp, namelen, TRUE,
5244 lb.buffer, linelen, lineno, linecharno);
5245 goto tex_next_line; /* We only tag a line once */
5248 tex_next_line:
5253 #define TEX_LESC '\\'
5254 #define TEX_SESC '!'
5256 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5257 chars accordingly. */
5258 static void
5259 TEX_mode (inf)
5260 FILE *inf;
5262 int c;
5264 while ((c = getc (inf)) != EOF)
5266 /* Skip to next line if we hit the TeX comment char. */
5267 if (c == '%')
5268 while (c != '\n' && c != EOF)
5269 c = getc (inf);
5270 else if (c == TEX_LESC || c == TEX_SESC )
5271 break;
5274 if (c == TEX_LESC)
5276 TEX_esc = TEX_LESC;
5277 TEX_opgrp = '{';
5278 TEX_clgrp = '}';
5280 else
5282 TEX_esc = TEX_SESC;
5283 TEX_opgrp = '<';
5284 TEX_clgrp = '>';
5286 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5287 No attempt is made to correct the situation. */
5288 rewind (inf);
5291 /* Read environment and prepend it to the default string.
5292 Build token table. */
5293 static void
5294 TEX_decode_env (evarname, defenv)
5295 char *evarname;
5296 char *defenv;
5298 register char *env, *p;
5299 int i, len;
5301 /* Append default string to environment. */
5302 env = getenv (evarname);
5303 if (!env)
5304 env = defenv;
5305 else
5307 char *oldenv = env;
5308 env = concat (oldenv, defenv, "");
5311 /* Allocate a token table */
5312 for (len = 1, p = env; p;)
5313 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5314 len++;
5315 TEX_toktab = xnew (len, linebuffer);
5317 /* Unpack environment string into token table. Be careful about */
5318 /* zero-length strings (leading ':', "::" and trailing ':') */
5319 for (i = 0; *env != '\0';)
5321 p = etags_strchr (env, ':');
5322 if (!p) /* End of environment string. */
5323 p = env + strlen (env);
5324 if (p - env > 0)
5325 { /* Only non-zero strings. */
5326 TEX_toktab[i].buffer = savenstr (env, p - env);
5327 TEX_toktab[i].len = p - env;
5328 i++;
5330 if (*p)
5331 env = p + 1;
5332 else
5334 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5335 TEX_toktab[i].len = 0;
5336 break;
5342 /* Texinfo support. Dave Love, Mar. 2000. */
5343 static void
5344 Texinfo_nodes (inf)
5345 FILE * inf;
5347 char *cp, *start;
5348 LOOP_ON_INPUT_LINES (inf, lb, cp)
5349 if (LOOKING_AT (cp, "@node"))
5351 start = cp;
5352 while (*cp != '\0' && *cp != ',')
5353 cp++;
5354 make_tag (start, cp - start, TRUE,
5355 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5361 * HTML support.
5362 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5363 * Contents of <a name=xxx> are tags with name xxx.
5365 * Francesco Potortì, 2002.
5367 static void
5368 HTML_labels (inf)
5369 FILE * inf;
5371 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5372 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5373 bool intag = FALSE; /* inside an html tag, looking for ID= */
5374 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5375 char *end;
5378 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5380 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5381 for (;;) /* loop on the same line */
5383 if (skiptag) /* skip HTML tag */
5385 while (*dbp != '\0' && *dbp != '>')
5386 dbp++;
5387 if (*dbp == '>')
5389 dbp += 1;
5390 skiptag = FALSE;
5391 continue; /* look on the same line */
5393 break; /* go to next line */
5396 else if (intag) /* look for "name=" or "id=" */
5398 while (*dbp != '\0' && *dbp != '>'
5399 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5400 dbp++;
5401 if (*dbp == '\0')
5402 break; /* go to next line */
5403 if (*dbp == '>')
5405 dbp += 1;
5406 intag = FALSE;
5407 continue; /* look on the same line */
5409 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5410 || LOOKING_AT_NOCASE (dbp, "id="))
5412 bool quoted = (dbp[0] == '"');
5414 if (quoted)
5415 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5416 continue;
5417 else
5418 for (end = dbp; *end != '\0' && intoken (*end); end++)
5419 continue;
5420 linebuffer_setlen (&token_name, end - dbp);
5421 strncpy (token_name.buffer, dbp, end - dbp);
5422 token_name.buffer[end - dbp] = '\0';
5424 dbp = end;
5425 intag = FALSE; /* we found what we looked for */
5426 skiptag = TRUE; /* skip to the end of the tag */
5427 getnext = TRUE; /* then grab the text */
5428 continue; /* look on the same line */
5430 dbp += 1;
5433 else if (getnext) /* grab next tokens and tag them */
5435 dbp = skip_spaces (dbp);
5436 if (*dbp == '\0')
5437 break; /* go to next line */
5438 if (*dbp == '<')
5440 intag = TRUE;
5441 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5442 continue; /* look on the same line */
5445 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5446 continue;
5447 make_tag (token_name.buffer, token_name.len, TRUE,
5448 dbp, end - dbp, lineno, linecharno);
5449 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5450 getnext = FALSE;
5451 break; /* go to next line */
5454 else /* look for an interesting HTML tag */
5456 while (*dbp != '\0' && *dbp != '<')
5457 dbp++;
5458 if (*dbp == '\0')
5459 break; /* go to next line */
5460 intag = TRUE;
5461 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5463 inanchor = TRUE;
5464 continue; /* look on the same line */
5466 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5467 || LOOKING_AT_NOCASE (dbp, "<h1>")
5468 || LOOKING_AT_NOCASE (dbp, "<h2>")
5469 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5471 intag = FALSE;
5472 getnext = TRUE;
5473 continue; /* look on the same line */
5475 dbp += 1;
5482 * Prolog support
5484 * Assumes that the predicate or rule starts at column 0.
5485 * Only the first clause of a predicate or rule is added.
5486 * Original code by Sunichirou Sugou (1989)
5487 * Rewritten by Anders Lindgren (1996)
5489 static int prolog_pr __P((char *, char *));
5490 static void prolog_skip_comment __P((linebuffer *, FILE *));
5491 static int prolog_atom __P((char *, int));
5493 static void
5494 Prolog_functions (inf)
5495 FILE *inf;
5497 char *cp, *last;
5498 int len;
5499 int allocated;
5501 allocated = 0;
5502 len = 0;
5503 last = NULL;
5505 LOOP_ON_INPUT_LINES (inf, lb, cp)
5507 if (cp[0] == '\0') /* Empty line */
5508 continue;
5509 else if (iswhite (cp[0])) /* Not a predicate */
5510 continue;
5511 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5512 prolog_skip_comment (&lb, inf);
5513 else if ((len = prolog_pr (cp, last)) > 0)
5515 /* Predicate or rule. Store the function name so that we
5516 only generate a tag for the first clause. */
5517 if (last == NULL)
5518 last = xnew(len + 1, char);
5519 else if (len + 1 > allocated)
5520 xrnew (last, len + 1, char);
5521 allocated = len + 1;
5522 strncpy (last, cp, len);
5523 last[len] = '\0';
5526 if (last != NULL)
5527 free (last);
5531 static void
5532 prolog_skip_comment (plb, inf)
5533 linebuffer *plb;
5534 FILE *inf;
5536 char *cp;
5540 for (cp = plb->buffer; *cp != '\0'; cp++)
5541 if (cp[0] == '*' && cp[1] == '/')
5542 return;
5543 readline (plb, inf);
5545 while (!feof(inf));
5549 * A predicate or rule definition is added if it matches:
5550 * <beginning of line><Prolog Atom><whitespace>(
5551 * or <beginning of line><Prolog Atom><whitespace>:-
5553 * It is added to the tags database if it doesn't match the
5554 * name of the previous clause header.
5556 * Return the size of the name of the predicate or rule, or 0 if no
5557 * header was found.
5559 static int
5560 prolog_pr (s, last)
5561 char *s;
5562 char *last; /* Name of last clause. */
5564 int pos;
5565 int len;
5567 pos = prolog_atom (s, 0);
5568 if (pos < 1)
5569 return 0;
5571 len = pos;
5572 pos = skip_spaces (s + pos) - s;
5574 if ((s[pos] == '.'
5575 || (s[pos] == '(' && (pos += 1))
5576 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5577 && (last == NULL /* save only the first clause */
5578 || len != (int)strlen (last)
5579 || !strneq (s, last, len)))
5581 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5582 return len;
5584 else
5585 return 0;
/*
 * Consume the Prolog atom that starts at S + POS.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  register char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* Unquoted atom: take the whole run of name characters.  */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start); /* that was the closing quote */
        p++;                    /* a doubled quote stands for itself */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5649 * Support for Erlang
5651 * Generates tags for functions, defines, and records.
5652 * Assumes that Erlang functions start at column 0.
5653 * Original code by Anders Lindgren (1996)
5655 static int erlang_func __P((char *, char *));
5656 static void erlang_attribute __P((char *));
5657 static int erlang_atom __P((char *));
5659 static void
5660 Erlang_functions (inf)
5661 FILE *inf;
5663 char *cp, *last;
5664 int len;
5665 int allocated;
5667 allocated = 0;
5668 len = 0;
5669 last = NULL;
5671 LOOP_ON_INPUT_LINES (inf, lb, cp)
5673 if (cp[0] == '\0') /* Empty line */
5674 continue;
5675 else if (iswhite (cp[0])) /* Not function nor attribute */
5676 continue;
5677 else if (cp[0] == '%') /* comment */
5678 continue;
5679 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5680 continue;
5681 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5683 erlang_attribute (cp);
5684 if (last != NULL)
5686 free (last);
5687 last = NULL;
5690 else if ((len = erlang_func (cp, last)) > 0)
5693 * Function. Store the function name so that we only
5694 * generates a tag for the first clause.
5696 if (last == NULL)
5697 last = xnew (len + 1, char);
5698 else if (len + 1 > allocated)
5699 xrnew (last, len + 1, char);
5700 allocated = len + 1;
5701 strncpy (last, cp, len);
5702 last[len] = '\0';
5705 if (last != NULL)
5706 free (last);
5711 * A function definition is added if it matches:
5712 * <beginning of line><Erlang Atom><whitespace>(
5714 * It is added to the tags database if it doesn't match the
5715 * name of the previous clause header.
5717 * Return the size of the name of the function, or 0 if no function
5718 * was found.
5720 static int
5721 erlang_func (s, last)
5722 char *s;
5723 char *last; /* Name of last clause. */
5725 int pos;
5726 int len;
5728 pos = erlang_atom (s);
5729 if (pos < 1)
5730 return 0;
5732 len = pos;
5733 pos = skip_spaces (s + pos) - s;
5735 /* Save only the first clause. */
5736 if (s[pos++] == '('
5737 && (last == NULL
5738 || len != (int)strlen (last)
5739 || !strneq (s, last, len)))
5741 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5742 return len;
5745 return 0;
5750 * Handle attributes. Currently, tags are generated for defines
5751 * and records.
5753 * They are on the form:
5754 * -define(foo, bar).
5755 * -define(Foo(M, N), M+N).
5756 * -record(graph, {vtab = notable, cyclic = true}).
5758 static void
5759 erlang_attribute (s)
5760 char *s;
5762 char *cp = s;
5764 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5765 && *cp++ == '(')
5767 int len = erlang_atom (skip_spaces (cp));
5768 if (len > 0)
5769 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5771 return;
/*
 * Consume the Erlang atom (or variable) that starts S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed before the end of S.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* Unquoted atom: take the whole run of name characters.  */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* Quoted atom: find the closing quote; a backslash protects the
     character after it.  */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote as well */
}
/* Forward declarations of the regexp helpers defined further down.  */
5805 static char *scan_separators __P((char *));
5806 static void add_regex __P((char *, language *));
5807 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string has no separator at all and
 * yields NULL as well.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: do not step past the terminator.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character.  A backslash at
             the very end of the string quotes nothing and leaves the
             regexp unterminated.  */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)            */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)       */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)          */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)          */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)         */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5869 /* Look at the argument of --regex or --no-regex and do the right
5870 thing. Same for each line of a regexp file. */
5871 static void
5872 analyse_regex (regex_arg)
5873 char *regex_arg;
5875 if (regex_arg == NULL)
5877 free_regexps (); /* --no-regex: remove existing regexps */
5878 return;
5881 /* A real --regexp option or a line in a regexp file. */
5882 switch (regex_arg[0])
5884 /* Comments in regexp file or null arg to --regex. */
5885 case '\0':
5886 case ' ':
5887 case '\t':
5888 break;
5890 /* Read a regex file. This is recursive and may result in a
5891 loop, which will stop when the file descriptors are exhausted. */
5892 case '@':
5894 FILE *regexfp;
5895 linebuffer regexbuf;
5896 char *regexfile = regex_arg + 1;
5898 /* regexfile is a file containing regexps, one per line. */
5899 regexfp = fopen (regexfile, "r");
5900 if (regexfp == NULL)
5902 pfatal (regexfile);
5903 return;
5905 linebuffer_init (&regexbuf);
5906 while (readline_internal (&regexbuf, regexfp) > 0)
5907 analyse_regex (regexbuf.buffer);
5908 free (regexbuf.buffer);
5909 fclose (regexfp);
5911 break;
5913 /* Regexp to be used for a specific language only. */
5914 case '{':
5916 language *lang;
5917 char *lang_name = regex_arg + 1;
5918 char *cp;
5920 for (cp = lang_name; *cp != '}'; cp++)
5921 if (*cp == '\0')
5923 error ("unterminated language name in regex: %s", regex_arg);
5924 return;
5926 *cp++ = '\0';
5927 lang = get_language_from_langname (lang_name);
5928 if (lang == NULL)
5929 return;
5930 add_regex (cp, lang);
5932 break;
5934 /* Regexp to be used for any language. */
5935 default:
5936 add_regex (regex_arg, NULL);
5937 break;
5941 /* Separate the regexp pattern, compile it,
5942 and care for optional name and modifiers. */
5943 static void
5944 add_regex (regexp_pattern, lang)
5945 char *regexp_pattern;
5946 language *lang;
5948 static struct re_pattern_buffer zeropattern;
5949 char sep, *pat, *name, *modifiers;
5950 const char *err;
5951 struct re_pattern_buffer *patbuf;
5952 regexp *rp;
5953 bool
5954 force_explicit_name = TRUE, /* do not use implicit tag names */
5955 ignore_case = FALSE, /* case is significant */
5956 multi_line = FALSE, /* matches are done one line at a time */
5957 single_line = FALSE; /* dot does not match newline */
5960 if (strlen(regexp_pattern) < 3)
5962 error ("null regexp", (char *)NULL);
5963 return;
5965 sep = regexp_pattern[0];
5966 name = scan_separators (regexp_pattern);
5967 if (name == NULL)
5969 error ("%s: unterminated regexp", regexp_pattern);
5970 return;
5972 if (name[1] == sep)
5974 error ("null name for regexp \"%s\"", regexp_pattern);
5975 return;
5977 modifiers = scan_separators (name);
5978 if (modifiers == NULL) /* no terminating separator --> no name */
5980 modifiers = name;
5981 name = "";
5983 else
5984 modifiers += 1; /* skip separator */
5986 /* Parse regex modifiers. */
5987 for (; modifiers[0] != '\0'; modifiers++)
5988 switch (modifiers[0])
5990 case 'N':
5991 if (modifiers == name)
5992 error ("forcing explicit tag name but no name, ignoring", NULL);
5993 force_explicit_name = TRUE;
5994 break;
5995 case 'i':
5996 ignore_case = TRUE;
5997 break;
5998 case 's':
5999 single_line = TRUE;
6000 /* FALLTHRU */
6001 case 'm':
6002 multi_line = TRUE;
6003 need_filebuf = TRUE;
6004 break;
6005 default:
6007 char wrongmod [2];
6008 wrongmod[0] = modifiers[0];
6009 wrongmod[1] = '\0';
6010 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6012 break;
6015 patbuf = xnew (1, struct re_pattern_buffer);
6016 *patbuf = zeropattern;
6017 if (ignore_case)
6019 static char lc_trans[CHARS];
6020 int i;
6021 for (i = 0; i < CHARS; i++)
6022 lc_trans[i] = lowcase (i);
6023 patbuf->translate = lc_trans; /* translation table to fold case */
6026 if (multi_line)
6027 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6028 else
6029 pat = regexp_pattern;
6031 if (single_line)
6032 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6033 else
6034 re_set_syntax (RE_SYNTAX_EMACS);
6036 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6037 if (multi_line)
6038 free (pat);
6039 if (err != NULL)
6041 error ("%s while compiling pattern", err);
6042 return;
6045 rp = p_head;
6046 p_head = xnew (1, regexp);
6047 p_head->pattern = savestr (regexp_pattern);
6048 p_head->p_next = rp;
6049 p_head->lang = lang;
6050 p_head->pat = patbuf;
6051 p_head->name = savestr (name);
6052 p_head->error_signaled = FALSE;
6053 p_head->force_explicit_name = force_explicit_name;
6054 p_head->ignore_case = ignore_case;
6055 p_head->multi_line = multi_line;
6059 * Do the substitutions indicated by the regular expression and
6060 * arguments.
6062 static char *
6063 substitute (in, out, regs)
6064 char *in, *out;
6065 struct re_registers *regs;
6067 char *result, *t;
6068 int size, dig, diglen;
6070 result = NULL;
6071 size = strlen (out);
6073 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6074 if (out[size - 1] == '\\')
6075 fatal ("pattern error in \"%s\"", out);
6076 for (t = etags_strchr (out, '\\');
6077 t != NULL;
6078 t = etags_strchr (t + 2, '\\'))
6079 if (ISDIGIT (t[1]))
6081 dig = t[1] - '0';
6082 diglen = regs->end[dig] - regs->start[dig];
6083 size += diglen - 2;
6085 else
6086 size -= 1;
6088 /* Allocate space and do the substitutions. */
6089 assert (size >= 0);
6090 result = xnew (size + 1, char);
6092 for (t = result; *out != '\0'; out++)
6093 if (*out == '\\' && ISDIGIT (*++out))
6095 dig = *out - '0';
6096 diglen = regs->end[dig] - regs->start[dig];
6097 strncpy (t, in + regs->start[dig], diglen);
6098 t += diglen;
6100 else
6101 *t++ = *out;
6102 *t = '\0';
6104 assert (t <= result + size);
6105 assert (t - result == (int)strlen (result));
6107 return result;
6110 /* Deallocate all regexps. */
6111 static void
6112 free_regexps ()
6114 regexp *rp;
6115 while (p_head != NULL)
6117 rp = p_head->p_next;
6118 free (p_head->pattern);
6119 free (p_head->name);
6120 free (p_head);
6121 p_head = rp;
6123 return;
6127 * Reads the whole file as a single string from `filebuf' and looks for
6128 * multi-line regular expressions, creating tags on matches.
6129 * readline already dealt with normal regexps.
6131 * Idea by Ben Wing <ben@666.com> (2002).
6133 static void
6134 regex_tag_multiline ()
6136 char *buffer = filebuf.buffer;
6137 regexp *rp;
6138 char *name;
6140 for (rp = p_head; rp != NULL; rp = rp->p_next)
6142 int match = 0;
6144 if (!rp->multi_line)
6145 continue; /* skip normal regexps */
6147 /* Generic initialisations before parsing file from memory. */
6148 lineno = 1; /* reset global line number */
6149 charno = 0; /* reset global char number */
6150 linecharno = 0; /* reset global char number of line start */
6152 /* Only use generic regexps or those for the current language. */
6153 if (rp->lang != NULL && rp->lang != curfdp->lang)
6154 continue;
6156 while (match >= 0 && match < filebuf.len)
6158 match = re_search (rp->pat, buffer, filebuf.len, charno,
6159 filebuf.len - match, &rp->regs);
6160 switch (match)
6162 case -2:
6163 /* Some error. */
6164 if (!rp->error_signaled)
6166 error ("regexp stack overflow while matching \"%s\"",
6167 rp->pattern);
6168 rp->error_signaled = TRUE;
6170 break;
6171 case -1:
6172 /* No match. */
6173 break;
6174 default:
6175 if (match == rp->regs.end[0])
6177 if (!rp->error_signaled)
6179 error ("regexp matches the empty string: \"%s\"",
6180 rp->pattern);
6181 rp->error_signaled = TRUE;
6183 match = -3; /* exit from while loop */
6184 break;
6187 /* Match occurred. Construct a tag. */
6188 while (charno < rp->regs.end[0])
6189 if (buffer[charno++] == '\n')
6190 lineno++, linecharno = charno;
6191 name = rp->name;
6192 if (name[0] == '\0')
6193 name = NULL;
6194 else /* make a named tag */
6195 name = substitute (buffer, rp->name, &rp->regs);
6196 if (rp->force_explicit_name)
6197 /* Force explicit tag name, if a name is there. */
6198 pfnote (name, TRUE, buffer + linecharno,
6199 charno - linecharno + 1, lineno, linecharno);
6200 else
6201 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6202 charno - linecharno + 1, lineno, linecharno);
6203 break;
6210 static bool
6211 nocase_tail (cp)
6212 char *cp;
6214 register int len = 0;
6216 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6217 cp++, len++;
6218 if (*cp == '\0' && !intoken (dbp[len]))
6220 dbp += len;
6221 return TRUE;
6223 return FALSE;
6226 static void
6227 get_tag (bp, namepp)
6228 register char *bp;
6229 char **namepp;
6231 register char *cp = bp;
6233 if (*bp != '\0')
6235 /* Go till you get to white space or a syntactic break */
6236 for (cp = bp + 1; !notinname (*cp); cp++)
6237 continue;
6238 make_tag (bp, cp - bp, TRUE,
6239 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6242 if (namepp != NULL)
6243 *namepp = savenstr (bp, cp - bp);
6247 * Read a line of text from `stream' into `lbp', excluding the
6248 * newline or CR-NL, if any. Return the number of characters read from
6249 * `stream', which is the length of the line including the newline.
6251 * On DOS or Windows we do not count the CR character, if any before the
6252 * NL, in the returned length; this mirrors the behavior of Emacs on those
6253 * platforms (for text files, it translates CR-NL to NL as it reads in the
6254 * file).
6256 * If multi-line regular expressions are requested, each line read is
6257 * appended to `filebuf'.
6259 static long
6260 readline_internal (lbp, stream)
6261 linebuffer *lbp;
6262 register FILE *stream;
6264 char *buffer = lbp->buffer;
6265 register char *p = lbp->buffer;
6266 register char *pend;
6267 int chars_deleted;
6269 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6271 for (;;)
6273 register int c = getc (stream);
6274 if (p == pend)
6276 /* We're at the end of linebuffer: expand it. */
6277 lbp->size *= 2;
6278 xrnew (buffer, lbp->size, char);
6279 p += buffer - lbp->buffer;
6280 pend = buffer + lbp->size;
6281 lbp->buffer = buffer;
6283 if (c == EOF)
6285 *p = '\0';
6286 chars_deleted = 0;
6287 break;
6289 if (c == '\n')
6291 if (p > buffer && p[-1] == '\r')
6293 p -= 1;
6294 #ifdef DOS_NT
6295 /* Assume CRLF->LF translation will be performed by Emacs
6296 when loading this file, so CRs won't appear in the buffer.
6297 It would be cleaner to compensate within Emacs;
6298 however, Emacs does not know how many CRs were deleted
6299 before any given point in the file. */
6300 chars_deleted = 1;
6301 #else
6302 chars_deleted = 2;
6303 #endif
6305 else
6307 chars_deleted = 1;
6309 *p = '\0';
6310 break;
6312 *p++ = c;
6314 lbp->len = p - buffer;
6316 if (need_filebuf /* we need filebuf for multi-line regexps */
6317 && chars_deleted > 0) /* not at EOF */
6319 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6321 /* Expand filebuf. */
6322 filebuf.size *= 2;
6323 xrnew (filebuf.buffer, filebuf.size, char);
6325 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6326 filebuf.len += lbp->len;
6327 filebuf.buffer[filebuf.len++] = '\n';
6328 filebuf.buffer[filebuf.len] = '\0';
6331 return lbp->len + chars_deleted;
6335 * Like readline_internal, above, but in addition try to match the
6336 * input line against relevant regular expressions and manage #line
6337 * directives.
6339 static void
6340 readline (lbp, stream)
6341 linebuffer *lbp;
6342 FILE *stream;
6344 long result;
6346 linecharno = charno; /* update global char number of line start */
6347 result = readline_internal (lbp, stream); /* read line */
6348 lineno += 1; /* increment global line number */
6349 charno += result; /* increment global char number */
6351 /* Honour #line directives. */
6352 if (!no_line_directive)
6354 static bool discard_until_line_directive;
6356 /* Check whether this is a #line directive. */
6357 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6359 unsigned int lno;
6360 int start = 0;
6362 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6363 && start > 0) /* double quote character found */
6365 char *endp = lbp->buffer + start;
6367 while ((endp = etags_strchr (endp, '"')) != NULL
6368 && endp[-1] == '\\')
6369 endp++;
6370 if (endp != NULL)
6371 /* Ok, this is a real #line directive. Let's deal with it. */
6373 char *taggedabsname; /* absolute name of original file */
6374 char *taggedfname; /* name of original file as given */
6375 char *name; /* temp var */
6377 discard_until_line_directive = FALSE; /* found it */
6378 name = lbp->buffer + start;
6379 *endp = '\0';
6380 canonicalize_filename (name); /* for DOS */
6381 taggedabsname = absolute_filename (name, tagfiledir);
6382 if (filename_is_absolute (name)
6383 || filename_is_absolute (curfdp->infname))
6384 taggedfname = savestr (taggedabsname);
6385 else
6386 taggedfname = relative_filename (taggedabsname,tagfiledir);
6388 if (streq (curfdp->taggedfname, taggedfname))
6389 /* The #line directive is only a line number change. We
6390 deal with this afterwards. */
6391 free (taggedfname);
6392 else
6393 /* The tags following this #line directive should be
6394 attributed to taggedfname. In order to do this, set
6395 curfdp accordingly. */
6397 fdesc *fdp; /* file description pointer */
6399 /* Go look for a file description already set up for the
6400 file indicated in the #line directive. If there is
6401 one, use it from now until the next #line
6402 directive. */
6403 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6404 if (streq (fdp->infname, curfdp->infname)
6405 && streq (fdp->taggedfname, taggedfname))
6406 /* If we remove the second test above (after the &&)
6407 then all entries pertaining to the same file are
6408 coalesced in the tags file. If we use it, then
6409 entries pertaining to the same file but generated
6410 from different files (via #line directives) will
6411 go into separate sections in the tags file. These
6412 alternatives look equivalent. The first one
6413 destroys some apparently useless information. */
6415 curfdp = fdp;
6416 free (taggedfname);
6417 break;
6419 /* Else, if we already tagged the real file, skip all
6420 input lines until the next #line directive. */
6421 if (fdp == NULL) /* not found */
6422 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6423 if (streq (fdp->infabsname, taggedabsname))
6425 discard_until_line_directive = TRUE;
6426 free (taggedfname);
6427 break;
6429 /* Else create a new file description and use that from
6430 now on, until the next #line directive. */
6431 if (fdp == NULL) /* not found */
6433 fdp = fdhead;
6434 fdhead = xnew (1, fdesc);
6435 *fdhead = *curfdp; /* copy curr. file description */
6436 fdhead->next = fdp;
6437 fdhead->infname = savestr (curfdp->infname);
6438 fdhead->infabsname = savestr (curfdp->infabsname);
6439 fdhead->infabsdir = savestr (curfdp->infabsdir);
6440 fdhead->taggedfname = taggedfname;
6441 fdhead->usecharno = FALSE;
6442 fdhead->prop = NULL;
6443 fdhead->written = FALSE;
6444 curfdp = fdhead;
6447 free (taggedabsname);
6448 lineno = lno - 1;
6449 readline (lbp, stream);
6450 return;
6451 } /* if a real #line directive */
6452 } /* if #line is followed by a a number */
6453 } /* if line begins with "#line " */
6455 /* If we are here, no #line directive was found. */
6456 if (discard_until_line_directive)
6458 if (result > 0)
6460 /* Do a tail recursion on ourselves, thus discarding the contents
6461 of the line buffer. */
6462 readline (lbp, stream);
6463 return;
6465 /* End of file. */
6466 discard_until_line_directive = FALSE;
6467 return;
6469 } /* if #line directives should be considered */
6472 int match;
6473 regexp *rp;
6474 char *name;
6476 /* Match against relevant regexps. */
6477 if (lbp->len > 0)
6478 for (rp = p_head; rp != NULL; rp = rp->p_next)
6480 /* Only use generic regexps or those for the current language.
6481 Also do not use multiline regexps, which is the job of
6482 regex_tag_multiline. */
6483 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6484 || rp->multi_line)
6485 continue;
6487 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6488 switch (match)
6490 case -2:
6491 /* Some error. */
6492 if (!rp->error_signaled)
6494 error ("regexp stack overflow while matching \"%s\"",
6495 rp->pattern);
6496 rp->error_signaled = TRUE;
6498 break;
6499 case -1:
6500 /* No match. */
6501 break;
6502 case 0:
6503 /* Empty string matched. */
6504 if (!rp->error_signaled)
6506 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6507 rp->error_signaled = TRUE;
6509 break;
6510 default:
6511 /* Match occurred. Construct a tag. */
6512 name = rp->name;
6513 if (name[0] == '\0')
6514 name = NULL;
6515 else /* make a named tag */
6516 name = substitute (lbp->buffer, rp->name, &rp->regs);
6517 if (rp->force_explicit_name)
6518 /* Force explicit tag name, if a name is there. */
6519 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6520 else
6521 make_tag (name, strlen (name), TRUE,
6522 lbp->buffer, match, lineno, linecharno);
6523 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  return savenstr (cp, strlen (cp));
}
6542 * Return a pointer to a space of size LEN+1 allocated with xnew where
6543 * the string CP has been copied for at most the first LEN characters.
6545 static char *
6546 savenstr (cp, len)
6547 char *cp;
6548 int len;
6550 register char *dp;
6552 dp = xnew (len + 1, char);
6553 strncpy (dp, cp, len);
6554 dp[len] = '\0';
6555 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * The terminating NUL takes part in the search, so looking for '\0'
 * returns a pointer to the end of the string.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;

  r = NULL;
  do
    {
      if (*sp == c)
        r = sp;                 /* remember the latest hit, keep going */
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * The terminating NUL takes part in the search, so looking for '\0'
 * returns a pointer to the end of the string.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  do
    {
      if (*sp == c)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 *
 * Same as BSD's strcasecmp, included for portability.
 * Returns zero when the strings are equal, otherwise the difference of
 * the first pair of characters that do not match (case-folded when
 * both are alphabetic).
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  while (*s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Same as BSD's strncasecmp, included for portability.
 * Returns zero when the first N characters are equal (in particular
 * when N is not positive), otherwise the difference of the first pair
 * of characters that do not match (case-folded when both are
 * alphabetic).
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is tested first
     and decremented only after a pair matched, so that running out of
     characters is not mistaken for a mismatch when S1 ends exactly
     where the count does.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Skip spaces (end of string is not space), return new pointer.
   "Space" is whatever the `iswhite' macro accepts.  */
static char *
skip_spaces (cp)
     char *cp;
{
  while (iswhite (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer.
   The result points either at a character accepted by `iswhite' or at
   the terminating NUL.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (*cp != '\0' && !iswhite (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  S1 and S2 are handed to `error'
   unchanged; the process then terminates with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Report the current `errno' condition with perror, using S1 as the
   message prefix, then terminate with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6683 static void
6684 suggest_asking_for_help ()
6686 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6687 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6688 exit (EXIT_FAILURE);
6691 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6692 static void
6693 error (s1, s2)
6694 const char *s1, *s2;
6696 fprintf (stderr, "%s: ", progname);
6697 fprintf (stderr, s1, s2);
6698 fprintf (stderr, "\n");
6701 /* Return a newly-allocated string whose contents
6702 concatenate those of s1, s2, s3. */
6703 static char *
6704 concat (s1, s2, s3)
6705 char *s1, *s2, *s3;
6707 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6708 char *result = xnew (len1 + len2 + len3 + 1, char);
6710 strcpy (result, s1);
6711 strcpy (result + len1, s2);
6712 strcpy (result + len1 + len2, s3);
6713 result[len1 + len2 + len3] = '\0';
6715 return result;
6719 /* Does the same work as the system V getcwd, but does not need to
6720 guess the buffer size in advance. */
6721 static char *
6722 etags_getcwd ()
6724 #ifdef HAVE_GETCWD
6725 int bufsize = 200;
6726 char *path = xnew (bufsize, char);
6728 while (getcwd (path, bufsize) == NULL)
6730 if (errno != ERANGE)
6731 pfatal ("getcwd");
6732 bufsize *= 2;
6733 free (path);
6734 path = xnew (bufsize, char);
6737 canonicalize_filename (path);
6738 return path;
6740 #else /* not HAVE_GETCWD */
6741 #if MSDOS
6743 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6745 getwd (path);
6747 for (p = path; *p != '\0'; p++)
6748 if (*p == '\\')
6749 *p = '/';
6750 else
6751 *p = lowcase (*p);
6753 return strdup (path);
6754 #else /* not MSDOS */
6755 linebuffer path;
6756 FILE *pipe;
6758 linebuffer_init (&path);
6759 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6760 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6761 pfatal ("pwd");
6762 pclose (pipe);
6764 return path.buffer;
6765 #endif /* not MSDOS */
6766 #endif /* not HAVE_GETCWD */
6769 /* Return a newly allocated string containing the file name of FILE
6770 relative to the absolute directory DIR (which should end with a slash). */
6771 static char *
6772 relative_filename (file, dir)
6773 char *file, *dir;
6775 char *fp, *dp, *afn, *res;
6776 int i;
6778 /* Find the common root of file and dir (with a trailing slash). */
6779 afn = absolute_filename (file, cwd);
6780 fp = afn;
6781 dp = dir;
6782 while (*fp++ == *dp++)
6783 continue;
6784 fp--, dp--; /* back to the first differing char */
6785 #ifdef DOS_NT
6786 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6787 return afn;
6788 #endif
6789 do /* look at the equal chars until '/' */
6790 fp--, dp--;
6791 while (*fp != '/');
6793 /* Build a sequence of "../" strings for the resulting relative file name. */
6794 i = 0;
6795 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6796 i += 1;
6797 res = xnew (3*i + strlen (fp + 1) + 1, char);
6798 res[0] = '\0';
6799 while (i-- > 0)
6800 strcat (res, "../");
6802 /* Add the file name relative to the common root of file and dir. */
6803 strcat (res, fp + 1);
6804 free (afn);
6806 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied, anything else is appended to
   DIR.  "/." components are then removed and every "/dirname/.." pair
   is collapsed; a ".." directly below the root is simply dropped.
   Should nothing be left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *scan, *to, *from;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the start of the previous component,
                 without ever stepping in front of RES.  */
              cp = slashp;      /* the absolute name begins with "/.." */
              for (scan = slashp; scan > res; )
                if (filename_is_absolute (--scan))
                  {
                    cp = scan;
                    break;
                  }
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap by hand, front to back: source and
                 destination overlap, which strcpy does not allow.  */
              to = cp;
              from = slashp + 3;
              while ((*to++ = *from++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlapping move, dropping the "/." component.  */
              to = slashp;
              from = slashp + 2;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is canonicalized in place.  When it contains no slash the
   result is a copy of DIR.  Otherwise FILE is cut short right after
   its last slash for the duration of the call to absolute_filename and
   restored afterwards.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *slashp, *res;
  char save;

  canonicalize_filename (file);
  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  save = slashp[1];             /* character to put back below */
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lower-case drive letter is converted to upper case as
   well.  On every other system the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6928 /* Initialize a linebuffer for use */
6929 static void
6930 linebuffer_init (lbp)
6931 linebuffer *lbp;
6933 lbp->size = (DEBUG) ? 3 : 200;
6934 lbp->buffer = xnew (lbp->size, char);
6935 lbp->buffer[0] = '\0';
6936 lbp->len = 0;
6939 /* Set the minimum size of a string contained in a linebuffer. */
6940 static void
6941 linebuffer_setlen (lbp, toksize)
6942 linebuffer *lbp;
6943 int toksize;
6945 while (lbp->size <= toksize)
6947 lbp->size *= 2;
6948 xrnew (lbp->buffer, lbp->size, char);
6950 lbp->len = toksize;
6953 /* Like malloc but get fatal error if memory is exhausted. */
6954 static PTR
6955 xmalloc (size)
6956 unsigned int size;
6958 PTR result = (PTR) malloc (size);
6959 if (result == NULL)
6960 fatal ("virtual memory exhausted", (char *)NULL);
6961 return result;
6964 static PTR
6965 xrealloc (ptr, size)
6966 char *ptr;
6967 unsigned int size;
6969 PTR result = (PTR) realloc (ptr, size);
6970 if (result == NULL)
6971 fatal ("virtual memory exhausted", (char *)NULL);
6972 return result;
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */
/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
   (do not change this comment) */

/* etags.c ends here */