Add arch tagline
[emacs.git] / lib-src / etags.c
blob40b7242e2e319cdfa676d99fa57dd493439a78cd
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
84 char pot_etags_version[] = "@(#) pot revision number is 17.38";
86 #define TRUE 1
87 #define FALSE 0
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv __P((const char *));
164 extern char *strcpy __P((char *, const char *));
165 extern char *strncpy __P((char *, const char *, unsigned long));
166 extern char *strcat __P((char *, const char *));
167 extern char *strncat __P((char *, const char *, unsigned long));
168 extern int strcmp __P((const char *, const char *));
169 extern int strncmp __P((const char *, const char *, unsigned long));
170 extern int system __P((const char *));
171 extern unsigned long strlen __P((const char *));
172 extern void *malloc __P((unsigned long));
173 extern void *realloc __P((void *, unsigned long));
174 extern void exit __P((int));
175 extern void free __P((void *));
176 extern void *memmove __P((void *, const void *, unsigned long));
177 # ifdef VMS
178 # define EXIT_SUCCESS 1
179 # define EXIT_FAILURE 0
180 # else /* no VMS */
181 # define EXIT_SUCCESS 0
182 # define EXIT_FAILURE 1
183 # endif
184 # endif
185 #endif /* !WINDOWSNT */
187 #ifdef HAVE_UNISTD_H
188 # include <unistd.h>
189 #else
190 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
191 extern char *getcwd (char *buf, size_t size);
192 # endif
193 #endif /* HAVE_UNISTD_H */
195 #include <stdio.h>
196 #include <ctype.h>
197 #include <errno.h>
198 #ifndef errno
199 extern int errno;
200 #endif
201 #include <sys/types.h>
202 #include <sys/stat.h>
204 #include <assert.h>
205 #ifdef NDEBUG
206 # undef assert /* some systems have a buggy assert.h */
207 # define assert(x) ((void) 0)
208 #endif
210 #if !defined (S_ISREG) && defined (S_IFREG)
211 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
212 #endif
/* Normalize NO_LONG_OPTIONS to a TRUE/FALSE value, the same way DEBUG,
   MSDOS and CTAGS are normalized in this file.  Define NO_LONG_OPTIONS
   when building if you don't have GNU getopt: getopt_long is then
   mapped onto plain getopt, dropping the long-option arguments.  */
#ifdef NO_LONG_OPTIONS
  /* Drop the user's definition first: redefining a macro with a
     different replacement list is a constraint violation.  */
# undef NO_LONG_OPTIONS
# define NO_LONG_OPTIONS TRUE
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
224 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
225 # ifdef __CYGWIN__ /* compiling on Cygwin */
226 !!! NOTICE !!!
227 the regex.h distributed with Cygwin is not compatible with etags, alas!
228 If you want regular expression support, you should delete this notice and
229 arrange to use the GNU regex.h and regex.c.
230 # endif
231 #endif
232 #include <regex.h>
234 /* Define CTAGS to make the program "ctags" compatible with the usual one.
235 Leave it undefined to make the program "etags", which makes emacs-style
236 tag tables and tags typedefs, #defines and struct/union/enum by default. */
237 #ifdef CTAGS
238 # undef CTAGS
239 # define CTAGS TRUE
240 #else
241 # define CTAGS FALSE
242 #endif
/* String equality predicates.  Each one evaluates to non-zero when S and
   T (or their first N characters) compare equal.  Both arguments are
   handed straight to the comparison routine, so both must be non-null;
   that is what the assertion checks when assertions are enabled.  The
   case-insensitive variants use etags_strcasecmp and
   etags_strncasecmp.  */
#define streq(s,t)        (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)    (assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)     (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* Character classification helpers.  CHAR masks its argument down to the
   range 0..CHARS-1, so a char value of either signedness can index the
   lookup tables below or be passed to the <ctype.h> routines.  */
#define CHARS 256               /* size of the per-character lookup tables */
#define CHAR(ch)        ((CHARS - 1) & (unsigned int) (ch))

/* Table-driven predicates; the tables are filled from the strings named
   in each comment.  */
#define iswhite(ch)     (_wht[CHAR (ch)]) /* ch is white (see white) */
#define notinname(ch)   (_nin[CHAR (ch)]) /* ch is not in a name (see nonam) */
#define begtoken(ch)    (_btk[CHAR (ch)]) /* ch can start token (see begtk) */
#define intoken(ch)     (_itk[CHAR (ch)]) /* ch can be in token (see midtk) */
#define endtoken(ch)    (_etk[CHAR (ch)]) /* ch ends tokens (see endtk) */

/* <ctype.h> wrappers that always receive a non-negative argument.  */
#define ISALNUM(ch)     isalnum (CHAR (ch))
#define ISALPHA(ch)     isalpha (CHAR (ch))
#define ISDIGIT(ch)     isdigit (CHAR (ch))
#define ISLOWER(ch)     islower (CHAR (ch))

#define lowcase(ch)     tolower (CHAR (ch))
#define upcase(ch)      toupper (CHAR (ch))
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew requests room for N objects of type Type and casts the result to
 * Type *.  xrnew resizes the storage OldPointer designates to N objects
 * and assigns the new address back to OldPointer.  When DEBUG is on,
 * both go through the tracing routines declared in chkmalloc.h, which
 * are also given the file and line of the call.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
284 #define bool int
286 typedef void Lang_function __P((FILE *));
/* A decompression filter, identified by the file name suffix it
   handles.  */
typedef struct
{
  char *suffix;         /* file name suffix for this compressor */
  char *command;        /* takes one arg and decompresses to stdout */
} compressor;
294 typedef struct
296 char *name; /* language name */
297 char *help; /* detailed help for the language */
298 Lang_function *function; /* parse function */
299 char **suffixes; /* name suffixes of this language's files */
300 char **filenames; /* names of this language's files */
301 char **interpreters; /* interpreters for this language */
302 bool metasource; /* source used to generate other sources */
303 } language;
305 typedef struct fdesc
307 struct fdesc *next; /* for the linked list */
308 char *infname; /* uncompressed input file name */
309 char *infabsname; /* absolute uncompressed input file name */
310 char *infabsdir; /* absolute dir of input file */
311 char *taggedfname; /* file name to write in tagfile */
312 language *lang; /* language of file */
313 char *prop; /* file properties to write in tagfile */
314 bool usecharno; /* etags tags shall contain char number */
315 bool written; /* entry written in the tags file */
316 } fdesc;
318 typedef struct node_st
319 { /* sorting structure */
320 struct node_st *left, *right; /* left and right sons */
321 fdesc *fdp; /* description of file to whom tag belongs */
322 char *name; /* tag name */
323 char *regex; /* search regexp */
324 bool valid; /* write this tag on the tag file */
325 bool is_func; /* function tag: use regexp in CTAGS mode */
326 bool been_warned; /* warning already given for duplicated tag */
327 int lno; /* line number tag is on */
328 long cno; /* character number line starts on */
329 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* allocated size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
345 /* Used to support mixing of --lang and file names. */
346 typedef struct
348 enum {
349 at_language, /* a language specification */
350 at_regexp, /* a regular expression */
351 at_filename, /* a file name */
352 at_stdin, /* read from stdin here */
353 at_end /* stop parsing the list */
354 } arg_type; /* argument type */
355 language *lang; /* language associated with the argument */
356 char *what; /* the argument itself */
357 } argument;
359 /* Structure defining a regular expression. */
360 typedef struct regexp
362 struct regexp *p_next; /* pointer to next in list */
363 language *lang; /* if set, use only for this language */
364 char *pattern; /* the regexp pattern */
365 char *name; /* tag name */
366 struct re_pattern_buffer *pat; /* the compiled pattern */
367 struct re_registers regs; /* re registers */
368 bool error_signaled; /* already signaled for this regexp */
369 bool force_explicit_name; /* do not allow implict tag name */
370 bool ignore_case; /* ignore case when matching */
371 bool multi_line; /* do a multi-line match on the whole file */
372 } regexp;
375 /* Many compilers barf on this:
376 Lang_function Ada_funcs;
377 so let's write it this way */
378 static void Ada_funcs __P((FILE *));
379 static void Asm_labels __P((FILE *));
380 static void C_entries __P((int c_ext, FILE *));
381 static void default_C_entries __P((FILE *));
382 static void plain_C_entries __P((FILE *));
383 static void Cjava_entries __P((FILE *));
384 static void Cobol_paragraphs __P((FILE *));
385 static void Cplusplus_entries __P((FILE *));
386 static void Cstar_entries __P((FILE *));
387 static void Erlang_functions __P((FILE *));
388 static void Forth_words __P((FILE *));
389 static void Fortran_functions __P((FILE *));
390 static void HTML_labels __P((FILE *));
391 static void Lisp_functions __P((FILE *));
392 static void Lua_functions __P((FILE *));
393 static void Makefile_targets __P((FILE *));
394 static void Pascal_functions __P((FILE *));
395 static void Perl_functions __P((FILE *));
396 static void PHP_functions __P((FILE *));
397 static void PS_functions __P((FILE *));
398 static void Prolog_functions __P((FILE *));
399 static void Python_functions __P((FILE *));
400 static void Scheme_functions __P((FILE *));
401 static void TeX_commands __P((FILE *));
402 static void Texinfo_nodes __P((FILE *));
403 static void Yacc_entries __P((FILE *));
404 static void just_read_file __P((FILE *));
406 static void print_language_names __P((void));
407 static void print_version __P((void));
408 static void print_help __P((argument *));
409 int main __P((int, char **));
411 static compressor *get_compressor_from_suffix __P((char *, char **));
412 static language *get_language_from_langname __P((const char *));
413 static language *get_language_from_interpreter __P((char *));
414 static language *get_language_from_filename __P((char *, bool));
415 static void readline __P((linebuffer *, FILE *));
416 static long readline_internal __P((linebuffer *, FILE *));
417 static bool nocase_tail __P((char *));
418 static void get_tag __P((char *, char **));
420 static void analyse_regex __P((char *));
421 static void free_regexps __P((void));
422 static void regex_tag_multiline __P((void));
423 static void error __P((const char *, const char *));
424 static void suggest_asking_for_help __P((void));
425 void fatal __P((char *, char *));
426 static void pfatal __P((char *));
427 static void add_node __P((node *, node **));
429 static void init __P((void));
430 static void process_file_name __P((char *, language *));
431 static void process_file __P((FILE *, char *, language *));
432 static void find_entries __P((FILE *));
433 static void free_tree __P((node *));
434 static void free_fdesc __P((fdesc *));
435 static void pfnote __P((char *, bool, char *, int, int, long));
436 static void make_tag __P((char *, int, bool, char *, int, int, long));
437 static void invalidate_nodes __P((fdesc *, node **));
438 static void put_entries __P((node *));
440 static char *concat __P((char *, char *, char *));
441 static char *skip_spaces __P((char *));
442 static char *skip_non_spaces __P((char *));
443 static char *savenstr __P((char *, int));
444 static char *savestr __P((char *));
445 static char *etags_strchr __P((const char *, int));
446 static char *etags_strrchr __P((const char *, int));
447 static int etags_strcasecmp __P((const char *, const char *));
448 static int etags_strncasecmp __P((const char *, const char *, int));
449 static char *etags_getcwd __P((void));
450 static char *relative_filename __P((char *, char *));
451 static char *absolute_filename __P((char *, char *));
452 static char *absolute_dirname __P((char *, char *));
453 static bool filename_is_absolute __P((char *f));
454 static void canonicalize_filename __P((char *));
455 static void linebuffer_init __P((linebuffer *));
456 static void linebuffer_setlen __P((linebuffer *, int));
457 static PTR xmalloc __P((unsigned int));
458 static PTR xrealloc __P((char *, unsigned int));
461 static char searchar = '/'; /* use /.../ searches */
463 static char *tagfile; /* output file */
464 static char *progname; /* name this program was invoked with */
465 static char *cwd; /* current working directory */
466 static char *tagfiledir; /* directory of tagfile */
467 static FILE *tagf; /* ioptr for tags file */
469 static fdesc *fdhead; /* head of file description list */
470 static fdesc *curfdp; /* current file description */
471 static int lineno; /* line number of current line */
472 static long charno; /* current character number */
473 static long linecharno; /* charno of start of current line */
474 static char *dbp; /* pointer to start of current tag */
476 static const int invalidcharno = -1;
478 static node *nodehead; /* the head of the binary tree of tags */
479 static node *last_node; /* the last node created */
481 static linebuffer lb; /* the current line */
482 static linebuffer filebuf; /* a buffer containing the whole file */
483 static linebuffer token_name; /* a buffer containing a tag name */
485 /* boolean "functions" (see init) */
486 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
487 static char
488 /* white chars */
489 *white = " \f\t\n\r\v",
490 /* not in a name */
491 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
492 /* token ending chars */
493 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
494 /* token starting chars */
495 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
496 /* valid in-token chars */
497 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
499 static bool append_to_tagfile; /* -a: append to tags */
500 /* The next five default to TRUE in C and derived languages. */
501 static bool typedefs; /* -t: create tags for C and Ada typedefs */
502 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
503 /* 0 struct/enum/union decls, and C++ */
504 /* member functions. */
505 static bool constantypedefs; /* -d: create tags for C #define, enum */
506 /* constants and variables. */
507 /* -D: opposite of -d. Default under ctags. */
508 static bool globals; /* create tags for global variables */
509 static bool members; /* create tags for C member variables */
510 static bool declarations; /* --declarations: tag them and extern in C&Co*/
511 static bool no_line_directive; /* ignore #line directives (undocumented) */
512 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
513 static bool update; /* -u: update tags */
514 static bool vgrind_style; /* -v: create vgrind style index output */
515 static bool no_warnings; /* -w: suppress warnings (undocumented) */
516 static bool cxref_style; /* -x: create cxref style output */
517 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
518 static bool ignoreindent; /* -I: ignore indentation in C */
519 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
521 /* STDIN is defined in LynxOS system headers */
522 #ifdef STDIN
523 # undef STDIN
524 #endif
526 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
527 static bool parsing_stdin; /* --parse-stdin used */
529 static regexp *p_head; /* list of all regexps */
530 static bool need_filebuf; /* some regexes are multi-line */
532 static struct option longopts[] =
534 { "append", no_argument, NULL, 'a' },
535 { "packages-only", no_argument, &packages_only, TRUE },
536 { "c++", no_argument, NULL, 'C' },
537 { "declarations", no_argument, &declarations, TRUE },
538 { "no-line-directive", no_argument, &no_line_directive, TRUE },
539 { "no-duplicates", no_argument, &no_duplicates, TRUE },
540 { "help", no_argument, NULL, 'h' },
541 { "help", no_argument, NULL, 'H' },
542 { "ignore-indentation", no_argument, NULL, 'I' },
543 { "language", required_argument, NULL, 'l' },
544 { "members", no_argument, &members, TRUE },
545 { "no-members", no_argument, &members, FALSE },
546 { "output", required_argument, NULL, 'o' },
547 { "regex", required_argument, NULL, 'r' },
548 { "no-regex", no_argument, NULL, 'R' },
549 { "ignore-case-regex", required_argument, NULL, 'c' },
550 { "parse-stdin", required_argument, NULL, STDIN },
551 { "version", no_argument, NULL, 'V' },
553 #if CTAGS /* Ctags options */
554 { "backward-search", no_argument, NULL, 'B' },
555 { "cxref", no_argument, NULL, 'x' },
556 { "defines", no_argument, NULL, 'd' },
557 { "globals", no_argument, &globals, TRUE },
558 { "typedefs", no_argument, NULL, 't' },
559 { "typedefs-and-c++", no_argument, NULL, 'T' },
560 { "update", no_argument, NULL, 'u' },
561 { "vgrind", no_argument, NULL, 'v' },
562 { "no-warn", no_argument, NULL, 'w' },
564 #else /* Etags options */
565 { "no-defines", no_argument, NULL, 'D' },
566 { "no-globals", no_argument, &globals, FALSE },
567 { "include", required_argument, NULL, 'i' },
568 #endif
569 { NULL }
572 static compressor compressors[] =
574 { "z", "gzip -d -c"},
575 { "Z", "gzip -d -c"},
576 { "gz", "gzip -d -c"},
577 { "GZ", "gzip -d -c"},
578 { "bz2", "bzip2 -d -c" },
579 { NULL }
583 * Language stuff.
586 /* Ada code */
587 static char *Ada_suffixes [] =
588 { "ads", "adb", "ada", NULL };
589 static char Ada_help [] =
590 "In Ada code, functions, procedures, packages, tasks and types are\n\
591 tags. Use the `--packages-only' option to create tags for\n\
592 packages only.\n\
593 Ada tag names have suffixes indicating the type of entity:\n\
594 Entity type: Qualifier:\n\
595 ------------ ----------\n\
596 function /f\n\
597 procedure /p\n\
598 package spec /s\n\
599 package body /b\n\
600 type /t\n\
601 task /k\n\
602 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
603 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
604 will just search for any tag `bidule'.";
606 /* Assembly code */
/* File name suffixes treated as assembler source.  The list is
   NULL-terminated.  */
static char *Asm_suffixes [] =
  { "a",        /* Unix assembler */
    "asm",      /* Microcontroller assembly */
    "def",      /* BSO/Tasking definition includes */
    "inc",      /* Microcontroller include files */
    "ins",      /* Microcontroller include files */
    "s", "sa",  /* Unix assembler */
    "S",        /* cpp-processed Unix assembler */
    "src",      /* BSO/Tasking C compiler output */
    NULL
  };
618 static char Asm_help [] =
619 "In assembler code, labels appearing at the beginning of a line,\n\
620 followed by a colon, are tags.";
623 /* Note that .c and .h can be considered C++, if the --c++ flag was
624 given, or if the `class' or `template' keywords are met inside the file.
625 That is why default_C_entries is called for these. */
626 static char *default_C_suffixes [] =
627 { "c", "h", NULL };
628 #if CTAGS /* C help for Ctags */
629 static char default_C_help [] =
630 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
631 Use -T to tag definitions of `struct', `union' and `enum'.\n\
632 Use -d to tag `#define' macro definitions and `enum' constants.\n\
633 Use --globals to tag global variables.\n\
634 You can tag function declarations and external variables by\n\
635 using `--declarations', and struct members by using `--members'.";
636 #else /* C help for Etags */
637 static char default_C_help [] =
638 "In C code, any C function or typedef is a tag, and so are\n\
639 definitions of `struct', `union' and `enum'. `#define' macro\n\
640 definitions and `enum' constants are tags unless you specify\n\
641 `--no-defines'. Global variables are tags unless you specify\n\
642 `--no-globals' and so are struct members unless you specify\n\
643 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
644 `--no-members' can make the tags table file much smaller.\n\
645 You can tag function declarations and external variables by\n\
646 using `--declarations'.";
647 #endif /* C help for Ctags and Etags */
649 static char *Cplusplus_suffixes [] =
650 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
651 "M", /* Objective C++ */
652 "pdb", /* Postscript with C syntax */
653 NULL };
654 static char Cplusplus_help [] =
655 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
656 --help --lang=c --lang=c++ for full help.)\n\
657 In addition to C tags, member functions are also recognized. Member\n\
658 variables are recognized unless you use the `--no-members' option.\n\
659 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
660 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
661 `operator+'.";
663 static char *Cjava_suffixes [] =
664 { "java", NULL };
/* Detailed help text for Java.  */
static char Cjava_help [] =
"In Java code, all the tag constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
670 static char *Cobol_suffixes [] =
671 { "COB", "cob", NULL };
672 static char Cobol_help [] =
673 "In Cobol code, tags are paragraph names; that is, any word\n\
674 starting in column 8 and followed by a period.";
676 static char *Cstar_suffixes [] =
677 { "cs", "hs", NULL };
679 static char *Erlang_suffixes [] =
680 { "erl", "hrl", NULL };
681 static char Erlang_help [] =
682 "In Erlang code, the tags are the functions, records and macros\n\
683 defined in the file.";
/* File name suffixes that select the Forth parser; NULL-terminated.
   File-local, like the other per-language suffix tables.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
687 static char Forth_help [] =
688 "In Forth code, tags are words defined by `:',\n\
689 constant, code, create, defer, value, variable, buffer:, field.";
691 static char *Fortran_suffixes [] =
692 { "F", "f", "f90", "for", NULL };
693 static char Fortran_help [] =
694 "In Fortran code, functions, subroutines and block data are tags.";
696 static char *HTML_suffixes [] =
697 { "htm", "html", "shtml", NULL };
698 static char HTML_help [] =
699 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
700 `h3' headers. Also, tags are `name=' in anchors and all\n\
701 occurrences of `id='.";
703 static char *Lisp_suffixes [] =
704 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
705 static char Lisp_help [] =
706 "In Lisp code, any function defined with `defun', any variable\n\
707 defined with `defvar' or `defconst', and in general the first\n\
708 argument of any expression that starts with `(def' in column zero\n\
709 is a tag.";
711 static char *Lua_suffixes [] =
712 { "lua", "LUA", NULL };
713 static char Lua_help [] =
714 "In Lua scripts, all functions are tags.";
716 static char *Makefile_filenames [] =
717 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
718 static char Makefile_help [] =
719 "In makefiles, targets are tags; additionally, variables are tags\n\
720 unless you specify `--no-globals'.";
722 static char *Objc_suffixes [] =
723 { "lm", /* Objective lex file */
724 "m", /* Objective C file */
725 NULL };
726 static char Objc_help [] =
727 "In Objective C code, tags include Objective C definitions for classes,\n\
728 class categories, methods and protocols. Tags for variables and\n\
729 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
730 (Use --help --lang=c --lang=objc --lang=java for full help.)";
732 static char *Pascal_suffixes [] =
733 { "p", "pas", NULL };
734 static char Pascal_help [] =
735 "In Pascal code, the tags are the functions and procedures defined\n\
736 in the file.";
737 /* " // this is for working around an Emacs highlighting bug... */
739 static char *Perl_suffixes [] =
740 { "pl", "pm", NULL };
741 static char *Perl_interpreters [] =
742 { "perl", "@PERL@", NULL };
743 static char Perl_help [] =
744 "In Perl code, the tags are the packages, subroutines and variables\n\
745 defined by the `package', `sub', `my' and `local' keywords. Use\n\
746 `--globals' if you want to tag global variables. Tags for\n\
747 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
748 defined in the default package is `main::SUB'.";
750 static char *PHP_suffixes [] =
751 { "php", "php3", "php4", NULL };
752 static char PHP_help [] =
753 "In PHP code, tags are functions, classes and defines. Unless you use\n\
754 the `--no-members' option, vars are tags too.";
756 static char *plain_C_suffixes [] =
757 { "pc", /* Pro*C file */
758 NULL };
760 static char *PS_suffixes [] =
761 { "ps", "psw", NULL }; /* .psw is for PSWrap */
762 static char PS_help [] =
763 "In PostScript code, the tags are the functions.";
765 static char *Prolog_suffixes [] =
766 { "prolog", NULL };
767 static char Prolog_help [] =
768 "In Prolog code, tags are predicates and rules at the beginning of\n\
769 line.";
771 static char *Python_suffixes [] =
772 { "py", NULL };
773 static char Python_help [] =
774 "In Python code, `def' or `class' at the beginning of a line\n\
775 generate a tag.";
777 /* Can't do the `SCM' or `scm' prefix with a version number. */
778 static char *Scheme_suffixes [] =
779 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
780 static char Scheme_help [] =
781 "In Scheme code, tags include anything defined with `def' or with a\n\
782 construct whose name starts with `def'. They also include\n\
783 variables set with `set!' at top level in the file.";
785 static char *TeX_suffixes [] =
786 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
787 static char TeX_help [] =
788 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
789 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
790 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
791 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
792 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
794 Other commands can be specified by setting the environment variable\n\
795 `TEXTAGS' to a colon-separated list like, for example,\n\
796 TEXTAGS=\"mycommand:myothercommand\".";
799 static char *Texinfo_suffixes [] =
800 { "texi", "texinfo", "txi", NULL };
801 static char Texinfo_help [] =
802 "for texinfo files, lines starting with @node are tagged.";
804 static char *Yacc_suffixes [] =
805 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
806 static char Yacc_help [] =
807 "In Bison or Yacc input files, each rule defines as a tag the\n\
808 nonterminal it constructs. The portions of the file that contain\n\
809 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
810 for full help).";
/* Detailed help text for the pseudo-language `auto'.  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
816 static char none_help [] =
817 "`none' is not a real language, it indicates to only do\n\
818 regexp processing on files.";
820 static char no_lang_help [] =
821 "No detailed help available for this language.";
825 * Table of languages.
827 * It is ok for a given function to be listed under more than one
828 * name. I just didn't.
/* Table of all known languages, scanned linearly by the lookup
   functions and by print_language_names; the scan stops at the entry
   whose name is NULL.  Every full entry gives, in order: the language
   name accepted by --language, its help text, its parser function and
   its NULL-terminated suffix list.  Three entries carry extra fields:
   a list of whole file names (makefile), a list of interpreter names
   (perl), and a TRUE flag (yacc) -- presumably the `metasource' member
   tested in process_file; confirm against the `language' struct. */
831 static language lang_names [] =
833 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
834 { "asm", Asm_help, Asm_labels, Asm_suffixes },
835 { "c", default_C_help, default_C_entries, default_C_suffixes },
836 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
837 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
838 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
839 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
840 { "forth", Forth_help, Forth_words, Forth_suffixes },
841 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
842 { "html", HTML_help, HTML_labels, HTML_suffixes },
843 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
844 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
845 { "lua", Lua_help, Lua_functions, Lua_suffixes },
846 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
847 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
848 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
849 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
850 { "php", PHP_help, PHP_functions, PHP_suffixes },
851 { "postscript",PS_help, PS_functions, PS_suffixes },
852 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
853 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
854 { "python", Python_help, Python_functions, Python_suffixes },
855 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
856 { "tex", TeX_help, TeX_commands, TeX_suffixes },
857 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
858 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
859 { "auto", auto_help }, /* default guessing scheme */
860 { "none", none_help, just_read_file }, /* regexp matching only */
861 { NULL } /* end of list */
865 static void
866 print_language_names ()
868 language *lang;
869 char **name, **ext;
871 puts ("\nThese are the currently supported languages, along with the\n\
872 default file names and dot suffixes:");
873 for (lang = lang_names; lang->name != NULL; lang++)
875 printf (" %-*s", 10, lang->name);
876 if (lang->filenames != NULL)
877 for (name = lang->filenames; *name != NULL; name++)
878 printf (" %s", *name);
879 if (lang->suffixes != NULL)
880 for (ext = lang->suffixes; *ext != NULL; ext++)
881 printf (" .%s", *ext);
882 puts ("");
884 puts ("where `auto' means use default language for files based on file\n\
885 name suffix, and `none' means only do regexp processing on files.\n\
886 If no language is specified and no matching suffix is found,\n\
887 the first line of the file is read for a sharp-bang (#!) sequence\n\
888 followed by the name of an interpreter. If no such sequence is found,\n\
889 Fortran is tried first; if no tags are found, C is tried next.\n\
890 When parsing any C file, a \"class\" or \"template\" keyword\n\
891 switches to C++.");
892 puts ("Compressed files are supported using gzip and bzip2.\n\
894 For detailed help on a given language use, for example,\n\
895 etags --help --lang=ada.");
898 #ifndef EMACS_NAME
899 # define EMACS_NAME "standalone"
900 #endif
901 #ifndef VERSION
902 # define VERSION "17.38"
903 #endif
904 static void
905 print_version ()
907 /* Makes it easier to update automatically. */
908 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
910 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
911 puts (emacs_copyright);
912 puts ("This program is distributed under the terms in ETAGS.README");
914 exit (EXIT_SUCCESS);
917 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
918 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
919 #endif
/*
 * Print help and exit successfully; this function does not return.
 *
 * ARGBUFFER is the argument list built by main, terminated by an
 * entry of type at_end.  If it contains any at_language entries, the
 * help text of each of those languages is printed (separated by an
 * empty line) and nothing else.  Otherwise the usage line and the
 * description of every option are printed, followed by the language
 * table from print_language_names and the bug-report address.
 *
 * Which options are described depends on CTAGS (ctags vs. etags
 * personality), on NO_LONG_OPTIONS, and on
 * PRINT_UNDOCUMENTED_OPTIONS_HELP, which defaults to FALSE above.
 */
921 static void
922 print_help (argbuffer)
923 argument *argbuffer;
925 bool help_for_lang = FALSE;
927 for (; argbuffer->arg_type != at_end; argbuffer++)
928 if (argbuffer->arg_type == at_language)
930 if (help_for_lang)
931 puts ("");
932 puts (argbuffer->lang->help);
933 help_for_lang = TRUE;
936 if (help_for_lang)
937 exit (EXIT_SUCCESS);
939 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
941 These are the options accepted by %s.\n", progname, progname);
942 if (NO_LONG_OPTIONS)
943 puts ("WARNING: long option names do not work with this executable,\n\
944 as it is not linked with GNU getopt.");
945 else
946 puts ("You may use unambiguous abbreviations for the long option names.");
947 puts (" A - as file name means read names from stdin (one per line).\n\
948 Absolute names are stored in the output file as they are.\n\
949 Relative ones are stored relative to the output file's directory.\n");
951 puts ("-a, --append\n\
952 Append tag entries to existing tags file.");
954 puts ("--packages-only\n\
955 For Ada files, only generate tags for packages.");
957 if (CTAGS)
958 puts ("-B, --backward-search\n\
959 Write the search commands for the tag entries using '?', the\n\
960 backward-search command instead of '/', the forward-search command.");
962 /* This option is mostly obsolete, because etags can now automatically
963 detect C++. Retained for backward compatibility and for debugging and
964 experimentation. In principle, we could want to tag as C++ even
965 before any "class" or "template" keyword.
966 puts ("-C, --c++\n\
967 Treat files whose name suffix defaults to C language as C++ files.");
970 puts ("--declarations\n\
971 In C and derived languages, create tags for function declarations,");
972 if (CTAGS)
973 puts ("\tand create tags for extern variables if --globals is used.");
974 else
975 puts
976 ("\tand create tags for extern variables unless --no-globals is used.");
978 if (CTAGS)
979 puts ("-d, --defines\n\
980 Create tag entries for C #define constants and enum constants, too.");
981 else
982 puts ("-D, --no-defines\n\
983 Don't create tag entries for C #define constants and enum constants.\n\
984 This makes the tags file smaller.");
986 if (!CTAGS)
987 puts ("-i FILE, --include=FILE\n\
988 Include a note in tag file indicating that, when searching for\n\
989 a tag, one should also consult the tags file FILE after\n\
990 checking the current file.");
992 puts ("-l LANG, --language=LANG\n\
993 Force the following files to be considered as written in the\n\
994 named language up to the next --language=LANG option.");
996 if (CTAGS)
997 puts ("--globals\n\
998 Create tag entries for global variables in some languages.");
999 else
1000 puts ("--no-globals\n\
1001 Do not create tag entries for global variables in some\n\
1002 languages. This makes the tags file smaller.");
1004 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1005 puts ("--no-line-directive\n\
1006 Ignore #line preprocessor directives in C and derived languages.");
1008 if (CTAGS)
1009 puts ("--members\n\
1010 Create tag entries for members of structures in some languages.");
1011 else
1012 puts ("--no-members\n\
1013 Do not create tag entries for members of structures\n\
1014 in some languages.");
1016 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1017 Make a tag for each line matching a regular expression pattern\n\
1018 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1019 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1020 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1021 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1022 puts (" If TAGNAME/ is present, the tags created are named.\n\
1023 For example Tcl named tags can be created with:\n\
1024 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1025 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1026 `m' means to allow multi-line matches, `s' implies `m' and\n\
1027 causes dot to match any character, including newline.");
1029 puts ("-R, --no-regex\n\
1030 Don't create tags from regexps for the following files.");
1032 puts ("-I, --ignore-indentation\n\
1033 In C and C++ do not assume that a closing brace in the first\n\
1034 column is the final brace of a function or structure definition.");
1036 puts ("-o FILE, --output=FILE\n\
1037 Write the tags to FILE.");
1039 puts ("--parse-stdin=NAME\n\
1040 Read from standard input and record tags as belonging to file NAME.");
1042 if (CTAGS)
1044 puts ("-t, --typedefs\n\
1045 Generate tag entries for C and Ada typedefs.");
1046 puts ("-T, --typedefs-and-c++\n\
1047 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1048 and C++ member functions.");
1051 if (CTAGS)
1052 puts ("-u, --update\n\
1053 Update the tag entries for the given files, leaving tag\n\
1054 entries for other files in place. Currently, this is\n\
1055 implemented by deleting the existing entries for the given\n\
1056 files and then rewriting the new entries at the end of the\n\
1057 tags file. It is often faster to simply rebuild the entire\n\
1058 tag file than to use this.");
1060 if (CTAGS)
1062 puts ("-v, --vgrind\n\
1063 Print on the standard output an index of items intended for\n\
1064 human consumption, similar to the output of vgrind. The index\n\
1065 is sorted, and gives the page number of each item.");
1067 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1068 puts ("-w, --no-duplicates\n\
1069 Do not create duplicate tag entries, for compatibility with\n\
1070 traditional ctags.");
1072 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1073 puts ("-w, --no-warn\n\
1074 Suppress warning messages about duplicate tag entries.");
1076 puts ("-x, --cxref\n\
1077 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1078 The output uses line numbers instead of page numbers, but\n\
1079 beyond that the differences are cosmetic; try both to see\n\
1080 which you like.");
1083 puts ("-V, --version\n\
1084 Print the version of the program.\n\
1085 -h, --help\n\
1086 Print this help message.\n\
1087 Followed by one or more `--language' options prints detailed\n\
1088 help about tag generation for the specified languages.");
1090 print_language_names ();
1092 puts ("");
1093 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1095 exit (EXIT_SUCCESS);
1099 #ifdef VMS /* VMS specific functions */
1101 #define EOS '\0'
1103 /* This is a BUG! ANY arbitrary limit is a BUG!
1104 Won't someone please fix this? */
/* Counted file-specification buffer used by the VMS wildcard code:
   `curlen' is the number of characters currently stored in `body'
   (fn_exp writes the terminator at body[curlen]), and `body' has room
   for MAX_FILE_SPEC_LEN characters plus that terminator.
   NOTE(review): the layout presumably has to match what lib$find_file
   expects for a DSC$K_CLASS_VS descriptor -- confirm before changing. */
1105 #define MAX_FILE_SPEC_LEN 255
1106 typedef struct {
1107 short curlen;
1108 char body[MAX_FILE_SPEC_LEN + 1];
1109 } vspec;
1112 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1113 returning in each successive call the next file name matching the input
1114 spec. The function expects that each in_spec passed
1115 to it will be processed to completion; in particular, up to and
1116 including the call following that in which the last matching name
1117 is returned, the function ignores the value of in_spec, and will
1118 only start processing a new spec with the following call.
1119 If an error occurs, on return out_spec contains the value
1120 of in_spec when the error occurred.
1122 With each successive file name returned in out_spec, the
1123 function's return value is one. When there are no more matching
1124 names the function returns zero. If on the first call no file
1125 matches in_spec, or there is any other error, -1 is returned.
1128 #include <rmsdef.h>
1129 #include <descrip.h>
1130 #define OUTSIZE MAX_FILE_SPEC_LEN
1131 static short
1132 fn_exp (out, in)
1133 vspec *out;
1134 char *in;
1136 static long context = 0;
1137 static struct dsc$descriptor_s o;
1138 static struct dsc$descriptor_s i;
1139 static bool pass1 = TRUE;
1140 long status;
1141 short retval;
1143 if (pass1)
1145 pass1 = FALSE;
1146 o.dsc$a_pointer = (char *) out;
1147 o.dsc$w_length = (short)OUTSIZE;
1148 i.dsc$a_pointer = in;
1149 i.dsc$w_length = (short)strlen(in);
1150 i.dsc$b_dtype = DSC$K_DTYPE_T;
1151 i.dsc$b_class = DSC$K_CLASS_S;
1152 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1153 o.dsc$b_class = DSC$K_CLASS_VS;
1155 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1157 out->body[out->curlen] = EOS;
1158 return 1;
1160 else if (status == RMS$_NMF)
1161 retval = 0;
1162 else
1164 strcpy(out->body, in);
1165 retval = -1;
1167 lib$find_file_end(&context);
1168 pass1 = TRUE;
1169 return retval;
1173 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1174 name of each file specified by the provided arg expanding wildcards.
1176 static char *
1177 gfnames (arg, p_error)
1178 char *arg;
1179 bool *p_error;
1181 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1183 switch (fn_exp (&filename, arg))
1185 case 1:
1186 *p_error = FALSE;
1187 return filename.body;
1188 case 0:
1189 *p_error = FALSE;
1190 return NULL;
1191 default:
1192 *p_error = TRUE;
1193 return filename.body;
1197 #ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's system(): it runs nothing and only
   reports, through error(), that the facility is unavailable.
   NOTE(review): the definition has no declared return type and no
   return statement is visible here, yet main compares the result of
   system() with EXIT_SUCCESS -- confirm what callers actually get.
   NOTE(review): the guard compiles this stub when OLD is NOT defined,
   which looks at odds with the remark on the #ifndef line -- verify. */
1198 system (cmd)
1199 char *cmd;
1201 error ("%s", "system() function not implemented under VMS");
1203 #endif
1205 #define VERSION_DELIM ';'
1206 char *massage_name (s)
1207 char *s;
1209 char *start = s;
1211 for ( ; *s; s++)
1212 if (*s == VERSION_DELIM)
1214 *s = EOS;
1215 break;
1217 else
1218 *s = lowcase (*s);
1219 return start;
1221 #endif /* VMS */
1225 main (argc, argv)
1226 int argc;
1227 char *argv[];
1229 int i;
1230 unsigned int nincluded_files;
1231 char **included_files;
1232 argument *argbuffer;
1233 int current_arg, file_count;
1234 linebuffer filename_lb;
1235 bool help_asked = FALSE;
1236 #ifdef VMS
1237 bool got_err;
1238 #endif
1239 char *optstring;
1240 int opt;
1243 #ifdef DOS_NT
1244 _fmode = O_BINARY; /* all of files are treated as binary files */
1245 #endif /* DOS_NT */
1247 progname = argv[0];
1248 nincluded_files = 0;
1249 included_files = xnew (argc, char *);
1250 current_arg = 0;
1251 file_count = 0;
1253 /* Allocate enough no matter what happens. Overkill, but each one
1254 is small. */
1255 argbuffer = xnew (argc, argument);
1258 * Always find typedefs and structure tags.
1259 * Also default to find macro constants, enum constants, struct
1260 * members and global variables. Do it for both etags and ctags.
1262 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1263 globals = members = TRUE;
1265 /* When the optstring begins with a '-' getopt_long does not rearrange the
1266 non-options arguments to be at the end, but leaves them alone. */
1267 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1268 "ac:Cf:Il:o:r:RSVhH",
1269 (CTAGS) ? "BxdtTuvw" : "Di:");
1271 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1272 switch (opt)
1274 case 0:
1275 /* If getopt returns 0, then it has already processed a
1276 long-named option. We should do nothing. */
1277 break;
1279 case 1:
1280 /* This means that a file name has been seen. Record it. */
1281 argbuffer[current_arg].arg_type = at_filename;
1282 argbuffer[current_arg].what = optarg;
1283 ++current_arg;
1284 ++file_count;
1285 break;
1287 case STDIN:
1288 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1289 argbuffer[current_arg].arg_type = at_stdin;
1290 argbuffer[current_arg].what = optarg;
1291 ++current_arg;
1292 ++file_count;
1293 if (parsing_stdin)
1294 fatal ("cannot parse standard input more than once", (char *)NULL);
1295 parsing_stdin = TRUE;
1296 break;
1298 /* Common options. */
1299 case 'a': append_to_tagfile = TRUE; break;
1300 case 'C': cplusplus = TRUE; break;
1301 case 'f': /* for compatibility with old makefiles */
1302 case 'o':
1303 if (tagfile)
1305 error ("-o option may only be given once.", (char *)NULL);
1306 suggest_asking_for_help ();
1307 /* NOTREACHED */
1309 tagfile = optarg;
1310 break;
1311 case 'I':
1312 case 'S': /* for backward compatibility */
1313 ignoreindent = TRUE;
1314 break;
1315 case 'l':
1317 language *lang = get_language_from_langname (optarg);
1318 if (lang != NULL)
1320 argbuffer[current_arg].lang = lang;
1321 argbuffer[current_arg].arg_type = at_language;
1322 ++current_arg;
1325 break;
1326 case 'c':
1327 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1328 optarg = concat (optarg, "i", ""); /* memory leak here */
1329 /* FALLTHRU */
1330 case 'r':
1331 argbuffer[current_arg].arg_type = at_regexp;
1332 argbuffer[current_arg].what = optarg;
1333 ++current_arg;
1334 break;
1335 case 'R':
1336 argbuffer[current_arg].arg_type = at_regexp;
1337 argbuffer[current_arg].what = NULL;
1338 ++current_arg;
1339 break;
1340 case 'V':
1341 print_version ();
1342 break;
1343 case 'h':
1344 case 'H':
1345 help_asked = TRUE;
1346 break;
1348 /* Etags options */
1349 case 'D': constantypedefs = FALSE; break;
1350 case 'i': included_files[nincluded_files++] = optarg; break;
1352 /* Ctags options. */
1353 case 'B': searchar = '?'; break;
1354 case 'd': constantypedefs = TRUE; break;
1355 case 't': typedefs = TRUE; break;
1356 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1357 case 'u': update = TRUE; break;
1358 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1359 case 'x': cxref_style = TRUE; break;
1360 case 'w': no_warnings = TRUE; break;
1361 default:
1362 suggest_asking_for_help ();
1363 /* NOTREACHED */
1366 /* No more options. Store the rest of arguments. */
1367 for (; optind < argc; optind++)
1369 argbuffer[current_arg].arg_type = at_filename;
1370 argbuffer[current_arg].what = argv[optind];
1371 ++current_arg;
1372 ++file_count;
1375 argbuffer[current_arg].arg_type = at_end;
1377 if (help_asked)
1378 print_help (argbuffer);
1379 /* NOTREACHED */
1381 if (nincluded_files == 0 && file_count == 0)
1383 error ("no input files specified.", (char *)NULL);
1384 suggest_asking_for_help ();
1385 /* NOTREACHED */
1388 if (tagfile == NULL)
1389 tagfile = CTAGS ? "tags" : "TAGS";
1390 cwd = etags_getcwd (); /* the current working directory */
1391 if (cwd[strlen (cwd) - 1] != '/')
1393 char *oldcwd = cwd;
1394 cwd = concat (oldcwd, "/", "");
1395 free (oldcwd);
1397 /* Relative file names are made relative to the current directory. */
1398 if (streq (tagfile, "-")
1399 || strneq (tagfile, "/dev/", 5))
1400 tagfiledir = cwd;
1401 else
1402 tagfiledir = absolute_dirname (tagfile, cwd);
1404 init (); /* set up boolean "functions" */
1406 linebuffer_init (&lb);
1407 linebuffer_init (&filename_lb);
1408 linebuffer_init (&filebuf);
1409 linebuffer_init (&token_name);
1411 if (!CTAGS)
1413 if (streq (tagfile, "-"))
1415 tagf = stdout;
1416 #ifdef DOS_NT
1417 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1418 doesn't take effect until after `stdout' is already open). */
1419 if (!isatty (fileno (stdout)))
1420 setmode (fileno (stdout), O_BINARY);
1421 #endif /* DOS_NT */
1423 else
1424 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1425 if (tagf == NULL)
1426 pfatal (tagfile);
1430 * Loop through files finding functions.
1432 for (i = 0; i < current_arg; i++)
1434 static language *lang; /* non-NULL if language is forced */
1435 char *this_file;
1437 switch (argbuffer[i].arg_type)
1439 case at_language:
1440 lang = argbuffer[i].lang;
1441 break;
1442 case at_regexp:
1443 analyse_regex (argbuffer[i].what);
1444 break;
1445 case at_filename:
1446 #ifdef VMS
1447 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1449 if (got_err)
1451 error ("can't find file %s\n", this_file);
1452 argc--, argv++;
1454 else
1456 this_file = massage_name (this_file);
1458 #else
1459 this_file = argbuffer[i].what;
1460 #endif
1461 /* Input file named "-" means read file names from stdin
1462 (one per line) and use them. */
1463 if (streq (this_file, "-"))
1465 if (parsing_stdin)
1466 fatal ("cannot parse standard input AND read file names from it",
1467 (char *)NULL);
1468 while (readline_internal (&filename_lb, stdin) > 0)
1469 process_file_name (filename_lb.buffer, lang);
1471 else
1472 process_file_name (this_file, lang);
1473 #ifdef VMS
1475 #endif
1476 break;
1477 case at_stdin:
1478 this_file = argbuffer[i].what;
1479 process_file (stdin, this_file, lang);
1480 break;
1484 free_regexps ();
1485 free (lb.buffer);
1486 free (filebuf.buffer);
1487 free (token_name.buffer);
1489 if (!CTAGS || cxref_style)
1491 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1492 put_entries (nodehead);
1493 free_tree (nodehead);
1494 nodehead = NULL;
1495 if (!CTAGS)
1497 fdesc *fdp;
1499 /* Output file entries that have no tags. */
1500 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1501 if (!fdp->written)
1502 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1504 while (nincluded_files-- > 0)
1505 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1507 if (fclose (tagf) == EOF)
1508 pfatal (tagfile);
1511 exit (EXIT_SUCCESS);
1514 /* From here on, we are in (CTAGS && !cxref_style) */
1515 if (update)
1517 char cmd[BUFSIZ];
1518 for (i = 0; i < current_arg; ++i)
1520 switch (argbuffer[i].arg_type)
1522 case at_filename:
1523 case at_stdin:
1524 break;
1525 default:
1526 continue; /* the for loop */
1528 sprintf (cmd,
1529 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1530 tagfile, argbuffer[i].what, tagfile);
1531 if (system (cmd) != EXIT_SUCCESS)
1532 fatal ("failed to execute shell command", (char *)NULL);
1534 append_to_tagfile = TRUE;
1537 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1538 if (tagf == NULL)
1539 pfatal (tagfile);
1540 put_entries (nodehead); /* write all the tags (CTAGS) */
1541 free_tree (nodehead);
1542 nodehead = NULL;
1543 if (fclose (tagf) == EOF)
1544 pfatal (tagfile);
1546 if (CTAGS)
1547 if (append_to_tagfile || update)
1549 char cmd[2*BUFSIZ+20];
1550 /* Maybe these should be used:
1551 setenv ("LC_COLLATE", "C", 1);
1552 setenv ("LC_ALL", "C", 1); */
1553 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1554 exit (system (cmd));
1556 return EXIT_SUCCESS;
1561 * Return a compressor given the file name. If EXTPTR is non-zero,
1562 * return a pointer into FILE where the compressor-specific
1563 * extension begins. If no compressor is found, NULL is returned
1564 * and EXTPTR is not significant.
1565 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1567 static compressor *
1568 get_compressor_from_suffix (file, extptr)
1569 char *file;
1570 char **extptr;
1572 compressor *compr;
1573 char *slash, *suffix;
1575 /* This relies on FN to be after canonicalize_filename,
1576 so we don't need to consider backslashes on DOS_NT. */
1577 slash = etags_strrchr (file, '/');
1578 suffix = etags_strrchr (file, '.');
1579 if (suffix == NULL || suffix < slash)
1580 return NULL;
1581 if (extptr != NULL)
1582 *extptr = suffix;
1583 suffix += 1;
1584 /* Let those poor souls who live with DOS 8+3 file name limits get
1585 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1586 Only the first do loop is run if not MSDOS */
1589 for (compr = compressors; compr->suffix != NULL; compr++)
1590 if (streq (compr->suffix, suffix))
1591 return compr;
1592 if (!MSDOS)
1593 break; /* do it only once: not really a loop */
1594 if (extptr != NULL)
1595 *extptr = ++suffix;
1596 } while (*suffix != '\0');
1597 return NULL;
1603 * Return a language given the name.
1605 static language *
1606 get_language_from_langname (name)
1607 const char *name;
1609 language *lang;
1611 if (name == NULL)
1612 error ("empty language name", (char *)NULL);
1613 else
1615 for (lang = lang_names; lang->name != NULL; lang++)
1616 if (streq (name, lang->name))
1617 return lang;
1618 error ("unknown language \"%s\"", name);
1621 return NULL;
1626 * Return a language given the interpreter name.
1628 static language *
1629 get_language_from_interpreter (interpreter)
1630 char *interpreter;
1632 language *lang;
1633 char **iname;
1635 if (interpreter == NULL)
1636 return NULL;
1637 for (lang = lang_names; lang->name != NULL; lang++)
1638 if (lang->interpreters != NULL)
1639 for (iname = lang->interpreters; *iname != NULL; iname++)
1640 if (streq (*iname, interpreter))
1641 return lang;
1643 return NULL;
1649 * Return a language given the file name.
1651 static language *
1652 get_language_from_filename (file, case_sensitive)
1653 char *file;
1654 bool case_sensitive;
1656 language *lang;
1657 char **name, **ext, *suffix;
1659 /* Try whole file name first. */
1660 for (lang = lang_names; lang->name != NULL; lang++)
1661 if (lang->filenames != NULL)
1662 for (name = lang->filenames; *name != NULL; name++)
1663 if ((case_sensitive)
1664 ? streq (*name, file)
1665 : strcaseeq (*name, file))
1666 return lang;
1668 /* If not found, try suffix after last dot. */
1669 suffix = etags_strrchr (file, '.');
1670 if (suffix == NULL)
1671 return NULL;
1672 suffix += 1;
1673 for (lang = lang_names; lang->name != NULL; lang++)
1674 if (lang->suffixes != NULL)
1675 for (ext = lang->suffixes; *ext != NULL; ext++)
1676 if ((case_sensitive)
1677 ? streq (*ext, suffix)
1678 : strcaseeq (*ext, suffix))
1679 return lang;
1680 return NULL;
1685 * This routine is called on each file argument.
1687 static void
1688 process_file_name (file, lang)
1689 char *file;
1690 language *lang;
1692 struct stat stat_buf;
1693 FILE *inf;
1694 fdesc *fdp;
1695 compressor *compr;
1696 char *compressed_name, *uncompressed_name;
1697 char *ext, *real_name;
1698 int retval;
1700 canonicalize_filename (file);
1701 if (streq (file, tagfile) && !streq (tagfile, "-"))
1703 error ("skipping inclusion of %s in self.", file);
1704 return;
1706 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1708 compressed_name = NULL;
1709 real_name = uncompressed_name = savestr (file);
1711 else
1713 real_name = compressed_name = savestr (file);
1714 uncompressed_name = savenstr (file, ext - file);
1717 /* If the canonicalized uncompressed name
1718 has already been dealt with, skip it silently. */
1719 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1721 assert (fdp->infname != NULL);
1722 if (streq (uncompressed_name, fdp->infname))
1723 goto cleanup;
1726 if (stat (real_name, &stat_buf) != 0)
1728 /* Reset real_name and try with a different name. */
1729 real_name = NULL;
1730 if (compressed_name != NULL) /* try with the given suffix */
1732 if (stat (uncompressed_name, &stat_buf) == 0)
1733 real_name = uncompressed_name;
1735 else /* try all possible suffixes */
1737 for (compr = compressors; compr->suffix != NULL; compr++)
1739 compressed_name = concat (file, ".", compr->suffix);
1740 if (stat (compressed_name, &stat_buf) != 0)
1742 if (MSDOS)
1744 char *suf = compressed_name + strlen (file);
1745 size_t suflen = strlen (compr->suffix) + 1;
1746 for ( ; suf[1]; suf++, suflen--)
1748 memmove (suf, suf + 1, suflen);
1749 if (stat (compressed_name, &stat_buf) == 0)
1751 real_name = compressed_name;
1752 break;
1755 if (real_name != NULL)
1756 break;
1757 } /* MSDOS */
1758 free (compressed_name);
1759 compressed_name = NULL;
1761 else
1763 real_name = compressed_name;
1764 break;
1768 if (real_name == NULL)
1770 perror (file);
1771 goto cleanup;
1773 } /* try with a different name */
1775 if (!S_ISREG (stat_buf.st_mode))
1777 error ("skipping %s: it is not a regular file.", real_name);
1778 goto cleanup;
1780 if (real_name == compressed_name)
1782 char *cmd = concat (compr->command, " ", real_name);
1783 inf = (FILE *) popen (cmd, "r");
1784 free (cmd);
1786 else
1787 inf = fopen (real_name, "r");
1788 if (inf == NULL)
1790 perror (real_name);
1791 goto cleanup;
1794 process_file (inf, uncompressed_name, lang);
1796 if (real_name == compressed_name)
1797 retval = pclose (inf);
1798 else
1799 retval = fclose (inf);
1800 if (retval < 0)
1801 pfatal (file);
1803 cleanup:
1804 if (compressed_name) free (compressed_name);
1805 if (uncompressed_name) free (uncompressed_name);
1806 last_node = NULL;
1807 curfdp = NULL;
1808 return;
1811 static void
1812 process_file (fh, fn, lang)
1813 FILE *fh;
1814 char *fn;
1815 language *lang;
1817 static const fdesc emptyfdesc;
1818 fdesc *fdp;
1820 /* Create a new input file description entry. */
1821 fdp = xnew (1, fdesc);
1822 *fdp = emptyfdesc;
1823 fdp->next = fdhead;
1824 fdp->infname = savestr (fn);
1825 fdp->lang = lang;
1826 fdp->infabsname = absolute_filename (fn, cwd);
1827 fdp->infabsdir = absolute_dirname (fn, cwd);
1828 if (filename_is_absolute (fn))
1830 /* An absolute file name. Canonicalize it. */
1831 fdp->taggedfname = absolute_filename (fn, NULL);
1833 else
1835 /* A file name relative to cwd. Make it relative
1836 to the directory of the tags file. */
1837 fdp->taggedfname = relative_filename (fn, tagfiledir);
1839 fdp->usecharno = TRUE; /* use char position when making tags */
1840 fdp->prop = NULL;
1841 fdp->written = FALSE; /* not written on tags file yet */
1843 fdhead = fdp;
1844 curfdp = fdhead; /* the current file description */
1846 find_entries (fh);
1848 /* If not Ctags, and if this is not metasource and if it contained no #line
1849 directives, we can write the tags and free all nodes pointing to
1850 curfdp. */
1851 if (!CTAGS
1852 && curfdp->usecharno /* no #line directives in this file */
1853 && !curfdp->lang->metasource)
1855 node *np, *prev;
1857 /* Look for the head of the sublist relative to this file. See add_node
1858 for the structure of the node tree. */
1859 prev = NULL;
1860 for (np = nodehead; np != NULL; prev = np, np = np->left)
1861 if (np->fdp == curfdp)
1862 break;
1864 /* If we generated tags for this file, write and delete them. */
1865 if (np != NULL)
1867 /* This is the head of the last sublist, if any. The following
1868 instructions depend on this being true. */
1869 assert (np->left == NULL);
1871 assert (fdhead == curfdp);
1872 assert (last_node->fdp == curfdp);
1873 put_entries (np); /* write tags for file curfdp->taggedfname */
1874 free_tree (np); /* remove the written nodes */
1875 if (prev == NULL)
1876 nodehead = NULL; /* no nodes left */
1877 else
1878 prev->left = NULL; /* delete the pointer to the sublist */
1884 * This routine sets up the boolean pseudo-functions which work
1885 * by setting boolean flags dependent upon the corresponding character.
1886 * Every char which is NOT in that string is not a white char. Therefore,
1887 * all of the array "_wht" is set to FALSE, and then the elements
1888 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1889 * of a char is TRUE if it is in the string "white", else FALSE.
1891 static void
1892 init ()
1894 register char *sp;
1895 register int i;
1897 for (i = 0; i < CHARS; i++)
1898 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1899 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1900 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1901 notinname('\0') = notinname('\n');
1902 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1903 begtoken('\0') = begtoken('\n');
1904 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1905 intoken('\0') = intoken('\n');
1906 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1907 endtoken('\0') = endtoken('\n');
1911 * This routine opens the specified file and calls the function
1912 * which finds the function and type definitions.
1914 static void
1915 find_entries (inf)
1916 FILE *inf;
1918 char *cp;
1919 language *lang = curfdp->lang;
1920 Lang_function *parser = NULL;
1922 /* If user specified a language, use it. */
1923 if (lang != NULL && lang->function != NULL)
1925 parser = lang->function;
1928 /* Else try to guess the language given the file name. */
1929 if (parser == NULL)
1931 lang = get_language_from_filename (curfdp->infname, TRUE);
1932 if (lang != NULL && lang->function != NULL)
1934 curfdp->lang = lang;
1935 parser = lang->function;
1939 /* Else look for sharp-bang as the first two characters. */
1940 if (parser == NULL
1941 && readline_internal (&lb, inf) > 0
1942 && lb.len >= 2
1943 && lb.buffer[0] == '#'
1944 && lb.buffer[1] == '!')
1946 char *lp;
1948 /* Set lp to point at the first char after the last slash in the
1949 line or, if no slashes, at the first nonblank. Then set cp to
1950 the first successive blank and terminate the string. */
1951 lp = etags_strrchr (lb.buffer+2, '/');
1952 if (lp != NULL)
1953 lp += 1;
1954 else
1955 lp = skip_spaces (lb.buffer + 2);
1956 cp = skip_non_spaces (lp);
1957 *cp = '\0';
1959 if (strlen (lp) > 0)
1961 lang = get_language_from_interpreter (lp);
1962 if (lang != NULL && lang->function != NULL)
1964 curfdp->lang = lang;
1965 parser = lang->function;
1970 /* We rewind here, even if inf may be a pipe. We fail if the
1971 length of the first line is longer than the pipe block size,
1972 which is unlikely. */
1973 rewind (inf);
1975 /* Else try to guess the language given the case insensitive file name. */
1976 if (parser == NULL)
1978 lang = get_language_from_filename (curfdp->infname, FALSE);
1979 if (lang != NULL && lang->function != NULL)
1981 curfdp->lang = lang;
1982 parser = lang->function;
1986 /* Else try Fortran or C. */
1987 if (parser == NULL)
1989 node *old_last_node = last_node;
1991 curfdp->lang = get_language_from_langname ("fortran");
1992 find_entries (inf);
1994 if (old_last_node == last_node)
1995 /* No Fortran entries found. Try C. */
1997 /* We do not tag if rewind fails.
1998 Only the file name will be recorded in the tags file. */
1999 rewind (inf);
2000 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
2001 find_entries (inf);
2003 return;
2006 if (!no_line_directive
2007 && curfdp->lang != NULL && curfdp->lang->metasource)
2008 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
2009 file, or anyway we parsed a file that is automatically generated from
2010 this one. If this is the case, the bingo.c file contained #line
2011 directives that generated tags pointing to this file. Let's delete
2012 them all before parsing this file, which is the real source. */
2014 fdesc **fdpp = &fdhead;
2015 while (*fdpp != NULL)
2016 if (*fdpp != curfdp
2017 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2018 /* We found one of those! We must delete both the file description
2019 and all tags referring to it. */
2021 fdesc *badfdp = *fdpp;
2023 /* Delete the tags referring to badfdp->taggedfname
2024 that were obtained from badfdp->infname. */
2025 invalidate_nodes (badfdp, &nodehead);
2027 *fdpp = badfdp->next; /* remove the bad description from the list */
2028 free_fdesc (badfdp);
2030 else
2031 fdpp = &(*fdpp)->next; /* advance the list pointer */
2034 assert (parser != NULL);
2036 /* Generic initialisations before reading from file. */
2037 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2039 /* Generic initialisations before parsing file with readline. */
2040 lineno = 0; /* reset global line number */
2041 charno = 0; /* reset global char number */
2042 linecharno = 0; /* reset global char number of line start */
2044 parser (inf);
2046 regex_tag_multiline ();
2051 * Check whether an implicitly named tag should be created,
2052 * then call `pfnote'.
2053 * NAME is a string that is internally copied by this function.
2055 * TAGS format specification
2056 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2057 * The following is explained in some more detail in etc/ETAGS.EBNF.
2059 * make_tag creates tags with "implicit tag names" (unnamed tags)
2060 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2061 * 1. NAME does not contain any of the characters in NONAM;
2062 * 2. LINESTART contains name as either a rightmost, or rightmost but
2063 * one character, substring;
2064 * 3. the character, if any, immediately before NAME in LINESTART must
2065 * be a character in NONAM;
2066 * 4. the character, if any, immediately after NAME in LINESTART must
2067 * also be a character in NONAM.
2069 * The implementation uses the notinname() macro, which recognises the
2070 * characters stored in the string `nonam'.
2071 * etags.el needs to use the same characters that are in NONAM.
2073 static void
2074 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2075 char *name; /* tag name, or NULL if unnamed */
2076 int namelen; /* tag length */
2077 bool is_func; /* tag is a function */
2078 char *linestart; /* start of the line where tag is */
2079 int linelen; /* length of the line where tag is */
2080 int lno; /* line number */
2081 long cno; /* character number */
2083 bool named = (name != NULL && namelen > 0);
2085 if (!CTAGS && named) /* maybe set named to false */
2086 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2087 such that etags.el can guess a name from it. */
2089 int i;
2090 register char *cp = name;
2092 for (i = 0; i < namelen; i++)
2093 if (notinname (*cp++))
2094 break;
2095 if (i == namelen) /* rule #1 */
2097 cp = linestart + linelen - namelen;
2098 if (notinname (linestart[linelen-1]))
2099 cp -= 1; /* rule #4 */
2100 if (cp >= linestart /* rule #2 */
2101 && (cp == linestart
2102 || notinname (cp[-1])) /* rule #3 */
2103 && strneq (name, cp, namelen)) /* rule #2 */
2104 named = FALSE; /* use implicit tag name */
2108 if (named)
2109 name = savenstr (name, namelen);
2110 else
2111 name = NULL;
2112 pfnote (name, is_func, linestart, linelen, lno, cno);
2115 /* Record a tag. */
2116 static void
2117 pfnote (name, is_func, linestart, linelen, lno, cno)
2118 char *name; /* tag name, or NULL if unnamed */
2119 bool is_func; /* tag is a function */
2120 char *linestart; /* start of the line where tag is */
2121 int linelen; /* length of the line where tag is */
2122 int lno; /* line number */
2123 long cno; /* character number */
2125 register node *np;
2127 assert (name == NULL || name[0] != '\0');
2128 if (CTAGS && name == NULL)
2129 return;
2131 np = xnew (1, node);
2133 /* If ctags mode, change name "main" to M<thisfilename>. */
2134 if (CTAGS && !cxref_style && streq (name, "main"))
2136 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2137 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2138 fp = etags_strrchr (np->name, '.');
2139 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2140 fp[0] = '\0';
2142 else
2143 np->name = name;
2144 np->valid = TRUE;
2145 np->been_warned = FALSE;
2146 np->fdp = curfdp;
2147 np->is_func = is_func;
2148 np->lno = lno;
2149 if (np->fdp->usecharno)
2150 /* Our char numbers are 0-base, because of C language tradition?
2151 ctags compatibility? old versions compatibility? I don't know.
2152 Anyway, since emacs's are 1-base we expect etags.el to take care
2153 of the difference. If we wanted to have 1-based numbers, we would
2154 uncomment the +1 below. */
2155 np->cno = cno /* + 1 */ ;
2156 else
2157 np->cno = invalidcharno;
2158 np->left = np->right = NULL;
2159 if (CTAGS && !cxref_style)
2161 if (strlen (linestart) < 50)
2162 np->regex = concat (linestart, "$", "");
2163 else
2164 np->regex = savenstr (linestart, 50);
2166 else
2167 np->regex = savenstr (linestart, linelen);
2169 add_node (np, &nodehead);
2173 * free_tree ()
2174 * recurse on left children, iterate on right children.
2176 static void
2177 free_tree (np)
2178 register node *np;
2180 while (np)
2182 register node *node_right = np->right;
2183 free_tree (np->left);
2184 if (np->name != NULL)
2185 free (np->name);
2186 free (np->regex);
2187 free (np);
2188 np = node_right;
2193 * free_fdesc ()
2194 * delete a file description
2196 static void
2197 free_fdesc (fdp)
2198 register fdesc *fdp;
2200 if (fdp->infname != NULL) free (fdp->infname);
2201 if (fdp->infabsname != NULL) free (fdp->infabsname);
2202 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2203 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2204 if (fdp->prop != NULL) free (fdp->prop);
2205 free (fdp);
2209 * add_node ()
2210 * Adds a node to the tree of nodes. In etags mode, sort by file
2211 * name. In ctags mode, sort by tag name. Make no attempt at
2212 * balancing.
2214 * add_node is the only function allowed to add nodes, so it can
2215 * maintain state.
2217 static void
2218 add_node (np, cur_node_p)
2219 node *np, **cur_node_p;
2221 register int dif;
2222 register node *cur_node = *cur_node_p;
2224 if (cur_node == NULL)
2226 *cur_node_p = np;
2227 last_node = np;
2228 return;
2231 if (!CTAGS)
2232 /* Etags Mode */
2234 /* For each file name, tags are in a linked sublist on the right
2235 pointer. The first tags of different files are a linked list
2236 on the left pointer. last_node points to the end of the last
2237 used sublist. */
2238 if (last_node != NULL && last_node->fdp == np->fdp)
2240 /* Let's use the same sublist as the last added node. */
2241 assert (last_node->right == NULL);
2242 last_node->right = np;
2243 last_node = np;
2245 else if (cur_node->fdp == np->fdp)
2247 /* Scanning the list we found the head of a sublist which is
2248 good for us. Let's scan this sublist. */
2249 add_node (np, &cur_node->right);
2251 else
2252 /* The head of this sublist is not good for us. Let's try the
2253 next one. */
2254 add_node (np, &cur_node->left);
2255 } /* if ETAGS mode */
2257 else
2259 /* Ctags Mode */
2260 dif = strcmp (np->name, cur_node->name);
2263 * If this tag name matches an existing one, then
2264 * do not add the node, but maybe print a warning.
2266 if (no_duplicates && !dif)
2268 if (np->fdp == cur_node->fdp)
2270 if (!no_warnings)
2272 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2273 np->fdp->infname, lineno, np->name);
2274 fprintf (stderr, "Second entry ignored\n");
2277 else if (!cur_node->been_warned && !no_warnings)
2279 fprintf
2280 (stderr,
2281 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2282 np->fdp->infname, cur_node->fdp->infname, np->name);
2283 cur_node->been_warned = TRUE;
2285 return;
2288 /* Actually add the node */
2289 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2290 } /* if CTAGS mode */
2294 * invalidate_nodes ()
2295 * Scan the node tree and invalidate all nodes pointing to the
2296 * given file description (CTAGS case) or free them (ETAGS case).
2298 static void
2299 invalidate_nodes (badfdp, npp)
2300 fdesc *badfdp;
2301 node **npp;
2303 node *np = *npp;
2305 if (np == NULL)
2306 return;
2308 if (CTAGS)
2310 if (np->left != NULL)
2311 invalidate_nodes (badfdp, &np->left);
2312 if (np->fdp == badfdp)
2313 np->valid = FALSE;
2314 if (np->right != NULL)
2315 invalidate_nodes (badfdp, &np->right);
2317 else
2319 assert (np->fdp != NULL);
2320 if (np->fdp == badfdp)
2322 *npp = np->left; /* detach the sublist from the list */
2323 np->left = NULL; /* isolate it */
2324 free_tree (np); /* free it */
2325 invalidate_nodes (badfdp, npp);
2327 else
2328 invalidate_nodes (badfdp, &np->left);
2333 static int total_size_of_entries __P((node *));
2334 static int number_len __P((long));
/* Length of a non-negative number's decimal representation.
   NUM is the number to measure; the value returned is the count of
   decimal digits, which is 1 for every NUM below 10.  */
static int
number_len (num)
     long num;
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2348 * Return total number of characters that put_entries will output for
2349 * the nodes in the linked list at the right of the specified node.
2350 * This count is irrelevant with etags.el since emacs 19.34 at least,
2351 * but is still supplied for backward compatibility.
2353 static int
2354 total_size_of_entries (np)
2355 register node *np;
2357 register int total = 0;
2359 for (; np != NULL; np = np->right)
2360 if (np->valid)
2362 total += strlen (np->regex) + 1; /* pat\177 */
2363 if (np->name != NULL)
2364 total += strlen (np->name) + 1; /* name\001 */
2365 total += number_len ((long) np->lno) + 1; /* lno, */
2366 if (np->cno != invalidcharno) /* cno */
2367 total += number_len (np->cno);
2368 total += 1; /* newline */
2371 return total;
2374 static void
2375 put_entries (np)
2376 register node *np;
2378 register char *sp;
2379 static fdesc *fdp = NULL;
2381 if (np == NULL)
2382 return;
2384 /* Output subentries that precede this one */
2385 if (CTAGS)
2386 put_entries (np->left);
2388 /* Output this entry */
2389 if (np->valid)
2391 if (!CTAGS)
2393 /* Etags mode */
2394 if (fdp != np->fdp)
2396 fdp = np->fdp;
2397 fprintf (tagf, "\f\n%s,%d\n",
2398 fdp->taggedfname, total_size_of_entries (np));
2399 fdp->written = TRUE;
2401 fputs (np->regex, tagf);
2402 fputc ('\177', tagf);
2403 if (np->name != NULL)
2405 fputs (np->name, tagf);
2406 fputc ('\001', tagf);
2408 fprintf (tagf, "%d,", np->lno);
2409 if (np->cno != invalidcharno)
2410 fprintf (tagf, "%ld", np->cno);
2411 fputs ("\n", tagf);
2413 else
2415 /* Ctags mode */
2416 if (np->name == NULL)
2417 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2419 if (cxref_style)
2421 if (vgrind_style)
2422 fprintf (stdout, "%s %s %d\n",
2423 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2424 else
2425 fprintf (stdout, "%-16s %3d %-16s %s\n",
2426 np->name, np->lno, np->fdp->taggedfname, np->regex);
2428 else
2430 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2432 if (np->is_func)
2433 { /* function or #define macro with args */
2434 putc (searchar, tagf);
2435 putc ('^', tagf);
2437 for (sp = np->regex; *sp; sp++)
2439 if (*sp == '\\' || *sp == searchar)
2440 putc ('\\', tagf);
2441 putc (*sp, tagf);
2443 putc (searchar, tagf);
2445 else
2446 { /* anything else; text pattern inadequate */
2447 fprintf (tagf, "%d", np->lno);
2449 putc ('\n', tagf);
2452 } /* if this node contains a valid tag */
2454 /* Output subentries that follow this one */
2455 put_entries (np->right);
2456 if (!CTAGS)
2457 put_entries (np->left);
/* C extensions.
   These are bit masks combined into the c_ext argument of the C
   parser functions and stored in the c_ext member of the keyword
   table.  C_STAR and C_JAVA both include the C_PLPL bit.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 * Each keyword known to the C parser is mapped to one of these
 * classes; st_none stands for any other token.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2485 static unsigned int hash __P((const char *, unsigned int));
2486 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2487 static enum sym_type C_symtype __P((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2538 /*%<*/
2539 /* C code produced by gperf version 3.0.1 */
2540 /* Command-line: gperf -m 5 */
2541 /* Computed positions: -k'2-3' */
2543 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */

/* Hash function generated by gperf for the C keyword table.
   STR is the candidate word and LEN its length, which must be at
   least 2.  The value is LEN plus the table entry for the second
   character and, when LEN is not 2, the entry for the third one.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2601 static struct C_stab_entry *
2602 in_word_set (str, len)
2603 register const char *str;
2604 register unsigned int len;
2606 enum
2608 TOTAL_KEYWORDS = 32,
2609 MIN_WORD_LENGTH = 2,
2610 MAX_WORD_LENGTH = 15,
2611 MIN_HASH_VALUE = 2,
2612 MAX_HASH_VALUE = 34
2615 static struct C_stab_entry wordlist[] =
2617 {""}, {""},
2618 {"if", 0, st_C_ignore},
2619 {""},
2620 {"@end", 0, st_C_objend},
2621 {"union", 0, st_C_struct},
2622 {"define", 0, st_C_define},
2623 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2624 {"template", 0, st_C_template},
2625 {"operator", C_PLPL, st_C_operator},
2626 {"@interface", 0, st_C_objprot},
2627 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2628 {"friend", C_PLPL, st_C_ignore},
2629 {"typedef", 0, st_C_typedef},
2630 {"return", 0, st_C_ignore},
2631 {"@implementation",0, st_C_objimpl},
2632 {"@protocol", 0, st_C_objprot},
2633 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2634 {"extern", 0, st_C_extern},
2635 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2636 {"struct", 0, st_C_struct},
2637 {"domain", C_STAR, st_C_struct},
2638 {"switch", 0, st_C_ignore},
2639 {"enum", 0, st_C_enum},
2640 {"for", 0, st_C_ignore},
2641 {"namespace", C_PLPL, st_C_struct},
2642 {"class", 0, st_C_class},
2643 {"while", 0, st_C_ignore},
2644 {"undef", 0, st_C_define},
2645 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2646 {"__attribute__", 0, st_C_attribute},
2647 {"SYSCALL", 0, st_C_gnumacro},
2648 {"ENTRY", 0, st_C_gnumacro},
2649 {"PSEUDO", 0, st_C_gnumacro},
2650 {"DEFUN", 0, st_C_gnumacro}
2653 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2655 register int key = hash (str, len);
2657 if (key <= MAX_HASH_VALUE && key >= 0)
2659 register const char *s = wordlist[key].name;
2661 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2662 return &wordlist[key];
2665 return 0;
2667 /*%>*/
2669 static enum sym_type
2670 C_symtype (str, len, c_ext)
2671 char *str;
2672 int len;
2673 int c_ext;
2675 register struct C_stab_entry *se = in_word_set (str, len);
2677 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2678 return st_none;
2679 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;


/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* do not create a tag; the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2797 * Variables and functions for dealing with nested structures.
2798 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2800 static void pushclass_above __P((int, char *, int));
2801 static void popclass_above __P((int));
2802 static void write_classname __P((linebuffer *, char *qualifier));
2804 static struct {
2805 char **cname; /* nested class names */
2806 int *bracelev; /* nested class brace level */
2807 int nl; /* class nesting level (elements used) */
2808 int size; /* length of the array */
2809 } cstack; /* stack for nested declaration tags */
2810 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2811 #define nestlev (cstack.nl)
2812 /* After struct keyword or in struct body, not inside a nested function. */
2813 #define instruct (structdef == snone && nestlev > 0 \
2814 && bracelev == cstack.bracelev[nestlev-1] + 1)
2816 static void
2817 pushclass_above (bracelev, str, len)
2818 int bracelev;
2819 char *str;
2820 int len;
2822 int nl;
2824 popclass_above (bracelev);
2825 nl = cstack.nl;
2826 if (nl >= cstack.size)
2828 int size = cstack.size *= 2;
2829 xrnew (cstack.cname, size, char *);
2830 xrnew (cstack.bracelev, size, int);
2832 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2833 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2834 cstack.bracelev[nl] = bracelev;
2835 cstack.nl = nl + 1;
2838 static void
2839 popclass_above (bracelev)
2840 int bracelev;
2842 int nl;
2844 for (nl = cstack.nl - 1;
2845 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2846 nl--)
2848 if (cstack.cname[nl] != NULL)
2849 free (cstack.cname[nl]);
2850 cstack.nl = nl;
2854 static void
2855 write_classname (cn, qualifier)
2856 linebuffer *cn;
2857 char *qualifier;
2859 int i, len;
2860 int qlen = strlen (qualifier);
2862 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2864 len = 0;
2865 cn->len = 0;
2866 cn->buffer[0] = '\0';
2868 else
2870 len = strlen (cstack.cname[0]);
2871 linebuffer_setlen (cn, len);
2872 strcpy (cn->buffer, cstack.cname[0]);
2874 for (i = 1; i < cstack.nl; i++)
2876 char *s;
2877 int slen;
2879 s = cstack.cname[i];
2880 if (s == NULL)
2881 continue;
2882 slen = strlen (s);
2883 len += slen + qlen;
2884 linebuffer_setlen (cn, len);
2885 strncat (cn->buffer, qualifier, qlen);
2886 strncat (cn->buffer, s, slen);
2891 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2892 static void make_C_tag __P((bool));
2895 * consider_token ()
2896 * checks to see if the current token is at the start of a
2897 * function or variable, or corresponds to a typedef, or
2898 * is a struct/union/enum tag, or #define, or an enum constant.
2900 * *IS_FUNC gets TRUE if the token is a function or #define macro
2901 * with args. C_EXTP points to which language we are looking at.
2903 * Globals
2904 * fvdef IN OUT
2905 * structdef IN OUT
2906 * definedef IN OUT
2907 * typdef IN OUT
2908 * objdef IN OUT
2911 static bool
2912 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2913 register char *str; /* IN: token pointer */
2914 register int len; /* IN: token length */
2915 register int c; /* IN: first char after the token */
2916 int *c_extp; /* IN, OUT: C extensions mask */
2917 int bracelev; /* IN: brace level */
2918 int parlev; /* IN: parenthesis level */
2919 bool *is_func_or_var; /* OUT: function or variable found */
2921 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2922 structtype is the type of the preceding struct-like keyword, and
2923 structbracelev is the brace level where it has been seen. */
2924 static enum sym_type structtype;
2925 static int structbracelev;
2926 static enum sym_type toktype;
2929 toktype = C_symtype (str, len, *c_extp);
2932 * Skip __attribute__
2934 if (toktype == st_C_attribute)
2936 inattribute = TRUE;
2937 return FALSE;
2941 * Advance the definedef state machine.
2943 switch (definedef)
2945 case dnone:
2946 /* We're not on a preprocessor line. */
2947 if (toktype == st_C_gnumacro)
2949 fvdef = fdefunkey;
2950 return FALSE;
2952 break;
2953 case dsharpseen:
2954 if (toktype == st_C_define)
2956 definedef = ddefineseen;
2958 else
2960 definedef = dignorerest;
2962 return FALSE;
2963 case ddefineseen:
2965 * Make a tag for any macro, unless it is a constant
2966 * and constantypedefs is FALSE.
2968 definedef = dignorerest;
2969 *is_func_or_var = (c == '(');
2970 if (!*is_func_or_var && !constantypedefs)
2971 return FALSE;
2972 else
2973 return TRUE;
2974 case dignorerest:
2975 return FALSE;
2976 default:
2977 error ("internal error: definedef value.", (char *)NULL);
2981 * Now typedefs
2983 switch (typdef)
2985 case tnone:
2986 if (toktype == st_C_typedef)
2988 if (typedefs)
2989 typdef = tkeyseen;
2990 fvextern = FALSE;
2991 fvdef = fvnone;
2992 return FALSE;
2994 break;
2995 case tkeyseen:
2996 switch (toktype)
2998 case st_none:
2999 case st_C_class:
3000 case st_C_struct:
3001 case st_C_enum:
3002 typdef = ttypeseen;
3004 break;
3005 case ttypeseen:
3006 if (structdef == snone && fvdef == fvnone)
3008 fvdef = fvnameseen;
3009 return TRUE;
3011 break;
3012 case tend:
3013 switch (toktype)
3015 case st_C_class:
3016 case st_C_struct:
3017 case st_C_enum:
3018 return FALSE;
3020 return TRUE;
3023 switch (toktype)
3025 case st_C_javastruct:
3026 if (structdef == stagseen)
3027 structdef = scolonseen;
3028 return FALSE;
3029 case st_C_template:
3030 case st_C_class:
3031 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3032 && bracelev == 0
3033 && definedef == dnone && structdef == snone
3034 && typdef == tnone && fvdef == fvnone)
3035 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3036 if (toktype == st_C_template)
3037 break;
3038 /* FALLTHRU */
3039 case st_C_struct:
3040 case st_C_enum:
3041 if (parlev == 0
3042 && fvdef != vignore
3043 && (typdef == tkeyseen
3044 || (typedefs_or_cplusplus && structdef == snone)))
3046 structdef = skeyseen;
3047 structtype = toktype;
3048 structbracelev = bracelev;
3049 if (fvdef == fvnameseen)
3050 fvdef = fvnone;
3052 return FALSE;
3055 if (structdef == skeyseen)
3057 structdef = stagseen;
3058 return TRUE;
3061 if (typdef != tnone)
3062 definedef = dnone;
3064 /* Detect Objective C constructs. */
3065 switch (objdef)
3067 case onone:
3068 switch (toktype)
3070 case st_C_objprot:
3071 objdef = oprotocol;
3072 return FALSE;
3073 case st_C_objimpl:
3074 objdef = oimplementation;
3075 return FALSE;
3077 break;
3078 case oimplementation:
3079 /* Save the class tag for functions or variables defined inside. */
3080 objtag = savenstr (str, len);
3081 objdef = oinbody;
3082 return FALSE;
3083 case oprotocol:
3084 /* Save the class tag for categories. */
3085 objtag = savenstr (str, len);
3086 objdef = otagseen;
3087 *is_func_or_var = TRUE;
3088 return TRUE;
3089 case oparenseen:
3090 objdef = ocatseen;
3091 *is_func_or_var = TRUE;
3092 return TRUE;
3093 case oinbody:
3094 break;
3095 case omethodsign:
3096 if (parlev == 0)
3098 fvdef = fvnone;
3099 objdef = omethodtag;
3100 linebuffer_setlen (&token_name, len);
3101 strncpy (token_name.buffer, str, len);
3102 token_name.buffer[len] = '\0';
3103 return TRUE;
3105 return FALSE;
3106 case omethodcolon:
3107 if (parlev == 0)
3108 objdef = omethodparm;
3109 return FALSE;
3110 case omethodparm:
3111 if (parlev == 0)
3113 fvdef = fvnone;
3114 objdef = omethodtag;
3115 linebuffer_setlen (&token_name, token_name.len + len);
3116 strncat (token_name.buffer, str, len);
3117 return TRUE;
3119 return FALSE;
3120 case oignore:
3121 if (toktype == st_C_objend)
3123 /* Memory leakage here: the string pointed by objtag is
3124 never released, because many tests would be needed to
3125 avoid breaking on incorrect input code. The amount of
3126 memory leaked here is the sum of the lengths of the
3127 class tags.
3128 free (objtag); */
3129 objdef = onone;
3131 return FALSE;
3134 /* A function, variable or enum constant? */
3135 switch (toktype)
3137 case st_C_extern:
3138 fvextern = TRUE;
3139 switch (fvdef)
3141 case finlist:
3142 case flistseen:
3143 case fignore:
3144 case vignore:
3145 break;
3146 default:
3147 fvdef = fvnone;
3149 return FALSE;
3150 case st_C_ignore:
3151 fvextern = FALSE;
3152 fvdef = vignore;
3153 return FALSE;
3154 case st_C_operator:
3155 fvdef = foperator;
3156 *is_func_or_var = TRUE;
3157 return TRUE;
3158 case st_none:
3159 if (constantypedefs
3160 && structdef == snone
3161 && structtype == st_C_enum && bracelev > structbracelev)
3162 return TRUE; /* enum constant */
3163 switch (fvdef)
3165 case fdefunkey:
3166 if (bracelev > 0)
3167 break;
3168 fvdef = fdefunname; /* GNU macro */
3169 *is_func_or_var = TRUE;
3170 return TRUE;
3171 case fvnone:
3172 switch (typdef)
3174 case ttypeseen:
3175 return FALSE;
3176 case tnone:
3177 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3178 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3180 fvdef = vignore;
3181 return FALSE;
3183 break;
3185 /* FALLTHRU */
3186 case fvnameseen:
3187 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3189 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3190 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3191 fvdef = foperator;
3192 *is_func_or_var = TRUE;
3193 return TRUE;
3195 if (bracelev > 0 && !instruct)
3196 break;
3197 fvdef = fvnameseen; /* function or variable */
3198 *is_func_or_var = TRUE;
3199 return TRUE;
3201 break;
3204 return FALSE;
3209 * C_entries often keeps pointers to tokens or lines which are older than
3210 * the line currently read. By keeping two line buffers, and switching
3211 * them at end of line, it is possible to use those pointers.
3213 static struct
3215 long linepos;
3216 linebuffer lb;
3217 } lbs[2];
3219 #define current_lb_is_new (newndx == curndx)
3220 #define switch_line_buffers() (curndx = 1 - curndx)
3222 #define curlb (lbs[curndx].lb)
3223 #define newlb (lbs[newndx].lb)
3224 #define curlinepos (lbs[curndx].linepos)
3225 #define newlinepos (lbs[newndx].linepos)
3227 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3228 #define cplpl (c_ext & C_PLPL)
3229 #define cjava ((c_ext & C_JAVA) == C_JAVA)
/*
 * CNL_SAVE_DEFINEDEF ()
 *	Move C_entries on to the next input line without touching
 *	definedef: record charno for the current slot, read a line into
 *	it, restart LP at its beginning, clear the quoted-newline flag
 *	and mark the current buffer as the new one.
 */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = FALSE;							\
  newndx = curndx;							\
} while (0)

/*
 * CNL ()
 *	Same as CNL_SAVE_DEFINEDEF, and in addition leave preprocessor
 *	state: definedef goes back to dnone and a token saved in
 *	savetoken, if any, becomes the current token again.
 */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  definedef = dnone;							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = FALSE;						\
    }									\
} while (0)
3252 static void
3253 make_C_tag (isfun)
3254 bool isfun;
3256 /* This function is never called when token.valid is FALSE, but
3257 we must protect against invalid input or internal errors. */
3258 if (token.valid)
3259 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3260 token.offset+token.length+1, token.lineno, token.linepos);
3261 else if (DEBUG)
3262 { /* this branch is optimised away if !DEBUG */
3263 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3264 token_name.len + 17, isfun, token.line,
3265 token.offset+token.length+1, token.lineno, token.linepos);
3266 error ("INVALID TOKEN", NULL);
3269 token.valid = FALSE;
/*
 * C_entries ()
 *	This routine finds functions, variables, typedefs,
 *	#define's, enum constants and struct/union/enum definitions in
 *	C syntax and adds them to the list.
 */
/*
 * C_entries: scan a C-family source stream and record tags.
 *
 *   c_ext  bit mask describing the dialect; the body tests it against
 *          C_AUTO, C_PLPL (cplpl), C_JAVA (cjava), C_EXT/C_PLAIN (plainc)
 *          and YACC.  It is updated locally: when "::" is met inside an
 *          identifier while C_AUTO is set, C_PLPL replaces C_AUTO.
 *   inf    input stream; lines are fetched through the CNL and
 *          CNL_SAVE_DEFINEDEF macros until feof (inf).
 *
 * Each iteration of the main loop takes one character from the current
 * line.  Comments, string literals, character literals and bracketed
 * text are consumed first; identifiers are then collected and handed to
 * consider_token; finally punctuation drives the fvdef / typdef /
 * structdef / objdef / definedef state variables and calls make_C_tag.
 *
 * NOTE(review): in this copy of the file every line is prefixed with a
 * number and lines that held only a brace seem to be absent, so the
 * block structure has to be inferred from the statements -- confirm
 * against a pristine etags.c before relying on it.
 */
3279 static void
3280 C_entries (c_ext, inf)
3281 int c_ext; /* extension of C */
3282 FILE *inf; /* input file */
3284 register char c; /* latest char read; '\0' for end of line */
3285 register char *lp; /* pointer one beyond the character `c' */
3286 int curndx, newndx; /* indices for current and new lb */
3287 register int tokoff; /* offset in line of start of current token */
3288 register int toklen; /* length of current token */
3289 char *qualifier; /* string used to qualify names */
3290 int qlen; /* length of qualifier */
3291 int bracelev; /* current brace level */
3292 int bracketlev; /* current bracket level */
3293 int parlev; /* current parenthesis level */
3294 int attrparlev; /* __attribute__ parenthesis level */
3295 int templatelev; /* current template level */
3296 int typdefbracelev; /* bracelev where a typedef struct body begun */
3297 bool incomm, inquote, inchar, quotednl, midtoken;
3298 bool yacc_rules; /* in the rules part of a yacc file */
3299 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Set up both line buffers and, the first time through
   (cstack.size == 0), allocate the cstack arrays. */
3302 linebuffer_init (&lbs[0].lb);
3303 linebuffer_init (&lbs[1].lb);
3304 if (cstack.size == 0)
3306 cstack.size = (DEBUG) ? 1 : 4;
3307 cstack.nl = 0;
3308 cstack.cname = xnew (cstack.size, char *);
3309 cstack.bracelev = xnew (cstack.size, int);
3312 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3313 curndx = newndx = 0;
3314 lp = curlb.buffer;
3315 *lp = 0;
3317 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3318 structdef = snone; definedef = dnone; objdef = onone;
3319 yacc_rules = FALSE;
3320 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3321 token.valid = savetoken.valid = FALSE;
3322 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator placed between the class name and the member name when a
   qualified tag name is built further down. */
3323 if (cjava)
3324 { qualifier = "."; qlen = 1; }
3325 else
3326 { qualifier = "::"; qlen = 2; }
/* Main loop: one input character per iteration. */
3329 while (!feof (inf))
3331 c = *lp++;
3332 if (c == '\\')
3334 /* If we are at the end of the line, the next character is a
3335 '\0'; do not skip it, because it is what tells us
3336 to read the next line. */
3337 if (*lp == '\0')
3339 quotednl = TRUE;
3340 continue;
3342 lp++;
3343 c = ' ';
3345 else if (incomm)
3347 switch (c)
3349 case '*':
3350 if (*lp == '/')
3352 c = *lp++;
3353 incomm = FALSE;
3355 break;
3356 case '\0':
3357 /* Newlines inside comments do not end macro definitions in
3358 traditional cpp. */
3359 CNL_SAVE_DEFINEDEF ();
3360 break;
3362 continue;
3364 else if (inquote)
3366 switch (c)
3368 case '"':
3369 inquote = FALSE;
3370 break;
3371 case '\0':
3372 /* Newlines inside strings do not end macro definitions
3373 in traditional cpp, even though compilers don't
3374 usually accept them. */
3375 CNL_SAVE_DEFINEDEF ();
3376 break;
3378 continue;
3380 else if (inchar)
3382 switch (c)
3384 case '\0':
3385 /* Hmmm, something went wrong. */
3386 CNL ();
3387 /* FALLTHRU */
3388 case '\'':
3389 inchar = FALSE;
3390 break;
3392 continue;
3394 else if (bracketlev > 0)
3396 switch (c)
3398 case ']':
3399 if (--bracketlev > 0)
3400 continue;
3401 break;
3402 case '\0':
3403 CNL_SAVE_DEFINEDEF ();
3404 break;
3406 continue;
3408 else switch (c)
3410 case '"':
3411 inquote = TRUE;
3412 if (inattribute)
3413 break;
3414 switch (fvdef)
3416 case fdefunkey:
3417 case fstartlist:
3418 case finlist:
3419 case fignore:
3420 case vignore:
3421 break;
3422 default:
3423 fvextern = FALSE;
3424 fvdef = fvnone;
3426 continue;
3427 case '\'':
3428 inchar = TRUE;
3429 if (inattribute)
3430 break;
3431 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3433 fvextern = FALSE;
3434 fvdef = fvnone;
3436 continue;
3437 case '/':
3438 if (*lp == '*')
3440 incomm = TRUE;
3441 lp++;
3442 c = ' ';
3444 else if (/* cplpl && */ *lp == '/')
/* A "//" comment: C is turned into '\0' so that the rest of the line
   is handled as an end of line. */
3446 c = '\0';
3448 break;
3449 case '%':
3450 if ((c_ext & YACC) && *lp == '%')
3452 /* Entering or exiting rules section in yacc file. */
3453 lp++;
3454 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3455 typdef = tnone; structdef = snone;
3456 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3457 bracelev = 0;
3458 yacc_rules = !yacc_rules;
3459 continue;
3461 else
3462 break;
3463 case '#':
3464 if (definedef == dnone)
3466 char *cp;
3467 bool cpptoken = TRUE;
3469 /* Look back on this line. If all blanks, or nonblanks
3470 followed by an end of comment, this is a preprocessor
3471 token. */
3472 for (cp = newlb.buffer; cp < lp-1; cp++)
3473 if (!iswhite (*cp))
3475 if (*cp == '*' && *(cp+1) == '/')
3477 cp++;
3478 cpptoken = TRUE;
3480 else
3481 cpptoken = FALSE;
3483 if (cpptoken)
3484 definedef = dsharpseen;
3485 } /* if (definedef == dnone) */
3486 continue;
3487 case '[':
3488 bracketlev++;
3489 continue;
3490 } /* switch (c) */
3493 /* Consider token only if some involved conditions are satisfied. */
3494 if (typdef != tignore
3495 && definedef != dignorerest
3496 && fvdef != finlist
3497 && templatelev == 0
3498 && (definedef != dnone
3499 || structdef != scolonseen)
3500 && !inattribute)
3502 if (midtoken)
3504 if (endtoken (c))
3506 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3507 /* This handles :: in the middle,
3508 but not at the beginning of an identifier.
3509 Also, space-separated :: is not recognised. */
3511 if (c_ext & C_AUTO) /* automatic detection of C++ */
3512 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3513 lp += 2;
3514 toklen += 2;
3515 c = lp[-1];
3516 goto still_in_token;
3518 else
3520 bool funorvar = FALSE;
3522 if (yacc_rules
3523 || consider_token (newlb.buffer + tokoff, toklen, c,
3524 &c_ext, bracelev, parlev,
3525 &funorvar))
3527 if (fvdef == foperator)
3529 char *oldlp = lp;
3530 lp = skip_spaces (lp-1);
3531 if (*lp != '\0')
3532 lp += 1;
3533 while (*lp != '\0'
3534 && !iswhite (*lp) && *lp != '(')
3535 lp += 1;
3536 c = *lp++;
3537 toklen += lp - oldlp;
3539 token.named = FALSE;
3540 if (!plainc
3541 && nestlev > 0 && definedef == dnone)
3542 /* in struct body */
3544 write_classname (&token_name, qualifier);
3545 linebuffer_setlen (&token_name,
3546 token_name.len+qlen+toklen);
3547 strcat (token_name.buffer, qualifier);
3548 strncat (token_name.buffer,
3549 newlb.buffer + tokoff, toklen);
3550 token.named = TRUE;
3552 else if (objdef == ocatseen)
3553 /* Objective C category */
3555 int len = strlen (objtag) + 2 + toklen;
3556 linebuffer_setlen (&token_name, len);
3557 strcpy (token_name.buffer, objtag);
3558 strcat (token_name.buffer, "(");
3559 strncat (token_name.buffer,
3560 newlb.buffer + tokoff, toklen);
3561 strcat (token_name.buffer, ")");
3562 token.named = TRUE;
3564 else if (objdef == omethodtag
3565 || objdef == omethodparm)
3566 /* Objective C method */
3568 token.named = TRUE;
3570 else if (fvdef == fdefunname)
3571 /* GNU DEFUN and similar macros */
3573 bool defun = (newlb.buffer[tokoff] == 'F');
3574 int off = tokoff;
3575 int len = toklen;
3577 /* Rewrite the tag so that emacs lisp DEFUNs
3578 can be found by their elisp name */
3579 if (defun)
3581 off += 1;
3582 len -= 1;
3584 linebuffer_setlen (&token_name, len);
3585 strncpy (token_name.buffer,
3586 newlb.buffer + off, len);
3587 token_name.buffer[len] = '\0';
3588 if (defun)
3589 while (--len >= 0)
3590 if (token_name.buffer[len] == '_')
3591 token_name.buffer[len] = '-';
3592 token.named = defun;
3594 else
3596 linebuffer_setlen (&token_name, toklen);
3597 strncpy (token_name.buffer,
3598 newlb.buffer + tokoff, toklen);
3599 token_name.buffer[toklen] = '\0';
3600 /* Name macros and members. */
3601 token.named = (structdef == stagseen
3602 || typdef == ttypeseen
3603 || typdef == tend
3604 || (funorvar
3605 && definedef == dignorerest)
3606 || (funorvar
3607 && definedef == dnone
3608 && structdef == snone
3609 && bracelev > 0));
3611 token.lineno = lineno;
3612 token.offset = tokoff;
3613 token.length = toklen;
3614 token.line = newlb.buffer;
3615 token.linepos = newlinepos;
3616 token.valid = TRUE;
3618 if (definedef == dnone
3619 && (fvdef == fvnameseen
3620 || fvdef == foperator
3621 || structdef == stagseen
3622 || typdef == tend
3623 || typdef == ttypeseen
3624 || objdef != onone))
3626 if (current_lb_is_new)
3627 switch_line_buffers ();
3629 else if (definedef != dnone
3630 || fvdef == fdefunname
3631 || instruct)
3632 make_C_tag (funorvar);
3634 else /* not yacc and consider_token failed */
3636 if (inattribute && fvdef == fignore)
3638 /* We have just met __attribute__ after a
3639 function parameter list: do not tag the
3640 function again. */
3641 fvdef = fvnone;
3644 midtoken = FALSE;
3646 } /* if (endtoken (c)) */
3647 else if (intoken (c))
3648 still_in_token:
3650 toklen++;
3651 continue;
3653 } /* if (midtoken) */
3654 else if (begtoken (c))
3656 switch (definedef)
3658 case dnone:
3659 switch (fvdef)
3661 case fstartlist:
3662 /* This prevents tagging fb in
3663 void (__attribute__((noreturn)) *fb) (void);
3664 Fixing this is not easy and not very important. */
3665 fvdef = finlist;
3666 continue;
3667 case flistseen:
3668 if (plainc || declarations)
3670 make_C_tag (TRUE); /* a function */
3671 fvdef = fignore;
3673 break;
3675 if (structdef == stagseen && !cjava)
3677 popclass_above (bracelev);
3678 structdef = snone;
3680 break;
3681 case dsharpseen:
3682 savetoken = token;
3683 break;
3685 if (!yacc_rules || lp == newlb.buffer + 1)
3687 tokoff = lp - 1 - newlb.buffer;
3688 toklen = 1;
3689 midtoken = TRUE;
3691 continue;
3692 } /* if (begtoken) */
3693 } /* if must look at token */
3696 /* Detect end of line, colon, comma, semicolon and various braces
3697 after having handled a token.*/
3698 switch (c)
3700 case ':':
3701 if (inattribute)
3702 break;
3703 if (yacc_rules && token.offset == 0 && token.valid)
3705 make_C_tag (FALSE); /* a yacc function */
3706 break;
3708 if (definedef != dnone)
3709 break;
3710 switch (objdef)
3712 case otagseen:
3713 objdef = oignore;
3714 make_C_tag (TRUE); /* an Objective C class */
3715 break;
3716 case omethodtag:
3717 case omethodparm:
3718 objdef = omethodcolon;
3719 linebuffer_setlen (&token_name, token_name.len + 1);
3720 strcat (token_name.buffer, ":");
3721 break;
3723 if (structdef == stagseen)
3725 structdef = scolonseen;
3726 break;
3728 /* Should be useless, but may be work as a safety net. */
3729 if (cplpl && fvdef == flistseen)
3731 make_C_tag (TRUE); /* a function */
3732 fvdef = fignore;
3733 break;
3735 break;
3736 case ';':
3737 if (definedef != dnone || inattribute)
3738 break;
3739 switch (typdef)
3741 case tend:
3742 case ttypeseen:
3743 make_C_tag (FALSE); /* a typedef */
3744 typdef = tnone;
3745 fvdef = fvnone;
3746 break;
3747 case tnone:
3748 case tinbody:
3749 case tignore:
3750 switch (fvdef)
3752 case fignore:
3753 if (typdef == tignore || cplpl)
3754 fvdef = fvnone;
3755 break;
3756 case fvnameseen:
3757 if ((globals && bracelev == 0 && (!fvextern || declarations))
3758 || (members && instruct))
3759 make_C_tag (FALSE); /* a variable */
3760 fvextern = FALSE;
3761 fvdef = fvnone;
3762 token.valid = FALSE;
3763 break;
3764 case flistseen:
3765 if ((declarations
3766 && (cplpl || !instruct)
3767 && (typdef == tnone || (typdef != tignore && instruct)))
3768 || (members
3769 && plainc && instruct))
3770 make_C_tag (TRUE); /* a function */
3771 /* FALLTHRU */
3772 default:
3773 fvextern = FALSE;
3774 fvdef = fvnone;
3775 if (declarations
3776 && cplpl && structdef == stagseen)
3777 make_C_tag (FALSE); /* forward declaration */
3778 else
3779 token.valid = FALSE;
3780 } /* switch (fvdef) */
3781 /* FALLTHRU */
3782 default:
3783 if (!instruct)
3784 typdef = tnone;
3786 if (structdef == stagseen)
3787 structdef = snone;
3788 break;
3789 case ',':
3790 if (definedef != dnone || inattribute)
3791 break;
3792 switch (objdef)
3794 case omethodtag:
3795 case omethodparm:
3796 make_C_tag (TRUE); /* an Objective C method */
3797 objdef = oinbody;
3798 break;
3800 switch (fvdef)
3802 case fdefunkey:
3803 case foperator:
3804 case fstartlist:
3805 case finlist:
3806 case fignore:
3807 case vignore:
3808 break;
3809 case fdefunname:
3810 fvdef = fignore;
3811 break;
3812 case fvnameseen:
3813 if (parlev == 0
3814 && ((globals
3815 && bracelev == 0
3816 && templatelev == 0
3817 && (!fvextern || declarations))
3818 || (members && instruct)))
3819 make_C_tag (FALSE); /* a variable */
3820 break;
3821 case flistseen:
3822 if ((declarations && typdef == tnone && !instruct)
3823 || (members && typdef != tignore && instruct))
3825 make_C_tag (TRUE); /* a function */
3826 fvdef = fvnameseen;
3828 else if (!declarations)
3829 fvdef = fvnone;
3830 token.valid = FALSE;
3831 break;
3832 default:
3833 fvdef = fvnone;
3835 if (structdef == stagseen)
3836 structdef = snone;
3837 break;
3838 case ']':
3839 if (definedef != dnone || inattribute)
3840 break;
3841 if (structdef == stagseen)
3842 structdef = snone;
3843 switch (typdef)
3845 case ttypeseen:
3846 case tend:
3847 typdef = tignore;
3848 make_C_tag (FALSE); /* a typedef */
3849 break;
3850 case tnone:
3851 case tinbody:
3852 switch (fvdef)
3854 case foperator:
3855 case finlist:
3856 case fignore:
3857 case vignore:
3858 break;
3859 case fvnameseen:
3860 if ((members && bracelev == 1)
3861 || (globals && bracelev == 0
3862 && (!fvextern || declarations)))
3863 make_C_tag (FALSE); /* a variable */
3864 /* FALLTHRU */
3865 default:
3866 fvdef = fvnone;
3868 break;
3870 break;
3871 case '(':
3872 if (inattribute)
3874 attrparlev++;
3875 break;
3877 if (definedef != dnone)
3878 break;
3879 if (objdef == otagseen && parlev == 0)
3880 objdef = oparenseen;
3881 switch (fvdef)
3883 case fvnameseen:
3884 if (typdef == ttypeseen
3885 && *lp != '*'
3886 && !instruct)
3888 /* This handles constructs like:
3889 typedef void OperatorFun (int fun); */
3890 make_C_tag (FALSE);
3891 typdef = tignore;
3892 fvdef = fignore;
3893 break;
3895 /* FALLTHRU */
3896 case foperator:
3897 fvdef = fstartlist;
3898 break;
3899 case flistseen:
3900 fvdef = finlist;
3901 break;
3903 parlev++;
3904 break;
3905 case ')':
3906 if (inattribute)
3908 if (--attrparlev == 0)
3909 inattribute = FALSE;
3910 break;
3912 if (definedef != dnone)
3913 break;
3914 if (objdef == ocatseen && parlev == 1)
3916 make_C_tag (TRUE); /* an Objective C category */
3917 objdef = oignore;
3919 if (--parlev == 0)
3921 switch (fvdef)
3923 case fstartlist:
3924 case finlist:
3925 fvdef = flistseen;
3926 break;
3928 if (!instruct
3929 && (typdef == tend
3930 || typdef == ttypeseen))
3932 typdef = tignore;
3933 make_C_tag (FALSE); /* a typedef */
3936 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3937 parlev = 0;
3938 break;
3939 case '{':
3940 if (definedef != dnone)
3941 break;
3942 if (typdef == ttypeseen)
3944 /* Whenever typdef is set to tinbody (currently only
3945 here), typdefbracelev should be set to bracelev. */
3946 typdef = tinbody;
3947 typdefbracelev = bracelev;
3949 switch (fvdef)
3951 case flistseen:
3952 make_C_tag (TRUE); /* a function */
3953 /* FALLTHRU */
3954 case fignore:
3955 fvdef = fvnone;
3956 break;
3957 case fvnone:
3958 switch (objdef)
3960 case otagseen:
3961 make_C_tag (TRUE); /* an Objective C class */
3962 objdef = oignore;
3963 break;
3964 case omethodtag:
3965 case omethodparm:
3966 make_C_tag (TRUE); /* an Objective C method */
3967 objdef = oinbody;
3968 break;
3969 default:
3970 /* Neutralize `extern "C" {' grot. */
3971 if (bracelev == 0 && structdef == snone && nestlev == 0
3972 && typdef == tnone)
3973 bracelev = -1;
3975 break;
3977 switch (structdef)
3979 case skeyseen: /* unnamed struct */
3980 pushclass_above (bracelev, NULL, 0);
3981 structdef = snone;
3982 break;
3983 case stagseen: /* named struct or enum */
3984 case scolonseen: /* a class */
3985 pushclass_above (bracelev,token.line+token.offset, token.length);
3986 structdef = snone;
3987 make_C_tag (FALSE); /* a struct or enum */
3988 break;
3990 bracelev += 1;
3991 break;
3992 case '*':
3993 if (definedef != dnone)
3994 break;
3995 if (fvdef == fstartlist)
3997 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3998 token.valid = FALSE;
4000 break;
4001 case '}':
4002 if (definedef != dnone)
4003 break;
4004 bracelev -= 1;
4005 if (!ignoreindent && lp == newlb.buffer + 1)
4007 if (bracelev != 0)
4008 token.valid = FALSE; /* unexpected value, token unreliable */
4009 bracelev = 0; /* reset brace level if first column */
4010 parlev = 0; /* also reset paren level, just in case... */
4012 else if (bracelev < 0)
4014 token.valid = FALSE; /* something gone amiss, token unreliable */
4015 bracelev = 0;
4017 if (bracelev == 0 && fvdef == vignore)
4018 fvdef = fvnone; /* end of function */
4019 popclass_above (bracelev);
4020 structdef = snone;
4021 /* Only if typdef == tinbody is typdefbracelev significant. */
4022 if (typdef == tinbody && bracelev <= typdefbracelev)
4024 assert (bracelev == typdefbracelev);
4025 typdef = tend;
4027 break;
4028 case '=':
4029 if (definedef != dnone)
4030 break;
4031 switch (fvdef)
4033 case foperator:
4034 case finlist:
4035 case fignore:
4036 case vignore:
4037 break;
4038 case fvnameseen:
4039 if ((members && bracelev == 1)
4040 || (globals && bracelev == 0 && (!fvextern || declarations)))
4041 make_C_tag (FALSE); /* a variable */
4042 /* FALLTHRU */
4043 default:
4044 fvdef = vignore;
4046 break;
4047 case '<':
4048 if (cplpl
4049 && (structdef == stagseen || fvdef == fvnameseen))
4051 templatelev++;
4052 break;
4054 goto resetfvdef;
4055 case '>':
4056 if (templatelev > 0)
4058 templatelev--;
4059 break;
4061 goto resetfvdef;
4062 case '+':
4063 case '-':
4064 if (objdef == oinbody && bracelev == 0)
4066 objdef = omethodsign;
4067 break;
4069 /* FALLTHRU */
4070 resetfvdef:
4071 case '#': case '~': case '&': case '%': case '/':
4072 case '|': case '^': case '!': case '.': case '?':
4073 if (definedef != dnone)
4074 break;
4075 /* These surely cannot follow a function tag in C. */
4076 switch (fvdef)
4078 case foperator:
4079 case finlist:
4080 case fignore:
4081 case vignore:
4082 break;
4083 default:
4084 fvdef = fvnone;
4086 break;
4087 case '\0':
4088 if (objdef == otagseen)
4090 make_C_tag (TRUE); /* an Objective C class */
4091 objdef = oignore;
4093 /* If a macro spans multiple lines don't reset its state. */
4094 if (quotednl)
4095 CNL_SAVE_DEFINEDEF ();
4096 else
4097 CNL ();
4098 break;
4099 } /* switch (c) */
4101 } /* while not eof */
/* Release the two buffers that linebuffer_init set up at entry. */
4103 free (lbs[0].lb.buffer);
4104 free (lbs[1].lb.buffer);
4108 * Process either a C++ file or a C file depending on the setting
4109 * of a global flag.
4111 static void
4112 default_C_entries (inf)
4113 FILE *inf;
4115 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: C_entries is called with no dialect bit set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4126 /* Always do C++. */
4127 static void
4128 Cplusplus_entries (inf)
4129 FILE *inf;
4131 C_entries (C_PLPL, inf);
4134 /* Always do Java. */
4135 static void
4136 Cjava_entries (inf)
4137 FILE *inf;
4139 C_entries (C_JAVA, inf);
4142 /* Always do C*. */
4143 static void
4144 Cstar_entries (inf)
4145 FILE *inf;
4147 C_entries (C_STAR, inf);
4150 /* Always do Yacc. */
4151 static void
4152 Yacc_entries (inf)
4153 FILE *inf;
4155 C_entries (YACC, inf);
/* Useful macros. */

/*
 * LOOP_ON_INPUT_LINES (file_pointer, line_buffer, char_pointer)
 *	Loop header: while FILE_POINTER is not at end of file, read the
 *	next line into LINE_BUFFER and point CHAR_POINTER at its first
 *	character, then run the statement that follows the macro.
 *	Every argument is parenthesized in the expansion so that
 *	expressions can be passed safely.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&(line_buffer), (file_pointer)),			\
	   (char_pointer) = (line_buffer).buffer,			\
	   TRUE);							\
      )

/*
 * LOOKING_AT (cp, kw)
 *	True when CP points at the literal string KW followed by a
 *	character for which notinname holds.  On success CP is moved
 *	past KW and past the spaces that follow; on failure it is left
 *	alone.  CP must be a modifiable lvalue.
 */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */	\
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */	\
   && strneq ((cp), kw, sizeof(kw)-1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof(kw)-1])		/* end of kw */		\
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))	/* skip spaces */

/*
 * LOOKING_AT_NOCASE (cp, kw)
 *	Like LOOKING_AT, but the comparison ignores case, the character
 *	after KW is not checked with notinname, and CP is only moved
 *	past KW: following spaces are NOT skipped.
 */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */	\
  ((assert("" kw), TRUE) /* syntax error if not a literal string */	\
   && strncaseeq ((cp), kw, sizeof(kw)-1)	/* cp points at kw */	\
   && ((cp) += sizeof(kw)-1))			/* step over kw */
4182 * Read a file, but do no processing. This is used to do regexp
4183 * matching on files that have no language defined.
4185 static void
4186 just_read_file (inf)
4187 FILE *inf;
4189 register char *dummy;
4191 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4192 continue;
4196 /* Fortran parsing */
4198 static void F_takeprec __P((void));
4199 static void F_getit __P((FILE *));
4201 static void
4202 F_takeprec ()
4204 dbp = skip_spaces (dbp);
4205 if (*dbp != '*')
4206 return;
4207 dbp++;
4208 dbp = skip_spaces (dbp);
4209 if (strneq (dbp, "(*)", 3))
4211 dbp += 3;
4212 return;
4214 if (!ISDIGIT (*dbp))
4216 --dbp; /* force failure */
4217 return;
4220 dbp++;
4221 while (ISDIGIT (*dbp));
4224 static void
4225 F_getit (inf)
4226 FILE *inf;
4228 register char *cp;
4230 dbp = skip_spaces (dbp);
4231 if (*dbp == '\0')
4233 readline (&lb, inf);
4234 dbp = lb.buffer;
4235 if (dbp[5] != '&')
4236 return;
4237 dbp += 6;
4238 dbp = skip_spaces (dbp);
4240 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4241 return;
4242 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4243 continue;
4244 make_tag (dbp, cp-dbp, TRUE,
4245 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4249 static void
4250 Fortran_functions (inf)
4251 FILE *inf;
4253 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4255 if (*dbp == '%')
4256 dbp++; /* Ratfor escape to fortran */
4257 dbp = skip_spaces (dbp);
4258 if (*dbp == '\0')
4259 continue;
4260 switch (lowcase (*dbp))
4262 case 'i':
4263 if (nocase_tail ("integer"))
4264 F_takeprec ();
4265 break;
4266 case 'r':
4267 if (nocase_tail ("real"))
4268 F_takeprec ();
4269 break;
4270 case 'l':
4271 if (nocase_tail ("logical"))
4272 F_takeprec ();
4273 break;
4274 case 'c':
4275 if (nocase_tail ("complex") || nocase_tail ("character"))
4276 F_takeprec ();
4277 break;
4278 case 'd':
4279 if (nocase_tail ("double"))
4281 dbp = skip_spaces (dbp);
4282 if (*dbp == '\0')
4283 continue;
4284 if (nocase_tail ("precision"))
4285 break;
4286 continue;
4288 break;
4290 dbp = skip_spaces (dbp);
4291 if (*dbp == '\0')
4292 continue;
4293 switch (lowcase (*dbp))
4295 case 'f':
4296 if (nocase_tail ("function"))
4297 F_getit (inf);
4298 continue;
4299 case 's':
4300 if (nocase_tail ("subroutine"))
4301 F_getit (inf);
4302 continue;
4303 case 'e':
4304 if (nocase_tail ("entry"))
4305 F_getit (inf);
4306 continue;
4307 case 'b':
4308 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4310 dbp = skip_spaces (dbp);
4311 if (*dbp == '\0') /* assume un-named */
4312 make_tag ("blockdata", 9, TRUE,
4313 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4314 else
4315 F_getit (inf); /* look for name */
4317 continue;
4324 * Ada parsing
4325 * Original code by
4326 * Philippe Waroquiers (1998)
4329 static void Ada_getit __P((FILE *, char *));
4331 /* Once we are positioned after an "interesting" keyword, let's get
4332 the real tag value necessary. */
4333 static void
4334 Ada_getit (inf, name_qualifier)
4335 FILE *inf;
4336 char *name_qualifier;
4338 register char *cp;
4339 char *name;
4340 char c;
4342 while (!feof (inf))
4344 dbp = skip_spaces (dbp);
4345 if (*dbp == '\0'
4346 || (dbp[0] == '-' && dbp[1] == '-'))
4348 readline (&lb, inf);
4349 dbp = lb.buffer;
4351 switch (lowcase(*dbp))
4353 case 'b':
4354 if (nocase_tail ("body"))
4356 /* Skipping body of procedure body or package body or ....
4357 resetting qualifier to body instead of spec. */
4358 name_qualifier = "/b";
4359 continue;
4361 break;
4362 case 't':
4363 /* Skipping type of task type or protected type ... */
4364 if (nocase_tail ("type"))
4365 continue;
4366 break;
4368 if (*dbp == '"')
4370 dbp += 1;
4371 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4372 continue;
4374 else
4376 dbp = skip_spaces (dbp);
4377 for (cp = dbp;
4378 (*cp != '\0'
4379 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4380 cp++)
4381 continue;
4382 if (cp == dbp)
4383 return;
4385 c = *cp;
4386 *cp = '\0';
4387 name = concat (dbp, name_qualifier, "");
4388 *cp = c;
4389 make_tag (name, strlen (name), TRUE,
4390 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4391 free (name);
4392 if (c == '"')
4393 dbp = cp + 1;
4394 return;
4398 static void
4399 Ada_funcs (inf)
4400 FILE *inf;
4402 bool inquote = FALSE;
4403 bool skip_till_semicolumn = FALSE;
4405 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4407 while (*dbp != '\0')
4409 /* Skip a string i.e. "abcd". */
4410 if (inquote || (*dbp == '"'))
4412 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4413 if (dbp != NULL)
4415 inquote = FALSE;
4416 dbp += 1;
4417 continue; /* advance char */
4419 else
4421 inquote = TRUE;
4422 break; /* advance line */
4426 /* Skip comments. */
4427 if (dbp[0] == '-' && dbp[1] == '-')
4428 break; /* advance line */
4430 /* Skip character enclosed in single quote i.e. 'a'
4431 and skip single quote starting an attribute i.e. 'Image. */
4432 if (*dbp == '\'')
4434 dbp++ ;
4435 if (*dbp != '\0')
4436 dbp++;
4437 continue;
4440 if (skip_till_semicolumn)
4442 if (*dbp == ';')
4443 skip_till_semicolumn = FALSE;
4444 dbp++;
4445 continue; /* advance char */
4448 /* Search for beginning of a token. */
4449 if (!begtoken (*dbp))
4451 dbp++;
4452 continue; /* advance char */
4455 /* We are at the beginning of a token. */
4456 switch (lowcase(*dbp))
4458 case 'f':
4459 if (!packages_only && nocase_tail ("function"))
4460 Ada_getit (inf, "/f");
4461 else
4462 break; /* from switch */
4463 continue; /* advance char */
4464 case 'p':
4465 if (!packages_only && nocase_tail ("procedure"))
4466 Ada_getit (inf, "/p");
4467 else if (nocase_tail ("package"))
4468 Ada_getit (inf, "/s");
4469 else if (nocase_tail ("protected")) /* protected type */
4470 Ada_getit (inf, "/t");
4471 else
4472 break; /* from switch */
4473 continue; /* advance char */
4475 case 'u':
4476 if (typedefs && !packages_only && nocase_tail ("use"))
4478 /* when tagging types, avoid tagging use type Pack.Typename;
4479 for this, we will skip everything till a ; */
4480 skip_till_semicolumn = TRUE;
4481 continue; /* advance char */
4484 case 't':
4485 if (!packages_only && nocase_tail ("task"))
4486 Ada_getit (inf, "/k");
4487 else if (typedefs && !packages_only && nocase_tail ("type"))
4489 Ada_getit (inf, "/t");
4490 while (*dbp != '\0')
4491 dbp += 1;
4493 else
4494 break; /* from switch */
4495 continue; /* advance char */
4498 /* Look for the end of the token. */
4499 while (!endtoken (*dbp))
4500 dbp++;
4502 } /* advance char */
4503 } /* advance line */
4508 * Unix and microcontroller assembly tag handling
4509 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4510 * Idea by Bob Weiner, Motorola Inc. (1994)
4512 static void
4513 Asm_labels (inf)
4514 FILE *inf;
4516 register char *cp;
4518 LOOP_ON_INPUT_LINES (inf, lb, cp)
4520 /* If first char is alphabetic or one of [_.$], test for colon
4521 following identifier. */
4522 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4524 /* Read past label. */
4525 cp++;
4526 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4527 cp++;
4528 if (*cp == ':' || iswhite (*cp))
4529 /* Found end of label, so copy it and add it to the table. */
4530 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4531 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4538 * Perl support
4539 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4540 * Perl variable names: /^(my|local).../
4541 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4542 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4543 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4545 static void
4546 Perl_functions (inf)
4547 FILE *inf;
4549 char *package = savestr ("main"); /* current package name */
4550 register char *cp;
4552 LOOP_ON_INPUT_LINES (inf, lb, cp)
4554 skip_spaces(cp);
4556 if (LOOKING_AT (cp, "package"))
4558 free (package);
4559 get_tag (cp, &package);
4561 else if (LOOKING_AT (cp, "sub"))
4563 char *pos;
4564 char *sp = cp;
4566 while (!notinname (*cp))
4567 cp++;
4568 if (cp == sp)
4569 continue; /* nothing found */
4570 if ((pos = etags_strchr (sp, ':')) != NULL
4571 && pos < cp && pos[1] == ':')
4572 /* The name is already qualified. */
4573 make_tag (sp, cp - sp, TRUE,
4574 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575 else
4576 /* Qualify it. */
4578 char savechar, *name;
4580 savechar = *cp;
4581 *cp = '\0';
4582 name = concat (package, "::", sp);
4583 *cp = savechar;
4584 make_tag (name, strlen(name), TRUE,
4585 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586 free (name);
4589 else if (globals) /* only if we are tagging global vars */
4591 /* Skip a qualifier, if any. */
4592 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4593 /* After "my" or "local", but before any following paren or space. */
4594 char *varstart = cp;
4596 if (qual /* should this be removed? If yes, how? */
4597 && (*cp == '$' || *cp == '@' || *cp == '%'))
4599 varstart += 1;
4601 cp++;
4602 while (ISALNUM (*cp) || *cp == '_');
4604 else if (qual)
4606 /* Should be examining a variable list at this point;
4607 could insist on seeing an open parenthesis. */
4608 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4609 cp++;
4611 else
4612 continue;
4614 make_tag (varstart, cp - varstart, FALSE,
4615 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4618 free (package);
4623 * Python support
4624 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4625 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4626 * More ideas by seb bacon <seb@jamkit.com> (2002)
4628 static void
4629 Python_functions (inf)
4630 FILE *inf;
4632 register char *cp;
4634 LOOP_ON_INPUT_LINES (inf, lb, cp)
4636 cp = skip_spaces (cp);
4637 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4639 char *name = cp;
4640 while (!notinname (*cp) && *cp != ':')
4641 cp++;
4642 make_tag (name, cp - name, TRUE,
4643 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4650 * PHP support
4651 * Look for:
4652 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4653 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4654 * - /^[ \t]*define\(\"[^\"]+/
4655 * Only with --members:
4656 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4657 * Idea by Diez B. Roggisch (2001)
4659 static void
4660 PHP_functions (inf)
4661 FILE *inf;
4663 register char *cp, *name;
4664 bool search_identifier = FALSE;
4666 LOOP_ON_INPUT_LINES (inf, lb, cp)
4668 cp = skip_spaces (cp);
4669 name = cp;
4670 if (search_identifier
4671 && *cp != '\0')
4673 while (!notinname (*cp))
4674 cp++;
4675 make_tag (name, cp - name, TRUE,
4676 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4677 search_identifier = FALSE;
4679 else if (LOOKING_AT (cp, "function"))
4681 if(*cp == '&')
4682 cp = skip_spaces (cp+1);
4683 if(*cp != '\0')
4685 name = cp;
4686 while (!notinname (*cp))
4687 cp++;
4688 make_tag (name, cp - name, TRUE,
4689 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4691 else
4692 search_identifier = TRUE;
4694 else if (LOOKING_AT (cp, "class"))
4696 if (*cp != '\0')
4698 name = cp;
4699 while (*cp != '\0' && !iswhite (*cp))
4700 cp++;
4701 make_tag (name, cp - name, FALSE,
4702 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4704 else
4705 search_identifier = TRUE;
4707 else if (strneq (cp, "define", 6)
4708 && (cp = skip_spaces (cp+6))
4709 && *cp++ == '('
4710 && (*cp == '"' || *cp == '\''))
4712 char quote = *cp++;
4713 name = cp;
4714 while (*cp != quote && *cp != '\0')
4715 cp++;
4716 make_tag (name, cp - name, FALSE,
4717 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4719 else if (members
4720 && LOOKING_AT (cp, "var")
4721 && *cp == '$')
4723 name = cp;
4724 while (!notinname(*cp))
4725 cp++;
4726 make_tag (name, cp - name, FALSE,
4727 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4734 * Cobol tag functions
4735 * We could look for anything that could be a paragraph name.
4736 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4737 * Idea by Corny de Souza (1993)
4739 static void
4740 Cobol_paragraphs (inf)
4741 FILE *inf;
4743 register char *bp, *ep;
4745 LOOP_ON_INPUT_LINES (inf, lb, bp)
4747 if (lb.len < 9)
4748 continue;
4749 bp += 8;
4751 /* If eoln, compiler option or comment ignore whole line. */
4752 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4753 continue;
4755 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4756 continue;
4757 if (*ep++ == '.')
4758 make_tag (bp, ep - bp, TRUE,
4759 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4765 * Makefile support
4766 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4768 static void
4769 Makefile_targets (inf)
4770 FILE *inf;
4772 register char *bp;
4774 LOOP_ON_INPUT_LINES (inf, lb, bp)
4776 if (*bp == '\t' || *bp == '#')
4777 continue;
4778 while (*bp != '\0' && *bp != '=' && *bp != ':')
4779 bp++;
4780 if (*bp == ':' || (globals && *bp == '='))
4782 /* We should detect if there is more than one tag, but we do not.
4783 We just skip initial and final spaces. */
4784 char * namestart = skip_spaces (lb.buffer);
4785 while (--bp > namestart)
4786 if (!notinname (*bp))
4787 break;
4788 make_tag (namestart, bp - namestart + 1, TRUE,
4789 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4796 * Pascal parsing
4797 * Original code by Mosur K. Mohan (1989)
4799 * Locates tags for procedures & functions. Doesn't do any type- or
4800 * var-definitions. It does look for the keyword "extern" or
4801 * "forward" immediately following the procedure statement; if found,
4802 * the tag is skipped.
4804 static void
4805 Pascal_functions (inf)
4806 FILE *inf;
4808 linebuffer tline; /* mostly copied from C_entries */
4809 long save_lcno;
4810 int save_lineno, namelen, taglen;
4811 char c, *name;
4813 bool /* each of these flags is TRUE if: */
4814 incomment, /* point is inside a comment */
4815 inquote, /* point is inside '..' string */
4816 get_tagname, /* point is after PROCEDURE/FUNCTION
4817 keyword, so next item = potential tag */
4818 found_tag, /* point is after a potential tag */
4819 inparms, /* point is within parameter-list */
4820 verify_tag; /* point has passed the parm-list, so the
4821 next token will determine whether this
4822 is a FORWARD/EXTERN to be ignored, or
4823 whether it is a real tag */
4825 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4826 name = NULL; /* keep compiler quiet */
4827 dbp = lb.buffer;
4828 *dbp = '\0';
4829 linebuffer_init (&tline);
4831 incomment = inquote = FALSE;
4832 found_tag = FALSE; /* have a proc name; check if extern */
4833 get_tagname = FALSE; /* found "procedure" keyword */
4834 inparms = FALSE; /* found '(' after "proc" */
4835 verify_tag = FALSE; /* check if "extern" is ahead */
4838 while (!feof (inf)) /* long main loop to get next char */
4840 c = *dbp++;
4841 if (c == '\0') /* if end of line */
4843 readline (&lb, inf);
4844 dbp = lb.buffer;
4845 if (*dbp == '\0')
4846 continue;
4847 if (!((found_tag && verify_tag)
4848 || get_tagname))
4849 c = *dbp++; /* only if don't need *dbp pointing
4850 to the beginning of the name of
4851 the procedure or function */
4853 if (incomment)
4855 if (c == '}') /* within { } comments */
4856 incomment = FALSE;
4857 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4859 dbp++;
4860 incomment = FALSE;
4862 continue;
4864 else if (inquote)
4866 if (c == '\'')
4867 inquote = FALSE;
4868 continue;
4870 else
4871 switch (c)
4873 case '\'':
4874 inquote = TRUE; /* found first quote */
4875 continue;
4876 case '{': /* found open { comment */
4877 incomment = TRUE;
4878 continue;
4879 case '(':
4880 if (*dbp == '*') /* found open (* comment */
4882 incomment = TRUE;
4883 dbp++;
4885 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4886 inparms = TRUE;
4887 continue;
4888 case ')': /* end of parms list */
4889 if (inparms)
4890 inparms = FALSE;
4891 continue;
4892 case ';':
4893 if (found_tag && !inparms) /* end of proc or fn stmt */
4895 verify_tag = TRUE;
4896 break;
4898 continue;
4900 if (found_tag && verify_tag && (*dbp != ' '))
4902 /* Check if this is an "extern" declaration. */
4903 if (*dbp == '\0')
4904 continue;
4905 if (lowcase (*dbp == 'e'))
4907 if (nocase_tail ("extern")) /* superfluous, really! */
4909 found_tag = FALSE;
4910 verify_tag = FALSE;
4913 else if (lowcase (*dbp) == 'f')
4915 if (nocase_tail ("forward")) /* check for forward reference */
4917 found_tag = FALSE;
4918 verify_tag = FALSE;
4921 if (found_tag && verify_tag) /* not external proc, so make tag */
4923 found_tag = FALSE;
4924 verify_tag = FALSE;
4925 make_tag (name, namelen, TRUE,
4926 tline.buffer, taglen, save_lineno, save_lcno);
4927 continue;
4930 if (get_tagname) /* grab name of proc or fn */
4932 char *cp;
4934 if (*dbp == '\0')
4935 continue;
4937 /* Find block name. */
4938 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4939 continue;
4941 /* Save all values for later tagging. */
4942 linebuffer_setlen (&tline, lb.len);
4943 strcpy (tline.buffer, lb.buffer);
4944 save_lineno = lineno;
4945 save_lcno = linecharno;
4946 name = tline.buffer + (dbp - lb.buffer);
4947 namelen = cp - dbp;
4948 taglen = cp - lb.buffer + 1;
4950 dbp = cp; /* set dbp to e-o-token */
4951 get_tagname = FALSE;
4952 found_tag = TRUE;
4953 continue;
4955 /* And proceed to check for "extern". */
4957 else if (!incomment && !inquote && !found_tag)
4959 /* Check for proc/fn keywords. */
4960 switch (lowcase (c))
4962 case 'p':
4963 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4964 get_tagname = TRUE;
4965 continue;
4966 case 'f':
4967 if (nocase_tail ("unction"))
4968 get_tagname = TRUE;
4969 continue;
4972 } /* while not eof */
4974 free (tline.buffer);
4979 * Lisp tag functions
4980 * look for (def or (DEF, quote or QUOTE
4983 static void L_getit __P((void));
4985 static void
4986 L_getit ()
4988 if (*dbp == '\'') /* Skip prefix quote */
4989 dbp++;
4990 else if (*dbp == '(')
4992 dbp++;
4993 /* Try to skip "(quote " */
4994 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4995 /* Ok, then skip "(" before name in (defstruct (foo)) */
4996 dbp = skip_spaces (dbp);
4998 get_tag (dbp, NULL);
5001 static void
5002 Lisp_functions (inf)
5003 FILE *inf;
5005 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5007 if (dbp[0] != '(')
5008 continue;
5010 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5012 dbp = skip_non_spaces (dbp);
5013 dbp = skip_spaces (dbp);
5014 L_getit ();
5016 else
5018 /* Check for (foo::defmumble name-defined ... */
5020 dbp++;
5021 while (!notinname (*dbp) && *dbp != ':');
5022 if (*dbp == ':')
5025 dbp++;
5026 while (*dbp == ':');
5028 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5030 dbp = skip_non_spaces (dbp);
5031 dbp = skip_spaces (dbp);
5032 L_getit ();
5041 * Lua script language parsing
5042 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5044 * "function" and "local function" are tags if they start at column 1.
5046 static void
5047 Lua_functions (inf)
5048 FILE *inf;
5050 register char *bp;
5052 LOOP_ON_INPUT_LINES (inf, lb, bp)
5054 if (bp[0] != 'f' && bp[0] != 'l')
5055 continue;
5057 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5059 if (LOOKING_AT (bp, "function"))
5060 get_tag (bp, NULL);
5066 * Postscript tags
5067 * Just look for lines where the first character is '/'
5068 * Also look at "defineps" for PSWrap
5069 * Ideas by:
5070 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5071 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5073 static void
5074 PS_functions (inf)
5075 FILE *inf;
5077 register char *bp, *ep;
5079 LOOP_ON_INPUT_LINES (inf, lb, bp)
5081 if (bp[0] == '/')
5083 for (ep = bp+1;
5084 *ep != '\0' && *ep != ' ' && *ep != '{';
5085 ep++)
5086 continue;
5087 make_tag (bp, ep - bp, TRUE,
5088 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5090 else if (LOOKING_AT (bp, "defineps"))
5091 get_tag (bp, NULL);
5097 * Forth tags
5098 * Ignore anything after \ followed by space or in ( )
5099 * Look for words defined by :
5100 * Look for constant, code, create, defer, value, and variable
5101 * OBP extensions: Look for buffer:, field,
5102 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5104 static void
5105 Forth_words (inf)
5106 FILE *inf;
5108 register char *bp;
5110 LOOP_ON_INPUT_LINES (inf, lb, bp)
5111 while ((bp = skip_spaces (bp))[0] != '\0')
5112 if (bp[0] == '\\' && iswhite(bp[1]))
5113 break; /* read next line */
5114 else if (bp[0] == '(' && iswhite(bp[1]))
5115 do /* skip to ) or eol */
5116 bp++;
5117 while (*bp != ')' && *bp != '\0');
5118 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5119 || LOOKING_AT_NOCASE (bp, "constant")
5120 || LOOKING_AT_NOCASE (bp, "code")
5121 || LOOKING_AT_NOCASE (bp, "create")
5122 || LOOKING_AT_NOCASE (bp, "defer")
5123 || LOOKING_AT_NOCASE (bp, "value")
5124 || LOOKING_AT_NOCASE (bp, "variable")
5125 || LOOKING_AT_NOCASE (bp, "buffer:")
5126 || LOOKING_AT_NOCASE (bp, "field"))
5127 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5128 else
5129 bp = skip_non_spaces (bp);
5134 * Scheme tag functions
5135 * look for (def... xyzzy
5136 * (def... (xyzzy
5137 * (def ... ((...(xyzzy ....
5138 * (set! xyzzy
5139 * Original code by Ken Haase (1985?)
5141 static void
5142 Scheme_functions (inf)
5143 FILE *inf;
5145 register char *bp;
5147 LOOP_ON_INPUT_LINES (inf, lb, bp)
5149 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5151 bp = skip_non_spaces (bp+4);
5152 /* Skip over open parens and white space */
5153 while (notinname (*bp))
5154 bp++;
5155 get_tag (bp, NULL);
5157 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5158 get_tag (bp, NULL);
5163 /* Find tags in TeX and LaTeX input files. */
5165 /* TEX_toktab is a table of TeX control sequences that define tags.
5166 * Each entry records one such control sequence.
5168 * Original code from who knows whom.
5169 * Ideas by:
5170 * Stefan Monnier (2002)
5173 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5175 /* Default set of control sequences to put into TEX_toktab.
5176 The value of environment var TEXTAGS is prepended to this. */
5177 static char *TEX_defenv = "\
5178 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5179 :part:appendix:entry:index:def\
5180 :newcommand:renewcommand:newenvironment:renewenvironment";
5182 static void TEX_mode __P((FILE *));
5183 static void TEX_decode_env __P((char *, char *));
5185 static char TEX_esc = '\\';
5186 static char TEX_opgrp = '{';
5187 static char TEX_clgrp = '}';
5190 * TeX/LaTeX scanning loop.
5192 static void
5193 TeX_commands (inf)
5194 FILE *inf;
5196 char *cp;
5197 linebuffer *key;
5199 /* Select either \ or ! as escape character. */
5200 TEX_mode (inf);
5202 /* Initialize token table once from environment. */
5203 if (TEX_toktab == NULL)
5204 TEX_decode_env ("TEXTAGS", TEX_defenv);
5206 LOOP_ON_INPUT_LINES (inf, lb, cp)
5208 /* Look at each TEX keyword in line. */
5209 for (;;)
5211 /* Look for a TEX escape. */
5212 while (*cp++ != TEX_esc)
5213 if (cp[-1] == '\0' || cp[-1] == '%')
5214 goto tex_next_line;
5216 for (key = TEX_toktab; key->buffer != NULL; key++)
5217 if (strneq (cp, key->buffer, key->len))
5219 register char *p;
5220 int namelen, linelen;
5221 bool opgrp = FALSE;
5223 cp = skip_spaces (cp + key->len);
5224 if (*cp == TEX_opgrp)
5226 opgrp = TRUE;
5227 cp++;
5229 for (p = cp;
5230 (!iswhite (*p) && *p != '#' &&
5231 *p != TEX_opgrp && *p != TEX_clgrp);
5232 p++)
5233 continue;
5234 namelen = p - cp;
5235 linelen = lb.len;
5236 if (!opgrp || *p == TEX_clgrp)
5238 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5239 p++;
5240 linelen = p - lb.buffer + 1;
5242 make_tag (cp, namelen, TRUE,
5243 lb.buffer, linelen, lineno, linecharno);
5244 goto tex_next_line; /* We only tag a line once */
5247 tex_next_line:
5252 #define TEX_LESC '\\'
5253 #define TEX_SESC '!'
5255 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5256 chars accordingly. */
5257 static void
5258 TEX_mode (inf)
5259 FILE *inf;
5261 int c;
5263 while ((c = getc (inf)) != EOF)
5265 /* Skip to next line if we hit the TeX comment char. */
5266 if (c == '%')
5267 while (c != '\n' && c != EOF)
5268 c = getc (inf);
5269 else if (c == TEX_LESC || c == TEX_SESC )
5270 break;
5273 if (c == TEX_LESC)
5275 TEX_esc = TEX_LESC;
5276 TEX_opgrp = '{';
5277 TEX_clgrp = '}';
5279 else
5281 TEX_esc = TEX_SESC;
5282 TEX_opgrp = '<';
5283 TEX_clgrp = '>';
5285 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5286 No attempt is made to correct the situation. */
5287 rewind (inf);
5290 /* Read environment and prepend it to the default string.
5291 Build token table. */
5292 static void
5293 TEX_decode_env (evarname, defenv)
5294 char *evarname;
5295 char *defenv;
5297 register char *env, *p;
5298 int i, len;
5300 /* Append default string to environment. */
5301 env = getenv (evarname);
5302 if (!env)
5303 env = defenv;
5304 else
5306 char *oldenv = env;
5307 env = concat (oldenv, defenv, "");
5310 /* Allocate a token table */
5311 for (len = 1, p = env; p;)
5312 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5313 len++;
5314 TEX_toktab = xnew (len, linebuffer);
5316 /* Unpack environment string into token table. Be careful about */
5317 /* zero-length strings (leading ':', "::" and trailing ':') */
5318 for (i = 0; *env != '\0';)
5320 p = etags_strchr (env, ':');
5321 if (!p) /* End of environment string. */
5322 p = env + strlen (env);
5323 if (p - env > 0)
5324 { /* Only non-zero strings. */
5325 TEX_toktab[i].buffer = savenstr (env, p - env);
5326 TEX_toktab[i].len = p - env;
5327 i++;
5329 if (*p)
5330 env = p + 1;
5331 else
5333 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5334 TEX_toktab[i].len = 0;
5335 break;
5341 /* Texinfo support. Dave Love, Mar. 2000. */
5342 static void
5343 Texinfo_nodes (inf)
5344 FILE * inf;
5346 char *cp, *start;
5347 LOOP_ON_INPUT_LINES (inf, lb, cp)
5348 if (LOOKING_AT (cp, "@node"))
5350 start = cp;
5351 while (*cp != '\0' && *cp != ',')
5352 cp++;
5353 make_tag (start, cp - start, TRUE,
5354 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5360 * HTML support.
5361 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5362 * Contents of <a name=xxx> are tags with name xxx.
5364 * Francesco Potortì, 2002.
5366 static void
5367 HTML_labels (inf)
5368 FILE * inf;
5370 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5371 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5372 bool intag = FALSE; /* inside an html tag, looking for ID= */
5373 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5374 char *end;
5377 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5379 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5380 for (;;) /* loop on the same line */
5382 if (skiptag) /* skip HTML tag */
5384 while (*dbp != '\0' && *dbp != '>')
5385 dbp++;
5386 if (*dbp == '>')
5388 dbp += 1;
5389 skiptag = FALSE;
5390 continue; /* look on the same line */
5392 break; /* go to next line */
5395 else if (intag) /* look for "name=" or "id=" */
5397 while (*dbp != '\0' && *dbp != '>'
5398 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5399 dbp++;
5400 if (*dbp == '\0')
5401 break; /* go to next line */
5402 if (*dbp == '>')
5404 dbp += 1;
5405 intag = FALSE;
5406 continue; /* look on the same line */
5408 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5409 || LOOKING_AT_NOCASE (dbp, "id="))
5411 bool quoted = (dbp[0] == '"');
5413 if (quoted)
5414 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5415 continue;
5416 else
5417 for (end = dbp; *end != '\0' && intoken (*end); end++)
5418 continue;
5419 linebuffer_setlen (&token_name, end - dbp);
5420 strncpy (token_name.buffer, dbp, end - dbp);
5421 token_name.buffer[end - dbp] = '\0';
5423 dbp = end;
5424 intag = FALSE; /* we found what we looked for */
5425 skiptag = TRUE; /* skip to the end of the tag */
5426 getnext = TRUE; /* then grab the text */
5427 continue; /* look on the same line */
5429 dbp += 1;
5432 else if (getnext) /* grab next tokens and tag them */
5434 dbp = skip_spaces (dbp);
5435 if (*dbp == '\0')
5436 break; /* go to next line */
5437 if (*dbp == '<')
5439 intag = TRUE;
5440 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5441 continue; /* look on the same line */
5444 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5445 continue;
5446 make_tag (token_name.buffer, token_name.len, TRUE,
5447 dbp, end - dbp, lineno, linecharno);
5448 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5449 getnext = FALSE;
5450 break; /* go to next line */
5453 else /* look for an interesting HTML tag */
5455 while (*dbp != '\0' && *dbp != '<')
5456 dbp++;
5457 if (*dbp == '\0')
5458 break; /* go to next line */
5459 intag = TRUE;
5460 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5462 inanchor = TRUE;
5463 continue; /* look on the same line */
5465 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5466 || LOOKING_AT_NOCASE (dbp, "<h1>")
5467 || LOOKING_AT_NOCASE (dbp, "<h2>")
5468 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5470 intag = FALSE;
5471 getnext = TRUE;
5472 continue; /* look on the same line */
5474 dbp += 1;
5481 * Prolog support
5483 * Assumes that the predicate or rule starts at column 0.
5484 * Only the first clause of a predicate or rule is added.
5485 * Original code by Sunichirou Sugou (1989)
5486 * Rewritten by Anders Lindgren (1996)
5488 static int prolog_pr __P((char *, char *));
5489 static void prolog_skip_comment __P((linebuffer *, FILE *));
5490 static int prolog_atom __P((char *, int));
5492 static void
5493 Prolog_functions (inf)
5494 FILE *inf;
5496 char *cp, *last;
5497 int len;
5498 int allocated;
5500 allocated = 0;
5501 len = 0;
5502 last = NULL;
5504 LOOP_ON_INPUT_LINES (inf, lb, cp)
5506 if (cp[0] == '\0') /* Empty line */
5507 continue;
5508 else if (iswhite (cp[0])) /* Not a predicate */
5509 continue;
5510 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5511 prolog_skip_comment (&lb, inf);
5512 else if ((len = prolog_pr (cp, last)) > 0)
5514 /* Predicate or rule. Store the function name so that we
5515 only generate a tag for the first clause. */
5516 if (last == NULL)
5517 last = xnew(len + 1, char);
5518 else if (len + 1 > allocated)
5519 xrnew (last, len + 1, char);
5520 allocated = len + 1;
5521 strncpy (last, cp, len);
5522 last[len] = '\0';
5525 if (last != NULL)
5526 free (last);
5530 static void
5531 prolog_skip_comment (plb, inf)
5532 linebuffer *plb;
5533 FILE *inf;
5535 char *cp;
5539 for (cp = plb->buffer; *cp != '\0'; cp++)
5540 if (cp[0] == '*' && cp[1] == '/')
5541 return;
5542 readline (plb, inf);
5544 while (!feof(inf));
5548 * A predicate or rule definition is added if it matches:
5549 * <beginning of line><Prolog Atom><whitespace>(
5550 * or <beginning of line><Prolog Atom><whitespace>:-
5552 * It is added to the tags database if it doesn't match the
5553 * name of the previous clause header.
5555 * Return the size of the name of the predicate or rule, or 0 if no
5556 * header was found.
5558 static int
5559 prolog_pr (s, last)
5560 char *s;
5561 char *last; /* Name of last clause. */
5563 int pos;
5564 int len;
5566 pos = prolog_atom (s, 0);
5567 if (pos < 1)
5568 return 0;
5570 len = pos;
5571 pos = skip_spaces (s + pos) - s;
5573 if ((s[pos] == '.'
5574 || (s[pos] == '(' && (pos += 1))
5575 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5576 && (last == NULL /* save only the first clause */
5577 || len != (int)strlen (last)
5578 || !strneq (s, last, len)))
5580 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5581 return len;
5583 else
5584 return 0;
5588 * Consume a Prolog atom.
5589 * Return the number of bytes consumed, or -1 if there was an error.
5591 * A prolog atom, in this context, could be one of:
5592 * - An alphanumeric sequence, starting with a lower case letter.
5593 * - A quoted arbitrary string. Single quotes can escape themselves.
5594 * Backslash quotes everything.
/* Consume the Prolog atom starting at S[POS] and return its length in
   bytes, or -1 if no atom starts there.  An atom is either a lower
   case letter or '_' followed by alphanumerics and '_', or a single
   quoted string in which '' stands for a quote and a backslash quotes
   the following character.  A quoted atom that is not closed on this
   line yields -1.  */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  p++;                          /* step over the opening quote */
  for (;;)
    {
      if (*p == '\'')
        {
          p++;
          if (*p != '\'')
            break;              /* that was the closing quote */
          p++;                  /* A double quote */
        }
      else if (*p == '\0')
        /* Multiline quoted atoms are ignored. */
        return -1;
      else if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
  return p - start;
}
5648 * Support for Erlang
5650 * Generates tags for functions, defines, and records.
5651 * Assumes that Erlang functions start at column 0.
5652 * Original code by Anders Lindgren (1996)
5654 static int erlang_func __P((char *, char *));
5655 static void erlang_attribute __P((char *));
5656 static int erlang_atom __P((char *));
5658 static void
5659 Erlang_functions (inf)
5660 FILE *inf;
5662 char *cp, *last;
5663 int len;
5664 int allocated;
5666 allocated = 0;
5667 len = 0;
5668 last = NULL;
5670 LOOP_ON_INPUT_LINES (inf, lb, cp)
5672 if (cp[0] == '\0') /* Empty line */
5673 continue;
5674 else if (iswhite (cp[0])) /* Not function nor attribute */
5675 continue;
5676 else if (cp[0] == '%') /* comment */
5677 continue;
5678 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5679 continue;
5680 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5682 erlang_attribute (cp);
5683 if (last != NULL)
5685 free (last);
5686 last = NULL;
5689 else if ((len = erlang_func (cp, last)) > 0)
5692 * Function. Store the function name so that we only
5693 * generates a tag for the first clause.
5695 if (last == NULL)
5696 last = xnew (len + 1, char);
5697 else if (len + 1 > allocated)
5698 xrnew (last, len + 1, char);
5699 allocated = len + 1;
5700 strncpy (last, cp, len);
5701 last[len] = '\0';
5704 if (last != NULL)
5705 free (last);
5710 * A function definition is added if it matches:
5711 * <beginning of line><Erlang Atom><whitespace>(
5713 * It is added to the tags database if it doesn't match the
5714 * name of the previous clause header.
5716 * Return the size of the name of the function, or 0 if no function
5717 * was found.
5719 static int
5720 erlang_func (s, last)
5721 char *s;
5722 char *last; /* Name of last clause. */
5724 int pos;
5725 int len;
5727 pos = erlang_atom (s);
5728 if (pos < 1)
5729 return 0;
5731 len = pos;
5732 pos = skip_spaces (s + pos) - s;
5734 /* Save only the first clause. */
5735 if (s[pos++] == '('
5736 && (last == NULL
5737 || len != (int)strlen (last)
5738 || !strneq (s, last, len)))
5740 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5741 return len;
5744 return 0;
5749 * Handle attributes. Currently, tags are generated for defines
5750 * and records.
5752 * They are on the form:
5753 * -define(foo, bar).
5754 * -define(Foo(M, N), M+N).
5755 * -record(graph, {vtab = notable, cyclic = true}).
5757 static void
5758 erlang_attribute (s)
5759 char *s;
5761 char *cp = s;
5763 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5764 && *cp++ == '(')
5766 int len = erlang_atom (skip_spaces (cp));
5767 if (len > 0)
5768 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5770 return;
5775 * Consume an Erlang atom (or variable).
5776 * Return the number of bytes consumed, or 0 if no atom was found.
/* Return the length in bytes of the Erlang atom or variable at the
   start of S, or 0 if there is none.  A quoted atom must be closed on
   the same line; inside it a backslash quotes the next character.  */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (s[0] == '\'')
    {
      for (pos = 1; s[pos] != '\''; pos++)
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\' && s[++pos] == '\0')
            return 0;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (!ISALPHA (s[0]) && s[0] != '_')
    return 0;

  /* The atom is unquoted. */
  for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos;
}
5804 static char *scan_separators __P((char *));
5805 static void add_regex __P((char *, language *));
5806 static char *substitute __P((char *, char *, struct re_registers *));
5809 * Take a string like "/blah/" and turn it into "blah", verifying
5810 * that the first and last characters are the same, and handling
5811 * quoted separator characters. Actually, stops on the occurrence of
5812 * an unquoted separator. Also process \t, \n, etc. and turn into
5813 * appropriate characters. Works in place. Null terminates name string.
5814 * Returns pointer to terminating separator, or NULL for
5815 * unterminated regexps.
/* NAME starts with a separator character.  Copy the text that follows
   it down to the start of NAME, stopping at the first unquoted
   separator, and null terminate the copy.  Backslash sequences such as
   \t or \n are replaced by the characters they denote, a quoted
   separator loses its backslash and any other quoted character keeps
   it.  Return a pointer to the terminating separator, or NULL if the
   text ends before one is found.  */
static char *
scan_separators (name)
     char *name;
{
  const char sep = name[0];
  char *src;                    /* read position */
  char *dst = name;             /* write position, always behind SRC */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          src++;
          if (*src == '\0')
            break;              /* dangling backslash at end of text */

          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;   /* BS (back space) */
            case 'd': *dst++ = 0177; break;   /* DEL (delete) */
            case 'e': *dst++ = 033; break;    /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;   /* NL (new line) */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
            default:
              if (*src == sep)
                *dst++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *dst++ = '\\';
                  *dst++ = *src;
                }
              break;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* Signal an unterminated regexp with NULL.  */
  result = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5868 /* Look at the argument of --regex or --no-regex and do the right
5869 thing. Same for each line of a regexp file. */
5870 static void
5871 analyse_regex (regex_arg)
5872 char *regex_arg;
5874 if (regex_arg == NULL)
5876 free_regexps (); /* --no-regex: remove existing regexps */
5877 return;
5880 /* A real --regexp option or a line in a regexp file. */
5881 switch (regex_arg[0])
5883 /* Comments in regexp file or null arg to --regex. */
5884 case '\0':
5885 case ' ':
5886 case '\t':
5887 break;
5889 /* Read a regex file. This is recursive and may result in a
5890 loop, which will stop when the file descriptors are exhausted. */
5891 case '@':
5893 FILE *regexfp;
5894 linebuffer regexbuf;
5895 char *regexfile = regex_arg + 1;
5897 /* regexfile is a file containing regexps, one per line. */
5898 regexfp = fopen (regexfile, "r");
5899 if (regexfp == NULL)
5901 pfatal (regexfile);
5902 return;
5904 linebuffer_init (&regexbuf);
5905 while (readline_internal (&regexbuf, regexfp) > 0)
5906 analyse_regex (regexbuf.buffer);
5907 free (regexbuf.buffer);
5908 fclose (regexfp);
5910 break;
5912 /* Regexp to be used for a specific language only. */
5913 case '{':
5915 language *lang;
5916 char *lang_name = regex_arg + 1;
5917 char *cp;
5919 for (cp = lang_name; *cp != '}'; cp++)
5920 if (*cp == '\0')
5922 error ("unterminated language name in regex: %s", regex_arg);
5923 return;
5925 *cp++ = '\0';
5926 lang = get_language_from_langname (lang_name);
5927 if (lang == NULL)
5928 return;
5929 add_regex (cp, lang);
5931 break;
5933 /* Regexp to be used for any language. */
5934 default:
5935 add_regex (regex_arg, NULL);
5936 break;
5940 /* Separate the regexp pattern, compile it,
5941 and care for optional name and modifiers. */
5942 static void
5943 add_regex (regexp_pattern, lang)
5944 char *regexp_pattern;
5945 language *lang;
5947 static struct re_pattern_buffer zeropattern;
5948 char sep, *pat, *name, *modifiers;
5949 const char *err;
5950 struct re_pattern_buffer *patbuf;
5951 regexp *rp;
5952 bool
5953 force_explicit_name = TRUE, /* do not use implicit tag names */
5954 ignore_case = FALSE, /* case is significant */
5955 multi_line = FALSE, /* matches are done one line at a time */
5956 single_line = FALSE; /* dot does not match newline */
5959 if (strlen(regexp_pattern) < 3)
5961 error ("null regexp", (char *)NULL);
5962 return;
5964 sep = regexp_pattern[0];
5965 name = scan_separators (regexp_pattern);
5966 if (name == NULL)
5968 error ("%s: unterminated regexp", regexp_pattern);
5969 return;
5971 if (name[1] == sep)
5973 error ("null name for regexp \"%s\"", regexp_pattern);
5974 return;
5976 modifiers = scan_separators (name);
5977 if (modifiers == NULL) /* no terminating separator --> no name */
5979 modifiers = name;
5980 name = "";
5982 else
5983 modifiers += 1; /* skip separator */
5985 /* Parse regex modifiers. */
5986 for (; modifiers[0] != '\0'; modifiers++)
5987 switch (modifiers[0])
5989 case 'N':
5990 if (modifiers == name)
5991 error ("forcing explicit tag name but no name, ignoring", NULL);
5992 force_explicit_name = TRUE;
5993 break;
5994 case 'i':
5995 ignore_case = TRUE;
5996 break;
5997 case 's':
5998 single_line = TRUE;
5999 /* FALLTHRU */
6000 case 'm':
6001 multi_line = TRUE;
6002 need_filebuf = TRUE;
6003 break;
6004 default:
6006 char wrongmod [2];
6007 wrongmod[0] = modifiers[0];
6008 wrongmod[1] = '\0';
6009 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6011 break;
6014 patbuf = xnew (1, struct re_pattern_buffer);
6015 *patbuf = zeropattern;
6016 if (ignore_case)
6018 static char lc_trans[CHARS];
6019 int i;
6020 for (i = 0; i < CHARS; i++)
6021 lc_trans[i] = lowcase (i);
6022 patbuf->translate = lc_trans; /* translation table to fold case */
6025 if (multi_line)
6026 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6027 else
6028 pat = regexp_pattern;
6030 if (single_line)
6031 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6032 else
6033 re_set_syntax (RE_SYNTAX_EMACS);
6035 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6036 if (multi_line)
6037 free (pat);
6038 if (err != NULL)
6040 error ("%s while compiling pattern", err);
6041 return;
6044 rp = p_head;
6045 p_head = xnew (1, regexp);
6046 p_head->pattern = savestr (regexp_pattern);
6047 p_head->p_next = rp;
6048 p_head->lang = lang;
6049 p_head->pat = patbuf;
6050 p_head->name = savestr (name);
6051 p_head->error_signaled = FALSE;
6052 p_head->force_explicit_name = force_explicit_name;
6053 p_head->ignore_case = ignore_case;
6054 p_head->multi_line = multi_line;
6058 * Do the substitutions indicated by the regular expression and
6059 * arguments.
6061 static char *
6062 substitute (in, out, regs)
6063 char *in, *out;
6064 struct re_registers *regs;
6066 char *result, *t;
6067 int size, dig, diglen;
6069 result = NULL;
6070 size = strlen (out);
6072 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6073 if (out[size - 1] == '\\')
6074 fatal ("pattern error in \"%s\"", out);
6075 for (t = etags_strchr (out, '\\');
6076 t != NULL;
6077 t = etags_strchr (t + 2, '\\'))
6078 if (ISDIGIT (t[1]))
6080 dig = t[1] - '0';
6081 diglen = regs->end[dig] - regs->start[dig];
6082 size += diglen - 2;
6084 else
6085 size -= 1;
6087 /* Allocate space and do the substitutions. */
6088 assert (size >= 0);
6089 result = xnew (size + 1, char);
6091 for (t = result; *out != '\0'; out++)
6092 if (*out == '\\' && ISDIGIT (*++out))
6094 dig = *out - '0';
6095 diglen = regs->end[dig] - regs->start[dig];
6096 strncpy (t, in + regs->start[dig], diglen);
6097 t += diglen;
6099 else
6100 *t++ = *out;
6101 *t = '\0';
6103 assert (t <= result + size);
6104 assert (t - result == (int)strlen (result));
6106 return result;
6109 /* Deallocate all regexps. */
6110 static void
6111 free_regexps ()
6113 regexp *rp;
6114 while (p_head != NULL)
6116 rp = p_head->p_next;
6117 free (p_head->pattern);
6118 free (p_head->name);
6119 free (p_head);
6120 p_head = rp;
6122 return;
6126 * Reads the whole file as a single string from `filebuf' and looks for
6127 * multi-line regular expressions, creating tags on matches.
6128 * readline already dealt with normal regexps.
6130 * Idea by Ben Wing <ben@666.com> (2002).
6132 static void
6133 regex_tag_multiline ()
6135 char *buffer = filebuf.buffer;
6136 regexp *rp;
6137 char *name;
6139 for (rp = p_head; rp != NULL; rp = rp->p_next)
6141 int match = 0;
6143 if (!rp->multi_line)
6144 continue; /* skip normal regexps */
6146 /* Generic initialisations before parsing file from memory. */
6147 lineno = 1; /* reset global line number */
6148 charno = 0; /* reset global char number */
6149 linecharno = 0; /* reset global char number of line start */
6151 /* Only use generic regexps or those for the current language. */
6152 if (rp->lang != NULL && rp->lang != curfdp->lang)
6153 continue;
6155 while (match >= 0 && match < filebuf.len)
6157 match = re_search (rp->pat, buffer, filebuf.len, charno,
6158 filebuf.len - match, &rp->regs);
6159 switch (match)
6161 case -2:
6162 /* Some error. */
6163 if (!rp->error_signaled)
6165 error ("regexp stack overflow while matching \"%s\"",
6166 rp->pattern);
6167 rp->error_signaled = TRUE;
6169 break;
6170 case -1:
6171 /* No match. */
6172 break;
6173 default:
6174 if (match == rp->regs.end[0])
6176 if (!rp->error_signaled)
6178 error ("regexp matches the empty string: \"%s\"",
6179 rp->pattern);
6180 rp->error_signaled = TRUE;
6182 match = -3; /* exit from while loop */
6183 break;
6186 /* Match occurred. Construct a tag. */
6187 while (charno < rp->regs.end[0])
6188 if (buffer[charno++] == '\n')
6189 lineno++, linecharno = charno;
6190 name = rp->name;
6191 if (name[0] == '\0')
6192 name = NULL;
6193 else /* make a named tag */
6194 name = substitute (buffer, rp->name, &rp->regs);
6195 if (rp->force_explicit_name)
6196 /* Force explicit tag name, if a name is there. */
6197 pfnote (name, TRUE, buffer + linecharno,
6198 charno - linecharno + 1, lineno, linecharno);
6199 else
6200 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6201 charno - linecharno + 1, lineno, linecharno);
6202 break;
6209 static bool
6210 nocase_tail (cp)
6211 char *cp;
6213 register int len = 0;
6215 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6216 cp++, len++;
6217 if (*cp == '\0' && !intoken (dbp[len]))
6219 dbp += len;
6220 return TRUE;
6222 return FALSE;
6225 static void
6226 get_tag (bp, namepp)
6227 register char *bp;
6228 char **namepp;
6230 register char *cp = bp;
6232 if (*bp != '\0')
6234 /* Go till you get to white space or a syntactic break */
6235 for (cp = bp + 1; !notinname (*cp); cp++)
6236 continue;
6237 make_tag (bp, cp - bp, TRUE,
6238 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6241 if (namepp != NULL)
6242 *namepp = savenstr (bp, cp - bp);
6246 * Read a line of text from `stream' into `lbp', excluding the
6247 * newline or CR-NL, if any. Return the number of characters read from
6248 * `stream', which is the length of the line including the newline.
6250 * On DOS or Windows we do not count the CR character, if any before the
6251 * NL, in the returned length; this mirrors the behavior of Emacs on those
6252 * platforms (for text files, it translates CR-NL to NL as it reads in the
6253 * file).
6255 * If multi-line regular expressions are requested, each line read is
6256 * appended to `filebuf'.
6258 static long
6259 readline_internal (lbp, stream)
6260 linebuffer *lbp;
6261 register FILE *stream;
6263 char *buffer = lbp->buffer;
6264 register char *p = lbp->buffer;
6265 register char *pend;
6266 int chars_deleted;
6268 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6270 for (;;)
6272 register int c = getc (stream);
6273 if (p == pend)
6275 /* We're at the end of linebuffer: expand it. */
6276 lbp->size *= 2;
6277 xrnew (buffer, lbp->size, char);
6278 p += buffer - lbp->buffer;
6279 pend = buffer + lbp->size;
6280 lbp->buffer = buffer;
6282 if (c == EOF)
6284 *p = '\0';
6285 chars_deleted = 0;
6286 break;
6288 if (c == '\n')
6290 if (p > buffer && p[-1] == '\r')
6292 p -= 1;
6293 #ifdef DOS_NT
6294 /* Assume CRLF->LF translation will be performed by Emacs
6295 when loading this file, so CRs won't appear in the buffer.
6296 It would be cleaner to compensate within Emacs;
6297 however, Emacs does not know how many CRs were deleted
6298 before any given point in the file. */
6299 chars_deleted = 1;
6300 #else
6301 chars_deleted = 2;
6302 #endif
6304 else
6306 chars_deleted = 1;
6308 *p = '\0';
6309 break;
6311 *p++ = c;
6313 lbp->len = p - buffer;
6315 if (need_filebuf /* we need filebuf for multi-line regexps */
6316 && chars_deleted > 0) /* not at EOF */
6318 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6320 /* Expand filebuf. */
6321 filebuf.size *= 2;
6322 xrnew (filebuf.buffer, filebuf.size, char);
6324 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6325 filebuf.len += lbp->len;
6326 filebuf.buffer[filebuf.len++] = '\n';
6327 filebuf.buffer[filebuf.len] = '\0';
6330 return lbp->len + chars_deleted;
6334 * Like readline_internal, above, but in addition try to match the
6335 * input line against relevant regular expressions and manage #line
6336 * directives.
6338 static void
6339 readline (lbp, stream)
6340 linebuffer *lbp;
6341 FILE *stream;
6343 long result;
6345 linecharno = charno; /* update global char number of line start */
6346 result = readline_internal (lbp, stream); /* read line */
6347 lineno += 1; /* increment global line number */
6348 charno += result; /* increment global char number */
6350 /* Honour #line directives. */
6351 if (!no_line_directive)
6353 static bool discard_until_line_directive;
6355 /* Check whether this is a #line directive. */
6356 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6358 unsigned int lno;
6359 int start = 0;
6361 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6362 && start > 0) /* double quote character found */
6364 char *endp = lbp->buffer + start;
6366 while ((endp = etags_strchr (endp, '"')) != NULL
6367 && endp[-1] == '\\')
6368 endp++;
6369 if (endp != NULL)
6370 /* Ok, this is a real #line directive. Let's deal with it. */
6372 char *taggedabsname; /* absolute name of original file */
6373 char *taggedfname; /* name of original file as given */
6374 char *name; /* temp var */
6376 discard_until_line_directive = FALSE; /* found it */
6377 name = lbp->buffer + start;
6378 *endp = '\0';
6379 canonicalize_filename (name); /* for DOS */
6380 taggedabsname = absolute_filename (name, tagfiledir);
6381 if (filename_is_absolute (name)
6382 || filename_is_absolute (curfdp->infname))
6383 taggedfname = savestr (taggedabsname);
6384 else
6385 taggedfname = relative_filename (taggedabsname,tagfiledir);
6387 if (streq (curfdp->taggedfname, taggedfname))
6388 /* The #line directive is only a line number change. We
6389 deal with this afterwards. */
6390 free (taggedfname);
6391 else
6392 /* The tags following this #line directive should be
6393 attributed to taggedfname. In order to do this, set
6394 curfdp accordingly. */
6396 fdesc *fdp; /* file description pointer */
6398 /* Go look for a file description already set up for the
6399 file indicated in the #line directive. If there is
6400 one, use it from now until the next #line
6401 directive. */
6402 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6403 if (streq (fdp->infname, curfdp->infname)
6404 && streq (fdp->taggedfname, taggedfname))
6405 /* If we remove the second test above (after the &&)
6406 then all entries pertaining to the same file are
6407 coalesced in the tags file. If we use it, then
6408 entries pertaining to the same file but generated
6409 from different files (via #line directives) will
6410 go into separate sections in the tags file. These
6411 alternatives look equivalent. The first one
6412 destroys some apparently useless information. */
6414 curfdp = fdp;
6415 free (taggedfname);
6416 break;
6418 /* Else, if we already tagged the real file, skip all
6419 input lines until the next #line directive. */
6420 if (fdp == NULL) /* not found */
6421 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6422 if (streq (fdp->infabsname, taggedabsname))
6424 discard_until_line_directive = TRUE;
6425 free (taggedfname);
6426 break;
6428 /* Else create a new file description and use that from
6429 now on, until the next #line directive. */
6430 if (fdp == NULL) /* not found */
6432 fdp = fdhead;
6433 fdhead = xnew (1, fdesc);
6434 *fdhead = *curfdp; /* copy curr. file description */
6435 fdhead->next = fdp;
6436 fdhead->infname = savestr (curfdp->infname);
6437 fdhead->infabsname = savestr (curfdp->infabsname);
6438 fdhead->infabsdir = savestr (curfdp->infabsdir);
6439 fdhead->taggedfname = taggedfname;
6440 fdhead->usecharno = FALSE;
6441 fdhead->prop = NULL;
6442 fdhead->written = FALSE;
6443 curfdp = fdhead;
6446 free (taggedabsname);
6447 lineno = lno - 1;
6448 readline (lbp, stream);
6449 return;
6450 } /* if a real #line directive */
6451 } /* if #line is followed by a a number */
6452 } /* if line begins with "#line " */
6454 /* If we are here, no #line directive was found. */
6455 if (discard_until_line_directive)
6457 if (result > 0)
6459 /* Do a tail recursion on ourselves, thus discarding the contents
6460 of the line buffer. */
6461 readline (lbp, stream);
6462 return;
6464 /* End of file. */
6465 discard_until_line_directive = FALSE;
6466 return;
6468 } /* if #line directives should be considered */
6471 int match;
6472 regexp *rp;
6473 char *name;
6475 /* Match against relevant regexps. */
6476 if (lbp->len > 0)
6477 for (rp = p_head; rp != NULL; rp = rp->p_next)
6479 /* Only use generic regexps or those for the current language.
6480 Also do not use multiline regexps, which is the job of
6481 regex_tag_multiline. */
6482 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6483 || rp->multi_line)
6484 continue;
6486 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6487 switch (match)
6489 case -2:
6490 /* Some error. */
6491 if (!rp->error_signaled)
6493 error ("regexp stack overflow while matching \"%s\"",
6494 rp->pattern);
6495 rp->error_signaled = TRUE;
6497 break;
6498 case -1:
6499 /* No match. */
6500 break;
6501 case 0:
6502 /* Empty string matched. */
6503 if (!rp->error_signaled)
6505 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6506 rp->error_signaled = TRUE;
6508 break;
6509 default:
6510 /* Match occurred. Construct a tag. */
6511 name = rp->name;
6512 if (name[0] == '\0')
6513 name = NULL;
6514 else /* make a named tag */
6515 name = substitute (lbp->buffer, rp->name, &rp->regs);
6516 if (rp->force_explicit_name)
6517 /* Force explicit tag name, if a name is there. */
6518 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6519 else
6520 make_tag (name, strlen (name), TRUE,
6521 lbp->buffer, match, lineno, linecharno);
6522 break;
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen (CP) + 1 bytes obtained through savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6541 * Return a pointer to a space of size LEN+1 allocated with xnew where
6542 * the string CP has been copied for at most the first LEN characters.
6544 static char *
6545 savenstr (cp, len)
6546 char *cp;
6547 int len;
6549 register char *dp;
6551 dp = xnew (len + 1, char);
6552 strncpy (dp, cp, len);
6553 dp[len] = '\0';
6554 return dp;
/*
 * Find the last occurrence of the character C in the string SP.  The
 * terminating NUL takes part in the search.  Return a pointer to the
 * occurrence, or NULL if there is none.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the rightmost hit so far */
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
/*
 * Find the first occurrence of the character C in the string SP.  The
 * terminating NUL takes part in the search.  Return a pointer to the
 * occurrence, or NULL if there is none.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero when they are equal, otherwise the difference between
 * the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      /* Case is folded only when both characters are letters. */
      int difference = (ISALPHA (*s1) && ISALPHA (*s2)
                        ? lowcase (*s1) - lowcase (*s2)
                        : *s1 - *s2);

      if (*s1 == '\0' || difference != 0)
        return difference;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Return zero when the first N characters of S1 and S2 are equal (or
 * when both strings end, equal, before that), otherwise the difference
 * between the first pair of characters that differ.  Zero is returned
 * when N is not positive.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is decremented
     only after a pair was found equal, so N == 0 on exit means that
     the whole prefix matched, even if S1 ends exactly there.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Return a pointer to the first character at or after CP for which
   iswhite is false (end of string is not space).  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that either
   satisfies iswhite or is the end of the string.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
/* Report a message through error () -- S1 is the printf control
   string, S2 its argument -- then terminate the program with
   EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
/* Print S1 together with the description of the current `errno'
   through perror, then terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6682 static void
6683 suggest_asking_for_help ()
6685 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6686 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6687 exit (EXIT_FAILURE);
6690 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6691 static void
6692 error (s1, s2)
6693 const char *s1, *s2;
6695 fprintf (stderr, "%s: ", progname);
6696 fprintf (stderr, s1, s2);
6697 fprintf (stderr, "\n");
6700 /* Return a newly-allocated string whose contents
6701 concatenate those of s1, s2, s3. */
6702 static char *
6703 concat (s1, s2, s3)
6704 char *s1, *s2, *s3;
6706 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6707 char *result = xnew (len1 + len2 + len3 + 1, char);
6709 strcpy (result, s1);
6710 strcpy (result + len1, s2);
6711 strcpy (result + len1 + len2, s3);
6712 result[len1 + len2 + len3] = '\0';
6714 return result;
6718 /* Does the same work as the system V getcwd, but does not need to
6719 guess the buffer size in advance. */
6720 static char *
6721 etags_getcwd ()
6723 #ifdef HAVE_GETCWD
6724 int bufsize = 200;
6725 char *path = xnew (bufsize, char);
6727 while (getcwd (path, bufsize) == NULL)
6729 if (errno != ERANGE)
6730 pfatal ("getcwd");
6731 bufsize *= 2;
6732 free (path);
6733 path = xnew (bufsize, char);
6736 canonicalize_filename (path);
6737 return path;
6739 #else /* not HAVE_GETCWD */
6740 #if MSDOS
6742 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6744 getwd (path);
6746 for (p = path; *p != '\0'; p++)
6747 if (*p == '\\')
6748 *p = '/';
6749 else
6750 *p = lowcase (*p);
6752 return strdup (path);
6753 #else /* not MSDOS */
6754 linebuffer path;
6755 FILE *pipe;
6757 linebuffer_init (&path);
6758 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6759 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6760 pfatal ("pwd");
6761 pclose (pipe);
6763 return path.buffer;
6764 #endif /* not MSDOS */
6765 #endif /* not HAVE_GETCWD */
6768 /* Return a newly allocated string containing the file name of FILE
6769 relative to the absolute directory DIR (which should end with a slash). */
6770 static char *
6771 relative_filename (file, dir)
6772 char *file, *dir;
6774 char *fp, *dp, *afn, *res;
6775 int i;
6777 /* Find the common root of file and dir (with a trailing slash). */
6778 afn = absolute_filename (file, cwd);
6779 fp = afn;
6780 dp = dir;
6781 while (*fp++ == *dp++)
6782 continue;
6783 fp--, dp--; /* back to the first differing char */
6784 #ifdef DOS_NT
6785 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6786 return afn;
6787 #endif
6788 do /* look at the equal chars until '/' */
6789 fp--, dp--;
6790 while (*fp != '/');
6792 /* Build a sequence of "../" strings for the resulting relative file name. */
6793 i = 0;
6794 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6795 i += 1;
6796 res = xnew (3*i + strlen (fp + 1) + 1, char);
6797 res[0] = '\0';
6798 while (i-- > 0)
6799 strcat (res, "../");
6801 /* Add the file name relative to the common root of file and dir. */
6802 strcat (res, fp + 1);
6803 free (afn);
6805 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              int found_parent = 0;

              /* Walk back to the start of the preceding component,
                 without ever stepping before RES.  */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      found_parent = 1;
                      break;
                    }
                }
              if (!found_parent)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap: strcpy is not allowed
                 here.  The length includes the terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping move again, see above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6872 /* Return a newly allocated string containing the absolute
6873 file name of dir where FILE resides given DIR (which should
6874 end with a slash). */
6875 static char *
6876 absolute_dirname (file, dir)
6877 char *file, *dir;
6879 char *slashp, *res;
6880 char save;
6882 canonicalize_filename (file);
6883 slashp = etags_strrchr (file, '/');
6884 if (slashp == NULL)
6885 return savestr (dir);
6886 save = slashp[1];
6887 slashp[1] = '\0';
6888 res = absolute_filename (file, dir);
6889 slashp[1] = save;
6891 return res;
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it begins with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Translate backslashes into slashes.  Works in place.  Under DOS_NT a
   lower-case drive letter is also converted to upper case; on other
   systems the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6927 /* Initialize a linebuffer for use */
6928 static void
6929 linebuffer_init (lbp)
6930 linebuffer *lbp;
6932 lbp->size = (DEBUG) ? 3 : 200;
6933 lbp->buffer = xnew (lbp->size, char);
6934 lbp->buffer[0] = '\0';
6935 lbp->len = 0;
6938 /* Set the minimum size of a string contained in a linebuffer. */
6939 static void
6940 linebuffer_setlen (lbp, toksize)
6941 linebuffer *lbp;
6942 int toksize;
6944 while (lbp->size <= toksize)
6946 lbp->size *= 2;
6947 xrnew (lbp->buffer, lbp->size, char);
6949 lbp->len = toksize;
6952 /* Like malloc but get fatal error if memory is exhausted. */
6953 static PTR
6954 xmalloc (size)
6955 unsigned int size;
6957 PTR result = (PTR) malloc (size);
6958 if (result == NULL)
6959 fatal ("virtual memory exhausted", (char *)NULL);
6960 return result;
6963 static PTR
6964 xrealloc (ptr, size)
6965 char *ptr;
6966 unsigned int size;
6968 PTR result = (PTR) realloc (ptr, size);
6969 if (result == NULL)
6970 fatal ("virtual memory exhausted", (char *)NULL);
6971 return result;
6975 * Local Variables:
6976 * indent-tabs-mode: t
6977 * tab-width: 8
6978 * fill-column: 79
6979 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6980 * c-file-style: "gnu"
6981 * End:
6984 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6985 (do not change this comment) */
6987 /* etags.c ends here */