* minibuffer.el (comps): Declare for byte-compiler.
[emacs.git] / lib-src / etags.c
blob89edc6c1b27542406e8d0fc443c6991a73ad4712
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
33 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider distributing etags
79 * together with a configuration file containing regexp definitions for etags.
82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
84 #define TRUE 1
85 #define FALSE 0
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
191 #include <sys/types.h>
192 #include <sys/stat.h>
194 #include <assert.h>
195 #ifdef NDEBUG
196 # undef assert /* some systems have a buggy assert.h */
197 # define assert(x) ((void) 0)
198 #endif
200 #if !defined (S_ISREG) && defined (S_IFREG)
201 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
202 #endif
204 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
205 # define NO_LONG_OPTIONS TRUE
206 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
207 extern char *optarg;
208 extern int optind, opterr;
209 #else
210 # define NO_LONG_OPTIONS FALSE
211 # include <getopt.h>
212 #endif /* NO_LONG_OPTIONS */
214 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
215 # ifdef __CYGWIN__ /* compiling on Cygwin */
216 !!! NOTICE !!!
217 the regex.h distributed with Cygwin is not compatible with etags, alas!
218 If you want regular expression support, you should delete this notice and
219 arrange to use the GNU regex.h and regex.c.
220 # endif
221 #endif
222 #include <regex.h>
224 /* Define CTAGS to make the program "ctags" compatible with the usual one.
225 Leave it undefined to make the program "etags", which makes emacs-style
226 tag tables and tags typedefs, #defines and struct/union/enum by default. */
227 #ifdef CTAGS
228 # undef CTAGS
229 # define CTAGS TRUE
230 #else
231 # define CTAGS FALSE
232 #endif
/*
 * String-equality helpers.  Each macro evaluates to nonzero when the
 * strings compare equal and to zero otherwise:
 *   streq      -- whole strings, compared with strcmp
 *   strcaseeq  -- whole strings, compared with etags_strcasecmp
 *   strneq     -- at most N characters, compared with strncmp
 *   strncaseeq -- at most N characters, compared with etags_strncasecmp
 * Both S and T must be non-NULL, because each comparison function
 * dereferences both; this is asserted (a no-op when assert is disabled).
 * The arguments may be evaluated more than once, so they must not have
 * side effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* Character-class helpers.  CHAR masks its argument into the range
   0 .. CHARS-1; the masked value indexes the _wht/_nin/_btk/_itk/_etk
   lookup tables and is what gets passed to the <ctype.h> functions.  */
239 #define CHARS 256 /* size of the lookup tables; CHAR keeps the low 8 bits */
240 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
241 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
242 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
243 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
244 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
245 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
/* <ctype.h> wrappers that mask the argument with CHAR first.  */
247 #define ISALNUM(c) isalnum (CHAR(c))
248 #define ISALPHA(c) isalpha (CHAR(c))
249 #define ISDIGIT(c) isdigit (CHAR(c))
250 #define ISLOWER(c) islower (CHAR(c))
/* Case conversion, likewise applied to the masked value.  */
252 #define lowcase(c) tolower (CHAR(c))
253 #define upcase(c) toupper (CHAR(c))
257 * xnew, xrnew -- allocate, reallocate storage
259 * SYNOPSIS: Type *xnew (int n, Type);
260 * void xrnew (OldPointer, int n, Type);
/* xnew (N, Type) evaluates to a `Type *' obtained by requesting
   N * sizeof (Type) bytes; xrnew (OP, N, Type) requests a resize of the
   storage OP points to and assigns the result back to OP.  When DEBUG is
   nonzero the requests go to trace_malloc/trace_realloc together with
   __FILE__ and __LINE__ (presumably declared by "chkmalloc.h"); otherwise
   they go to xmalloc/xrealloc.
   NOTE(review): N * sizeof (Type) is not checked for overflow, and the
   xmalloc/xrealloc prototypes in this file take `unsigned int' --
   confirm that large requests cannot be truncated.  */
262 #if DEBUG
263 # include "chkmalloc.h"
264 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
265 (n) * sizeof (Type)))
266 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
267 (char *) (op), (n) * sizeof (Type)))
268 #else
269 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
270 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
271 (char *) (op), (n) * sizeof (Type)))
272 #endif
274 #define bool int
276 typedef void Lang_function __P((FILE *));
/* A compression format: the file-name suffix associated with it and the
   command used to decompress such a file.  */
278 typedef struct
280 char *suffix; /* file name suffix for this compressor */
281 char *command; /* takes one arg and decompresses to stdout */
282 } compressor;
/* Description of one language.  The lang_names table initializes
   instances positionally, so the order of the members is significant.  */
284 typedef struct
286 char *name; /* language name */
287 char *help; /* detailed help for the language */
288 Lang_function *function; /* parse function */
289 char **suffixes; /* name suffixes of this language's files */
290 char **filenames; /* names of this language's files */
291 char **interpreters; /* interpreters for this language */
292 bool metasource; /* source used to generate other sources */
293 } language;
/* Description of one input file.  Descriptions are chained into a
   linked list through NEXT.  */
295 typedef struct fdesc
297 struct fdesc *next; /* for the linked list */
298 char *infname; /* uncompressed input file name */
299 char *infabsname; /* absolute uncompressed input file name */
300 char *infabsdir; /* absolute dir of input file */
301 char *taggedfname; /* file name to write in tagfile */
302 language *lang; /* language of file */
303 char *prop; /* file properties to write in tagfile */
304 bool usecharno; /* etags tags shall contain char number */
305 bool written; /* entry written in the tags file */
306 } fdesc;
/* One tag.  Nodes are linked to each other through LEFT and RIGHT, and
   FDP points to the description of the file the tag belongs to.  */
308 typedef struct node_st
309 { /* sorting structure */
310 struct node_st *left, *right; /* left and right sons */
311 fdesc *fdp; /* description of file to whom tag belongs */
312 char *name; /* tag name */
313 char *regex; /* search regexp */
314 bool valid; /* write this tag on the tag file */
315 bool is_func; /* function tag: use regexp in CTAGS mode */
316 bool been_warned; /* warning already given for duplicated tag */
317 int lno; /* line number tag is on */
318 long cno; /* character number line starts on */
319 } node;
322 * A `linebuffer' is a structure which holds a line of text.
323 * `readline_internal' reads a line from a stream into a linebuffer
324 * and works regardless of the length of the line.
325 * SIZE is the size of BUFFER, LEN is the length of the string in
326 * BUFFER after readline reads it.
/* Line buffer: BUFFER holds the text, SIZE is the size of BUFFER and
   LEN is the length of the string stored in BUFFER.
   NOTE(review): SIZE is a long while LEN is an int -- confirm that LEN
   cannot overflow on very long lines.  */
328 typedef struct
330 long size; /* size of BUFFER */
331 int len; /* length of the string in BUFFER */
332 char *buffer; /* the text itself */
333 } linebuffer;
335 /* Used to support mixing of --lang and file names.  Each element
   records the kind of item it is (ARG_TYPE), the language associated
   with it (LANG) and the item itself (WHAT).  */
336 typedef struct
338 enum {
339 at_language, /* a language specification */
340 at_regexp, /* a regular expression */
341 at_filename, /* a file name */
342 at_stdin, /* read from stdin here */
343 at_end /* stop parsing the list */
344 } arg_type; /* argument type */
345 language *lang; /* language associated with the argument */
346 char *what; /* the argument itself */
347 } argument;
349 /* Structure defining a regular expression.  Entries are chained into
   a list through P_NEXT.  */
350 typedef struct regexp
352 struct regexp *p_next; /* pointer to next in list */
353 language *lang; /* if set, use only for this language */
354 char *pattern; /* the regexp pattern */
355 char *name; /* tag name */
356 struct re_pattern_buffer *pat; /* the compiled pattern */
357 struct re_registers regs; /* re registers */
358 bool error_signaled; /* already signaled for this regexp */
359 bool force_explicit_name; /* do not allow implicit tag name */
360 bool ignore_case; /* ignore case when matching */
361 bool multi_line; /* do a multi-line match on the whole file */
362 } regexp;
365 /* Many compilers barf on this:
366 Lang_function Ada_funcs;
367 so let's write it this way */
/* Forward declarations of the per-language tagging functions.  All of
   them except C_entries have the Lang_function signature (a single
   FILE * argument), which is the type of the `function' member of a
   `language' entry; C_entries takes an additional c_ext argument.  */
368 static void Ada_funcs __P((FILE *));
369 static void Asm_labels __P((FILE *));
370 static void C_entries __P((int c_ext, FILE *));
371 static void default_C_entries __P((FILE *));
372 static void plain_C_entries __P((FILE *));
373 static void Cjava_entries __P((FILE *));
374 static void Cobol_paragraphs __P((FILE *));
375 static void Cplusplus_entries __P((FILE *));
376 static void Cstar_entries __P((FILE *));
377 static void Erlang_functions __P((FILE *));
378 static void Forth_words __P((FILE *));
379 static void Fortran_functions __P((FILE *));
380 static void HTML_labels __P((FILE *));
381 static void Lisp_functions __P((FILE *));
382 static void Lua_functions __P((FILE *));
383 static void Makefile_targets __P((FILE *));
384 static void Pascal_functions __P((FILE *));
385 static void Perl_functions __P((FILE *));
386 static void PHP_functions __P((FILE *));
387 static void PS_functions __P((FILE *));
388 static void Prolog_functions __P((FILE *));
389 static void Python_functions __P((FILE *));
390 static void Scheme_functions __P((FILE *));
391 static void TeX_commands __P((FILE *));
392 static void Texinfo_nodes __P((FILE *));
393 static void Yacc_entries __P((FILE *));
394 static void just_read_file __P((FILE *));
/* Forward declarations of the remaining functions.  They are written
   with __P so that the parameter lists vanish when __P is defined to
   expand to `()'.  main and fatal are the only ones declared without
   `static'.  */

/* Usage and version output, and the program entry point.  */
396 static void print_language_names __P((void));
397 static void print_version __P((void));
398 static void print_help __P((argument *));
399 int main __P((int, char **));

/* Compressor and language lookup, and line reading.  */
401 static compressor *get_compressor_from_suffix __P((char *, char **));
402 static language *get_language_from_langname __P((const char *));
403 static language *get_language_from_interpreter __P((char *));
404 static language *get_language_from_filename __P((char *, bool));
405 static void readline __P((linebuffer *, FILE *));
406 static long readline_internal __P((linebuffer *, FILE *));
407 static bool nocase_tail __P((char *));
408 static void get_tag __P((char *, char **));

/* Regexp handling and error reporting.  */
410 static void analyse_regex __P((char *));
411 static void free_regexps __P((void));
412 static void regex_tag_multiline __P((void));
413 static void error __P((const char *, const char *));
414 static void suggest_asking_for_help __P((void));
415 void fatal __P((char *, char *));
416 static void pfatal __P((char *));
417 static void add_node __P((node *, node **));

/* File processing and tag output.  */
419 static void init __P((void));
420 static void process_file_name __P((char *, language *));
421 static void process_file __P((FILE *, char *, language *));
422 static void find_entries __P((FILE *));
423 static void free_tree __P((node *));
424 static void free_fdesc __P((fdesc *));
425 static void pfnote __P((char *, bool, char *, int, int, long));
426 static void make_tag __P((char *, int, bool, char *, int, int, long));
427 static void invalidate_nodes __P((fdesc *, node **));
428 static void put_entries __P((node *));

/* String, file-name, line-buffer and allocation utilities.
   NOTE(review): xmalloc and xrealloc take the size as `unsigned int';
   confirm this is wide enough where size_t is larger than unsigned int.  */
430 static char *concat __P((char *, char *, char *));
431 static char *skip_spaces __P((char *));
432 static char *skip_non_spaces __P((char *));
433 static char *savenstr __P((char *, int));
434 static char *savestr __P((char *));
435 static char *etags_strchr __P((const char *, int));
436 static char *etags_strrchr __P((const char *, int));
437 static int etags_strcasecmp __P((const char *, const char *));
438 static int etags_strncasecmp __P((const char *, const char *, int));
439 static char *etags_getcwd __P((void));
440 static char *relative_filename __P((char *, char *));
441 static char *absolute_filename __P((char *, char *));
442 static char *absolute_dirname __P((char *, char *));
443 static bool filename_is_absolute __P((char *f));
444 static void canonicalize_filename __P((char *));
445 static void linebuffer_init __P((linebuffer *));
446 static void linebuffer_setlen __P((linebuffer *, int));
447 static PTR xmalloc __P((unsigned int));
448 static PTR xrealloc __P((char *, unsigned int));
451 static char searchar = '/'; /* use /.../ searches */
453 static char *tagfile; /* output file */
454 static char *progname; /* name this program was invoked with */
455 static char *cwd; /* current working directory */
456 static char *tagfiledir; /* directory of tagfile */
457 static FILE *tagf; /* ioptr for tags file */
459 static fdesc *fdhead; /* head of file description list */
460 static fdesc *curfdp; /* current file description */
461 static int lineno; /* line number of current line */
462 static long charno; /* current character number */
463 static long linecharno; /* charno of start of current line */
464 static char *dbp; /* pointer to start of current tag */
466 static const int invalidcharno = -1;
468 static node *nodehead; /* the head of the binary tree of tags */
469 static node *last_node; /* the last node created */
471 static linebuffer lb; /* the current line */
472 static linebuffer filebuf; /* a buffer containing the whole file */
473 static linebuffer token_name; /* a buffer containing a tag name */
475 /* boolean "functions" (see init) */
476 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
477 static char
478 /* white chars */
479 *white = " \f\t\n\r\v",
480 /* not in a name */
481 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
482 /* token ending chars */
483 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
484 /* token starting chars */
485 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
486 /* valid in-token chars */
487 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
489 static bool append_to_tagfile; /* -a: append to tags */
490 /* The next five default to TRUE in C and derived languages. */
491 static bool typedefs; /* -t: create tags for C and Ada typedefs */
492 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
493 /* 0 struct/enum/union decls, and C++ */
494 /* member functions. */
495 static bool constantypedefs; /* -d: create tags for C #define, enum */
496 /* constants and variables. */
497 /* -D: opposite of -d. Default under ctags. */
498 static bool globals; /* create tags for global variables */
499 static bool members; /* create tags for C member variables */
500 static bool declarations; /* --declarations: tag them and extern in C&Co*/
501 static bool no_line_directive; /* ignore #line directives (undocumented) */
502 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
503 static bool update; /* -u: update tags */
504 static bool vgrind_style; /* -v: create vgrind style index output */
505 static bool no_warnings; /* -w: suppress warnings (undocumented) */
506 static bool cxref_style; /* -x: create cxref style output */
507 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
508 static bool ignoreindent; /* -I: ignore indentation in C */
509 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
511 /* STDIN is defined in LynxOS system headers */
512 #ifdef STDIN
513 # undef STDIN
514 #endif
516 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
517 static bool parsing_stdin; /* --parse-stdin used */
519 static regexp *p_head; /* list of all regexps */
520 static bool need_filebuf; /* some regexes are multi-line */
/* Table of long options (presumably the one handed to getopt_long).
   Each entry is { name, has_arg, flag, val }: entries whose flag is NULL
   carry a character (or STDIN) as their value, while entries with a flag
   pointer name the boolean variable that receives TRUE or FALSE.  The
   table ends with an entry whose name is NULL.  Which of the trailing
   groups is compiled in depends on CTAGS.  */
522 static struct option longopts[] =
524 { "append", no_argument, NULL, 'a' },
525 { "packages-only", no_argument, &packages_only, TRUE },
526 { "c++", no_argument, NULL, 'C' },
527 { "declarations", no_argument, &declarations, TRUE },
528 { "no-line-directive", no_argument, &no_line_directive, TRUE },
529 { "no-duplicates", no_argument, &no_duplicates, TRUE },
530 { "help", no_argument, NULL, 'h' },
/* NOTE(review): "help" is listed a second time with value 'H'; confirm
   whether this second entry can ever be selected by name.  */
531 { "help", no_argument, NULL, 'H' },
532 { "ignore-indentation", no_argument, NULL, 'I' },
533 { "language", required_argument, NULL, 'l' },
534 { "members", no_argument, &members, TRUE },
535 { "no-members", no_argument, &members, FALSE },
536 { "output", required_argument, NULL, 'o' },
537 { "regex", required_argument, NULL, 'r' },
538 { "no-regex", no_argument, NULL, 'R' },
539 { "ignore-case-regex", required_argument, NULL, 'c' },
/* STDIN (0x1001) is not a character, so this option has no short form
   in this table.  */
540 { "parse-stdin", required_argument, NULL, STDIN },
541 { "version", no_argument, NULL, 'V' },
543 #if CTAGS /* Ctags options */
544 { "backward-search", no_argument, NULL, 'B' },
545 { "cxref", no_argument, NULL, 'x' },
546 { "defines", no_argument, NULL, 'd' },
547 { "globals", no_argument, &globals, TRUE },
548 { "typedefs", no_argument, NULL, 't' },
549 { "typedefs-and-c++", no_argument, NULL, 'T' },
550 { "update", no_argument, NULL, 'u' },
551 { "vgrind", no_argument, NULL, 'v' },
552 { "no-warn", no_argument, NULL, 'w' },
554 #else /* Etags options */
555 { "no-defines", no_argument, NULL, 'D' },
556 { "no-globals", no_argument, &globals, FALSE },
557 { "include", required_argument, NULL, 'i' },
558 #endif
559 { NULL }
/* Table of compressed-file suffixes and the decompression command for
   each.  The entry with a NULL suffix ends the table.  */
562 static compressor compressors[] =
564 { "z", "gzip -d -c"},
565 { "Z", "gzip -d -c"},
566 { "gz", "gzip -d -c"},
567 { "GZ", "gzip -d -c"},
568 { "bz2", "bzip2 -d -c" },
569 { NULL }
573 * Language stuff.
576 /* Ada code */
577 static char *Ada_suffixes [] =
578 { "ads", "adb", "ada", NULL };
579 static char Ada_help [] =
580 "In Ada code, functions, procedures, packages, tasks and types are\n\
581 tags. Use the `--packages-only' option to create tags for\n\
582 packages only.\n\
583 Ada tag names have suffixes indicating the type of entity:\n\
584 Entity type: Qualifier:\n\
585 ------------ ----------\n\
586 function /f\n\
587 procedure /p\n\
588 package spec /s\n\
589 package body /b\n\
590 type /t\n\
591 task /k\n\
592 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
593 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
594 will just search for any tag `bidule'.";
596 /* Assembly code */
597 static char *Asm_suffixes [] =
598 { "a", /* Unix assembler */
599 "asm", /* Microcontroller assembly */
600 "def", /* BSO/Tasking definition includes */
601 "inc", /* Microcontroller include files */
602 "ins", /* Microcontroller include files */
603 "s", "sa", /* Unix assembler */
604 "S", /* cpp-processed Unix assembler */
605 "src", /* BSO/Tasking C compiler output */
606 NULL
608 static char Asm_help [] =
609 "In assembler code, labels appearing at the beginning of a line,\n\
610 followed by a colon, are tags.";
613 /* Note that .c and .h can be considered C++, if the --c++ flag was
614 given, or if the `class' or `template' keywords are met inside the file.
615 That is why default_C_entries is called for these. */
616 static char *default_C_suffixes [] =
617 { "c", "h", NULL };
618 #if CTAGS /* C help for Ctags */
619 static char default_C_help [] =
620 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
621 Use -T to tag definitions of `struct', `union' and `enum'.\n\
622 Use -d to tag `#define' macro definitions and `enum' constants.\n\
623 Use --globals to tag global variables.\n\
624 You can tag function declarations and external variables by\n\
625 using `--declarations', and struct members by using `--members'.";
626 #else /* C help for Etags */
627 static char default_C_help [] =
628 "In C code, any C function or typedef is a tag, and so are\n\
629 definitions of `struct', `union' and `enum'. `#define' macro\n\
630 definitions and `enum' constants are tags unless you specify\n\
631 `--no-defines'. Global variables are tags unless you specify\n\
632 `--no-globals' and so are struct members unless you specify\n\
633 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
634 `--no-members' can make the tags table file much smaller.\n\
635 You can tag function declarations and external variables by\n\
636 using `--declarations'.";
637 #endif /* C help for Ctags and Etags */
639 static char *Cplusplus_suffixes [] =
640 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
641 "M", /* Objective C++ */
642 "pdb", /* Postscript with C syntax */
643 NULL };
644 static char Cplusplus_help [] =
645 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
646 --help --lang=c --lang=c++ for full help.)\n\
647 In addition to C tags, member functions are also recognized. Member\n\
648 variables are recognized unless you use the `--no-members' option.\n\
649 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
650 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
651 `operator+'.";
653 static char *Cjava_suffixes [] =
654 { "java", NULL };
655 static char Cjava_help [] =
656 "In Java code, all the tags constructs of C and C++ code are\n\
657 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
660 static char *Cobol_suffixes [] =
661 { "COB", "cob", NULL };
662 static char Cobol_help [] =
663 "In Cobol code, tags are paragraph names; that is, any word\n\
664 starting in column 8 and followed by a period.";
666 static char *Cstar_suffixes [] =
667 { "cs", "hs", NULL };
669 static char *Erlang_suffixes [] =
670 { "erl", "hrl", NULL };
671 static char Erlang_help [] =
672 "In Erlang code, the tags are the functions, records and macros\n\
673 defined in the file.";
/* File-name suffixes of Forth source files, ended by NULL.  Declared
   static like every other suffix table in this file, so that the name
   does not leak into the global namespace.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
677 static char Forth_help [] =
678 "In Forth code, tags are words defined by `:',\n\
679 constant, code, create, defer, value, variable, buffer:, field.";
681 static char *Fortran_suffixes [] =
682 { "F", "f", "f90", "for", NULL };
683 static char Fortran_help [] =
684 "In Fortran code, functions, subroutines and block data are tags.";
686 static char *HTML_suffixes [] =
687 { "htm", "html", "shtml", NULL };
688 static char HTML_help [] =
689 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
690 `h3' headers. Also, tags are `name=' in anchors and all\n\
691 occurrences of `id='.";
693 static char *Lisp_suffixes [] =
694 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
695 static char Lisp_help [] =
696 "In Lisp code, any function defined with `defun', any variable\n\
697 defined with `defvar' or `defconst', and in general the first\n\
698 argument of any expression that starts with `(def' in column zero\n\
699 is a tag.";
701 static char *Lua_suffixes [] =
702 { "lua", "LUA", NULL };
703 static char Lua_help [] =
704 "In Lua scripts, all functions are tags.";
706 static char *Makefile_filenames [] =
707 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
708 static char Makefile_help [] =
709 "In makefiles, targets are tags; additionally, variables are tags\n\
710 unless you specify `--no-globals'.";
712 static char *Objc_suffixes [] =
713 { "lm", /* Objective lex file */
714 "m", /* Objective C file */
715 NULL };
716 static char Objc_help [] =
717 "In Objective C code, tags include Objective C definitions for classes,\n\
718 class categories, methods and protocols. Tags for variables and\n\
719 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
720 (Use --help --lang=c --lang=objc --lang=java for full help.)";
722 static char *Pascal_suffixes [] =
723 { "p", "pas", NULL };
724 static char Pascal_help [] =
725 "In Pascal code, the tags are the functions and procedures defined\n\
726 in the file.";
727 /* " // this is for working around an Emacs highlighting bug... */
729 static char *Perl_suffixes [] =
730 { "pl", "pm", NULL };
731 static char *Perl_interpreters [] =
732 { "perl", "@PERL@", NULL };
733 static char Perl_help [] =
734 "In Perl code, the tags are the packages, subroutines and variables\n\
735 defined by the `package', `sub', `my' and `local' keywords. Use\n\
736 `--globals' if you want to tag global variables. Tags for\n\
737 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
738 defined in the default package is `main::SUB'.";
740 static char *PHP_suffixes [] =
741 { "php", "php3", "php4", NULL };
742 static char PHP_help [] =
743 "In PHP code, tags are functions, classes and defines. Unless you use\n\
744 the `--no-members' option, vars are tags too.";
746 static char *plain_C_suffixes [] =
747 { "pc", /* Pro*C file */
748 NULL };
750 static char *PS_suffixes [] =
751 { "ps", "psw", NULL }; /* .psw is for PSWrap */
752 static char PS_help [] =
753 "In PostScript code, the tags are the functions.";
755 static char *Prolog_suffixes [] =
756 { "prolog", NULL };
757 static char Prolog_help [] =
758 "In Prolog code, tags are predicates and rules at the beginning of\n\
759 line.";
761 static char *Python_suffixes [] =
762 { "py", NULL };
763 static char Python_help [] =
764 "In Python code, `def' or `class' at the beginning of a line\n\
765 generate a tag.";
767 /* Can't do the `SCM' or `scm' prefix with a version number. */
768 static char *Scheme_suffixes [] =
769 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
770 static char Scheme_help [] =
771 "In Scheme code, tags include anything defined with `def' or with a\n\
772 construct whose name starts with `def'. They also include\n\
773 variables set with `set!' at top level in the file.";
775 static char *TeX_suffixes [] =
776 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
777 static char TeX_help [] =
778 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
779 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
780 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
781 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
782 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
784 Other commands can be specified by setting the environment variable\n\
785 `TEXTAGS' to a colon-separated list like, for example,\n\
786 TEXTAGS=\"mycommand:myothercommand\".";
789 static char *Texinfo_suffixes [] =
790 { "texi", "texinfo", "txi", NULL };
791 static char Texinfo_help [] =
792 "for texinfo files, lines starting with @node are tagged.";
794 static char *Yacc_suffixes [] =
795 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
796 static char Yacc_help [] =
797 "In Bison or Yacc input files, each rule defines as a tag the\n\
798 nonterminal it constructs. The portions of the file that contain\n\
799 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
800 for full help).";
/* Detailed help text for the `auto' pseudo-language entry of the
   language table.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
806 static char none_help [] =
807 "`none' is not a real language, it indicates to only do\n\
808 regexp processing on files.";
810 static char no_lang_help [] =
811 "No detailed help available for this language.";
815 * Table of languages.
817 * It is ok for a given function to be listed under more than one
818 * name. I just didn't.
/* The language table.  Each entry is initialized positionally in the
   member order of `language': name, help, function, suffixes, filenames,
   interpreters, metasource.  Members omitted at the end of an entry are
   zero-initialized, so most languages have no file names or interpreters
   and only "yacc" sets metasource.  The entry with a NULL name ends the
   table.  */
821 static language lang_names [] =
823 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
824 { "asm", Asm_help, Asm_labels, Asm_suffixes },
825 { "c", default_C_help, default_C_entries, default_C_suffixes },
826 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
827 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
828 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
829 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
830 { "forth", Forth_help, Forth_words, Forth_suffixes },
831 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
832 { "html", HTML_help, HTML_labels, HTML_suffixes },
833 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
834 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
835 { "lua", Lua_help, Lua_functions, Lua_suffixes },
/* Makefiles are listed by file name, not by suffix.  */
836 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
837 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
838 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
/* Perl is the only entry that also lists interpreters.  */
839 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
840 { "php", PHP_help, PHP_functions, PHP_suffixes },
841 { "postscript",PS_help, PS_functions, PS_suffixes },
842 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
843 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
844 { "python", Python_help, Python_functions, Python_suffixes },
845 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
846 { "tex", TeX_help, TeX_commands, TeX_suffixes },
847 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
848 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
849 { "auto", auto_help }, /* default guessing scheme */
850 { "none", none_help, just_read_file }, /* regexp matching only */
851 { NULL } /* end of list */
855 static void
856 print_language_names ()
858 language *lang;
859 char **name, **ext;
861 puts ("\nThese are the currently supported languages, along with the\n\
862 default file names and dot suffixes:");
863 for (lang = lang_names; lang->name != NULL; lang++)
865 printf (" %-*s", 10, lang->name);
866 if (lang->filenames != NULL)
867 for (name = lang->filenames; *name != NULL; name++)
868 printf (" %s", *name);
869 if (lang->suffixes != NULL)
870 for (ext = lang->suffixes; *ext != NULL; ext++)
871 printf (" .%s", *ext);
872 puts ("");
874 puts ("where `auto' means use default language for files based on file\n\
875 name suffix, and `none' means only do regexp processing on files.\n\
876 If no language is specified and no matching suffix is found,\n\
877 the first line of the file is read for a sharp-bang (#!) sequence\n\
878 followed by the name of an interpreter. If no such sequence is found,\n\
879 Fortran is tried first; if no tags are found, C is tried next.\n\
880 When parsing any C file, a \"class\" or \"template\" keyword\n\
881 switches to C++.");
882 puts ("Compressed files are supported using gzip and bzip2.\n\
884 For detailed help on a given language use, for example,\n\
885 etags --help --lang=ada.");
888 #ifndef EMACS_NAME
889 # define EMACS_NAME "standalone"
890 #endif
891 #ifndef VERSION
892 # define VERSION "17.38.1.4"
893 #endif
894 static void
895 print_version ()
897 /* Makes it easier to update automatically. */
898 char emacs_copyright[] = "Copyright (C) 2010 Free Software Foundation, Inc.";
900 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
901 puts (emacs_copyright);
902 puts ("This program is distributed under the terms in ETAGS.README");
904 exit (EXIT_SUCCESS);
907 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
908 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
909 #endif
911 static void
912 print_help (argbuffer)
913 argument *argbuffer;
915 bool help_for_lang = FALSE;
917 for (; argbuffer->arg_type != at_end; argbuffer++)
918 if (argbuffer->arg_type == at_language)
920 if (help_for_lang)
921 puts ("");
922 puts (argbuffer->lang->help);
923 help_for_lang = TRUE;
926 if (help_for_lang)
927 exit (EXIT_SUCCESS);
929 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
931 These are the options accepted by %s.\n", progname, progname);
932 if (NO_LONG_OPTIONS)
933 puts ("WARNING: long option names do not work with this executable,\n\
934 as it is not linked with GNU getopt.");
935 else
936 puts ("You may use unambiguous abbreviations for the long option names.");
937 puts (" A - as file name means read names from stdin (one per line).\n\
938 Absolute names are stored in the output file as they are.\n\
939 Relative ones are stored relative to the output file's directory.\n");
941 puts ("-a, --append\n\
942 Append tag entries to existing tags file.");
944 puts ("--packages-only\n\
945 For Ada files, only generate tags for packages.");
947 if (CTAGS)
948 puts ("-B, --backward-search\n\
949 Write the search commands for the tag entries using '?', the\n\
950 backward-search command instead of '/', the forward-search command.");
952 /* This option is mostly obsolete, because etags can now automatically
953 detect C++. Retained for backward compatibility and for debugging and
954 experimentation. In principle, we could want to tag as C++ even
955 before any "class" or "template" keyword.
956 puts ("-C, --c++\n\
957 Treat files whose name suffix defaults to C language as C++ files.");
960 puts ("--declarations\n\
961 In C and derived languages, create tags for function declarations,");
962 if (CTAGS)
963 puts ("\tand create tags for extern variables if --globals is used.");
964 else
965 puts
966 ("\tand create tags for extern variables unless --no-globals is used.");
968 if (CTAGS)
969 puts ("-d, --defines\n\
970 Create tag entries for C #define constants and enum constants, too.");
971 else
972 puts ("-D, --no-defines\n\
973 Don't create tag entries for C #define constants and enum constants.\n\
974 This makes the tags file smaller.");
976 if (!CTAGS)
977 puts ("-i FILE, --include=FILE\n\
978 Include a note in tag file indicating that, when searching for\n\
979 a tag, one should also consult the tags file FILE after\n\
980 checking the current file.");
982 puts ("-l LANG, --language=LANG\n\
983 Force the following files to be considered as written in the\n\
984 named language up to the next --language=LANG option.");
986 if (CTAGS)
987 puts ("--globals\n\
988 Create tag entries for global variables in some languages.");
989 else
990 puts ("--no-globals\n\
991 Do not create tag entries for global variables in some\n\
992 languages. This makes the tags file smaller.");
994 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
995 puts ("--no-line-directive\n\
996 Ignore #line preprocessor directives in C and derived languages.");
998 if (CTAGS)
999 puts ("--members\n\
1000 Create tag entries for members of structures in some languages.");
1001 else
1002 puts ("--no-members\n\
1003 Do not create tag entries for members of structures\n\
1004 in some languages.");
1006 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1007 Make a tag for each line matching a regular expression pattern\n\
1008 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1009 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1010 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1011 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1012 puts (" If TAGNAME/ is present, the tags created are named.\n\
1013 For example Tcl named tags can be created with:\n\
1014 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1015 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1016 `m' means to allow multi-line matches, `s' implies `m' and\n\
1017 causes dot to match any character, including newline.");
1019 puts ("-R, --no-regex\n\
1020 Don't create tags from regexps for the following files.");
1022 puts ("-I, --ignore-indentation\n\
1023 In C and C++ do not assume that a closing brace in the first\n\
1024 column is the final brace of a function or structure definition.");
1026 puts ("-o FILE, --output=FILE\n\
1027 Write the tags to FILE.");
1029 puts ("--parse-stdin=NAME\n\
1030 Read from standard input and record tags as belonging to file NAME.");
1032 if (CTAGS)
1034 puts ("-t, --typedefs\n\
1035 Generate tag entries for C and Ada typedefs.");
1036 puts ("-T, --typedefs-and-c++\n\
1037 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1038 and C++ member functions.");
1041 if (CTAGS)
1042 puts ("-u, --update\n\
1043 Update the tag entries for the given files, leaving tag\n\
1044 entries for other files in place. Currently, this is\n\
1045 implemented by deleting the existing entries for the given\n\
1046 files and then rewriting the new entries at the end of the\n\
1047 tags file. It is often faster to simply rebuild the entire\n\
1048 tag file than to use this.");
1050 if (CTAGS)
1052 puts ("-v, --vgrind\n\
1053 Print on the standard output an index of items intended for\n\
1054 human consumption, similar to the output of vgrind. The index\n\
1055 is sorted, and gives the page number of each item.");
1057 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1058 puts ("-w, --no-duplicates\n\
1059 Do not create duplicate tag entries, for compatibility with\n\
1060 traditional ctags.");
1062 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1063 puts ("-w, --no-warn\n\
1064 Suppress warning messages about duplicate tag entries.");
1066 puts ("-x, --cxref\n\
1067 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1068 The output uses line numbers instead of page numbers, but\n\
1069 beyond that the differences are cosmetic; try both to see\n\
1070 which you like.");
1073 puts ("-V, --version\n\
1074 Print the version of the program.\n\
1075 -h, --help\n\
1076 Print this help message.\n\
1077 Followed by one or more `--language' options prints detailed\n\
1078 help about tag generation for the specified languages.");
1080 print_language_names ();
1082 puts ("");
1083 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1085 exit (EXIT_SUCCESS);
1090 main (argc, argv)
1091 int argc;
1092 char *argv[];
1094 int i;
1095 unsigned int nincluded_files;
1096 char **included_files;
1097 argument *argbuffer;
1098 int current_arg, file_count;
1099 linebuffer filename_lb;
1100 bool help_asked = FALSE;
1101 char *optstring;
1102 int opt;
1105 #ifdef DOS_NT
1106 _fmode = O_BINARY; /* all of files are treated as binary files */
1107 #endif /* DOS_NT */
1109 progname = argv[0];
1110 nincluded_files = 0;
1111 included_files = xnew (argc, char *);
1112 current_arg = 0;
1113 file_count = 0;
1115 /* Allocate enough no matter what happens. Overkill, but each one
1116 is small. */
1117 argbuffer = xnew (argc, argument);
1120 * Always find typedefs and structure tags.
1121 * Also default to find macro constants, enum constants, struct
1122 * members and global variables. Do it for both etags and ctags.
1124 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1125 globals = members = TRUE;
1127 /* When the optstring begins with a '-' getopt_long does not rearrange the
1128 non-options arguments to be at the end, but leaves them alone. */
1129 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1130 "ac:Cf:Il:o:r:RSVhH",
1131 (CTAGS) ? "BxdtTuvw" : "Di:");
1133 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1134 switch (opt)
1136 case 0:
1137 /* If getopt returns 0, then it has already processed a
1138 long-named option. We should do nothing. */
1139 break;
1141 case 1:
1142 /* This means that a file name has been seen. Record it. */
1143 argbuffer[current_arg].arg_type = at_filename;
1144 argbuffer[current_arg].what = optarg;
1145 ++current_arg;
1146 ++file_count;
1147 break;
1149 case STDIN:
1150 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1151 argbuffer[current_arg].arg_type = at_stdin;
1152 argbuffer[current_arg].what = optarg;
1153 ++current_arg;
1154 ++file_count;
1155 if (parsing_stdin)
1156 fatal ("cannot parse standard input more than once", (char *)NULL);
1157 parsing_stdin = TRUE;
1158 break;
1160 /* Common options. */
1161 case 'a': append_to_tagfile = TRUE; break;
1162 case 'C': cplusplus = TRUE; break;
1163 case 'f': /* for compatibility with old makefiles */
1164 case 'o':
1165 if (tagfile)
1167 error ("-o option may only be given once.", (char *)NULL);
1168 suggest_asking_for_help ();
1169 /* NOTREACHED */
1171 tagfile = optarg;
1172 break;
1173 case 'I':
1174 case 'S': /* for backward compatibility */
1175 ignoreindent = TRUE;
1176 break;
1177 case 'l':
1179 language *lang = get_language_from_langname (optarg);
1180 if (lang != NULL)
1182 argbuffer[current_arg].lang = lang;
1183 argbuffer[current_arg].arg_type = at_language;
1184 ++current_arg;
1187 break;
1188 case 'c':
1189 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1190 optarg = concat (optarg, "i", ""); /* memory leak here */
1191 /* FALLTHRU */
1192 case 'r':
1193 argbuffer[current_arg].arg_type = at_regexp;
1194 argbuffer[current_arg].what = optarg;
1195 ++current_arg;
1196 break;
1197 case 'R':
1198 argbuffer[current_arg].arg_type = at_regexp;
1199 argbuffer[current_arg].what = NULL;
1200 ++current_arg;
1201 break;
1202 case 'V':
1203 print_version ();
1204 break;
1205 case 'h':
1206 case 'H':
1207 help_asked = TRUE;
1208 break;
1210 /* Etags options */
1211 case 'D': constantypedefs = FALSE; break;
1212 case 'i': included_files[nincluded_files++] = optarg; break;
1214 /* Ctags options. */
1215 case 'B': searchar = '?'; break;
1216 case 'd': constantypedefs = TRUE; break;
1217 case 't': typedefs = TRUE; break;
1218 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1219 case 'u': update = TRUE; break;
1220 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1221 case 'x': cxref_style = TRUE; break;
1222 case 'w': no_warnings = TRUE; break;
1223 default:
1224 suggest_asking_for_help ();
1225 /* NOTREACHED */
1228 /* No more options. Store the rest of arguments. */
1229 for (; optind < argc; optind++)
1231 argbuffer[current_arg].arg_type = at_filename;
1232 argbuffer[current_arg].what = argv[optind];
1233 ++current_arg;
1234 ++file_count;
1237 argbuffer[current_arg].arg_type = at_end;
1239 if (help_asked)
1240 print_help (argbuffer);
1241 /* NOTREACHED */
1243 if (nincluded_files == 0 && file_count == 0)
1245 error ("no input files specified.", (char *)NULL);
1246 suggest_asking_for_help ();
1247 /* NOTREACHED */
1250 if (tagfile == NULL)
1251 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1252 cwd = etags_getcwd (); /* the current working directory */
1253 if (cwd[strlen (cwd) - 1] != '/')
1255 char *oldcwd = cwd;
1256 cwd = concat (oldcwd, "/", "");
1257 free (oldcwd);
1260 /* Compute base directory for relative file names. */
1261 if (streq (tagfile, "-")
1262 || strneq (tagfile, "/dev/", 5))
1263 tagfiledir = cwd; /* relative file names are relative to cwd */
1264 else
1266 canonicalize_filename (tagfile);
1267 tagfiledir = absolute_dirname (tagfile, cwd);
1270 init (); /* set up boolean "functions" */
1272 linebuffer_init (&lb);
1273 linebuffer_init (&filename_lb);
1274 linebuffer_init (&filebuf);
1275 linebuffer_init (&token_name);
1277 if (!CTAGS)
1279 if (streq (tagfile, "-"))
1281 tagf = stdout;
1282 #ifdef DOS_NT
1283 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1284 doesn't take effect until after `stdout' is already open). */
1285 if (!isatty (fileno (stdout)))
1286 setmode (fileno (stdout), O_BINARY);
1287 #endif /* DOS_NT */
1289 else
1290 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1291 if (tagf == NULL)
1292 pfatal (tagfile);
1296 * Loop through files finding functions.
1298 for (i = 0; i < current_arg; i++)
1300 static language *lang; /* non-NULL if language is forced */
1301 char *this_file;
1303 switch (argbuffer[i].arg_type)
1305 case at_language:
1306 lang = argbuffer[i].lang;
1307 break;
1308 case at_regexp:
1309 analyse_regex (argbuffer[i].what);
1310 break;
1311 case at_filename:
1312 this_file = argbuffer[i].what;
1313 /* Input file named "-" means read file names from stdin
1314 (one per line) and use them. */
1315 if (streq (this_file, "-"))
1317 if (parsing_stdin)
1318 fatal ("cannot parse standard input AND read file names from it",
1319 (char *)NULL);
1320 while (readline_internal (&filename_lb, stdin) > 0)
1321 process_file_name (filename_lb.buffer, lang);
1323 else
1324 process_file_name (this_file, lang);
1325 break;
1326 case at_stdin:
1327 this_file = argbuffer[i].what;
1328 process_file (stdin, this_file, lang);
1329 break;
1333 free_regexps ();
1334 free (lb.buffer);
1335 free (filebuf.buffer);
1336 free (token_name.buffer);
1338 if (!CTAGS || cxref_style)
1340 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1341 put_entries (nodehead);
1342 free_tree (nodehead);
1343 nodehead = NULL;
1344 if (!CTAGS)
1346 fdesc *fdp;
1348 /* Output file entries that have no tags. */
1349 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1350 if (!fdp->written)
1351 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1353 while (nincluded_files-- > 0)
1354 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1356 if (fclose (tagf) == EOF)
1357 pfatal (tagfile);
1360 exit (EXIT_SUCCESS);
1363 /* From here on, we are in (CTAGS && !cxref_style) */
1364 if (update)
1366 char cmd[BUFSIZ];
1367 for (i = 0; i < current_arg; ++i)
1369 switch (argbuffer[i].arg_type)
1371 case at_filename:
1372 case at_stdin:
1373 break;
1374 default:
1375 continue; /* the for loop */
1377 sprintf (cmd,
1378 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1379 tagfile, argbuffer[i].what, tagfile);
1380 if (system (cmd) != EXIT_SUCCESS)
1381 fatal ("failed to execute shell command", (char *)NULL);
1383 append_to_tagfile = TRUE;
1386 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1387 if (tagf == NULL)
1388 pfatal (tagfile);
1389 put_entries (nodehead); /* write all the tags (CTAGS) */
1390 free_tree (nodehead);
1391 nodehead = NULL;
1392 if (fclose (tagf) == EOF)
1393 pfatal (tagfile);
1395 if (CTAGS)
1396 if (append_to_tagfile || update)
1398 char cmd[2*BUFSIZ+20];
1399 /* Maybe these should be used:
1400 setenv ("LC_COLLATE", "C", 1);
1401 setenv ("LC_ALL", "C", 1); */
1402 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1403 exit (system (cmd));
1405 return EXIT_SUCCESS;
1410 * Return a compressor given the file name. If EXTPTR is non-zero,
1411 * return a pointer into FILE where the compressor-specific
1412 * extension begins. If no compressor is found, NULL is returned
1413 * and EXTPTR is not significant.
1414 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1416 static compressor *
1417 get_compressor_from_suffix (file, extptr)
1418 char *file;
1419 char **extptr;
1421 compressor *compr;
1422 char *slash, *suffix;
1424 /* File has been processed by canonicalize_filename,
1425 so we don't need to consider backslashes on DOS_NT. */
1426 slash = etags_strrchr (file, '/');
1427 suffix = etags_strrchr (file, '.');
1428 if (suffix == NULL || suffix < slash)
1429 return NULL;
1430 if (extptr != NULL)
1431 *extptr = suffix;
1432 suffix += 1;
1433 /* Let those poor souls who live with DOS 8+3 file name limits get
1434 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1435 Only the first do loop is run if not MSDOS */
1438 for (compr = compressors; compr->suffix != NULL; compr++)
1439 if (streq (compr->suffix, suffix))
1440 return compr;
1441 if (!MSDOS)
1442 break; /* do it only once: not really a loop */
1443 if (extptr != NULL)
1444 *extptr = ++suffix;
1445 } while (*suffix != '\0');
1446 return NULL;
1452 * Return a language given the name.
1454 static language *
1455 get_language_from_langname (name)
1456 const char *name;
1458 language *lang;
1460 if (name == NULL)
1461 error ("empty language name", (char *)NULL);
1462 else
1464 for (lang = lang_names; lang->name != NULL; lang++)
1465 if (streq (name, lang->name))
1466 return lang;
1467 error ("unknown language \"%s\"", name);
1470 return NULL;
1475 * Return a language given the interpreter name.
1477 static language *
1478 get_language_from_interpreter (interpreter)
1479 char *interpreter;
1481 language *lang;
1482 char **iname;
1484 if (interpreter == NULL)
1485 return NULL;
1486 for (lang = lang_names; lang->name != NULL; lang++)
1487 if (lang->interpreters != NULL)
1488 for (iname = lang->interpreters; *iname != NULL; iname++)
1489 if (streq (*iname, interpreter))
1490 return lang;
1492 return NULL;
1498 * Return a language given the file name.
1500 static language *
1501 get_language_from_filename (file, case_sensitive)
1502 char *file;
1503 bool case_sensitive;
1505 language *lang;
1506 char **name, **ext, *suffix;
1508 /* Try whole file name first. */
1509 for (lang = lang_names; lang->name != NULL; lang++)
1510 if (lang->filenames != NULL)
1511 for (name = lang->filenames; *name != NULL; name++)
1512 if ((case_sensitive)
1513 ? streq (*name, file)
1514 : strcaseeq (*name, file))
1515 return lang;
1517 /* If not found, try suffix after last dot. */
1518 suffix = etags_strrchr (file, '.');
1519 if (suffix == NULL)
1520 return NULL;
1521 suffix += 1;
1522 for (lang = lang_names; lang->name != NULL; lang++)
1523 if (lang->suffixes != NULL)
1524 for (ext = lang->suffixes; *ext != NULL; ext++)
1525 if ((case_sensitive)
1526 ? streq (*ext, suffix)
1527 : strcaseeq (*ext, suffix))
1528 return lang;
1529 return NULL;
1534 * This routine is called on each file argument.
1536 static void
1537 process_file_name (file, lang)
1538 char *file;
1539 language *lang;
1541 struct stat stat_buf;
1542 FILE *inf;
1543 fdesc *fdp;
1544 compressor *compr;
1545 char *compressed_name, *uncompressed_name;
1546 char *ext, *real_name;
1547 int retval;
1549 canonicalize_filename (file);
1550 if (streq (file, tagfile) && !streq (tagfile, "-"))
1552 error ("skipping inclusion of %s in self.", file);
1553 return;
1555 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1557 compressed_name = NULL;
1558 real_name = uncompressed_name = savestr (file);
1560 else
1562 real_name = compressed_name = savestr (file);
1563 uncompressed_name = savenstr (file, ext - file);
1566 /* If the canonicalized uncompressed name
1567 has already been dealt with, skip it silently. */
1568 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1570 assert (fdp->infname != NULL);
1571 if (streq (uncompressed_name, fdp->infname))
1572 goto cleanup;
1575 if (stat (real_name, &stat_buf) != 0)
1577 /* Reset real_name and try with a different name. */
1578 real_name = NULL;
1579 if (compressed_name != NULL) /* try with the given suffix */
1581 if (stat (uncompressed_name, &stat_buf) == 0)
1582 real_name = uncompressed_name;
1584 else /* try all possible suffixes */
1586 for (compr = compressors; compr->suffix != NULL; compr++)
1588 compressed_name = concat (file, ".", compr->suffix);
1589 if (stat (compressed_name, &stat_buf) != 0)
1591 if (MSDOS)
1593 char *suf = compressed_name + strlen (file);
1594 size_t suflen = strlen (compr->suffix) + 1;
1595 for ( ; suf[1]; suf++, suflen--)
1597 memmove (suf, suf + 1, suflen);
1598 if (stat (compressed_name, &stat_buf) == 0)
1600 real_name = compressed_name;
1601 break;
1604 if (real_name != NULL)
1605 break;
1606 } /* MSDOS */
1607 free (compressed_name);
1608 compressed_name = NULL;
1610 else
1612 real_name = compressed_name;
1613 break;
1617 if (real_name == NULL)
1619 perror (file);
1620 goto cleanup;
1622 } /* try with a different name */
1624 if (!S_ISREG (stat_buf.st_mode))
1626 error ("skipping %s: it is not a regular file.", real_name);
1627 goto cleanup;
1629 if (real_name == compressed_name)
1631 char *cmd = concat (compr->command, " ", real_name);
1632 inf = (FILE *) popen (cmd, "r");
1633 free (cmd);
1635 else
1636 inf = fopen (real_name, "r");
1637 if (inf == NULL)
1639 perror (real_name);
1640 goto cleanup;
1643 process_file (inf, uncompressed_name, lang);
1645 if (real_name == compressed_name)
1646 retval = pclose (inf);
1647 else
1648 retval = fclose (inf);
1649 if (retval < 0)
1650 pfatal (file);
1652 cleanup:
1653 free (compressed_name);
1654 free (uncompressed_name);
1655 last_node = NULL;
1656 curfdp = NULL;
1657 return;
1660 static void
1661 process_file (fh, fn, lang)
1662 FILE *fh;
1663 char *fn;
1664 language *lang;
1666 static const fdesc emptyfdesc;
1667 fdesc *fdp;
1669 /* Create a new input file description entry. */
1670 fdp = xnew (1, fdesc);
1671 *fdp = emptyfdesc;
1672 fdp->next = fdhead;
1673 fdp->infname = savestr (fn);
1674 fdp->lang = lang;
1675 fdp->infabsname = absolute_filename (fn, cwd);
1676 fdp->infabsdir = absolute_dirname (fn, cwd);
1677 if (filename_is_absolute (fn))
1679 /* An absolute file name. Canonicalize it. */
1680 fdp->taggedfname = absolute_filename (fn, NULL);
1682 else
1684 /* A file name relative to cwd. Make it relative
1685 to the directory of the tags file. */
1686 fdp->taggedfname = relative_filename (fn, tagfiledir);
1688 fdp->usecharno = TRUE; /* use char position when making tags */
1689 fdp->prop = NULL;
1690 fdp->written = FALSE; /* not written on tags file yet */
1692 fdhead = fdp;
1693 curfdp = fdhead; /* the current file description */
1695 find_entries (fh);
1697 /* If not Ctags, and if this is not metasource and if it contained no #line
1698 directives, we can write the tags and free all nodes pointing to
1699 curfdp. */
1700 if (!CTAGS
1701 && curfdp->usecharno /* no #line directives in this file */
1702 && !curfdp->lang->metasource)
1704 node *np, *prev;
1706 /* Look for the head of the sublist relative to this file. See add_node
1707 for the structure of the node tree. */
1708 prev = NULL;
1709 for (np = nodehead; np != NULL; prev = np, np = np->left)
1710 if (np->fdp == curfdp)
1711 break;
1713 /* If we generated tags for this file, write and delete them. */
1714 if (np != NULL)
1716 /* This is the head of the last sublist, if any. The following
1717 instructions depend on this being true. */
1718 assert (np->left == NULL);
1720 assert (fdhead == curfdp);
1721 assert (last_node->fdp == curfdp);
1722 put_entries (np); /* write tags for file curfdp->taggedfname */
1723 free_tree (np); /* remove the written nodes */
1724 if (prev == NULL)
1725 nodehead = NULL; /* no nodes left */
1726 else
1727 prev->left = NULL; /* delete the pointer to the sublist */
1733 * This routine sets up the boolean pseudo-functions which work
1734 * by setting boolean flags dependent upon the corresponding character.
1735 * Every char which is NOT in that string is not a white char. Therefore,
1736 * all of the array "_wht" is set to FALSE, and then the elements
1737 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1738 * of a char is TRUE if it is the string "white", else FALSE.
1740 static void
1741 init ()
1743 register char *sp;
1744 register int i;
1746 for (i = 0; i < CHARS; i++)
1747 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1748 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1749 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1750 notinname('\0') = notinname('\n');
1751 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1752 begtoken('\0') = begtoken('\n');
1753 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1754 intoken('\0') = intoken('\n');
1755 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1756 endtoken('\0') = endtoken('\n');
1760 * This routine opens the specified file and calls the function
1761 * which finds the function and type definitions.
1763 static void
1764 find_entries (inf)
1765 FILE *inf;
1767 char *cp;
1768 language *lang = curfdp->lang;
1769 Lang_function *parser = NULL;
1771 /* If user specified a language, use it. */
1772 if (lang != NULL && lang->function != NULL)
1774 parser = lang->function;
1777 /* Else try to guess the language given the file name. */
1778 if (parser == NULL)
1780 lang = get_language_from_filename (curfdp->infname, TRUE);
1781 if (lang != NULL && lang->function != NULL)
1783 curfdp->lang = lang;
1784 parser = lang->function;
1788 /* Else look for sharp-bang as the first two characters. */
1789 if (parser == NULL
1790 && readline_internal (&lb, inf) > 0
1791 && lb.len >= 2
1792 && lb.buffer[0] == '#'
1793 && lb.buffer[1] == '!')
1795 char *lp;
1797 /* Set lp to point at the first char after the last slash in the
1798 line or, if no slashes, at the first nonblank. Then set cp to
1799 the first successive blank and terminate the string. */
1800 lp = etags_strrchr (lb.buffer+2, '/');
1801 if (lp != NULL)
1802 lp += 1;
1803 else
1804 lp = skip_spaces (lb.buffer + 2);
1805 cp = skip_non_spaces (lp);
1806 *cp = '\0';
1808 if (strlen (lp) > 0)
1810 lang = get_language_from_interpreter (lp);
1811 if (lang != NULL && lang->function != NULL)
1813 curfdp->lang = lang;
1814 parser = lang->function;
1819 /* We rewind here, even if inf may be a pipe. We fail if the
1820 length of the first line is longer than the pipe block size,
1821 which is unlikely. */
1822 rewind (inf);
1824 /* Else try to guess the language given the case insensitive file name. */
1825 if (parser == NULL)
1827 lang = get_language_from_filename (curfdp->infname, FALSE);
1828 if (lang != NULL && lang->function != NULL)
1830 curfdp->lang = lang;
1831 parser = lang->function;
1835 /* Else try Fortran or C. */
1836 if (parser == NULL)
1838 node *old_last_node = last_node;
1840 curfdp->lang = get_language_from_langname ("fortran");
1841 find_entries (inf);
1843 if (old_last_node == last_node)
1844 /* No Fortran entries found. Try C. */
1846 /* We do not tag if rewind fails.
1847 Only the file name will be recorded in the tags file. */
1848 rewind (inf);
1849 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1850 find_entries (inf);
1852 return;
1855 if (!no_line_directive
1856 && curfdp->lang != NULL && curfdp->lang->metasource)
1857 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1858 file, or anyway we parsed a file that is automatically generated from
1859 this one. If this is the case, the bingo.c file contained #line
1860 directives that generated tags pointing to this file. Let's delete
1861 them all before parsing this file, which is the real source. */
1863 fdesc **fdpp = &fdhead;
1864 while (*fdpp != NULL)
1865 if (*fdpp != curfdp
1866 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1867 /* We found one of those! We must delete both the file description
1868 and all tags referring to it. */
1870 fdesc *badfdp = *fdpp;
1872 /* Delete the tags referring to badfdp->taggedfname
1873 that were obtained from badfdp->infname. */
1874 invalidate_nodes (badfdp, &nodehead);
1876 *fdpp = badfdp->next; /* remove the bad description from the list */
1877 free_fdesc (badfdp);
1879 else
1880 fdpp = &(*fdpp)->next; /* advance the list pointer */
1883 assert (parser != NULL);
1885 /* Generic initialisations before reading from file. */
1886 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1888 /* Generic initialisations before parsing file with readline. */
1889 lineno = 0; /* reset global line number */
1890 charno = 0; /* reset global char number */
1891 linecharno = 0; /* reset global char number of line start */
1893 parser (inf);
1895 regex_tag_multiline ();
1900 * Check whether an implicitly named tag should be created,
1901 * then call `pfnote'.
1902 * NAME is a string that is internally copied by this function.
1904 * TAGS format specification
1905 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1906 * The following is explained in some more detail in etc/ETAGS.EBNF.
1908 * make_tag creates tags with "implicit tag names" (unnamed tags)
1909 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1910 * 1. NAME does not contain any of the characters in NONAM;
1911 * 2. LINESTART contains name as either a rightmost, or rightmost but
1912 * one character, substring;
1913 * 3. the character, if any, immediately before NAME in LINESTART must
1914 * be a character in NONAM;
1915 * 4. the character, if any, immediately after NAME in LINESTART must
1916 * also be a character in NONAM.
1918 * The implementation uses the notinname() macro, which recognises the
1919 * characters stored in the string `nonam'.
1920 * etags.el needs to use the same characters that are in NONAM.
1922 static void
1923 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1924 char *name; /* tag name, or NULL if unnamed */
1925 int namelen; /* tag length */
1926 bool is_func; /* tag is a function */
1927 char *linestart; /* start of the line where tag is */
1928 int linelen; /* length of the line where tag is */
1929 int lno; /* line number */
1930 long cno; /* character number */
1932 bool named = (name != NULL && namelen > 0);
1934 if (!CTAGS && named) /* maybe set named to false */
1935 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1936 such that etags.el can guess a name from it. */
1938 int i;
1939 register char *cp = name;
1941 for (i = 0; i < namelen; i++)
1942 if (notinname (*cp++))
1943 break;
1944 if (i == namelen) /* rule #1 */
1946 cp = linestart + linelen - namelen;
1947 if (notinname (linestart[linelen-1]))
1948 cp -= 1; /* rule #4 */
1949 if (cp >= linestart /* rule #2 */
1950 && (cp == linestart
1951 || notinname (cp[-1])) /* rule #3 */
1952 && strneq (name, cp, namelen)) /* rule #2 */
1953 named = FALSE; /* use implicit tag name */
1957 if (named)
1958 name = savenstr (name, namelen);
1959 else
1960 name = NULL;
1961 pfnote (name, is_func, linestart, linelen, lno, cno);
1964 /* Record a tag. */
1965 static void
1966 pfnote (name, is_func, linestart, linelen, lno, cno)
1967 char *name; /* tag name, or NULL if unnamed */
1968 bool is_func; /* tag is a function */
1969 char *linestart; /* start of the line where tag is */
1970 int linelen; /* length of the line where tag is */
1971 int lno; /* line number */
1972 long cno; /* character number */
1974 register node *np;
1976 assert (name == NULL || name[0] != '\0');
1977 if (CTAGS && name == NULL)
1978 return;
1980 np = xnew (1, node);
1982 /* If ctags mode, change name "main" to M<thisfilename>. */
1983 if (CTAGS && !cxref_style && streq (name, "main"))
1985 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1986 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1987 fp = etags_strrchr (np->name, '.');
1988 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1989 fp[0] = '\0';
1991 else
1992 np->name = name;
1993 np->valid = TRUE;
1994 np->been_warned = FALSE;
1995 np->fdp = curfdp;
1996 np->is_func = is_func;
1997 np->lno = lno;
1998 if (np->fdp->usecharno)
1999 /* Our char numbers are 0-base, because of C language tradition?
2000 ctags compatibility? old versions compatibility? I don't know.
2001 Anyway, since emacs's are 1-base we expect etags.el to take care
2002 of the difference. If we wanted to have 1-based numbers, we would
2003 uncomment the +1 below. */
2004 np->cno = cno /* + 1 */ ;
2005 else
2006 np->cno = invalidcharno;
2007 np->left = np->right = NULL;
2008 if (CTAGS && !cxref_style)
2010 if (strlen (linestart) < 50)
2011 np->regex = concat (linestart, "$", "");
2012 else
2013 np->regex = savenstr (linestart, 50);
2015 else
2016 np->regex = savenstr (linestart, linelen);
2018 add_node (np, &nodehead);
2022 * free_tree ()
2023 * recurse on left children, iterate on right children.
2025 static void
2026 free_tree (np)
2027 register node *np;
2029 while (np)
2031 register node *node_right = np->right;
2032 free_tree (np->left);
2033 free (np->name);
2034 free (np->regex);
2035 free (np);
2036 np = node_right;
2041 * free_fdesc ()
2042 * delete a file description
2044 static void
2045 free_fdesc (fdp)
2046 register fdesc *fdp;
2048 free (fdp->infname);
2049 free (fdp->infabsname);
2050 free (fdp->infabsdir);
2051 free (fdp->taggedfname);
2052 free (fdp->prop);
2053 free (fdp);
2057 * add_node ()
2058 * Adds a node to the tree of nodes. In etags mode, sort by file
2059 * name. In ctags mode, sort by tag name. Make no attempt at
2060 * balancing.
2062 * add_node is the only function allowed to add nodes, so it can
2063 * maintain state.
2065 static void
2066 add_node (np, cur_node_p)
2067 node *np, **cur_node_p;
2069 register int dif;
2070 register node *cur_node = *cur_node_p;
2072 if (cur_node == NULL)
2074 *cur_node_p = np;
2075 last_node = np;
2076 return;
2079 if (!CTAGS)
2080 /* Etags Mode */
2082 /* For each file name, tags are in a linked sublist on the right
2083 pointer. The first tags of different files are a linked list
2084 on the left pointer. last_node points to the end of the last
2085 used sublist. */
2086 if (last_node != NULL && last_node->fdp == np->fdp)
2088 /* Let's use the same sublist as the last added node. */
2089 assert (last_node->right == NULL);
2090 last_node->right = np;
2091 last_node = np;
2093 else if (cur_node->fdp == np->fdp)
2095 /* Scanning the list we found the head of a sublist which is
2096 good for us. Let's scan this sublist. */
2097 add_node (np, &cur_node->right);
2099 else
2100 /* The head of this sublist is not good for us. Let's try the
2101 next one. */
2102 add_node (np, &cur_node->left);
2103 } /* if ETAGS mode */
2105 else
2107 /* Ctags Mode */
2108 dif = strcmp (np->name, cur_node->name);
2111 * If this tag name matches an existing one, then
2112 * do not add the node, but maybe print a warning.
2114 if (no_duplicates && !dif)
2116 if (np->fdp == cur_node->fdp)
2118 if (!no_warnings)
2120 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2121 np->fdp->infname, lineno, np->name);
2122 fprintf (stderr, "Second entry ignored\n");
2125 else if (!cur_node->been_warned && !no_warnings)
2127 fprintf
2128 (stderr,
2129 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2130 np->fdp->infname, cur_node->fdp->infname, np->name);
2131 cur_node->been_warned = TRUE;
2133 return;
2136 /* Actually add the node */
2137 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2138 } /* if CTAGS mode */
2142 * invalidate_nodes ()
2143 * Scan the node tree and invalidate all nodes pointing to the
2144 * given file description (CTAGS case) or free them (ETAGS case).
2146 static void
2147 invalidate_nodes (badfdp, npp)
2148 fdesc *badfdp;
2149 node **npp;
2151 node *np = *npp;
2153 if (np == NULL)
2154 return;
2156 if (CTAGS)
2158 if (np->left != NULL)
2159 invalidate_nodes (badfdp, &np->left);
2160 if (np->fdp == badfdp)
2161 np->valid = FALSE;
2162 if (np->right != NULL)
2163 invalidate_nodes (badfdp, &np->right);
2165 else
2167 assert (np->fdp != NULL);
2168 if (np->fdp == badfdp)
2170 *npp = np->left; /* detach the sublist from the list */
2171 np->left = NULL; /* isolate it */
2172 free_tree (np); /* free it */
2173 invalidate_nodes (badfdp, npp);
2175 else
2176 invalidate_nodes (badfdp, &np->left);
2181 static int total_size_of_entries __P((node *));
2182 static int number_len __P((long));
/* Number of decimal digits needed to print NUM, which is expected to
   be non-negative.  The result is never smaller than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit is always there; add one per further power of ten.  */
  for (digits = 1, num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2196 * Return total number of characters that put_entries will output for
2197 * the nodes in the linked list at the right of the specified node.
2198 * This count is irrelevant with etags.el since emacs 19.34 at least,
2199 * but is still supplied for backward compatibility.
2201 static int
2202 total_size_of_entries (np)
2203 register node *np;
2205 register int total = 0;
2207 for (; np != NULL; np = np->right)
2208 if (np->valid)
2210 total += strlen (np->regex) + 1; /* pat\177 */
2211 if (np->name != NULL)
2212 total += strlen (np->name) + 1; /* name\001 */
2213 total += number_len ((long) np->lno) + 1; /* lno, */
2214 if (np->cno != invalidcharno) /* cno */
2215 total += number_len (np->cno);
2216 total += 1; /* newline */
2219 return total;
2222 static void
2223 put_entries (np)
2224 register node *np;
2226 register char *sp;
2227 static fdesc *fdp = NULL;
2229 if (np == NULL)
2230 return;
2232 /* Output subentries that precede this one */
2233 if (CTAGS)
2234 put_entries (np->left);
2236 /* Output this entry */
2237 if (np->valid)
2239 if (!CTAGS)
2241 /* Etags mode */
2242 if (fdp != np->fdp)
2244 fdp = np->fdp;
2245 fprintf (tagf, "\f\n%s,%d\n",
2246 fdp->taggedfname, total_size_of_entries (np));
2247 fdp->written = TRUE;
2249 fputs (np->regex, tagf);
2250 fputc ('\177', tagf);
2251 if (np->name != NULL)
2253 fputs (np->name, tagf);
2254 fputc ('\001', tagf);
2256 fprintf (tagf, "%d,", np->lno);
2257 if (np->cno != invalidcharno)
2258 fprintf (tagf, "%ld", np->cno);
2259 fputs ("\n", tagf);
2261 else
2263 /* Ctags mode */
2264 if (np->name == NULL)
2265 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2267 if (cxref_style)
2269 if (vgrind_style)
2270 fprintf (stdout, "%s %s %d\n",
2271 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2272 else
2273 fprintf (stdout, "%-16s %3d %-16s %s\n",
2274 np->name, np->lno, np->fdp->taggedfname, np->regex);
2276 else
2278 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2280 if (np->is_func)
2281 { /* function or #define macro with args */
2282 putc (searchar, tagf);
2283 putc ('^', tagf);
2285 for (sp = np->regex; *sp; sp++)
2287 if (*sp == '\\' || *sp == searchar)
2288 putc ('\\', tagf);
2289 putc (*sp, tagf);
2291 putc (searchar, tagf);
2293 else
2294 { /* anything else; text pattern inadequate */
2295 fprintf (tagf, "%d", np->lno);
2297 putc ('\n', tagf);
2300 } /* if this node contains a valid tag */
2302 /* Output subentries that follow this one */
2303 put_entries (np->right);
2304 if (!CTAGS)
2305 put_entries (np->left);
/* C extensions.
   These values are combined in one int describing the C dialect
   being parsed.  C_PLPL, C_STAR and C_JAVA all have the lowest bit
   set, so a dialect-specific test has to mask that bit out, as in
   (C_JAVA & ~C_PLPL). */
#define C_EXT	0x00fff		/* mask selecting the C extension bits */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables.
 * Every keyword known to the C-like parsers is mapped to one of the
 * following classes; st_none stands for "not a keyword".
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2333 static unsigned int hash __P((const char *, unsigned int));
2334 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2335 static enum sym_type C_symtype __P((char *, int, int));
2337 /* Feed stuff between (but not including) %[ and %] lines to:
2338 gperf -m 5
2340 %compare-strncmp
2341 %enum
2342 %struct-type
2343 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2345 if, 0, st_C_ignore
2346 for, 0, st_C_ignore
2347 while, 0, st_C_ignore
2348 switch, 0, st_C_ignore
2349 return, 0, st_C_ignore
2350 __attribute__, 0, st_C_attribute
2351 GTY, 0, st_C_attribute
2352 @interface, 0, st_C_objprot
2353 @protocol, 0, st_C_objprot
2354 @implementation,0, st_C_objimpl
2355 @end, 0, st_C_objend
2356 import, (C_JAVA & ~C_PLPL), st_C_ignore
2357 package, (C_JAVA & ~C_PLPL), st_C_ignore
2358 friend, C_PLPL, st_C_ignore
2359 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2360 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2361 interface, (C_JAVA & ~C_PLPL), st_C_struct
2362 class, 0, st_C_class
2363 namespace, C_PLPL, st_C_struct
2364 domain, C_STAR, st_C_struct
2365 union, 0, st_C_struct
2366 struct, 0, st_C_struct
2367 extern, 0, st_C_extern
2368 enum, 0, st_C_enum
2369 typedef, 0, st_C_typedef
2370 define, 0, st_C_define
2371 undef, 0, st_C_define
2372 operator, C_PLPL, st_C_operator
2373 template, 0, st_C_template
2374 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2375 DEFUN, 0, st_C_gnumacro
2376 SYSCALL, 0, st_C_gnumacro
2377 ENTRY, 0, st_C_gnumacro
2378 PSEUDO, 0, st_C_gnumacro
2379 # These are defined inside C functions, so currently they are not met.
2380 # EXFUN used in glibc, DEFVAR_* in emacs.
2381 #EXFUN, 0, st_C_gnumacro
2382 #DEFVAR_, 0, st_C_gnumacro
2384 and replace lines between %< and %> with its output, then:
2385 - remove the #if characterset check
2386 - make in_word_set static and not inline. */
2387 /*%<*/
2388 /* C code produced by gperf version 3.0.1 */
2389 /* Command-line: gperf -m 5 */
2390 /* Computed positions: -k'2-3' */
2392 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2393 /* maximum key range = 33, duplicates = 0 */
/* Hash function of the keyword table (originally gperf output).
   The value is LEN plus the association values of the second
   character and, for keywords longer than two characters, of the
   third one.  STR must hold at least two characters, and at least
   three whenever LEN is not 2. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only a two-character keyword has no third character to use.  */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2450 static struct C_stab_entry *
2451 in_word_set (str, len)
2452 register const char *str;
2453 register unsigned int len;
2455 enum
2457 TOTAL_KEYWORDS = 33,
2458 MIN_WORD_LENGTH = 2,
2459 MAX_WORD_LENGTH = 15,
2460 MIN_HASH_VALUE = 2,
2461 MAX_HASH_VALUE = 34
2464 static struct C_stab_entry wordlist[] =
2466 {""}, {""},
2467 {"if", 0, st_C_ignore},
2468 {"GTY", 0, st_C_attribute},
2469 {"@end", 0, st_C_objend},
2470 {"union", 0, st_C_struct},
2471 {"define", 0, st_C_define},
2472 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2473 {"template", 0, st_C_template},
2474 {"operator", C_PLPL, st_C_operator},
2475 {"@interface", 0, st_C_objprot},
2476 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2477 {"friend", C_PLPL, st_C_ignore},
2478 {"typedef", 0, st_C_typedef},
2479 {"return", 0, st_C_ignore},
2480 {"@implementation",0, st_C_objimpl},
2481 {"@protocol", 0, st_C_objprot},
2482 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2483 {"extern", 0, st_C_extern},
2484 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2485 {"struct", 0, st_C_struct},
2486 {"domain", C_STAR, st_C_struct},
2487 {"switch", 0, st_C_ignore},
2488 {"enum", 0, st_C_enum},
2489 {"for", 0, st_C_ignore},
2490 {"namespace", C_PLPL, st_C_struct},
2491 {"class", 0, st_C_class},
2492 {"while", 0, st_C_ignore},
2493 {"undef", 0, st_C_define},
2494 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2495 {"__attribute__", 0, st_C_attribute},
2496 {"SYSCALL", 0, st_C_gnumacro},
2497 {"ENTRY", 0, st_C_gnumacro},
2498 {"PSEUDO", 0, st_C_gnumacro},
2499 {"DEFUN", 0, st_C_gnumacro}
2502 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2504 register int key = hash (str, len);
2506 if (key <= MAX_HASH_VALUE && key >= 0)
2508 register const char *s = wordlist[key].name;
2510 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2511 return &wordlist[key];
2514 return 0;
2516 /*%>*/
2518 static enum sym_type
2519 C_symtype (str, len, c_ext)
2520 char *str;
2521 int len;
2522 int c_ext;
2524 register struct C_stab_entry *se = in_word_set (str, len);
2526 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2527 return st_none;
2528 return se->type;
2533 * Ignoring __attribute__ ((list))
2535 static bool inattribute; /* looking at an __attribute__ construct */
2538 * C functions and variables are recognized using a simple
2539 * finite automaton. fvdef is its state variable.
2541 static enum
2543 fvnone, /* nothing seen */
2544 fdefunkey, /* Emacs DEFUN keyword seen */
2545 fdefunname, /* Emacs DEFUN name seen */
2546 foperator, /* func: operator keyword seen (cplpl) */
2547 fvnameseen, /* function or variable name seen */
2548 fstartlist, /* func: just after open parenthesis */
2549 finlist, /* func: in parameter list */
2550 flistseen, /* func: after parameter list */
2551 fignore, /* func: before open brace */
2552 vignore /* var-like: ignore until ';' */
2553 } fvdef;
2555 static bool fvextern; /* func or var: extern keyword seen; */
2558 * typedefs are recognized using a simple finite automaton.
2559 * typdef is its state variable.
2561 static enum
2563 tnone, /* nothing seen */
2564 tkeyseen, /* typedef keyword seen */
2565 ttypeseen, /* defined type seen */
2566 tinbody, /* inside typedef body */
2567 tend, /* just before typedef tag */
2568 tignore /* junk after typedef tag */
2569 } typdef;
2572 * struct-like structures (enum, struct and union) are recognized
2573 * using another simple finite automaton. `structdef' is its state
2574 * variable.
2576 static enum
2578 snone, /* nothing seen yet,
2579 or in struct body if bracelev > 0 */
2580 skeyseen, /* struct-like keyword seen */
2581 stagseen, /* struct-like tag seen */
2582 scolonseen /* colon seen after struct-like tag */
2583 } structdef;
2586 * When objdef is different from onone, objtag is the name of the class.
2588 static char *objtag = "<uninited>";
2591 * Yet another little state machine to deal with preprocessor lines.
2593 static enum
2595 dnone, /* nothing seen */
2596 dsharpseen, /* '#' seen as first char on line */
2597 ddefineseen, /* '#' and 'define' seen */
2598 dignorerest /* ignore rest of line */
2599 } definedef;
2602 * State machine for Objective C protocols and implementations.
2603 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2605 static enum
2607 onone, /* nothing seen */
2608 oprotocol, /* @interface or @protocol seen */
2609 oimplementation, /* @implementations seen */
2610 otagseen, /* class name seen */
2611 oparenseen, /* parenthesis before category seen */
2612 ocatseen, /* category name seen */
2613 oinbody, /* in @implementation body */
2614 omethodsign, /* in @implementation body, after +/- */
2615 omethodtag, /* after method name */
2616 omethodcolon, /* after method colon */
2617 omethodparm, /* after method parameter */
2618 oignore /* wait for @end */
2619 } objdef;
2623 * Use this structure to keep info about the token read, and how it
2624 * should be tagged. Used by the make_C_tag function to build a tag.
2626 static struct tok
2628 char *line; /* string containing the token */
2629 int offset; /* where the token starts in LINE */
2630 int length; /* token length */
2632 The previous members can be used to pass strings around for generic
2633 purposes. The following ones specifically refer to creating tags. In this
2634 case the token contained here is the pattern that will be used to create a
2635 tag.
2637 bool valid; /* do not create a tag; the token should be
2638 invalidated whenever a state machine is
2639 reset prematurely */
2640 bool named; /* create a named tag */
2641 int lineno; /* source line number of tag */
2642 long linepos; /* source char number of tag */
2643 } token; /* latest token read */
2646 * Variables and functions for dealing with nested structures.
2647 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2649 static void pushclass_above __P((int, char *, int));
2650 static void popclass_above __P((int));
2651 static void write_classname __P((linebuffer *, char *qualifier));
2653 static struct {
2654 char **cname; /* nested class names */
2655 int *bracelev; /* nested class brace level */
2656 int nl; /* class nesting level (elements used) */
2657 int size; /* length of the array */
2658 } cstack; /* stack for nested declaration tags */
2659 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2660 #define nestlev (cstack.nl)
2661 /* After struct keyword or in struct body, not inside a nested function. */
2662 #define instruct (structdef == snone && nestlev > 0 \
2663 && bracelev == cstack.bracelev[nestlev-1] + 1)
2665 static void
2666 pushclass_above (bracelev, str, len)
2667 int bracelev;
2668 char *str;
2669 int len;
2671 int nl;
2673 popclass_above (bracelev);
2674 nl = cstack.nl;
2675 if (nl >= cstack.size)
2677 int size = cstack.size *= 2;
2678 xrnew (cstack.cname, size, char *);
2679 xrnew (cstack.bracelev, size, int);
2681 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2682 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2683 cstack.bracelev[nl] = bracelev;
2684 cstack.nl = nl + 1;
2687 static void
2688 popclass_above (bracelev)
2689 int bracelev;
2691 int nl;
2693 for (nl = cstack.nl - 1;
2694 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2695 nl--)
2697 free (cstack.cname[nl]);
2698 cstack.nl = nl;
2702 static void
2703 write_classname (cn, qualifier)
2704 linebuffer *cn;
2705 char *qualifier;
2707 int i, len;
2708 int qlen = strlen (qualifier);
2710 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2712 len = 0;
2713 cn->len = 0;
2714 cn->buffer[0] = '\0';
2716 else
2718 len = strlen (cstack.cname[0]);
2719 linebuffer_setlen (cn, len);
2720 strcpy (cn->buffer, cstack.cname[0]);
2722 for (i = 1; i < cstack.nl; i++)
2724 char *s;
2725 int slen;
2727 s = cstack.cname[i];
2728 if (s == NULL)
2729 continue;
2730 slen = strlen (s);
2731 len += slen + qlen;
2732 linebuffer_setlen (cn, len);
2733 strncat (cn->buffer, qualifier, qlen);
2734 strncat (cn->buffer, s, slen);
2739 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2740 static void make_C_tag __P((bool));
/* consider_token: classify the token of LEN characters at STR with
   C_symtype, then feed it in turn to the preprocessor (definedef),
   typedef (typdef), struct-like (structdef), Objective C (objdef)
   and function/variable (fvdef) state machines.  The return value is
   TRUE when the token is a candidate for a tag and FALSE otherwise.
   structtype, structbracelev and toktype are static and so keep
   their values between calls. */
2743 * consider_token ()
2744 * checks to see if the current token is at the start of a
2745 * function or variable, or corresponds to a typedef, or
2746 * is a struct/union/enum tag, or #define, or an enum constant.
2748 * *IS_FUNC gets TRUE if the token is a function or #define macro
2749 * with args. C_EXTP points to which language we are looking at.
2751 * Globals
2752 * fvdef IN OUT
2753 * structdef IN OUT
2754 * definedef IN OUT
2755 * typdef IN OUT
2756 * objdef IN OUT
2759 static bool
2760 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2761 register char *str; /* IN: token pointer */
2762 register int len; /* IN: token length */
2763 register int c; /* IN: first char after the token */
2764 int *c_extp; /* IN, OUT: C extensions mask */
2765 int bracelev; /* IN: brace level */
2766 int parlev; /* IN: parenthesis level */
2767 bool *is_func_or_var; /* OUT: function or variable found */
2769 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2770 structtype is the type of the preceding struct-like keyword, and
2771 structbracelev is the brace level where it has been seen. */
2772 static enum sym_type structtype;
2773 static int structbracelev;
2774 static enum sym_type toktype;
2777 toktype = C_symtype (str, len, *c_extp);
2780 * Skip __attribute__
2782 if (toktype == st_C_attribute)
2784 inattribute = TRUE;
2785 return FALSE;
2789 * Advance the definedef state machine.
2791 switch (definedef)
2793 case dnone:
2794 /* We're not on a preprocessor line. */
2795 if (toktype == st_C_gnumacro)
2797 fvdef = fdefunkey;
2798 return FALSE;
2800 break;
2801 case dsharpseen:
2802 if (toktype == st_C_define)
2804 definedef = ddefineseen;
2806 else
2808 definedef = dignorerest;
2810 return FALSE;
2811 case ddefineseen:
2813 * Make a tag for any macro, unless it is a constant
2814 * and constantypedefs is FALSE.
2816 definedef = dignorerest;
2817 *is_func_or_var = (c == '(');
2818 if (!*is_func_or_var && !constantypedefs)
2819 return FALSE;
2820 else
2821 return TRUE;
2822 case dignorerest:
2823 return FALSE;
2824 default:
2825 error ("internal error: definedef value.", (char *)NULL);
2829 * Now typedefs
2831 switch (typdef)
2833 case tnone:
2834 if (toktype == st_C_typedef)
2836 if (typedefs)
2837 typdef = tkeyseen;
2838 fvextern = FALSE;
2839 fvdef = fvnone;
2840 return FALSE;
2842 break;
2843 case tkeyseen:
2844 switch (toktype)
2846 case st_none:
2847 case st_C_class:
2848 case st_C_struct:
2849 case st_C_enum:
2850 typdef = ttypeseen;
2852 break;
2853 case ttypeseen:
2854 if (structdef == snone && fvdef == fvnone)
2856 fvdef = fvnameseen;
2857 return TRUE;
2859 break;
2860 case tend:
2861 switch (toktype)
2863 case st_C_class:
2864 case st_C_struct:
2865 case st_C_enum:
2866 return FALSE;
2868 return TRUE;
/* Struct-like keywords: remember which keyword was seen and at which
   brace level, so that the following token can be taken as its tag. */
2871 switch (toktype)
2873 case st_C_javastruct:
2874 if (structdef == stagseen)
2875 structdef = scolonseen;
2876 return FALSE;
2877 case st_C_template:
2878 case st_C_class:
2879 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2880 && bracelev == 0
2881 && definedef == dnone && structdef == snone
2882 && typdef == tnone && fvdef == fvnone)
2883 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2884 if (toktype == st_C_template)
2885 break;
2886 /* FALLTHRU */
2887 case st_C_struct:
2888 case st_C_enum:
2889 if (parlev == 0
2890 && fvdef != vignore
2891 && (typdef == tkeyseen
2892 || (typedefs_or_cplusplus && structdef == snone)))
2894 structdef = skeyseen;
2895 structtype = toktype;
2896 structbracelev = bracelev;
2897 if (fvdef == fvnameseen)
2898 fvdef = fvnone;
2900 return FALSE;
/* The token right after a struct-like keyword is the tag of the
   structure. */
2903 if (structdef == skeyseen)
2905 structdef = stagseen;
2906 return TRUE;
2909 if (typdef != tnone)
2910 definedef = dnone;
2912 /* Detect Objective C constructs. */
2913 switch (objdef)
2915 case onone:
2916 switch (toktype)
2918 case st_C_objprot:
2919 objdef = oprotocol;
2920 return FALSE;
2921 case st_C_objimpl:
2922 objdef = oimplementation;
2923 return FALSE;
2925 break;
2926 case oimplementation:
2927 /* Save the class tag for functions or variables defined inside. */
2928 objtag = savenstr (str, len);
2929 objdef = oinbody;
2930 return FALSE;
2931 case oprotocol:
2932 /* Save the class tag for categories. */
2933 objtag = savenstr (str, len);
2934 objdef = otagseen;
2935 *is_func_or_var = TRUE;
2936 return TRUE;
2937 case oparenseen:
2938 objdef = ocatseen;
2939 *is_func_or_var = TRUE;
2940 return TRUE;
2941 case oinbody:
2942 break;
2943 case omethodsign:
2944 if (parlev == 0)
2946 fvdef = fvnone;
2947 objdef = omethodtag;
/* The first part of the method name replaces the contents of
   token_name; later parts are appended in the omethodparm case. */
2948 linebuffer_setlen (&token_name, len);
2949 strncpy (token_name.buffer, str, len);
2950 token_name.buffer[len] = '\0';
2951 return TRUE;
2953 return FALSE;
2954 case omethodcolon:
2955 if (parlev == 0)
2956 objdef = omethodparm;
2957 return FALSE;
2958 case omethodparm:
2959 if (parlev == 0)
2961 fvdef = fvnone;
2962 objdef = omethodtag;
2963 linebuffer_setlen (&token_name, token_name.len + len);
2964 strncat (token_name.buffer, str, len);
2965 return TRUE;
2967 return FALSE;
2968 case oignore:
2969 if (toktype == st_C_objend)
2971 /* Memory leakage here: the string pointed by objtag is
2972 never released, because many tests would be needed to
2973 avoid breaking on incorrect input code. The amount of
2974 memory leaked here is the sum of the lengths of the
2975 class tags.
2976 free (objtag); */
2977 objdef = onone;
2979 return FALSE;
2982 /* A function, variable or enum constant? */
2983 switch (toktype)
2985 case st_C_extern:
2986 fvextern = TRUE;
2987 switch (fvdef)
2989 case finlist:
2990 case flistseen:
2991 case fignore:
2992 case vignore:
2993 break;
2994 default:
2995 fvdef = fvnone;
2997 return FALSE;
2998 case st_C_ignore:
2999 fvextern = FALSE;
3000 fvdef = vignore;
3001 return FALSE;
3002 case st_C_operator:
3003 fvdef = foperator;
3004 *is_func_or_var = TRUE;
3005 return TRUE;
3006 case st_none:
3007 if (constantypedefs
3008 && structdef == snone
3009 && structtype == st_C_enum && bracelev > structbracelev)
3010 return TRUE; /* enum constant */
3011 switch (fvdef)
3013 case fdefunkey:
3014 if (bracelev > 0)
3015 break;
3016 fvdef = fdefunname; /* GNU macro */
3017 *is_func_or_var = TRUE;
3018 return TRUE;
3019 case fvnone:
3020 switch (typdef)
3022 case ttypeseen:
3023 return FALSE;
3024 case tnone:
3025 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3026 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3028 fvdef = vignore;
3029 return FALSE;
3031 break;
3033 /* FALLTHRU */
3034 case fvnameseen:
3035 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3037 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3038 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3039 fvdef = foperator;
3040 *is_func_or_var = TRUE;
3041 return TRUE;
3043 if (bracelev > 0 && !instruct)
3044 break;
3045 fvdef = fvnameseen; /* function or variable */
3046 *is_func_or_var = TRUE;
3047 return TRUE;
3049 break;
3052 return FALSE;
3057 * C_entries often keeps pointers to tokens or lines which are older than
3058 * the line currently read. By keeping two line buffers, and switching
3059 * them at end of line, it is possible to use those pointers.
3061 static struct
3063 long linepos;
3064 linebuffer lb;
3065 } lbs[2];
3067 #define current_lb_is_new (newndx == curndx)
3068 #define switch_line_buffers() (curndx = 1 - curndx)
3070 #define curlb (lbs[curndx].lb)
3071 #define newlb (lbs[newndx].lb)
3072 #define curlinepos (lbs[curndx].linepos)
3073 #define newlinepos (lbs[newndx].linepos)
3075 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3076 #define cplpl (c_ext & C_PLPL)
3077 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3079 #define CNL_SAVE_DEFINEDEF() \
3080 do { \
3081 curlinepos = charno; \
3082 readline (&curlb, inf); \
3083 lp = curlb.buffer; \
3084 quotednl = FALSE; \
3085 newndx = curndx; \
3086 } while (0)
3088 #define CNL() \
3089 do { \
3090 CNL_SAVE_DEFINEDEF(); \
3091 if (savetoken.valid) \
3093 token = savetoken; \
3094 savetoken.valid = FALSE; \
3096 definedef = dnone; \
3097 } while (0)
3100 static void
3101 make_C_tag (isfun)
3102 bool isfun;
3104 /* This function is never called when token.valid is FALSE, but
3105 we must protect against invalid input or internal errors. */
3106 if (token.valid)
3107 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3108 token.offset+token.length+1, token.lineno, token.linepos);
3109 else if (DEBUG)
3110 { /* this branch is optimised away if !DEBUG */
3111 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3112 token_name.len + 17, isfun, token.line,
3113 token.offset+token.length+1, token.lineno, token.linepos);
3114 error ("INVALID TOKEN", NULL);
3117 token.valid = FALSE;
3122 * C_entries ()
3123 * This routine finds functions, variables, typedefs,
3124 * #define's, enum constants and struct/union/enum definitions in
3125 * C syntax and adds them to the list.
/*
 * C_entries: scan the input stream INF one character at a time and emit
 * tags through make_C_tag ().
 *
 * c_ext  bit flags describing the dialect; this function tests the YACC
 *        and C_AUTO bits and replaces C_AUTO with C_PLPL when it meets
 *        `::' in the middle of an identifier.
 * inf    input stream; the main loop runs until feof (inf) is true.
 *
 * Scanner state lives in the locals declared below and in variables that
 * are declared outside this function (fvdef, fvextern, typdef, structdef,
 * definedef, objdef, token, ...).  The two line buffers lbs[0].lb and
 * lbs[1].lb are initialised on entry and their storage is freed on exit.
 */
3127 static void
3128 C_entries (c_ext, inf)
3129 int c_ext; /* extension of C */
3130 FILE *inf; /* input file */
3132 register char c; /* latest char read; '\0' for end of line */
3133 register char *lp; /* pointer one beyond the character `c' */
3134 int curndx, newndx; /* indices for current and new lb */
3135 register int tokoff; /* offset in line of start of current token */
3136 register int toklen; /* length of current token */
3137 char *qualifier; /* string used to qualify names */
3138 int qlen; /* length of qualifier */
3139 int bracelev; /* current brace level */
3140 int bracketlev; /* current bracket level */
3141 int parlev; /* current parenthesis level */
3142 int attrparlev; /* __attribute__ parenthesis level */
3143 int templatelev; /* current template level */
3144 int typdefbracelev; /* bracelev where a typedef struct body begun */
3145 bool incomm, inquote, inchar, quotednl, midtoken;
3146 bool yacc_rules; /* in the rules part of a yacc file */
3147 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3150 linebuffer_init (&lbs[0].lb);
3151 linebuffer_init (&lbs[1].lb);
/* Allocate the cstack arrays if they have not been sized yet. */
3152 if (cstack.size == 0)
3154 cstack.size = (DEBUG) ? 1 : 4;
3155 cstack.nl = 0;
3156 cstack.cname = xnew (cstack.size, char *);
3157 cstack.bracelev = xnew (cstack.size, int);
3160 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3161 curndx = newndx = 0;
3162 lp = curlb.buffer;
3163 *lp = 0;
/* Reset the scanner state before the first character is examined. */
3165 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3166 structdef = snone; definedef = dnone; objdef = onone;
3167 yacc_rules = FALSE;
3168 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3169 token.valid = savetoken.valid = FALSE;
3170 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Qualified names are joined with "." when cjava is set, with "::" otherwise. */
3171 if (cjava)
3172 { qualifier = "."; qlen = 1; }
3173 else
3174 { qualifier = "::"; qlen = 2; }
/* Main loop: each iteration fetches the next character of the line into c. */
3177 while (!feof (inf))
3179 c = *lp++;
3180 if (c == '\\')
3182 /* If we are at the end of the line, the next character is a
3183 '\0'; do not skip it, because it is what tells us
3184 to read the next line. */
3185 if (*lp == '\0')
3187 quotednl = TRUE;
3188 continue;
3190 lp++;
3191 c = ' ';
/* Inside a C comment: only the closing star-slash and end of line matter. */
3193 else if (incomm)
3195 switch (c)
3197 case '*':
3198 if (*lp == '/')
3200 c = *lp++;
3201 incomm = FALSE;
3203 break;
3204 case '\0':
3205 /* Newlines inside comments do not end macro definitions in
3206 traditional cpp. */
3207 CNL_SAVE_DEFINEDEF ();
3208 break;
3210 continue;
/* Inside a string literal: wait for the closing double quote. */
3212 else if (inquote)
3214 switch (c)
3216 case '"':
3217 inquote = FALSE;
3218 break;
3219 case '\0':
3220 /* Newlines inside strings do not end macro definitions
3221 in traditional cpp, even though compilers don't
3222 usually accept them. */
3223 CNL_SAVE_DEFINEDEF ();
3224 break;
3226 continue;
/* Inside a character constant: it ends at a single quote or at end of line. */
3228 else if (inchar)
3230 switch (c)
3232 case '\0':
3233 /* Hmmm, something went wrong. */
3234 CNL ();
3235 /* FALLTHRU */
3236 case '\'':
3237 inchar = FALSE;
3238 break;
3240 continue;
/* Inside square brackets: ignore everything until bracketlev drops back. */
3242 else if (bracketlev > 0)
3244 switch (c)
3246 case ']':
3247 if (--bracketlev > 0)
3248 continue;
3249 break;
3250 case '\0':
3251 CNL_SAVE_DEFINEDEF ();
3252 break;
3254 continue;
/* None of the states above is active: check whether c opens one of them. */
3256 else switch (c)
3258 case '"':
3259 inquote = TRUE;
3260 if (inattribute)
3261 break;
3262 switch (fvdef)
3264 case fdefunkey:
3265 case fstartlist:
3266 case finlist:
3267 case fignore:
3268 case vignore:
3269 break;
3270 default:
3271 fvextern = FALSE;
3272 fvdef = fvnone;
3274 continue;
3275 case '\'':
3276 inchar = TRUE;
3277 if (inattribute)
3278 break;
3279 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3281 fvextern = FALSE;
3282 fvdef = fvnone;
3284 continue;
3285 case '/':
3286 if (*lp == '*')
3288 incomm = TRUE;
3289 lp++;
3290 c = ' ';
3292 else if (/* cplpl && */ *lp == '/')
3294 c = '\0';
3296 break;
3297 case '%':
3298 if ((c_ext & YACC) && *lp == '%')
3300 /* Entering or exiting rules section in yacc file. */
3301 lp++;
3302 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3303 typdef = tnone; structdef = snone;
3304 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3305 bracelev = 0;
3306 yacc_rules = !yacc_rules;
3307 continue;
3309 else
3310 break;
3311 case '#':
3312 if (definedef == dnone)
3314 char *cp;
3315 bool cpptoken = TRUE;
3317 /* Look back on this line. If all blanks, or nonblanks
3318 followed by an end of comment, this is a preprocessor
3319 token. */
3320 for (cp = newlb.buffer; cp < lp-1; cp++)
3321 if (!iswhite (*cp))
3323 if (*cp == '*' && *(cp+1) == '/')
3325 cp++;
3326 cpptoken = TRUE;
3328 else
3329 cpptoken = FALSE;
3331 if (cpptoken)
3332 definedef = dsharpseen;
3333 } /* if (definedef == dnone) */
3334 continue;
3335 case '[':
3336 bracketlev++;
3337 continue;
3338 } /* switch (c) */
3341 /* Consider token only if some involved conditions are satisfied. */
3342 if (typdef != tignore
3343 && definedef != dignorerest
3344 && fvdef != finlist
3345 && templatelev == 0
3346 && (definedef != dnone
3347 || structdef != scolonseen)
3348 && !inattribute)
3350 if (midtoken)
3352 if (endtoken (c))
3354 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3355 /* This handles :: in the middle,
3356 but not at the beginning of an identifier.
3357 Also, space-separated :: is not recognised. */
3359 if (c_ext & C_AUTO) /* automatic detection of C++ */
3360 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3361 lp += 2;
3362 toklen += 2;
3363 c = lp[-1];
3364 goto still_in_token;
3366 else
3368 bool funorvar = FALSE;
3370 if (yacc_rules
3371 || consider_token (newlb.buffer + tokoff, toklen, c,
3372 &c_ext, bracelev, parlev,
3373 &funorvar))
3375 if (fvdef == foperator)
3377 char *oldlp = lp;
3378 lp = skip_spaces (lp-1);
3379 if (*lp != '\0')
3380 lp += 1;
3381 while (*lp != '\0'
3382 && !iswhite (*lp) && *lp != '(')
3383 lp += 1;
3384 c = *lp++;
3385 toklen += lp - oldlp;
3387 token.named = FALSE;
3388 if (!plainc
3389 && nestlev > 0 && definedef == dnone)
3390 /* in struct body */
3392 write_classname (&token_name, qualifier);
3393 linebuffer_setlen (&token_name,
3394 token_name.len+qlen+toklen);
3395 strcat (token_name.buffer, qualifier);
3396 strncat (token_name.buffer,
3397 newlb.buffer + tokoff, toklen);
3398 token.named = TRUE;
3400 else if (objdef == ocatseen)
3401 /* Objective C category */
3403 int len = strlen (objtag) + 2 + toklen;
3404 linebuffer_setlen (&token_name, len);
3405 strcpy (token_name.buffer, objtag);
3406 strcat (token_name.buffer, "(");
3407 strncat (token_name.buffer,
3408 newlb.buffer + tokoff, toklen);
3409 strcat (token_name.buffer, ")");
3410 token.named = TRUE;
3412 else if (objdef == omethodtag
3413 || objdef == omethodparm)
3414 /* Objective C method */
3416 token.named = TRUE;
3418 else if (fvdef == fdefunname)
3419 /* GNU DEFUN and similar macros */
3421 bool defun = (newlb.buffer[tokoff] == 'F');
3422 int off = tokoff;
3423 int len = toklen;
3425 /* Rewrite the tag so that emacs lisp DEFUNs
3426 can be found by their elisp name */
3427 if (defun)
3429 off += 1;
3430 len -= 1;
3432 linebuffer_setlen (&token_name, len);
3433 strncpy (token_name.buffer,
3434 newlb.buffer + off, len);
3435 token_name.buffer[len] = '\0';
3436 if (defun)
3437 while (--len >= 0)
3438 if (token_name.buffer[len] == '_')
3439 token_name.buffer[len] = '-';
3440 token.named = defun;
3442 else
3444 linebuffer_setlen (&token_name, toklen);
3445 strncpy (token_name.buffer,
3446 newlb.buffer + tokoff, toklen);
3447 token_name.buffer[toklen] = '\0';
3448 /* Name macros and members. */
3449 token.named = (structdef == stagseen
3450 || typdef == ttypeseen
3451 || typdef == tend
3452 || (funorvar
3453 && definedef == dignorerest)
3454 || (funorvar
3455 && definedef == dnone
3456 && structdef == snone
3457 && bracelev > 0));
/* Record where the token was found; make_C_tag () reads these fields. */
3459 token.lineno = lineno;
3460 token.offset = tokoff;
3461 token.length = toklen;
3462 token.line = newlb.buffer;
3463 token.linepos = newlinepos;
3464 token.valid = TRUE;
3466 if (definedef == dnone
3467 && (fvdef == fvnameseen
3468 || fvdef == foperator
3469 || structdef == stagseen
3470 || typdef == tend
3471 || typdef == ttypeseen
3472 || objdef != onone))
3474 if (current_lb_is_new)
3475 switch_line_buffers ();
3477 else if (definedef != dnone
3478 || fvdef == fdefunname
3479 || instruct)
3480 make_C_tag (funorvar);
3482 else /* not yacc and consider_token failed */
3484 if (inattribute && fvdef == fignore)
3486 /* We have just met __attribute__ after a
3487 function parameter list: do not tag the
3488 function again. */
3489 fvdef = fvnone;
3492 midtoken = FALSE;
3494 } /* if (endtoken (c)) */
3495 else if (intoken (c))
3496 still_in_token:
3498 toklen++;
3499 continue;
3501 } /* if (midtoken) */
3502 else if (begtoken (c))
3504 switch (definedef)
3506 case dnone:
3507 switch (fvdef)
3509 case fstartlist:
3510 /* This prevents tagging fb in
3511 void (__attribute__((noreturn)) *fb) (void);
3512 Fixing this is not easy and not very important. */
3513 fvdef = finlist;
3514 continue;
3515 case flistseen:
3516 if (plainc || declarations)
3518 make_C_tag (TRUE); /* a function */
3519 fvdef = fignore;
3521 break;
3523 if (structdef == stagseen && !cjava)
3525 popclass_above (bracelev);
3526 structdef = snone;
3528 break;
3529 case dsharpseen:
3530 savetoken = token;
3531 break;
/* Start a new token at c; in yacc rules only when c is the first character of the line. */
3533 if (!yacc_rules || lp == newlb.buffer + 1)
3535 tokoff = lp - 1 - newlb.buffer;
3536 toklen = 1;
3537 midtoken = TRUE;
3539 continue;
3540 } /* if (begtoken) */
3541 } /* if must look at token */
3544 /* Detect end of line, colon, comma, semicolon and various braces
3545 after having handled a token.*/
3546 switch (c)
3548 case ':':
3549 if (inattribute)
3550 break;
3551 if (yacc_rules && token.offset == 0 && token.valid)
3553 make_C_tag (FALSE); /* a yacc function */
3554 break;
3556 if (definedef != dnone)
3557 break;
3558 switch (objdef)
3560 case otagseen:
3561 objdef = oignore;
3562 make_C_tag (TRUE); /* an Objective C class */
3563 break;
3564 case omethodtag:
3565 case omethodparm:
3566 objdef = omethodcolon;
3567 linebuffer_setlen (&token_name, token_name.len + 1);
3568 strcat (token_name.buffer, ":");
3569 break;
3571 if (structdef == stagseen)
3573 structdef = scolonseen;
3574 break;
3576 /* Should be useless, but may be work as a safety net. */
3577 if (cplpl && fvdef == flistseen)
3579 make_C_tag (TRUE); /* a function */
3580 fvdef = fignore;
3581 break;
3583 break;
3584 case ';':
3585 if (definedef != dnone || inattribute)
3586 break;
3587 switch (typdef)
3589 case tend:
3590 case ttypeseen:
3591 make_C_tag (FALSE); /* a typedef */
3592 typdef = tnone;
3593 fvdef = fvnone;
3594 break;
3595 case tnone:
3596 case tinbody:
3597 case tignore:
3598 switch (fvdef)
3600 case fignore:
3601 if (typdef == tignore || cplpl)
3602 fvdef = fvnone;
3603 break;
3604 case fvnameseen:
3605 if ((globals && bracelev == 0 && (!fvextern || declarations))
3606 || (members && instruct))
3607 make_C_tag (FALSE); /* a variable */
3608 fvextern = FALSE;
3609 fvdef = fvnone;
3610 token.valid = FALSE;
3611 break;
3612 case flistseen:
3613 if ((declarations
3614 && (cplpl || !instruct)
3615 && (typdef == tnone || (typdef != tignore && instruct)))
3616 || (members
3617 && plainc && instruct))
3618 make_C_tag (TRUE); /* a function */
3619 /* FALLTHRU */
3620 default:
3621 fvextern = FALSE;
3622 fvdef = fvnone;
3623 if (declarations
3624 && cplpl && structdef == stagseen)
3625 make_C_tag (FALSE); /* forward declaration */
3626 else
3627 token.valid = FALSE;
3628 } /* switch (fvdef) */
3629 /* FALLTHRU */
3630 default:
3631 if (!instruct)
3632 typdef = tnone;
3634 if (structdef == stagseen)
3635 structdef = snone;
3636 break;
3637 case ',':
3638 if (definedef != dnone || inattribute)
3639 break;
3640 switch (objdef)
3642 case omethodtag:
3643 case omethodparm:
3644 make_C_tag (TRUE); /* an Objective C method */
3645 objdef = oinbody;
3646 break;
3648 switch (fvdef)
3650 case fdefunkey:
3651 case foperator:
3652 case fstartlist:
3653 case finlist:
3654 case fignore:
3655 case vignore:
3656 break;
3657 case fdefunname:
3658 fvdef = fignore;
3659 break;
3660 case fvnameseen:
3661 if (parlev == 0
3662 && ((globals
3663 && bracelev == 0
3664 && templatelev == 0
3665 && (!fvextern || declarations))
3666 || (members && instruct)))
3667 make_C_tag (FALSE); /* a variable */
3668 break;
3669 case flistseen:
3670 if ((declarations && typdef == tnone && !instruct)
3671 || (members && typdef != tignore && instruct))
3673 make_C_tag (TRUE); /* a function */
3674 fvdef = fvnameseen;
3676 else if (!declarations)
3677 fvdef = fvnone;
3678 token.valid = FALSE;
3679 break;
3680 default:
3681 fvdef = fvnone;
3683 if (structdef == stagseen)
3684 structdef = snone;
3685 break;
3686 case ']':
3687 if (definedef != dnone || inattribute)
3688 break;
3689 if (structdef == stagseen)
3690 structdef = snone;
3691 switch (typdef)
3693 case ttypeseen:
3694 case tend:
3695 typdef = tignore;
3696 make_C_tag (FALSE); /* a typedef */
3697 break;
3698 case tnone:
3699 case tinbody:
3700 switch (fvdef)
3702 case foperator:
3703 case finlist:
3704 case fignore:
3705 case vignore:
3706 break;
3707 case fvnameseen:
3708 if ((members && bracelev == 1)
3709 || (globals && bracelev == 0
3710 && (!fvextern || declarations)))
3711 make_C_tag (FALSE); /* a variable */
3712 /* FALLTHRU */
3713 default:
3714 fvdef = fvnone;
3716 break;
3718 break;
3719 case '(':
3720 if (inattribute)
3722 attrparlev++;
3723 break;
3725 if (definedef != dnone)
3726 break;
3727 if (objdef == otagseen && parlev == 0)
3728 objdef = oparenseen;
3729 switch (fvdef)
3731 case fvnameseen:
3732 if (typdef == ttypeseen
3733 && *lp != '*'
3734 && !instruct)
3736 /* This handles constructs like:
3737 typedef void OperatorFun (int fun); */
3738 make_C_tag (FALSE);
3739 typdef = tignore;
3740 fvdef = fignore;
3741 break;
3743 /* FALLTHRU */
3744 case foperator:
3745 fvdef = fstartlist;
3746 break;
3747 case flistseen:
3748 fvdef = finlist;
3749 break;
3751 parlev++;
3752 break;
3753 case ')':
3754 if (inattribute)
3756 if (--attrparlev == 0)
3757 inattribute = FALSE;
3758 break;
3760 if (definedef != dnone)
3761 break;
3762 if (objdef == ocatseen && parlev == 1)
3764 make_C_tag (TRUE); /* an Objective C category */
3765 objdef = oignore;
3767 if (--parlev == 0)
3769 switch (fvdef)
3771 case fstartlist:
3772 case finlist:
3773 fvdef = flistseen;
3774 break;
3776 if (!instruct
3777 && (typdef == tend
3778 || typdef == ttypeseen))
3780 typdef = tignore;
3781 make_C_tag (FALSE); /* a typedef */
3784 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3785 parlev = 0;
3786 break;
3787 case '{':
3788 if (definedef != dnone)
3789 break;
3790 if (typdef == ttypeseen)
3792 /* Whenever typdef is set to tinbody (currently only
3793 here), typdefbracelev should be set to bracelev. */
3794 typdef = tinbody;
3795 typdefbracelev = bracelev;
3797 switch (fvdef)
3799 case flistseen:
3800 make_C_tag (TRUE); /* a function */
3801 /* FALLTHRU */
3802 case fignore:
3803 fvdef = fvnone;
3804 break;
3805 case fvnone:
3806 switch (objdef)
3808 case otagseen:
3809 make_C_tag (TRUE); /* an Objective C class */
3810 objdef = oignore;
3811 break;
3812 case omethodtag:
3813 case omethodparm:
3814 make_C_tag (TRUE); /* an Objective C method */
3815 objdef = oinbody;
3816 break;
3817 default:
3818 /* Neutralize `extern "C" {' grot. */
3819 if (bracelev == 0 && structdef == snone && nestlev == 0
3820 && typdef == tnone)
3821 bracelev = -1;
3823 break;
3825 switch (structdef)
3827 case skeyseen: /* unnamed struct */
3828 pushclass_above (bracelev, NULL, 0);
3829 structdef = snone;
3830 break;
3831 case stagseen: /* named struct or enum */
3832 case scolonseen: /* a class */
3833 pushclass_above (bracelev,token.line+token.offset, token.length);
3834 structdef = snone;
3835 make_C_tag (FALSE); /* a struct or enum */
3836 break;
3838 bracelev += 1;
3839 break;
3840 case '*':
3841 if (definedef != dnone)
3842 break;
3843 if (fvdef == fstartlist)
3845 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3846 token.valid = FALSE;
3848 break;
3849 case '}':
3850 if (definedef != dnone)
3851 break;
3852 bracelev -= 1;
3853 if (!ignoreindent && lp == newlb.buffer + 1)
3855 if (bracelev != 0)
3856 token.valid = FALSE; /* unexpected value, token unreliable */
3857 bracelev = 0; /* reset brace level if first column */
3858 parlev = 0; /* also reset paren level, just in case... */
3860 else if (bracelev < 0)
3862 token.valid = FALSE; /* something gone amiss, token unreliable */
3863 bracelev = 0;
3865 if (bracelev == 0 && fvdef == vignore)
3866 fvdef = fvnone; /* end of function */
3867 popclass_above (bracelev);
3868 structdef = snone;
3869 /* Only if typdef == tinbody is typdefbracelev significant. */
3870 if (typdef == tinbody && bracelev <= typdefbracelev)
3872 assert (bracelev == typdefbracelev);
3873 typdef = tend;
3875 break;
3876 case '=':
3877 if (definedef != dnone)
3878 break;
3879 switch (fvdef)
3881 case foperator:
3882 case finlist:
3883 case fignore:
3884 case vignore:
3885 break;
3886 case fvnameseen:
3887 if ((members && bracelev == 1)
3888 || (globals && bracelev == 0 && (!fvextern || declarations)))
3889 make_C_tag (FALSE); /* a variable */
3890 /* FALLTHRU */
3891 default:
3892 fvdef = vignore;
3894 break;
3895 case '<':
3896 if (cplpl
3897 && (structdef == stagseen || fvdef == fvnameseen))
3899 templatelev++;
3900 break;
3902 goto resetfvdef;
3903 case '>':
3904 if (templatelev > 0)
3906 templatelev--;
3907 break;
3909 goto resetfvdef;
3910 case '+':
3911 case '-':
3912 if (objdef == oinbody && bracelev == 0)
3914 objdef = omethodsign;
3915 break;
3917 /* FALLTHRU */
3918 resetfvdef:
3919 case '#': case '~': case '&': case '%': case '/':
3920 case '|': case '^': case '!': case '.': case '?':
3921 if (definedef != dnone)
3922 break;
3923 /* These surely cannot follow a function tag in C. */
3924 switch (fvdef)
3926 case foperator:
3927 case finlist:
3928 case fignore:
3929 case vignore:
3930 break;
3931 default:
3932 fvdef = fvnone;
3934 break;
3935 case '\0':
3936 if (objdef == otagseen)
3938 make_C_tag (TRUE); /* an Objective C class */
3939 objdef = oignore;
3941 /* If a macro spans multiple lines don't reset its state. */
3942 if (quotednl)
3943 CNL_SAVE_DEFINEDEF ();
3944 else
3945 CNL ();
3946 break;
3947 } /* switch (c) */
3949 } /* while not eof */
/* Release the storage of both line buffers initialised on entry. */
3951 free (lbs[0].lb.buffer);
3952 free (lbs[1].lb.buffer);
3956 * Process either a C++ file or a C file depending on the setting
3957 * of a global flag.
3959 static void
3960 default_C_entries (inf)
3961 FILE *inf;
3963 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: call C_entries with no extension flag set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3974 /* Always do C++. */
3975 static void
3976 Cplusplus_entries (inf)
3977 FILE *inf;
3979 C_entries (C_PLPL, inf);
3982 /* Always do Java. */
3983 static void
3984 Cjava_entries (inf)
3985 FILE *inf;
3987 C_entries (C_JAVA, inf);
3990 /* Always do C*. */
3991 static void
3992 Cstar_entries (inf)
3993 FILE *inf;
3995 C_entries (C_STAR, inf);
3998 /* Always do Yacc. */
3999 static void
4000 Yacc_entries (inf)
4001 FILE *inf;
4003 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that walks FILE_POINTER one line at a time.  Before every
   iteration a line is read into LINE_BUFFER with readline () and
   CHAR_POINTER is set to the start of that buffer; the loop ends once
   feof (FILE_POINTER) is true.  The statement written after the macro
   is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (; /* nothing to initialise */                                    \
       !feof (file_pointer) /* stop at end of file */                   \
       && (readline (&line_buffer, file_pointer), /* fetch a line */    \
           char_pointer = line_buffer.buffer, /* point at its start */  \
           TRUE);                                                       \
       /* nothing to step */)
/* True when CP points at the keyword KW (a literal string) and the
   character after it satisfies notinname ().  On success CP is moved
   past the keyword and past any spaces that follow it; on failure CP
   is left untouched.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert("" kw), TRUE) /* KW must be a string literal */              \
   && strneq ((cp), kw, sizeof(kw)-1) /* text at CP equals KW */        \
   && notinname ((cp)[sizeof(kw)-1]) /* and KW ends there */            \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* step over KW, blanks */
/* Like LOOKING_AT, but the comparison ignores case, the character after
   the keyword is not checked with notinname (), and on success CP is
   only advanced past the keyword: following spaces are NOT skipped.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert("" kw), TRUE) /* KW must be a string literal */              \
   && strncaseeq ((cp), kw, sizeof(kw)-1) /* text at CP matches KW */   \
   && ((cp) += sizeof(kw)-1)) /* step over KW only */
4030 * Read a file, but do no processing. This is used to do regexp
4031 * matching on files that have no language defined.
4033 static void
4034 just_read_file (inf)
4035 FILE *inf;
4037 register char *dummy;
4039 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4040 continue;
4044 /* Fortran parsing */
4046 static void F_takeprec __P((void));
4047 static void F_getit __P((FILE *));
4049 static void
4050 F_takeprec ()
4052 dbp = skip_spaces (dbp);
4053 if (*dbp != '*')
4054 return;
4055 dbp++;
4056 dbp = skip_spaces (dbp);
4057 if (strneq (dbp, "(*)", 3))
4059 dbp += 3;
4060 return;
4062 if (!ISDIGIT (*dbp))
4064 --dbp; /* force failure */
4065 return;
4068 dbp++;
4069 while (ISDIGIT (*dbp));
4072 static void
4073 F_getit (inf)
4074 FILE *inf;
4076 register char *cp;
4078 dbp = skip_spaces (dbp);
4079 if (*dbp == '\0')
4081 readline (&lb, inf);
4082 dbp = lb.buffer;
4083 if (dbp[5] != '&')
4084 return;
4085 dbp += 6;
4086 dbp = skip_spaces (dbp);
4088 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4089 return;
4090 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4091 continue;
4092 make_tag (dbp, cp-dbp, TRUE,
4093 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4097 static void
4098 Fortran_functions (inf)
4099 FILE *inf;
4101 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4103 if (*dbp == '%')
4104 dbp++; /* Ratfor escape to fortran */
4105 dbp = skip_spaces (dbp);
4106 if (*dbp == '\0')
4107 continue;
4109 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4110 dbp = skip_spaces (dbp);
4112 switch (lowcase (*dbp))
4114 case 'i':
4115 if (nocase_tail ("integer"))
4116 F_takeprec ();
4117 break;
4118 case 'r':
4119 if (nocase_tail ("real"))
4120 F_takeprec ();
4121 break;
4122 case 'l':
4123 if (nocase_tail ("logical"))
4124 F_takeprec ();
4125 break;
4126 case 'c':
4127 if (nocase_tail ("complex") || nocase_tail ("character"))
4128 F_takeprec ();
4129 break;
4130 case 'd':
4131 if (nocase_tail ("double"))
4133 dbp = skip_spaces (dbp);
4134 if (*dbp == '\0')
4135 continue;
4136 if (nocase_tail ("precision"))
4137 break;
4138 continue;
4140 break;
4142 dbp = skip_spaces (dbp);
4143 if (*dbp == '\0')
4144 continue;
4145 switch (lowcase (*dbp))
4147 case 'f':
4148 if (nocase_tail ("function"))
4149 F_getit (inf);
4150 continue;
4151 case 's':
4152 if (nocase_tail ("subroutine"))
4153 F_getit (inf);
4154 continue;
4155 case 'e':
4156 if (nocase_tail ("entry"))
4157 F_getit (inf);
4158 continue;
4159 case 'b':
4160 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4162 dbp = skip_spaces (dbp);
4163 if (*dbp == '\0') /* assume un-named */
4164 make_tag ("blockdata", 9, TRUE,
4165 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4166 else
4167 F_getit (inf); /* look for name */
4169 continue;
4176 * Ada parsing
4177 * Original code by
4178 * Philippe Waroquiers (1998)
4181 static void Ada_getit __P((FILE *, char *));
4183 /* Once we are positioned after an "interesting" keyword, let's get
4184 the real tag value necessary. */
4185 static void
4186 Ada_getit (inf, name_qualifier)
4187 FILE *inf;
4188 char *name_qualifier;
4190 register char *cp;
4191 char *name;
4192 char c;
4194 while (!feof (inf))
4196 dbp = skip_spaces (dbp);
4197 if (*dbp == '\0'
4198 || (dbp[0] == '-' && dbp[1] == '-'))
4200 readline (&lb, inf);
4201 dbp = lb.buffer;
4203 switch (lowcase(*dbp))
4205 case 'b':
4206 if (nocase_tail ("body"))
4208 /* Skipping body of procedure body or package body or ....
4209 resetting qualifier to body instead of spec. */
4210 name_qualifier = "/b";
4211 continue;
4213 break;
4214 case 't':
4215 /* Skipping type of task type or protected type ... */
4216 if (nocase_tail ("type"))
4217 continue;
4218 break;
4220 if (*dbp == '"')
4222 dbp += 1;
4223 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4224 continue;
4226 else
4228 dbp = skip_spaces (dbp);
4229 for (cp = dbp;
4230 (*cp != '\0'
4231 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4232 cp++)
4233 continue;
4234 if (cp == dbp)
4235 return;
4237 c = *cp;
4238 *cp = '\0';
4239 name = concat (dbp, name_qualifier, "");
4240 *cp = c;
4241 make_tag (name, strlen (name), TRUE,
4242 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4243 free (name);
4244 if (c == '"')
4245 dbp = cp + 1;
4246 return;
4250 static void
4251 Ada_funcs (inf)
4252 FILE *inf;
4254 bool inquote = FALSE;
4255 bool skip_till_semicolumn = FALSE;
4257 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4259 while (*dbp != '\0')
4261 /* Skip a string i.e. "abcd". */
4262 if (inquote || (*dbp == '"'))
4264 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4265 if (dbp != NULL)
4267 inquote = FALSE;
4268 dbp += 1;
4269 continue; /* advance char */
4271 else
4273 inquote = TRUE;
4274 break; /* advance line */
4278 /* Skip comments. */
4279 if (dbp[0] == '-' && dbp[1] == '-')
4280 break; /* advance line */
4282 /* Skip character enclosed in single quote i.e. 'a'
4283 and skip single quote starting an attribute i.e. 'Image. */
4284 if (*dbp == '\'')
4286 dbp++ ;
4287 if (*dbp != '\0')
4288 dbp++;
4289 continue;
4292 if (skip_till_semicolumn)
4294 if (*dbp == ';')
4295 skip_till_semicolumn = FALSE;
4296 dbp++;
4297 continue; /* advance char */
4300 /* Search for beginning of a token. */
4301 if (!begtoken (*dbp))
4303 dbp++;
4304 continue; /* advance char */
4307 /* We are at the beginning of a token. */
4308 switch (lowcase(*dbp))
4310 case 'f':
4311 if (!packages_only && nocase_tail ("function"))
4312 Ada_getit (inf, "/f");
4313 else
4314 break; /* from switch */
4315 continue; /* advance char */
4316 case 'p':
4317 if (!packages_only && nocase_tail ("procedure"))
4318 Ada_getit (inf, "/p");
4319 else if (nocase_tail ("package"))
4320 Ada_getit (inf, "/s");
4321 else if (nocase_tail ("protected")) /* protected type */
4322 Ada_getit (inf, "/t");
4323 else
4324 break; /* from switch */
4325 continue; /* advance char */
4327 case 'u':
4328 if (typedefs && !packages_only && nocase_tail ("use"))
4330 /* when tagging types, avoid tagging use type Pack.Typename;
4331 for this, we will skip everything till a ; */
4332 skip_till_semicolumn = TRUE;
4333 continue; /* advance char */
4336 case 't':
4337 if (!packages_only && nocase_tail ("task"))
4338 Ada_getit (inf, "/k");
4339 else if (typedefs && !packages_only && nocase_tail ("type"))
4341 Ada_getit (inf, "/t");
4342 while (*dbp != '\0')
4343 dbp += 1;
4345 else
4346 break; /* from switch */
4347 continue; /* advance char */
4350 /* Look for the end of the token. */
4351 while (!endtoken (*dbp))
4352 dbp++;
4354 } /* advance char */
4355 } /* advance line */
4360 * Unix and microcontroller assembly tag handling
4361 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4362 * Idea by Bob Weiner, Motorola Inc. (1994)
4364 static void
4365 Asm_labels (inf)
4366 FILE *inf;
4368 register char *cp;
4370 LOOP_ON_INPUT_LINES (inf, lb, cp)
4372 /* If first char is alphabetic or one of [_.$], test for colon
4373 following identifier. */
4374 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4376 /* Read past label. */
4377 cp++;
4378 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4379 cp++;
4380 if (*cp == ':' || iswhite (*cp))
4381 /* Found end of label, so copy it and add it to the table. */
4382 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4383 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4390 * Perl support
4391 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4392 * Perl variable names: /^(my|local).../
4393 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4394 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4395 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4397 static void
4398 Perl_functions (inf)
4399 FILE *inf;
4401 char *package = savestr ("main"); /* current package name */
4402 register char *cp;
4404 LOOP_ON_INPUT_LINES (inf, lb, cp)
4406 cp = skip_spaces (cp);
4408 if (LOOKING_AT (cp, "package"))
4410 free (package);
4411 get_tag (cp, &package);
4413 else if (LOOKING_AT (cp, "sub"))
4415 char *pos;
4416 char *sp = cp;
4418 while (!notinname (*cp))
4419 cp++;
4420 if (cp == sp)
4421 continue; /* nothing found */
4422 if ((pos = etags_strchr (sp, ':')) != NULL
4423 && pos < cp && pos[1] == ':')
4424 /* The name is already qualified. */
4425 make_tag (sp, cp - sp, TRUE,
4426 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4427 else
4428 /* Qualify it. */
4430 char savechar, *name;
4432 savechar = *cp;
4433 *cp = '\0';
4434 name = concat (package, "::", sp);
4435 *cp = savechar;
4436 make_tag (name, strlen(name), TRUE,
4437 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4438 free (name);
4441 else if (globals) /* only if we are tagging global vars */
4443 /* Skip a qualifier, if any. */
4444 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4445 /* After "my" or "local", but before any following paren or space. */
4446 char *varstart = cp;
4448 if (qual /* should this be removed? If yes, how? */
4449 && (*cp == '$' || *cp == '@' || *cp == '%'))
4451 varstart += 1;
4453 cp++;
4454 while (ISALNUM (*cp) || *cp == '_');
4456 else if (qual)
4458 /* Should be examining a variable list at this point;
4459 could insist on seeing an open parenthesis. */
4460 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4461 cp++;
4463 else
4464 continue;
4466 make_tag (varstart, cp - varstart, FALSE,
4467 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4470 free (package);
4475 * Python support
4476 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4477 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4478 * More ideas by seb bacon <seb@jamkit.com> (2002)
4480 static void
4481 Python_functions (inf)
4482 FILE *inf;
4484 register char *cp;
4486 LOOP_ON_INPUT_LINES (inf, lb, cp)
4488 cp = skip_spaces (cp);
4489 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4491 char *name = cp;
4492 while (!notinname (*cp) && *cp != ':')
4493 cp++;
4494 make_tag (name, cp - name, TRUE,
4495 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4502 * PHP support
4503 * Look for:
4504 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4505 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4506 * - /^[ \t]*define\(\"[^\"]+/
4507 * Only with --members:
4508 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4509 * Idea by Diez B. Roggisch (2001)
4511 static void
4512 PHP_functions (inf)
4513 FILE *inf;
4515 register char *cp, *name;
4516 bool search_identifier = FALSE;
4518 LOOP_ON_INPUT_LINES (inf, lb, cp)
4520 cp = skip_spaces (cp);
4521 name = cp;
4522 if (search_identifier
4523 && *cp != '\0')
4525 while (!notinname (*cp))
4526 cp++;
4527 make_tag (name, cp - name, TRUE,
4528 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4529 search_identifier = FALSE;
4531 else if (LOOKING_AT (cp, "function"))
4533 if(*cp == '&')
4534 cp = skip_spaces (cp+1);
4535 if(*cp != '\0')
4537 name = cp;
4538 while (!notinname (*cp))
4539 cp++;
4540 make_tag (name, cp - name, TRUE,
4541 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4543 else
4544 search_identifier = TRUE;
4546 else if (LOOKING_AT (cp, "class"))
4548 if (*cp != '\0')
4550 name = cp;
4551 while (*cp != '\0' && !iswhite (*cp))
4552 cp++;
4553 make_tag (name, cp - name, FALSE,
4554 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4556 else
4557 search_identifier = TRUE;
4559 else if (strneq (cp, "define", 6)
4560 && (cp = skip_spaces (cp+6))
4561 && *cp++ == '('
4562 && (*cp == '"' || *cp == '\''))
4564 char quote = *cp++;
4565 name = cp;
4566 while (*cp != quote && *cp != '\0')
4567 cp++;
4568 make_tag (name, cp - name, FALSE,
4569 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4571 else if (members
4572 && LOOKING_AT (cp, "var")
4573 && *cp == '$')
4575 name = cp;
4576 while (!notinname(*cp))
4577 cp++;
4578 make_tag (name, cp - name, FALSE,
4579 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4586 * Cobol tag functions
4587 * We could look for anything that could be a paragraph name.
4588 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4589 * Idea by Corny de Souza (1993)
4591 static void
4592 Cobol_paragraphs (inf)
4593 FILE *inf;
4595 register char *bp, *ep;
4597 LOOP_ON_INPUT_LINES (inf, lb, bp)
4599 if (lb.len < 9)
4600 continue;
4601 bp += 8;
4603 /* If eoln, compiler option or comment ignore whole line. */
4604 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4605 continue;
4607 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4608 continue;
4609 if (*ep++ == '.')
4610 make_tag (bp, ep - bp, TRUE,
4611 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4617 * Makefile support
4618 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4620 static void
4621 Makefile_targets (inf)
4622 FILE *inf;
4624 register char *bp;
4626 LOOP_ON_INPUT_LINES (inf, lb, bp)
4628 if (*bp == '\t' || *bp == '#')
4629 continue;
4630 while (*bp != '\0' && *bp != '=' && *bp != ':')
4631 bp++;
4632 if (*bp == ':' || (globals && *bp == '='))
4634 /* We should detect if there is more than one tag, but we do not.
4635 We just skip initial and final spaces. */
4636 char * namestart = skip_spaces (lb.buffer);
4637 while (--bp > namestart)
4638 if (!notinname (*bp))
4639 break;
4640 make_tag (namestart, bp - namestart + 1, TRUE,
4641 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4648 * Pascal parsing
4649 * Original code by Mosur K. Mohan (1989)
4651 * Locates tags for procedures & functions. Doesn't do any type- or
4652 * var-definitions. It does look for the keyword "extern" or
4653 * "forward" immediately following the procedure statement; if found,
4654 * the tag is skipped.
4656 static void
4657 Pascal_functions (inf)
4658 FILE *inf;
4660 linebuffer tline; /* mostly copied from C_entries */
4661 long save_lcno;
4662 int save_lineno, namelen, taglen;
4663 char c, *name;
4665 bool /* each of these flags is TRUE if: */
4666 incomment, /* point is inside a comment */
4667 inquote, /* point is inside '..' string */
4668 get_tagname, /* point is after PROCEDURE/FUNCTION
4669 keyword, so next item = potential tag */
4670 found_tag, /* point is after a potential tag */
4671 inparms, /* point is within parameter-list */
4672 verify_tag; /* point has passed the parm-list, so the
4673 next token will determine whether this
4674 is a FORWARD/EXTERN to be ignored, or
4675 whether it is a real tag */
4677 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4678 name = NULL; /* keep compiler quiet */
4679 dbp = lb.buffer;
4680 *dbp = '\0';
4681 linebuffer_init (&tline);
4683 incomment = inquote = FALSE;
4684 found_tag = FALSE; /* have a proc name; check if extern */
4685 get_tagname = FALSE; /* found "procedure" keyword */
4686 inparms = FALSE; /* found '(' after "proc" */
4687 verify_tag = FALSE; /* check if "extern" is ahead */
4690 while (!feof (inf)) /* long main loop to get next char */
4692 c = *dbp++;
4693 if (c == '\0') /* if end of line */
4695 readline (&lb, inf);
4696 dbp = lb.buffer;
4697 if (*dbp == '\0')
4698 continue;
4699 if (!((found_tag && verify_tag)
4700 || get_tagname))
4701 c = *dbp++; /* only if don't need *dbp pointing
4702 to the beginning of the name of
4703 the procedure or function */
4705 if (incomment)
4707 if (c == '}') /* within { } comments */
4708 incomment = FALSE;
4709 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4711 dbp++;
4712 incomment = FALSE;
4714 continue;
4716 else if (inquote)
4718 if (c == '\'')
4719 inquote = FALSE;
4720 continue;
4722 else
4723 switch (c)
4725 case '\'':
4726 inquote = TRUE; /* found first quote */
4727 continue;
4728 case '{': /* found open { comment */
4729 incomment = TRUE;
4730 continue;
4731 case '(':
4732 if (*dbp == '*') /* found open (* comment */
4734 incomment = TRUE;
4735 dbp++;
4737 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4738 inparms = TRUE;
4739 continue;
4740 case ')': /* end of parms list */
4741 if (inparms)
4742 inparms = FALSE;
4743 continue;
4744 case ';':
4745 if (found_tag && !inparms) /* end of proc or fn stmt */
4747 verify_tag = TRUE;
4748 break;
4750 continue;
4752 if (found_tag && verify_tag && (*dbp != ' '))
4754 /* Check if this is an "extern" declaration. */
4755 if (*dbp == '\0')
4756 continue;
4757 if (lowcase (*dbp == 'e'))
4759 if (nocase_tail ("extern")) /* superfluous, really! */
4761 found_tag = FALSE;
4762 verify_tag = FALSE;
4765 else if (lowcase (*dbp) == 'f')
4767 if (nocase_tail ("forward")) /* check for forward reference */
4769 found_tag = FALSE;
4770 verify_tag = FALSE;
4773 if (found_tag && verify_tag) /* not external proc, so make tag */
4775 found_tag = FALSE;
4776 verify_tag = FALSE;
4777 make_tag (name, namelen, TRUE,
4778 tline.buffer, taglen, save_lineno, save_lcno);
4779 continue;
4782 if (get_tagname) /* grab name of proc or fn */
4784 char *cp;
4786 if (*dbp == '\0')
4787 continue;
4789 /* Find block name. */
4790 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4791 continue;
4793 /* Save all values for later tagging. */
4794 linebuffer_setlen (&tline, lb.len);
4795 strcpy (tline.buffer, lb.buffer);
4796 save_lineno = lineno;
4797 save_lcno = linecharno;
4798 name = tline.buffer + (dbp - lb.buffer);
4799 namelen = cp - dbp;
4800 taglen = cp - lb.buffer + 1;
4802 dbp = cp; /* set dbp to e-o-token */
4803 get_tagname = FALSE;
4804 found_tag = TRUE;
4805 continue;
4807 /* And proceed to check for "extern". */
4809 else if (!incomment && !inquote && !found_tag)
4811 /* Check for proc/fn keywords. */
4812 switch (lowcase (c))
4814 case 'p':
4815 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4816 get_tagname = TRUE;
4817 continue;
4818 case 'f':
4819 if (nocase_tail ("unction"))
4820 get_tagname = TRUE;
4821 continue;
4824 } /* while not eof */
4826 free (tline.buffer);
4831 * Lisp tag functions
4832 * look for (def or (DEF, quote or QUOTE
4835 static void L_getit __P((void));
4837 static void
4838 L_getit ()
4840 if (*dbp == '\'') /* Skip prefix quote */
4841 dbp++;
4842 else if (*dbp == '(')
4844 dbp++;
4845 /* Try to skip "(quote " */
4846 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4847 /* Ok, then skip "(" before name in (defstruct (foo)) */
4848 dbp = skip_spaces (dbp);
4850 get_tag (dbp, NULL);
4853 static void
4854 Lisp_functions (inf)
4855 FILE *inf;
4857 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4859 if (dbp[0] != '(')
4860 continue;
4862 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4864 dbp = skip_non_spaces (dbp);
4865 dbp = skip_spaces (dbp);
4866 L_getit ();
4868 else
4870 /* Check for (foo::defmumble name-defined ... */
4872 dbp++;
4873 while (!notinname (*dbp) && *dbp != ':');
4874 if (*dbp == ':')
4877 dbp++;
4878 while (*dbp == ':');
4880 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4882 dbp = skip_non_spaces (dbp);
4883 dbp = skip_spaces (dbp);
4884 L_getit ();
4893 * Lua script language parsing
4894 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4896 * "function" and "local function" are tags if they start at column 1.
4898 static void
4899 Lua_functions (inf)
4900 FILE *inf;
4902 register char *bp;
4904 LOOP_ON_INPUT_LINES (inf, lb, bp)
4906 if (bp[0] != 'f' && bp[0] != 'l')
4907 continue;
4909 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4911 if (LOOKING_AT (bp, "function"))
4912 get_tag (bp, NULL);
4918 * Postscript tags
4919 * Just look for lines where the first character is '/'
4920 * Also look at "defineps" for PSWrap
4921 * Ideas by:
4922 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4923 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4925 static void
4926 PS_functions (inf)
4927 FILE *inf;
4929 register char *bp, *ep;
4931 LOOP_ON_INPUT_LINES (inf, lb, bp)
4933 if (bp[0] == '/')
4935 for (ep = bp+1;
4936 *ep != '\0' && *ep != ' ' && *ep != '{';
4937 ep++)
4938 continue;
4939 make_tag (bp, ep - bp, TRUE,
4940 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4942 else if (LOOKING_AT (bp, "defineps"))
4943 get_tag (bp, NULL);
4949 * Forth tags
4950 * Ignore anything after \ followed by space or in ( )
4951 * Look for words defined by :
4952 * Look for constant, code, create, defer, value, and variable
4953 * OBP extensions: Look for buffer:, field,
4954 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4956 static void
4957 Forth_words (inf)
4958 FILE *inf;
4960 register char *bp;
4962 LOOP_ON_INPUT_LINES (inf, lb, bp)
4963 while ((bp = skip_spaces (bp))[0] != '\0')
4964 if (bp[0] == '\\' && iswhite(bp[1]))
4965 break; /* read next line */
4966 else if (bp[0] == '(' && iswhite(bp[1]))
4967 do /* skip to ) or eol */
4968 bp++;
4969 while (*bp != ')' && *bp != '\0');
4970 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4971 || LOOKING_AT_NOCASE (bp, "constant")
4972 || LOOKING_AT_NOCASE (bp, "code")
4973 || LOOKING_AT_NOCASE (bp, "create")
4974 || LOOKING_AT_NOCASE (bp, "defer")
4975 || LOOKING_AT_NOCASE (bp, "value")
4976 || LOOKING_AT_NOCASE (bp, "variable")
4977 || LOOKING_AT_NOCASE (bp, "buffer:")
4978 || LOOKING_AT_NOCASE (bp, "field"))
4979 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4980 else
4981 bp = skip_non_spaces (bp);
4986 * Scheme tag functions
4987 * look for (def... xyzzy
4988 * (def... (xyzzy
4989 * (def ... ((...(xyzzy ....
4990 * (set! xyzzy
4991 * Original code by Ken Haase (1985?)
4993 static void
4994 Scheme_functions (inf)
4995 FILE *inf;
4997 register char *bp;
4999 LOOP_ON_INPUT_LINES (inf, lb, bp)
5001 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5003 bp = skip_non_spaces (bp+4);
5004 /* Skip over open parens and white space. Don't continue past
5005 '\0'. */
5006 while (*bp && notinname (*bp))
5007 bp++;
5008 get_tag (bp, NULL);
5010 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5011 get_tag (bp, NULL);
5016 /* Find tags in TeX and LaTeX input files. */
5018 /* TEX_toktab is a table of TeX control sequences that define tags.
5019 * Each entry records one such control sequence.
5021 * Original code from who knows whom.
5022 * Ideas by:
5023 * Stefan Monnier (2002)
5026 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5028 /* Default set of control sequences to put into TEX_toktab.
5029 The value of environment var TEXTAGS is prepended to this. */
5030 static char *TEX_defenv = "\
5031 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5032 :part:appendix:entry:index:def\
5033 :newcommand:renewcommand:newenvironment:renewenvironment";
5035 static void TEX_mode __P((FILE *));
5036 static void TEX_decode_env __P((char *, char *));
5038 static char TEX_esc = '\\';
5039 static char TEX_opgrp = '{';
5040 static char TEX_clgrp = '}';
5043 * TeX/LaTeX scanning loop.
5045 static void
5046 TeX_commands (inf)
5047 FILE *inf;
5049 char *cp;
5050 linebuffer *key;
5052 /* Select either \ or ! as escape character. */
5053 TEX_mode (inf);
5055 /* Initialize token table once from environment. */
5056 if (TEX_toktab == NULL)
5057 TEX_decode_env ("TEXTAGS", TEX_defenv);
5059 LOOP_ON_INPUT_LINES (inf, lb, cp)
5061 /* Look at each TEX keyword in line. */
5062 for (;;)
5064 /* Look for a TEX escape. */
5065 while (*cp++ != TEX_esc)
5066 if (cp[-1] == '\0' || cp[-1] == '%')
5067 goto tex_next_line;
5069 for (key = TEX_toktab; key->buffer != NULL; key++)
5070 if (strneq (cp, key->buffer, key->len))
5072 register char *p;
5073 int namelen, linelen;
5074 bool opgrp = FALSE;
5076 cp = skip_spaces (cp + key->len);
5077 if (*cp == TEX_opgrp)
5079 opgrp = TRUE;
5080 cp++;
5082 for (p = cp;
5083 (!iswhite (*p) && *p != '#' &&
5084 *p != TEX_opgrp && *p != TEX_clgrp);
5085 p++)
5086 continue;
5087 namelen = p - cp;
5088 linelen = lb.len;
5089 if (!opgrp || *p == TEX_clgrp)
5091 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5092 p++;
5093 linelen = p - lb.buffer + 1;
5095 make_tag (cp, namelen, TRUE,
5096 lb.buffer, linelen, lineno, linecharno);
5097 goto tex_next_line; /* We only tag a line once */
5100 tex_next_line:
5105 #define TEX_LESC '\\'
5106 #define TEX_SESC '!'
5108 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5109 chars accordingly. */
5110 static void
5111 TEX_mode (inf)
5112 FILE *inf;
5114 int c;
5116 while ((c = getc (inf)) != EOF)
5118 /* Skip to next line if we hit the TeX comment char. */
5119 if (c == '%')
5120 while (c != '\n' && c != EOF)
5121 c = getc (inf);
5122 else if (c == TEX_LESC || c == TEX_SESC )
5123 break;
5126 if (c == TEX_LESC)
5128 TEX_esc = TEX_LESC;
5129 TEX_opgrp = '{';
5130 TEX_clgrp = '}';
5132 else
5134 TEX_esc = TEX_SESC;
5135 TEX_opgrp = '<';
5136 TEX_clgrp = '>';
5138 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5139 No attempt is made to correct the situation. */
5140 rewind (inf);
5143 /* Read environment and prepend it to the default string.
5144 Build token table. */
5145 static void
5146 TEX_decode_env (evarname, defenv)
5147 char *evarname;
5148 char *defenv;
5150 register char *env, *p;
5151 int i, len;
5153 /* Append default string to environment. */
5154 env = getenv (evarname);
5155 if (!env)
5156 env = defenv;
5157 else
5159 char *oldenv = env;
5160 env = concat (oldenv, defenv, "");
5163 /* Allocate a token table */
5164 for (len = 1, p = env; p;)
5165 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5166 len++;
5167 TEX_toktab = xnew (len, linebuffer);
5169 /* Unpack environment string into token table. Be careful about */
5170 /* zero-length strings (leading ':', "::" and trailing ':') */
5171 for (i = 0; *env != '\0';)
5173 p = etags_strchr (env, ':');
5174 if (!p) /* End of environment string. */
5175 p = env + strlen (env);
5176 if (p - env > 0)
5177 { /* Only non-zero strings. */
5178 TEX_toktab[i].buffer = savenstr (env, p - env);
5179 TEX_toktab[i].len = p - env;
5180 i++;
5182 if (*p)
5183 env = p + 1;
5184 else
5186 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5187 TEX_toktab[i].len = 0;
5188 break;
5194 /* Texinfo support. Dave Love, Mar. 2000. */
5195 static void
5196 Texinfo_nodes (inf)
5197 FILE * inf;
5199 char *cp, *start;
5200 LOOP_ON_INPUT_LINES (inf, lb, cp)
5201 if (LOOKING_AT (cp, "@node"))
5203 start = cp;
5204 while (*cp != '\0' && *cp != ',')
5205 cp++;
5206 make_tag (start, cp - start, TRUE,
5207 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5213 * HTML support.
5214 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5215 * Contents of <a name=xxx> are tags with name xxx.
5217 * Francesco Potortì, 2002.
5219 static void
5220 HTML_labels (inf)
5221 FILE * inf;
5223 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5224 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5225 bool intag = FALSE; /* inside an html tag, looking for ID= */
5226 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5227 char *end;
5230 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5232 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5233 for (;;) /* loop on the same line */
5235 if (skiptag) /* skip HTML tag */
5237 while (*dbp != '\0' && *dbp != '>')
5238 dbp++;
5239 if (*dbp == '>')
5241 dbp += 1;
5242 skiptag = FALSE;
5243 continue; /* look on the same line */
5245 break; /* go to next line */
5248 else if (intag) /* look for "name=" or "id=" */
5250 while (*dbp != '\0' && *dbp != '>'
5251 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5252 dbp++;
5253 if (*dbp == '\0')
5254 break; /* go to next line */
5255 if (*dbp == '>')
5257 dbp += 1;
5258 intag = FALSE;
5259 continue; /* look on the same line */
5261 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5262 || LOOKING_AT_NOCASE (dbp, "id="))
5264 bool quoted = (dbp[0] == '"');
5266 if (quoted)
5267 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5268 continue;
5269 else
5270 for (end = dbp; *end != '\0' && intoken (*end); end++)
5271 continue;
5272 linebuffer_setlen (&token_name, end - dbp);
5273 strncpy (token_name.buffer, dbp, end - dbp);
5274 token_name.buffer[end - dbp] = '\0';
5276 dbp = end;
5277 intag = FALSE; /* we found what we looked for */
5278 skiptag = TRUE; /* skip to the end of the tag */
5279 getnext = TRUE; /* then grab the text */
5280 continue; /* look on the same line */
5282 dbp += 1;
5285 else if (getnext) /* grab next tokens and tag them */
5287 dbp = skip_spaces (dbp);
5288 if (*dbp == '\0')
5289 break; /* go to next line */
5290 if (*dbp == '<')
5292 intag = TRUE;
5293 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5294 continue; /* look on the same line */
5297 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5298 continue;
5299 make_tag (token_name.buffer, token_name.len, TRUE,
5300 dbp, end - dbp, lineno, linecharno);
5301 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5302 getnext = FALSE;
5303 break; /* go to next line */
5306 else /* look for an interesting HTML tag */
5308 while (*dbp != '\0' && *dbp != '<')
5309 dbp++;
5310 if (*dbp == '\0')
5311 break; /* go to next line */
5312 intag = TRUE;
5313 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5315 inanchor = TRUE;
5316 continue; /* look on the same line */
5318 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5319 || LOOKING_AT_NOCASE (dbp, "<h1>")
5320 || LOOKING_AT_NOCASE (dbp, "<h2>")
5321 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5323 intag = FALSE;
5324 getnext = TRUE;
5325 continue; /* look on the same line */
5327 dbp += 1;
5334 * Prolog support
5336 * Assumes that the predicate or rule starts at column 0.
5337 * Only the first clause of a predicate or rule is added.
5338 * Original code by Sunichirou Sugou (1989)
5339 * Rewritten by Anders Lindgren (1996)
5341 static int prolog_pr __P((char *, char *));
5342 static void prolog_skip_comment __P((linebuffer *, FILE *));
5343 static int prolog_atom __P((char *, int));
5345 static void
5346 Prolog_functions (inf)
5347 FILE *inf;
5349 char *cp, *last;
5350 int len;
5351 int allocated;
5353 allocated = 0;
5354 len = 0;
5355 last = NULL;
5357 LOOP_ON_INPUT_LINES (inf, lb, cp)
5359 if (cp[0] == '\0') /* Empty line */
5360 continue;
5361 else if (iswhite (cp[0])) /* Not a predicate */
5362 continue;
5363 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5364 prolog_skip_comment (&lb, inf);
5365 else if ((len = prolog_pr (cp, last)) > 0)
5367 /* Predicate or rule. Store the function name so that we
5368 only generate a tag for the first clause. */
5369 if (last == NULL)
5370 last = xnew(len + 1, char);
5371 else if (len + 1 > allocated)
5372 xrnew (last, len + 1, char);
5373 allocated = len + 1;
5374 strncpy (last, cp, len);
5375 last[len] = '\0';
5378 free (last);
5382 static void
5383 prolog_skip_comment (plb, inf)
5384 linebuffer *plb;
5385 FILE *inf;
5387 char *cp;
5391 for (cp = plb->buffer; *cp != '\0'; cp++)
5392 if (cp[0] == '*' && cp[1] == '/')
5393 return;
5394 readline (plb, inf);
5396 while (!feof(inf));
5400 * A predicate or rule definition is added if it matches:
5401 * <beginning of line><Prolog Atom><whitespace>(
5402 * or <beginning of line><Prolog Atom><whitespace>:-
5404 * It is added to the tags database if it doesn't match the
5405 * name of the previous clause header.
5407 * Return the size of the name of the predicate or rule, or 0 if no
5408 * header was found.
5410 static int
5411 prolog_pr (s, last)
5412 char *s;
5413 char *last; /* Name of last clause. */
5415 int pos;
5416 int len;
5418 pos = prolog_atom (s, 0);
5419 if (pos < 1)
5420 return 0;
5422 len = pos;
5423 pos = skip_spaces (s + pos) - s;
5425 if ((s[pos] == '.'
5426 || (s[pos] == '(' && (pos += 1))
5427 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5428 && (last == NULL /* save only the first clause */
5429 || len != (int)strlen (last)
5430 || !strneq (s, last, len)))
5432 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5433 return len;
5435 else
5436 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      pos++;
      while (ISALNUM(s[pos]) || (s[pos] == '_'))
        {
          pos++;
        }
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      for (;;)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
5500 * Support for Erlang
5502 * Generates tags for functions, defines, and records.
5503 * Assumes that Erlang functions start at column 0.
5504 * Original code by Anders Lindgren (1996)
5506 static int erlang_func __P((char *, char *));
5507 static void erlang_attribute __P((char *));
5508 static int erlang_atom __P((char *));
5510 static void
5511 Erlang_functions (inf)
5512 FILE *inf;
5514 char *cp, *last;
5515 int len;
5516 int allocated;
5518 allocated = 0;
5519 len = 0;
5520 last = NULL;
5522 LOOP_ON_INPUT_LINES (inf, lb, cp)
5524 if (cp[0] == '\0') /* Empty line */
5525 continue;
5526 else if (iswhite (cp[0])) /* Not function nor attribute */
5527 continue;
5528 else if (cp[0] == '%') /* comment */
5529 continue;
5530 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5531 continue;
5532 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5534 erlang_attribute (cp);
5535 if (last != NULL)
5537 free (last);
5538 last = NULL;
5541 else if ((len = erlang_func (cp, last)) > 0)
5544 * Function. Store the function name so that we only
5545 * generates a tag for the first clause.
5547 if (last == NULL)
5548 last = xnew (len + 1, char);
5549 else if (len + 1 > allocated)
5550 xrnew (last, len + 1, char);
5551 allocated = len + 1;
5552 strncpy (last, cp, len);
5553 last[len] = '\0';
5556 free (last);
5561 * A function definition is added if it matches:
5562 * <beginning of line><Erlang Atom><whitespace>(
5564 * It is added to the tags database if it doesn't match the
5565 * name of the previous clause header.
5567 * Return the size of the name of the function, or 0 if no function
5568 * was found.
5570 static int
5571 erlang_func (s, last)
5572 char *s;
5573 char *last; /* Name of last clause. */
5575 int pos;
5576 int len;
5578 pos = erlang_atom (s);
5579 if (pos < 1)
5580 return 0;
5582 len = pos;
5583 pos = skip_spaces (s + pos) - s;
5585 /* Save only the first clause. */
5586 if (s[pos++] == '('
5587 && (last == NULL
5588 || len != (int)strlen (last)
5589 || !strneq (s, last, len)))
5591 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5592 return len;
5595 return 0;
5600 * Handle attributes. Currently, tags are generated for defines
5601 * and records.
5603 * They are on the form:
5604 * -define(foo, bar).
5605 * -define(Foo(M, N), M+N).
5606 * -record(graph, {vtab = notable, cyclic = true}).
5608 static void
5609 erlang_attribute (s)
5610 char *s;
5612 char *cp = s;
5614 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5615 && *cp++ == '(')
5617 int len = erlang_atom (skip_spaces (cp));
5618 if (len > 0)
5619 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5621 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or starts with a quoted atom that is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
    }
  else if (s[pos] == '\'')
    {
      /* Quoted atom: a backslash quotes the character after it.  */
      for (pos++; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      pos++;
    }

  return pos;
}
5655 static char *scan_separators __P((char *));
5656 static void add_regex __P((char *, language *));
5657 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The result is written starting at NAME[0], i.e. over the opening
 * separator.  A backslash followed by any other character is kept
 * as is, backslash included.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* An empty string has no separator to look for.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character.  A backslash at the
             very end of the string leaves the regexp unterminated.  */
          if (*++name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5719 /* Look at the argument of --regex or --no-regex and do the right
5720 thing. Same for each line of a regexp file. */
5721 static void
5722 analyse_regex (regex_arg)
5723 char *regex_arg;
5725 if (regex_arg == NULL)
5727 free_regexps (); /* --no-regex: remove existing regexps */
5728 return;
5731 /* A real --regexp option or a line in a regexp file. */
5732 switch (regex_arg[0])
5734 /* Comments in regexp file or null arg to --regex. */
5735 case '\0':
5736 case ' ':
5737 case '\t':
5738 break;
5740 /* Read a regex file. This is recursive and may result in a
5741 loop, which will stop when the file descriptors are exhausted. */
5742 case '@':
5744 FILE *regexfp;
5745 linebuffer regexbuf;
5746 char *regexfile = regex_arg + 1;
5748 /* regexfile is a file containing regexps, one per line. */
5749 regexfp = fopen (regexfile, "r");
5750 if (regexfp == NULL)
5752 pfatal (regexfile);
5753 return;
5755 linebuffer_init (&regexbuf);
5756 while (readline_internal (&regexbuf, regexfp) > 0)
5757 analyse_regex (regexbuf.buffer);
5758 free (regexbuf.buffer);
5759 fclose (regexfp);
5761 break;
5763 /* Regexp to be used for a specific language only. */
5764 case '{':
5766 language *lang;
5767 char *lang_name = regex_arg + 1;
5768 char *cp;
5770 for (cp = lang_name; *cp != '}'; cp++)
5771 if (*cp == '\0')
5773 error ("unterminated language name in regex: %s", regex_arg);
5774 return;
5776 *cp++ = '\0';
5777 lang = get_language_from_langname (lang_name);
5778 if (lang == NULL)
5779 return;
5780 add_regex (cp, lang);
5782 break;
5784 /* Regexp to be used for any language. */
5785 default:
5786 add_regex (regex_arg, NULL);
5787 break;
5791 /* Separate the regexp pattern, compile it,
5792 and care for optional name and modifiers. */
5793 static void
5794 add_regex (regexp_pattern, lang)
5795 char *regexp_pattern;
5796 language *lang;
5798 static struct re_pattern_buffer zeropattern;
5799 char sep, *pat, *name, *modifiers;
5800 const char *err;
5801 struct re_pattern_buffer *patbuf;
5802 regexp *rp;
5803 bool
5804 force_explicit_name = TRUE, /* do not use implicit tag names */
5805 ignore_case = FALSE, /* case is significant */
5806 multi_line = FALSE, /* matches are done one line at a time */
5807 single_line = FALSE; /* dot does not match newline */
5810 if (strlen(regexp_pattern) < 3)
5812 error ("null regexp", (char *)NULL);
5813 return;
5815 sep = regexp_pattern[0];
5816 name = scan_separators (regexp_pattern);
5817 if (name == NULL)
5819 error ("%s: unterminated regexp", regexp_pattern);
5820 return;
5822 if (name[1] == sep)
5824 error ("null name for regexp \"%s\"", regexp_pattern);
5825 return;
5827 modifiers = scan_separators (name);
5828 if (modifiers == NULL) /* no terminating separator --> no name */
5830 modifiers = name;
5831 name = "";
5833 else
5834 modifiers += 1; /* skip separator */
5836 /* Parse regex modifiers. */
5837 for (; modifiers[0] != '\0'; modifiers++)
5838 switch (modifiers[0])
5840 case 'N':
5841 if (modifiers == name)
5842 error ("forcing explicit tag name but no name, ignoring", NULL);
5843 force_explicit_name = TRUE;
5844 break;
5845 case 'i':
5846 ignore_case = TRUE;
5847 break;
5848 case 's':
5849 single_line = TRUE;
5850 /* FALLTHRU */
5851 case 'm':
5852 multi_line = TRUE;
5853 need_filebuf = TRUE;
5854 break;
5855 default:
5857 char wrongmod [2];
5858 wrongmod[0] = modifiers[0];
5859 wrongmod[1] = '\0';
5860 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5862 break;
5865 patbuf = xnew (1, struct re_pattern_buffer);
5866 *patbuf = zeropattern;
5867 if (ignore_case)
5869 static char lc_trans[CHARS];
5870 int i;
5871 for (i = 0; i < CHARS; i++)
5872 lc_trans[i] = lowcase (i);
5873 patbuf->translate = lc_trans; /* translation table to fold case */
5876 if (multi_line)
5877 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5878 else
5879 pat = regexp_pattern;
5881 if (single_line)
5882 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5883 else
5884 re_set_syntax (RE_SYNTAX_EMACS);
5886 err = re_compile_pattern (pat, strlen (pat), patbuf);
5887 if (multi_line)
5888 free (pat);
5889 if (err != NULL)
5891 error ("%s while compiling pattern", err);
5892 return;
5895 rp = p_head;
5896 p_head = xnew (1, regexp);
5897 p_head->pattern = savestr (regexp_pattern);
5898 p_head->p_next = rp;
5899 p_head->lang = lang;
5900 p_head->pat = patbuf;
5901 p_head->name = savestr (name);
5902 p_head->error_signaled = FALSE;
5903 p_head->force_explicit_name = force_explicit_name;
5904 p_head->ignore_case = ignore_case;
5905 p_head->multi_line = multi_line;
5909 * Do the substitutions indicated by the regular expression and
5910 * arguments.
5912 static char *
5913 substitute (in, out, regs)
5914 char *in, *out;
5915 struct re_registers *regs;
5917 char *result, *t;
5918 int size, dig, diglen;
5920 result = NULL;
5921 size = strlen (out);
5923 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5924 if (out[size - 1] == '\\')
5925 fatal ("pattern error in \"%s\"", out);
5926 for (t = etags_strchr (out, '\\');
5927 t != NULL;
5928 t = etags_strchr (t + 2, '\\'))
5929 if (ISDIGIT (t[1]))
5931 dig = t[1] - '0';
5932 diglen = regs->end[dig] - regs->start[dig];
5933 size += diglen - 2;
5935 else
5936 size -= 1;
5938 /* Allocate space and do the substitutions. */
5939 assert (size >= 0);
5940 result = xnew (size + 1, char);
5942 for (t = result; *out != '\0'; out++)
5943 if (*out == '\\' && ISDIGIT (*++out))
5945 dig = *out - '0';
5946 diglen = regs->end[dig] - regs->start[dig];
5947 strncpy (t, in + regs->start[dig], diglen);
5948 t += diglen;
5950 else
5951 *t++ = *out;
5952 *t = '\0';
5954 assert (t <= result + size);
5955 assert (t - result == (int)strlen (result));
5957 return result;
5960 /* Deallocate all regexps. */
5961 static void
5962 free_regexps ()
5964 regexp *rp;
5965 while (p_head != NULL)
5967 rp = p_head->p_next;
5968 free (p_head->pattern);
5969 free (p_head->name);
5970 free (p_head);
5971 p_head = rp;
5973 return;
5977 * Reads the whole file as a single string from `filebuf' and looks for
5978 * multi-line regular expressions, creating tags on matches.
5979 * readline already dealt with normal regexps.
5981 * Idea by Ben Wing <ben@666.com> (2002).
5983 static void
5984 regex_tag_multiline ()
5986 char *buffer = filebuf.buffer;
5987 regexp *rp;
5988 char *name;
5990 for (rp = p_head; rp != NULL; rp = rp->p_next)
5992 int match = 0;
5994 if (!rp->multi_line)
5995 continue; /* skip normal regexps */
5997 /* Generic initialisations before parsing file from memory. */
5998 lineno = 1; /* reset global line number */
5999 charno = 0; /* reset global char number */
6000 linecharno = 0; /* reset global char number of line start */
6002 /* Only use generic regexps or those for the current language. */
6003 if (rp->lang != NULL && rp->lang != curfdp->lang)
6004 continue;
6006 while (match >= 0 && match < filebuf.len)
6008 match = re_search (rp->pat, buffer, filebuf.len, charno,
6009 filebuf.len - match, &rp->regs);
6010 switch (match)
6012 case -2:
6013 /* Some error. */
6014 if (!rp->error_signaled)
6016 error ("regexp stack overflow while matching \"%s\"",
6017 rp->pattern);
6018 rp->error_signaled = TRUE;
6020 break;
6021 case -1:
6022 /* No match. */
6023 break;
6024 default:
6025 if (match == rp->regs.end[0])
6027 if (!rp->error_signaled)
6029 error ("regexp matches the empty string: \"%s\"",
6030 rp->pattern);
6031 rp->error_signaled = TRUE;
6033 match = -3; /* exit from while loop */
6034 break;
6037 /* Match occurred. Construct a tag. */
6038 while (charno < rp->regs.end[0])
6039 if (buffer[charno++] == '\n')
6040 lineno++, linecharno = charno;
6041 name = rp->name;
6042 if (name[0] == '\0')
6043 name = NULL;
6044 else /* make a named tag */
6045 name = substitute (buffer, rp->name, &rp->regs);
6046 if (rp->force_explicit_name)
6047 /* Force explicit tag name, if a name is there. */
6048 pfnote (name, TRUE, buffer + linecharno,
6049 charno - linecharno + 1, lineno, linecharno);
6050 else
6051 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6052 charno - linecharno + 1, lineno, linecharno);
6053 break;
6060 static bool
6061 nocase_tail (cp)
6062 char *cp;
6064 register int len = 0;
6066 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6067 cp++, len++;
6068 if (*cp == '\0' && !intoken (dbp[len]))
6070 dbp += len;
6071 return TRUE;
6073 return FALSE;
6076 static void
6077 get_tag (bp, namepp)
6078 register char *bp;
6079 char **namepp;
6081 register char *cp = bp;
6083 if (*bp != '\0')
6085 /* Go till you get to white space or a syntactic break */
6086 for (cp = bp + 1; !notinname (*cp); cp++)
6087 continue;
6088 make_tag (bp, cp - bp, TRUE,
6089 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6092 if (namepp != NULL)
6093 *namepp = savenstr (bp, cp - bp);
6097 * Read a line of text from `stream' into `lbp', excluding the
6098 * newline or CR-NL, if any. Return the number of characters read from
6099 * `stream', which is the length of the line including the newline.
6101 * On DOS or Windows we do not count the CR character, if any before the
6102 * NL, in the returned length; this mirrors the behavior of Emacs on those
6103 * platforms (for text files, it translates CR-NL to NL as it reads in the
6104 * file).
6106 * If multi-line regular expressions are requested, each line read is
6107 * appended to `filebuf'.
6109 static long
6110 readline_internal (lbp, stream)
6111 linebuffer *lbp;
6112 register FILE *stream;
6114 char *buffer = lbp->buffer;
6115 register char *p = lbp->buffer;
6116 register char *pend;
6117 int chars_deleted;
6119 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6121 for (;;)
6123 register int c = getc (stream);
6124 if (p == pend)
6126 /* We're at the end of linebuffer: expand it. */
6127 lbp->size *= 2;
6128 xrnew (buffer, lbp->size, char);
6129 p += buffer - lbp->buffer;
6130 pend = buffer + lbp->size;
6131 lbp->buffer = buffer;
6133 if (c == EOF)
6135 *p = '\0';
6136 chars_deleted = 0;
6137 break;
6139 if (c == '\n')
6141 if (p > buffer && p[-1] == '\r')
6143 p -= 1;
6144 #ifdef DOS_NT
6145 /* Assume CRLF->LF translation will be performed by Emacs
6146 when loading this file, so CRs won't appear in the buffer.
6147 It would be cleaner to compensate within Emacs;
6148 however, Emacs does not know how many CRs were deleted
6149 before any given point in the file. */
6150 chars_deleted = 1;
6151 #else
6152 chars_deleted = 2;
6153 #endif
6155 else
6157 chars_deleted = 1;
6159 *p = '\0';
6160 break;
6162 *p++ = c;
6164 lbp->len = p - buffer;
6166 if (need_filebuf /* we need filebuf for multi-line regexps */
6167 && chars_deleted > 0) /* not at EOF */
6169 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6171 /* Expand filebuf. */
6172 filebuf.size *= 2;
6173 xrnew (filebuf.buffer, filebuf.size, char);
6175 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6176 filebuf.len += lbp->len;
6177 filebuf.buffer[filebuf.len++] = '\n';
6178 filebuf.buffer[filebuf.len] = '\0';
6181 return lbp->len + chars_deleted;
6185 * Like readline_internal, above, but in addition try to match the
6186 * input line against relevant regular expressions and manage #line
6187 * directives.
6189 static void
6190 readline (lbp, stream)
6191 linebuffer *lbp;
6192 FILE *stream;
6194 long result;
6196 linecharno = charno; /* update global char number of line start */
6197 result = readline_internal (lbp, stream); /* read line */
6198 lineno += 1; /* increment global line number */
6199 charno += result; /* increment global char number */
6201 /* Honour #line directives. */
6202 if (!no_line_directive)
6204 static bool discard_until_line_directive;
6206 /* Check whether this is a #line directive. */
6207 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6209 unsigned int lno;
6210 int start = 0;
6212 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6213 && start > 0) /* double quote character found */
6215 char *endp = lbp->buffer + start;
6217 while ((endp = etags_strchr (endp, '"')) != NULL
6218 && endp[-1] == '\\')
6219 endp++;
6220 if (endp != NULL)
6221 /* Ok, this is a real #line directive. Let's deal with it. */
6223 char *taggedabsname; /* absolute name of original file */
6224 char *taggedfname; /* name of original file as given */
6225 char *name; /* temp var */
6227 discard_until_line_directive = FALSE; /* found it */
6228 name = lbp->buffer + start;
6229 *endp = '\0';
6230 canonicalize_filename (name);
6231 taggedabsname = absolute_filename (name, tagfiledir);
6232 if (filename_is_absolute (name)
6233 || filename_is_absolute (curfdp->infname))
6234 taggedfname = savestr (taggedabsname);
6235 else
6236 taggedfname = relative_filename (taggedabsname,tagfiledir);
6238 if (streq (curfdp->taggedfname, taggedfname))
6239 /* The #line directive is only a line number change. We
6240 deal with this afterwards. */
6241 free (taggedfname);
6242 else
6243 /* The tags following this #line directive should be
6244 attributed to taggedfname. In order to do this, set
6245 curfdp accordingly. */
6247 fdesc *fdp; /* file description pointer */
6249 /* Go look for a file description already set up for the
6250 file indicated in the #line directive. If there is
6251 one, use it from now until the next #line
6252 directive. */
6253 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6254 if (streq (fdp->infname, curfdp->infname)
6255 && streq (fdp->taggedfname, taggedfname))
6256 /* If we remove the second test above (after the &&)
6257 then all entries pertaining to the same file are
6258 coalesced in the tags file. If we use it, then
6259 entries pertaining to the same file but generated
6260 from different files (via #line directives) will
6261 go into separate sections in the tags file. These
6262 alternatives look equivalent. The first one
6263 destroys some apparently useless information. */
6265 curfdp = fdp;
6266 free (taggedfname);
6267 break;
6269 /* Else, if we already tagged the real file, skip all
6270 input lines until the next #line directive. */
6271 if (fdp == NULL) /* not found */
6272 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6273 if (streq (fdp->infabsname, taggedabsname))
6275 discard_until_line_directive = TRUE;
6276 free (taggedfname);
6277 break;
6279 /* Else create a new file description and use that from
6280 now on, until the next #line directive. */
6281 if (fdp == NULL) /* not found */
6283 fdp = fdhead;
6284 fdhead = xnew (1, fdesc);
6285 *fdhead = *curfdp; /* copy curr. file description */
6286 fdhead->next = fdp;
6287 fdhead->infname = savestr (curfdp->infname);
6288 fdhead->infabsname = savestr (curfdp->infabsname);
6289 fdhead->infabsdir = savestr (curfdp->infabsdir);
6290 fdhead->taggedfname = taggedfname;
6291 fdhead->usecharno = FALSE;
6292 fdhead->prop = NULL;
6293 fdhead->written = FALSE;
6294 curfdp = fdhead;
6297 free (taggedabsname);
6298 lineno = lno - 1;
6299 readline (lbp, stream);
6300 return;
6301 } /* if a real #line directive */
6302 } /* if #line is followed by a number */
6303 } /* if line begins with "#line " */
6305 /* If we are here, no #line directive was found. */
6306 if (discard_until_line_directive)
6308 if (result > 0)
6310 /* Do a tail recursion on ourselves, thus discarding the contents
6311 of the line buffer. */
6312 readline (lbp, stream);
6313 return;
6315 /* End of file. */
6316 discard_until_line_directive = FALSE;
6317 return;
6319 } /* if #line directives should be considered */
6322 int match;
6323 regexp *rp;
6324 char *name;
6326 /* Match against relevant regexps. */
6327 if (lbp->len > 0)
6328 for (rp = p_head; rp != NULL; rp = rp->p_next)
6330 /* Only use generic regexps or those for the current language.
6331 Also do not use multiline regexps, which is the job of
6332 regex_tag_multiline. */
6333 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6334 || rp->multi_line)
6335 continue;
6337 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6338 switch (match)
6340 case -2:
6341 /* Some error. */
6342 if (!rp->error_signaled)
6344 error ("regexp stack overflow while matching \"%s\"",
6345 rp->pattern);
6346 rp->error_signaled = TRUE;
6348 break;
6349 case -1:
6350 /* No match. */
6351 break;
6352 case 0:
6353 /* Empty string matched. */
6354 if (!rp->error_signaled)
6356 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6357 rp->error_signaled = TRUE;
6359 break;
6360 default:
6361 /* Match occurred. Construct a tag. */
6362 name = rp->name;
6363 if (name[0] == '\0')
6364 name = NULL;
6365 else /* make a named tag */
6366 name = substitute (lbp->buffer, rp->name, &rp->regs);
6367 if (rp->force_explicit_name)
6368 /* Force explicit tag name, if a name is there. */
6369 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6370 else
6371 make_tag (name, strlen (name), TRUE,
6372 lbp->buffer, match, lineno, linecharno);
6373 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  return savenstr (cp, strlen (cp));
}
6392 * Return a pointer to a space of size LEN+1 allocated with xnew where
6393 * the string CP has been copied for at most the first LEN characters.
6395 static char *
6396 savenstr (cp, len)
6397 char *cp;
6398 int len;
6400 register char *dp;
6402 dp = xnew (len + 1, char);
6403 strncpy (dp, cp, len);
6404 dp[len] = '\0';
6405 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to `char' before comparing, and the
 * terminating NUL is considered part of the string.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Compare as `char' so that bytes above 0x7F are found even where
     plain `char' is signed. */
  char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to `char' before comparing, and the
 * terminating NUL is considered part of the string.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Compare as `char' so that bytes above 0x7F are found even where
     plain `char' is signed. */
  char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero when they are equal; otherwise the difference between
 * the first pair of characters that do not match.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Case is folded only when both characters are alphabetic. */
  while (*s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Return zero when the first N characters are equal (always the case
 * for N <= 0); otherwise the difference between the first pair of
 * characters that do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* The remaining count is tested first and decremented only for
     characters actually consumed.  That way reaching the end of S1
     exactly at the limit still counts as "N characters compared". */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  while (iswhite (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (*cp != '\0' && !iswhite (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  S1 and S2 are handed to `error'
   unchanged; the process then terminates with EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Report the current `errno' through perror, using S1 as the prefix,
   and exit with EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6533 static void
6534 suggest_asking_for_help ()
6536 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6537 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6538 exit (EXIT_FAILURE);
6541 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6542 static void
6543 error (s1, s2)
6544 const char *s1, *s2;
6546 fprintf (stderr, "%s: ", progname);
6547 fprintf (stderr, s1, s2);
6548 fprintf (stderr, "\n");
6551 /* Return a newly-allocated string whose contents
6552 concatenate those of s1, s2, s3. */
6553 static char *
6554 concat (s1, s2, s3)
6555 char *s1, *s2, *s3;
6557 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6558 char *result = xnew (len1 + len2 + len3 + 1, char);
6560 strcpy (result, s1);
6561 strcpy (result + len1, s2);
6562 strcpy (result + len1 + len2, s3);
6563 result[len1 + len2 + len3] = '\0';
6565 return result;
6569 /* Does the same work as the system V getcwd, but does not need to
6570 guess the buffer size in advance. */
6571 static char *
6572 etags_getcwd ()
6574 #ifdef HAVE_GETCWD
6575 int bufsize = 200;
6576 char *path = xnew (bufsize, char);
6578 while (getcwd (path, bufsize) == NULL)
6580 if (errno != ERANGE)
6581 pfatal ("getcwd");
6582 bufsize *= 2;
6583 free (path);
6584 path = xnew (bufsize, char);
6587 canonicalize_filename (path);
6588 return path;
6590 #else /* not HAVE_GETCWD */
6591 #if MSDOS
6593 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6595 getwd (path);
6597 for (p = path; *p != '\0'; p++)
6598 if (*p == '\\')
6599 *p = '/';
6600 else
6601 *p = lowcase (*p);
6603 return strdup (path);
6604 #else /* not MSDOS */
6605 linebuffer path;
6606 FILE *pipe;
6608 linebuffer_init (&path);
6609 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6610 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6611 pfatal ("pwd");
6612 pclose (pipe);
6614 return path.buffer;
6615 #endif /* not MSDOS */
6616 #endif /* not HAVE_GETCWD */
6619 /* Return a newly allocated string containing the file name of FILE
6620 relative to the absolute directory DIR (which should end with a slash). */
6621 static char *
6622 relative_filename (file, dir)
6623 char *file, *dir;
6625 char *fp, *dp, *afn, *res;
6626 int i;
6628 /* Find the common root of file and dir (with a trailing slash). */
6629 afn = absolute_filename (file, cwd);
6630 fp = afn;
6631 dp = dir;
6632 while (*fp++ == *dp++)
6633 continue;
6634 fp--, dp--; /* back to the first differing char */
6635 #ifdef DOS_NT
6636 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6637 return afn;
6638 #endif
6639 do /* look at the equal chars until '/' */
6640 fp--, dp--;
6641 while (*fp != '/');
6643 /* Build a sequence of "../" strings for the resulting relative file name. */
6644 i = 0;
6645 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6646 i += 1;
6647 res = xnew (3*i + strlen (fp + 1) + 1, char);
6648 res[0] = '\0';
6649 while (i-- > 0)
6650 strcat (res, "../");
6652 /* Add the file name relative to the common root of file and dir. */
6653 strcat (res, fp + 1);
6654 free (afn);
6656 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              {
                /* Source and destination overlap, which strcpy does not
                   allow.  The destination is below the source, so a
                   forward byte copy is safe. */
                char *to = cp, *from = slashp + 3;
                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              {
                /* Overlapping regions again: copy forward by hand. */
                char *to = slashp, *from = slashp + 2;
                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily and restored before
   returning. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  /* Cut FILE right after its last slash for the duration of the call. */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/". */
static bool
filename_is_absolute (fn)
     char *fn;
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Upcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is '/' by default and '\\' under
   DOS_NT; every run of separators is rewritten as one '/'. */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char* cp;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash.  CP reads the
     original text while FN writes the compacted text; FN never gets
     ahead of CP. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == sep)
      {
        *fn = '/';
        while (cp[1] == sep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6797 /* Initialize a linebuffer for use. */
6798 static void
6799 linebuffer_init (lbp)
6800 linebuffer *lbp;
6802 lbp->size = (DEBUG) ? 3 : 200;
6803 lbp->buffer = xnew (lbp->size, char);
6804 lbp->buffer[0] = '\0';
6805 lbp->len = 0;
6808 /* Set the minimum size of a string contained in a linebuffer. */
6809 static void
6810 linebuffer_setlen (lbp, toksize)
6811 linebuffer *lbp;
6812 int toksize;
6814 while (lbp->size <= toksize)
6816 lbp->size *= 2;
6817 xrnew (lbp->buffer, lbp->size, char);
6819 lbp->len = toksize;
6822 /* Like malloc but get fatal error if memory is exhausted. */
6823 static PTR
6824 xmalloc (size)
6825 unsigned int size;
6827 PTR result = (PTR) malloc (size);
6828 if (result == NULL)
6829 fatal ("virtual memory exhausted", (char *)NULL);
6830 return result;
6833 static PTR
6834 xrealloc (ptr, size)
6835 char *ptr;
6836 unsigned int size;
6838 PTR result = (PTR) realloc (ptr, size);
6839 if (result == NULL)
6840 fatal ("virtual memory exhausted", (char *)NULL);
6841 return result;
6845 * Local Variables:
6846 * indent-tabs-mode: t
6847 * tab-width: 8
6848 * fill-column: 79
6849 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6850 * c-file-style: "gnu"
6851 * End:
6854 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6855 (do not change this comment) */
6857 /* etags.c ends here */