* customize.texi (Composite Types): Move alist/plist from Simple Types (Bug#7545).
[emacs.git] / lib-src / etags.c
blobda43b89e40a831cd4a6edf4b08e93091495a4747
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
33 2011 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider distributing etags
79 * together with a configuration file containing regexp definitions for etags.
82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
84 #define TRUE 1
85 #define FALSE 0
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
191 #ifndef errno
192 extern int errno;
193 #endif
194 #include <sys/types.h>
195 #include <sys/stat.h>
197 #include <assert.h>
198 #ifdef NDEBUG
199 # undef assert /* some systems have a buggy assert.h */
200 # define assert(x) ((void) 0)
201 #endif
203 #if !defined (S_ISREG) && defined (S_IFREG)
204 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
205 #endif
207 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
208 # define NO_LONG_OPTIONS TRUE
209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
210 extern char *optarg;
211 extern int optind, opterr;
212 #else
213 # define NO_LONG_OPTIONS FALSE
214 # include <getopt.h>
215 #endif /* NO_LONG_OPTIONS */
217 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
218 # ifdef __CYGWIN__ /* compiling on Cygwin */
219 !!! NOTICE !!!
220 the regex.h distributed with Cygwin is not compatible with etags, alas!
221 If you want regular expression support, you should delete this notice and
222 arrange to use the GNU regex.h and regex.c.
223 # endif
224 #endif
225 #include <regex.h>
227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
228 Leave it undefined to make the program "etags", which makes emacs-style
229 tag tables and tags typedefs, #defines and struct/union/enum by default. */
230 #ifdef CTAGS
231 # undef CTAGS
232 # define CTAGS TRUE
233 #else
234 # define CTAGS FALSE
235 #endif
/* String equality predicates.  Each yields nonzero when its operands
   compare equal: streq and strneq go through strcmp and strncmp,
   strcaseeq and strncaseeq through the case-insensitive etags_*
   helpers.  Neither operand may be NULL: when assertions are enabled,
   BOTH operands are checked before the comparison runs (a single NULL
   operand used to slip past the check in streq and strneq).  With
   NDEBUG defined the assertion expands to nothing.  In assertion-enabled
   builds each operand is evaluated more than once, so do not pass
   expressions with side effects.  */
#define streq(s,t) (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* Character classification helpers.  CHAR(x) converts X to unsigned int
   and keeps only the low bits selected by CHARS - 1, so the result is
   always a valid index in the range 0 .. CHARS-1 even when X is a
   negative plain char.  */
242 #define CHARS 256 /* number of distinct char values: 2^8, i.e. assumes 8-bit chars */
243 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
/* Table lookups: each macro indexes one of the CHARS-sized boolean
   arrays _wht, _nin, _btk, _itk, _etk with the masked character.  */
244 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
245 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
246 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
247 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
248 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
/* <ctype.h> wrappers: the argument is masked through CHAR first, so the
   library function never receives a negative value.  */
250 #define ISALNUM(c) isalnum (CHAR(c))
251 #define ISALPHA(c) isalpha (CHAR(c))
252 #define ISDIGIT(c) isdigit (CHAR(c))
253 #define ISLOWER(c) islower (CHAR(c))
/* Case conversion, with the same masking applied to the argument.  */
255 #define lowcase(c) tolower (CHAR(c))
256 #define upcase(c) toupper (CHAR(c))
260 * xnew, xrnew -- allocate, reallocate storage
262 * SYNOPSIS: Type *xnew (int n, Type);
263 * void xrnew (OldPointer, int n, Type);
/* xnew (n, Type) yields a `Type *' to storage sized for N objects of
   Type.  xrnew (op, n, Type) resizes the storage OP points to so that it
   is sized for N objects of Type and assigns the result back to OP, so
   OP must be an lvalue.  The byte count is computed as
   (n) * sizeof (Type) with no overflow check.  */
265 #if DEBUG
/* Debug build: go through the tracing allocator, which is also passed
   the file and line of the call site.  */
266 # include "chkmalloc.h"
267 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
268 (n) * sizeof (Type)))
269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
270 (char *) (op), (n) * sizeof (Type)))
271 #else
/* Normal build: use this file's xmalloc and xrealloc wrappers.
   NOTE(review): their prototypes take `unsigned int', so the size_t
   byte count is presumably narrowed at the call -- confirm against the
   definitions.  */
272 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
274 (char *) (op), (n) * sizeof (Type)))
275 #endif
277 #define bool int
279 typedef void Lang_function __P((FILE *));
281 typedef struct
283 char *suffix; /* file name suffix for this compressor */
284 char *command; /* takes one arg and decompresses to stdout */
285 } compressor;
287 typedef struct
289 char *name; /* language name */
290 char *help; /* detailed help for the language */
291 Lang_function *function; /* parse function */
292 char **suffixes; /* name suffixes of this language's files */
293 char **filenames; /* names of this language's files */
294 char **interpreters; /* interpreters for this language */
295 bool metasource; /* source used to generate other sources */
296 } language;
298 typedef struct fdesc
300 struct fdesc *next; /* for the linked list */
301 char *infname; /* uncompressed input file name */
302 char *infabsname; /* absolute uncompressed input file name */
303 char *infabsdir; /* absolute dir of input file */
304 char *taggedfname; /* file name to write in tagfile */
305 language *lang; /* language of file */
306 char *prop; /* file properties to write in tagfile */
307 bool usecharno; /* etags tags shall contain char number */
308 bool written; /* entry written in the tags file */
309 } fdesc;
311 typedef struct node_st
312 { /* sorting structure */
313 struct node_st *left, *right; /* left and right sons */
314 fdesc *fdp; /* description of file to whom tag belongs */
315 char *name; /* tag name */
316 char *regex; /* search regexp */
317 bool valid; /* write this tag on the tag file */
318 bool is_func; /* function tag: use regexp in CTAGS mode */
319 bool been_warned; /* warning already given for duplicated tag */
320 int lno; /* line number tag is on */
321 long cno; /* character number line starts on */
322 } node;
325 * A `linebuffer' is a structure which holds a line of text.
326 * `readline_internal' reads a line from a stream into a linebuffer
327 * and works regardless of the length of the line.
328 * SIZE is the size of BUFFER, LEN is the length of the string in
329 * BUFFER after readline reads it.
331 typedef struct
333 long size;
334 int len;
335 char *buffer;
336 } linebuffer;
338 /* Used to support mixing of --lang and file names. */
339 typedef struct
341 enum {
342 at_language, /* a language specification */
343 at_regexp, /* a regular expression */
344 at_filename, /* a file name */
345 at_stdin, /* read from stdin here */
346 at_end /* stop parsing the list */
347 } arg_type; /* argument type */
348 language *lang; /* language associated with the argument */
349 char *what; /* the argument itself */
350 } argument;
352 /* Structure defining a regular expression.  Entries are chained
   through p_next into a singly linked list.  Each entry keeps the
   pattern text and tag name, a pointer to the compiled pattern, its
   match registers and per-regexp option flags.  */
353 typedef struct regexp
355 struct regexp *p_next; /* pointer to next in list */
356 language *lang; /* if set, use only for this language */
357 char *pattern; /* the regexp pattern */
358 char *name; /* tag name */
359 struct re_pattern_buffer *pat; /* the compiled pattern */
360 struct re_registers regs; /* re registers */
361 bool error_signaled; /* already signaled for this regexp */
362 bool force_explicit_name; /* do not allow implicit tag name */
363 bool ignore_case; /* ignore case when matching */
364 bool multi_line; /* do a multi-line match on the whole file */
365 } regexp;
368 /* Many compilers barf on this:
369 Lang_function Ada_funcs;
370 so let's write it this way */
371 static void Ada_funcs __P((FILE *));
372 static void Asm_labels __P((FILE *));
373 static void C_entries __P((int c_ext, FILE *));
374 static void default_C_entries __P((FILE *));
375 static void plain_C_entries __P((FILE *));
376 static void Cjava_entries __P((FILE *));
377 static void Cobol_paragraphs __P((FILE *));
378 static void Cplusplus_entries __P((FILE *));
379 static void Cstar_entries __P((FILE *));
380 static void Erlang_functions __P((FILE *));
381 static void Forth_words __P((FILE *));
382 static void Fortran_functions __P((FILE *));
383 static void HTML_labels __P((FILE *));
384 static void Lisp_functions __P((FILE *));
385 static void Lua_functions __P((FILE *));
386 static void Makefile_targets __P((FILE *));
387 static void Pascal_functions __P((FILE *));
388 static void Perl_functions __P((FILE *));
389 static void PHP_functions __P((FILE *));
390 static void PS_functions __P((FILE *));
391 static void Prolog_functions __P((FILE *));
392 static void Python_functions __P((FILE *));
393 static void Scheme_functions __P((FILE *));
394 static void TeX_commands __P((FILE *));
395 static void Texinfo_nodes __P((FILE *));
396 static void Yacc_entries __P((FILE *));
397 static void just_read_file __P((FILE *));
399 static void print_language_names __P((void));
400 static void print_version __P((void));
401 static void print_help __P((argument *));
402 int main __P((int, char **));
404 static compressor *get_compressor_from_suffix __P((char *, char **));
405 static language *get_language_from_langname __P((const char *));
406 static language *get_language_from_interpreter __P((char *));
407 static language *get_language_from_filename __P((char *, bool));
408 static void readline __P((linebuffer *, FILE *));
409 static long readline_internal __P((linebuffer *, FILE *));
410 static bool nocase_tail __P((char *));
411 static void get_tag __P((char *, char **));
413 static void analyse_regex __P((char *));
414 static void free_regexps __P((void));
415 static void regex_tag_multiline __P((void));
416 static void error __P((const char *, const char *));
417 static void suggest_asking_for_help __P((void));
418 void fatal __P((char *, char *));
419 static void pfatal __P((char *));
420 static void add_node __P((node *, node **));
422 static void init __P((void));
423 static void process_file_name __P((char *, language *));
424 static void process_file __P((FILE *, char *, language *));
425 static void find_entries __P((FILE *));
426 static void free_tree __P((node *));
427 static void free_fdesc __P((fdesc *));
428 static void pfnote __P((char *, bool, char *, int, int, long));
429 static void make_tag __P((char *, int, bool, char *, int, int, long));
430 static void invalidate_nodes __P((fdesc *, node **));
431 static void put_entries __P((node *));
433 static char *concat __P((char *, char *, char *));
434 static char *skip_spaces __P((char *));
435 static char *skip_non_spaces __P((char *));
436 static char *savenstr __P((char *, int));
437 static char *savestr __P((char *));
438 static char *etags_strchr __P((const char *, int));
439 static char *etags_strrchr __P((const char *, int));
440 static int etags_strcasecmp __P((const char *, const char *));
441 static int etags_strncasecmp __P((const char *, const char *, int));
442 static char *etags_getcwd __P((void));
443 static char *relative_filename __P((char *, char *));
444 static char *absolute_filename __P((char *, char *));
445 static char *absolute_dirname __P((char *, char *));
446 static bool filename_is_absolute __P((char *f));
447 static void canonicalize_filename __P((char *));
448 static void linebuffer_init __P((linebuffer *));
449 static void linebuffer_setlen __P((linebuffer *, int));
450 static PTR xmalloc __P((unsigned int));
451 static PTR xrealloc __P((char *, unsigned int));
454 static char searchar = '/'; /* use /.../ searches */
456 static char *tagfile; /* output file */
457 static char *progname; /* name this program was invoked with */
458 static char *cwd; /* current working directory */
459 static char *tagfiledir; /* directory of tagfile */
460 static FILE *tagf; /* ioptr for tags file */
462 static fdesc *fdhead; /* head of file description list */
463 static fdesc *curfdp; /* current file description */
464 static int lineno; /* line number of current line */
465 static long charno; /* current character number */
466 static long linecharno; /* charno of start of current line */
467 static char *dbp; /* pointer to start of current tag */
469 static const int invalidcharno = -1;
471 static node *nodehead; /* the head of the binary tree of tags */
472 static node *last_node; /* the last node created */
474 static linebuffer lb; /* the current line */
475 static linebuffer filebuf; /* a buffer containing the whole file */
476 static linebuffer token_name; /* a buffer containing a tag name */
478 /* boolean "functions" (see init) */
479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
480 static char
481 /* white chars */
482 *white = " \f\t\n\r\v",
483 /* not in a name */
484 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
485 /* token ending chars */
486 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
487 /* token starting chars */
488 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
489 /* valid in-token chars */
490 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
492 static bool append_to_tagfile; /* -a: append to tags */
493 /* The next five default to TRUE in C and derived languages. */
494 static bool typedefs; /* -t: create tags for C and Ada typedefs */
495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
496 /* 0 struct/enum/union decls, and C++ */
497 /* member functions. */
498 static bool constantypedefs; /* -d: create tags for C #define, enum */
499 /* constants and variables. */
500 /* -D: opposite of -d. Default under ctags. */
501 static bool globals; /* create tags for global variables */
502 static bool members; /* create tags for C member variables */
503 static bool declarations; /* --declarations: tag them and extern in C&Co*/
504 static bool no_line_directive; /* ignore #line directives (undocumented) */
505 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
506 static bool update; /* -u: update tags */
507 static bool vgrind_style; /* -v: create vgrind style index output */
508 static bool no_warnings; /* -w: suppress warnings (undocumented) */
509 static bool cxref_style; /* -x: create cxref style output */
510 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
511 static bool ignoreindent; /* -I: ignore indentation in C */
512 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
514 /* STDIN is defined in LynxOS system headers */
515 #ifdef STDIN
516 # undef STDIN
517 #endif
519 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
520 static bool parsing_stdin; /* --parse-stdin used */
522 static regexp *p_head; /* list of all regexps */
523 static bool need_filebuf; /* some regexes are multi-line */
525 static struct option longopts[] =
527 { "append", no_argument, NULL, 'a' },
528 { "packages-only", no_argument, &packages_only, TRUE },
529 { "c++", no_argument, NULL, 'C' },
530 { "declarations", no_argument, &declarations, TRUE },
531 { "no-line-directive", no_argument, &no_line_directive, TRUE },
532 { "no-duplicates", no_argument, &no_duplicates, TRUE },
533 { "help", no_argument, NULL, 'h' },
534 { "help", no_argument, NULL, 'H' },
535 { "ignore-indentation", no_argument, NULL, 'I' },
536 { "language", required_argument, NULL, 'l' },
537 { "members", no_argument, &members, TRUE },
538 { "no-members", no_argument, &members, FALSE },
539 { "output", required_argument, NULL, 'o' },
540 { "regex", required_argument, NULL, 'r' },
541 { "no-regex", no_argument, NULL, 'R' },
542 { "ignore-case-regex", required_argument, NULL, 'c' },
543 { "parse-stdin", required_argument, NULL, STDIN },
544 { "version", no_argument, NULL, 'V' },
546 #if CTAGS /* Ctags options */
547 { "backward-search", no_argument, NULL, 'B' },
548 { "cxref", no_argument, NULL, 'x' },
549 { "defines", no_argument, NULL, 'd' },
550 { "globals", no_argument, &globals, TRUE },
551 { "typedefs", no_argument, NULL, 't' },
552 { "typedefs-and-c++", no_argument, NULL, 'T' },
553 { "update", no_argument, NULL, 'u' },
554 { "vgrind", no_argument, NULL, 'v' },
555 { "no-warn", no_argument, NULL, 'w' },
557 #else /* Etags options */
558 { "no-defines", no_argument, NULL, 'D' },
559 { "no-globals", no_argument, &globals, FALSE },
560 { "include", required_argument, NULL, 'i' },
561 #endif
562 { NULL }
565 static compressor compressors[] =
567 { "z", "gzip -d -c"},
568 { "Z", "gzip -d -c"},
569 { "gz", "gzip -d -c"},
570 { "GZ", "gzip -d -c"},
571 { "bz2", "bzip2 -d -c" },
572 { NULL }
576 * Language stuff.
579 /* Ada code */
580 static char *Ada_suffixes [] =
581 { "ads", "adb", "ada", NULL };
582 static char Ada_help [] =
583 "In Ada code, functions, procedures, packages, tasks and types are\n\
584 tags. Use the `--packages-only' option to create tags for\n\
585 packages only.\n\
586 Ada tag names have suffixes indicating the type of entity:\n\
587 Entity type: Qualifier:\n\
588 ------------ ----------\n\
589 function /f\n\
590 procedure /p\n\
591 package spec /s\n\
592 package body /b\n\
593 type /t\n\
594 task /k\n\
595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
597 will just search for any tag `bidule'.";
599 /* Assembly code */
600 static char *Asm_suffixes [] =
601 { "a", /* Unix assembler */
602 "asm", /* Microcontroller assembly */
603 "def", /* BSO/Tasking definition includes */
604 "inc", /* Microcontroller include files */
605 "ins", /* Microcontroller include files */
606 "s", "sa", /* Unix assembler */
607 "S", /* cpp-processed Unix assembler */
608 "src", /* BSO/Tasking C compiler output */
609 NULL
611 static char Asm_help [] =
612 "In assembler code, labels appearing at the beginning of a line,\n\
613 followed by a colon, are tags.";
616 /* Note that .c and .h can be considered C++, if the --c++ flag was
617 given, or if the `class' or `template' keywords are met inside the file.
618 That is why default_C_entries is called for these. */
619 static char *default_C_suffixes [] =
620 { "c", "h", NULL };
621 #if CTAGS /* C help for Ctags */
622 static char default_C_help [] =
623 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
624 Use -T to tag definitions of `struct', `union' and `enum'.\n\
625 Use -d to tag `#define' macro definitions and `enum' constants.\n\
626 Use --globals to tag global variables.\n\
627 You can tag function declarations and external variables by\n\
628 using `--declarations', and struct members by using `--members'.";
629 #else /* C help for Etags */
630 static char default_C_help [] =
631 "In C code, any C function or typedef is a tag, and so are\n\
632 definitions of `struct', `union' and `enum'. `#define' macro\n\
633 definitions and `enum' constants are tags unless you specify\n\
634 `--no-defines'. Global variables are tags unless you specify\n\
635 `--no-globals' and so are struct members unless you specify\n\
636 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
637 `--no-members' can make the tags table file much smaller.\n\
638 You can tag function declarations and external variables by\n\
639 using `--declarations'.";
640 #endif /* C help for Ctags and Etags */
642 static char *Cplusplus_suffixes [] =
643 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
644 "M", /* Objective C++ */
645 "pdb", /* Postscript with C syntax */
646 NULL };
647 static char Cplusplus_help [] =
648 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
649 --help --lang=c --lang=c++ for full help.)\n\
650 In addition to C tags, member functions are also recognized. Member\n\
651 variables are recognized unless you use the `--no-members' option.\n\
652 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
653 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
654 `operator+'.";
656 static char *Cjava_suffixes [] =
657 { "java", NULL };
658 static char Cjava_help [] =
659 "In Java code, all the tags constructs of C and C++ code are\n\
660 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
663 static char *Cobol_suffixes [] =
664 { "COB", "cob", NULL };
665 static char Cobol_help [] =
666 "In Cobol code, tags are paragraph names; that is, any word\n\
667 starting in column 8 and followed by a period.";
669 static char *Cstar_suffixes [] =
670 { "cs", "hs", NULL };
672 static char *Erlang_suffixes [] =
673 { "erl", "hrl", NULL };
674 static char Erlang_help [] =
675 "In Erlang code, the tags are the functions, records and macros\n\
676 defined in the file.";
/* File name suffixes recognized as Forth source.  The list is
   NULL-terminated.  It is file-local (static), matching every other
   suffix table in this file; it is only reached through the language
   table.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
680 static char Forth_help [] =
681 "In Forth code, tags are words defined by `:',\n\
682 constant, code, create, defer, value, variable, buffer:, field.";
684 static char *Fortran_suffixes [] =
685 { "F", "f", "f90", "for", NULL };
686 static char Fortran_help [] =
687 "In Fortran code, functions, subroutines and block data are tags.";
689 static char *HTML_suffixes [] =
690 { "htm", "html", "shtml", NULL };
691 static char HTML_help [] =
692 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
693 `h3' headers. Also, tags are `name=' in anchors and all\n\
694 occurrences of `id='.";
696 static char *Lisp_suffixes [] =
697 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
698 static char Lisp_help [] =
699 "In Lisp code, any function defined with `defun', any variable\n\
700 defined with `defvar' or `defconst', and in general the first\n\
701 argument of any expression that starts with `(def' in column zero\n\
702 is a tag.";
704 static char *Lua_suffixes [] =
705 { "lua", "LUA", NULL };
706 static char Lua_help [] =
707 "In Lua scripts, all functions are tags.";
709 static char *Makefile_filenames [] =
710 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
711 static char Makefile_help [] =
712 "In makefiles, targets are tags; additionally, variables are tags\n\
713 unless you specify `--no-globals'.";
715 static char *Objc_suffixes [] =
716 { "lm", /* Objective lex file */
717 "m", /* Objective C file */
718 NULL };
719 static char Objc_help [] =
720 "In Objective C code, tags include Objective C definitions for classes,\n\
721 class categories, methods and protocols. Tags for variables and\n\
722 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
723 (Use --help --lang=c --lang=objc --lang=java for full help.)";
725 static char *Pascal_suffixes [] =
726 { "p", "pas", NULL };
727 static char Pascal_help [] =
728 "In Pascal code, the tags are the functions and procedures defined\n\
729 in the file.";
730 /* " // this is for working around an Emacs highlighting bug... */
732 static char *Perl_suffixes [] =
733 { "pl", "pm", NULL };
734 static char *Perl_interpreters [] =
735 { "perl", "@PERL@", NULL };
736 static char Perl_help [] =
737 "In Perl code, the tags are the packages, subroutines and variables\n\
738 defined by the `package', `sub', `my' and `local' keywords. Use\n\
739 `--globals' if you want to tag global variables. Tags for\n\
740 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
741 defined in the default package is `main::SUB'.";
743 static char *PHP_suffixes [] =
744 { "php", "php3", "php4", NULL };
745 static char PHP_help [] =
746 "In PHP code, tags are functions, classes and defines. Unless you use\n\
747 the `--no-members' option, vars are tags too.";
749 static char *plain_C_suffixes [] =
750 { "pc", /* Pro*C file */
751 NULL };
753 static char *PS_suffixes [] =
754 { "ps", "psw", NULL }; /* .psw is for PSWrap */
755 static char PS_help [] =
756 "In PostScript code, the tags are the functions.";
758 static char *Prolog_suffixes [] =
759 { "prolog", NULL };
760 static char Prolog_help [] =
761 "In Prolog code, tags are predicates and rules at the beginning of\n\
762 line.";
764 static char *Python_suffixes [] =
765 { "py", NULL };
766 static char Python_help [] =
767 "In Python code, `def' or `class' at the beginning of a line\n\
768 generate a tag.";
770 /* Can't do the `SCM' or `scm' prefix with a version number. */
771 static char *Scheme_suffixes [] =
772 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
773 static char Scheme_help [] =
774 "In Scheme code, tags include anything defined with `def' or with a\n\
775 construct whose name starts with `def'. They also include\n\
776 variables set with `set!' at top level in the file.";
778 static char *TeX_suffixes [] =
779 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
780 static char TeX_help [] =
781 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
782 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
783 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
784 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
785 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
787 Other commands can be specified by setting the environment variable\n\
788 `TEXTAGS' to a colon-separated list like, for example,\n\
789 TEXTAGS=\"mycommand:myothercommand\".";
792 static char *Texinfo_suffixes [] =
793 { "texi", "texinfo", "txi", NULL };
794 static char Texinfo_help [] =
795 "for texinfo files, lines starting with @node are tagged.";
797 static char *Yacc_suffixes [] =
798 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
799 static char Yacc_help [] =
800 "In Bison or Yacc input files, each rule defines as a tag the\n\
801 nonterminal it constructs. The portions of the file that contain\n\
802 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
803 for full help).";
805 static char auto_help [] =
806 "`auto' is not a real language, it indicates to use\n\
807 a default language for files base on file name suffix and file contents.";
809 static char none_help [] =
810 "`none' is not a real language, it indicates to only do\n\
811 regexp processing on files.";
813 static char no_lang_help [] =
814 "No detailed help available for this language.";
818 * Table of languages.
820 * It is ok for a given function to be listed under more than one
821 * name. I just didn't.
824 static language lang_names [] =
826 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
827 { "asm", Asm_help, Asm_labels, Asm_suffixes },
828 { "c", default_C_help, default_C_entries, default_C_suffixes },
829 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
830 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
831 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
832 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
833 { "forth", Forth_help, Forth_words, Forth_suffixes },
834 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
835 { "html", HTML_help, HTML_labels, HTML_suffixes },
836 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
837 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
838 { "lua", Lua_help, Lua_functions, Lua_suffixes },
839 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
840 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
841 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
842 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
843 { "php", PHP_help, PHP_functions, PHP_suffixes },
844 { "postscript",PS_help, PS_functions, PS_suffixes },
845 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
846 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
847 { "python", Python_help, Python_functions, Python_suffixes },
848 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
849 { "tex", TeX_help, TeX_commands, TeX_suffixes },
850 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
851 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
852 { "auto", auto_help }, /* default guessing scheme */
853 { "none", none_help, just_read_file }, /* regexp matching only */
854 { NULL } /* end of list */
/* Print to standard output one line per entry of lang_names: the
   language name, then its whole-file names (if any), then its suffixes
   (if any, each shown with a leading dot).  Follow the list with a
   fixed explanation of how a language is chosen when none is given. */
858 static void
859 print_language_names ()
861 language *lang;
862 char **name, **ext;
864 puts ("\nThese are the currently supported languages, along with the\n\
865 default file names and dot suffixes:");
/* The table is terminated by the entry whose name is NULL. */
866 for (lang = lang_names; lang->name != NULL; lang++)
868 printf (" %-*s", 10, lang->name);
/* Both per-language lists are optional and NULL-terminated. */
869 if (lang->filenames != NULL)
870 for (name = lang->filenames; *name != NULL; name++)
871 printf (" %s", *name);
872 if (lang->suffixes != NULL)
873 for (ext = lang->suffixes; *ext != NULL; ext++)
874 printf (" .%s", *ext);
/* End the line for this language. */
875 puts ("");
877 puts ("where `auto' means use default language for files based on file\n\
878 name suffix, and `none' means only do regexp processing on files.\n\
879 If no language is specified and no matching suffix is found,\n\
880 the first line of the file is read for a sharp-bang (#!) sequence\n\
881 followed by the name of an interpreter. If no such sequence is found,\n\
882 Fortran is tried first; if no tags are found, C is tried next.\n\
883 When parsing any C file, a \"class\" or \"template\" keyword\n\
884 switches to C++.");
885 puts ("Compressed files are supported using gzip and bzip2.\n\
887 For detailed help on a given language use, for example,\n\
888 etags --help --lang=ada.");
891 #ifndef EMACS_NAME
892 # define EMACS_NAME "standalone"
893 #endif
894 #ifndef VERSION
895 # define VERSION "17.38.1.4"
896 #endif
897 static void
898 print_version ()
900 /* Makes it easier to update automatically. */
901 char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
903 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
904 puts (emacs_copyright);
905 puts ("This program is distributed under the terms in ETAGS.README");
907 exit (EXIT_SUCCESS);
/* When this is FALSE (the default set here), print_help leaves out the
   help paragraphs for --no-line-directive, --no-duplicates and
   --no-warn. */
910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
912 #endif
/* Print help and exit with EXIT_SUCCESS; does not return.
   ARGBUFFER is the argument list built by main, terminated by an
   element whose arg_type is at_end.  If it contains any at_language
   elements, only the help strings of those languages are printed,
   separated by empty lines.  Otherwise the usage line, the description
   of every option (the set differs between ctags and etags, selected by
   CTAGS), the list of languages and the bug-report address are
   printed. */
914 static void
915 print_help (argbuffer)
916 argument *argbuffer;
918 bool help_for_lang = FALSE;
/* First pass: language-specific help, if any language was named. */
920 for (; argbuffer->arg_type != at_end; argbuffer++)
921 if (argbuffer->arg_type == at_language)
923 if (help_for_lang)
924 puts ("");
925 puts (argbuffer->lang->help);
926 help_for_lang = TRUE;
929 if (help_for_lang)
930 exit (EXIT_SUCCESS);
/* No language was named: print the general help. */
932 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
934 These are the options accepted by %s.\n", progname, progname);
935 if (NO_LONG_OPTIONS)
936 puts ("WARNING: long option names do not work with this executable,\n\
937 as it is not linked with GNU getopt.");
938 else
939 puts ("You may use unambiguous abbreviations for the long option names.");
940 puts (" A - as file name means read names from stdin (one per line).\n\
941 Absolute names are stored in the output file as they are.\n\
942 Relative ones are stored relative to the output file's directory.\n");
944 puts ("-a, --append\n\
945 Append tag entries to existing tags file.");
947 puts ("--packages-only\n\
948 For Ada files, only generate tags for packages.");
950 if (CTAGS)
951 puts ("-B, --backward-search\n\
952 Write the search commands for the tag entries using '?', the\n\
953 backward-search command instead of '/', the forward-search command.");
955 /* This option is mostly obsolete, because etags can now automatically
956 detect C++. Retained for backward compatibility and for debugging and
957 experimentation. In principle, we could want to tag as C++ even
958 before any "class" or "template" keyword.
959 puts ("-C, --c++\n\
960 Treat files whose name suffix defaults to C language as C++ files.");
963 puts ("--declarations\n\
964 In C and derived languages, create tags for function declarations,");
965 if (CTAGS)
966 puts ("\tand create tags for extern variables if --globals is used.");
967 else
968 puts
969 ("\tand create tags for extern variables unless --no-globals is used.");
971 if (CTAGS)
972 puts ("-d, --defines\n\
973 Create tag entries for C #define constants and enum constants, too.");
974 else
975 puts ("-D, --no-defines\n\
976 Don't create tag entries for C #define constants and enum constants.\n\
977 This makes the tags file smaller.");
979 if (!CTAGS)
980 puts ("-i FILE, --include=FILE\n\
981 Include a note in tag file indicating that, when searching for\n\
982 a tag, one should also consult the tags file FILE after\n\
983 checking the current file.");
985 puts ("-l LANG, --language=LANG\n\
986 Force the following files to be considered as written in the\n\
987 named language up to the next --language=LANG option.");
989 if (CTAGS)
990 puts ("--globals\n\
991 Create tag entries for global variables in some languages.");
992 else
993 puts ("--no-globals\n\
994 Do not create tag entries for global variables in some\n\
995 languages. This makes the tags file smaller.");
997 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
998 puts ("--no-line-directive\n\
999 Ignore #line preprocessor directives in C and derived languages.");
1001 if (CTAGS)
1002 puts ("--members\n\
1003 Create tag entries for members of structures in some languages.");
1004 else
1005 puts ("--no-members\n\
1006 Do not create tag entries for members of structures\n\
1007 in some languages.");
1009 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010 Make a tag for each line matching a regular expression pattern\n\
1011 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1013 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015 puts (" If TAGNAME/ is present, the tags created are named.\n\
1016 For example Tcl named tags can be created with:\n\
1017 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019 `m' means to allow multi-line matches, `s' implies `m' and\n\
1020 causes dot to match any character, including newline.");
1022 puts ("-R, --no-regex\n\
1023 Don't create tags from regexps for the following files.");
1025 puts ("-I, --ignore-indentation\n\
1026 In C and C++ do not assume that a closing brace in the first\n\
1027 column is the final brace of a function or structure definition.");
1029 puts ("-o FILE, --output=FILE\n\
1030 Write the tags to FILE.");
1032 puts ("--parse-stdin=NAME\n\
1033 Read from standard input and record tags as belonging to file NAME.");
1035 if (CTAGS)
1037 puts ("-t, --typedefs\n\
1038 Generate tag entries for C and Ada typedefs.");
1039 puts ("-T, --typedefs-and-c++\n\
1040 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041 and C++ member functions.");
1044 if (CTAGS)
1045 puts ("-u, --update\n\
1046 Update the tag entries for the given files, leaving tag\n\
1047 entries for other files in place. Currently, this is\n\
1048 implemented by deleting the existing entries for the given\n\
1049 files and then rewriting the new entries at the end of the\n\
1050 tags file. It is often faster to simply rebuild the entire\n\
1051 tag file than to use this.");
1053 if (CTAGS)
1055 puts ("-v, --vgrind\n\
1056 Print on the standard output an index of items intended for\n\
1057 human consumption, similar to the output of vgrind. The index\n\
1058 is sorted, and gives the page number of each item.");
1060 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061 puts ("-w, --no-duplicates\n\
1062 Do not create duplicate tag entries, for compatibility with\n\
1063 traditional ctags.");
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-warn\n\
1067 Suppress warning messages about duplicate tag entries.");
1069 puts ("-x, --cxref\n\
1070 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071 The output uses line numbers instead of page numbers, but\n\
1072 beyond that the differences are cosmetic; try both to see\n\
1073 which you like.");
1076 puts ("-V, --version\n\
1077 Print the version of the program.\n\
1078 -h, --help\n\
1079 Print this help message.\n\
1080 Followed by one or more `--language' options prints detailed\n\
1081 help about tag generation for the specified languages.");
1083 print_language_names ();
1085 puts ("");
1086 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1088 exit (EXIT_SUCCESS);
1093 main (argc, argv)
1094 int argc;
1095 char *argv[];
1097 int i;
1098 unsigned int nincluded_files;
1099 char **included_files;
1100 argument *argbuffer;
1101 int current_arg, file_count;
1102 linebuffer filename_lb;
1103 bool help_asked = FALSE;
1104 char *optstring;
1105 int opt;
1108 #ifdef DOS_NT
1109 _fmode = O_BINARY; /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1112 progname = argv[0];
1113 nincluded_files = 0;
1114 included_files = xnew (argc, char *);
1115 current_arg = 0;
1116 file_count = 0;
1118 /* Allocate enough no matter what happens. Overkill, but each one
1119 is small. */
1120 argbuffer = xnew (argc, argument);
1123 * Always find typedefs and structure tags.
1124 * Also default to find macro constants, enum constants, struct
1125 * members and global variables. Do it for both etags and ctags.
1127 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128 globals = members = TRUE;
1130 /* When the optstring begins with a '-' getopt_long does not rearrange the
1131 non-options arguments to be at the end, but leaves them alone. */
1132 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133 "ac:Cf:Il:o:r:RSVhH",
1134 (CTAGS) ? "BxdtTuvw" : "Di:");
1136 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137 switch (opt)
1139 case 0:
1140 /* If getopt returns 0, then it has already processed a
1141 long-named option. We should do nothing. */
1142 break;
1144 case 1:
1145 /* This means that a file name has been seen. Record it. */
1146 argbuffer[current_arg].arg_type = at_filename;
1147 argbuffer[current_arg].what = optarg;
1148 ++current_arg;
1149 ++file_count;
1150 break;
1152 case STDIN:
1153 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1154 argbuffer[current_arg].arg_type = at_stdin;
1155 argbuffer[current_arg].what = optarg;
1156 ++current_arg;
1157 ++file_count;
1158 if (parsing_stdin)
1159 fatal ("cannot parse standard input more than once", (char *)NULL);
1160 parsing_stdin = TRUE;
1161 break;
1163 /* Common options. */
1164 case 'a': append_to_tagfile = TRUE; break;
1165 case 'C': cplusplus = TRUE; break;
1166 case 'f': /* for compatibility with old makefiles */
1167 case 'o':
1168 if (tagfile)
1170 error ("-o option may only be given once.", (char *)NULL);
1171 suggest_asking_for_help ();
1172 /* NOTREACHED */
1174 tagfile = optarg;
1175 break;
1176 case 'I':
1177 case 'S': /* for backward compatibility */
1178 ignoreindent = TRUE;
1179 break;
1180 case 'l':
1182 language *lang = get_language_from_langname (optarg);
1183 if (lang != NULL)
1185 argbuffer[current_arg].lang = lang;
1186 argbuffer[current_arg].arg_type = at_language;
1187 ++current_arg;
1190 break;
1191 case 'c':
1192 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193 optarg = concat (optarg, "i", ""); /* memory leak here */
1194 /* FALLTHRU */
1195 case 'r':
1196 argbuffer[current_arg].arg_type = at_regexp;
1197 argbuffer[current_arg].what = optarg;
1198 ++current_arg;
1199 break;
1200 case 'R':
1201 argbuffer[current_arg].arg_type = at_regexp;
1202 argbuffer[current_arg].what = NULL;
1203 ++current_arg;
1204 break;
1205 case 'V':
1206 print_version ();
1207 break;
1208 case 'h':
1209 case 'H':
1210 help_asked = TRUE;
1211 break;
1213 /* Etags options */
1214 case 'D': constantypedefs = FALSE; break;
1215 case 'i': included_files[nincluded_files++] = optarg; break;
1217 /* Ctags options. */
1218 case 'B': searchar = '?'; break;
1219 case 'd': constantypedefs = TRUE; break;
1220 case 't': typedefs = TRUE; break;
1221 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1222 case 'u': update = TRUE; break;
1223 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1224 case 'x': cxref_style = TRUE; break;
1225 case 'w': no_warnings = TRUE; break;
1226 default:
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1231 /* No more options. Store the rest of arguments. */
1232 for (; optind < argc; optind++)
1234 argbuffer[current_arg].arg_type = at_filename;
1235 argbuffer[current_arg].what = argv[optind];
1236 ++current_arg;
1237 ++file_count;
1240 argbuffer[current_arg].arg_type = at_end;
1242 if (help_asked)
1243 print_help (argbuffer);
1244 /* NOTREACHED */
1246 if (nincluded_files == 0 && file_count == 0)
1248 error ("no input files specified.", (char *)NULL);
1249 suggest_asking_for_help ();
1250 /* NOTREACHED */
1253 if (tagfile == NULL)
1254 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1255 cwd = etags_getcwd (); /* the current working directory */
1256 if (cwd[strlen (cwd) - 1] != '/')
1258 char *oldcwd = cwd;
1259 cwd = concat (oldcwd, "/", "");
1260 free (oldcwd);
1263 /* Compute base directory for relative file names. */
1264 if (streq (tagfile, "-")
1265 || strneq (tagfile, "/dev/", 5))
1266 tagfiledir = cwd; /* relative file names are relative to cwd */
1267 else
1269 canonicalize_filename (tagfile);
1270 tagfiledir = absolute_dirname (tagfile, cwd);
1273 init (); /* set up boolean "functions" */
1275 linebuffer_init (&lb);
1276 linebuffer_init (&filename_lb);
1277 linebuffer_init (&filebuf);
1278 linebuffer_init (&token_name);
1280 if (!CTAGS)
1282 if (streq (tagfile, "-"))
1284 tagf = stdout;
1285 #ifdef DOS_NT
1286 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1287 doesn't take effect until after `stdout' is already open). */
1288 if (!isatty (fileno (stdout)))
1289 setmode (fileno (stdout), O_BINARY);
1290 #endif /* DOS_NT */
1292 else
1293 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1294 if (tagf == NULL)
1295 pfatal (tagfile);
1299 * Loop through files finding functions.
1301 for (i = 0; i < current_arg; i++)
1303 static language *lang; /* non-NULL if language is forced */
1304 char *this_file;
1306 switch (argbuffer[i].arg_type)
1308 case at_language:
1309 lang = argbuffer[i].lang;
1310 break;
1311 case at_regexp:
1312 analyse_regex (argbuffer[i].what);
1313 break;
1314 case at_filename:
1315 this_file = argbuffer[i].what;
1316 /* Input file named "-" means read file names from stdin
1317 (one per line) and use them. */
1318 if (streq (this_file, "-"))
1320 if (parsing_stdin)
1321 fatal ("cannot parse standard input AND read file names from it",
1322 (char *)NULL);
1323 while (readline_internal (&filename_lb, stdin) > 0)
1324 process_file_name (filename_lb.buffer, lang);
1326 else
1327 process_file_name (this_file, lang);
1328 break;
1329 case at_stdin:
1330 this_file = argbuffer[i].what;
1331 process_file (stdin, this_file, lang);
1332 break;
1336 free_regexps ();
1337 free (lb.buffer);
1338 free (filebuf.buffer);
1339 free (token_name.buffer);
1341 if (!CTAGS || cxref_style)
1343 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1344 put_entries (nodehead);
1345 free_tree (nodehead);
1346 nodehead = NULL;
1347 if (!CTAGS)
1349 fdesc *fdp;
1351 /* Output file entries that have no tags. */
1352 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1353 if (!fdp->written)
1354 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1356 while (nincluded_files-- > 0)
1357 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1359 if (fclose (tagf) == EOF)
1360 pfatal (tagfile);
1363 exit (EXIT_SUCCESS);
1366 /* From here on, we are in (CTAGS && !cxref_style) */
1367 if (update)
1369 char cmd[BUFSIZ];
1370 for (i = 0; i < current_arg; ++i)
1372 switch (argbuffer[i].arg_type)
1374 case at_filename:
1375 case at_stdin:
1376 break;
1377 default:
1378 continue; /* the for loop */
1380 sprintf (cmd,
1381 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1382 tagfile, argbuffer[i].what, tagfile);
1383 if (system (cmd) != EXIT_SUCCESS)
1384 fatal ("failed to execute shell command", (char *)NULL);
1386 append_to_tagfile = TRUE;
1389 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390 if (tagf == NULL)
1391 pfatal (tagfile);
1392 put_entries (nodehead); /* write all the tags (CTAGS) */
1393 free_tree (nodehead);
1394 nodehead = NULL;
1395 if (fclose (tagf) == EOF)
1396 pfatal (tagfile);
1398 if (CTAGS)
1399 if (append_to_tagfile || update)
1401 char cmd[2*BUFSIZ+20];
1402 /* Maybe these should be used:
1403 setenv ("LC_COLLATE", "C", 1);
1404 setenv ("LC_ALL", "C", 1); */
1405 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1406 exit (system (cmd));
1408 return EXIT_SUCCESS;
1413 * Return a compressor given the file name. If EXTPTR is non-zero,
1414 * return a pointer into FILE where the compressor-specific
1415 * extension begins. If no compressor is found, NULL is returned
1416 * and EXTPTR is not significant.
1417 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1419 static compressor *
1420 get_compressor_from_suffix (file, extptr)
1421 char *file;
1422 char **extptr;
1424 compressor *compr;
1425 char *slash, *suffix;
1427 /* File has been processed by canonicalize_filename,
1428 so we don't need to consider backslashes on DOS_NT. */
1429 slash = etags_strrchr (file, '/');
1430 suffix = etags_strrchr (file, '.');
1431 if (suffix == NULL || suffix < slash)
1432 return NULL;
1433 if (extptr != NULL)
1434 *extptr = suffix;
1435 suffix += 1;
1436 /* Let those poor souls who live with DOS 8+3 file name limits get
1437 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1438 Only the first do loop is run if not MSDOS */
1441 for (compr = compressors; compr->suffix != NULL; compr++)
1442 if (streq (compr->suffix, suffix))
1443 return compr;
1444 if (!MSDOS)
1445 break; /* do it only once: not really a loop */
1446 if (extptr != NULL)
1447 *extptr = ++suffix;
1448 } while (*suffix != '\0');
1449 return NULL;
1455 * Return a language given the name.
1457 static language *
1458 get_language_from_langname (name)
1459 const char *name;
1461 language *lang;
1463 if (name == NULL)
1464 error ("empty language name", (char *)NULL);
1465 else
1467 for (lang = lang_names; lang->name != NULL; lang++)
1468 if (streq (name, lang->name))
1469 return lang;
1470 error ("unknown language \"%s\"", name);
1473 return NULL;
1478 * Return a language given the interpreter name.
1480 static language *
1481 get_language_from_interpreter (interpreter)
1482 char *interpreter;
1484 language *lang;
1485 char **iname;
1487 if (interpreter == NULL)
1488 return NULL;
1489 for (lang = lang_names; lang->name != NULL; lang++)
1490 if (lang->interpreters != NULL)
1491 for (iname = lang->interpreters; *iname != NULL; iname++)
1492 if (streq (*iname, interpreter))
1493 return lang;
1495 return NULL;
1501 * Return a language given the file name.
1503 static language *
1504 get_language_from_filename (file, case_sensitive)
1505 char *file;
1506 bool case_sensitive;
1508 language *lang;
1509 char **name, **ext, *suffix;
1511 /* Try whole file name first. */
1512 for (lang = lang_names; lang->name != NULL; lang++)
1513 if (lang->filenames != NULL)
1514 for (name = lang->filenames; *name != NULL; name++)
1515 if ((case_sensitive)
1516 ? streq (*name, file)
1517 : strcaseeq (*name, file))
1518 return lang;
1520 /* If not found, try suffix after last dot. */
1521 suffix = etags_strrchr (file, '.');
1522 if (suffix == NULL)
1523 return NULL;
1524 suffix += 1;
1525 for (lang = lang_names; lang->name != NULL; lang++)
1526 if (lang->suffixes != NULL)
1527 for (ext = lang->suffixes; *ext != NULL; ext++)
1528 if ((case_sensitive)
1529 ? streq (*ext, suffix)
1530 : strcaseeq (*ext, suffix))
1531 return lang;
1532 return NULL;
1537 * This routine is called on each file argument.
1539 static void
1540 process_file_name (file, lang)
1541 char *file;
1542 language *lang;
1544 struct stat stat_buf;
1545 FILE *inf;
1546 fdesc *fdp;
1547 compressor *compr;
1548 char *compressed_name, *uncompressed_name;
1549 char *ext, *real_name;
1550 int retval;
1552 canonicalize_filename (file);
1553 if (streq (file, tagfile) && !streq (tagfile, "-"))
1555 error ("skipping inclusion of %s in self.", file);
1556 return;
1558 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1560 compressed_name = NULL;
1561 real_name = uncompressed_name = savestr (file);
1563 else
1565 real_name = compressed_name = savestr (file);
1566 uncompressed_name = savenstr (file, ext - file);
1569 /* If the canonicalized uncompressed name
1570 has already been dealt with, skip it silently. */
1571 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1573 assert (fdp->infname != NULL);
1574 if (streq (uncompressed_name, fdp->infname))
1575 goto cleanup;
1578 if (stat (real_name, &stat_buf) != 0)
1580 /* Reset real_name and try with a different name. */
1581 real_name = NULL;
1582 if (compressed_name != NULL) /* try with the given suffix */
1584 if (stat (uncompressed_name, &stat_buf) == 0)
1585 real_name = uncompressed_name;
1587 else /* try all possible suffixes */
1589 for (compr = compressors; compr->suffix != NULL; compr++)
1591 compressed_name = concat (file, ".", compr->suffix);
1592 if (stat (compressed_name, &stat_buf) != 0)
1594 if (MSDOS)
1596 char *suf = compressed_name + strlen (file);
1597 size_t suflen = strlen (compr->suffix) + 1;
1598 for ( ; suf[1]; suf++, suflen--)
1600 memmove (suf, suf + 1, suflen);
1601 if (stat (compressed_name, &stat_buf) == 0)
1603 real_name = compressed_name;
1604 break;
1607 if (real_name != NULL)
1608 break;
1609 } /* MSDOS */
1610 free (compressed_name);
1611 compressed_name = NULL;
1613 else
1615 real_name = compressed_name;
1616 break;
1620 if (real_name == NULL)
1622 perror (file);
1623 goto cleanup;
1625 } /* try with a different name */
1627 if (!S_ISREG (stat_buf.st_mode))
1629 error ("skipping %s: it is not a regular file.", real_name);
1630 goto cleanup;
1632 if (real_name == compressed_name)
1634 char *cmd = concat (compr->command, " ", real_name);
1635 inf = (FILE *) popen (cmd, "r");
1636 free (cmd);
1638 else
1639 inf = fopen (real_name, "r");
1640 if (inf == NULL)
1642 perror (real_name);
1643 goto cleanup;
1646 process_file (inf, uncompressed_name, lang);
1648 if (real_name == compressed_name)
1649 retval = pclose (inf);
1650 else
1651 retval = fclose (inf);
1652 if (retval < 0)
1653 pfatal (file);
1655 cleanup:
1656 free (compressed_name);
1657 free (uncompressed_name);
1658 last_node = NULL;
1659 curfdp = NULL;
1660 return;
1663 static void
1664 process_file (fh, fn, lang)
1665 FILE *fh;
1666 char *fn;
1667 language *lang;
1669 static const fdesc emptyfdesc;
1670 fdesc *fdp;
1672 /* Create a new input file description entry. */
1673 fdp = xnew (1, fdesc);
1674 *fdp = emptyfdesc;
1675 fdp->next = fdhead;
1676 fdp->infname = savestr (fn);
1677 fdp->lang = lang;
1678 fdp->infabsname = absolute_filename (fn, cwd);
1679 fdp->infabsdir = absolute_dirname (fn, cwd);
1680 if (filename_is_absolute (fn))
1682 /* An absolute file name. Canonicalize it. */
1683 fdp->taggedfname = absolute_filename (fn, NULL);
1685 else
1687 /* A file name relative to cwd. Make it relative
1688 to the directory of the tags file. */
1689 fdp->taggedfname = relative_filename (fn, tagfiledir);
1691 fdp->usecharno = TRUE; /* use char position when making tags */
1692 fdp->prop = NULL;
1693 fdp->written = FALSE; /* not written on tags file yet */
1695 fdhead = fdp;
1696 curfdp = fdhead; /* the current file description */
1698 find_entries (fh);
1700 /* If not Ctags, and if this is not metasource and if it contained no #line
1701 directives, we can write the tags and free all nodes pointing to
1702 curfdp. */
1703 if (!CTAGS
1704 && curfdp->usecharno /* no #line directives in this file */
1705 && !curfdp->lang->metasource)
1707 node *np, *prev;
1709 /* Look for the head of the sublist relative to this file. See add_node
1710 for the structure of the node tree. */
1711 prev = NULL;
1712 for (np = nodehead; np != NULL; prev = np, np = np->left)
1713 if (np->fdp == curfdp)
1714 break;
1716 /* If we generated tags for this file, write and delete them. */
1717 if (np != NULL)
1719 /* This is the head of the last sublist, if any. The following
1720 instructions depend on this being true. */
1721 assert (np->left == NULL);
1723 assert (fdhead == curfdp);
1724 assert (last_node->fdp == curfdp);
1725 put_entries (np); /* write tags for file curfdp->taggedfname */
1726 free_tree (np); /* remove the written nodes */
1727 if (prev == NULL)
1728 nodehead = NULL; /* no nodes left */
1729 else
1730 prev->left = NULL; /* delete the pointer to the sublist */
1736 * This routine sets up the boolean pseudo-functions which work
1737 * by setting boolean flags dependent upon the corresponding character.
1738 * Every char which is NOT in that string is not a white char. Therefore,
1739 * all of the array "_wht" is set to FALSE, and then the elements
1740 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1741 * of a char is TRUE if it is the string "white", else FALSE.
1743 static void
1744 init ()
1746 register char *sp;
1747 register int i;
1749 for (i = 0; i < CHARS; i++)
1750 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1751 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1752 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1753 notinname('\0') = notinname('\n');
1754 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1755 begtoken('\0') = begtoken('\n');
1756 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1757 intoken('\0') = intoken('\n');
1758 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1759 endtoken('\0') = endtoken('\n');
1763 * This routine opens the specified file and calls the function
1764 * which finds the function and type definitions.
1766 static void
1767 find_entries (inf)
1768 FILE *inf;
1770 char *cp;
1771 language *lang = curfdp->lang;
1772 Lang_function *parser = NULL;
1774 /* If user specified a language, use it. */
1775 if (lang != NULL && lang->function != NULL)
1777 parser = lang->function;
1780 /* Else try to guess the language given the file name. */
1781 if (parser == NULL)
1783 lang = get_language_from_filename (curfdp->infname, TRUE);
1784 if (lang != NULL && lang->function != NULL)
1786 curfdp->lang = lang;
1787 parser = lang->function;
1791 /* Else look for sharp-bang as the first two characters. */
1792 if (parser == NULL
1793 && readline_internal (&lb, inf) > 0
1794 && lb.len >= 2
1795 && lb.buffer[0] == '#'
1796 && lb.buffer[1] == '!')
1798 char *lp;
1800 /* Set lp to point at the first char after the last slash in the
1801 line or, if no slashes, at the first nonblank. Then set cp to
1802 the first successive blank and terminate the string. */
1803 lp = etags_strrchr (lb.buffer+2, '/');
1804 if (lp != NULL)
1805 lp += 1;
1806 else
1807 lp = skip_spaces (lb.buffer + 2);
1808 cp = skip_non_spaces (lp);
1809 *cp = '\0';
1811 if (strlen (lp) > 0)
1813 lang = get_language_from_interpreter (lp);
1814 if (lang != NULL && lang->function != NULL)
1816 curfdp->lang = lang;
1817 parser = lang->function;
1822 /* We rewind here, even if inf may be a pipe. We fail if the
1823 length of the first line is longer than the pipe block size,
1824 which is unlikely. */
1825 rewind (inf);
1827 /* Else try to guess the language given the case insensitive file name. */
1828 if (parser == NULL)
1830 lang = get_language_from_filename (curfdp->infname, FALSE);
1831 if (lang != NULL && lang->function != NULL)
1833 curfdp->lang = lang;
1834 parser = lang->function;
1838 /* Else try Fortran or C. */
1839 if (parser == NULL)
1841 node *old_last_node = last_node;
1843 curfdp->lang = get_language_from_langname ("fortran");
1844 find_entries (inf);
1846 if (old_last_node == last_node)
1847 /* No Fortran entries found. Try C. */
1849 /* We do not tag if rewind fails.
1850 Only the file name will be recorded in the tags file. */
1851 rewind (inf);
1852 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1853 find_entries (inf);
1855 return;
1858 if (!no_line_directive
1859 && curfdp->lang != NULL && curfdp->lang->metasource)
1860 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1861 file, or anyway we parsed a file that is automatically generated from
1862 this one. If this is the case, the bingo.c file contained #line
1863 directives that generated tags pointing to this file. Let's delete
1864 them all before parsing this file, which is the real source. */
1866 fdesc **fdpp = &fdhead;
1867 while (*fdpp != NULL)
1868 if (*fdpp != curfdp
1869 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1870 /* We found one of those! We must delete both the file description
1871 and all tags referring to it. */
1873 fdesc *badfdp = *fdpp;
1875 /* Delete the tags referring to badfdp->taggedfname
1876 that were obtained from badfdp->infname. */
1877 invalidate_nodes (badfdp, &nodehead);
1879 *fdpp = badfdp->next; /* remove the bad description from the list */
1880 free_fdesc (badfdp);
1882 else
1883 fdpp = &(*fdpp)->next; /* advance the list pointer */
1886 assert (parser != NULL);
1888 /* Generic initialisations before reading from file. */
1889 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1891 /* Generic initialisations before parsing file with readline. */
1892 lineno = 0; /* reset global line number */
1893 charno = 0; /* reset global char number */
1894 linecharno = 0; /* reset global char number of line start */
1896 parser (inf);
1898 regex_tag_multiline ();
1903 * Check whether an implicitly named tag should be created,
1904 * then call `pfnote'.
1905 * NAME is a string that is internally copied by this function.
1907 * TAGS format specification
1908 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1909 * The following is explained in some more detail in etc/ETAGS.EBNF.
1911 * make_tag creates tags with "implicit tag names" (unnamed tags)
1912 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1913 * 1. NAME does not contain any of the characters in NONAM;
1914 * 2. LINESTART contains name as either a rightmost, or rightmost but
1915 * one character, substring;
1916 * 3. the character, if any, immediately before NAME in LINESTART must
1917 * be a character in NONAM;
1918 * 4. the character, if any, immediately after NAME in LINESTART must
1919 * also be a character in NONAM.
1921 * The implementation uses the notinname() macro, which recognises the
1922 * characters stored in the string `nonam'.
1923 * etags.el needs to use the same characters that are in NONAM.
1925 static void
1926 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1927 char *name; /* tag name, or NULL if unnamed */
1928 int namelen; /* tag length */
1929 bool is_func; /* tag is a function */
1930 char *linestart; /* start of the line where tag is */
1931 int linelen; /* length of the line where tag is */
1932 int lno; /* line number */
1933 long cno; /* character number */
1935 bool named = (name != NULL && namelen > 0);
1937 if (!CTAGS && named) /* maybe set named to false */
1938 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1939 such that etags.el can guess a name from it. */
1941 int i;
1942 register char *cp = name;
1944 for (i = 0; i < namelen; i++)
1945 if (notinname (*cp++))
1946 break;
1947 if (i == namelen) /* rule #1 */
1949 cp = linestart + linelen - namelen;
1950 if (notinname (linestart[linelen-1]))
1951 cp -= 1; /* rule #4 */
1952 if (cp >= linestart /* rule #2 */
1953 && (cp == linestart
1954 || notinname (cp[-1])) /* rule #3 */
1955 && strneq (name, cp, namelen)) /* rule #2 */
1956 named = FALSE; /* use implicit tag name */
1960 if (named)
1961 name = savenstr (name, namelen);
1962 else
1963 name = NULL;
1964 pfnote (name, is_func, linestart, linelen, lno, cno);
1967 /* Record a tag. */
1968 static void
1969 pfnote (name, is_func, linestart, linelen, lno, cno)
1970 char *name; /* tag name, or NULL if unnamed */
1971 bool is_func; /* tag is a function */
1972 char *linestart; /* start of the line where tag is */
1973 int linelen; /* length of the line where tag is */
1974 int lno; /* line number */
1975 long cno; /* character number */
1977 register node *np;
1979 assert (name == NULL || name[0] != '\0');
1980 if (CTAGS && name == NULL)
1981 return;
1983 np = xnew (1, node);
1985 /* If ctags mode, change name "main" to M<thisfilename>. */
1986 if (CTAGS && !cxref_style && streq (name, "main"))
1988 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1989 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1990 fp = etags_strrchr (np->name, '.');
1991 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1992 fp[0] = '\0';
1994 else
1995 np->name = name;
1996 np->valid = TRUE;
1997 np->been_warned = FALSE;
1998 np->fdp = curfdp;
1999 np->is_func = is_func;
2000 np->lno = lno;
2001 if (np->fdp->usecharno)
2002 /* Our char numbers are 0-base, because of C language tradition?
2003 ctags compatibility? old versions compatibility? I don't know.
2004 Anyway, since emacs's are 1-base we expect etags.el to take care
2005 of the difference. If we wanted to have 1-based numbers, we would
2006 uncomment the +1 below. */
2007 np->cno = cno /* + 1 */ ;
2008 else
2009 np->cno = invalidcharno;
2010 np->left = np->right = NULL;
2011 if (CTAGS && !cxref_style)
2013 if (strlen (linestart) < 50)
2014 np->regex = concat (linestart, "$", "");
2015 else
2016 np->regex = savenstr (linestart, 50);
2018 else
2019 np->regex = savenstr (linestart, linelen);
2021 add_node (np, &nodehead);
2025 * free_tree ()
2026 * recurse on left children, iterate on right children.
2028 static void
2029 free_tree (np)
2030 register node *np;
2032 while (np)
2034 register node *node_right = np->right;
2035 free_tree (np->left);
2036 free (np->name);
2037 free (np->regex);
2038 free (np);
2039 np = node_right;
2044 * free_fdesc ()
2045 * delete a file description
2047 static void
2048 free_fdesc (fdp)
2049 register fdesc *fdp;
2051 free (fdp->infname);
2052 free (fdp->infabsname);
2053 free (fdp->infabsdir);
2054 free (fdp->taggedfname);
2055 free (fdp->prop);
2056 free (fdp);
2060 * add_node ()
2061 * Adds a node to the tree of nodes. In etags mode, sort by file
2062 * name. In ctags mode, sort by tag name. Make no attempt at
2063 * balancing.
2065 * add_node is the only function allowed to add nodes, so it can
2066 * maintain state.
2068 static void
2069 add_node (np, cur_node_p)
2070 node *np, **cur_node_p;
2072 register int dif;
2073 register node *cur_node = *cur_node_p;
2075 if (cur_node == NULL)
2077 *cur_node_p = np;
2078 last_node = np;
2079 return;
2082 if (!CTAGS)
2083 /* Etags Mode */
2085 /* For each file name, tags are in a linked sublist on the right
2086 pointer. The first tags of different files are a linked list
2087 on the left pointer. last_node points to the end of the last
2088 used sublist. */
2089 if (last_node != NULL && last_node->fdp == np->fdp)
2091 /* Let's use the same sublist as the last added node. */
2092 assert (last_node->right == NULL);
2093 last_node->right = np;
2094 last_node = np;
2096 else if (cur_node->fdp == np->fdp)
2098 /* Scanning the list we found the head of a sublist which is
2099 good for us. Let's scan this sublist. */
2100 add_node (np, &cur_node->right);
2102 else
2103 /* The head of this sublist is not good for us. Let's try the
2104 next one. */
2105 add_node (np, &cur_node->left);
2106 } /* if ETAGS mode */
2108 else
2110 /* Ctags Mode */
2111 dif = strcmp (np->name, cur_node->name);
2114 * If this tag name matches an existing one, then
2115 * do not add the node, but maybe print a warning.
2117 if (no_duplicates && !dif)
2119 if (np->fdp == cur_node->fdp)
2121 if (!no_warnings)
2123 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2124 np->fdp->infname, lineno, np->name);
2125 fprintf (stderr, "Second entry ignored\n");
2128 else if (!cur_node->been_warned && !no_warnings)
2130 fprintf
2131 (stderr,
2132 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2133 np->fdp->infname, cur_node->fdp->infname, np->name);
2134 cur_node->been_warned = TRUE;
2136 return;
2139 /* Actually add the node */
2140 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2141 } /* if CTAGS mode */
2145 * invalidate_nodes ()
2146 * Scan the node tree and invalidate all nodes pointing to the
2147 * given file description (CTAGS case) or free them (ETAGS case).
2149 static void
2150 invalidate_nodes (badfdp, npp)
2151 fdesc *badfdp;
2152 node **npp;
2154 node *np = *npp;
2156 if (np == NULL)
2157 return;
2159 if (CTAGS)
2161 if (np->left != NULL)
2162 invalidate_nodes (badfdp, &np->left);
2163 if (np->fdp == badfdp)
2164 np->valid = FALSE;
2165 if (np->right != NULL)
2166 invalidate_nodes (badfdp, &np->right);
2168 else
2170 assert (np->fdp != NULL);
2171 if (np->fdp == badfdp)
2173 *npp = np->left; /* detach the sublist from the list */
2174 np->left = NULL; /* isolate it */
2175 free_tree (np); /* free it */
2176 invalidate_nodes (badfdp, npp);
2178 else
2179 invalidate_nodes (badfdp, &np->left);
2184 static int total_size_of_entries __P((node *));
2185 static int number_len __P((long));
/* Length of a non-negative number's decimal representation.
   Returns 1 for 0; NUM is expected to be non-negative.  */
static int
number_len (num)
     long num;
{
  int len = 1;
  /* Each division by 10 that leaves a positive value means one more
     digit.  */
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2199 * Return total number of characters that put_entries will output for
2200 * the nodes in the linked list at the right of the specified node.
2201 * This count is irrelevant with etags.el since emacs 19.34 at least,
2202 * but is still supplied for backward compatibility.
2204 static int
2205 total_size_of_entries (np)
2206 register node *np;
2208 register int total = 0;
2210 for (; np != NULL; np = np->right)
2211 if (np->valid)
2213 total += strlen (np->regex) + 1; /* pat\177 */
2214 if (np->name != NULL)
2215 total += strlen (np->name) + 1; /* name\001 */
2216 total += number_len ((long) np->lno) + 1; /* lno, */
2217 if (np->cno != invalidcharno) /* cno */
2218 total += number_len (np->cno);
2219 total += 1; /* newline */
2222 return total;
2225 static void
2226 put_entries (np)
2227 register node *np;
2229 register char *sp;
2230 static fdesc *fdp = NULL;
2232 if (np == NULL)
2233 return;
2235 /* Output subentries that precede this one */
2236 if (CTAGS)
2237 put_entries (np->left);
2239 /* Output this entry */
2240 if (np->valid)
2242 if (!CTAGS)
2244 /* Etags mode */
2245 if (fdp != np->fdp)
2247 fdp = np->fdp;
2248 fprintf (tagf, "\f\n%s,%d\n",
2249 fdp->taggedfname, total_size_of_entries (np));
2250 fdp->written = TRUE;
2252 fputs (np->regex, tagf);
2253 fputc ('\177', tagf);
2254 if (np->name != NULL)
2256 fputs (np->name, tagf);
2257 fputc ('\001', tagf);
2259 fprintf (tagf, "%d,", np->lno);
2260 if (np->cno != invalidcharno)
2261 fprintf (tagf, "%ld", np->cno);
2262 fputs ("\n", tagf);
2264 else
2266 /* Ctags mode */
2267 if (np->name == NULL)
2268 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2270 if (cxref_style)
2272 if (vgrind_style)
2273 fprintf (stdout, "%s %s %d\n",
2274 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2275 else
2276 fprintf (stdout, "%-16s %3d %-16s %s\n",
2277 np->name, np->lno, np->fdp->taggedfname, np->regex);
2279 else
2281 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2283 if (np->is_func)
2284 { /* function or #define macro with args */
2285 putc (searchar, tagf);
2286 putc ('^', tagf);
2288 for (sp = np->regex; *sp; sp++)
2290 if (*sp == '\\' || *sp == searchar)
2291 putc ('\\', tagf);
2292 putc (*sp, tagf);
2294 putc (searchar, tagf);
2296 else
2297 { /* anything else; text pattern inadequate */
2298 fprintf (tagf, "%d", np->lno);
2300 putc ('\n', tagf);
2303 } /* if this node contains a valid tag */
2305 /* Output subentries that follow this one */
2306 put_entries (np->right);
2307 if (!CTAGS)
2308 put_entries (np->left);
/* C extensions.  The low 12 bits (C_EXT) select the language variant;
   C_AUTO and YACC are separate flags outside that mask.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Token classes returned by C_symtype; st_none means the token is not
 * one of the recognised keywords.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2336 static unsigned int hash __P((const char *, unsigned int));
2337 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2338 static enum sym_type C_symtype __P((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2390 /*%<*/
2391 /* C code produced by gperf version 3.0.1 */
2392 /* Command-line: gperf -m 5 */
2393 /* Computed positions: -k'2-3' */
2395 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2396 /* maximum key range = 33, duplicates = 0 */
/* gperf-generated hash: LEN plus the association values of the second
   and (when LEN is not 2) third characters of STR.  The default case
   reads str[2], so callers must pass LEN >= 2; in_word_set enforces
   this with MIN_WORD_LENGTH.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2453 static struct C_stab_entry *
2454 in_word_set (str, len)
2455 register const char *str;
2456 register unsigned int len;
2458 enum
2460 TOTAL_KEYWORDS = 33,
2461 MIN_WORD_LENGTH = 2,
2462 MAX_WORD_LENGTH = 15,
2463 MIN_HASH_VALUE = 2,
2464 MAX_HASH_VALUE = 34
2467 static struct C_stab_entry wordlist[] =
2469 {""}, {""},
2470 {"if", 0, st_C_ignore},
2471 {"GTY", 0, st_C_attribute},
2472 {"@end", 0, st_C_objend},
2473 {"union", 0, st_C_struct},
2474 {"define", 0, st_C_define},
2475 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2476 {"template", 0, st_C_template},
2477 {"operator", C_PLPL, st_C_operator},
2478 {"@interface", 0, st_C_objprot},
2479 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2480 {"friend", C_PLPL, st_C_ignore},
2481 {"typedef", 0, st_C_typedef},
2482 {"return", 0, st_C_ignore},
2483 {"@implementation",0, st_C_objimpl},
2484 {"@protocol", 0, st_C_objprot},
2485 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2486 {"extern", 0, st_C_extern},
2487 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2488 {"struct", 0, st_C_struct},
2489 {"domain", C_STAR, st_C_struct},
2490 {"switch", 0, st_C_ignore},
2491 {"enum", 0, st_C_enum},
2492 {"for", 0, st_C_ignore},
2493 {"namespace", C_PLPL, st_C_struct},
2494 {"class", 0, st_C_class},
2495 {"while", 0, st_C_ignore},
2496 {"undef", 0, st_C_define},
2497 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2498 {"__attribute__", 0, st_C_attribute},
2499 {"SYSCALL", 0, st_C_gnumacro},
2500 {"ENTRY", 0, st_C_gnumacro},
2501 {"PSEUDO", 0, st_C_gnumacro},
2502 {"DEFUN", 0, st_C_gnumacro}
2505 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2507 register int key = hash (str, len);
2509 if (key <= MAX_HASH_VALUE && key >= 0)
2511 register const char *s = wordlist[key].name;
2513 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2514 return &wordlist[key];
2517 return 0;
2519 /*%>*/
2521 static enum sym_type
2522 C_symtype (str, len, c_ext)
2523 char *str;
2524 int len;
2525 int c_ext;
2527 register struct C_stab_entry *se = in_word_set (str, len);
2529 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2530 return st_none;
2531 return se->type;
2536 * Ignoring __attribute__ ((list))
2538 static bool inattribute; /* looking at an __attribute__ construct */
2541 * C functions and variables are recognized using a simple
2542 * finite automaton. fvdef is its state variable.
2544 static enum
2546 fvnone, /* nothing seen */
2547 fdefunkey, /* Emacs DEFUN keyword seen */
2548 fdefunname, /* Emacs DEFUN name seen */
2549 foperator, /* func: operator keyword seen (cplpl) */
2550 fvnameseen, /* function or variable name seen */
2551 fstartlist, /* func: just after open parenthesis */
2552 finlist, /* func: in parameter list */
2553 flistseen, /* func: after parameter list */
2554 fignore, /* func: before open brace */
2555 vignore /* var-like: ignore until ';' */
2556 } fvdef;
2558 static bool fvextern; /* func or var: extern keyword seen; */
2561 * typedefs are recognized using a simple finite automaton.
2562 * typdef is its state variable.
2564 static enum
2566 tnone, /* nothing seen */
2567 tkeyseen, /* typedef keyword seen */
2568 ttypeseen, /* defined type seen */
2569 tinbody, /* inside typedef body */
2570 tend, /* just before typedef tag */
2571 tignore /* junk after typedef tag */
2572 } typdef;
2575 * struct-like structures (enum, struct and union) are recognized
2576 * using another simple finite automaton. `structdef' is its state
2577 * variable.
2579 static enum
2581 snone, /* nothing seen yet,
2582 or in struct body if bracelev > 0 */
2583 skeyseen, /* struct-like keyword seen */
2584 stagseen, /* struct-like tag seen */
2585 scolonseen /* colon seen after struct-like tag */
2586 } structdef;
2589 * When objdef is different from onone, objtag is the name of the class.
2591 static char *objtag = "<uninited>";
2594 * Yet another little state machine to deal with preprocessor lines.
2596 static enum
2598 dnone, /* nothing seen */
2599 dsharpseen, /* '#' seen as first char on line */
2600 ddefineseen, /* '#' and 'define' seen */
2601 dignorerest /* ignore rest of line */
2602 } definedef;
2605 * State machine for Objective C protocols and implementations.
2606 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2608 static enum
2610 onone, /* nothing seen */
2611 oprotocol, /* @interface or @protocol seen */
2612 oimplementation, /* @implementations seen */
2613 otagseen, /* class name seen */
2614 oparenseen, /* parenthesis before category seen */
2615 ocatseen, /* category name seen */
2616 oinbody, /* in @implementation body */
2617 omethodsign, /* in @implementation body, after +/- */
2618 omethodtag, /* after method name */
2619 omethodcolon, /* after method colon */
2620 omethodparm, /* after method parameter */
2621 oignore /* wait for @end */
2622 } objdef;
2626 * Use this structure to keep info about the token read, and how it
2627 * should be tagged. Used by the make_C_tag function to build a tag.
2629 static struct tok
2631 char *line; /* string containing the token */
2632 int offset; /* where the token starts in LINE */
2633 int length; /* token length */
2635 The previous members can be used to pass strings around for generic
2636 purposes. The following ones specifically refer to creating tags. In this
2637 case the token contained here is the pattern that will be used to create a
2638 tag.
2640 bool valid; /* do not create a tag; the token should be
2641 invalidated whenever a state machine is
2642 reset prematurely */
2643 bool named; /* create a named tag */
2644 int lineno; /* source line number of tag */
2645 long linepos; /* source char number of tag */
2646 } token; /* latest token read */
2649 * Variables and functions for dealing with nested structures.
2650 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2652 static void pushclass_above __P((int, char *, int));
2653 static void popclass_above __P((int));
2654 static void write_classname __P((linebuffer *, char *qualifier));
2656 static struct {
2657 char **cname; /* nested class names */
2658 int *bracelev; /* nested class brace level */
2659 int nl; /* class nesting level (elements used) */
2660 int size; /* length of the array */
2661 } cstack; /* stack for nested declaration tags */
2662 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2663 #define nestlev (cstack.nl)
2664 /* After struct keyword or in struct body, not inside a nested function. */
2665 #define instruct (structdef == snone && nestlev > 0 \
2666 && bracelev == cstack.bracelev[nestlev-1] + 1)
2668 static void
2669 pushclass_above (bracelev, str, len)
2670 int bracelev;
2671 char *str;
2672 int len;
2674 int nl;
2676 popclass_above (bracelev);
2677 nl = cstack.nl;
2678 if (nl >= cstack.size)
2680 int size = cstack.size *= 2;
2681 xrnew (cstack.cname, size, char *);
2682 xrnew (cstack.bracelev, size, int);
2684 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2685 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2686 cstack.bracelev[nl] = bracelev;
2687 cstack.nl = nl + 1;
2690 static void
2691 popclass_above (bracelev)
2692 int bracelev;
2694 int nl;
2696 for (nl = cstack.nl - 1;
2697 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2698 nl--)
2700 free (cstack.cname[nl]);
2701 cstack.nl = nl;
2705 static void
2706 write_classname (cn, qualifier)
2707 linebuffer *cn;
2708 char *qualifier;
2710 int i, len;
2711 int qlen = strlen (qualifier);
2713 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2715 len = 0;
2716 cn->len = 0;
2717 cn->buffer[0] = '\0';
2719 else
2721 len = strlen (cstack.cname[0]);
2722 linebuffer_setlen (cn, len);
2723 strcpy (cn->buffer, cstack.cname[0]);
2725 for (i = 1; i < cstack.nl; i++)
2727 char *s;
2728 int slen;
2730 s = cstack.cname[i];
2731 if (s == NULL)
2732 continue;
2733 slen = strlen (s);
2734 len += slen + qlen;
2735 linebuffer_setlen (cn, len);
2736 strncat (cn->buffer, qualifier, qlen);
2737 strncat (cn->buffer, s, slen);
2742 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2743 static void make_C_tag __P((bool));
2746 * consider_token ()
2747 * checks to see if the current token is at the start of a
2748 * function or variable, or corresponds to a typedef, or
2749 * is a struct/union/enum tag, or #define, or an enum constant.
2751 * *IS_FUNC gets TRUE if the token is a function or #define macro
2752 * with args. C_EXTP points to which language we are looking at.
2754 * Globals
2755 * fvdef IN OUT
2756 * structdef IN OUT
2757 * definedef IN OUT
2758 * typdef IN OUT
2759 * objdef IN OUT
2762 static bool
2763 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2764 register char *str; /* IN: token pointer */
2765 register int len; /* IN: token length */
2766 register int c; /* IN: first char after the token */
2767 int *c_extp; /* IN, OUT: C extensions mask */
2768 int bracelev; /* IN: brace level */
2769 int parlev; /* IN: parenthesis level */
2770 bool *is_func_or_var; /* OUT: function or variable found */
2772 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2773 structtype is the type of the preceding struct-like keyword, and
2774 structbracelev is the brace level where it has been seen. */
2775 static enum sym_type structtype;
2776 static int structbracelev;
2777 static enum sym_type toktype;
2780 toktype = C_symtype (str, len, *c_extp);
2783 * Skip __attribute__
2785 if (toktype == st_C_attribute)
2787 inattribute = TRUE;
2788 return FALSE;
2792 * Advance the definedef state machine.
2794 switch (definedef)
2796 case dnone:
2797 /* We're not on a preprocessor line. */
2798 if (toktype == st_C_gnumacro)
2800 fvdef = fdefunkey;
2801 return FALSE;
2803 break;
2804 case dsharpseen:
2805 if (toktype == st_C_define)
2807 definedef = ddefineseen;
2809 else
2811 definedef = dignorerest;
2813 return FALSE;
2814 case ddefineseen:
2816 * Make a tag for any macro, unless it is a constant
2817 * and constantypedefs is FALSE.
2819 definedef = dignorerest;
2820 *is_func_or_var = (c == '(');
2821 if (!*is_func_or_var && !constantypedefs)
2822 return FALSE;
2823 else
2824 return TRUE;
2825 case dignorerest:
2826 return FALSE;
2827 default:
2828 error ("internal error: definedef value.", (char *)NULL);
2832 * Now typedefs
2834 switch (typdef)
2836 case tnone:
2837 if (toktype == st_C_typedef)
2839 if (typedefs)
2840 typdef = tkeyseen;
2841 fvextern = FALSE;
2842 fvdef = fvnone;
2843 return FALSE;
2845 break;
2846 case tkeyseen:
2847 switch (toktype)
2849 case st_none:
2850 case st_C_class:
2851 case st_C_struct:
2852 case st_C_enum:
2853 typdef = ttypeseen;
2855 break;
2856 case ttypeseen:
2857 if (structdef == snone && fvdef == fvnone)
2859 fvdef = fvnameseen;
2860 return TRUE;
2862 break;
2863 case tend:
2864 switch (toktype)
2866 case st_C_class:
2867 case st_C_struct:
2868 case st_C_enum:
2869 return FALSE;
2871 return TRUE;
2874 switch (toktype)
2876 case st_C_javastruct:
2877 if (structdef == stagseen)
2878 structdef = scolonseen;
2879 return FALSE;
2880 case st_C_template:
2881 case st_C_class:
2882 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2883 && bracelev == 0
2884 && definedef == dnone && structdef == snone
2885 && typdef == tnone && fvdef == fvnone)
2886 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2887 if (toktype == st_C_template)
2888 break;
2889 /* FALLTHRU */
2890 case st_C_struct:
2891 case st_C_enum:
2892 if (parlev == 0
2893 && fvdef != vignore
2894 && (typdef == tkeyseen
2895 || (typedefs_or_cplusplus && structdef == snone)))
2897 structdef = skeyseen;
2898 structtype = toktype;
2899 structbracelev = bracelev;
2900 if (fvdef == fvnameseen)
2901 fvdef = fvnone;
2903 return FALSE;
2906 if (structdef == skeyseen)
2908 structdef = stagseen;
2909 return TRUE;
2912 if (typdef != tnone)
2913 definedef = dnone;
2915 /* Detect Objective C constructs. */
2916 switch (objdef)
2918 case onone:
2919 switch (toktype)
2921 case st_C_objprot:
2922 objdef = oprotocol;
2923 return FALSE;
2924 case st_C_objimpl:
2925 objdef = oimplementation;
2926 return FALSE;
2928 break;
2929 case oimplementation:
2930 /* Save the class tag for functions or variables defined inside. */
2931 objtag = savenstr (str, len);
2932 objdef = oinbody;
2933 return FALSE;
2934 case oprotocol:
2935 /* Save the class tag for categories. */
2936 objtag = savenstr (str, len);
2937 objdef = otagseen;
2938 *is_func_or_var = TRUE;
2939 return TRUE;
2940 case oparenseen:
2941 objdef = ocatseen;
2942 *is_func_or_var = TRUE;
2943 return TRUE;
2944 case oinbody:
2945 break;
2946 case omethodsign:
2947 if (parlev == 0)
2949 fvdef = fvnone;
2950 objdef = omethodtag;
2951 linebuffer_setlen (&token_name, len);
2952 strncpy (token_name.buffer, str, len);
2953 token_name.buffer[len] = '\0';
2954 return TRUE;
2956 return FALSE;
2957 case omethodcolon:
2958 if (parlev == 0)
2959 objdef = omethodparm;
2960 return FALSE;
2961 case omethodparm:
2962 if (parlev == 0)
2964 fvdef = fvnone;
2965 objdef = omethodtag;
2966 linebuffer_setlen (&token_name, token_name.len + len);
2967 strncat (token_name.buffer, str, len);
2968 return TRUE;
2970 return FALSE;
2971 case oignore:
2972 if (toktype == st_C_objend)
2974 /* Memory leakage here: the string pointed by objtag is
2975 never released, because many tests would be needed to
2976 avoid breaking on incorrect input code. The amount of
2977 memory leaked here is the sum of the lengths of the
2978 class tags.
2979 free (objtag); */
2980 objdef = onone;
2982 return FALSE;
2985 /* A function, variable or enum constant? */
2986 switch (toktype)
2988 case st_C_extern:
2989 fvextern = TRUE;
2990 switch (fvdef)
2992 case finlist:
2993 case flistseen:
2994 case fignore:
2995 case vignore:
2996 break;
2997 default:
2998 fvdef = fvnone;
3000 return FALSE;
3001 case st_C_ignore:
3002 fvextern = FALSE;
3003 fvdef = vignore;
3004 return FALSE;
3005 case st_C_operator:
3006 fvdef = foperator;
3007 *is_func_or_var = TRUE;
3008 return TRUE;
3009 case st_none:
3010 if (constantypedefs
3011 && structdef == snone
3012 && structtype == st_C_enum && bracelev > structbracelev)
3013 return TRUE; /* enum constant */
3014 switch (fvdef)
3016 case fdefunkey:
3017 if (bracelev > 0)
3018 break;
3019 fvdef = fdefunname; /* GNU macro */
3020 *is_func_or_var = TRUE;
3021 return TRUE;
3022 case fvnone:
3023 switch (typdef)
3025 case ttypeseen:
3026 return FALSE;
3027 case tnone:
3028 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3029 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3031 fvdef = vignore;
3032 return FALSE;
3034 break;
3036 /* FALLTHRU */
3037 case fvnameseen:
3038 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3040 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3041 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3042 fvdef = foperator;
3043 *is_func_or_var = TRUE;
3044 return TRUE;
3046 if (bracelev > 0 && !instruct)
3047 break;
3048 fvdef = fvnameseen; /* function or variable */
3049 *is_func_or_var = TRUE;
3050 return TRUE;
3052 break;
3055 return FALSE;
3060 * C_entries often keeps pointers to tokens or lines which are older than
3061 * the line currently read. By keeping two line buffers, and switching
3062 * them at end of line, it is possible to use those pointers.
3064 static struct
3066 long linepos;
3067 linebuffer lb;
3068 } lbs[2];
3070 #define current_lb_is_new (newndx == curndx)
3071 #define switch_line_buffers() (curndx = 1 - curndx)
3073 #define curlb (lbs[curndx].lb)
3074 #define newlb (lbs[newndx].lb)
3075 #define curlinepos (lbs[curndx].linepos)
3076 #define newlinepos (lbs[newndx].linepos)
3078 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3079 #define cplpl (c_ext & C_PLPL)
3080 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3082 #define CNL_SAVE_DEFINEDEF() \
3083 do { \
3084 curlinepos = charno; \
3085 readline (&curlb, inf); \
3086 lp = curlb.buffer; \
3087 quotednl = FALSE; \
3088 newndx = curndx; \
3089 } while (0)
3091 #define CNL() \
3092 do { \
3093 CNL_SAVE_DEFINEDEF(); \
3094 if (savetoken.valid) \
3096 token = savetoken; \
3097 savetoken.valid = FALSE; \
3099 definedef = dnone; \
3100 } while (0)
3103 static void
3104 make_C_tag (isfun)
3105 bool isfun;
3107 /* This function is never called when token.valid is FALSE, but
3108 we must protect against invalid input or internal errors. */
3109 if (token.valid)
3110 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3111 token.offset+token.length+1, token.lineno, token.linepos);
3112 else if (DEBUG)
3113 { /* this branch is optimised away if !DEBUG */
3114 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3115 token_name.len + 17, isfun, token.line,
3116 token.offset+token.length+1, token.lineno, token.linepos);
3117 error ("INVALID TOKEN", NULL);
3120 token.valid = FALSE;
3125 * C_entries ()
3126 * This routine finds functions, variables, typedefs,
3127 * #define's, enum constants and struct/union/enum definitions in
3128 * C syntax and adds them to the list.
/* Tag the C-family source read from INF.  C_EXT says which extension of C
   to assume; it is tested as a bit mask here (YACC, C_AUTO) and is switched
   from C_AUTO to C_PLPL when `::' is met inside an identifier.  Parsing
   state lives in fvdef, typdef, structdef, definedef and objdef (declared
   outside this function) plus the locals below; tags are emitted through
   make_C_tag.  Both line buffers are freed before returning.  */
3130 static void
3131 C_entries (c_ext, inf)
3132 int c_ext; /* extension of C */
3133 FILE *inf; /* input file */
3135 register char c; /* latest char read; '\0' for end of line */
3136 register char *lp; /* pointer one beyond the character `c' */
3137 int curndx, newndx; /* indices for current and new lb */
3138 register int tokoff; /* offset in line of start of current token */
3139 register int toklen; /* length of current token */
3140 char *qualifier; /* string used to qualify names */
3141 int qlen; /* length of qualifier */
3142 int bracelev; /* current brace level */
3143 int bracketlev; /* current bracket level */
3144 int parlev; /* current parenthesis level */
3145 int attrparlev; /* __attribute__ parenthesis level */
3146 int templatelev; /* current template level */
3147 int typdefbracelev; /* bracelev where a typedef struct body begun */
3148 bool incomm, inquote, inchar, quotednl, midtoken;
3149 bool yacc_rules; /* in the rules part of a yacc file */
3150 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Set up both line buffers; the class stack arrays are allocated only when
   cstack.size is still 0.  */
3153 linebuffer_init (&lbs[0].lb);
3154 linebuffer_init (&lbs[1].lb);
3155 if (cstack.size == 0)
3157 cstack.size = (DEBUG) ? 1 : 4;
3158 cstack.nl = 0;
3159 cstack.cname = xnew (cstack.size, char *);
3160 cstack.bracelev = xnew (cstack.size, int);
3163 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3164 curndx = newndx = 0;
3165 lp = curlb.buffer;
3166 *lp = 0;
3168 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3169 structdef = snone; definedef = dnone; objdef = onone;
3170 yacc_rules = FALSE;
3171 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3172 token.valid = savetoken.valid = FALSE;
3173 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3174 if (cjava)
3175 { qualifier = "."; qlen = 1; }
3176 else
3177 { qualifier = "::"; qlen = 2; }
/* Main scanning loop: each pass takes the next character from the current
   line through lp.  */
3180 while (!feof (inf))
3182 c = *lp++;
3183 if (c == '\\')
3185 /* If we are at the end of the line, the next character is a
3186 '\0'; do not skip it, because it is what tells us
3187 to read the next line. */
3188 if (*lp == '\0')
3190 quotednl = TRUE;
3191 continue;
3193 lp++;
3194 c = ' ';
3196 else if (incomm)
3198 switch (c)
3200 case '*':
3201 if (*lp == '/')
3203 c = *lp++;
3204 incomm = FALSE;
3206 break;
3207 case '\0':
3208 /* Newlines inside comments do not end macro definitions in
3209 traditional cpp. */
3210 CNL_SAVE_DEFINEDEF ();
3211 break;
3213 continue;
3215 else if (inquote)
3217 switch (c)
3219 case '"':
3220 inquote = FALSE;
3221 break;
3222 case '\0':
3223 /* Newlines inside strings do not end macro definitions
3224 in traditional cpp, even though compilers don't
3225 usually accept them. */
3226 CNL_SAVE_DEFINEDEF ();
3227 break;
3229 continue;
3231 else if (inchar)
3233 switch (c)
3235 case '\0':
3236 /* Hmmm, something went wrong. */
3237 CNL ();
3238 /* FALLTHRU */
3239 case '\'':
3240 inchar = FALSE;
3241 break;
3243 continue;
3245 else if (bracketlev > 0)
3247 switch (c)
3249 case ']':
3250 if (--bracketlev > 0)
3251 continue;
3252 break;
3253 case '\0':
3254 CNL_SAVE_DEFINEDEF ();
3255 break;
3257 continue;
3259 else switch (c)
3261 case '"':
3262 inquote = TRUE;
3263 if (inattribute)
3264 break;
3265 switch (fvdef)
3267 case fdefunkey:
3268 case fstartlist:
3269 case finlist:
3270 case fignore:
3271 case vignore:
3272 break;
3273 default:
3274 fvextern = FALSE;
3275 fvdef = fvnone;
3277 continue;
3278 case '\'':
3279 inchar = TRUE;
3280 if (inattribute)
3281 break;
3282 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3284 fvextern = FALSE;
3285 fvdef = fvnone;
3287 continue;
3288 case '/':
3289 if (*lp == '*')
3291 incomm = TRUE;
3292 lp++;
3293 c = ' ';
3295 else if (/* cplpl && */ *lp == '/')
3297 c = '\0';
3299 break;
3300 case '%':
3301 if ((c_ext & YACC) && *lp == '%')
3303 /* Entering or exiting rules section in yacc file. */
3304 lp++;
3305 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3306 typdef = tnone; structdef = snone;
3307 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3308 bracelev = 0;
3309 yacc_rules = !yacc_rules;
3310 continue;
3312 else
3313 break;
3314 case '#':
3315 if (definedef == dnone)
3317 char *cp;
3318 bool cpptoken = TRUE;
3320 /* Look back on this line. If all blanks, or nonblanks
3321 followed by an end of comment, this is a preprocessor
3322 token. */
3323 for (cp = newlb.buffer; cp < lp-1; cp++)
3324 if (!iswhite (*cp))
3326 if (*cp == '*' && *(cp+1) == '/')
3328 cp++;
3329 cpptoken = TRUE;
3331 else
3332 cpptoken = FALSE;
3334 if (cpptoken)
3335 definedef = dsharpseen;
3336 } /* if (definedef == dnone) */
3337 continue;
3338 case '[':
3339 bracketlev++;
3340 continue;
3341 } /* switch (c) */
3344 /* Consider token only if some involved conditions are satisfied. */
3345 if (typdef != tignore
3346 && definedef != dignorerest
3347 && fvdef != finlist
3348 && templatelev == 0
3349 && (definedef != dnone
3350 || structdef != scolonseen)
3351 && !inattribute)
3353 if (midtoken)
3355 if (endtoken (c))
3357 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3358 /* This handles :: in the middle,
3359 but not at the beginning of an identifier.
3360 Also, space-separated :: is not recognised. */
3362 if (c_ext & C_AUTO) /* automatic detection of C++ */
3363 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3364 lp += 2;
3365 toklen += 2;
3366 c = lp[-1];
3367 goto still_in_token;
3369 else
3371 bool funorvar = FALSE;
3373 if (yacc_rules
3374 || consider_token (newlb.buffer + tokoff, toklen, c,
3375 &c_ext, bracelev, parlev,
3376 &funorvar))
3378 if (fvdef == foperator)
3380 char *oldlp = lp;
3381 lp = skip_spaces (lp-1);
3382 if (*lp != '\0')
3383 lp += 1;
3384 while (*lp != '\0'
3385 && !iswhite (*lp) && *lp != '(')
3386 lp += 1;
3387 c = *lp++;
3388 toklen += lp - oldlp;
3390 token.named = FALSE;
3391 if (!plainc
3392 && nestlev > 0 && definedef == dnone)
3393 /* in struct body */
3395 write_classname (&token_name, qualifier);
3396 linebuffer_setlen (&token_name,
3397 token_name.len+qlen+toklen);
3398 strcat (token_name.buffer, qualifier);
3399 strncat (token_name.buffer,
3400 newlb.buffer + tokoff, toklen);
3401 token.named = TRUE;
3403 else if (objdef == ocatseen)
3404 /* Objective C category */
3406 int len = strlen (objtag) + 2 + toklen;
3407 linebuffer_setlen (&token_name, len);
3408 strcpy (token_name.buffer, objtag);
3409 strcat (token_name.buffer, "(");
3410 strncat (token_name.buffer,
3411 newlb.buffer + tokoff, toklen);
3412 strcat (token_name.buffer, ")");
3413 token.named = TRUE;
3415 else if (objdef == omethodtag
3416 || objdef == omethodparm)
3417 /* Objective C method */
3419 token.named = TRUE;
3421 else if (fvdef == fdefunname)
3422 /* GNU DEFUN and similar macros */
3424 bool defun = (newlb.buffer[tokoff] == 'F');
3425 int off = tokoff;
3426 int len = toklen;
3428 /* Rewrite the tag so that emacs lisp DEFUNs
3429 can be found by their elisp name */
3430 if (defun)
3432 off += 1;
3433 len -= 1;
3435 linebuffer_setlen (&token_name, len);
3436 strncpy (token_name.buffer,
3437 newlb.buffer + off, len);
3438 token_name.buffer[len] = '\0';
3439 if (defun)
3440 while (--len >= 0)
3441 if (token_name.buffer[len] == '_')
3442 token_name.buffer[len] = '-';
3443 token.named = defun;
3445 else
3447 linebuffer_setlen (&token_name, toklen);
3448 strncpy (token_name.buffer,
3449 newlb.buffer + tokoff, toklen);
3450 token_name.buffer[toklen] = '\0';
3451 /* Name macros and members. */
3452 token.named = (structdef == stagseen
3453 || typdef == ttypeseen
3454 || typdef == tend
3455 || (funorvar
3456 && definedef == dignorerest)
3457 || (funorvar
3458 && definedef == dnone
3459 && structdef == snone
3460 && bracelev > 0));
3462 token.lineno = lineno;
3463 token.offset = tokoff;
3464 token.length = toklen;
3465 token.line = newlb.buffer;
3466 token.linepos = newlinepos;
3467 token.valid = TRUE;
3469 if (definedef == dnone
3470 && (fvdef == fvnameseen
3471 || fvdef == foperator
3472 || structdef == stagseen
3473 || typdef == tend
3474 || typdef == ttypeseen
3475 || objdef != onone))
3477 if (current_lb_is_new)
3478 switch_line_buffers ();
3480 else if (definedef != dnone
3481 || fvdef == fdefunname
3482 || instruct)
3483 make_C_tag (funorvar);
3485 else /* not yacc and consider_token failed */
3487 if (inattribute && fvdef == fignore)
3489 /* We have just met __attribute__ after a
3490 function parameter list: do not tag the
3491 function again. */
3492 fvdef = fvnone;
3495 midtoken = FALSE;
3497 } /* if (endtoken (c)) */
3498 else if (intoken (c))
3499 still_in_token:
3501 toklen++;
3502 continue;
3504 } /* if (midtoken) */
3505 else if (begtoken (c))
3507 switch (definedef)
3509 case dnone:
3510 switch (fvdef)
3512 case fstartlist:
3513 /* This prevents tagging fb in
3514 void (__attribute__((noreturn)) *fb) (void);
3515 Fixing this is not easy and not very important. */
3516 fvdef = finlist;
3517 continue;
3518 case flistseen:
3519 if (plainc || declarations)
3521 make_C_tag (TRUE); /* a function */
3522 fvdef = fignore;
3524 break;
3526 if (structdef == stagseen && !cjava)
3528 popclass_above (bracelev);
3529 structdef = snone;
3531 break;
3532 case dsharpseen:
3533 savetoken = token;
3534 break;
3536 if (!yacc_rules || lp == newlb.buffer + 1)
3538 tokoff = lp - 1 - newlb.buffer;
3539 toklen = 1;
3540 midtoken = TRUE;
3542 continue;
3543 } /* if (begtoken) */
3544 } /* if must look at token */
3547 /* Detect end of line, colon, comma, semicolon and various braces
3548 after having handled a token.*/
3549 switch (c)
3551 case ':':
3552 if (inattribute)
3553 break;
3554 if (yacc_rules && token.offset == 0 && token.valid)
3556 make_C_tag (FALSE); /* a yacc function */
3557 break;
3559 if (definedef != dnone)
3560 break;
3561 switch (objdef)
3563 case otagseen:
3564 objdef = oignore;
3565 make_C_tag (TRUE); /* an Objective C class */
3566 break;
3567 case omethodtag:
3568 case omethodparm:
3569 objdef = omethodcolon;
3570 linebuffer_setlen (&token_name, token_name.len + 1);
3571 strcat (token_name.buffer, ":");
3572 break;
3574 if (structdef == stagseen)
3576 structdef = scolonseen;
3577 break;
3579 /* Should be useless, but may work as a safety net. */
3580 if (cplpl && fvdef == flistseen)
3582 make_C_tag (TRUE); /* a function */
3583 fvdef = fignore;
3584 break;
3586 break;
3587 case ';':
3588 if (definedef != dnone || inattribute)
3589 break;
3590 switch (typdef)
3592 case tend:
3593 case ttypeseen:
3594 make_C_tag (FALSE); /* a typedef */
3595 typdef = tnone;
3596 fvdef = fvnone;
3597 break;
3598 case tnone:
3599 case tinbody:
3600 case tignore:
3601 switch (fvdef)
3603 case fignore:
3604 if (typdef == tignore || cplpl)
3605 fvdef = fvnone;
3606 break;
3607 case fvnameseen:
3608 if ((globals && bracelev == 0 && (!fvextern || declarations))
3609 || (members && instruct))
3610 make_C_tag (FALSE); /* a variable */
3611 fvextern = FALSE;
3612 fvdef = fvnone;
3613 token.valid = FALSE;
3614 break;
3615 case flistseen:
3616 if ((declarations
3617 && (cplpl || !instruct)
3618 && (typdef == tnone || (typdef != tignore && instruct)))
3619 || (members
3620 && plainc && instruct))
3621 make_C_tag (TRUE); /* a function */
3622 /* FALLTHRU */
3623 default:
3624 fvextern = FALSE;
3625 fvdef = fvnone;
3626 if (declarations
3627 && cplpl && structdef == stagseen)
3628 make_C_tag (FALSE); /* forward declaration */
3629 else
3630 token.valid = FALSE;
3631 } /* switch (fvdef) */
3632 /* FALLTHRU */
3633 default:
3634 if (!instruct)
3635 typdef = tnone;
3637 if (structdef == stagseen)
3638 structdef = snone;
3639 break;
3640 case ',':
3641 if (definedef != dnone || inattribute)
3642 break;
3643 switch (objdef)
3645 case omethodtag:
3646 case omethodparm:
3647 make_C_tag (TRUE); /* an Objective C method */
3648 objdef = oinbody;
3649 break;
3651 switch (fvdef)
3653 case fdefunkey:
3654 case foperator:
3655 case fstartlist:
3656 case finlist:
3657 case fignore:
3658 case vignore:
3659 break;
3660 case fdefunname:
3661 fvdef = fignore;
3662 break;
3663 case fvnameseen:
3664 if (parlev == 0
3665 && ((globals
3666 && bracelev == 0
3667 && templatelev == 0
3668 && (!fvextern || declarations))
3669 || (members && instruct)))
3670 make_C_tag (FALSE); /* a variable */
3671 break;
3672 case flistseen:
3673 if ((declarations && typdef == tnone && !instruct)
3674 || (members && typdef != tignore && instruct))
3676 make_C_tag (TRUE); /* a function */
3677 fvdef = fvnameseen;
3679 else if (!declarations)
3680 fvdef = fvnone;
3681 token.valid = FALSE;
3682 break;
3683 default:
3684 fvdef = fvnone;
3686 if (structdef == stagseen)
3687 structdef = snone;
3688 break;
3689 case ']':
3690 if (definedef != dnone || inattribute)
3691 break;
3692 if (structdef == stagseen)
3693 structdef = snone;
3694 switch (typdef)
3696 case ttypeseen:
3697 case tend:
3698 typdef = tignore;
3699 make_C_tag (FALSE); /* a typedef */
3700 break;
3701 case tnone:
3702 case tinbody:
3703 switch (fvdef)
3705 case foperator:
3706 case finlist:
3707 case fignore:
3708 case vignore:
3709 break;
3710 case fvnameseen:
3711 if ((members && bracelev == 1)
3712 || (globals && bracelev == 0
3713 && (!fvextern || declarations)))
3714 make_C_tag (FALSE); /* a variable */
3715 /* FALLTHRU */
3716 default:
3717 fvdef = fvnone;
3719 break;
3721 break;
3722 case '(':
3723 if (inattribute)
3725 attrparlev++;
3726 break;
3728 if (definedef != dnone)
3729 break;
3730 if (objdef == otagseen && parlev == 0)
3731 objdef = oparenseen;
3732 switch (fvdef)
3734 case fvnameseen:
3735 if (typdef == ttypeseen
3736 && *lp != '*'
3737 && !instruct)
3739 /* This handles constructs like:
3740 typedef void OperatorFun (int fun); */
3741 make_C_tag (FALSE);
3742 typdef = tignore;
3743 fvdef = fignore;
3744 break;
3746 /* FALLTHRU */
3747 case foperator:
3748 fvdef = fstartlist;
3749 break;
3750 case flistseen:
3751 fvdef = finlist;
3752 break;
3754 parlev++;
3755 break;
3756 case ')':
3757 if (inattribute)
3759 if (--attrparlev == 0)
3760 inattribute = FALSE;
3761 break;
3763 if (definedef != dnone)
3764 break;
3765 if (objdef == ocatseen && parlev == 1)
3767 make_C_tag (TRUE); /* an Objective C category */
3768 objdef = oignore;
3770 if (--parlev == 0)
3772 switch (fvdef)
3774 case fstartlist:
3775 case finlist:
3776 fvdef = flistseen;
3777 break;
3779 if (!instruct
3780 && (typdef == tend
3781 || typdef == ttypeseen))
3783 typdef = tignore;
3784 make_C_tag (FALSE); /* a typedef */
3787 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3788 parlev = 0;
3789 break;
3790 case '{':
3791 if (definedef != dnone)
3792 break;
3793 if (typdef == ttypeseen)
3795 /* Whenever typdef is set to tinbody (currently only
3796 here), typdefbracelev should be set to bracelev. */
3797 typdef = tinbody;
3798 typdefbracelev = bracelev;
3800 switch (fvdef)
3802 case flistseen:
3803 make_C_tag (TRUE); /* a function */
3804 /* FALLTHRU */
3805 case fignore:
3806 fvdef = fvnone;
3807 break;
3808 case fvnone:
3809 switch (objdef)
3811 case otagseen:
3812 make_C_tag (TRUE); /* an Objective C class */
3813 objdef = oignore;
3814 break;
3815 case omethodtag:
3816 case omethodparm:
3817 make_C_tag (TRUE); /* an Objective C method */
3818 objdef = oinbody;
3819 break;
3820 default:
3821 /* Neutralize `extern "C" {' grot. */
3822 if (bracelev == 0 && structdef == snone && nestlev == 0
3823 && typdef == tnone)
3824 bracelev = -1;
3826 break;
3828 switch (structdef)
3830 case skeyseen: /* unnamed struct */
3831 pushclass_above (bracelev, NULL, 0);
3832 structdef = snone;
3833 break;
3834 case stagseen: /* named struct or enum */
3835 case scolonseen: /* a class */
3836 pushclass_above (bracelev,token.line+token.offset, token.length);
3837 structdef = snone;
3838 make_C_tag (FALSE); /* a struct or enum */
3839 break;
3841 bracelev += 1;
3842 break;
3843 case '*':
3844 if (definedef != dnone)
3845 break;
3846 if (fvdef == fstartlist)
3848 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3849 token.valid = FALSE;
3851 break;
3852 case '}':
3853 if (definedef != dnone)
3854 break;
3855 bracelev -= 1;
3856 if (!ignoreindent && lp == newlb.buffer + 1)
3858 if (bracelev != 0)
3859 token.valid = FALSE; /* unexpected value, token unreliable */
3860 bracelev = 0; /* reset brace level if first column */
3861 parlev = 0; /* also reset paren level, just in case... */
3863 else if (bracelev < 0)
3865 token.valid = FALSE; /* something gone amiss, token unreliable */
3866 bracelev = 0;
3868 if (bracelev == 0 && fvdef == vignore)
3869 fvdef = fvnone; /* end of function */
3870 popclass_above (bracelev);
3871 structdef = snone;
3872 /* Only if typdef == tinbody is typdefbracelev significant. */
3873 if (typdef == tinbody && bracelev <= typdefbracelev)
3875 assert (bracelev == typdefbracelev);
3876 typdef = tend;
3878 break;
3879 case '=':
3880 if (definedef != dnone)
3881 break;
3882 switch (fvdef)
3884 case foperator:
3885 case finlist:
3886 case fignore:
3887 case vignore:
3888 break;
3889 case fvnameseen:
3890 if ((members && bracelev == 1)
3891 || (globals && bracelev == 0 && (!fvextern || declarations)))
3892 make_C_tag (FALSE); /* a variable */
3893 /* FALLTHRU */
3894 default:
3895 fvdef = vignore;
3897 break;
3898 case '<':
3899 if (cplpl
3900 && (structdef == stagseen || fvdef == fvnameseen))
3902 templatelev++;
3903 break;
3905 goto resetfvdef;
3906 case '>':
3907 if (templatelev > 0)
3909 templatelev--;
3910 break;
3912 goto resetfvdef;
3913 case '+':
3914 case '-':
3915 if (objdef == oinbody && bracelev == 0)
3917 objdef = omethodsign;
3918 break;
3920 /* FALLTHRU */
3921 resetfvdef:
3922 case '#': case '~': case '&': case '%': case '/':
3923 case '|': case '^': case '!': case '.': case '?':
3924 if (definedef != dnone)
3925 break;
3926 /* These surely cannot follow a function tag in C. */
3927 switch (fvdef)
3929 case foperator:
3930 case finlist:
3931 case fignore:
3932 case vignore:
3933 break;
3934 default:
3935 fvdef = fvnone;
3937 break;
3938 case '\0':
3939 if (objdef == otagseen)
3941 make_C_tag (TRUE); /* an Objective C class */
3942 objdef = oignore;
3944 /* If a macro spans multiple lines don't reset its state. */
3945 if (quotednl)
3946 CNL_SAVE_DEFINEDEF ();
3947 else
3948 CNL ();
3949 break;
3950 } /* switch (c) */
3952 } /* while not eof */
/* Release the two line buffers initialised at the top of the function. */
3954 free (lbs[0].lb.buffer);
3955 free (lbs[1].lb.buffer);
3959 * Process either a C++ file or a C file depending on the setting
3960 * of a global flag.
3962 static void
3963 default_C_entries (inf)
3964 FILE *inf;
3966 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: C_entries is called with no extension bits set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3977 /* Always do C++. */
3978 static void
3979 Cplusplus_entries (inf)
3980 FILE *inf;
3982 C_entries (C_PLPL, inf);
3985 /* Always do Java. */
3986 static void
3987 Cjava_entries (inf)
3988 FILE *inf;
3990 C_entries (C_JAVA, inf);
3993 /* Always do C*. */
3994 static void
3995 Cstar_entries (inf)
3996 FILE *inf;
3998 C_entries (C_STAR, inf);
4001 /* Always do Yacc. */
4002 static void
4003 Yacc_entries (inf)
4004 FILE *inf;
4006 C_entries (YACC, inf);
4010 /* Useful macros. */
/* Head of a loop over the remaining lines of FILE_POINTER.  Before each
   pass, and only while feof is false, readline is called to fill
   LINE_BUFFER and CHAR_POINTER is set to the start of its text.  Follow
   the macro with the statement or block to run for every line.  The
   arguments are parenthesized so that any lvalue expression can be
   passed for LINE_BUFFER and CHAR_POINTER.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
         (readline (&(line_buffer), (file_pointer)),                    \
          (char_pointer) = (line_buffer).buffer,                        \
          TRUE);                                                        \
      )
/* True when CP points at the keyword KW and the character right after it
   satisfies notinname.  On a match CP is reassigned to the result of
   skip_spaces applied just past KW, so CP must be a modifiable lvalue; it
   is evaluated more than once.  When any test fails CP is left unchanged,
   because the assignment is the last operand of the && chain.  */
4020 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4021 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4022 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4023 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4024 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4026 /* Similar to LOOKING_AT but compares with strncaseeq, does not use notinname and does not skip spaces after kw: on a match CP is only advanced past the keyword. */
4027 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4028 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4029 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4030 && ((cp) += sizeof(kw)-1)) /* advance past kw; spaces are not skipped */
4033 * Read a file, but do no processing. This is used to do regexp
4034 * matching on files that have no language defined.
4036 static void
4037 just_read_file (inf)
4038 FILE *inf;
4040 register char *dummy;
4042 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4043 continue;
4047 /* Fortran parsing */
4049 static void F_takeprec __P((void));
4050 static void F_getit __P((FILE *));
4052 static void
4053 F_takeprec ()
4055 dbp = skip_spaces (dbp);
4056 if (*dbp != '*')
4057 return;
4058 dbp++;
4059 dbp = skip_spaces (dbp);
4060 if (strneq (dbp, "(*)", 3))
4062 dbp += 3;
4063 return;
4065 if (!ISDIGIT (*dbp))
4067 --dbp; /* force failure */
4068 return;
4071 dbp++;
4072 while (ISDIGIT (*dbp));
4075 static void
4076 F_getit (inf)
4077 FILE *inf;
4079 register char *cp;
4081 dbp = skip_spaces (dbp);
4082 if (*dbp == '\0')
4084 readline (&lb, inf);
4085 dbp = lb.buffer;
4086 if (dbp[5] != '&')
4087 return;
4088 dbp += 6;
4089 dbp = skip_spaces (dbp);
4091 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4092 return;
4093 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4094 continue;
4095 make_tag (dbp, cp-dbp, TRUE,
4096 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4100 static void
4101 Fortran_functions (inf)
4102 FILE *inf;
4104 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4106 if (*dbp == '%')
4107 dbp++; /* Ratfor escape to fortran */
4108 dbp = skip_spaces (dbp);
4109 if (*dbp == '\0')
4110 continue;
4112 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4113 dbp = skip_spaces (dbp);
4115 switch (lowcase (*dbp))
4117 case 'i':
4118 if (nocase_tail ("integer"))
4119 F_takeprec ();
4120 break;
4121 case 'r':
4122 if (nocase_tail ("real"))
4123 F_takeprec ();
4124 break;
4125 case 'l':
4126 if (nocase_tail ("logical"))
4127 F_takeprec ();
4128 break;
4129 case 'c':
4130 if (nocase_tail ("complex") || nocase_tail ("character"))
4131 F_takeprec ();
4132 break;
4133 case 'd':
4134 if (nocase_tail ("double"))
4136 dbp = skip_spaces (dbp);
4137 if (*dbp == '\0')
4138 continue;
4139 if (nocase_tail ("precision"))
4140 break;
4141 continue;
4143 break;
4145 dbp = skip_spaces (dbp);
4146 if (*dbp == '\0')
4147 continue;
4148 switch (lowcase (*dbp))
4150 case 'f':
4151 if (nocase_tail ("function"))
4152 F_getit (inf);
4153 continue;
4154 case 's':
4155 if (nocase_tail ("subroutine"))
4156 F_getit (inf);
4157 continue;
4158 case 'e':
4159 if (nocase_tail ("entry"))
4160 F_getit (inf);
4161 continue;
4162 case 'b':
4163 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4165 dbp = skip_spaces (dbp);
4166 if (*dbp == '\0') /* assume un-named */
4167 make_tag ("blockdata", 9, TRUE,
4168 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4169 else
4170 F_getit (inf); /* look for name */
4172 continue;
4179 * Ada parsing
4180 * Original code by
4181 * Philippe Waroquiers (1998)
4184 static void Ada_getit __P((FILE *, char *));
4186 /* Once we are positioned after an "interesting" keyword, let's get
4187 the real tag value necessary. */
4188 static void
4189 Ada_getit (inf, name_qualifier)
4190 FILE *inf;
4191 char *name_qualifier;
4193 register char *cp;
4194 char *name;
4195 char c;
4197 while (!feof (inf))
4199 dbp = skip_spaces (dbp);
4200 if (*dbp == '\0'
4201 || (dbp[0] == '-' && dbp[1] == '-'))
4203 readline (&lb, inf);
4204 dbp = lb.buffer;
4206 switch (lowcase(*dbp))
4208 case 'b':
4209 if (nocase_tail ("body"))
4211 /* Skipping body of procedure body or package body or ....
4212 resetting qualifier to body instead of spec. */
4213 name_qualifier = "/b";
4214 continue;
4216 break;
4217 case 't':
4218 /* Skipping type of task type or protected type ... */
4219 if (nocase_tail ("type"))
4220 continue;
4221 break;
4223 if (*dbp == '"')
4225 dbp += 1;
4226 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4227 continue;
4229 else
4231 dbp = skip_spaces (dbp);
4232 for (cp = dbp;
4233 (*cp != '\0'
4234 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4235 cp++)
4236 continue;
4237 if (cp == dbp)
4238 return;
4240 c = *cp;
4241 *cp = '\0';
4242 name = concat (dbp, name_qualifier, "");
4243 *cp = c;
4244 make_tag (name, strlen (name), TRUE,
4245 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4246 free (name);
4247 if (c == '"')
4248 dbp = cp + 1;
4249 return;
4253 static void
4254 Ada_funcs (inf)
4255 FILE *inf;
4257 bool inquote = FALSE;
4258 bool skip_till_semicolumn = FALSE;
4260 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4262 while (*dbp != '\0')
4264 /* Skip a string i.e. "abcd". */
4265 if (inquote || (*dbp == '"'))
4267 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4268 if (dbp != NULL)
4270 inquote = FALSE;
4271 dbp += 1;
4272 continue; /* advance char */
4274 else
4276 inquote = TRUE;
4277 break; /* advance line */
4281 /* Skip comments. */
4282 if (dbp[0] == '-' && dbp[1] == '-')
4283 break; /* advance line */
4285 /* Skip character enclosed in single quote i.e. 'a'
4286 and skip single quote starting an attribute i.e. 'Image. */
4287 if (*dbp == '\'')
4289 dbp++ ;
4290 if (*dbp != '\0')
4291 dbp++;
4292 continue;
4295 if (skip_till_semicolumn)
4297 if (*dbp == ';')
4298 skip_till_semicolumn = FALSE;
4299 dbp++;
4300 continue; /* advance char */
4303 /* Search for beginning of a token. */
4304 if (!begtoken (*dbp))
4306 dbp++;
4307 continue; /* advance char */
4310 /* We are at the beginning of a token. */
4311 switch (lowcase(*dbp))
4313 case 'f':
4314 if (!packages_only && nocase_tail ("function"))
4315 Ada_getit (inf, "/f");
4316 else
4317 break; /* from switch */
4318 continue; /* advance char */
4319 case 'p':
4320 if (!packages_only && nocase_tail ("procedure"))
4321 Ada_getit (inf, "/p");
4322 else if (nocase_tail ("package"))
4323 Ada_getit (inf, "/s");
4324 else if (nocase_tail ("protected")) /* protected type */
4325 Ada_getit (inf, "/t");
4326 else
4327 break; /* from switch */
4328 continue; /* advance char */
4330 case 'u':
4331 if (typedefs && !packages_only && nocase_tail ("use"))
4333 /* when tagging types, avoid tagging use type Pack.Typename;
4334 for this, we will skip everything till a ; */
4335 skip_till_semicolumn = TRUE;
4336 continue; /* advance char */
4339 case 't':
4340 if (!packages_only && nocase_tail ("task"))
4341 Ada_getit (inf, "/k");
4342 else if (typedefs && !packages_only && nocase_tail ("type"))
4344 Ada_getit (inf, "/t");
4345 while (*dbp != '\0')
4346 dbp += 1;
4348 else
4349 break; /* from switch */
4350 continue; /* advance char */
4353 /* Look for the end of the token. */
4354 while (!endtoken (*dbp))
4355 dbp++;
4357 } /* advance char */
4358 } /* advance line */
4363 * Unix and microcontroller assembly tag handling
4364 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4365 * Idea by Bob Weiner, Motorola Inc. (1994)
4367 static void
4368 Asm_labels (inf)
4369 FILE *inf;
4371 register char *cp;
4373 LOOP_ON_INPUT_LINES (inf, lb, cp)
4375 /* If first char is alphabetic or one of [_.$], test for colon
4376 following identifier. */
4377 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4379 /* Read past label. */
4380 cp++;
4381 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4382 cp++;
4383 if (*cp == ':' || iswhite (*cp))
4384 /* Found end of label, so copy it and add it to the table. */
4385 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4386 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4393 * Perl support
4394 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4395 * Perl variable names: /^(my|local).../
4396 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4397 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4398 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4400 static void
4401 Perl_functions (inf)
4402 FILE *inf;
4404 char *package = savestr ("main"); /* current package name */
4405 register char *cp;
4407 LOOP_ON_INPUT_LINES (inf, lb, cp)
4409 cp = skip_spaces (cp);
4411 if (LOOKING_AT (cp, "package"))
4413 free (package);
4414 get_tag (cp, &package);
4416 else if (LOOKING_AT (cp, "sub"))
4418 char *pos;
4419 char *sp = cp;
4421 while (!notinname (*cp))
4422 cp++;
4423 if (cp == sp)
4424 continue; /* nothing found */
4425 if ((pos = etags_strchr (sp, ':')) != NULL
4426 && pos < cp && pos[1] == ':')
4427 /* The name is already qualified. */
4428 make_tag (sp, cp - sp, TRUE,
4429 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4430 else
4431 /* Qualify it. */
4433 char savechar, *name;
4435 savechar = *cp;
4436 *cp = '\0';
4437 name = concat (package, "::", sp);
4438 *cp = savechar;
4439 make_tag (name, strlen(name), TRUE,
4440 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4441 free (name);
4444 else if (globals) /* only if we are tagging global vars */
4446 /* Skip a qualifier, if any. */
4447 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4448 /* After "my" or "local", but before any following paren or space. */
4449 char *varstart = cp;
4451 if (qual /* should this be removed? If yes, how? */
4452 && (*cp == '$' || *cp == '@' || *cp == '%'))
4454 varstart += 1;
4456 cp++;
4457 while (ISALNUM (*cp) || *cp == '_');
4459 else if (qual)
4461 /* Should be examining a variable list at this point;
4462 could insist on seeing an open parenthesis. */
4463 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4464 cp++;
4466 else
4467 continue;
4469 make_tag (varstart, cp - varstart, FALSE,
4470 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4473 free (package);
4478 * Python support
4479 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4480 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4481 * More ideas by seb bacon <seb@jamkit.com> (2002)
4483 static void
4484 Python_functions (inf)
4485 FILE *inf;
4487 register char *cp;
4489 LOOP_ON_INPUT_LINES (inf, lb, cp)
4491 cp = skip_spaces (cp);
4492 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4494 char *name = cp;
4495 while (!notinname (*cp) && *cp != ':')
4496 cp++;
4497 make_tag (name, cp - name, TRUE,
4498 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4505 * PHP support
4506 * Look for:
4507 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4508 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4509 * - /^[ \t]*define\(\"[^\"]+/
4510 * Only with --members:
4511 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4512 * Idea by Diez B. Roggisch (2001)
4514 static void
4515 PHP_functions (inf)
4516 FILE *inf;
4518 register char *cp, *name;
4519 bool search_identifier = FALSE;
4521 LOOP_ON_INPUT_LINES (inf, lb, cp)
4523 cp = skip_spaces (cp);
4524 name = cp;
4525 if (search_identifier
4526 && *cp != '\0')
4528 while (!notinname (*cp))
4529 cp++;
4530 make_tag (name, cp - name, TRUE,
4531 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4532 search_identifier = FALSE;
4534 else if (LOOKING_AT (cp, "function"))
4536 if(*cp == '&')
4537 cp = skip_spaces (cp+1);
4538 if(*cp != '\0')
4540 name = cp;
4541 while (!notinname (*cp))
4542 cp++;
4543 make_tag (name, cp - name, TRUE,
4544 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4546 else
4547 search_identifier = TRUE;
4549 else if (LOOKING_AT (cp, "class"))
4551 if (*cp != '\0')
4553 name = cp;
4554 while (*cp != '\0' && !iswhite (*cp))
4555 cp++;
4556 make_tag (name, cp - name, FALSE,
4557 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4559 else
4560 search_identifier = TRUE;
4562 else if (strneq (cp, "define", 6)
4563 && (cp = skip_spaces (cp+6))
4564 && *cp++ == '('
4565 && (*cp == '"' || *cp == '\''))
4567 char quote = *cp++;
4568 name = cp;
4569 while (*cp != quote && *cp != '\0')
4570 cp++;
4571 make_tag (name, cp - name, FALSE,
4572 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4574 else if (members
4575 && LOOKING_AT (cp, "var")
4576 && *cp == '$')
4578 name = cp;
4579 while (!notinname(*cp))
4580 cp++;
4581 make_tag (name, cp - name, FALSE,
4582 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4589 * Cobol tag functions
4590 * We could look for anything that could be a paragraph name.
4591 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4592 * Idea by Corny de Souza (1993)
4594 static void
4595 Cobol_paragraphs (inf)
4596 FILE *inf;
4598 register char *bp, *ep;
4600 LOOP_ON_INPUT_LINES (inf, lb, bp)
4602 if (lb.len < 9)
4603 continue;
4604 bp += 8;
4606 /* If eoln, compiler option or comment ignore whole line. */
4607 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4608 continue;
4610 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4611 continue;
4612 if (*ep++ == '.')
4613 make_tag (bp, ep - bp, TRUE,
4614 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4620 * Makefile support
4621 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4623 static void
4624 Makefile_targets (inf)
4625 FILE *inf;
4627 register char *bp;
4629 LOOP_ON_INPUT_LINES (inf, lb, bp)
4631 if (*bp == '\t' || *bp == '#')
4632 continue;
4633 while (*bp != '\0' && *bp != '=' && *bp != ':')
4634 bp++;
4635 if (*bp == ':' || (globals && *bp == '='))
4637 /* We should detect if there is more than one tag, but we do not.
4638 We just skip initial and final spaces. */
4639 char * namestart = skip_spaces (lb.buffer);
4640 while (--bp > namestart)
4641 if (!notinname (*bp))
4642 break;
4643 make_tag (namestart, bp - namestart + 1, TRUE,
4644 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4651 * Pascal parsing
4652 * Original code by Mosur K. Mohan (1989)
4654 * Locates tags for procedures & functions. Doesn't do any type- or
4655 * var-definitions. It does look for the keyword "extern" or
4656 * "forward" immediately following the procedure statement; if found,
4657 * the tag is skipped.
4659 static void
4660 Pascal_functions (inf)
4661 FILE *inf;
4663 linebuffer tline; /* mostly copied from C_entries */
4664 long save_lcno;
4665 int save_lineno, namelen, taglen;
4666 char c, *name;
4668 bool /* each of these flags is TRUE if: */
4669 incomment, /* point is inside a comment */
4670 inquote, /* point is inside '..' string */
4671 get_tagname, /* point is after PROCEDURE/FUNCTION
4672 keyword, so next item = potential tag */
4673 found_tag, /* point is after a potential tag */
4674 inparms, /* point is within parameter-list */
4675 verify_tag; /* point has passed the parm-list, so the
4676 next token will determine whether this
4677 is a FORWARD/EXTERN to be ignored, or
4678 whether it is a real tag */
4680 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4681 name = NULL; /* keep compiler quiet */
4682 dbp = lb.buffer;
4683 *dbp = '\0';
4684 linebuffer_init (&tline);
4686 incomment = inquote = FALSE;
4687 found_tag = FALSE; /* have a proc name; check if extern */
4688 get_tagname = FALSE; /* found "procedure" keyword */
4689 inparms = FALSE; /* found '(' after "proc" */
4690 verify_tag = FALSE; /* check if "extern" is ahead */
4693 while (!feof (inf)) /* long main loop to get next char */
4695 c = *dbp++;
4696 if (c == '\0') /* if end of line */
4698 readline (&lb, inf);
4699 dbp = lb.buffer;
4700 if (*dbp == '\0')
4701 continue;
4702 if (!((found_tag && verify_tag)
4703 || get_tagname))
4704 c = *dbp++; /* only if don't need *dbp pointing
4705 to the beginning of the name of
4706 the procedure or function */
4708 if (incomment)
4710 if (c == '}') /* within { } comments */
4711 incomment = FALSE;
4712 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4714 dbp++;
4715 incomment = FALSE;
4717 continue;
4719 else if (inquote)
4721 if (c == '\'')
4722 inquote = FALSE;
4723 continue;
4725 else
4726 switch (c)
4728 case '\'':
4729 inquote = TRUE; /* found first quote */
4730 continue;
4731 case '{': /* found open { comment */
4732 incomment = TRUE;
4733 continue;
4734 case '(':
4735 if (*dbp == '*') /* found open (* comment */
4737 incomment = TRUE;
4738 dbp++;
4740 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4741 inparms = TRUE;
4742 continue;
4743 case ')': /* end of parms list */
4744 if (inparms)
4745 inparms = FALSE;
4746 continue;
4747 case ';':
4748 if (found_tag && !inparms) /* end of proc or fn stmt */
4750 verify_tag = TRUE;
4751 break;
4753 continue;
4755 if (found_tag && verify_tag && (*dbp != ' '))
4757 /* Check if this is an "extern" declaration. */
4758 if (*dbp == '\0')
4759 continue;
4760 if (lowcase (*dbp == 'e'))
4762 if (nocase_tail ("extern")) /* superfluous, really! */
4764 found_tag = FALSE;
4765 verify_tag = FALSE;
4768 else if (lowcase (*dbp) == 'f')
4770 if (nocase_tail ("forward")) /* check for forward reference */
4772 found_tag = FALSE;
4773 verify_tag = FALSE;
4776 if (found_tag && verify_tag) /* not external proc, so make tag */
4778 found_tag = FALSE;
4779 verify_tag = FALSE;
4780 make_tag (name, namelen, TRUE,
4781 tline.buffer, taglen, save_lineno, save_lcno);
4782 continue;
4785 if (get_tagname) /* grab name of proc or fn */
4787 char *cp;
4789 if (*dbp == '\0')
4790 continue;
4792 /* Find block name. */
4793 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4794 continue;
4796 /* Save all values for later tagging. */
4797 linebuffer_setlen (&tline, lb.len);
4798 strcpy (tline.buffer, lb.buffer);
4799 save_lineno = lineno;
4800 save_lcno = linecharno;
4801 name = tline.buffer + (dbp - lb.buffer);
4802 namelen = cp - dbp;
4803 taglen = cp - lb.buffer + 1;
4805 dbp = cp; /* set dbp to e-o-token */
4806 get_tagname = FALSE;
4807 found_tag = TRUE;
4808 continue;
4810 /* And proceed to check for "extern". */
4812 else if (!incomment && !inquote && !found_tag)
4814 /* Check for proc/fn keywords. */
4815 switch (lowcase (c))
4817 case 'p':
4818 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4819 get_tagname = TRUE;
4820 continue;
4821 case 'f':
4822 if (nocase_tail ("unction"))
4823 get_tagname = TRUE;
4824 continue;
4827 } /* while not eof */
4829 free (tline.buffer);
4834 * Lisp tag functions
4835 * look for (def or (DEF, quote or QUOTE
4838 static void L_getit __P((void));
4840 static void
4841 L_getit ()
4843 if (*dbp == '\'') /* Skip prefix quote */
4844 dbp++;
4845 else if (*dbp == '(')
4847 dbp++;
4848 /* Try to skip "(quote " */
4849 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4850 /* Ok, then skip "(" before name in (defstruct (foo)) */
4851 dbp = skip_spaces (dbp);
4853 get_tag (dbp, NULL);
4856 static void
4857 Lisp_functions (inf)
4858 FILE *inf;
4860 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4862 if (dbp[0] != '(')
4863 continue;
4865 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4867 dbp = skip_non_spaces (dbp);
4868 dbp = skip_spaces (dbp);
4869 L_getit ();
4871 else
4873 /* Check for (foo::defmumble name-defined ... */
4875 dbp++;
4876 while (!notinname (*dbp) && *dbp != ':');
4877 if (*dbp == ':')
4880 dbp++;
4881 while (*dbp == ':');
4883 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4885 dbp = skip_non_spaces (dbp);
4886 dbp = skip_spaces (dbp);
4887 L_getit ();
4896 * Lua script language parsing
4897 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4899 * "function" and "local function" are tags if they start at column 1.
4901 static void
4902 Lua_functions (inf)
4903 FILE *inf;
4905 register char *bp;
4907 LOOP_ON_INPUT_LINES (inf, lb, bp)
4909 if (bp[0] != 'f' && bp[0] != 'l')
4910 continue;
4912 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4914 if (LOOKING_AT (bp, "function"))
4915 get_tag (bp, NULL);
4921 * Postscript tags
4922 * Just look for lines where the first character is '/'
4923 * Also look at "defineps" for PSWrap
4924 * Ideas by:
4925 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4926 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4928 static void
4929 PS_functions (inf)
4930 FILE *inf;
4932 register char *bp, *ep;
4934 LOOP_ON_INPUT_LINES (inf, lb, bp)
4936 if (bp[0] == '/')
4938 for (ep = bp+1;
4939 *ep != '\0' && *ep != ' ' && *ep != '{';
4940 ep++)
4941 continue;
4942 make_tag (bp, ep - bp, TRUE,
4943 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4945 else if (LOOKING_AT (bp, "defineps"))
4946 get_tag (bp, NULL);
4952 * Forth tags
4953 * Ignore anything after \ followed by space or in ( )
4954 * Look for words defined by :
4955 * Look for constant, code, create, defer, value, and variable
4956 * OBP extensions: Look for buffer:, field,
4957 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4959 static void
4960 Forth_words (inf)
4961 FILE *inf;
4963 register char *bp;
4965 LOOP_ON_INPUT_LINES (inf, lb, bp)
4966 while ((bp = skip_spaces (bp))[0] != '\0')
4967 if (bp[0] == '\\' && iswhite(bp[1]))
4968 break; /* read next line */
4969 else if (bp[0] == '(' && iswhite(bp[1]))
4970 do /* skip to ) or eol */
4971 bp++;
4972 while (*bp != ')' && *bp != '\0');
4973 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4974 || LOOKING_AT_NOCASE (bp, "constant")
4975 || LOOKING_AT_NOCASE (bp, "code")
4976 || LOOKING_AT_NOCASE (bp, "create")
4977 || LOOKING_AT_NOCASE (bp, "defer")
4978 || LOOKING_AT_NOCASE (bp, "value")
4979 || LOOKING_AT_NOCASE (bp, "variable")
4980 || LOOKING_AT_NOCASE (bp, "buffer:")
4981 || LOOKING_AT_NOCASE (bp, "field"))
4982 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4983 else
4984 bp = skip_non_spaces (bp);
4989 * Scheme tag functions
4990 * look for (def... xyzzy
4991 * (def... (xyzzy
4992 * (def ... ((...(xyzzy ....
4993 * (set! xyzzy
4994 * Original code by Ken Haase (1985?)
4996 static void
4997 Scheme_functions (inf)
4998 FILE *inf;
5000 register char *bp;
5002 LOOP_ON_INPUT_LINES (inf, lb, bp)
5004 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5006 bp = skip_non_spaces (bp+4);
5007 /* Skip over open parens and white space. Don't continue past
5008 '\0'. */
5009 while (*bp && notinname (*bp))
5010 bp++;
5011 get_tag (bp, NULL);
5013 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5014 get_tag (bp, NULL);
5019 /* Find tags in TeX and LaTeX input files. */
5021 /* TEX_toktab is a table of TeX control sequences that define tags.
5022 * Each entry records one such control sequence.
5024 * Original code from who knows whom.
5025 * Ideas by:
5026 * Stefan Monnier (2002)
5029 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5031 /* Default set of control sequences to put into TEX_toktab.
5032 The value of environment var TEXTAGS is prepended to this. */
5033 static char *TEX_defenv = "\
5034 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5035 :part:appendix:entry:index:def\
5036 :newcommand:renewcommand:newenvironment:renewenvironment";
5038 static void TEX_mode __P((FILE *));
5039 static void TEX_decode_env __P((char *, char *));
5041 static char TEX_esc = '\\';
5042 static char TEX_opgrp = '{';
5043 static char TEX_clgrp = '}';
5046 * TeX/LaTeX scanning loop.
5048 static void
5049 TeX_commands (inf)
5050 FILE *inf;
5052 char *cp;
5053 linebuffer *key;
5055 /* Select either \ or ! as escape character. */
5056 TEX_mode (inf);
5058 /* Initialize token table once from environment. */
5059 if (TEX_toktab == NULL)
5060 TEX_decode_env ("TEXTAGS", TEX_defenv);
5062 LOOP_ON_INPUT_LINES (inf, lb, cp)
5064 /* Look at each TEX keyword in line. */
5065 for (;;)
5067 /* Look for a TEX escape. */
5068 while (*cp++ != TEX_esc)
5069 if (cp[-1] == '\0' || cp[-1] == '%')
5070 goto tex_next_line;
5072 for (key = TEX_toktab; key->buffer != NULL; key++)
5073 if (strneq (cp, key->buffer, key->len))
5075 register char *p;
5076 int namelen, linelen;
5077 bool opgrp = FALSE;
5079 cp = skip_spaces (cp + key->len);
5080 if (*cp == TEX_opgrp)
5082 opgrp = TRUE;
5083 cp++;
5085 for (p = cp;
5086 (!iswhite (*p) && *p != '#' &&
5087 *p != TEX_opgrp && *p != TEX_clgrp);
5088 p++)
5089 continue;
5090 namelen = p - cp;
5091 linelen = lb.len;
5092 if (!opgrp || *p == TEX_clgrp)
5094 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5095 p++;
5096 linelen = p - lb.buffer + 1;
5098 make_tag (cp, namelen, TRUE,
5099 lb.buffer, linelen, lineno, linecharno);
5100 goto tex_next_line; /* We only tag a line once */
5103 tex_next_line:
5108 #define TEX_LESC '\\'
5109 #define TEX_SESC '!'
5111 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5112 chars accordingly. */
5113 static void
5114 TEX_mode (inf)
5115 FILE *inf;
5117 int c;
5119 while ((c = getc (inf)) != EOF)
5121 /* Skip to next line if we hit the TeX comment char. */
5122 if (c == '%')
5123 while (c != '\n' && c != EOF)
5124 c = getc (inf);
5125 else if (c == TEX_LESC || c == TEX_SESC )
5126 break;
5129 if (c == TEX_LESC)
5131 TEX_esc = TEX_LESC;
5132 TEX_opgrp = '{';
5133 TEX_clgrp = '}';
5135 else
5137 TEX_esc = TEX_SESC;
5138 TEX_opgrp = '<';
5139 TEX_clgrp = '>';
5141 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5142 No attempt is made to correct the situation. */
5143 rewind (inf);
5146 /* Read environment and prepend it to the default string.
5147 Build token table. */
5148 static void
5149 TEX_decode_env (evarname, defenv)
5150 char *evarname;
5151 char *defenv;
5153 register char *env, *p;
5154 int i, len;
5156 /* Append default string to environment. */
5157 env = getenv (evarname);
5158 if (!env)
5159 env = defenv;
5160 else
5162 char *oldenv = env;
5163 env = concat (oldenv, defenv, "");
5166 /* Allocate a token table */
5167 for (len = 1, p = env; p;)
5168 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5169 len++;
5170 TEX_toktab = xnew (len, linebuffer);
5172 /* Unpack environment string into token table. Be careful about */
5173 /* zero-length strings (leading ':', "::" and trailing ':') */
5174 for (i = 0; *env != '\0';)
5176 p = etags_strchr (env, ':');
5177 if (!p) /* End of environment string. */
5178 p = env + strlen (env);
5179 if (p - env > 0)
5180 { /* Only non-zero strings. */
5181 TEX_toktab[i].buffer = savenstr (env, p - env);
5182 TEX_toktab[i].len = p - env;
5183 i++;
5185 if (*p)
5186 env = p + 1;
5187 else
5189 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5190 TEX_toktab[i].len = 0;
5191 break;
5197 /* Texinfo support. Dave Love, Mar. 2000. */
5198 static void
5199 Texinfo_nodes (inf)
5200 FILE * inf;
5202 char *cp, *start;
5203 LOOP_ON_INPUT_LINES (inf, lb, cp)
5204 if (LOOKING_AT (cp, "@node"))
5206 start = cp;
5207 while (*cp != '\0' && *cp != ',')
5208 cp++;
5209 make_tag (start, cp - start, TRUE,
5210 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5216 * HTML support.
5217 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5218 * Contents of <a name=xxx> are tags with name xxx.
5220 * Francesco Potortì, 2002.
5222 static void
5223 HTML_labels (inf)
5224 FILE * inf;
5226 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5227 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5228 bool intag = FALSE; /* inside an html tag, looking for ID= */
5229 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5230 char *end;
5233 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5235 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5236 for (;;) /* loop on the same line */
5238 if (skiptag) /* skip HTML tag */
5240 while (*dbp != '\0' && *dbp != '>')
5241 dbp++;
5242 if (*dbp == '>')
5244 dbp += 1;
5245 skiptag = FALSE;
5246 continue; /* look on the same line */
5248 break; /* go to next line */
5251 else if (intag) /* look for "name=" or "id=" */
5253 while (*dbp != '\0' && *dbp != '>'
5254 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5255 dbp++;
5256 if (*dbp == '\0')
5257 break; /* go to next line */
5258 if (*dbp == '>')
5260 dbp += 1;
5261 intag = FALSE;
5262 continue; /* look on the same line */
5264 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5265 || LOOKING_AT_NOCASE (dbp, "id="))
5267 bool quoted = (dbp[0] == '"');
5269 if (quoted)
5270 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5271 continue;
5272 else
5273 for (end = dbp; *end != '\0' && intoken (*end); end++)
5274 continue;
5275 linebuffer_setlen (&token_name, end - dbp);
5276 strncpy (token_name.buffer, dbp, end - dbp);
5277 token_name.buffer[end - dbp] = '\0';
5279 dbp = end;
5280 intag = FALSE; /* we found what we looked for */
5281 skiptag = TRUE; /* skip to the end of the tag */
5282 getnext = TRUE; /* then grab the text */
5283 continue; /* look on the same line */
5285 dbp += 1;
5288 else if (getnext) /* grab next tokens and tag them */
5290 dbp = skip_spaces (dbp);
5291 if (*dbp == '\0')
5292 break; /* go to next line */
5293 if (*dbp == '<')
5295 intag = TRUE;
5296 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5297 continue; /* look on the same line */
5300 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5301 continue;
5302 make_tag (token_name.buffer, token_name.len, TRUE,
5303 dbp, end - dbp, lineno, linecharno);
5304 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5305 getnext = FALSE;
5306 break; /* go to next line */
5309 else /* look for an interesting HTML tag */
5311 while (*dbp != '\0' && *dbp != '<')
5312 dbp++;
5313 if (*dbp == '\0')
5314 break; /* go to next line */
5315 intag = TRUE;
5316 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5318 inanchor = TRUE;
5319 continue; /* look on the same line */
5321 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5322 || LOOKING_AT_NOCASE (dbp, "<h1>")
5323 || LOOKING_AT_NOCASE (dbp, "<h2>")
5324 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5326 intag = FALSE;
5327 getnext = TRUE;
5328 continue; /* look on the same line */
5330 dbp += 1;
5337 * Prolog support
5339 * Assumes that the predicate or rule starts at column 0.
5340 * Only the first clause of a predicate or rule is added.
5341 * Original code by Sunichirou Sugou (1989)
5342 * Rewritten by Anders Lindgren (1996)
5344 static int prolog_pr __P((char *, char *));
5345 static void prolog_skip_comment __P((linebuffer *, FILE *));
5346 static int prolog_atom __P((char *, int));
5348 static void
5349 Prolog_functions (inf)
5350 FILE *inf;
5352 char *cp, *last;
5353 int len;
5354 int allocated;
5356 allocated = 0;
5357 len = 0;
5358 last = NULL;
5360 LOOP_ON_INPUT_LINES (inf, lb, cp)
5362 if (cp[0] == '\0') /* Empty line */
5363 continue;
5364 else if (iswhite (cp[0])) /* Not a predicate */
5365 continue;
5366 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5367 prolog_skip_comment (&lb, inf);
5368 else if ((len = prolog_pr (cp, last)) > 0)
5370 /* Predicate or rule. Store the function name so that we
5371 only generate a tag for the first clause. */
5372 if (last == NULL)
5373 last = xnew(len + 1, char);
5374 else if (len + 1 > allocated)
5375 xrnew (last, len + 1, char);
5376 allocated = len + 1;
5377 strncpy (last, cp, len);
5378 last[len] = '\0';
5381 free (last);
5385 static void
5386 prolog_skip_comment (plb, inf)
5387 linebuffer *plb;
5388 FILE *inf;
5390 char *cp;
5394 for (cp = plb->buffer; *cp != '\0'; cp++)
5395 if (cp[0] == '*' && cp[1] == '/')
5396 return;
5397 readline (plb, inf);
5399 while (!feof(inf));
5403 * A predicate or rule definition is added if it matches:
5404 * <beginning of line><Prolog Atom><whitespace>(
5405 * or <beginning of line><Prolog Atom><whitespace>:-
5407 * It is added to the tags database if it doesn't match the
5408 * name of the previous clause header.
5410 * Return the size of the name of the predicate or rule, or 0 if no
5411 * header was found.
5413 static int
5414 prolog_pr (s, last)
5415 char *s;
5416 char *last; /* Name of last clause. */
5418 int pos;
5419 int len;
5421 pos = prolog_atom (s, 0);
5422 if (pos < 1)
5423 return 0;
5425 len = pos;
5426 pos = skip_spaces (s + pos) - s;
5428 if ((s[pos] == '.'
5429 || (s[pos] == '(' && (pos += 1))
5430 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5431 && (last == NULL /* save only the first clause */
5432 || len != (int)strlen (last)
5433 || !strneq (s, last, len)))
5435 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5436 return len;
5438 else
5439 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5503 * Support for Erlang
5505 * Generates tags for functions, defines, and records.
5506 * Assumes that Erlang functions start at column 0.
5507 * Original code by Anders Lindgren (1996)
5509 static int erlang_func __P((char *, char *));
5510 static void erlang_attribute __P((char *));
5511 static int erlang_atom __P((char *));
5513 static void
5514 Erlang_functions (inf)
5515 FILE *inf;
5517 char *cp, *last;
5518 int len;
5519 int allocated;
5521 allocated = 0;
5522 len = 0;
5523 last = NULL;
5525 LOOP_ON_INPUT_LINES (inf, lb, cp)
5527 if (cp[0] == '\0') /* Empty line */
5528 continue;
5529 else if (iswhite (cp[0])) /* Not function nor attribute */
5530 continue;
5531 else if (cp[0] == '%') /* comment */
5532 continue;
5533 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5534 continue;
5535 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5537 erlang_attribute (cp);
5538 if (last != NULL)
5540 free (last);
5541 last = NULL;
5544 else if ((len = erlang_func (cp, last)) > 0)
5547 * Function. Store the function name so that we only
5548 * generates a tag for the first clause.
5550 if (last == NULL)
5551 last = xnew (len + 1, char);
5552 else if (len + 1 > allocated)
5553 xrnew (last, len + 1, char);
5554 allocated = len + 1;
5555 strncpy (last, cp, len);
5556 last[len] = '\0';
5559 free (last);
5564 * A function definition is added if it matches:
5565 * <beginning of line><Erlang Atom><whitespace>(
5567 * It is added to the tags database if it doesn't match the
5568 * name of the previous clause header.
5570 * Return the size of the name of the function, or 0 if no function
5571 * was found.
5573 static int
5574 erlang_func (s, last)
5575 char *s;
5576 char *last; /* Name of last clause. */
5578 int pos;
5579 int len;
5581 pos = erlang_atom (s);
5582 if (pos < 1)
5583 return 0;
5585 len = pos;
5586 pos = skip_spaces (s + pos) - s;
5588 /* Save only the first clause. */
5589 if (s[pos++] == '('
5590 && (last == NULL
5591 || len != (int)strlen (last)
5592 || !strneq (s, last, len)))
5594 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5595 return len;
5598 return 0;
5603 * Handle attributes. Currently, tags are generated for defines
5604 * and records.
5606 * They are on the form:
5607 * -define(foo, bar).
5608 * -define(Foo(M, N), M+N).
5609 * -record(graph, {vtab = notable, cyclic = true}).
5611 static void
5612 erlang_attribute (s)
5613 char *s;
5615 char *cp = s;
5617 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5618 && *cp++ == '(')
5620 int len = erlang_atom (skip_spaces (cp));
5621 if (len > 0)
5622 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5624 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* The atom is quoted: look for the closing quote.  A backslash
         quotes the character that follows it.  */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\' && s[++n] == '\0')
            return 0;
          n++;
        }
      n++;                      /* the closing quote is consumed too */
    }
  else if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (n = 1; ISALNUM (s[n]) || s[n] == '_'; n++)
        continue;
    }

  return n;
}
5658 static char *scan_separators __P((char *));
5659 static void add_regex __P((char *, language *));
5660 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];           /* the separator is the first character */
  char *dst = name;             /* where the next output character goes */
  char *src;                    /* the input character being looked at */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* A backslash quotes the character that follows it.  At the
             end of the string there is nothing to quote, and the
             backslash is dropped.  */
          char quoted = *++src;

          if (quoted == '\0')
            break;
          switch (quoted)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;   /* BS (back space) */
            case 'd': *dst++ = 0177; break;   /* DEL (delete) */
            case 'e': *dst++ = 033; break;    /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;   /* NL (new line) */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator stands for itself.  When something
                 else is quoted, the quote is preserved.  */
              if (quoted != sep)
                *dst++ = '\\';
              *dst++ = quoted;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: end of the field */
      else
        *dst++ = *src;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string.  */
  *dst = '\0';
  return src;
}
5722 /* Look at the argument of --regex or --no-regex and do the right
5723 thing. Same for each line of a regexp file. */
5724 static void
5725 analyse_regex (regex_arg)
5726 char *regex_arg;
5728 if (regex_arg == NULL)
5730 free_regexps (); /* --no-regex: remove existing regexps */
5731 return;
5734 /* A real --regexp option or a line in a regexp file. */
5735 switch (regex_arg[0])
5737 /* Comments in regexp file or null arg to --regex. */
5738 case '\0':
5739 case ' ':
5740 case '\t':
5741 break;
5743 /* Read a regex file. This is recursive and may result in a
5744 loop, which will stop when the file descriptors are exhausted. */
5745 case '@':
5747 FILE *regexfp;
5748 linebuffer regexbuf;
5749 char *regexfile = regex_arg + 1;
5751 /* regexfile is a file containing regexps, one per line. */
5752 regexfp = fopen (regexfile, "r");
5753 if (regexfp == NULL)
5755 pfatal (regexfile);
5756 return;
5758 linebuffer_init (&regexbuf);
5759 while (readline_internal (&regexbuf, regexfp) > 0)
5760 analyse_regex (regexbuf.buffer);
5761 free (regexbuf.buffer);
5762 fclose (regexfp);
5764 break;
5766 /* Regexp to be used for a specific language only. */
5767 case '{':
5769 language *lang;
5770 char *lang_name = regex_arg + 1;
5771 char *cp;
5773 for (cp = lang_name; *cp != '}'; cp++)
5774 if (*cp == '\0')
5776 error ("unterminated language name in regex: %s", regex_arg);
5777 return;
5779 *cp++ = '\0';
5780 lang = get_language_from_langname (lang_name);
5781 if (lang == NULL)
5782 return;
5783 add_regex (cp, lang);
5785 break;
5787 /* Regexp to be used for any language. */
5788 default:
5789 add_regex (regex_arg, NULL);
5790 break;
5794 /* Separate the regexp pattern, compile it,
5795 and care for optional name and modifiers. */
5796 static void
5797 add_regex (regexp_pattern, lang)
5798 char *regexp_pattern;
5799 language *lang;
5801 static struct re_pattern_buffer zeropattern;
5802 char sep, *pat, *name, *modifiers;
5803 const char *err;
5804 struct re_pattern_buffer *patbuf;
5805 regexp *rp;
5806 bool
5807 force_explicit_name = TRUE, /* do not use implicit tag names */
5808 ignore_case = FALSE, /* case is significant */
5809 multi_line = FALSE, /* matches are done one line at a time */
5810 single_line = FALSE; /* dot does not match newline */
5813 if (strlen(regexp_pattern) < 3)
5815 error ("null regexp", (char *)NULL);
5816 return;
5818 sep = regexp_pattern[0];
5819 name = scan_separators (regexp_pattern);
5820 if (name == NULL)
5822 error ("%s: unterminated regexp", regexp_pattern);
5823 return;
5825 if (name[1] == sep)
5827 error ("null name for regexp \"%s\"", regexp_pattern);
5828 return;
5830 modifiers = scan_separators (name);
5831 if (modifiers == NULL) /* no terminating separator --> no name */
5833 modifiers = name;
5834 name = "";
5836 else
5837 modifiers += 1; /* skip separator */
5839 /* Parse regex modifiers. */
5840 for (; modifiers[0] != '\0'; modifiers++)
5841 switch (modifiers[0])
5843 case 'N':
5844 if (modifiers == name)
5845 error ("forcing explicit tag name but no name, ignoring", NULL);
5846 force_explicit_name = TRUE;
5847 break;
5848 case 'i':
5849 ignore_case = TRUE;
5850 break;
5851 case 's':
5852 single_line = TRUE;
5853 /* FALLTHRU */
5854 case 'm':
5855 multi_line = TRUE;
5856 need_filebuf = TRUE;
5857 break;
5858 default:
5860 char wrongmod [2];
5861 wrongmod[0] = modifiers[0];
5862 wrongmod[1] = '\0';
5863 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5865 break;
5868 patbuf = xnew (1, struct re_pattern_buffer);
5869 *patbuf = zeropattern;
5870 if (ignore_case)
5872 static char lc_trans[CHARS];
5873 int i;
5874 for (i = 0; i < CHARS; i++)
5875 lc_trans[i] = lowcase (i);
5876 patbuf->translate = lc_trans; /* translation table to fold case */
5879 if (multi_line)
5880 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5881 else
5882 pat = regexp_pattern;
5884 if (single_line)
5885 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5886 else
5887 re_set_syntax (RE_SYNTAX_EMACS);
5889 err = re_compile_pattern (pat, strlen (pat), patbuf);
5890 if (multi_line)
5891 free (pat);
5892 if (err != NULL)
5894 error ("%s while compiling pattern", err);
5895 return;
5898 rp = p_head;
5899 p_head = xnew (1, regexp);
5900 p_head->pattern = savestr (regexp_pattern);
5901 p_head->p_next = rp;
5902 p_head->lang = lang;
5903 p_head->pat = patbuf;
5904 p_head->name = savestr (name);
5905 p_head->error_signaled = FALSE;
5906 p_head->force_explicit_name = force_explicit_name;
5907 p_head->ignore_case = ignore_case;
5908 p_head->multi_line = multi_line;
5912 * Do the substitutions indicated by the regular expression and
5913 * arguments.
5915 static char *
5916 substitute (in, out, regs)
5917 char *in, *out;
5918 struct re_registers *regs;
5920 char *result, *t;
5921 int size, dig, diglen;
5923 result = NULL;
5924 size = strlen (out);
5926 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5927 if (out[size - 1] == '\\')
5928 fatal ("pattern error in \"%s\"", out);
5929 for (t = etags_strchr (out, '\\');
5930 t != NULL;
5931 t = etags_strchr (t + 2, '\\'))
5932 if (ISDIGIT (t[1]))
5934 dig = t[1] - '0';
5935 diglen = regs->end[dig] - regs->start[dig];
5936 size += diglen - 2;
5938 else
5939 size -= 1;
5941 /* Allocate space and do the substitutions. */
5942 assert (size >= 0);
5943 result = xnew (size + 1, char);
5945 for (t = result; *out != '\0'; out++)
5946 if (*out == '\\' && ISDIGIT (*++out))
5948 dig = *out - '0';
5949 diglen = regs->end[dig] - regs->start[dig];
5950 strncpy (t, in + regs->start[dig], diglen);
5951 t += diglen;
5953 else
5954 *t++ = *out;
5955 *t = '\0';
5957 assert (t <= result + size);
5958 assert (t - result == (int)strlen (result));
5960 return result;
5963 /* Deallocate all regexps. */
5964 static void
5965 free_regexps ()
5967 regexp *rp;
5968 while (p_head != NULL)
5970 rp = p_head->p_next;
5971 free (p_head->pattern);
5972 free (p_head->name);
5973 free (p_head);
5974 p_head = rp;
5976 return;
5980 * Reads the whole file as a single string from `filebuf' and looks for
5981 * multi-line regular expressions, creating tags on matches.
5982 * readline already dealt with normal regexps.
5984 * Idea by Ben Wing <ben@666.com> (2002).
5986 static void
5987 regex_tag_multiline ()
5989 char *buffer = filebuf.buffer;
5990 regexp *rp;
5991 char *name;
5993 for (rp = p_head; rp != NULL; rp = rp->p_next)
5995 int match = 0;
5997 if (!rp->multi_line)
5998 continue; /* skip normal regexps */
6000 /* Generic initialisations before parsing file from memory. */
6001 lineno = 1; /* reset global line number */
6002 charno = 0; /* reset global char number */
6003 linecharno = 0; /* reset global char number of line start */
6005 /* Only use generic regexps or those for the current language. */
6006 if (rp->lang != NULL && rp->lang != curfdp->lang)
6007 continue;
6009 while (match >= 0 && match < filebuf.len)
6011 match = re_search (rp->pat, buffer, filebuf.len, charno,
6012 filebuf.len - match, &rp->regs);
6013 switch (match)
6015 case -2:
6016 /* Some error. */
6017 if (!rp->error_signaled)
6019 error ("regexp stack overflow while matching \"%s\"",
6020 rp->pattern);
6021 rp->error_signaled = TRUE;
6023 break;
6024 case -1:
6025 /* No match. */
6026 break;
6027 default:
6028 if (match == rp->regs.end[0])
6030 if (!rp->error_signaled)
6032 error ("regexp matches the empty string: \"%s\"",
6033 rp->pattern);
6034 rp->error_signaled = TRUE;
6036 match = -3; /* exit from while loop */
6037 break;
6040 /* Match occurred. Construct a tag. */
6041 while (charno < rp->regs.end[0])
6042 if (buffer[charno++] == '\n')
6043 lineno++, linecharno = charno;
6044 name = rp->name;
6045 if (name[0] == '\0')
6046 name = NULL;
6047 else /* make a named tag */
6048 name = substitute (buffer, rp->name, &rp->regs);
6049 if (rp->force_explicit_name)
6050 /* Force explicit tag name, if a name is there. */
6051 pfnote (name, TRUE, buffer + linecharno,
6052 charno - linecharno + 1, lineno, linecharno);
6053 else
6054 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6055 charno - linecharno + 1, lineno, linecharno);
6056 break;
6063 static bool
6064 nocase_tail (cp)
6065 char *cp;
6067 register int len = 0;
6069 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6070 cp++, len++;
6071 if (*cp == '\0' && !intoken (dbp[len]))
6073 dbp += len;
6074 return TRUE;
6076 return FALSE;
6079 static void
6080 get_tag (bp, namepp)
6081 register char *bp;
6082 char **namepp;
6084 register char *cp = bp;
6086 if (*bp != '\0')
6088 /* Go till you get to white space or a syntactic break */
6089 for (cp = bp + 1; !notinname (*cp); cp++)
6090 continue;
6091 make_tag (bp, cp - bp, TRUE,
6092 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6095 if (namepp != NULL)
6096 *namepp = savenstr (bp, cp - bp);
6100 * Read a line of text from `stream' into `lbp', excluding the
6101 * newline or CR-NL, if any. Return the number of characters read from
6102 * `stream', which is the length of the line including the newline.
6104 * On DOS or Windows we do not count the CR character, if any before the
6105 * NL, in the returned length; this mirrors the behavior of Emacs on those
6106 * platforms (for text files, it translates CR-NL to NL as it reads in the
6107 * file).
6109 * If multi-line regular expressions are requested, each line read is
6110 * appended to `filebuf'.
6112 static long
6113 readline_internal (lbp, stream)
6114 linebuffer *lbp;
6115 register FILE *stream;
6117 char *buffer = lbp->buffer;
6118 register char *p = lbp->buffer;
6119 register char *pend;
6120 int chars_deleted;
6122 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6124 for (;;)
6126 register int c = getc (stream);
6127 if (p == pend)
6129 /* We're at the end of linebuffer: expand it. */
6130 lbp->size *= 2;
6131 xrnew (buffer, lbp->size, char);
6132 p += buffer - lbp->buffer;
6133 pend = buffer + lbp->size;
6134 lbp->buffer = buffer;
6136 if (c == EOF)
6138 *p = '\0';
6139 chars_deleted = 0;
6140 break;
6142 if (c == '\n')
6144 if (p > buffer && p[-1] == '\r')
6146 p -= 1;
6147 #ifdef DOS_NT
6148 /* Assume CRLF->LF translation will be performed by Emacs
6149 when loading this file, so CRs won't appear in the buffer.
6150 It would be cleaner to compensate within Emacs;
6151 however, Emacs does not know how many CRs were deleted
6152 before any given point in the file. */
6153 chars_deleted = 1;
6154 #else
6155 chars_deleted = 2;
6156 #endif
6158 else
6160 chars_deleted = 1;
6162 *p = '\0';
6163 break;
6165 *p++ = c;
6167 lbp->len = p - buffer;
6169 if (need_filebuf /* we need filebuf for multi-line regexps */
6170 && chars_deleted > 0) /* not at EOF */
6172 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6174 /* Expand filebuf. */
6175 filebuf.size *= 2;
6176 xrnew (filebuf.buffer, filebuf.size, char);
6178 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6179 filebuf.len += lbp->len;
6180 filebuf.buffer[filebuf.len++] = '\n';
6181 filebuf.buffer[filebuf.len] = '\0';
6184 return lbp->len + chars_deleted;
6188 * Like readline_internal, above, but in addition try to match the
6189 * input line against relevant regular expressions and manage #line
6190 * directives.
6192 static void
6193 readline (lbp, stream)
6194 linebuffer *lbp;
6195 FILE *stream;
6197 long result;
6199 linecharno = charno; /* update global char number of line start */
6200 result = readline_internal (lbp, stream); /* read line */
6201 lineno += 1; /* increment global line number */
6202 charno += result; /* increment global char number */
6204 /* Honour #line directives. */
6205 if (!no_line_directive)
6207 static bool discard_until_line_directive;
6209 /* Check whether this is a #line directive. */
6210 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6212 unsigned int lno;
6213 int start = 0;
6215 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6216 && start > 0) /* double quote character found */
6218 char *endp = lbp->buffer + start;
6220 while ((endp = etags_strchr (endp, '"')) != NULL
6221 && endp[-1] == '\\')
6222 endp++;
6223 if (endp != NULL)
6224 /* Ok, this is a real #line directive. Let's deal with it. */
6226 char *taggedabsname; /* absolute name of original file */
6227 char *taggedfname; /* name of original file as given */
6228 char *name; /* temp var */
6230 discard_until_line_directive = FALSE; /* found it */
6231 name = lbp->buffer + start;
6232 *endp = '\0';
6233 canonicalize_filename (name);
6234 taggedabsname = absolute_filename (name, tagfiledir);
6235 if (filename_is_absolute (name)
6236 || filename_is_absolute (curfdp->infname))
6237 taggedfname = savestr (taggedabsname);
6238 else
6239 taggedfname = relative_filename (taggedabsname,tagfiledir);
6241 if (streq (curfdp->taggedfname, taggedfname))
6242 /* The #line directive is only a line number change. We
6243 deal with this afterwards. */
6244 free (taggedfname);
6245 else
6246 /* The tags following this #line directive should be
6247 attributed to taggedfname. In order to do this, set
6248 curfdp accordingly. */
6250 fdesc *fdp; /* file description pointer */
6252 /* Go look for a file description already set up for the
6253 file indicated in the #line directive. If there is
6254 one, use it from now until the next #line
6255 directive. */
6256 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6257 if (streq (fdp->infname, curfdp->infname)
6258 && streq (fdp->taggedfname, taggedfname))
6259 /* If we remove the second test above (after the &&)
6260 then all entries pertaining to the same file are
6261 coalesced in the tags file. If we use it, then
6262 entries pertaining to the same file but generated
6263 from different files (via #line directives) will
6264 go into separate sections in the tags file. These
6265 alternatives look equivalent. The first one
6266 destroys some apparently useless information. */
6268 curfdp = fdp;
6269 free (taggedfname);
6270 break;
6272 /* Else, if we already tagged the real file, skip all
6273 input lines until the next #line directive. */
6274 if (fdp == NULL) /* not found */
6275 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6276 if (streq (fdp->infabsname, taggedabsname))
6278 discard_until_line_directive = TRUE;
6279 free (taggedfname);
6280 break;
6282 /* Else create a new file description and use that from
6283 now on, until the next #line directive. */
6284 if (fdp == NULL) /* not found */
6286 fdp = fdhead;
6287 fdhead = xnew (1, fdesc);
6288 *fdhead = *curfdp; /* copy curr. file description */
6289 fdhead->next = fdp;
6290 fdhead->infname = savestr (curfdp->infname);
6291 fdhead->infabsname = savestr (curfdp->infabsname);
6292 fdhead->infabsdir = savestr (curfdp->infabsdir);
6293 fdhead->taggedfname = taggedfname;
6294 fdhead->usecharno = FALSE;
6295 fdhead->prop = NULL;
6296 fdhead->written = FALSE;
6297 curfdp = fdhead;
6300 free (taggedabsname);
6301 lineno = lno - 1;
6302 readline (lbp, stream);
6303 return;
6304 } /* if a real #line directive */
6305 } /* if #line is followed by a number */
6306 } /* if line begins with "#line " */
6308 /* If we are here, no #line directive was found. */
6309 if (discard_until_line_directive)
6311 if (result > 0)
6313 /* Do a tail recursion on ourselves, thus discarding the contents
6314 of the line buffer. */
6315 readline (lbp, stream);
6316 return;
6318 /* End of file. */
6319 discard_until_line_directive = FALSE;
6320 return;
6322 } /* if #line directives should be considered */
6325 int match;
6326 regexp *rp;
6327 char *name;
6329 /* Match against relevant regexps. */
6330 if (lbp->len > 0)
6331 for (rp = p_head; rp != NULL; rp = rp->p_next)
6333 /* Only use generic regexps or those for the current language.
6334 Also do not use multiline regexps, which is the job of
6335 regex_tag_multiline. */
6336 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6337 || rp->multi_line)
6338 continue;
6340 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6341 switch (match)
6343 case -2:
6344 /* Some error. */
6345 if (!rp->error_signaled)
6347 error ("regexp stack overflow while matching \"%s\"",
6348 rp->pattern);
6349 rp->error_signaled = TRUE;
6351 break;
6352 case -1:
6353 /* No match. */
6354 break;
6355 case 0:
6356 /* Empty string matched. */
6357 if (!rp->error_signaled)
6359 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6360 rp->error_signaled = TRUE;
6362 break;
6363 default:
6364 /* Match occurred. Construct a tag. */
6365 name = rp->name;
6366 if (name[0] == '\0')
6367 name = NULL;
6368 else /* make a named tag */
6369 name = substitute (lbp->buffer, rp->name, &rp->regs);
6370 if (rp->force_explicit_name)
6371 /* Force explicit tag name, if a name is there. */
6372 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6373 else
6374 make_tag (name, strlen (name), TRUE,
6375 lbp->buffer, match, lineno, linecharno);
6376 break;
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, asked for exactly strlen (CP) characters, and is returned
 * to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6395 * Return a pointer to a space of size LEN+1 allocated with xnew where
6396 * the string CP has been copied for at most the first LEN characters.
6398 static char *
6399 savenstr (cp, len)
6400 char *cp;
6401 int len;
6403 register char *dp;
6405 dp = xnew (len + 1, char);
6406 strncpy (dp, cp, len);
6407 dp[len] = '\0';
6408 return dp;
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * examined too, so searching for '\0' yields the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * examined too, so searching for '\0' yields the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters.  Returns zero when they are equal, otherwise the
 * difference between the first pair of characters that differ
 * (taken through `lowcase' when both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  int diff;

  for (;; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters, looking at no more than the first N characters.
 * Returns zero when the compared prefixes are equal (always the case
 * for N <= 0), otherwise the difference between the first pair of
 * characters that differ (taken through `lowcase' when both are
 * alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Test the budget N before anything else and decrement it only for
     characters actually consumed.  That way N is exactly zero when
     the limit is reached, even if S1 happens to end at that very
     position, and character N+1 is never compared. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Return a pointer to the first character at or after CP for which
   `iswhite' is false (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either the terminating NUL or satisfies `iswhite'. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE.
   Does not return. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print the system error message for the current `errno', prefixed
   by S1, using perror; then terminate the program with EXIT_FAILURE.
   Does not return. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6536 static void
6537 suggest_asking_for_help ()
6539 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6540 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6541 exit (EXIT_FAILURE);
6544 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6545 static void
6546 error (s1, s2)
6547 const char *s1, *s2;
6549 fprintf (stderr, "%s: ", progname);
6550 fprintf (stderr, s1, s2);
6551 fprintf (stderr, "\n");
6554 /* Return a newly-allocated string whose contents
6555 concatenate those of s1, s2, s3. */
6556 static char *
6557 concat (s1, s2, s3)
6558 char *s1, *s2, *s3;
6560 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6561 char *result = xnew (len1 + len2 + len3 + 1, char);
6563 strcpy (result, s1);
6564 strcpy (result + len1, s2);
6565 strcpy (result + len1 + len2, s3);
6566 result[len1 + len2 + len3] = '\0';
6568 return result;
6572 /* Does the same work as the system V getcwd, but does not need to
6573 guess the buffer size in advance. */
6574 static char *
6575 etags_getcwd ()
6577 #ifdef HAVE_GETCWD
6578 int bufsize = 200;
6579 char *path = xnew (bufsize, char);
6581 while (getcwd (path, bufsize) == NULL)
6583 if (errno != ERANGE)
6584 pfatal ("getcwd");
6585 bufsize *= 2;
6586 free (path);
6587 path = xnew (bufsize, char);
6590 canonicalize_filename (path);
6591 return path;
6593 #else /* not HAVE_GETCWD */
6594 #if MSDOS
6596 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6598 getwd (path);
6600 for (p = path; *p != '\0'; p++)
6601 if (*p == '\\')
6602 *p = '/';
6603 else
6604 *p = lowcase (*p);
6606 return strdup (path);
6607 #else /* not MSDOS */
6608 linebuffer path;
6609 FILE *pipe;
6611 linebuffer_init (&path);
6612 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6613 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6614 pfatal ("pwd");
6615 pclose (pipe);
6617 return path.buffer;
6618 #endif /* not MSDOS */
6619 #endif /* not HAVE_GETCWD */
6622 /* Return a newly allocated string containing the file name of FILE
6623 relative to the absolute directory DIR (which should end with a slash). */
6624 static char *
6625 relative_filename (file, dir)
6626 char *file, *dir;
6628 char *fp, *dp, *afn, *res;
6629 int i;
6631 /* Find the common root of file and dir (with a trailing slash). */
6632 afn = absolute_filename (file, cwd);
6633 fp = afn;
6634 dp = dir;
6635 while (*fp++ == *dp++)
6636 continue;
6637 fp--, dp--; /* back to the first differing char */
6638 #ifdef DOS_NT
6639 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6640 return afn;
6641 #endif
6642 do /* look at the equal chars until '/' */
6643 fp--, dp--;
6644 while (*fp != '/');
6646 /* Build a sequence of "../" strings for the resulting relative file name. */
6647 i = 0;
6648 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6649 i += 1;
6650 res = xnew (3*i + strlen (fp + 1) + 1, char);
6651 res[0] = '\0';
6652 while (i-- > 0)
6653 strcat (res, "../");
6655 /* Add the file name relative to the common root of file and dir. */
6656 strcat (res, fp + 1);
6657 free (afn);
6659 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied; otherwise it is appended to
   DIR.  The "/dirname/.." and "/." substrings are then removed in
   place.  Should that leave an empty string, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              /* Source and destination overlap, so strcpy must not be
                 used.  The destination lies below the source, which
                 makes a plain forward copy safe. */
              {
                char *to = cp;
                char *from = slashp + 3;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              /* Overlapping regions again: copy forward by hand. */
              {
                char *to = slashp;
                char *from = slashp + 2;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6735 /* Return a newly allocated string containing the absolute
6736 file name of dir where FILE resides given DIR (which should
6737 end with a slash). */
6738 static char *
6739 absolute_dirname (file, dir)
6740 char *file, *dir;
6742 char *slashp, *res;
6743 char save;
6745 slashp = etags_strrchr (file, '/');
6746 if (slashp == NULL)
6747 return savestr (dir);
6748 save = slashp[1];
6749 slashp[1] = '\0';
6750 res = absolute_filename (file, dir);
6751 slashp[1] = save;
6753 return res;
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a letter followed by ":/". */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
/* Canonicalize the file name FN in place: collapse each run of
   separator characters into a single slash and, under DOS_NT, upcase
   a leading lower-case drive letter.  The separator is '\\' under
   DOS_NT and '/' otherwise. */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next character to write */
#ifdef DOS_NT
  char sep = '\\';
#else
  char sep = '/';
#endif

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
#endif

  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6800 /* Initialize a linebuffer for use. */
6801 static void
6802 linebuffer_init (lbp)
6803 linebuffer *lbp;
6805 lbp->size = (DEBUG) ? 3 : 200;
6806 lbp->buffer = xnew (lbp->size, char);
6807 lbp->buffer[0] = '\0';
6808 lbp->len = 0;
6811 /* Set the minimum size of a string contained in a linebuffer. */
6812 static void
6813 linebuffer_setlen (lbp, toksize)
6814 linebuffer *lbp;
6815 int toksize;
6817 while (lbp->size <= toksize)
6819 lbp->size *= 2;
6820 xrnew (lbp->buffer, lbp->size, char);
6822 lbp->len = toksize;
6825 /* Like malloc but get fatal error if memory is exhausted. */
6826 static PTR
6827 xmalloc (size)
6828 unsigned int size;
6830 PTR result = (PTR) malloc (size);
6831 if (result == NULL)
6832 fatal ("virtual memory exhausted", (char *)NULL);
6833 return result;
6836 static PTR
6837 xrealloc (ptr, size)
6838 char *ptr;
6839 unsigned int size;
6841 PTR result = (PTR) realloc (ptr, size);
6842 if (result == NULL)
6843 fatal ("virtual memory exhausted", (char *)NULL);
6844 return result;
6848 * Local Variables:
6849 * indent-tabs-mode: t
6850 * tab-width: 8
6851 * fill-column: 79
6852 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6853 * c-file-style: "gnu"
6854 * End:
6857 /* etags.c ends here */