Throughout the file, delete all USE_FONT_BACKEND
[emacs.git] / lib-src / etags.c
blobd18ee6c9f924addf14f1ace16f10d6881f44733b
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
/*
 * Authors:
 * 1983 Ctags originally by Ken Arnold.
 * 1984 Fortran added by Jim Kleckner.
 * 1984 Ed Pelegri-Llopart added C typedefs.
 * 1985 Emacs TAGS format by Richard Stallman.
 * 1989 Sam Kendall added C++.
 * 1992 Joseph B. Wells improved C and C++ parsing.
 * 1993 Francesco Potortì reorganised C and C++.
 * 1994 Line-by-line regexp tags by Tom Tromey.
 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
 * 2002 #line directives by Francesco Potortì.
 *
 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
 */
/*
 * If you want to add support for a new language, start by looking at the LUA
 * language, which is the simplest. Alternatively, consider shipping a
 * configuration file containing regexp definitions for etags.
 */
82 char pot_etags_version[] = "@(#) pot revision number is 17.38";
84 #define TRUE 1
85 #define FALSE 0
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # ifdef VMS
176 # define EXIT_SUCCESS 1
177 # define EXIT_FAILURE 0
178 # else /* no VMS */
179 # define EXIT_SUCCESS 0
180 # define EXIT_FAILURE 1
181 # endif
182 # endif
183 #endif /* !WINDOWSNT */
185 #ifdef HAVE_UNISTD_H
186 # include <unistd.h>
187 #else
188 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
189 extern char *getcwd (char *buf, size_t size);
190 # endif
191 #endif /* HAVE_UNISTD_H */
193 #include <stdio.h>
194 #include <ctype.h>
195 #include <errno.h>
196 #ifndef errno
197 extern int errno;
198 #endif
199 #include <sys/types.h>
200 #include <sys/stat.h>
202 #include <assert.h>
203 #ifdef NDEBUG
204 # undef assert /* some systems have a buggy assert.h */
205 # define assert(x) ((void) 0)
206 #endif
208 #if !defined (S_ISREG) && defined (S_IFREG)
209 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
210 #endif
212 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
213 # define NO_LONG_OPTIONS TRUE
214 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
215 extern char *optarg;
216 extern int optind, opterr;
217 #else
218 # define NO_LONG_OPTIONS FALSE
219 # include <getopt.h>
220 #endif /* NO_LONG_OPTIONS */
222 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
223 # ifdef __CYGWIN__ /* compiling on Cygwin */
224 !!! NOTICE !!!
225 the regex.h distributed with Cygwin is not compatible with etags, alas!
226 If you want regular expression support, you should delete this notice and
227 arrange to use the GNU regex.h and regex.c.
228 # endif
229 #endif
230 #include <regex.h>
232 /* Define CTAGS to make the program "ctags" compatible with the usual one.
233 Leave it undefined to make the program "etags", which makes emacs-style
234 tag tables and tags typedefs, #defines and struct/union/enum by default. */
235 #ifdef CTAGS
236 # undef CTAGS
237 # define CTAGS TRUE
238 #else
239 # define CTAGS FALSE
240 #endif
/*
 * String equality predicates.  Each expands to a non-zero value when the
 * two strings compare equal and to zero otherwise; the `n' variants look
 * at no more than N characters and the `case' variants use the
 * case-insensitive etags_str*casecmp helpers.
 *
 * Both arguments are handed to the comparison function, so both must be
 * non-NULL; the assertion checks that each of them is (it is a no-op
 * when assert is disabled).  Arguments are evaluated more than once, so
 * they must not have side effects.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/*
 * Character classification helpers.
 *
 * CHAR maps any char or int value onto an index in [0, CHARS) by keeping
 * only the bits covered by CHARS - 1.  The lower-case predicates look the
 * index up in the file-scope boolean tables _wht, _nin, _btk, _itk and
 * _etk; the upper-case ones forward the index to the <ctype.h> routines.
 */
#define CHARS 256		/* number of entries in each character table */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define	notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */

#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type; xrnew resizes the
 * storage OP points to so that it holds N objects of type Type and
 * assigns the result back to OP.  When DEBUG is non-zero the requests go
 * through the tracing allocator declared in chkmalloc.h, otherwise
 * through xmalloc and xrealloc.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#endif
/* Boolean values are stored in plain ints throughout this file. */
#define bool int

/* Type of a language parse function; it receives the input stream. */
typedef void Lang_function __P((FILE *));
/* Pairs a compressed-file suffix with the command that expands it. */
typedef struct
{
  char *suffix;		/* file name suffix for this compressor */
  char *command;	/* takes one arg and decompresses to stdout */
} compressor;
292 typedef struct
294 char *name; /* language name */
295 char *help; /* detailed help for the language */
296 Lang_function *function; /* parse function */
297 char **suffixes; /* name suffixes of this language's files */
298 char **filenames; /* names of this language's files */
299 char **interpreters; /* interpreters for this language */
300 bool metasource; /* source used to generate other sources */
301 } language;
303 typedef struct fdesc
305 struct fdesc *next; /* for the linked list */
306 char *infname; /* uncompressed input file name */
307 char *infabsname; /* absolute uncompressed input file name */
308 char *infabsdir; /* absolute dir of input file */
309 char *taggedfname; /* file name to write in tagfile */
310 language *lang; /* language of file */
311 char *prop; /* file properties to write in tagfile */
312 bool usecharno; /* etags tags shall contain char number */
313 bool written; /* entry written in the tags file */
314 } fdesc;
316 typedef struct node_st
317 { /* sorting structure */
318 struct node_st *left, *right; /* left and right sons */
319 fdesc *fdp; /* description of file to whom tag belongs */
320 char *name; /* tag name */
321 char *regex; /* search regexp */
322 bool valid; /* write this tag on the tag file */
323 bool is_func; /* function tag: use regexp in CTAGS mode */
324 bool been_warned; /* warning already given for duplicated tag */
325 int lno; /* line number tag is on */
326 long cno; /* character number line starts on */
327 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
343 /* Used to support mixing of --lang and file names. */
344 typedef struct
346 enum {
347 at_language, /* a language specification */
348 at_regexp, /* a regular expression */
349 at_filename, /* a file name */
350 at_stdin, /* read from stdin here */
351 at_end /* stop parsing the list */
352 } arg_type; /* argument type */
353 language *lang; /* language associated with the argument */
354 char *what; /* the argument itself */
355 } argument;
357 /* Structure defining a regular expression. */
358 typedef struct regexp
360 struct regexp *p_next; /* pointer to next in list */
361 language *lang; /* if set, use only for this language */
362 char *pattern; /* the regexp pattern */
363 char *name; /* tag name */
364 struct re_pattern_buffer *pat; /* the compiled pattern */
365 struct re_registers regs; /* re registers */
366 bool error_signaled; /* already signaled for this regexp */
367 bool force_explicit_name; /* do not allow implict tag name */
368 bool ignore_case; /* ignore case when matching */
369 bool multi_line; /* do a multi-line match on the whole file */
370 } regexp;
373 /* Many compilers barf on this:
374 Lang_function Ada_funcs;
375 so let's write it this way */
376 static void Ada_funcs __P((FILE *));
377 static void Asm_labels __P((FILE *));
378 static void C_entries __P((int c_ext, FILE *));
379 static void default_C_entries __P((FILE *));
380 static void plain_C_entries __P((FILE *));
381 static void Cjava_entries __P((FILE *));
382 static void Cobol_paragraphs __P((FILE *));
383 static void Cplusplus_entries __P((FILE *));
384 static void Cstar_entries __P((FILE *));
385 static void Erlang_functions __P((FILE *));
386 static void Forth_words __P((FILE *));
387 static void Fortran_functions __P((FILE *));
388 static void HTML_labels __P((FILE *));
389 static void Lisp_functions __P((FILE *));
390 static void Lua_functions __P((FILE *));
391 static void Makefile_targets __P((FILE *));
392 static void Pascal_functions __P((FILE *));
393 static void Perl_functions __P((FILE *));
394 static void PHP_functions __P((FILE *));
395 static void PS_functions __P((FILE *));
396 static void Prolog_functions __P((FILE *));
397 static void Python_functions __P((FILE *));
398 static void Scheme_functions __P((FILE *));
399 static void TeX_commands __P((FILE *));
400 static void Texinfo_nodes __P((FILE *));
401 static void Yacc_entries __P((FILE *));
402 static void just_read_file __P((FILE *));
404 static void print_language_names __P((void));
405 static void print_version __P((void));
406 static void print_help __P((argument *));
407 int main __P((int, char **));
409 static compressor *get_compressor_from_suffix __P((char *, char **));
410 static language *get_language_from_langname __P((const char *));
411 static language *get_language_from_interpreter __P((char *));
412 static language *get_language_from_filename __P((char *, bool));
413 static void readline __P((linebuffer *, FILE *));
414 static long readline_internal __P((linebuffer *, FILE *));
415 static bool nocase_tail __P((char *));
416 static void get_tag __P((char *, char **));
418 static void analyse_regex __P((char *));
419 static void free_regexps __P((void));
420 static void regex_tag_multiline __P((void));
421 static void error __P((const char *, const char *));
422 static void suggest_asking_for_help __P((void));
423 void fatal __P((char *, char *));
424 static void pfatal __P((char *));
425 static void add_node __P((node *, node **));
427 static void init __P((void));
428 static void process_file_name __P((char *, language *));
429 static void process_file __P((FILE *, char *, language *));
430 static void find_entries __P((FILE *));
431 static void free_tree __P((node *));
432 static void free_fdesc __P((fdesc *));
433 static void pfnote __P((char *, bool, char *, int, int, long));
434 static void make_tag __P((char *, int, bool, char *, int, int, long));
435 static void invalidate_nodes __P((fdesc *, node **));
436 static void put_entries __P((node *));
438 static char *concat __P((char *, char *, char *));
439 static char *skip_spaces __P((char *));
440 static char *skip_non_spaces __P((char *));
441 static char *savenstr __P((char *, int));
442 static char *savestr __P((char *));
443 static char *etags_strchr __P((const char *, int));
444 static char *etags_strrchr __P((const char *, int));
445 static int etags_strcasecmp __P((const char *, const char *));
446 static int etags_strncasecmp __P((const char *, const char *, int));
447 static char *etags_getcwd __P((void));
448 static char *relative_filename __P((char *, char *));
449 static char *absolute_filename __P((char *, char *));
450 static char *absolute_dirname __P((char *, char *));
451 static bool filename_is_absolute __P((char *f));
452 static void canonicalize_filename __P((char *));
453 static void linebuffer_init __P((linebuffer *));
454 static void linebuffer_setlen __P((linebuffer *, int));
455 static PTR xmalloc __P((unsigned int));
456 static PTR xrealloc __P((char *, unsigned int));
459 static char searchar = '/'; /* use /.../ searches */
461 static char *tagfile; /* output file */
462 static char *progname; /* name this program was invoked with */
463 static char *cwd; /* current working directory */
464 static char *tagfiledir; /* directory of tagfile */
465 static FILE *tagf; /* ioptr for tags file */
467 static fdesc *fdhead; /* head of file description list */
468 static fdesc *curfdp; /* current file description */
469 static int lineno; /* line number of current line */
470 static long charno; /* current character number */
471 static long linecharno; /* charno of start of current line */
472 static char *dbp; /* pointer to start of current tag */
474 static const int invalidcharno = -1;
476 static node *nodehead; /* the head of the binary tree of tags */
477 static node *last_node; /* the last node created */
479 static linebuffer lb; /* the current line */
480 static linebuffer filebuf; /* a buffer containing the whole file */
481 static linebuffer token_name; /* a buffer containing a tag name */
483 /* boolean "functions" (see init) */
484 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* Character sets named by the comments on the character-class macros. */
static char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
497 static bool append_to_tagfile; /* -a: append to tags */
498 /* The next five default to TRUE in C and derived languages. */
499 static bool typedefs; /* -t: create tags for C and Ada typedefs */
500 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
501 /* 0 struct/enum/union decls, and C++ */
502 /* member functions. */
503 static bool constantypedefs; /* -d: create tags for C #define, enum */
504 /* constants and variables. */
505 /* -D: opposite of -d. Default under ctags. */
506 static bool globals; /* create tags for global variables */
507 static bool members; /* create tags for C member variables */
508 static bool declarations; /* --declarations: tag them and extern in C&Co*/
509 static bool no_line_directive; /* ignore #line directives (undocumented) */
510 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
511 static bool update; /* -u: update tags */
512 static bool vgrind_style; /* -v: create vgrind style index output */
513 static bool no_warnings; /* -w: suppress warnings (undocumented) */
514 static bool cxref_style; /* -x: create cxref style output */
515 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
516 static bool ignoreindent; /* -I: ignore indentation in C */
517 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
519 /* STDIN is defined in LynxOS system headers */
520 #ifdef STDIN
521 # undef STDIN
522 #endif
524 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
525 static bool parsing_stdin; /* --parse-stdin used */
527 static regexp *p_head; /* list of all regexps */
528 static bool need_filebuf; /* some regexes are multi-line */
530 static struct option longopts[] =
532 { "append", no_argument, NULL, 'a' },
533 { "packages-only", no_argument, &packages_only, TRUE },
534 { "c++", no_argument, NULL, 'C' },
535 { "declarations", no_argument, &declarations, TRUE },
536 { "no-line-directive", no_argument, &no_line_directive, TRUE },
537 { "no-duplicates", no_argument, &no_duplicates, TRUE },
538 { "help", no_argument, NULL, 'h' },
539 { "help", no_argument, NULL, 'H' },
540 { "ignore-indentation", no_argument, NULL, 'I' },
541 { "language", required_argument, NULL, 'l' },
542 { "members", no_argument, &members, TRUE },
543 { "no-members", no_argument, &members, FALSE },
544 { "output", required_argument, NULL, 'o' },
545 { "regex", required_argument, NULL, 'r' },
546 { "no-regex", no_argument, NULL, 'R' },
547 { "ignore-case-regex", required_argument, NULL, 'c' },
548 { "parse-stdin", required_argument, NULL, STDIN },
549 { "version", no_argument, NULL, 'V' },
551 #if CTAGS /* Ctags options */
552 { "backward-search", no_argument, NULL, 'B' },
553 { "cxref", no_argument, NULL, 'x' },
554 { "defines", no_argument, NULL, 'd' },
555 { "globals", no_argument, &globals, TRUE },
556 { "typedefs", no_argument, NULL, 't' },
557 { "typedefs-and-c++", no_argument, NULL, 'T' },
558 { "update", no_argument, NULL, 'u' },
559 { "vgrind", no_argument, NULL, 'v' },
560 { "no-warn", no_argument, NULL, 'w' },
562 #else /* Etags options */
563 { "no-defines", no_argument, NULL, 'D' },
564 { "no-globals", no_argument, &globals, FALSE },
565 { "include", required_argument, NULL, 'i' },
566 #endif
567 { NULL }
570 static compressor compressors[] =
572 { "z", "gzip -d -c"},
573 { "Z", "gzip -d -c"},
574 { "gz", "gzip -d -c"},
575 { "GZ", "gzip -d -c"},
576 { "bz2", "bzip2 -d -c" },
577 { NULL }
/*
 * Language stuff.
 */
/* Ada code */
static char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
/* NOTE(review): the column spacing of the table inside this help text may
   have been collapsed in this copy of the file -- confirm against the
   upstream etags.c before relying on its layout. */
static char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";
/* Assembly code */
static char *Asm_suffixes [] =
  { "a",			/* Unix assembler */
    "asm",			/* Microcontroller assembly */
    "def",			/* BSO/Tasking definition includes */
    "inc",			/* Microcontroller include files */
    "ins",			/* Microcontroller include files */
    "s", "sa",			/* Unix assembler */
    "S",			/* cpp-processed Unix assembler */
    "src",			/* BSO/Tasking C compiler output */
    NULL
  };
static char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
  { "c", "h", NULL };
#if CTAGS /* C help for Ctags */
static char default_C_help [] =
"In C code, any C function is a tag. Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else /* C help for Etags */
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'. `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'. Global variables are tags unless you specify\n\
`--no-globals' and so are struct members unless you specify\n\
`--no-members'. Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using `--declarations'.";
#endif /* C help for Ctags and Etags */
/* C++ code */
static char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };
static char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the `--no-members' option.\n\
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
`operator+'.";
/* Java code */
static char *Cjava_suffixes [] =
  { "java", NULL };
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol code */
static char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
/* File name suffixes listed for the "c*" entry of the language table. */
static char *Cstar_suffixes [] =
  { "cs", "hs", NULL };
/* Erlang code */
static char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
/* Forth code.  The suffix table is file-local like every other suffix
   table in this file. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran code */
static char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";
/* HTML input */
static char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static char HTML_help [] =
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers. Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";
/* Lisp code */
static char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";
/* Lua scripts */
static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";
/* Makefiles are listed by whole file name, not by suffix. */
static char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";
/* Objective C code */
static char *Objc_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";
/* Pascal code */
static char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl code: listed both by file suffix and by interpreter name. */
static char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is `main::SUB'.";
/* PHP code */
static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the `--no-members' option, vars are tags too.";
/* File name suffixes listed for the "proc" entry of the language table. */
static char *plain_C_suffixes [] =
  { "pc",			/* Pro*C file */
     NULL };
/* PostScript code */
static char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
static char PS_help [] =
"In PostScript code, the tags are the functions.";
/* Prolog code */
static char *Prolog_suffixes [] =
  { "prolog", NULL };
static char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";
/* Python code */
static char *Python_suffixes [] =
  { "py", NULL };
static char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";
/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!' at top level in the file.";
/* TeX and LaTeX text */
static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
/* NOTE(review): this copy of the file skips one short line between the
   two paragraphs of the help text; it is restored here as an empty help
   line (`\n\').  The indentation in front of the TEXTAGS example may also
   have been collapsed.  Confirm both against the upstream etags.c. */
static char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";
/* Texinfo files */
static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";
/* Bison and Yacc input */
static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
/* Help texts for the `auto' and `none' entries of the language table,
   and the text used for languages that have no detailed help. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files base on file name suffix and file contents.";

static char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

static char no_lang_help [] =
"No detailed help available for this language.";
823 * Table of languages.
825 * It is ok for a given function to be listed under more than one
826 * name. I just didn't.
829 static language lang_names [] =
831 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
832 { "asm", Asm_help, Asm_labels, Asm_suffixes },
833 { "c", default_C_help, default_C_entries, default_C_suffixes },
834 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
835 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
836 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
837 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
838 { "forth", Forth_help, Forth_words, Forth_suffixes },
839 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
840 { "html", HTML_help, HTML_labels, HTML_suffixes },
841 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
842 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
843 { "lua", Lua_help, Lua_functions, Lua_suffixes },
844 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
845 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
846 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
847 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
848 { "php", PHP_help, PHP_functions, PHP_suffixes },
849 { "postscript",PS_help, PS_functions, PS_suffixes },
850 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
851 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
852 { "python", Python_help, Python_functions, Python_suffixes },
853 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
854 { "tex", TeX_help, TeX_commands, TeX_suffixes },
855 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
856 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
857 { "auto", auto_help }, /* default guessing scheme */
858 { "none", none_help, just_read_file }, /* regexp matching only */
859 { NULL } /* end of list */
/* Print on standard output one line per entry of lang_names, giving the
   language name followed by its default file names and dot suffixes,
   and then an explanation of how a language is chosen when none is
   forced.  */
863 static void
864 print_language_names ()
866 language *lang;
867 char **name, **ext;
869 puts ("\nThese are the currently supported languages, along with the\n\
870 default file names and dot suffixes:");
/* The table ends at the entry whose name is NULL.  */
871 for (lang = lang_names; lang->name != NULL; lang++)
873 printf (" %-*s", 10, lang->name);
/* Either list may be absent (NULL) for a given language.  */
874 if (lang->filenames != NULL)
875 for (name = lang->filenames; *name != NULL; name++)
876 printf (" %s", *name);
877 if (lang->suffixes != NULL)
878 for (ext = lang->suffixes; *ext != NULL; ext++)
879 printf (" .%s", *ext);
880 puts ("");
/* Describe the pseudo-languages and the guessing fallbacks.  */
882 puts ("where `auto' means use default language for files based on file\n\
883 name suffix, and `none' means only do regexp processing on files.\n\
884 If no language is specified and no matching suffix is found,\n\
885 the first line of the file is read for a sharp-bang (#!) sequence\n\
886 followed by the name of an interpreter. If no such sequence is found,\n\
887 Fortran is tried first; if no tags are found, C is tried next.\n\
888 When parsing any C file, a \"class\" or \"template\" keyword\n\
889 switches to C++.");
890 puts ("Compressed files are supported using gzip and bzip2.\n\
892 For detailed help on a given language use, for example,\n\
893 etags --help --lang=ada.");
896 #ifndef EMACS_NAME
897 # define EMACS_NAME "standalone"
898 #endif
899 #ifndef VERSION
900 # define VERSION "17.38"
901 #endif
902 static void
903 print_version ()
905 /* Makes it easier to update automatically. */
906 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
908 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
909 puts (emacs_copyright);
910 puts ("This program is distributed under the terms in ETAGS.README");
912 exit (EXIT_SUCCESS);
/* Whether print_help also describes the options guarded by this macro;
   off unless the build defines it otherwise.  */
915 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
916 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
917 #endif
/* Print help on standard output and exit successfully.

   ARGBUFFER is the argument list, terminated by an at_end entry.  If it
   contains at_language entries, only the help text of each of those
   languages is printed, separated by empty lines.  Otherwise the usage
   line, the description of every option (selected according to CTAGS),
   the table of languages and the bug report address are printed.

   NOTE(review): `-w' is described twice below, once as --no-duplicates
   and once as --no-warn -- confirm which long name is current.  */
919 static void
920 print_help (argbuffer)
921 argument *argbuffer;
923 bool help_for_lang = FALSE;
/* Language-specific help: one text per --language option seen.  */
925 for (; argbuffer->arg_type != at_end; argbuffer++)
926 if (argbuffer->arg_type == at_language)
928 if (help_for_lang)
929 puts ("");
930 puts (argbuffer->lang->help);
931 help_for_lang = TRUE;
/* Language help replaces the general help altogether.  */
934 if (help_for_lang)
935 exit (EXIT_SUCCESS);
937 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
939 These are the options accepted by %s.\n", progname, progname);
940 if (NO_LONG_OPTIONS)
941 puts ("WARNING: long option names do not work with this executable,\n\
942 as it is not linked with GNU getopt.");
943 else
944 puts ("You may use unambiguous abbreviations for the long option names.");
945 puts (" A - as file name means read names from stdin (one per line).\n\
946 Absolute names are stored in the output file as they are.\n\
947 Relative ones are stored relative to the output file's directory.\n");
949 puts ("-a, --append\n\
950 Append tag entries to existing tags file.");
952 puts ("--packages-only\n\
953 For Ada files, only generate tags for packages.");
955 if (CTAGS)
956 puts ("-B, --backward-search\n\
957 Write the search commands for the tag entries using '?', the\n\
958 backward-search command instead of '/', the forward-search command.");
960 /* This option is mostly obsolete, because etags can now automatically
961 detect C++. Retained for backward compatibility and for debugging and
962 experimentation. In principle, we could want to tag as C++ even
963 before any "class" or "template" keyword.
964 puts ("-C, --c++\n\
965 Treat files whose name suffix defaults to C language as C++ files.");
968 puts ("--declarations\n\
969 In C and derived languages, create tags for function declarations,");
970 if (CTAGS)
971 puts ("\tand create tags for extern variables if --globals is used.");
972 else
973 puts
974 ("\tand create tags for extern variables unless --no-globals is used.");
976 if (CTAGS)
977 puts ("-d, --defines\n\
978 Create tag entries for C #define constants and enum constants, too.");
979 else
980 puts ("-D, --no-defines\n\
981 Don't create tag entries for C #define constants and enum constants.\n\
982 This makes the tags file smaller.");
984 if (!CTAGS)
985 puts ("-i FILE, --include=FILE\n\
986 Include a note in tag file indicating that, when searching for\n\
987 a tag, one should also consult the tags file FILE after\n\
988 checking the current file.");
990 puts ("-l LANG, --language=LANG\n\
991 Force the following files to be considered as written in the\n\
992 named language up to the next --language=LANG option.");
994 if (CTAGS)
995 puts ("--globals\n\
996 Create tag entries for global variables in some languages.");
997 else
998 puts ("--no-globals\n\
999 Do not create tag entries for global variables in some\n\
1000 languages. This makes the tags file smaller.");
1002 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1003 puts ("--no-line-directive\n\
1004 Ignore #line preprocessor directives in C and derived languages.");
1006 if (CTAGS)
1007 puts ("--members\n\
1008 Create tag entries for members of structures in some languages.");
1009 else
1010 puts ("--no-members\n\
1011 Do not create tag entries for members of structures\n\
1012 in some languages.");
1014 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1015 Make a tag for each line matching a regular expression pattern\n\
1016 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1017 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1018 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1019 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1020 puts (" If TAGNAME/ is present, the tags created are named.\n\
1021 For example Tcl named tags can be created with:\n\
1022 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1023 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1024 `m' means to allow multi-line matches, `s' implies `m' and\n\
1025 causes dot to match any character, including newline.");
1027 puts ("-R, --no-regex\n\
1028 Don't create tags from regexps for the following files.");
1030 puts ("-I, --ignore-indentation\n\
1031 In C and C++ do not assume that a closing brace in the first\n\
1032 column is the final brace of a function or structure definition.");
1034 puts ("-o FILE, --output=FILE\n\
1035 Write the tags to FILE.");
1037 puts ("--parse-stdin=NAME\n\
1038 Read from standard input and record tags as belonging to file NAME.");
1040 if (CTAGS)
1042 puts ("-t, --typedefs\n\
1043 Generate tag entries for C and Ada typedefs.");
1044 puts ("-T, --typedefs-and-c++\n\
1045 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1046 and C++ member functions.");
1049 if (CTAGS)
1050 puts ("-u, --update\n\
1051 Update the tag entries for the given files, leaving tag\n\
1052 entries for other files in place. Currently, this is\n\
1053 implemented by deleting the existing entries for the given\n\
1054 files and then rewriting the new entries at the end of the\n\
1055 tags file. It is often faster to simply rebuild the entire\n\
1056 tag file than to use this.");
1058 if (CTAGS)
1060 puts ("-v, --vgrind\n\
1061 Print on the standard output an index of items intended for\n\
1062 human consumption, similar to the output of vgrind. The index\n\
1063 is sorted, and gives the page number of each item.");
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-duplicates\n\
1067 Do not create duplicate tag entries, for compatibility with\n\
1068 traditional ctags.");
1070 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1071 puts ("-w, --no-warn\n\
1072 Suppress warning messages about duplicate tag entries.");
1074 puts ("-x, --cxref\n\
1075 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1076 The output uses line numbers instead of page numbers, but\n\
1077 beyond that the differences are cosmetic; try both to see\n\
1078 which you like.");
1081 puts ("-V, --version\n\
1082 Print the version of the program.\n\
1083 -h, --help\n\
1084 Print this help message.\n\
1085 Followed by one or more `--language' options prints detailed\n\
1086 help about tag generation for the specified languages.");
1088 print_language_names ();
1090 puts ("");
1091 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1093 exit (EXIT_SUCCESS);
1097 #ifdef VMS /* VMS specific functions */
/* String terminator.  */
#define EOS '\0'

/* Longest file specification that fits in a vspec.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* A counted string holding one file specification.  */
typedef struct
{
  short curlen;				/* number of characters in use */
  char body[MAX_FILE_SPEC_LEN + 1];	/* the text, plus room for EOS */
} vspec;
1110 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1111 returning in each successive call the next file name matching the input
1112 spec. The function expects that each in_spec passed
1113 to it will be processed to completion; in particular, up to and
1114 including the call following that in which the last matching name
1115 is returned, the function ignores the value of in_spec, and will
1116 only start processing a new spec with the following call.
1117 If an error occurs, on return out_spec contains the value
1118 of in_spec when the error occurred.
1120 With each successive file name returned in out_spec, the
1121 function's return value is one. When there are no more matching
1122 names the function returns zero. If on the first call no file
1123 matches in_spec, or there is any other error, -1 is returned.
1126 #include <rmsdef.h>
1127 #include <descrip.h>
1128 #define OUTSIZE MAX_FILE_SPEC_LEN
1129 static short
1130 fn_exp (out, in)
1131 vspec *out;
1132 char *in;
1134 static long context = 0;
1135 static struct dsc$descriptor_s o;
1136 static struct dsc$descriptor_s i;
1137 static bool pass1 = TRUE;
1138 long status;
1139 short retval;
1141 if (pass1)
1143 pass1 = FALSE;
1144 o.dsc$a_pointer = (char *) out;
1145 o.dsc$w_length = (short)OUTSIZE;
1146 i.dsc$a_pointer = in;
1147 i.dsc$w_length = (short)strlen(in);
1148 i.dsc$b_dtype = DSC$K_DTYPE_T;
1149 i.dsc$b_class = DSC$K_CLASS_S;
1150 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1151 o.dsc$b_class = DSC$K_CLASS_VS;
1153 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1155 out->body[out->curlen] = EOS;
1156 return 1;
1158 else if (status == RMS$_NMF)
1159 retval = 0;
1160 else
1162 strcpy(out->body, in);
1163 retval = -1;
1165 lib$find_file_end(&context);
1166 pass1 = TRUE;
1167 return retval;
1171 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1172 name of each file specified by the provided arg expanding wildcards.
1174 static char *
1175 gfnames (arg, p_error)
1176 char *arg;
1177 bool *p_error;
1179 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1181 switch (fn_exp (&filename, arg))
1183 case 1:
1184 *p_error = FALSE;
1185 return filename.body;
1186 case 0:
1187 *p_error = FALSE;
1188 return NULL;
1189 default:
1190 *p_error = TRUE;
1191 return filename.body;
1195 #ifndef OLD /* Newer versions of VMS do provide `system'. */
1196 system (cmd)
1197 char *cmd;
1199 error ("%s", "system() function not implemented under VMS");
1201 #endif
1203 #define VERSION_DELIM ';'
1204 char *massage_name (s)
1205 char *s;
1207 char *start = s;
1209 for ( ; *s; s++)
1210 if (*s == VERSION_DELIM)
1212 *s = EOS;
1213 break;
1215 else
1216 *s = lowcase (*s);
1217 return start;
1219 #endif /* VMS */
1223 main (argc, argv)
1224 int argc;
1225 char *argv[];
1227 int i;
1228 unsigned int nincluded_files;
1229 char **included_files;
1230 argument *argbuffer;
1231 int current_arg, file_count;
1232 linebuffer filename_lb;
1233 bool help_asked = FALSE;
1234 #ifdef VMS
1235 bool got_err;
1236 #endif
1237 char *optstring;
1238 int opt;
1241 #ifdef DOS_NT
1242 _fmode = O_BINARY; /* all of files are treated as binary files */
1243 #endif /* DOS_NT */
1245 progname = argv[0];
1246 nincluded_files = 0;
1247 included_files = xnew (argc, char *);
1248 current_arg = 0;
1249 file_count = 0;
1251 /* Allocate enough no matter what happens. Overkill, but each one
1252 is small. */
1253 argbuffer = xnew (argc, argument);
1256 * Always find typedefs and structure tags.
1257 * Also default to find macro constants, enum constants, struct
1258 * members and global variables. Do it for both etags and ctags.
1260 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1261 globals = members = TRUE;
1263 /* When the optstring begins with a '-' getopt_long does not rearrange the
1264 non-options arguments to be at the end, but leaves them alone. */
1265 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1266 "ac:Cf:Il:o:r:RSVhH",
1267 (CTAGS) ? "BxdtTuvw" : "Di:");
1269 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1270 switch (opt)
1272 case 0:
1273 /* If getopt returns 0, then it has already processed a
1274 long-named option. We should do nothing. */
1275 break;
1277 case 1:
1278 /* This means that a file name has been seen. Record it. */
1279 argbuffer[current_arg].arg_type = at_filename;
1280 argbuffer[current_arg].what = optarg;
1281 ++current_arg;
1282 ++file_count;
1283 break;
1285 case STDIN:
1286 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1287 argbuffer[current_arg].arg_type = at_stdin;
1288 argbuffer[current_arg].what = optarg;
1289 ++current_arg;
1290 ++file_count;
1291 if (parsing_stdin)
1292 fatal ("cannot parse standard input more than once", (char *)NULL);
1293 parsing_stdin = TRUE;
1294 break;
1296 /* Common options. */
1297 case 'a': append_to_tagfile = TRUE; break;
1298 case 'C': cplusplus = TRUE; break;
1299 case 'f': /* for compatibility with old makefiles */
1300 case 'o':
1301 if (tagfile)
1303 error ("-o option may only be given once.", (char *)NULL);
1304 suggest_asking_for_help ();
1305 /* NOTREACHED */
1307 tagfile = optarg;
1308 break;
1309 case 'I':
1310 case 'S': /* for backward compatibility */
1311 ignoreindent = TRUE;
1312 break;
1313 case 'l':
1315 language *lang = get_language_from_langname (optarg);
1316 if (lang != NULL)
1318 argbuffer[current_arg].lang = lang;
1319 argbuffer[current_arg].arg_type = at_language;
1320 ++current_arg;
1323 break;
1324 case 'c':
1325 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1326 optarg = concat (optarg, "i", ""); /* memory leak here */
1327 /* FALLTHRU */
1328 case 'r':
1329 argbuffer[current_arg].arg_type = at_regexp;
1330 argbuffer[current_arg].what = optarg;
1331 ++current_arg;
1332 break;
1333 case 'R':
1334 argbuffer[current_arg].arg_type = at_regexp;
1335 argbuffer[current_arg].what = NULL;
1336 ++current_arg;
1337 break;
1338 case 'V':
1339 print_version ();
1340 break;
1341 case 'h':
1342 case 'H':
1343 help_asked = TRUE;
1344 break;
1346 /* Etags options */
1347 case 'D': constantypedefs = FALSE; break;
1348 case 'i': included_files[nincluded_files++] = optarg; break;
1350 /* Ctags options. */
1351 case 'B': searchar = '?'; break;
1352 case 'd': constantypedefs = TRUE; break;
1353 case 't': typedefs = TRUE; break;
1354 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1355 case 'u': update = TRUE; break;
1356 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1357 case 'x': cxref_style = TRUE; break;
1358 case 'w': no_warnings = TRUE; break;
1359 default:
1360 suggest_asking_for_help ();
1361 /* NOTREACHED */
1364 /* No more options. Store the rest of arguments. */
1365 for (; optind < argc; optind++)
1367 argbuffer[current_arg].arg_type = at_filename;
1368 argbuffer[current_arg].what = argv[optind];
1369 ++current_arg;
1370 ++file_count;
1373 argbuffer[current_arg].arg_type = at_end;
1375 if (help_asked)
1376 print_help (argbuffer);
1377 /* NOTREACHED */
1379 if (nincluded_files == 0 && file_count == 0)
1381 error ("no input files specified.", (char *)NULL);
1382 suggest_asking_for_help ();
1383 /* NOTREACHED */
1386 if (tagfile == NULL)
1387 tagfile = CTAGS ? "tags" : "TAGS";
1388 cwd = etags_getcwd (); /* the current working directory */
1389 if (cwd[strlen (cwd) - 1] != '/')
1391 char *oldcwd = cwd;
1392 cwd = concat (oldcwd, "/", "");
1393 free (oldcwd);
1395 /* Relative file names are made relative to the current directory. */
1396 if (streq (tagfile, "-")
1397 || strneq (tagfile, "/dev/", 5))
1398 tagfiledir = cwd;
1399 else
1400 tagfiledir = absolute_dirname (tagfile, cwd);
1402 init (); /* set up boolean "functions" */
1404 linebuffer_init (&lb);
1405 linebuffer_init (&filename_lb);
1406 linebuffer_init (&filebuf);
1407 linebuffer_init (&token_name);
1409 if (!CTAGS)
1411 if (streq (tagfile, "-"))
1413 tagf = stdout;
1414 #ifdef DOS_NT
1415 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1416 doesn't take effect until after `stdout' is already open). */
1417 if (!isatty (fileno (stdout)))
1418 setmode (fileno (stdout), O_BINARY);
1419 #endif /* DOS_NT */
1421 else
1422 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1423 if (tagf == NULL)
1424 pfatal (tagfile);
1428 * Loop through files finding functions.
1430 for (i = 0; i < current_arg; i++)
1432 static language *lang; /* non-NULL if language is forced */
1433 char *this_file;
1435 switch (argbuffer[i].arg_type)
1437 case at_language:
1438 lang = argbuffer[i].lang;
1439 break;
1440 case at_regexp:
1441 analyse_regex (argbuffer[i].what);
1442 break;
1443 case at_filename:
1444 #ifdef VMS
1445 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1447 if (got_err)
1449 error ("can't find file %s\n", this_file);
1450 argc--, argv++;
1452 else
1454 this_file = massage_name (this_file);
1456 #else
1457 this_file = argbuffer[i].what;
1458 #endif
1459 /* Input file named "-" means read file names from stdin
1460 (one per line) and use them. */
1461 if (streq (this_file, "-"))
1463 if (parsing_stdin)
1464 fatal ("cannot parse standard input AND read file names from it",
1465 (char *)NULL);
1466 while (readline_internal (&filename_lb, stdin) > 0)
1467 process_file_name (filename_lb.buffer, lang);
1469 else
1470 process_file_name (this_file, lang);
1471 #ifdef VMS
1473 #endif
1474 break;
1475 case at_stdin:
1476 this_file = argbuffer[i].what;
1477 process_file (stdin, this_file, lang);
1478 break;
1482 free_regexps ();
1483 free (lb.buffer);
1484 free (filebuf.buffer);
1485 free (token_name.buffer);
1487 if (!CTAGS || cxref_style)
1489 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1490 put_entries (nodehead);
1491 free_tree (nodehead);
1492 nodehead = NULL;
1493 if (!CTAGS)
1495 fdesc *fdp;
1497 /* Output file entries that have no tags. */
1498 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1499 if (!fdp->written)
1500 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1502 while (nincluded_files-- > 0)
1503 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1505 if (fclose (tagf) == EOF)
1506 pfatal (tagfile);
1509 exit (EXIT_SUCCESS);
1512 /* From here on, we are in (CTAGS && !cxref_style) */
1513 if (update)
1515 char cmd[BUFSIZ];
1516 for (i = 0; i < current_arg; ++i)
1518 switch (argbuffer[i].arg_type)
1520 case at_filename:
1521 case at_stdin:
1522 break;
1523 default:
1524 continue; /* the for loop */
1526 sprintf (cmd,
1527 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1528 tagfile, argbuffer[i].what, tagfile);
1529 if (system (cmd) != EXIT_SUCCESS)
1530 fatal ("failed to execute shell command", (char *)NULL);
1532 append_to_tagfile = TRUE;
1535 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1536 if (tagf == NULL)
1537 pfatal (tagfile);
1538 put_entries (nodehead); /* write all the tags (CTAGS) */
1539 free_tree (nodehead);
1540 nodehead = NULL;
1541 if (fclose (tagf) == EOF)
1542 pfatal (tagfile);
1544 if (CTAGS)
1545 if (append_to_tagfile || update)
1547 char cmd[2*BUFSIZ+20];
1548 /* Maybe these should be used:
1549 setenv ("LC_COLLATE", "C", 1);
1550 setenv ("LC_ALL", "C", 1); */
1551 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1552 exit (system (cmd));
1554 return EXIT_SUCCESS;
1559 * Return a compressor given the file name. If EXTPTR is non-zero,
1560 * return a pointer into FILE where the compressor-specific
1561 * extension begins. If no compressor is found, NULL is returned
1562 * and EXTPTR is not significant.
1563 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1565 static compressor *
1566 get_compressor_from_suffix (file, extptr)
1567 char *file;
1568 char **extptr;
1570 compressor *compr;
1571 char *slash, *suffix;
1573 /* This relies on FN to be after canonicalize_filename,
1574 so we don't need to consider backslashes on DOS_NT. */
1575 slash = etags_strrchr (file, '/');
1576 suffix = etags_strrchr (file, '.');
1577 if (suffix == NULL || suffix < slash)
1578 return NULL;
1579 if (extptr != NULL)
1580 *extptr = suffix;
1581 suffix += 1;
1582 /* Let those poor souls who live with DOS 8+3 file name limits get
1583 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1584 Only the first do loop is run if not MSDOS */
1587 for (compr = compressors; compr->suffix != NULL; compr++)
1588 if (streq (compr->suffix, suffix))
1589 return compr;
1590 if (!MSDOS)
1591 break; /* do it only once: not really a loop */
1592 if (extptr != NULL)
1593 *extptr = ++suffix;
1594 } while (*suffix != '\0');
1595 return NULL;
1601 * Return a language given the name.
1603 static language *
1604 get_language_from_langname (name)
1605 const char *name;
1607 language *lang;
1609 if (name == NULL)
1610 error ("empty language name", (char *)NULL);
1611 else
1613 for (lang = lang_names; lang->name != NULL; lang++)
1614 if (streq (name, lang->name))
1615 return lang;
1616 error ("unknown language \"%s\"", name);
1619 return NULL;
1624 * Return a language given the interpreter name.
1626 static language *
1627 get_language_from_interpreter (interpreter)
1628 char *interpreter;
1630 language *lang;
1631 char **iname;
1633 if (interpreter == NULL)
1634 return NULL;
1635 for (lang = lang_names; lang->name != NULL; lang++)
1636 if (lang->interpreters != NULL)
1637 for (iname = lang->interpreters; *iname != NULL; iname++)
1638 if (streq (*iname, interpreter))
1639 return lang;
1641 return NULL;
1647 * Return a language given the file name.
1649 static language *
1650 get_language_from_filename (file, case_sensitive)
1651 char *file;
1652 bool case_sensitive;
1654 language *lang;
1655 char **name, **ext, *suffix;
1657 /* Try whole file name first. */
1658 for (lang = lang_names; lang->name != NULL; lang++)
1659 if (lang->filenames != NULL)
1660 for (name = lang->filenames; *name != NULL; name++)
1661 if ((case_sensitive)
1662 ? streq (*name, file)
1663 : strcaseeq (*name, file))
1664 return lang;
1666 /* If not found, try suffix after last dot. */
1667 suffix = etags_strrchr (file, '.');
1668 if (suffix == NULL)
1669 return NULL;
1670 suffix += 1;
1671 for (lang = lang_names; lang->name != NULL; lang++)
1672 if (lang->suffixes != NULL)
1673 for (ext = lang->suffixes; *ext != NULL; ext++)
1674 if ((case_sensitive)
1675 ? streq (*ext, suffix)
1676 : strcaseeq (*ext, suffix))
1677 return lang;
1678 return NULL;
1683 * This routine is called on each file argument.
1685 static void
1686 process_file_name (file, lang)
1687 char *file;
1688 language *lang;
1690 struct stat stat_buf;
1691 FILE *inf;
1692 fdesc *fdp;
1693 compressor *compr;
1694 char *compressed_name, *uncompressed_name;
1695 char *ext, *real_name;
1696 int retval;
1698 canonicalize_filename (file);
1699 if (streq (file, tagfile) && !streq (tagfile, "-"))
1701 error ("skipping inclusion of %s in self.", file);
1702 return;
1704 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1706 compressed_name = NULL;
1707 real_name = uncompressed_name = savestr (file);
1709 else
1711 real_name = compressed_name = savestr (file);
1712 uncompressed_name = savenstr (file, ext - file);
1715 /* If the canonicalized uncompressed name
1716 has already been dealt with, skip it silently. */
1717 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1719 assert (fdp->infname != NULL);
1720 if (streq (uncompressed_name, fdp->infname))
1721 goto cleanup;
1724 if (stat (real_name, &stat_buf) != 0)
1726 /* Reset real_name and try with a different name. */
1727 real_name = NULL;
1728 if (compressed_name != NULL) /* try with the given suffix */
1730 if (stat (uncompressed_name, &stat_buf) == 0)
1731 real_name = uncompressed_name;
1733 else /* try all possible suffixes */
1735 for (compr = compressors; compr->suffix != NULL; compr++)
1737 compressed_name = concat (file, ".", compr->suffix);
1738 if (stat (compressed_name, &stat_buf) != 0)
1740 if (MSDOS)
1742 char *suf = compressed_name + strlen (file);
1743 size_t suflen = strlen (compr->suffix) + 1;
1744 for ( ; suf[1]; suf++, suflen--)
1746 memmove (suf, suf + 1, suflen);
1747 if (stat (compressed_name, &stat_buf) == 0)
1749 real_name = compressed_name;
1750 break;
1753 if (real_name != NULL)
1754 break;
1755 } /* MSDOS */
1756 free (compressed_name);
1757 compressed_name = NULL;
1759 else
1761 real_name = compressed_name;
1762 break;
1766 if (real_name == NULL)
1768 perror (file);
1769 goto cleanup;
1771 } /* try with a different name */
1773 if (!S_ISREG (stat_buf.st_mode))
1775 error ("skipping %s: it is not a regular file.", real_name);
1776 goto cleanup;
1778 if (real_name == compressed_name)
1780 char *cmd = concat (compr->command, " ", real_name);
1781 inf = (FILE *) popen (cmd, "r");
1782 free (cmd);
1784 else
1785 inf = fopen (real_name, "r");
1786 if (inf == NULL)
1788 perror (real_name);
1789 goto cleanup;
1792 process_file (inf, uncompressed_name, lang);
1794 if (real_name == compressed_name)
1795 retval = pclose (inf);
1796 else
1797 retval = fclose (inf);
1798 if (retval < 0)
1799 pfatal (file);
1801 cleanup:
1802 if (compressed_name) free (compressed_name);
1803 if (uncompressed_name) free (uncompressed_name);
1804 last_node = NULL;
1805 curfdp = NULL;
1806 return;
1809 static void
1810 process_file (fh, fn, lang)
1811 FILE *fh;
1812 char *fn;
1813 language *lang;
1815 static const fdesc emptyfdesc;
1816 fdesc *fdp;
1818 /* Create a new input file description entry. */
1819 fdp = xnew (1, fdesc);
1820 *fdp = emptyfdesc;
1821 fdp->next = fdhead;
1822 fdp->infname = savestr (fn);
1823 fdp->lang = lang;
1824 fdp->infabsname = absolute_filename (fn, cwd);
1825 fdp->infabsdir = absolute_dirname (fn, cwd);
1826 if (filename_is_absolute (fn))
1828 /* An absolute file name. Canonicalize it. */
1829 fdp->taggedfname = absolute_filename (fn, NULL);
1831 else
1833 /* A file name relative to cwd. Make it relative
1834 to the directory of the tags file. */
1835 fdp->taggedfname = relative_filename (fn, tagfiledir);
1837 fdp->usecharno = TRUE; /* use char position when making tags */
1838 fdp->prop = NULL;
1839 fdp->written = FALSE; /* not written on tags file yet */
1841 fdhead = fdp;
1842 curfdp = fdhead; /* the current file description */
1844 find_entries (fh);
1846 /* If not Ctags, and if this is not metasource and if it contained no #line
1847 directives, we can write the tags and free all nodes pointing to
1848 curfdp. */
1849 if (!CTAGS
1850 && curfdp->usecharno /* no #line directives in this file */
1851 && !curfdp->lang->metasource)
1853 node *np, *prev;
1855 /* Look for the head of the sublist relative to this file. See add_node
1856 for the structure of the node tree. */
1857 prev = NULL;
1858 for (np = nodehead; np != NULL; prev = np, np = np->left)
1859 if (np->fdp == curfdp)
1860 break;
1862 /* If we generated tags for this file, write and delete them. */
1863 if (np != NULL)
1865 /* This is the head of the last sublist, if any. The following
1866 instructions depend on this being true. */
1867 assert (np->left == NULL);
1869 assert (fdhead == curfdp);
1870 assert (last_node->fdp == curfdp);
1871 put_entries (np); /* write tags for file curfdp->taggedfname */
1872 free_tree (np); /* remove the written nodes */
1873 if (prev == NULL)
1874 nodehead = NULL; /* no nodes left */
1875 else
1876 prev->left = NULL; /* delete the pointer to the sublist */
1882 * This routine sets up the boolean pseudo-functions which work
1883 * by setting boolean flags dependent upon the corresponding character.
1884 * Every char which is NOT in that string is not a white char. Therefore,
1885 * all of the array "_wht" is set to FALSE, and then the elements
1886 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1887 * of a char is TRUE if it is the string "white", else FALSE.
1889 static void
1890 init ()
1892 register char *sp;
1893 register int i;
1895 for (i = 0; i < CHARS; i++)
1896 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1897 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1898 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1899 notinname('\0') = notinname('\n');
1900 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1901 begtoken('\0') = begtoken('\n');
1902 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1903 intoken('\0') = intoken('\n');
1904 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1905 endtoken('\0') = endtoken('\n');
1909 * This routine opens the specified file and calls the function
1910 * which finds the function and type definitions.
1912 static void
1913 find_entries (inf)
1914 FILE *inf;
1916 char *cp;
1917 language *lang = curfdp->lang;
1918 Lang_function *parser = NULL;
1920 /* If user specified a language, use it. */
1921 if (lang != NULL && lang->function != NULL)
1923 parser = lang->function;
1926 /* Else try to guess the language given the file name. */
1927 if (parser == NULL)
1929 lang = get_language_from_filename (curfdp->infname, TRUE);
1930 if (lang != NULL && lang->function != NULL)
1932 curfdp->lang = lang;
1933 parser = lang->function;
1937 /* Else look for sharp-bang as the first two characters. */
1938 if (parser == NULL
1939 && readline_internal (&lb, inf) > 0
1940 && lb.len >= 2
1941 && lb.buffer[0] == '#'
1942 && lb.buffer[1] == '!')
1944 char *lp;
1946 /* Set lp to point at the first char after the last slash in the
1947 line or, if no slashes, at the first nonblank. Then set cp to
1948 the first successive blank and terminate the string. */
1949 lp = etags_strrchr (lb.buffer+2, '/');
1950 if (lp != NULL)
1951 lp += 1;
1952 else
1953 lp = skip_spaces (lb.buffer + 2);
1954 cp = skip_non_spaces (lp);
1955 *cp = '\0';
1957 if (strlen (lp) > 0)
1959 lang = get_language_from_interpreter (lp);
1960 if (lang != NULL && lang->function != NULL)
1962 curfdp->lang = lang;
1963 parser = lang->function;
1968 /* We rewind here, even if inf may be a pipe. We fail if the
1969 length of the first line is longer than the pipe block size,
1970 which is unlikely. */
1971 rewind (inf);
1973 /* Else try to guess the language given the case insensitive file name. */
1974 if (parser == NULL)
1976 lang = get_language_from_filename (curfdp->infname, FALSE);
1977 if (lang != NULL && lang->function != NULL)
1979 curfdp->lang = lang;
1980 parser = lang->function;
1984 /* Else try Fortran or C. */
1985 if (parser == NULL)
1987 node *old_last_node = last_node;
1989 curfdp->lang = get_language_from_langname ("fortran");
1990 find_entries (inf);
1992 if (old_last_node == last_node)
1993 /* No Fortran entries found. Try C. */
1995 /* We do not tag if rewind fails.
1996 Only the file name will be recorded in the tags file. */
1997 rewind (inf);
1998 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1999 find_entries (inf);
2001 return;
2004 if (!no_line_directive
2005 && curfdp->lang != NULL && curfdp->lang->metasource)
2006 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
2007 file, or anyway we parsed a file that is automatically generated from
2008 this one. If this is the case, the bingo.c file contained #line
2009 directives that generated tags pointing to this file. Let's delete
2010 them all before parsing this file, which is the real source. */
2012 fdesc **fdpp = &fdhead;
2013 while (*fdpp != NULL)
2014 if (*fdpp != curfdp
2015 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2016 /* We found one of those! We must delete both the file description
2017 and all tags referring to it. */
2019 fdesc *badfdp = *fdpp;
2021 /* Delete the tags referring to badfdp->taggedfname
2022 that were obtained from badfdp->infname. */
2023 invalidate_nodes (badfdp, &nodehead);
2025 *fdpp = badfdp->next; /* remove the bad description from the list */
2026 free_fdesc (badfdp);
2028 else
2029 fdpp = &(*fdpp)->next; /* advance the list pointer */
2032 assert (parser != NULL);
2034 /* Generic initialisations before reading from file. */
2035 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2037 /* Generic initialisations before parsing file with readline. */
2038 lineno = 0; /* reset global line number */
2039 charno = 0; /* reset global char number */
2040 linecharno = 0; /* reset global char number of line start */
2042 parser (inf);
2044 regex_tag_multiline ();
2049 * Check whether an implicitly named tag should be created,
2050 * then call `pfnote'.
2051 * NAME is a string that is internally copied by this function.
2053 * TAGS format specification
2054 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2055 * The following is explained in some more detail in etc/ETAGS.EBNF.
2057 * make_tag creates tags with "implicit tag names" (unnamed tags)
2058 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2059 * 1. NAME does not contain any of the characters in NONAM;
2060 * 2. LINESTART contains name as either a rightmost, or rightmost but
2061 * one character, substring;
2062 * 3. the character, if any, immediately before NAME in LINESTART must
2063 * be a character in NONAM;
2064 * 4. the character, if any, immediately after NAME in LINESTART must
2065 * also be a character in NONAM.
2067 * The implementation uses the notinname() macro, which recognises the
2068 * characters stored in the string `nonam'.
2069 * etags.el needs to use the same characters that are in NONAM.
2071 static void
2072 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2073 char *name; /* tag name, or NULL if unnamed */
2074 int namelen; /* tag length */
2075 bool is_func; /* tag is a function */
2076 char *linestart; /* start of the line where tag is */
2077 int linelen; /* length of the line where tag is */
2078 int lno; /* line number */
2079 long cno; /* character number */
2081 bool named = (name != NULL && namelen > 0);
2083 if (!CTAGS && named) /* maybe set named to false */
2084 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2085 such that etags.el can guess a name from it. */
2087 int i;
2088 register char *cp = name;
2090 for (i = 0; i < namelen; i++)
2091 if (notinname (*cp++))
2092 break;
2093 if (i == namelen) /* rule #1 */
2095 cp = linestart + linelen - namelen;
2096 if (notinname (linestart[linelen-1]))
2097 cp -= 1; /* rule #4 */
2098 if (cp >= linestart /* rule #2 */
2099 && (cp == linestart
2100 || notinname (cp[-1])) /* rule #3 */
2101 && strneq (name, cp, namelen)) /* rule #2 */
2102 named = FALSE; /* use implicit tag name */
2106 if (named)
2107 name = savenstr (name, namelen);
2108 else
2109 name = NULL;
2110 pfnote (name, is_func, linestart, linelen, lno, cno);
2113 /* Record a tag. */
2114 static void
2115 pfnote (name, is_func, linestart, linelen, lno, cno)
2116 char *name; /* tag name, or NULL if unnamed */
2117 bool is_func; /* tag is a function */
2118 char *linestart; /* start of the line where tag is */
2119 int linelen; /* length of the line where tag is */
2120 int lno; /* line number */
2121 long cno; /* character number */
2123 register node *np;
2125 assert (name == NULL || name[0] != '\0');
2126 if (CTAGS && name == NULL)
2127 return;
2129 np = xnew (1, node);
2131 /* If ctags mode, change name "main" to M<thisfilename>. */
2132 if (CTAGS && !cxref_style && streq (name, "main"))
2134 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2135 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2136 fp = etags_strrchr (np->name, '.');
2137 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2138 fp[0] = '\0';
2140 else
2141 np->name = name;
2142 np->valid = TRUE;
2143 np->been_warned = FALSE;
2144 np->fdp = curfdp;
2145 np->is_func = is_func;
2146 np->lno = lno;
2147 if (np->fdp->usecharno)
2148 /* Our char numbers are 0-base, because of C language tradition?
2149 ctags compatibility? old versions compatibility? I don't know.
2150 Anyway, since emacs's are 1-base we expect etags.el to take care
2151 of the difference. If we wanted to have 1-based numbers, we would
2152 uncomment the +1 below. */
2153 np->cno = cno /* + 1 */ ;
2154 else
2155 np->cno = invalidcharno;
2156 np->left = np->right = NULL;
2157 if (CTAGS && !cxref_style)
2159 if (strlen (linestart) < 50)
2160 np->regex = concat (linestart, "$", "");
2161 else
2162 np->regex = savenstr (linestart, 50);
2164 else
2165 np->regex = savenstr (linestart, linelen);
2167 add_node (np, &nodehead);
2171 * free_tree ()
2172 * recurse on left children, iterate on right children.
2174 static void
2175 free_tree (np)
2176 register node *np;
2178 while (np)
2180 register node *node_right = np->right;
2181 free_tree (np->left);
2182 if (np->name != NULL)
2183 free (np->name);
2184 free (np->regex);
2185 free (np);
2186 np = node_right;
2191 * free_fdesc ()
2192 * delete a file description
2194 static void
2195 free_fdesc (fdp)
2196 register fdesc *fdp;
2198 if (fdp->infname != NULL) free (fdp->infname);
2199 if (fdp->infabsname != NULL) free (fdp->infabsname);
2200 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2201 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2202 if (fdp->prop != NULL) free (fdp->prop);
2203 free (fdp);
2207 * add_node ()
2208 * Adds a node to the tree of nodes. In etags mode, sort by file
2209 * name. In ctags mode, sort by tag name. Make no attempt at
2210 * balancing.
2212 * add_node is the only function allowed to add nodes, so it can
2213 * maintain state.
2215 static void
2216 add_node (np, cur_node_p)
2217 node *np, **cur_node_p;
2219 register int dif;
2220 register node *cur_node = *cur_node_p;
2222 if (cur_node == NULL)
2224 *cur_node_p = np;
2225 last_node = np;
2226 return;
2229 if (!CTAGS)
2230 /* Etags Mode */
2232 /* For each file name, tags are in a linked sublist on the right
2233 pointer. The first tags of different files are a linked list
2234 on the left pointer. last_node points to the end of the last
2235 used sublist. */
2236 if (last_node != NULL && last_node->fdp == np->fdp)
2238 /* Let's use the same sublist as the last added node. */
2239 assert (last_node->right == NULL);
2240 last_node->right = np;
2241 last_node = np;
2243 else if (cur_node->fdp == np->fdp)
2245 /* Scanning the list we found the head of a sublist which is
2246 good for us. Let's scan this sublist. */
2247 add_node (np, &cur_node->right);
2249 else
2250 /* The head of this sublist is not good for us. Let's try the
2251 next one. */
2252 add_node (np, &cur_node->left);
2253 } /* if ETAGS mode */
2255 else
2257 /* Ctags Mode */
2258 dif = strcmp (np->name, cur_node->name);
2261 * If this tag name matches an existing one, then
2262 * do not add the node, but maybe print a warning.
2264 if (no_duplicates && !dif)
2266 if (np->fdp == cur_node->fdp)
2268 if (!no_warnings)
2270 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2271 np->fdp->infname, lineno, np->name);
2272 fprintf (stderr, "Second entry ignored\n");
2275 else if (!cur_node->been_warned && !no_warnings)
2277 fprintf
2278 (stderr,
2279 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2280 np->fdp->infname, cur_node->fdp->infname, np->name);
2281 cur_node->been_warned = TRUE;
2283 return;
2286 /* Actually add the node */
2287 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2288 } /* if CTAGS mode */
2292 * invalidate_nodes ()
2293 * Scan the node tree and invalidate all nodes pointing to the
2294 * given file description (CTAGS case) or free them (ETAGS case).
2296 static void
2297 invalidate_nodes (badfdp, npp)
2298 fdesc *badfdp;
2299 node **npp;
2301 node *np = *npp;
2303 if (np == NULL)
2304 return;
2306 if (CTAGS)
2308 if (np->left != NULL)
2309 invalidate_nodes (badfdp, &np->left);
2310 if (np->fdp == badfdp)
2311 np->valid = FALSE;
2312 if (np->right != NULL)
2313 invalidate_nodes (badfdp, &np->right);
2315 else
2317 assert (np->fdp != NULL);
2318 if (np->fdp == badfdp)
2320 *npp = np->left; /* detach the sublist from the list */
2321 np->left = NULL; /* isolate it */
2322 free_tree (np); /* free it */
2323 invalidate_nodes (badfdp, npp);
2325 else
2326 invalidate_nodes (badfdp, &np->left);
2331 static int total_size_of_entries __P((node *));
2332 static int number_len __P((long));
/* Length of the decimal representation of NUM, as printf's "%ld"
   writes it: the number of digits, plus one for the `-' sign when NUM
   is negative.  The digits are counted on the unsigned magnitude so
   that the result does not depend on how division rounds negative
   values. */
static int
number_len (num)
     long num;
{
  unsigned long magnitude;
  int len = 1;

  if (num < 0)
    {
      magnitude = - (unsigned long) num;
      len += 1;                 /* the sign */
    }
  else
    magnitude = (unsigned long) num;

  while ((magnitude /= 10) > 0)
    len += 1;
  return len;
}
2346 * Return total number of characters that put_entries will output for
2347 * the nodes in the linked list at the right of the specified node.
2348 * This count is irrelevant with etags.el since emacs 19.34 at least,
2349 * but is still supplied for backward compatibility.
2351 static int
2352 total_size_of_entries (np)
2353 register node *np;
2355 register int total = 0;
2357 for (; np != NULL; np = np->right)
2358 if (np->valid)
2360 total += strlen (np->regex) + 1; /* pat\177 */
2361 if (np->name != NULL)
2362 total += strlen (np->name) + 1; /* name\001 */
2363 total += number_len ((long) np->lno) + 1; /* lno, */
2364 if (np->cno != invalidcharno) /* cno */
2365 total += number_len (np->cno);
2366 total += 1; /* newline */
2369 return total;
2372 static void
2373 put_entries (np)
2374 register node *np;
2376 register char *sp;
2377 static fdesc *fdp = NULL;
2379 if (np == NULL)
2380 return;
2382 /* Output subentries that precede this one */
2383 if (CTAGS)
2384 put_entries (np->left);
2386 /* Output this entry */
2387 if (np->valid)
2389 if (!CTAGS)
2391 /* Etags mode */
2392 if (fdp != np->fdp)
2394 fdp = np->fdp;
2395 fprintf (tagf, "\f\n%s,%d\n",
2396 fdp->taggedfname, total_size_of_entries (np));
2397 fdp->written = TRUE;
2399 fputs (np->regex, tagf);
2400 fputc ('\177', tagf);
2401 if (np->name != NULL)
2403 fputs (np->name, tagf);
2404 fputc ('\001', tagf);
2406 fprintf (tagf, "%d,", np->lno);
2407 if (np->cno != invalidcharno)
2408 fprintf (tagf, "%ld", np->cno);
2409 fputs ("\n", tagf);
2411 else
2413 /* Ctags mode */
2414 if (np->name == NULL)
2415 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2417 if (cxref_style)
2419 if (vgrind_style)
2420 fprintf (stdout, "%s %s %d\n",
2421 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2422 else
2423 fprintf (stdout, "%-16s %3d %-16s %s\n",
2424 np->name, np->lno, np->fdp->taggedfname, np->regex);
2426 else
2428 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2430 if (np->is_func)
2431 { /* function or #define macro with args */
2432 putc (searchar, tagf);
2433 putc ('^', tagf);
2435 for (sp = np->regex; *sp; sp++)
2437 if (*sp == '\\' || *sp == searchar)
2438 putc ('\\', tagf);
2439 putc (*sp, tagf);
2441 putc (searchar, tagf);
2443 else
2444 { /* anything else; text pattern inadequate */
2445 fprintf (tagf, "%d", np->lno);
2447 putc ('\n', tagf);
2450 } /* if this node contains a valid tag */
2452 /* Output subentries that follow this one */
2453 put_entries (np->right);
2454 if (!CTAGS)
2455 put_entries (np->left);
/* C extensions: bit masks that describe the C dialect being parsed. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classes of keywords recognised by the C-like parsers.  st_none,
 * the first value, stands for a token that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2483 static unsigned int hash __P((const char *, unsigned int));
2484 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2485 static enum sym_type C_symtype __P((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2537 /*%<*/
2538 /* C code produced by gperf version 3.0.1 */
2539 /* Command-line: gperf -m 5 */
2540 /* Computed positions: -k'2-3' */
2542 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2543 /* maximum key range = 33, duplicates = 0 */
/* gperf generated hash of the keyword STR, LEN characters long: LEN
   plus the table values of the second character and, unless LEN is 2,
   of the third one.  in_word_set only calls it with LEN >= 2. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2600 static struct C_stab_entry *
2601 in_word_set (str, len)
2602 register const char *str;
2603 register unsigned int len;
2605 enum
2607 TOTAL_KEYWORDS = 33,
2608 MIN_WORD_LENGTH = 2,
2609 MAX_WORD_LENGTH = 15,
2610 MIN_HASH_VALUE = 2,
2611 MAX_HASH_VALUE = 34
2614 static struct C_stab_entry wordlist[] =
2616 {""}, {""},
2617 {"if", 0, st_C_ignore},
2618 {"GTY", 0, st_C_attribute},
2619 {"@end", 0, st_C_objend},
2620 {"union", 0, st_C_struct},
2621 {"define", 0, st_C_define},
2622 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2623 {"template", 0, st_C_template},
2624 {"operator", C_PLPL, st_C_operator},
2625 {"@interface", 0, st_C_objprot},
2626 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2627 {"friend", C_PLPL, st_C_ignore},
2628 {"typedef", 0, st_C_typedef},
2629 {"return", 0, st_C_ignore},
2630 {"@implementation",0, st_C_objimpl},
2631 {"@protocol", 0, st_C_objprot},
2632 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2633 {"extern", 0, st_C_extern},
2634 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2635 {"struct", 0, st_C_struct},
2636 {"domain", C_STAR, st_C_struct},
2637 {"switch", 0, st_C_ignore},
2638 {"enum", 0, st_C_enum},
2639 {"for", 0, st_C_ignore},
2640 {"namespace", C_PLPL, st_C_struct},
2641 {"class", 0, st_C_class},
2642 {"while", 0, st_C_ignore},
2643 {"undef", 0, st_C_define},
2644 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2645 {"__attribute__", 0, st_C_attribute},
2646 {"SYSCALL", 0, st_C_gnumacro},
2647 {"ENTRY", 0, st_C_gnumacro},
2648 {"PSEUDO", 0, st_C_gnumacro},
2649 {"DEFUN", 0, st_C_gnumacro}
2652 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2654 register int key = hash (str, len);
2656 if (key <= MAX_HASH_VALUE && key >= 0)
2658 register const char *s = wordlist[key].name;
2660 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2661 return &wordlist[key];
2664 return 0;
2666 /*%>*/
2668 static enum sym_type
2669 C_symtype (str, len, c_ext)
2670 char *str;
2671 int len;
2672 int c_ext;
2674 register struct C_stab_entry *se = in_word_set (str, len);
2676 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2677 return st_none;
2678 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.
    In this case the token contained here is the pattern that will be
    used to create a tag.
  */
  bool valid;                   /* when false, do not create a tag; the
                                   token should be invalidated whenever
                                   a state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2796 * Variables and functions for dealing with nested structures.
2797 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2799 static void pushclass_above __P((int, char *, int));
2800 static void popclass_above __P((int));
2801 static void write_classname __P((linebuffer *, char *qualifier));
2803 static struct {
2804 char **cname; /* nested class names */
2805 int *bracelev; /* nested class brace level */
2806 int nl; /* class nesting level (elements used) */
2807 int size; /* length of the array */
2808 } cstack; /* stack for nested declaration tags */
2809 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2810 #define nestlev (cstack.nl)
2811 /* After struct keyword or in struct body, not inside a nested function. */
2812 #define instruct (structdef == snone && nestlev > 0 \
2813 && bracelev == cstack.bracelev[nestlev-1] + 1)
2815 static void
2816 pushclass_above (bracelev, str, len)
2817 int bracelev;
2818 char *str;
2819 int len;
2821 int nl;
2823 popclass_above (bracelev);
2824 nl = cstack.nl;
2825 if (nl >= cstack.size)
2827 int size = cstack.size *= 2;
2828 xrnew (cstack.cname, size, char *);
2829 xrnew (cstack.bracelev, size, int);
2831 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2832 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2833 cstack.bracelev[nl] = bracelev;
2834 cstack.nl = nl + 1;
2837 static void
2838 popclass_above (bracelev)
2839 int bracelev;
2841 int nl;
2843 for (nl = cstack.nl - 1;
2844 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2845 nl--)
2847 if (cstack.cname[nl] != NULL)
2848 free (cstack.cname[nl]);
2849 cstack.nl = nl;
2853 static void
2854 write_classname (cn, qualifier)
2855 linebuffer *cn;
2856 char *qualifier;
2858 int i, len;
2859 int qlen = strlen (qualifier);
2861 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2863 len = 0;
2864 cn->len = 0;
2865 cn->buffer[0] = '\0';
2867 else
2869 len = strlen (cstack.cname[0]);
2870 linebuffer_setlen (cn, len);
2871 strcpy (cn->buffer, cstack.cname[0]);
2873 for (i = 1; i < cstack.nl; i++)
2875 char *s;
2876 int slen;
2878 s = cstack.cname[i];
2879 if (s == NULL)
2880 continue;
2881 slen = strlen (s);
2882 len += slen + qlen;
2883 linebuffer_setlen (cn, len);
2884 strncat (cn->buffer, qualifier, qlen);
2885 strncat (cn->buffer, s, slen);
2890 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2891 static void make_C_tag __P((bool));
2894 * consider_token ()
2895 * checks to see if the current token is at the start of a
2896 * function or variable, or corresponds to a typedef, or
2897 * is a struct/union/enum tag, or #define, or an enum constant.
2899 * *IS_FUNC gets TRUE if the token is a function or #define macro
2900 * with args. C_EXTP points to which language we are looking at.
2902 * Globals
2903 * fvdef IN OUT
2904 * structdef IN OUT
2905 * definedef IN OUT
2906 * typdef IN OUT
2907 * objdef IN OUT
2910 static bool
2911 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2912 register char *str; /* IN: token pointer */
2913 register int len; /* IN: token length */
2914 register int c; /* IN: first char after the token */
2915 int *c_extp; /* IN, OUT: C extensions mask */
2916 int bracelev; /* IN: brace level */
2917 int parlev; /* IN: parenthesis level */
2918 bool *is_func_or_var; /* OUT: function or variable found */
2920 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2921 structtype is the type of the preceding struct-like keyword, and
2922 structbracelev is the brace level where it has been seen. */
2923 static enum sym_type structtype;
2924 static int structbracelev;
2925 static enum sym_type toktype;
2928 toktype = C_symtype (str, len, *c_extp);
2931 * Skip __attribute__
2933 if (toktype == st_C_attribute)
2935 inattribute = TRUE;
2936 return FALSE;
2940 * Advance the definedef state machine.
2942 switch (definedef)
2944 case dnone:
2945 /* We're not on a preprocessor line. */
2946 if (toktype == st_C_gnumacro)
2948 fvdef = fdefunkey;
2949 return FALSE;
2951 break;
2952 case dsharpseen:
2953 if (toktype == st_C_define)
2955 definedef = ddefineseen;
2957 else
2959 definedef = dignorerest;
2961 return FALSE;
2962 case ddefineseen:
2964 * Make a tag for any macro, unless it is a constant
2965 * and constantypedefs is FALSE.
2967 definedef = dignorerest;
2968 *is_func_or_var = (c == '(');
2969 if (!*is_func_or_var && !constantypedefs)
2970 return FALSE;
2971 else
2972 return TRUE;
2973 case dignorerest:
2974 return FALSE;
2975 default:
2976 error ("internal error: definedef value.", (char *)NULL);
2980 * Now typedefs
2982 switch (typdef)
2984 case tnone:
2985 if (toktype == st_C_typedef)
2987 if (typedefs)
2988 typdef = tkeyseen;
2989 fvextern = FALSE;
2990 fvdef = fvnone;
2991 return FALSE;
2993 break;
2994 case tkeyseen:
2995 switch (toktype)
2997 case st_none:
2998 case st_C_class:
2999 case st_C_struct:
3000 case st_C_enum:
3001 typdef = ttypeseen;
3003 break;
3004 case ttypeseen:
3005 if (structdef == snone && fvdef == fvnone)
3007 fvdef = fvnameseen;
3008 return TRUE;
3010 break;
3011 case tend:
3012 switch (toktype)
3014 case st_C_class:
3015 case st_C_struct:
3016 case st_C_enum:
3017 return FALSE;
3019 return TRUE;
3022 switch (toktype)
3024 case st_C_javastruct:
3025 if (structdef == stagseen)
3026 structdef = scolonseen;
3027 return FALSE;
3028 case st_C_template:
3029 case st_C_class:
3030 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3031 && bracelev == 0
3032 && definedef == dnone && structdef == snone
3033 && typdef == tnone && fvdef == fvnone)
3034 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3035 if (toktype == st_C_template)
3036 break;
3037 /* FALLTHRU */
3038 case st_C_struct:
3039 case st_C_enum:
3040 if (parlev == 0
3041 && fvdef != vignore
3042 && (typdef == tkeyseen
3043 || (typedefs_or_cplusplus && structdef == snone)))
3045 structdef = skeyseen;
3046 structtype = toktype;
3047 structbracelev = bracelev;
3048 if (fvdef == fvnameseen)
3049 fvdef = fvnone;
3051 return FALSE;
3054 if (structdef == skeyseen)
3056 structdef = stagseen;
3057 return TRUE;
3060 if (typdef != tnone)
3061 definedef = dnone;
3063 /* Detect Objective C constructs. */
3064 switch (objdef)
3066 case onone:
3067 switch (toktype)
3069 case st_C_objprot:
3070 objdef = oprotocol;
3071 return FALSE;
3072 case st_C_objimpl:
3073 objdef = oimplementation;
3074 return FALSE;
3076 break;
3077 case oimplementation:
3078 /* Save the class tag for functions or variables defined inside. */
3079 objtag = savenstr (str, len);
3080 objdef = oinbody;
3081 return FALSE;
3082 case oprotocol:
3083 /* Save the class tag for categories. */
3084 objtag = savenstr (str, len);
3085 objdef = otagseen;
3086 *is_func_or_var = TRUE;
3087 return TRUE;
3088 case oparenseen:
3089 objdef = ocatseen;
3090 *is_func_or_var = TRUE;
3091 return TRUE;
3092 case oinbody:
3093 break;
3094 case omethodsign:
3095 if (parlev == 0)
3097 fvdef = fvnone;
3098 objdef = omethodtag;
3099 linebuffer_setlen (&token_name, len);
3100 strncpy (token_name.buffer, str, len);
3101 token_name.buffer[len] = '\0';
3102 return TRUE;
3104 return FALSE;
3105 case omethodcolon:
3106 if (parlev == 0)
3107 objdef = omethodparm;
3108 return FALSE;
3109 case omethodparm:
3110 if (parlev == 0)
3112 fvdef = fvnone;
3113 objdef = omethodtag;
3114 linebuffer_setlen (&token_name, token_name.len + len);
3115 strncat (token_name.buffer, str, len);
3116 return TRUE;
3118 return FALSE;
3119 case oignore:
3120 if (toktype == st_C_objend)
3122 /* Memory leakage here: the string pointed by objtag is
3123 never released, because many tests would be needed to
3124 avoid breaking on incorrect input code. The amount of
3125 memory leaked here is the sum of the lengths of the
3126 class tags.
3127 free (objtag); */
3128 objdef = onone;
3130 return FALSE;
3133 /* A function, variable or enum constant? */
3134 switch (toktype)
3136 case st_C_extern:
3137 fvextern = TRUE;
3138 switch (fvdef)
3140 case finlist:
3141 case flistseen:
3142 case fignore:
3143 case vignore:
3144 break;
3145 default:
3146 fvdef = fvnone;
3148 return FALSE;
3149 case st_C_ignore:
3150 fvextern = FALSE;
3151 fvdef = vignore;
3152 return FALSE;
3153 case st_C_operator:
3154 fvdef = foperator;
3155 *is_func_or_var = TRUE;
3156 return TRUE;
3157 case st_none:
3158 if (constantypedefs
3159 && structdef == snone
3160 && structtype == st_C_enum && bracelev > structbracelev)
3161 return TRUE; /* enum constant */
3162 switch (fvdef)
3164 case fdefunkey:
3165 if (bracelev > 0)
3166 break;
3167 fvdef = fdefunname; /* GNU macro */
3168 *is_func_or_var = TRUE;
3169 return TRUE;
3170 case fvnone:
3171 switch (typdef)
3173 case ttypeseen:
3174 return FALSE;
3175 case tnone:
3176 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3177 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3179 fvdef = vignore;
3180 return FALSE;
3182 break;
3184 /* FALLTHRU */
3185 case fvnameseen:
3186 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3188 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3189 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3190 fvdef = foperator;
3191 *is_func_or_var = TRUE;
3192 return TRUE;
3194 if (bracelev > 0 && !instruct)
3195 break;
3196 fvdef = fvnameseen; /* function or variable */
3197 *is_func_or_var = TRUE;
3198 return TRUE;
3200 break;
3203 return FALSE;
3208 * C_entries often keeps pointers to tokens or lines which are older than
3209 * the line currently read. By keeping two line buffers, and switching
3210 * them at end of line, it is possible to use those pointers.
3212 static struct
3214 long linepos;
3215 linebuffer lb;
3216 } lbs[2];
/* The macros below are meant to be expanded inside C_entries: they refer
   to its locals (curndx, newndx, c_ext, lp, inf, quotednl, savetoken)
   and to the file-level parser state.  */

/* True when the line being scanned is stored in the current buffer.  */
#define current_lb_is_new (newndx == curndx)
/* Make the other element of lbs[] the current one.  */
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current buffer and restart scanning
   at its beginning, leaving definedef (the preprocessor state) alone.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved when a
   preprocessor line began and reset definedef to dnone.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3251 static void
3252 make_C_tag (isfun)
3253 bool isfun;
3255 /* This function is never called when token.valid is FALSE, but
3256 we must protect against invalid input or internal errors. */
3257 if (token.valid)
3258 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3259 token.offset+token.length+1, token.lineno, token.linepos);
3260 else if (DEBUG)
3261 { /* this branch is optimised away if !DEBUG */
3262 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3263 token_name.len + 17, isfun, token.line,
3264 token.offset+token.length+1, token.lineno, token.linepos);
3265 error ("INVALID TOKEN", NULL);
3268 token.valid = FALSE;
3273 * C_entries ()
3274 * This routine finds functions, variables, typedefs,
3275 * #define's, enum constants and struct/union/enum definitions in
3276 * C syntax and adds them to the list.
3278 static void
3279 C_entries (c_ext, inf)
3280 int c_ext; /* extension of C */
3281 FILE *inf; /* input file */
3283 register char c; /* latest char read; '\0' for end of line */
3284 register char *lp; /* pointer one beyond the character `c' */
3285 int curndx, newndx; /* indices for current and new lb */
3286 register int tokoff; /* offset in line of start of current token */
3287 register int toklen; /* length of current token */
3288 char *qualifier; /* string used to qualify names */
3289 int qlen; /* length of qualifier */
3290 int bracelev; /* current brace level */
3291 int bracketlev; /* current bracket level */
3292 int parlev; /* current parenthesis level */
3293 int attrparlev; /* __attribute__ parenthesis level */
3294 int templatelev; /* current template level */
3295 int typdefbracelev; /* bracelev where a typedef struct body begun */
3296 bool incomm, inquote, inchar, quotednl, midtoken;
3297 bool yacc_rules; /* in the rules part of a yacc file */
3298 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3301 linebuffer_init (&lbs[0].lb);
3302 linebuffer_init (&lbs[1].lb);
3303 if (cstack.size == 0)
3305 cstack.size = (DEBUG) ? 1 : 4;
3306 cstack.nl = 0;
3307 cstack.cname = xnew (cstack.size, char *);
3308 cstack.bracelev = xnew (cstack.size, int);
3311 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3312 curndx = newndx = 0;
3313 lp = curlb.buffer;
3314 *lp = 0;
3316 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3317 structdef = snone; definedef = dnone; objdef = onone;
3318 yacc_rules = FALSE;
3319 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3320 token.valid = savetoken.valid = FALSE;
3321 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3322 if (cjava)
3323 { qualifier = "."; qlen = 1; }
3324 else
3325 { qualifier = "::"; qlen = 2; }
3328 while (!feof (inf))
3330 c = *lp++;
3331 if (c == '\\')
3333 /* If we are at the end of the line, the next character is a
3334 '\0'; do not skip it, because it is what tells us
3335 to read the next line. */
3336 if (*lp == '\0')
3338 quotednl = TRUE;
3339 continue;
3341 lp++;
3342 c = ' ';
3344 else if (incomm)
3346 switch (c)
3348 case '*':
3349 if (*lp == '/')
3351 c = *lp++;
3352 incomm = FALSE;
3354 break;
3355 case '\0':
3356 /* Newlines inside comments do not end macro definitions in
3357 traditional cpp. */
3358 CNL_SAVE_DEFINEDEF ();
3359 break;
3361 continue;
3363 else if (inquote)
3365 switch (c)
3367 case '"':
3368 inquote = FALSE;
3369 break;
3370 case '\0':
3371 /* Newlines inside strings do not end macro definitions
3372 in traditional cpp, even though compilers don't
3373 usually accept them. */
3374 CNL_SAVE_DEFINEDEF ();
3375 break;
3377 continue;
3379 else if (inchar)
3381 switch (c)
3383 case '\0':
3384 /* Hmmm, something went wrong. */
3385 CNL ();
3386 /* FALLTHRU */
3387 case '\'':
3388 inchar = FALSE;
3389 break;
3391 continue;
3393 else if (bracketlev > 0)
3395 switch (c)
3397 case ']':
3398 if (--bracketlev > 0)
3399 continue;
3400 break;
3401 case '\0':
3402 CNL_SAVE_DEFINEDEF ();
3403 break;
3405 continue;
3407 else switch (c)
3409 case '"':
3410 inquote = TRUE;
3411 if (inattribute)
3412 break;
3413 switch (fvdef)
3415 case fdefunkey:
3416 case fstartlist:
3417 case finlist:
3418 case fignore:
3419 case vignore:
3420 break;
3421 default:
3422 fvextern = FALSE;
3423 fvdef = fvnone;
3425 continue;
3426 case '\'':
3427 inchar = TRUE;
3428 if (inattribute)
3429 break;
3430 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3432 fvextern = FALSE;
3433 fvdef = fvnone;
3435 continue;
3436 case '/':
3437 if (*lp == '*')
3439 incomm = TRUE;
3440 lp++;
3441 c = ' ';
3443 else if (/* cplpl && */ *lp == '/')
3445 c = '\0';
3447 break;
3448 case '%':
3449 if ((c_ext & YACC) && *lp == '%')
3451 /* Entering or exiting rules section in yacc file. */
3452 lp++;
3453 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3454 typdef = tnone; structdef = snone;
3455 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3456 bracelev = 0;
3457 yacc_rules = !yacc_rules;
3458 continue;
3460 else
3461 break;
3462 case '#':
3463 if (definedef == dnone)
3465 char *cp;
3466 bool cpptoken = TRUE;
3468 /* Look back on this line. If all blanks, or nonblanks
3469 followed by an end of comment, this is a preprocessor
3470 token. */
3471 for (cp = newlb.buffer; cp < lp-1; cp++)
3472 if (!iswhite (*cp))
3474 if (*cp == '*' && *(cp+1) == '/')
3476 cp++;
3477 cpptoken = TRUE;
3479 else
3480 cpptoken = FALSE;
3482 if (cpptoken)
3483 definedef = dsharpseen;
3484 } /* if (definedef == dnone) */
3485 continue;
3486 case '[':
3487 bracketlev++;
3488 continue;
3489 } /* switch (c) */
3492 /* Consider token only if some involved conditions are satisfied. */
3493 if (typdef != tignore
3494 && definedef != dignorerest
3495 && fvdef != finlist
3496 && templatelev == 0
3497 && (definedef != dnone
3498 || structdef != scolonseen)
3499 && !inattribute)
3501 if (midtoken)
3503 if (endtoken (c))
3505 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3506 /* This handles :: in the middle,
3507 but not at the beginning of an identifier.
3508 Also, space-separated :: is not recognised. */
3510 if (c_ext & C_AUTO) /* automatic detection of C++ */
3511 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3512 lp += 2;
3513 toklen += 2;
3514 c = lp[-1];
3515 goto still_in_token;
3517 else
3519 bool funorvar = FALSE;
3521 if (yacc_rules
3522 || consider_token (newlb.buffer + tokoff, toklen, c,
3523 &c_ext, bracelev, parlev,
3524 &funorvar))
3526 if (fvdef == foperator)
3528 char *oldlp = lp;
3529 lp = skip_spaces (lp-1);
3530 if (*lp != '\0')
3531 lp += 1;
3532 while (*lp != '\0'
3533 && !iswhite (*lp) && *lp != '(')
3534 lp += 1;
3535 c = *lp++;
3536 toklen += lp - oldlp;
3538 token.named = FALSE;
3539 if (!plainc
3540 && nestlev > 0 && definedef == dnone)
3541 /* in struct body */
3543 write_classname (&token_name, qualifier);
3544 linebuffer_setlen (&token_name,
3545 token_name.len+qlen+toklen);
3546 strcat (token_name.buffer, qualifier);
3547 strncat (token_name.buffer,
3548 newlb.buffer + tokoff, toklen);
3549 token.named = TRUE;
3551 else if (objdef == ocatseen)
3552 /* Objective C category */
3554 int len = strlen (objtag) + 2 + toklen;
3555 linebuffer_setlen (&token_name, len);
3556 strcpy (token_name.buffer, objtag);
3557 strcat (token_name.buffer, "(");
3558 strncat (token_name.buffer,
3559 newlb.buffer + tokoff, toklen);
3560 strcat (token_name.buffer, ")");
3561 token.named = TRUE;
3563 else if (objdef == omethodtag
3564 || objdef == omethodparm)
3565 /* Objective C method */
3567 token.named = TRUE;
3569 else if (fvdef == fdefunname)
3570 /* GNU DEFUN and similar macros */
3572 bool defun = (newlb.buffer[tokoff] == 'F');
3573 int off = tokoff;
3574 int len = toklen;
3576 /* Rewrite the tag so that emacs lisp DEFUNs
3577 can be found by their elisp name */
3578 if (defun)
3580 off += 1;
3581 len -= 1;
3583 linebuffer_setlen (&token_name, len);
3584 strncpy (token_name.buffer,
3585 newlb.buffer + off, len);
3586 token_name.buffer[len] = '\0';
3587 if (defun)
3588 while (--len >= 0)
3589 if (token_name.buffer[len] == '_')
3590 token_name.buffer[len] = '-';
3591 token.named = defun;
3593 else
3595 linebuffer_setlen (&token_name, toklen);
3596 strncpy (token_name.buffer,
3597 newlb.buffer + tokoff, toklen);
3598 token_name.buffer[toklen] = '\0';
3599 /* Name macros and members. */
3600 token.named = (structdef == stagseen
3601 || typdef == ttypeseen
3602 || typdef == tend
3603 || (funorvar
3604 && definedef == dignorerest)
3605 || (funorvar
3606 && definedef == dnone
3607 && structdef == snone
3608 && bracelev > 0));
3610 token.lineno = lineno;
3611 token.offset = tokoff;
3612 token.length = toklen;
3613 token.line = newlb.buffer;
3614 token.linepos = newlinepos;
3615 token.valid = TRUE;
3617 if (definedef == dnone
3618 && (fvdef == fvnameseen
3619 || fvdef == foperator
3620 || structdef == stagseen
3621 || typdef == tend
3622 || typdef == ttypeseen
3623 || objdef != onone))
3625 if (current_lb_is_new)
3626 switch_line_buffers ();
3628 else if (definedef != dnone
3629 || fvdef == fdefunname
3630 || instruct)
3631 make_C_tag (funorvar);
3633 else /* not yacc and consider_token failed */
3635 if (inattribute && fvdef == fignore)
3637 /* We have just met __attribute__ after a
3638 function parameter list: do not tag the
3639 function again. */
3640 fvdef = fvnone;
3643 midtoken = FALSE;
3645 } /* if (endtoken (c)) */
3646 else if (intoken (c))
3647 still_in_token:
3649 toklen++;
3650 continue;
3652 } /* if (midtoken) */
3653 else if (begtoken (c))
3655 switch (definedef)
3657 case dnone:
3658 switch (fvdef)
3660 case fstartlist:
3661 /* This prevents tagging fb in
3662 void (__attribute__((noreturn)) *fb) (void);
3663 Fixing this is not easy and not very important. */
3664 fvdef = finlist;
3665 continue;
3666 case flistseen:
3667 if (plainc || declarations)
3669 make_C_tag (TRUE); /* a function */
3670 fvdef = fignore;
3672 break;
3674 if (structdef == stagseen && !cjava)
3676 popclass_above (bracelev);
3677 structdef = snone;
3679 break;
3680 case dsharpseen:
3681 savetoken = token;
3682 break;
3684 if (!yacc_rules || lp == newlb.buffer + 1)
3686 tokoff = lp - 1 - newlb.buffer;
3687 toklen = 1;
3688 midtoken = TRUE;
3690 continue;
3691 } /* if (begtoken) */
3692 } /* if must look at token */
3695 /* Detect end of line, colon, comma, semicolon and various braces
3696 after having handled a token.*/
3697 switch (c)
3699 case ':':
3700 if (inattribute)
3701 break;
3702 if (yacc_rules && token.offset == 0 && token.valid)
3704 make_C_tag (FALSE); /* a yacc function */
3705 break;
3707 if (definedef != dnone)
3708 break;
3709 switch (objdef)
3711 case otagseen:
3712 objdef = oignore;
3713 make_C_tag (TRUE); /* an Objective C class */
3714 break;
3715 case omethodtag:
3716 case omethodparm:
3717 objdef = omethodcolon;
3718 linebuffer_setlen (&token_name, token_name.len + 1);
3719 strcat (token_name.buffer, ":");
3720 break;
3722 if (structdef == stagseen)
3724 structdef = scolonseen;
3725 break;
3727 /* Should be useless, but may be work as a safety net. */
3728 if (cplpl && fvdef == flistseen)
3730 make_C_tag (TRUE); /* a function */
3731 fvdef = fignore;
3732 break;
3734 break;
3735 case ';':
3736 if (definedef != dnone || inattribute)
3737 break;
3738 switch (typdef)
3740 case tend:
3741 case ttypeseen:
3742 make_C_tag (FALSE); /* a typedef */
3743 typdef = tnone;
3744 fvdef = fvnone;
3745 break;
3746 case tnone:
3747 case tinbody:
3748 case tignore:
3749 switch (fvdef)
3751 case fignore:
3752 if (typdef == tignore || cplpl)
3753 fvdef = fvnone;
3754 break;
3755 case fvnameseen:
3756 if ((globals && bracelev == 0 && (!fvextern || declarations))
3757 || (members && instruct))
3758 make_C_tag (FALSE); /* a variable */
3759 fvextern = FALSE;
3760 fvdef = fvnone;
3761 token.valid = FALSE;
3762 break;
3763 case flistseen:
3764 if ((declarations
3765 && (cplpl || !instruct)
3766 && (typdef == tnone || (typdef != tignore && instruct)))
3767 || (members
3768 && plainc && instruct))
3769 make_C_tag (TRUE); /* a function */
3770 /* FALLTHRU */
3771 default:
3772 fvextern = FALSE;
3773 fvdef = fvnone;
3774 if (declarations
3775 && cplpl && structdef == stagseen)
3776 make_C_tag (FALSE); /* forward declaration */
3777 else
3778 token.valid = FALSE;
3779 } /* switch (fvdef) */
3780 /* FALLTHRU */
3781 default:
3782 if (!instruct)
3783 typdef = tnone;
3785 if (structdef == stagseen)
3786 structdef = snone;
3787 break;
3788 case ',':
3789 if (definedef != dnone || inattribute)
3790 break;
3791 switch (objdef)
3793 case omethodtag:
3794 case omethodparm:
3795 make_C_tag (TRUE); /* an Objective C method */
3796 objdef = oinbody;
3797 break;
3799 switch (fvdef)
3801 case fdefunkey:
3802 case foperator:
3803 case fstartlist:
3804 case finlist:
3805 case fignore:
3806 case vignore:
3807 break;
3808 case fdefunname:
3809 fvdef = fignore;
3810 break;
3811 case fvnameseen:
3812 if (parlev == 0
3813 && ((globals
3814 && bracelev == 0
3815 && templatelev == 0
3816 && (!fvextern || declarations))
3817 || (members && instruct)))
3818 make_C_tag (FALSE); /* a variable */
3819 break;
3820 case flistseen:
3821 if ((declarations && typdef == tnone && !instruct)
3822 || (members && typdef != tignore && instruct))
3824 make_C_tag (TRUE); /* a function */
3825 fvdef = fvnameseen;
3827 else if (!declarations)
3828 fvdef = fvnone;
3829 token.valid = FALSE;
3830 break;
3831 default:
3832 fvdef = fvnone;
3834 if (structdef == stagseen)
3835 structdef = snone;
3836 break;
3837 case ']':
3838 if (definedef != dnone || inattribute)
3839 break;
3840 if (structdef == stagseen)
3841 structdef = snone;
3842 switch (typdef)
3844 case ttypeseen:
3845 case tend:
3846 typdef = tignore;
3847 make_C_tag (FALSE); /* a typedef */
3848 break;
3849 case tnone:
3850 case tinbody:
3851 switch (fvdef)
3853 case foperator:
3854 case finlist:
3855 case fignore:
3856 case vignore:
3857 break;
3858 case fvnameseen:
3859 if ((members && bracelev == 1)
3860 || (globals && bracelev == 0
3861 && (!fvextern || declarations)))
3862 make_C_tag (FALSE); /* a variable */
3863 /* FALLTHRU */
3864 default:
3865 fvdef = fvnone;
3867 break;
3869 break;
3870 case '(':
3871 if (inattribute)
3873 attrparlev++;
3874 break;
3876 if (definedef != dnone)
3877 break;
3878 if (objdef == otagseen && parlev == 0)
3879 objdef = oparenseen;
3880 switch (fvdef)
3882 case fvnameseen:
3883 if (typdef == ttypeseen
3884 && *lp != '*'
3885 && !instruct)
3887 /* This handles constructs like:
3888 typedef void OperatorFun (int fun); */
3889 make_C_tag (FALSE);
3890 typdef = tignore;
3891 fvdef = fignore;
3892 break;
3894 /* FALLTHRU */
3895 case foperator:
3896 fvdef = fstartlist;
3897 break;
3898 case flistseen:
3899 fvdef = finlist;
3900 break;
3902 parlev++;
3903 break;
3904 case ')':
3905 if (inattribute)
3907 if (--attrparlev == 0)
3908 inattribute = FALSE;
3909 break;
3911 if (definedef != dnone)
3912 break;
3913 if (objdef == ocatseen && parlev == 1)
3915 make_C_tag (TRUE); /* an Objective C category */
3916 objdef = oignore;
3918 if (--parlev == 0)
3920 switch (fvdef)
3922 case fstartlist:
3923 case finlist:
3924 fvdef = flistseen;
3925 break;
3927 if (!instruct
3928 && (typdef == tend
3929 || typdef == ttypeseen))
3931 typdef = tignore;
3932 make_C_tag (FALSE); /* a typedef */
3935 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3936 parlev = 0;
3937 break;
3938 case '{':
3939 if (definedef != dnone)
3940 break;
3941 if (typdef == ttypeseen)
3943 /* Whenever typdef is set to tinbody (currently only
3944 here), typdefbracelev should be set to bracelev. */
3945 typdef = tinbody;
3946 typdefbracelev = bracelev;
3948 switch (fvdef)
3950 case flistseen:
3951 make_C_tag (TRUE); /* a function */
3952 /* FALLTHRU */
3953 case fignore:
3954 fvdef = fvnone;
3955 break;
3956 case fvnone:
3957 switch (objdef)
3959 case otagseen:
3960 make_C_tag (TRUE); /* an Objective C class */
3961 objdef = oignore;
3962 break;
3963 case omethodtag:
3964 case omethodparm:
3965 make_C_tag (TRUE); /* an Objective C method */
3966 objdef = oinbody;
3967 break;
3968 default:
3969 /* Neutralize `extern "C" {' grot. */
3970 if (bracelev == 0 && structdef == snone && nestlev == 0
3971 && typdef == tnone)
3972 bracelev = -1;
3974 break;
3976 switch (structdef)
3978 case skeyseen: /* unnamed struct */
3979 pushclass_above (bracelev, NULL, 0);
3980 structdef = snone;
3981 break;
3982 case stagseen: /* named struct or enum */
3983 case scolonseen: /* a class */
3984 pushclass_above (bracelev,token.line+token.offset, token.length);
3985 structdef = snone;
3986 make_C_tag (FALSE); /* a struct or enum */
3987 break;
3989 bracelev += 1;
3990 break;
3991 case '*':
3992 if (definedef != dnone)
3993 break;
3994 if (fvdef == fstartlist)
3996 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3997 token.valid = FALSE;
3999 break;
4000 case '}':
4001 if (definedef != dnone)
4002 break;
4003 bracelev -= 1;
4004 if (!ignoreindent && lp == newlb.buffer + 1)
4006 if (bracelev != 0)
4007 token.valid = FALSE; /* unexpected value, token unreliable */
4008 bracelev = 0; /* reset brace level if first column */
4009 parlev = 0; /* also reset paren level, just in case... */
4011 else if (bracelev < 0)
4013 token.valid = FALSE; /* something gone amiss, token unreliable */
4014 bracelev = 0;
4016 if (bracelev == 0 && fvdef == vignore)
4017 fvdef = fvnone; /* end of function */
4018 popclass_above (bracelev);
4019 structdef = snone;
4020 /* Only if typdef == tinbody is typdefbracelev significant. */
4021 if (typdef == tinbody && bracelev <= typdefbracelev)
4023 assert (bracelev == typdefbracelev);
4024 typdef = tend;
4026 break;
4027 case '=':
4028 if (definedef != dnone)
4029 break;
4030 switch (fvdef)
4032 case foperator:
4033 case finlist:
4034 case fignore:
4035 case vignore:
4036 break;
4037 case fvnameseen:
4038 if ((members && bracelev == 1)
4039 || (globals && bracelev == 0 && (!fvextern || declarations)))
4040 make_C_tag (FALSE); /* a variable */
4041 /* FALLTHRU */
4042 default:
4043 fvdef = vignore;
4045 break;
4046 case '<':
4047 if (cplpl
4048 && (structdef == stagseen || fvdef == fvnameseen))
4050 templatelev++;
4051 break;
4053 goto resetfvdef;
4054 case '>':
4055 if (templatelev > 0)
4057 templatelev--;
4058 break;
4060 goto resetfvdef;
4061 case '+':
4062 case '-':
4063 if (objdef == oinbody && bracelev == 0)
4065 objdef = omethodsign;
4066 break;
4068 /* FALLTHRU */
4069 resetfvdef:
4070 case '#': case '~': case '&': case '%': case '/':
4071 case '|': case '^': case '!': case '.': case '?':
4072 if (definedef != dnone)
4073 break;
4074 /* These surely cannot follow a function tag in C. */
4075 switch (fvdef)
4077 case foperator:
4078 case finlist:
4079 case fignore:
4080 case vignore:
4081 break;
4082 default:
4083 fvdef = fvnone;
4085 break;
4086 case '\0':
4087 if (objdef == otagseen)
4089 make_C_tag (TRUE); /* an Objective C class */
4090 objdef = oignore;
4092 /* If a macro spans multiple lines don't reset its state. */
4093 if (quotednl)
4094 CNL_SAVE_DEFINEDEF ();
4095 else
4096 CNL ();
4097 break;
4098 } /* switch (c) */
4100 } /* while not eof */
4102 free (lbs[0].lb.buffer);
4103 free (lbs[1].lb.buffer);
4107 * Process either a C++ file or a C file depending on the setting
4108 * of a global flag.
4110 static void
4111 default_C_entries (inf)
4112 FILE *inf;
4114 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: scan INF with no C extension flag set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
4125 /* Always do C++. */
4126 static void
4127 Cplusplus_entries (inf)
4128 FILE *inf;
4130 C_entries (C_PLPL, inf);
4133 /* Always do Java. */
4134 static void
4135 Cjava_entries (inf)
4136 FILE *inf;
4138 C_entries (C_JAVA, inf);
4141 /* Always do C*. */
4142 static void
4143 Cstar_entries (inf)
4144 FILE *inf;
4146 C_entries (C_STAR, inf);
4149 /* Always do Yacc. */
4150 static void
4151 Yacc_entries (inf)
4152 FILE *inf;
4154 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that runs the following statement once per input line:
   while FILE_POINTER is not at end of file, read a line into
   LINE_BUFFER and point CHAR_POINTER at the start of its text.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
         (readline (&(line_buffer), (file_pointer)),                    \
          (char_pointer) = (line_buffer).buffer,                        \
          TRUE);                                                        \
      )

/* True if CP points at the keyword KW followed by a character that
   cannot be part of a name; in that case CP is advanced past the
   keyword and any spaces after it.  CP must be a modifiable lvalue.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */

/* Similar to LOOKING_AT but does not use notinname, does not skip */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* step over kw */
4181 * Read a file, but do no processing. This is used to do regexp
4182 * matching on files that have no language defined.
4184 static void
4185 just_read_file (inf)
4186 FILE *inf;
4188 register char *dummy;
4190 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4191 continue;
4195 /* Fortran parsing */
4197 static void F_takeprec __P((void));
4198 static void F_getit __P((FILE *));
4200 static void
4201 F_takeprec ()
4203 dbp = skip_spaces (dbp);
4204 if (*dbp != '*')
4205 return;
4206 dbp++;
4207 dbp = skip_spaces (dbp);
4208 if (strneq (dbp, "(*)", 3))
4210 dbp += 3;
4211 return;
4213 if (!ISDIGIT (*dbp))
4215 --dbp; /* force failure */
4216 return;
4219 dbp++;
4220 while (ISDIGIT (*dbp));
4223 static void
4224 F_getit (inf)
4225 FILE *inf;
4227 register char *cp;
4229 dbp = skip_spaces (dbp);
4230 if (*dbp == '\0')
4232 readline (&lb, inf);
4233 dbp = lb.buffer;
4234 if (dbp[5] != '&')
4235 return;
4236 dbp += 6;
4237 dbp = skip_spaces (dbp);
4239 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4240 return;
4241 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4242 continue;
4243 make_tag (dbp, cp-dbp, TRUE,
4244 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4248 static void
4249 Fortran_functions (inf)
4250 FILE *inf;
4252 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4254 if (*dbp == '%')
4255 dbp++; /* Ratfor escape to fortran */
4256 dbp = skip_spaces (dbp);
4257 if (*dbp == '\0')
4258 continue;
4259 switch (lowcase (*dbp))
4261 case 'i':
4262 if (nocase_tail ("integer"))
4263 F_takeprec ();
4264 break;
4265 case 'r':
4266 if (nocase_tail ("real"))
4267 F_takeprec ();
4268 break;
4269 case 'l':
4270 if (nocase_tail ("logical"))
4271 F_takeprec ();
4272 break;
4273 case 'c':
4274 if (nocase_tail ("complex") || nocase_tail ("character"))
4275 F_takeprec ();
4276 break;
4277 case 'd':
4278 if (nocase_tail ("double"))
4280 dbp = skip_spaces (dbp);
4281 if (*dbp == '\0')
4282 continue;
4283 if (nocase_tail ("precision"))
4284 break;
4285 continue;
4287 break;
4289 dbp = skip_spaces (dbp);
4290 if (*dbp == '\0')
4291 continue;
4292 switch (lowcase (*dbp))
4294 case 'f':
4295 if (nocase_tail ("function"))
4296 F_getit (inf);
4297 continue;
4298 case 's':
4299 if (nocase_tail ("subroutine"))
4300 F_getit (inf);
4301 continue;
4302 case 'e':
4303 if (nocase_tail ("entry"))
4304 F_getit (inf);
4305 continue;
4306 case 'b':
4307 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4309 dbp = skip_spaces (dbp);
4310 if (*dbp == '\0') /* assume un-named */
4311 make_tag ("blockdata", 9, TRUE,
4312 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4313 else
4314 F_getit (inf); /* look for name */
4316 continue;
4323 * Ada parsing
4324 * Original code by
4325 * Philippe Waroquiers (1998)
4328 static void Ada_getit __P((FILE *, char *));
4330 /* Once we are positioned after an "interesting" keyword, let's get
4331 the real tag value necessary. */
4332 static void
4333 Ada_getit (inf, name_qualifier)
4334 FILE *inf;
4335 char *name_qualifier;
4337 register char *cp;
4338 char *name;
4339 char c;
4341 while (!feof (inf))
4343 dbp = skip_spaces (dbp);
4344 if (*dbp == '\0'
4345 || (dbp[0] == '-' && dbp[1] == '-'))
4347 readline (&lb, inf);
4348 dbp = lb.buffer;
4350 switch (lowcase(*dbp))
4352 case 'b':
4353 if (nocase_tail ("body"))
4355 /* Skipping body of procedure body or package body or ....
4356 resetting qualifier to body instead of spec. */
4357 name_qualifier = "/b";
4358 continue;
4360 break;
4361 case 't':
4362 /* Skipping type of task type or protected type ... */
4363 if (nocase_tail ("type"))
4364 continue;
4365 break;
4367 if (*dbp == '"')
4369 dbp += 1;
4370 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4371 continue;
4373 else
4375 dbp = skip_spaces (dbp);
4376 for (cp = dbp;
4377 (*cp != '\0'
4378 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4379 cp++)
4380 continue;
4381 if (cp == dbp)
4382 return;
4384 c = *cp;
4385 *cp = '\0';
4386 name = concat (dbp, name_qualifier, "");
4387 *cp = c;
4388 make_tag (name, strlen (name), TRUE,
4389 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4390 free (name);
4391 if (c == '"')
4392 dbp = cp + 1;
4393 return;
4397 static void
4398 Ada_funcs (inf)
4399 FILE *inf;
4401 bool inquote = FALSE;
4402 bool skip_till_semicolumn = FALSE;
4404 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4406 while (*dbp != '\0')
4408 /* Skip a string i.e. "abcd". */
4409 if (inquote || (*dbp == '"'))
4411 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4412 if (dbp != NULL)
4414 inquote = FALSE;
4415 dbp += 1;
4416 continue; /* advance char */
4418 else
4420 inquote = TRUE;
4421 break; /* advance line */
4425 /* Skip comments. */
4426 if (dbp[0] == '-' && dbp[1] == '-')
4427 break; /* advance line */
4429 /* Skip character enclosed in single quote i.e. 'a'
4430 and skip single quote starting an attribute i.e. 'Image. */
4431 if (*dbp == '\'')
4433 dbp++ ;
4434 if (*dbp != '\0')
4435 dbp++;
4436 continue;
4439 if (skip_till_semicolumn)
4441 if (*dbp == ';')
4442 skip_till_semicolumn = FALSE;
4443 dbp++;
4444 continue; /* advance char */
4447 /* Search for beginning of a token. */
4448 if (!begtoken (*dbp))
4450 dbp++;
4451 continue; /* advance char */
4454 /* We are at the beginning of a token. */
4455 switch (lowcase(*dbp))
4457 case 'f':
4458 if (!packages_only && nocase_tail ("function"))
4459 Ada_getit (inf, "/f");
4460 else
4461 break; /* from switch */
4462 continue; /* advance char */
4463 case 'p':
4464 if (!packages_only && nocase_tail ("procedure"))
4465 Ada_getit (inf, "/p");
4466 else if (nocase_tail ("package"))
4467 Ada_getit (inf, "/s");
4468 else if (nocase_tail ("protected")) /* protected type */
4469 Ada_getit (inf, "/t");
4470 else
4471 break; /* from switch */
4472 continue; /* advance char */
4474 case 'u':
4475 if (typedefs && !packages_only && nocase_tail ("use"))
4477 /* when tagging types, avoid tagging use type Pack.Typename;
4478 for this, we will skip everything till a ; */
4479 skip_till_semicolumn = TRUE;
4480 continue; /* advance char */
4483 case 't':
4484 if (!packages_only && nocase_tail ("task"))
4485 Ada_getit (inf, "/k");
4486 else if (typedefs && !packages_only && nocase_tail ("type"))
4488 Ada_getit (inf, "/t");
4489 while (*dbp != '\0')
4490 dbp += 1;
4492 else
4493 break; /* from switch */
4494 continue; /* advance char */
4497 /* Look for the end of the token. */
4498 while (!endtoken (*dbp))
4499 dbp++;
4501 } /* advance char */
4502 } /* advance line */
4507 * Unix and microcontroller assembly tag handling
4508 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4509 * Idea by Bob Weiner, Motorola Inc. (1994)
4511 static void
4512 Asm_labels (inf)
4513 FILE *inf;
4515 register char *cp;
4517 LOOP_ON_INPUT_LINES (inf, lb, cp)
4519 /* If first char is alphabetic or one of [_.$], test for colon
4520 following identifier. */
4521 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4523 /* Read past label. */
4524 cp++;
4525 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4526 cp++;
4527 if (*cp == ':' || iswhite (*cp))
4528 /* Found end of label, so copy it and add it to the table. */
4529 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4530 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537 * Perl support
4538 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4539 * Perl variable names: /^(my|local).../
4540 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4541 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4542 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4544 static void
4545 Perl_functions (inf)
4546 FILE *inf;
4548 char *package = savestr ("main"); /* current package name */
4549 register char *cp;
4551 LOOP_ON_INPUT_LINES (inf, lb, cp)
4553 cp = skip_spaces (cp);
4555 if (LOOKING_AT (cp, "package"))
4557 free (package);
4558 get_tag (cp, &package);
4560 else if (LOOKING_AT (cp, "sub"))
4562 char *pos;
4563 char *sp = cp;
4565 while (!notinname (*cp))
4566 cp++;
4567 if (cp == sp)
4568 continue; /* nothing found */
4569 if ((pos = etags_strchr (sp, ':')) != NULL
4570 && pos < cp && pos[1] == ':')
4571 /* The name is already qualified. */
4572 make_tag (sp, cp - sp, TRUE,
4573 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4574 else
4575 /* Qualify it. */
4577 char savechar, *name;
4579 savechar = *cp;
4580 *cp = '\0';
4581 name = concat (package, "::", sp);
4582 *cp = savechar;
4583 make_tag (name, strlen(name), TRUE,
4584 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4585 free (name);
4588 else if (globals) /* only if we are tagging global vars */
4590 /* Skip a qualifier, if any. */
4591 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4592 /* After "my" or "local", but before any following paren or space. */
4593 char *varstart = cp;
4595 if (qual /* should this be removed? If yes, how? */
4596 && (*cp == '$' || *cp == '@' || *cp == '%'))
4598 varstart += 1;
4600 cp++;
4601 while (ISALNUM (*cp) || *cp == '_');
4603 else if (qual)
4605 /* Should be examining a variable list at this point;
4606 could insist on seeing an open parenthesis. */
4607 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4608 cp++;
4610 else
4611 continue;
4613 make_tag (varstart, cp - varstart, FALSE,
4614 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4617 free (package);
4622 * Python support
4623 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4624 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4625 * More ideas by seb bacon <seb@jamkit.com> (2002)
4627 static void
4628 Python_functions (inf)
4629 FILE *inf;
4631 register char *cp;
4633 LOOP_ON_INPUT_LINES (inf, lb, cp)
4635 cp = skip_spaces (cp);
4636 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4638 char *name = cp;
4639 while (!notinname (*cp) && *cp != ':')
4640 cp++;
4641 make_tag (name, cp - name, TRUE,
4642 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4649 * PHP support
4650 * Look for:
4651 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4652 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4653 * - /^[ \t]*define\(\"[^\"]+/
4654 * Only with --members:
4655 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4656 * Idea by Diez B. Roggisch (2001)
4658 static void
4659 PHP_functions (inf)
4660 FILE *inf;
4662 register char *cp, *name;
4663 bool search_identifier = FALSE;
4665 LOOP_ON_INPUT_LINES (inf, lb, cp)
4667 cp = skip_spaces (cp);
4668 name = cp;
4669 if (search_identifier
4670 && *cp != '\0')
4672 while (!notinname (*cp))
4673 cp++;
4674 make_tag (name, cp - name, TRUE,
4675 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4676 search_identifier = FALSE;
4678 else if (LOOKING_AT (cp, "function"))
4680 if(*cp == '&')
4681 cp = skip_spaces (cp+1);
4682 if(*cp != '\0')
4684 name = cp;
4685 while (!notinname (*cp))
4686 cp++;
4687 make_tag (name, cp - name, TRUE,
4688 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4690 else
4691 search_identifier = TRUE;
4693 else if (LOOKING_AT (cp, "class"))
4695 if (*cp != '\0')
4697 name = cp;
4698 while (*cp != '\0' && !iswhite (*cp))
4699 cp++;
4700 make_tag (name, cp - name, FALSE,
4701 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4703 else
4704 search_identifier = TRUE;
4706 else if (strneq (cp, "define", 6)
4707 && (cp = skip_spaces (cp+6))
4708 && *cp++ == '('
4709 && (*cp == '"' || *cp == '\''))
4711 char quote = *cp++;
4712 name = cp;
4713 while (*cp != quote && *cp != '\0')
4714 cp++;
4715 make_tag (name, cp - name, FALSE,
4716 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4718 else if (members
4719 && LOOKING_AT (cp, "var")
4720 && *cp == '$')
4722 name = cp;
4723 while (!notinname(*cp))
4724 cp++;
4725 make_tag (name, cp - name, FALSE,
4726 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4733 * Cobol tag functions
4734 * We could look for anything that could be a paragraph name.
4735 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4736 * Idea by Corny de Souza (1993)
4738 static void
4739 Cobol_paragraphs (inf)
4740 FILE *inf;
4742 register char *bp, *ep;
4744 LOOP_ON_INPUT_LINES (inf, lb, bp)
4746 if (lb.len < 9)
4747 continue;
4748 bp += 8;
4750 /* If eoln, compiler option or comment ignore whole line. */
4751 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4752 continue;
4754 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4755 continue;
4756 if (*ep++ == '.')
4757 make_tag (bp, ep - bp, TRUE,
4758 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4764 * Makefile support
4765 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4767 static void
4768 Makefile_targets (inf)
4769 FILE *inf;
4771 register char *bp;
4773 LOOP_ON_INPUT_LINES (inf, lb, bp)
4775 if (*bp == '\t' || *bp == '#')
4776 continue;
4777 while (*bp != '\0' && *bp != '=' && *bp != ':')
4778 bp++;
4779 if (*bp == ':' || (globals && *bp == '='))
4781 /* We should detect if there is more than one tag, but we do not.
4782 We just skip initial and final spaces. */
4783 char * namestart = skip_spaces (lb.buffer);
4784 while (--bp > namestart)
4785 if (!notinname (*bp))
4786 break;
4787 make_tag (namestart, bp - namestart + 1, TRUE,
4788 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4795 * Pascal parsing
4796 * Original code by Mosur K. Mohan (1989)
4798 * Locates tags for procedures & functions. Doesn't do any type- or
4799 * var-definitions. It does look for the keyword "extern" or
4800 * "forward" immediately following the procedure statement; if found,
4801 * the tag is skipped.
4803 static void
4804 Pascal_functions (inf)
4805 FILE *inf;
4807 linebuffer tline; /* mostly copied from C_entries */
4808 long save_lcno;
4809 int save_lineno, namelen, taglen;
4810 char c, *name;
4812 bool /* each of these flags is TRUE if: */
4813 incomment, /* point is inside a comment */
4814 inquote, /* point is inside '..' string */
4815 get_tagname, /* point is after PROCEDURE/FUNCTION
4816 keyword, so next item = potential tag */
4817 found_tag, /* point is after a potential tag */
4818 inparms, /* point is within parameter-list */
4819 verify_tag; /* point has passed the parm-list, so the
4820 next token will determine whether this
4821 is a FORWARD/EXTERN to be ignored, or
4822 whether it is a real tag */
4824 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4825 name = NULL; /* keep compiler quiet */
4826 dbp = lb.buffer;
4827 *dbp = '\0';
4828 linebuffer_init (&tline);
4830 incomment = inquote = FALSE;
4831 found_tag = FALSE; /* have a proc name; check if extern */
4832 get_tagname = FALSE; /* found "procedure" keyword */
4833 inparms = FALSE; /* found '(' after "proc" */
4834 verify_tag = FALSE; /* check if "extern" is ahead */
4837 while (!feof (inf)) /* long main loop to get next char */
4839 c = *dbp++;
4840 if (c == '\0') /* if end of line */
4842 readline (&lb, inf);
4843 dbp = lb.buffer;
4844 if (*dbp == '\0')
4845 continue;
4846 if (!((found_tag && verify_tag)
4847 || get_tagname))
4848 c = *dbp++; /* only if don't need *dbp pointing
4849 to the beginning of the name of
4850 the procedure or function */
4852 if (incomment)
4854 if (c == '}') /* within { } comments */
4855 incomment = FALSE;
4856 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4858 dbp++;
4859 incomment = FALSE;
4861 continue;
4863 else if (inquote)
4865 if (c == '\'')
4866 inquote = FALSE;
4867 continue;
4869 else
4870 switch (c)
4872 case '\'':
4873 inquote = TRUE; /* found first quote */
4874 continue;
4875 case '{': /* found open { comment */
4876 incomment = TRUE;
4877 continue;
4878 case '(':
4879 if (*dbp == '*') /* found open (* comment */
4881 incomment = TRUE;
4882 dbp++;
4884 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4885 inparms = TRUE;
4886 continue;
4887 case ')': /* end of parms list */
4888 if (inparms)
4889 inparms = FALSE;
4890 continue;
4891 case ';':
4892 if (found_tag && !inparms) /* end of proc or fn stmt */
4894 verify_tag = TRUE;
4895 break;
4897 continue;
4899 if (found_tag && verify_tag && (*dbp != ' '))
4901 /* Check if this is an "extern" declaration. */
4902 if (*dbp == '\0')
4903 continue;
4904 if (lowcase (*dbp == 'e'))
4906 if (nocase_tail ("extern")) /* superfluous, really! */
4908 found_tag = FALSE;
4909 verify_tag = FALSE;
4912 else if (lowcase (*dbp) == 'f')
4914 if (nocase_tail ("forward")) /* check for forward reference */
4916 found_tag = FALSE;
4917 verify_tag = FALSE;
4920 if (found_tag && verify_tag) /* not external proc, so make tag */
4922 found_tag = FALSE;
4923 verify_tag = FALSE;
4924 make_tag (name, namelen, TRUE,
4925 tline.buffer, taglen, save_lineno, save_lcno);
4926 continue;
4929 if (get_tagname) /* grab name of proc or fn */
4931 char *cp;
4933 if (*dbp == '\0')
4934 continue;
4936 /* Find block name. */
4937 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4938 continue;
4940 /* Save all values for later tagging. */
4941 linebuffer_setlen (&tline, lb.len);
4942 strcpy (tline.buffer, lb.buffer);
4943 save_lineno = lineno;
4944 save_lcno = linecharno;
4945 name = tline.buffer + (dbp - lb.buffer);
4946 namelen = cp - dbp;
4947 taglen = cp - lb.buffer + 1;
4949 dbp = cp; /* set dbp to e-o-token */
4950 get_tagname = FALSE;
4951 found_tag = TRUE;
4952 continue;
4954 /* And proceed to check for "extern". */
4956 else if (!incomment && !inquote && !found_tag)
4958 /* Check for proc/fn keywords. */
4959 switch (lowcase (c))
4961 case 'p':
4962 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4963 get_tagname = TRUE;
4964 continue;
4965 case 'f':
4966 if (nocase_tail ("unction"))
4967 get_tagname = TRUE;
4968 continue;
4971 } /* while not eof */
4973 free (tline.buffer);
4978 * Lisp tag functions
4979 * look for (def or (DEF, quote or QUOTE
4982 static void L_getit __P((void));
4984 static void
4985 L_getit ()
4987 if (*dbp == '\'') /* Skip prefix quote */
4988 dbp++;
4989 else if (*dbp == '(')
4991 dbp++;
4992 /* Try to skip "(quote " */
4993 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4994 /* Ok, then skip "(" before name in (defstruct (foo)) */
4995 dbp = skip_spaces (dbp);
4997 get_tag (dbp, NULL);
5000 static void
5001 Lisp_functions (inf)
5002 FILE *inf;
5004 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5006 if (dbp[0] != '(')
5007 continue;
5009 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5011 dbp = skip_non_spaces (dbp);
5012 dbp = skip_spaces (dbp);
5013 L_getit ();
5015 else
5017 /* Check for (foo::defmumble name-defined ... */
5019 dbp++;
5020 while (!notinname (*dbp) && *dbp != ':');
5021 if (*dbp == ':')
5024 dbp++;
5025 while (*dbp == ':');
5027 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5029 dbp = skip_non_spaces (dbp);
5030 dbp = skip_spaces (dbp);
5031 L_getit ();
5040 * Lua script language parsing
5041 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5043 * "function" and "local function" are tags if they start at column 1.
5045 static void
5046 Lua_functions (inf)
5047 FILE *inf;
5049 register char *bp;
5051 LOOP_ON_INPUT_LINES (inf, lb, bp)
5053 if (bp[0] != 'f' && bp[0] != 'l')
5054 continue;
5056 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5058 if (LOOKING_AT (bp, "function"))
5059 get_tag (bp, NULL);
5065 * Postscript tags
5066 * Just look for lines where the first character is '/'
5067 * Also look at "defineps" for PSWrap
5068 * Ideas by:
5069 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5070 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5072 static void
5073 PS_functions (inf)
5074 FILE *inf;
5076 register char *bp, *ep;
5078 LOOP_ON_INPUT_LINES (inf, lb, bp)
5080 if (bp[0] == '/')
5082 for (ep = bp+1;
5083 *ep != '\0' && *ep != ' ' && *ep != '{';
5084 ep++)
5085 continue;
5086 make_tag (bp, ep - bp, TRUE,
5087 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5089 else if (LOOKING_AT (bp, "defineps"))
5090 get_tag (bp, NULL);
5096 * Forth tags
5097 * Ignore anything after \ followed by space or in ( )
5098 * Look for words defined by :
5099 * Look for constant, code, create, defer, value, and variable
5100 * OBP extensions: Look for buffer:, field,
5101 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5103 static void
5104 Forth_words (inf)
5105 FILE *inf;
5107 register char *bp;
5109 LOOP_ON_INPUT_LINES (inf, lb, bp)
5110 while ((bp = skip_spaces (bp))[0] != '\0')
5111 if (bp[0] == '\\' && iswhite(bp[1]))
5112 break; /* read next line */
5113 else if (bp[0] == '(' && iswhite(bp[1]))
5114 do /* skip to ) or eol */
5115 bp++;
5116 while (*bp != ')' && *bp != '\0');
5117 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5118 || LOOKING_AT_NOCASE (bp, "constant")
5119 || LOOKING_AT_NOCASE (bp, "code")
5120 || LOOKING_AT_NOCASE (bp, "create")
5121 || LOOKING_AT_NOCASE (bp, "defer")
5122 || LOOKING_AT_NOCASE (bp, "value")
5123 || LOOKING_AT_NOCASE (bp, "variable")
5124 || LOOKING_AT_NOCASE (bp, "buffer:")
5125 || LOOKING_AT_NOCASE (bp, "field"))
5126 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5127 else
5128 bp = skip_non_spaces (bp);
5133 * Scheme tag functions
5134 * look for (def... xyzzy
5135 * (def... (xyzzy
5136 * (def ... ((...(xyzzy ....
5137 * (set! xyzzy
5138 * Original code by Ken Haase (1985?)
5140 static void
5141 Scheme_functions (inf)
5142 FILE *inf;
5144 register char *bp;
5146 LOOP_ON_INPUT_LINES (inf, lb, bp)
5148 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5150 bp = skip_non_spaces (bp+4);
5151 /* Skip over open parens and white space */
5152 while (notinname (*bp))
5153 bp++;
5154 get_tag (bp, NULL);
5156 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5157 get_tag (bp, NULL);
5162 /* Find tags in TeX and LaTeX input files. */
5164 /* TEX_toktab is a table of TeX control sequences that define tags.
5165 * Each entry records one such control sequence.
5167 * Original code from who knows whom.
5168 * Ideas by:
5169 * Stefan Monnier (2002)
5172 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5174 /* Default set of control sequences to put into TEX_toktab.
5175 The value of environment var TEXTAGS is prepended to this. */
5176 static char *TEX_defenv = "\
5177 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5178 :part:appendix:entry:index:def\
5179 :newcommand:renewcommand:newenvironment:renewenvironment";
5181 static void TEX_mode __P((FILE *));
5182 static void TEX_decode_env __P((char *, char *));
5184 static char TEX_esc = '\\';
5185 static char TEX_opgrp = '{';
5186 static char TEX_clgrp = '}';
5189 * TeX/LaTeX scanning loop.
5191 static void
5192 TeX_commands (inf)
5193 FILE *inf;
5195 char *cp;
5196 linebuffer *key;
5198 /* Select either \ or ! as escape character. */
5199 TEX_mode (inf);
5201 /* Initialize token table once from environment. */
5202 if (TEX_toktab == NULL)
5203 TEX_decode_env ("TEXTAGS", TEX_defenv);
5205 LOOP_ON_INPUT_LINES (inf, lb, cp)
5207 /* Look at each TEX keyword in line. */
5208 for (;;)
5210 /* Look for a TEX escape. */
5211 while (*cp++ != TEX_esc)
5212 if (cp[-1] == '\0' || cp[-1] == '%')
5213 goto tex_next_line;
5215 for (key = TEX_toktab; key->buffer != NULL; key++)
5216 if (strneq (cp, key->buffer, key->len))
5218 register char *p;
5219 int namelen, linelen;
5220 bool opgrp = FALSE;
5222 cp = skip_spaces (cp + key->len);
5223 if (*cp == TEX_opgrp)
5225 opgrp = TRUE;
5226 cp++;
5228 for (p = cp;
5229 (!iswhite (*p) && *p != '#' &&
5230 *p != TEX_opgrp && *p != TEX_clgrp);
5231 p++)
5232 continue;
5233 namelen = p - cp;
5234 linelen = lb.len;
5235 if (!opgrp || *p == TEX_clgrp)
5237 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5238 p++;
5239 linelen = p - lb.buffer + 1;
5241 make_tag (cp, namelen, TRUE,
5242 lb.buffer, linelen, lineno, linecharno);
5243 goto tex_next_line; /* We only tag a line once */
5246 tex_next_line:
5251 #define TEX_LESC '\\'
5252 #define TEX_SESC '!'
5254 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5255 chars accordingly. */
5256 static void
5257 TEX_mode (inf)
5258 FILE *inf;
5260 int c;
5262 while ((c = getc (inf)) != EOF)
5264 /* Skip to next line if we hit the TeX comment char. */
5265 if (c == '%')
5266 while (c != '\n' && c != EOF)
5267 c = getc (inf);
5268 else if (c == TEX_LESC || c == TEX_SESC )
5269 break;
5272 if (c == TEX_LESC)
5274 TEX_esc = TEX_LESC;
5275 TEX_opgrp = '{';
5276 TEX_clgrp = '}';
5278 else
5280 TEX_esc = TEX_SESC;
5281 TEX_opgrp = '<';
5282 TEX_clgrp = '>';
5284 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5285 No attempt is made to correct the situation. */
5286 rewind (inf);
5289 /* Read environment and prepend it to the default string.
5290 Build token table. */
5291 static void
5292 TEX_decode_env (evarname, defenv)
5293 char *evarname;
5294 char *defenv;
5296 register char *env, *p;
5297 int i, len;
5299 /* Append default string to environment. */
5300 env = getenv (evarname);
5301 if (!env)
5302 env = defenv;
5303 else
5305 char *oldenv = env;
5306 env = concat (oldenv, defenv, "");
5309 /* Allocate a token table */
5310 for (len = 1, p = env; p;)
5311 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5312 len++;
5313 TEX_toktab = xnew (len, linebuffer);
5315 /* Unpack environment string into token table. Be careful about */
5316 /* zero-length strings (leading ':', "::" and trailing ':') */
5317 for (i = 0; *env != '\0';)
5319 p = etags_strchr (env, ':');
5320 if (!p) /* End of environment string. */
5321 p = env + strlen (env);
5322 if (p - env > 0)
5323 { /* Only non-zero strings. */
5324 TEX_toktab[i].buffer = savenstr (env, p - env);
5325 TEX_toktab[i].len = p - env;
5326 i++;
5328 if (*p)
5329 env = p + 1;
5330 else
5332 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5333 TEX_toktab[i].len = 0;
5334 break;
5340 /* Texinfo support. Dave Love, Mar. 2000. */
5341 static void
5342 Texinfo_nodes (inf)
5343 FILE * inf;
5345 char *cp, *start;
5346 LOOP_ON_INPUT_LINES (inf, lb, cp)
5347 if (LOOKING_AT (cp, "@node"))
5349 start = cp;
5350 while (*cp != '\0' && *cp != ',')
5351 cp++;
5352 make_tag (start, cp - start, TRUE,
5353 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5359 * HTML support.
5360 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5361 * Contents of <a name=xxx> are tags with name xxx.
5363 * Francesco Potortì, 2002.
5365 static void
5366 HTML_labels (inf)
5367 FILE * inf;
5369 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5370 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5371 bool intag = FALSE; /* inside an html tag, looking for ID= */
5372 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5373 char *end;
5376 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5378 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5379 for (;;) /* loop on the same line */
5381 if (skiptag) /* skip HTML tag */
5383 while (*dbp != '\0' && *dbp != '>')
5384 dbp++;
5385 if (*dbp == '>')
5387 dbp += 1;
5388 skiptag = FALSE;
5389 continue; /* look on the same line */
5391 break; /* go to next line */
5394 else if (intag) /* look for "name=" or "id=" */
5396 while (*dbp != '\0' && *dbp != '>'
5397 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5398 dbp++;
5399 if (*dbp == '\0')
5400 break; /* go to next line */
5401 if (*dbp == '>')
5403 dbp += 1;
5404 intag = FALSE;
5405 continue; /* look on the same line */
5407 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5408 || LOOKING_AT_NOCASE (dbp, "id="))
5410 bool quoted = (dbp[0] == '"');
5412 if (quoted)
5413 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5414 continue;
5415 else
5416 for (end = dbp; *end != '\0' && intoken (*end); end++)
5417 continue;
5418 linebuffer_setlen (&token_name, end - dbp);
5419 strncpy (token_name.buffer, dbp, end - dbp);
5420 token_name.buffer[end - dbp] = '\0';
5422 dbp = end;
5423 intag = FALSE; /* we found what we looked for */
5424 skiptag = TRUE; /* skip to the end of the tag */
5425 getnext = TRUE; /* then grab the text */
5426 continue; /* look on the same line */
5428 dbp += 1;
5431 else if (getnext) /* grab next tokens and tag them */
5433 dbp = skip_spaces (dbp);
5434 if (*dbp == '\0')
5435 break; /* go to next line */
5436 if (*dbp == '<')
5438 intag = TRUE;
5439 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5440 continue; /* look on the same line */
5443 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5444 continue;
5445 make_tag (token_name.buffer, token_name.len, TRUE,
5446 dbp, end - dbp, lineno, linecharno);
5447 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5448 getnext = FALSE;
5449 break; /* go to next line */
5452 else /* look for an interesting HTML tag */
5454 while (*dbp != '\0' && *dbp != '<')
5455 dbp++;
5456 if (*dbp == '\0')
5457 break; /* go to next line */
5458 intag = TRUE;
5459 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5461 inanchor = TRUE;
5462 continue; /* look on the same line */
5464 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5465 || LOOKING_AT_NOCASE (dbp, "<h1>")
5466 || LOOKING_AT_NOCASE (dbp, "<h2>")
5467 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5469 intag = FALSE;
5470 getnext = TRUE;
5471 continue; /* look on the same line */
5473 dbp += 1;
5480 * Prolog support
5482 * Assumes that the predicate or rule starts at column 0.
5483 * Only the first clause of a predicate or rule is added.
5484 * Original code by Sunichirou Sugou (1989)
5485 * Rewritten by Anders Lindgren (1996)
5487 static int prolog_pr __P((char *, char *));
5488 static void prolog_skip_comment __P((linebuffer *, FILE *));
5489 static int prolog_atom __P((char *, int));
5491 static void
5492 Prolog_functions (inf)
5493 FILE *inf;
5495 char *cp, *last;
5496 int len;
5497 int allocated;
5499 allocated = 0;
5500 len = 0;
5501 last = NULL;
5503 LOOP_ON_INPUT_LINES (inf, lb, cp)
5505 if (cp[0] == '\0') /* Empty line */
5506 continue;
5507 else if (iswhite (cp[0])) /* Not a predicate */
5508 continue;
5509 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5510 prolog_skip_comment (&lb, inf);
5511 else if ((len = prolog_pr (cp, last)) > 0)
5513 /* Predicate or rule. Store the function name so that we
5514 only generate a tag for the first clause. */
5515 if (last == NULL)
5516 last = xnew(len + 1, char);
5517 else if (len + 1 > allocated)
5518 xrnew (last, len + 1, char);
5519 allocated = len + 1;
5520 strncpy (last, cp, len);
5521 last[len] = '\0';
5524 if (last != NULL)
5525 free (last);
5529 static void
5530 prolog_skip_comment (plb, inf)
5531 linebuffer *plb;
5532 FILE *inf;
5534 char *cp;
5538 for (cp = plb->buffer; *cp != '\0'; cp++)
5539 if (cp[0] == '*' && cp[1] == '/')
5540 return;
5541 readline (plb, inf);
5543 while (!feof(inf));
5547 * A predicate or rule definition is added if it matches:
5548 * <beginning of line><Prolog Atom><whitespace>(
5549 * or <beginning of line><Prolog Atom><whitespace>:-
5551 * It is added to the tags database if it doesn't match the
5552 * name of the previous clause header.
5554 * Return the size of the name of the predicate or rule, or 0 if no
5555 * header was found.
5557 static int
5558 prolog_pr (s, last)
5559 char *s;
5560 char *last; /* Name of last clause. */
5562 int pos;
5563 int len;
5565 pos = prolog_atom (s, 0);
5566 if (pos < 1)
5567 return 0;
5569 len = pos;
5570 pos = skip_spaces (s + pos) - s;
5572 if ((s[pos] == '.'
5573 || (s[pos] == '(' && (pos += 1))
5574 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5575 && (last == NULL /* save only the first clause */
5576 || len != (int)strlen (last)
5577 || !strneq (s, last, len)))
5579 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5580 return len;
5582 else
5583 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* The atom is quoted: scan to the closing quote.  */
      for (pos++;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return -1;
          case '\\':
            if (s[pos + 1] == '\0')
              return -1;
            pos += 2;
            break;
          case '\'':
            if (s[pos + 1] != '\'')
              return pos + 1 - start; /* closing quote */
            pos += 2;                 /* a doubled quote */
            break;
          default:
            pos++;
            break;
          }
    }

  if (!ISLOWER (s[pos]) && s[pos] != '_')
    return -1;

  /* The atom is unquoted. */
  for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos - start;
}
5647 * Support for Erlang
5649 * Generates tags for functions, defines, and records.
5650 * Assumes that Erlang functions start at column 0.
5651 * Original code by Anders Lindgren (1996)
5653 static int erlang_func __P((char *, char *));
5654 static void erlang_attribute __P((char *));
5655 static int erlang_atom __P((char *));
5657 static void
5658 Erlang_functions (inf)
5659 FILE *inf;
5661 char *cp, *last;
5662 int len;
5663 int allocated;
5665 allocated = 0;
5666 len = 0;
5667 last = NULL;
5669 LOOP_ON_INPUT_LINES (inf, lb, cp)
5671 if (cp[0] == '\0') /* Empty line */
5672 continue;
5673 else if (iswhite (cp[0])) /* Not function nor attribute */
5674 continue;
5675 else if (cp[0] == '%') /* comment */
5676 continue;
5677 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5678 continue;
5679 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5681 erlang_attribute (cp);
5682 if (last != NULL)
5684 free (last);
5685 last = NULL;
5688 else if ((len = erlang_func (cp, last)) > 0)
5691 * Function. Store the function name so that we only
5692 * generates a tag for the first clause.
5694 if (last == NULL)
5695 last = xnew (len + 1, char);
5696 else if (len + 1 > allocated)
5697 xrnew (last, len + 1, char);
5698 allocated = len + 1;
5699 strncpy (last, cp, len);
5700 last[len] = '\0';
5703 if (last != NULL)
5704 free (last);
5709 * A function definition is added if it matches:
5710 * <beginning of line><Erlang Atom><whitespace>(
5712 * It is added to the tags database if it doesn't match the
5713 * name of the previous clause header.
5715 * Return the size of the name of the function, or 0 if no function
5716 * was found.
5718 static int
5719 erlang_func (s, last)
5720 char *s;
5721 char *last; /* Name of last clause. */
5723 int pos;
5724 int len;
5726 pos = erlang_atom (s);
5727 if (pos < 1)
5728 return 0;
5730 len = pos;
5731 pos = skip_spaces (s + pos) - s;
5733 /* Save only the first clause. */
5734 if (s[pos++] == '('
5735 && (last == NULL
5736 || len != (int)strlen (last)
5737 || !strneq (s, last, len)))
5739 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5740 return len;
5743 return 0;
5748 * Handle attributes. Currently, tags are generated for defines
5749 * and records.
5751 * They are on the form:
5752 * -define(foo, bar).
5753 * -define(Foo(M, N), M+N).
5754 * -record(graph, {vtab = notable, cyclic = true}).
5756 static void
5757 erlang_attribute (s)
5758 char *s;
5760 char *cp = s;
5762 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5763 && *cp++ == '(')
5765 int len = erlang_atom (skip_spaces (cp));
5766 if (len > 0)
5767 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5769 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and when a
 * quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: a backslash takes the next character with it.  */
      for (pos = 1; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      return pos + 1;           /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5803 static char *scan_separators __P((char *));
5804 static void add_regex __P((char *, language *));
5805 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];           /* the separator is the first character */
  char *copyto = name;          /* where the next decoded char is stored */

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character that follows it.  */
          ++name;
          if (*name == '\0')
            break;              /* backslash at the very end */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)            */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)       */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)          */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)          */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)         */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: end of the field */
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5867 /* Look at the argument of --regex or --no-regex and do the right
5868 thing. Same for each line of a regexp file. */
5869 static void
5870 analyse_regex (regex_arg)
5871 char *regex_arg;
5873 if (regex_arg == NULL)
5875 free_regexps (); /* --no-regex: remove existing regexps */
5876 return;
5879 /* A real --regexp option or a line in a regexp file. */
5880 switch (regex_arg[0])
5882 /* Comments in regexp file or null arg to --regex. */
5883 case '\0':
5884 case ' ':
5885 case '\t':
5886 break;
5888 /* Read a regex file. This is recursive and may result in a
5889 loop, which will stop when the file descriptors are exhausted. */
5890 case '@':
5892 FILE *regexfp;
5893 linebuffer regexbuf;
5894 char *regexfile = regex_arg + 1;
5896 /* regexfile is a file containing regexps, one per line. */
5897 regexfp = fopen (regexfile, "r");
5898 if (regexfp == NULL)
5900 pfatal (regexfile);
5901 return;
5903 linebuffer_init (&regexbuf);
5904 while (readline_internal (&regexbuf, regexfp) > 0)
5905 analyse_regex (regexbuf.buffer);
5906 free (regexbuf.buffer);
5907 fclose (regexfp);
5909 break;
5911 /* Regexp to be used for a specific language only. */
5912 case '{':
5914 language *lang;
5915 char *lang_name = regex_arg + 1;
5916 char *cp;
5918 for (cp = lang_name; *cp != '}'; cp++)
5919 if (*cp == '\0')
5921 error ("unterminated language name in regex: %s", regex_arg);
5922 return;
5924 *cp++ = '\0';
5925 lang = get_language_from_langname (lang_name);
5926 if (lang == NULL)
5927 return;
5928 add_regex (cp, lang);
5930 break;
5932 /* Regexp to be used for any language. */
5933 default:
5934 add_regex (regex_arg, NULL);
5935 break;
5939 /* Separate the regexp pattern, compile it,
5940 and care for optional name and modifiers. */
5941 static void
5942 add_regex (regexp_pattern, lang)
5943 char *regexp_pattern;
5944 language *lang;
5946 static struct re_pattern_buffer zeropattern;
5947 char sep, *pat, *name, *modifiers;
5948 const char *err;
5949 struct re_pattern_buffer *patbuf;
5950 regexp *rp;
5951 bool
5952 force_explicit_name = TRUE, /* do not use implicit tag names */
5953 ignore_case = FALSE, /* case is significant */
5954 multi_line = FALSE, /* matches are done one line at a time */
5955 single_line = FALSE; /* dot does not match newline */
5958 if (strlen(regexp_pattern) < 3)
5960 error ("null regexp", (char *)NULL);
5961 return;
5963 sep = regexp_pattern[0];
5964 name = scan_separators (regexp_pattern);
5965 if (name == NULL)
5967 error ("%s: unterminated regexp", regexp_pattern);
5968 return;
5970 if (name[1] == sep)
5972 error ("null name for regexp \"%s\"", regexp_pattern);
5973 return;
5975 modifiers = scan_separators (name);
5976 if (modifiers == NULL) /* no terminating separator --> no name */
5978 modifiers = name;
5979 name = "";
5981 else
5982 modifiers += 1; /* skip separator */
5984 /* Parse regex modifiers. */
5985 for (; modifiers[0] != '\0'; modifiers++)
5986 switch (modifiers[0])
5988 case 'N':
5989 if (modifiers == name)
5990 error ("forcing explicit tag name but no name, ignoring", NULL);
5991 force_explicit_name = TRUE;
5992 break;
5993 case 'i':
5994 ignore_case = TRUE;
5995 break;
5996 case 's':
5997 single_line = TRUE;
5998 /* FALLTHRU */
5999 case 'm':
6000 multi_line = TRUE;
6001 need_filebuf = TRUE;
6002 break;
6003 default:
6005 char wrongmod [2];
6006 wrongmod[0] = modifiers[0];
6007 wrongmod[1] = '\0';
6008 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6010 break;
6013 patbuf = xnew (1, struct re_pattern_buffer);
6014 *patbuf = zeropattern;
6015 if (ignore_case)
6017 static char lc_trans[CHARS];
6018 int i;
6019 for (i = 0; i < CHARS; i++)
6020 lc_trans[i] = lowcase (i);
6021 patbuf->translate = lc_trans; /* translation table to fold case */
6024 if (multi_line)
6025 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6026 else
6027 pat = regexp_pattern;
6029 if (single_line)
6030 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6031 else
6032 re_set_syntax (RE_SYNTAX_EMACS);
6034 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6035 if (multi_line)
6036 free (pat);
6037 if (err != NULL)
6039 error ("%s while compiling pattern", err);
6040 return;
6043 rp = p_head;
6044 p_head = xnew (1, regexp);
6045 p_head->pattern = savestr (regexp_pattern);
6046 p_head->p_next = rp;
6047 p_head->lang = lang;
6048 p_head->pat = patbuf;
6049 p_head->name = savestr (name);
6050 p_head->error_signaled = FALSE;
6051 p_head->force_explicit_name = force_explicit_name;
6052 p_head->ignore_case = ignore_case;
6053 p_head->multi_line = multi_line;
6057 * Do the substitutions indicated by the regular expression and
6058 * arguments.
6060 static char *
6061 substitute (in, out, regs)
6062 char *in, *out;
6063 struct re_registers *regs;
6065 char *result, *t;
6066 int size, dig, diglen;
6068 result = NULL;
6069 size = strlen (out);
6071 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6072 if (out[size - 1] == '\\')
6073 fatal ("pattern error in \"%s\"", out);
6074 for (t = etags_strchr (out, '\\');
6075 t != NULL;
6076 t = etags_strchr (t + 2, '\\'))
6077 if (ISDIGIT (t[1]))
6079 dig = t[1] - '0';
6080 diglen = regs->end[dig] - regs->start[dig];
6081 size += diglen - 2;
6083 else
6084 size -= 1;
6086 /* Allocate space and do the substitutions. */
6087 assert (size >= 0);
6088 result = xnew (size + 1, char);
6090 for (t = result; *out != '\0'; out++)
6091 if (*out == '\\' && ISDIGIT (*++out))
6093 dig = *out - '0';
6094 diglen = regs->end[dig] - regs->start[dig];
6095 strncpy (t, in + regs->start[dig], diglen);
6096 t += diglen;
6098 else
6099 *t++ = *out;
6100 *t = '\0';
6102 assert (t <= result + size);
6103 assert (t - result == (int)strlen (result));
6105 return result;
6108 /* Deallocate all regexps. */
6109 static void
6110 free_regexps ()
6112 regexp *rp;
6113 while (p_head != NULL)
6115 rp = p_head->p_next;
6116 free (p_head->pattern);
6117 free (p_head->name);
6118 free (p_head);
6119 p_head = rp;
6121 return;
6125 * Reads the whole file as a single string from `filebuf' and looks for
6126 * multi-line regular expressions, creating tags on matches.
6127 * readline already dealt with normal regexps.
6129 * Idea by Ben Wing <ben@666.com> (2002).
6131 static void
6132 regex_tag_multiline ()
6134 char *buffer = filebuf.buffer;
6135 regexp *rp;
6136 char *name;
6138 for (rp = p_head; rp != NULL; rp = rp->p_next)
6140 int match = 0;
6142 if (!rp->multi_line)
6143 continue; /* skip normal regexps */
6145 /* Generic initialisations before parsing file from memory. */
6146 lineno = 1; /* reset global line number */
6147 charno = 0; /* reset global char number */
6148 linecharno = 0; /* reset global char number of line start */
6150 /* Only use generic regexps or those for the current language. */
6151 if (rp->lang != NULL && rp->lang != curfdp->lang)
6152 continue;
6154 while (match >= 0 && match < filebuf.len)
6156 match = re_search (rp->pat, buffer, filebuf.len, charno,
6157 filebuf.len - match, &rp->regs);
6158 switch (match)
6160 case -2:
6161 /* Some error. */
6162 if (!rp->error_signaled)
6164 error ("regexp stack overflow while matching \"%s\"",
6165 rp->pattern);
6166 rp->error_signaled = TRUE;
6168 break;
6169 case -1:
6170 /* No match. */
6171 break;
6172 default:
6173 if (match == rp->regs.end[0])
6175 if (!rp->error_signaled)
6177 error ("regexp matches the empty string: \"%s\"",
6178 rp->pattern);
6179 rp->error_signaled = TRUE;
6181 match = -3; /* exit from while loop */
6182 break;
6185 /* Match occurred. Construct a tag. */
6186 while (charno < rp->regs.end[0])
6187 if (buffer[charno++] == '\n')
6188 lineno++, linecharno = charno;
6189 name = rp->name;
6190 if (name[0] == '\0')
6191 name = NULL;
6192 else /* make a named tag */
6193 name = substitute (buffer, rp->name, &rp->regs);
6194 if (rp->force_explicit_name)
6195 /* Force explicit tag name, if a name is there. */
6196 pfnote (name, TRUE, buffer + linecharno,
6197 charno - linecharno + 1, lineno, linecharno);
6198 else
6199 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6200 charno - linecharno + 1, lineno, linecharno);
6201 break;
6208 static bool
6209 nocase_tail (cp)
6210 char *cp;
6212 register int len = 0;
6214 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6215 cp++, len++;
6216 if (*cp == '\0' && !intoken (dbp[len]))
6218 dbp += len;
6219 return TRUE;
6221 return FALSE;
6224 static void
6225 get_tag (bp, namepp)
6226 register char *bp;
6227 char **namepp;
6229 register char *cp = bp;
6231 if (*bp != '\0')
6233 /* Go till you get to white space or a syntactic break */
6234 for (cp = bp + 1; !notinname (*cp); cp++)
6235 continue;
6236 make_tag (bp, cp - bp, TRUE,
6237 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6240 if (namepp != NULL)
6241 *namepp = savenstr (bp, cp - bp);
6245 * Read a line of text from `stream' into `lbp', excluding the
6246 * newline or CR-NL, if any. Return the number of characters read from
6247 * `stream', which is the length of the line including the newline.
6249 * On DOS or Windows we do not count the CR character, if any before the
6250 * NL, in the returned length; this mirrors the behavior of Emacs on those
6251 * platforms (for text files, it translates CR-NL to NL as it reads in the
6252 * file).
6254 * If multi-line regular expressions are requested, each line read is
6255 * appended to `filebuf'.
6257 static long
6258 readline_internal (lbp, stream)
6259 linebuffer *lbp;
6260 register FILE *stream;
6262 char *buffer = lbp->buffer;
6263 register char *p = lbp->buffer;
6264 register char *pend;
6265 int chars_deleted;
6267 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6269 for (;;)
6271 register int c = getc (stream);
6272 if (p == pend)
6274 /* We're at the end of linebuffer: expand it. */
6275 lbp->size *= 2;
6276 xrnew (buffer, lbp->size, char);
6277 p += buffer - lbp->buffer;
6278 pend = buffer + lbp->size;
6279 lbp->buffer = buffer;
6281 if (c == EOF)
6283 *p = '\0';
6284 chars_deleted = 0;
6285 break;
6287 if (c == '\n')
6289 if (p > buffer && p[-1] == '\r')
6291 p -= 1;
6292 #ifdef DOS_NT
6293 /* Assume CRLF->LF translation will be performed by Emacs
6294 when loading this file, so CRs won't appear in the buffer.
6295 It would be cleaner to compensate within Emacs;
6296 however, Emacs does not know how many CRs were deleted
6297 before any given point in the file. */
6298 chars_deleted = 1;
6299 #else
6300 chars_deleted = 2;
6301 #endif
6303 else
6305 chars_deleted = 1;
6307 *p = '\0';
6308 break;
6310 *p++ = c;
6312 lbp->len = p - buffer;
6314 if (need_filebuf /* we need filebuf for multi-line regexps */
6315 && chars_deleted > 0) /* not at EOF */
6317 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6319 /* Expand filebuf. */
6320 filebuf.size *= 2;
6321 xrnew (filebuf.buffer, filebuf.size, char);
6323 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6324 filebuf.len += lbp->len;
6325 filebuf.buffer[filebuf.len++] = '\n';
6326 filebuf.buffer[filebuf.len] = '\0';
6329 return lbp->len + chars_deleted;
6333 * Like readline_internal, above, but in addition try to match the
6334 * input line against relevant regular expressions and manage #line
6335 * directives.
6337 static void
6338 readline (lbp, stream)
6339 linebuffer *lbp;
6340 FILE *stream;
6342 long result;
6344 linecharno = charno; /* update global char number of line start */
6345 result = readline_internal (lbp, stream); /* read line */
6346 lineno += 1; /* increment global line number */
6347 charno += result; /* increment global char number */
6349 /* Honour #line directives. */
6350 if (!no_line_directive)
6352 static bool discard_until_line_directive;
6354 /* Check whether this is a #line directive. */
6355 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6357 unsigned int lno;
6358 int start = 0;
6360 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6361 && start > 0) /* double quote character found */
6363 char *endp = lbp->buffer + start;
6365 while ((endp = etags_strchr (endp, '"')) != NULL
6366 && endp[-1] == '\\')
6367 endp++;
6368 if (endp != NULL)
6369 /* Ok, this is a real #line directive. Let's deal with it. */
6371 char *taggedabsname; /* absolute name of original file */
6372 char *taggedfname; /* name of original file as given */
6373 char *name; /* temp var */
6375 discard_until_line_directive = FALSE; /* found it */
6376 name = lbp->buffer + start;
6377 *endp = '\0';
6378 canonicalize_filename (name); /* for DOS */
6379 taggedabsname = absolute_filename (name, tagfiledir);
6380 if (filename_is_absolute (name)
6381 || filename_is_absolute (curfdp->infname))
6382 taggedfname = savestr (taggedabsname);
6383 else
6384 taggedfname = relative_filename (taggedabsname,tagfiledir);
6386 if (streq (curfdp->taggedfname, taggedfname))
6387 /* The #line directive is only a line number change. We
6388 deal with this afterwards. */
6389 free (taggedfname);
6390 else
6391 /* The tags following this #line directive should be
6392 attributed to taggedfname. In order to do this, set
6393 curfdp accordingly. */
6395 fdesc *fdp; /* file description pointer */
6397 /* Go look for a file description already set up for the
6398 file indicated in the #line directive. If there is
6399 one, use it from now until the next #line
6400 directive. */
6401 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6402 if (streq (fdp->infname, curfdp->infname)
6403 && streq (fdp->taggedfname, taggedfname))
6404 /* If we remove the second test above (after the &&)
6405 then all entries pertaining to the same file are
6406 coalesced in the tags file. If we use it, then
6407 entries pertaining to the same file but generated
6408 from different files (via #line directives) will
6409 go into separate sections in the tags file. These
6410 alternatives look equivalent. The first one
6411 destroys some apparently useless information. */
6413 curfdp = fdp;
6414 free (taggedfname);
6415 break;
6417 /* Else, if we already tagged the real file, skip all
6418 input lines until the next #line directive. */
6419 if (fdp == NULL) /* not found */
6420 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6421 if (streq (fdp->infabsname, taggedabsname))
6423 discard_until_line_directive = TRUE;
6424 free (taggedfname);
6425 break;
6427 /* Else create a new file description and use that from
6428 now on, until the next #line directive. */
6429 if (fdp == NULL) /* not found */
6431 fdp = fdhead;
6432 fdhead = xnew (1, fdesc);
6433 *fdhead = *curfdp; /* copy curr. file description */
6434 fdhead->next = fdp;
6435 fdhead->infname = savestr (curfdp->infname);
6436 fdhead->infabsname = savestr (curfdp->infabsname);
6437 fdhead->infabsdir = savestr (curfdp->infabsdir);
6438 fdhead->taggedfname = taggedfname;
6439 fdhead->usecharno = FALSE;
6440 fdhead->prop = NULL;
6441 fdhead->written = FALSE;
6442 curfdp = fdhead;
6445 free (taggedabsname);
6446 lineno = lno - 1;
6447 readline (lbp, stream);
6448 return;
6449 } /* if a real #line directive */
6450 } /* if #line is followed by a a number */
6451 } /* if line begins with "#line " */
6453 /* If we are here, no #line directive was found. */
6454 if (discard_until_line_directive)
6456 if (result > 0)
6458 /* Do a tail recursion on ourselves, thus discarding the contents
6459 of the line buffer. */
6460 readline (lbp, stream);
6461 return;
6463 /* End of file. */
6464 discard_until_line_directive = FALSE;
6465 return;
6467 } /* if #line directives should be considered */
6470 int match;
6471 regexp *rp;
6472 char *name;
6474 /* Match against relevant regexps. */
6475 if (lbp->len > 0)
6476 for (rp = p_head; rp != NULL; rp = rp->p_next)
6478 /* Only use generic regexps or those for the current language.
6479 Also do not use multiline regexps, which is the job of
6480 regex_tag_multiline. */
6481 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6482 || rp->multi_line)
6483 continue;
6485 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6486 switch (match)
6488 case -2:
6489 /* Some error. */
6490 if (!rp->error_signaled)
6492 error ("regexp stack overflow while matching \"%s\"",
6493 rp->pattern);
6494 rp->error_signaled = TRUE;
6496 break;
6497 case -1:
6498 /* No match. */
6499 break;
6500 case 0:
6501 /* Empty string matched. */
6502 if (!rp->error_signaled)
6504 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6505 rp->error_signaled = TRUE;
6507 break;
6508 default:
6509 /* Match occurred. Construct a tag. */
6510 name = rp->name;
6511 if (name[0] == '\0')
6512 name = NULL;
6513 else /* make a named tag */
6514 name = substitute (lbp->buffer, rp->name, &rp->regs);
6515 if (rp->force_explicit_name)
6516 /* Force explicit tag name, if a name is there. */
6517 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6518 else
6519 make_tag (name, strlen (name), TRUE,
6520 lbp->buffer, match, lineno, linecharno);
6521 break;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  return savenstr (cp, strlen (cp));
}
6540 * Return a pointer to a space of size LEN+1 allocated with xnew where
6541 * the string CP has been copied for at most the first LEN characters.
6543 static char *
6544 savenstr (cp, len)
6545 char *cp;
6546 int len;
6548 register char *dp;
6550 dp = xnew (len + 1, char);
6551 strncpy (dp, cp, len);
6552 dp[len] = '\0';
6553 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * considered part of the string, so searching for '\0' finds it.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Like strrchr, compare against C converted to char; otherwise a
     byte >= 0x80 passed as a positive int never matches where plain
     char is signed.  */
  const char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * considered part of the string, so searching for '\0' finds it.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Like strchr, compare against C converted to char; otherwise a
     byte >= 0x80 passed as a positive int never matches where plain
     char is signed.  */
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, otherwise the difference between the
 * first pair of characters that differ (lowercased when both are
 * alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  while (*s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: if the first N characters
 * are equal, or N is not positive, return zero.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Test the count first, so that an exhausted count always means
     "equal", even when S1 ends at that same position.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  while (iswhite (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (*cp != '\0' && !iswhite (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  `s1' and `s2' are passed on to
   `error'; the process then exits with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print the system error message for the current `errno', prefixed by
   `s1' (see perror), and exit with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6681 static void
6682 suggest_asking_for_help ()
6684 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6685 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6686 exit (EXIT_FAILURE);
6689 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6690 static void
6691 error (s1, s2)
6692 const char *s1, *s2;
6694 fprintf (stderr, "%s: ", progname);
6695 fprintf (stderr, s1, s2);
6696 fprintf (stderr, "\n");
6699 /* Return a newly-allocated string whose contents
6700 concatenate those of s1, s2, s3. */
6701 static char *
6702 concat (s1, s2, s3)
6703 char *s1, *s2, *s3;
6705 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6706 char *result = xnew (len1 + len2 + len3 + 1, char);
6708 strcpy (result, s1);
6709 strcpy (result + len1, s2);
6710 strcpy (result + len1 + len2, s3);
6711 result[len1 + len2 + len3] = '\0';
6713 return result;
6717 /* Does the same work as the system V getcwd, but does not need to
6718 guess the buffer size in advance. */
6719 static char *
6720 etags_getcwd ()
6722 #ifdef HAVE_GETCWD
6723 int bufsize = 200;
6724 char *path = xnew (bufsize, char);
6726 while (getcwd (path, bufsize) == NULL)
6728 if (errno != ERANGE)
6729 pfatal ("getcwd");
6730 bufsize *= 2;
6731 free (path);
6732 path = xnew (bufsize, char);
6735 canonicalize_filename (path);
6736 return path;
6738 #else /* not HAVE_GETCWD */
6739 #if MSDOS
6741 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6743 getwd (path);
6745 for (p = path; *p != '\0'; p++)
6746 if (*p == '\\')
6747 *p = '/';
6748 else
6749 *p = lowcase (*p);
6751 return strdup (path);
6752 #else /* not MSDOS */
6753 linebuffer path;
6754 FILE *pipe;
6756 linebuffer_init (&path);
6757 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6758 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6759 pfatal ("pwd");
6760 pclose (pipe);
6762 return path.buffer;
6763 #endif /* not MSDOS */
6764 #endif /* not HAVE_GETCWD */
6767 /* Return a newly allocated string containing the file name of FILE
6768 relative to the absolute directory DIR (which should end with a slash). */
6769 static char *
6770 relative_filename (file, dir)
6771 char *file, *dir;
6773 char *fp, *dp, *afn, *res;
6774 int i;
6776 /* Find the common root of file and dir (with a trailing slash). */
6777 afn = absolute_filename (file, cwd);
6778 fp = afn;
6779 dp = dir;
6780 while (*fp++ == *dp++)
6781 continue;
6782 fp--, dp--; /* back to the first differing char */
6783 #ifdef DOS_NT
6784 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6785 return afn;
6786 #endif
6787 do /* look at the equal chars until '/' */
6788 fp--, dp--;
6789 while (*fp != '/');
6791 /* Build a sequence of "../" strings for the resulting relative file name. */
6792 i = 0;
6793 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6794 i += 1;
6795 res = xnew (3*i + strlen (fp + 1) + 1, char);
6796 res[0] = '\0';
6797 while (i-- > 0)
6798 strcat (res, "../");
6800 /* Add the file name relative to the common root of file and dir. */
6801 strcat (res, fp + 1);
6802 free (afn);
6804 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/." components
   are removed and "/dirname/.." sequences are collapsed.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous
                 component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy cannot be
                 used here; the +1 moves the terminator too.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping again: drop the "/." in place.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; if it contains
   no slash, a copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *slashp, *res;
  char save;

  canonicalize_filename (file);
  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  /* Temporarily cut FILE right after its last slash, so that only the
     directory part is made absolute; then put the character back.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
6893 /* Whether the argument string is an absolute file name. The argument
6894 string must have been canonicalized with canonicalize_filename. */
6895 static bool
6896 filename_is_absolute (fn)
6897 char *fn;
6899 return (fn[0] == '/'
6900 #ifdef DOS_NT
6901 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
6902 #endif
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lowercase drive letter is also converted to uppercase;
   on other systems the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6926 /* Initialize a linebuffer for use */
6927 static void
6928 linebuffer_init (lbp)
6929 linebuffer *lbp;
6931 lbp->size = (DEBUG) ? 3 : 200;
6932 lbp->buffer = xnew (lbp->size, char);
6933 lbp->buffer[0] = '\0';
6934 lbp->len = 0;
6937 /* Set the minimum size of a string contained in a linebuffer. */
6938 static void
6939 linebuffer_setlen (lbp, toksize)
6940 linebuffer *lbp;
6941 int toksize;
6943 while (lbp->size <= toksize)
6945 lbp->size *= 2;
6946 xrnew (lbp->buffer, lbp->size, char);
6948 lbp->len = toksize;
6951 /* Like malloc but get fatal error if memory is exhausted. */
6952 static PTR
6953 xmalloc (size)
6954 unsigned int size;
6956 PTR result = (PTR) malloc (size);
6957 if (result == NULL)
6958 fatal ("virtual memory exhausted", (char *)NULL);
6959 return result;
6962 static PTR
6963 xrealloc (ptr, size)
6964 char *ptr;
6965 unsigned int size;
6967 PTR result = (PTR) realloc (ptr, size);
6968 if (result == NULL)
6969 fatal ("virtual memory exhausted", (char *)NULL);
6970 return result;
6974 * Local Variables:
6975 * indent-tabs-mode: t
6976 * tab-width: 8
6977 * fill-column: 79
6978 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6979 * c-file-style: "gnu"
6980 * End:
6983 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6984 (do not change this comment) */
6986 /* etags.c ends here */