newsticker: handle url-retrieve errors.
[emacs.git] / lib-src / etags.c
blobf577c751a2849e5eeee174d0521b5efd4b1ac6d8
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
35 This file is not considered part of GNU Emacs.
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider shipping a
79 * configuration file containing regexp definitions for etags.
/* Identification string carrying the revision number of this file.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38";
84 #define TRUE 1
85 #define FALSE 0
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # ifdef VMS
176 # define EXIT_SUCCESS 1
177 # define EXIT_FAILURE 0
178 # else /* no VMS */
179 # define EXIT_SUCCESS 0
180 # define EXIT_FAILURE 1
181 # endif
182 # endif
183 #endif /* !WINDOWSNT */
185 #ifdef HAVE_UNISTD_H
186 # include <unistd.h>
187 #else
188 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
189 extern char *getcwd (char *buf, size_t size);
190 # endif
191 #endif /* HAVE_UNISTD_H */
193 #include <stdio.h>
194 #include <ctype.h>
195 #include <errno.h>
196 #ifndef errno
197 extern int errno;
198 #endif
199 #include <sys/types.h>
200 #include <sys/stat.h>
202 #include <assert.h>
203 #ifdef NDEBUG
204 # undef assert /* some systems have a buggy assert.h */
205 # define assert(x) ((void) 0)
206 #endif
208 #if !defined (S_ISREG) && defined (S_IFREG)
209 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
210 #endif
212 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
213 # define NO_LONG_OPTIONS TRUE
214 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
215 extern char *optarg;
216 extern int optind, opterr;
217 #else
218 # define NO_LONG_OPTIONS FALSE
219 # include <getopt.h>
220 #endif /* NO_LONG_OPTIONS */
222 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
223 # ifdef __CYGWIN__ /* compiling on Cygwin */
224 !!! NOTICE !!!
225 the regex.h distributed with Cygwin is not compatible with etags, alas!
226 If you want regular expression support, you should delete this notice and
227 arrange to use the GNU regex.h and regex.c.
228 # endif
229 #endif
230 #include <regex.h>
232 /* Define CTAGS to make the program "ctags" compatible with the usual one.
233 Leave it undefined to make the program "etags", which makes emacs-style
234 tag tables and tags typedefs, #defines and struct/union/enum by default. */
235 #ifdef CTAGS
236 # undef CTAGS
237 # define CTAGS TRUE
238 #else
239 # define CTAGS FALSE
240 #endif
/*
 * String equality predicates.  Each macro yields nonzero when the two
 * strings compare equal:
 *   streq, strneq          use strcmp / strncmp;
 *   strcaseeq, strncaseeq  use etags_strcasecmp / etags_strncasecmp.
 * The `n' variants pass N through as the comparison length.
 *
 * Unless NDEBUG is defined, every macro first asserts that BOTH
 * arguments are non-null: a single null pointer is already enough to
 * make the comparison functions misbehave, so the check must use `&&'.
 * The arguments are expanded more than once; do not pass expressions
 * with side effects.
 */
#define streq(s,t)        (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)    (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)     (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
/* 256 == 2^8: one table slot for each value CHAR() can produce.  */
#define CHARS 256
/* Map X onto a table index in [0, CHARS - 1], also for negative values.  */
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))

/* Character-class predicates; each one is a lookup in a CHARS-sized table.  */
#define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that always hand the library a non-negative value.  */
#define ISALNUM(c)      isalnum (CHAR(c))
#define ISALPHA(c)      isalpha (CHAR(c))
#define ISDIGIT(c)      isdigit (CHAR(c))
#define ISLOWER(c)      islower (CHAR(c))

/* Case conversion with the same argument masking.  */
#define lowcase(c)      tolower (CHAR(c))
#define upcase(c)       toupper (CHAR(c))
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew requests room for N objects of type Type and casts the result to
 * `Type *'.  xrnew resizes the block OP points to so that it has room
 * for N objects of type Type and assigns the result back to OP.
 * When DEBUG is nonzero the requests go through trace_malloc and
 * trace_realloc from chkmalloc.h, which also receive the file and line
 * of the call site; otherwise they go through xmalloc and xrealloc.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
/* Booleans are plain ints throughout this file.  */
#define bool int

/* Type of a language parse function: it takes a FILE * and returns
   nothing.  */
typedef void Lang_function __P((FILE *));
/* Pairs a compressed-file suffix with the command that expands it.  */
typedef struct
{
  char *suffix;         /* file name suffix for this compressor */
  char *command;        /* takes one arg and decompresses to stdout */
} compressor;
292 typedef struct
294 char *name; /* language name */
295 char *help; /* detailed help for the language */
296 Lang_function *function; /* parse function */
297 char **suffixes; /* name suffixes of this language's files */
298 char **filenames; /* names of this language's files */
299 char **interpreters; /* interpreters for this language */
300 bool metasource; /* source used to generate other sources */
301 } language;
303 typedef struct fdesc
305 struct fdesc *next; /* for the linked list */
306 char *infname; /* uncompressed input file name */
307 char *infabsname; /* absolute uncompressed input file name */
308 char *infabsdir; /* absolute dir of input file */
309 char *taggedfname; /* file name to write in tagfile */
310 language *lang; /* language of file */
311 char *prop; /* file properties to write in tagfile */
312 bool usecharno; /* etags tags shall contain char number */
313 bool written; /* entry written in the tags file */
314 } fdesc;
316 typedef struct node_st
317 { /* sorting structure */
318 struct node_st *left, *right; /* left and right sons */
319 fdesc *fdp; /* description of file to whom tag belongs */
320 char *name; /* tag name */
321 char *regex; /* search regexp */
322 bool valid; /* write this tag on the tag file */
323 bool is_func; /* function tag: use regexp in CTAGS mode */
324 bool been_warned; /* warning already given for duplicated tag */
325 int lno; /* line number tag is on */
326 long cno; /* character number line starts on */
327 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
343 /* Used to support mixing of --lang and file names. */
344 typedef struct
346 enum {
347 at_language, /* a language specification */
348 at_regexp, /* a regular expression */
349 at_filename, /* a file name */
350 at_stdin, /* read from stdin here */
351 at_end /* stop parsing the list */
352 } arg_type; /* argument type */
353 language *lang; /* language associated with the argument */
354 char *what; /* the argument itself */
355 } argument;
357 /* Structure defining a regular expression. */
358 typedef struct regexp
360 struct regexp *p_next; /* pointer to next in list */
361 language *lang; /* if set, use only for this language */
362 char *pattern; /* the regexp pattern */
363 char *name; /* tag name */
364 struct re_pattern_buffer *pat; /* the compiled pattern */
365 struct re_registers regs; /* re registers */
366 bool error_signaled; /* already signaled for this regexp */
367 bool force_explicit_name; /* do not allow implict tag name */
368 bool ignore_case; /* ignore case when matching */
369 bool multi_line; /* do a multi-line match on the whole file */
370 } regexp;
/* Forward declarations of the language parse functions.
   Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
/* C_entries is the only one with an extra leading argument (c_ext).  */
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Forth_words __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));
404 static void print_language_names __P((void));
405 static void print_version __P((void));
406 static void print_help __P((argument *));
407 int main __P((int, char **));
409 static compressor *get_compressor_from_suffix __P((char *, char **));
410 static language *get_language_from_langname __P((const char *));
411 static language *get_language_from_interpreter __P((char *));
412 static language *get_language_from_filename __P((char *, bool));
413 static void readline __P((linebuffer *, FILE *));
414 static long readline_internal __P((linebuffer *, FILE *));
415 static bool nocase_tail __P((char *));
416 static void get_tag __P((char *, char **));
418 static void analyse_regex __P((char *));
419 static void free_regexps __P((void));
420 static void regex_tag_multiline __P((void));
421 static void error __P((const char *, const char *));
422 static void suggest_asking_for_help __P((void));
423 void fatal __P((char *, char *));
424 static void pfatal __P((char *));
425 static void add_node __P((node *, node **));
427 static void init __P((void));
428 static void process_file_name __P((char *, language *));
429 static void process_file __P((FILE *, char *, language *));
430 static void find_entries __P((FILE *));
431 static void free_tree __P((node *));
432 static void free_fdesc __P((fdesc *));
433 static void pfnote __P((char *, bool, char *, int, int, long));
434 static void make_tag __P((char *, int, bool, char *, int, int, long));
435 static void invalidate_nodes __P((fdesc *, node **));
436 static void put_entries __P((node *));
438 static char *concat __P((char *, char *, char *));
439 static char *skip_spaces __P((char *));
440 static char *skip_non_spaces __P((char *));
441 static char *savenstr __P((char *, int));
442 static char *savestr __P((char *));
443 static char *etags_strchr __P((const char *, int));
444 static char *etags_strrchr __P((const char *, int));
445 static int etags_strcasecmp __P((const char *, const char *));
446 static int etags_strncasecmp __P((const char *, const char *, int));
447 static char *etags_getcwd __P((void));
448 static char *relative_filename __P((char *, char *));
449 static char *absolute_filename __P((char *, char *));
450 static char *absolute_dirname __P((char *, char *));
451 static bool filename_is_absolute __P((char *f));
452 static void canonicalize_filename __P((char *));
453 static void linebuffer_init __P((linebuffer *));
454 static void linebuffer_setlen __P((linebuffer *, int));
455 static PTR xmalloc __P((unsigned int));
456 static PTR xrealloc __P((char *, unsigned int));
459 static char searchar = '/'; /* use /.../ searches */
461 static char *tagfile; /* output file */
462 static char *progname; /* name this program was invoked with */
463 static char *cwd; /* current working directory */
464 static char *tagfiledir; /* directory of tagfile */
465 static FILE *tagf; /* ioptr for tags file */
467 static fdesc *fdhead; /* head of file description list */
468 static fdesc *curfdp; /* current file description */
469 static int lineno; /* line number of current line */
470 static long charno; /* current character number */
471 static long linecharno; /* charno of start of current line */
472 static char *dbp; /* pointer to start of current tag */
474 static const int invalidcharno = -1;
476 static node *nodehead; /* the head of the binary tree of tags */
477 static node *last_node; /* the last node created */
479 static linebuffer lb; /* the current line */
480 static linebuffer filebuf; /* a buffer containing the whole file */
481 static linebuffer token_name; /* a buffer containing a tag name */
483 /* boolean "functions" (see init) */
484 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The character sets referred to by the comments on the iswhite,
   notinname, endtoken, begtoken and intoken macros.  */

/* white chars */
static char *white = " \f\t\n\r\v";
/* not in a name -- look at make_tag before modifying! */
static char *nonam = " \f\t\n\r()=,;";
/* token ending chars */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";
/* token starting chars */
static char *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";
/* valid in-token chars */
static char *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
/* Flags set from the command line.  */

/* -a: append to tags */
static bool append_to_tagfile;

/* The next five default to TRUE in C and derived languages.  */

/* -t: create tags for C and Ada typedefs */
static bool typedefs;
/* -T: create tags for C typedefs, level 0 struct/enum/union decls,
   and C++ member functions. */
static bool typedefs_or_cplusplus;
/* -d: create tags for C #define, enum constants and variables.
   -D: opposite of -d.  Default under ctags. */
static bool constantypedefs;
/* create tags for global variables */
static bool globals;
/* create tags for C member variables */
static bool members;

/* --declarations: tag them and extern in C&Co */
static bool declarations;
/* ignore #line directives (undocumented) */
static bool no_line_directive;
/* no duplicate tags for ctags (undocumented) */
static bool no_duplicates;
/* -u: update tags */
static bool update;
/* -v: create vgrind style index output */
static bool vgrind_style;
/* -w: suppress warnings (undocumented) */
static bool no_warnings;
/* -x: create cxref style output */
static bool cxref_style;
/* .[hc] means C++, not C (undocumented) */
static bool cplusplus;
/* -I: ignore indentation in C */
static bool ignoreindent;
/* --packages-only: in Ada, only tag packages */
static bool packages_only;
/* STDIN is defined in LynxOS system headers, so drop any previous
   definition before installing ours.  */
#ifdef STDIN
# undef STDIN
#endif

/* Value returned by getopt_long on --parse-stdin.  */
#define STDIN 0x1001
/* --parse-stdin used */
static bool parsing_stdin;
527 static regexp *p_head; /* list of all regexps */
528 static bool need_filebuf; /* some regexes are multi-line */
530 static struct option longopts[] =
532 { "append", no_argument, NULL, 'a' },
533 { "packages-only", no_argument, &packages_only, TRUE },
534 { "c++", no_argument, NULL, 'C' },
535 { "declarations", no_argument, &declarations, TRUE },
536 { "no-line-directive", no_argument, &no_line_directive, TRUE },
537 { "no-duplicates", no_argument, &no_duplicates, TRUE },
538 { "help", no_argument, NULL, 'h' },
539 { "help", no_argument, NULL, 'H' },
540 { "ignore-indentation", no_argument, NULL, 'I' },
541 { "language", required_argument, NULL, 'l' },
542 { "members", no_argument, &members, TRUE },
543 { "no-members", no_argument, &members, FALSE },
544 { "output", required_argument, NULL, 'o' },
545 { "regex", required_argument, NULL, 'r' },
546 { "no-regex", no_argument, NULL, 'R' },
547 { "ignore-case-regex", required_argument, NULL, 'c' },
548 { "parse-stdin", required_argument, NULL, STDIN },
549 { "version", no_argument, NULL, 'V' },
551 #if CTAGS /* Ctags options */
552 { "backward-search", no_argument, NULL, 'B' },
553 { "cxref", no_argument, NULL, 'x' },
554 { "defines", no_argument, NULL, 'd' },
555 { "globals", no_argument, &globals, TRUE },
556 { "typedefs", no_argument, NULL, 't' },
557 { "typedefs-and-c++", no_argument, NULL, 'T' },
558 { "update", no_argument, NULL, 'u' },
559 { "vgrind", no_argument, NULL, 'v' },
560 { "no-warn", no_argument, NULL, 'w' },
562 #else /* Etags options */
563 { "no-defines", no_argument, NULL, 'D' },
564 { "no-globals", no_argument, &globals, FALSE },
565 { "include", required_argument, NULL, 'i' },
566 #endif
567 { NULL }
570 static compressor compressors[] =
572 { "z", "gzip -d -c"},
573 { "Z", "gzip -d -c"},
574 { "gz", "gzip -d -c"},
575 { "GZ", "gzip -d -c"},
576 { "bz2", "bzip2 -d -c" },
577 { NULL }
/*
 * Language stuff.
 *
 * For each language: a NULL-terminated list of file name suffixes and
 * the text of its detailed help.
 */

/* Ada code */
static char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";
/* Assembly code: NULL-terminated suffix list and help text.  */
static char *Asm_suffixes [] =
  { "a",        /* Unix assembler */
    "asm",      /* Microcontroller assembly */
    "def",      /* BSO/Tasking definition includes */
    "inc",      /* Microcontroller include files */
    "ins",      /* Microcontroller include files */
    "s", "sa",  /* Unix assembler */
    "S",        /* cpp-processed Unix assembler */
    "src",      /* BSO/Tasking C compiler output */
    NULL
  };
static char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
/* C, C++ and Java: suffix lists and help texts.

   Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
  { "c", "h", NULL };

/* The C help text differs between the two programs built from this file. */
#if CTAGS                               /* C help for Ctags */
static char default_C_help [] =
"In C code, any C function is a tag. Use -t to tag typedefs.\n\
Use -T to tag definitions of `struct', `union' and `enum'.\n\
Use -d to tag `#define' macro definitions and `enum' constants.\n\
Use --globals to tag global variables.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";
#else                                   /* C help for Etags */
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'. `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'. Global variables are tags unless you specify\n\
`--no-globals' and so are struct members unless you specify\n\
`--no-members'. Use of `--no-globals', `--no-defines' and\n\
`--no-members' can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using `--declarations'.";
#endif  /* C help for Ctags and Etags */

static char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",                        /* Objective C++ */
    "pdb",                      /* Postscript with C syntax */
    NULL };
static char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized. Member\n\
variables are recognized unless you use the `--no-members' option.\n\
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
`operator+'.";

static char *Cjava_suffixes [] =
  { "java", NULL };
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* Cobol: suffix list and help text.  */
static char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";

/* C*: suffix list only; no help text is defined for it here.  */
static char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

/* Erlang: suffix list and help text.  */
static char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
/* Forth: NULL-terminated suffix list and help text.  The suffix list
   is file-local, like every other language's suffix table.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran */
static char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";

/* HTML */
static char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static char HTML_help [] =
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers. Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";

/* Lisp */
static char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";

/* Lua */
static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";

/* Makefiles: listed by whole file name, not by suffix.  */
static char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";

/* Objective C */
static char *Objc_suffixes [] =
  { "lm",                       /* Objective lex file */
    "m",                        /* Objective C file */
     NULL };
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal */
static char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";
/* " // this is for working around an Emacs highlighting bug... */
/* Perl: besides suffixes it also has a list of interpreter names.  */
static char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is `main::SUB'.";

/* PHP */
static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static char PHP_help [] =
"In PHP code, tags are functions, classes and defines. Unless you use\n\
the `--no-members' option, vars are tags too.";

/* Plain C: suffix list only.  */
static char *plain_C_suffixes [] =
  { "pc",                       /* Pro*C file */
     NULL };

/* PostScript */
static char *PS_suffixes [] =
  { "ps", "psw", NULL };        /* .psw is for PSWrap */
static char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog */
static char *Prolog_suffixes [] =
  { "prolog", NULL };
static char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

/* Python */
static char *Python_suffixes [] =
  { "py", NULL };
static char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";

/* Scheme */
/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!' at top level in the file.";
/* TeX */
static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";

/* Texinfo */
static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Yacc */
static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";

/* Help texts for the two pseudo-languages and the generic fallback.  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files base on file name suffix and file contents.";

static char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

static char no_lang_help [] =
"No detailed help available for this language.";
823 * Table of languages.
825 * It is ok for a given function to be listed under more than one
826 * name. I just didn't.
829 static language lang_names [] =
831 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
832 { "asm", Asm_help, Asm_labels, Asm_suffixes },
833 { "c", default_C_help, default_C_entries, default_C_suffixes },
834 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
835 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
836 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
837 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
838 { "forth", Forth_help, Forth_words, Forth_suffixes },
839 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
840 { "html", HTML_help, HTML_labels, HTML_suffixes },
841 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
842 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
843 { "lua", Lua_help, Lua_functions, Lua_suffixes },
844 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
845 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
846 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
847 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
848 { "php", PHP_help, PHP_functions, PHP_suffixes },
849 { "postscript",PS_help, PS_functions, PS_suffixes },
850 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
851 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
852 { "python", Python_help, Python_functions, Python_suffixes },
853 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
854 { "tex", TeX_help, TeX_commands, TeX_suffixes },
855 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
856 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
857 { "auto", auto_help }, /* default guessing scheme */
858 { "none", none_help, just_read_file }, /* regexp matching only */
859 { NULL } /* end of list */
/* Print on standard output the list of supported languages taken from
   lang_names, each followed by its default file names and its dot
   suffixes, and then a short explanation of how the language of a
   file is chosen. */
863 static void
864 print_language_names ()
866 language *lang;
867 char **name, **ext;
869 puts ("\nThese are the currently supported languages, along with the\n\
870 default file names and dot suffixes:");
/* One output line per table entry; the table ends at the first entry
   whose name is NULL. */
871 for (lang = lang_names; lang->name != NULL; lang++)
873 printf (" %-*s", 10, lang->name);
/* The filenames and suffixes lists are optional (may be NULL) and are
   themselves NULL-terminated. */
874 if (lang->filenames != NULL)
875 for (name = lang->filenames; *name != NULL; name++)
876 printf (" %s", *name);
877 if (lang->suffixes != NULL)
878 for (ext = lang->suffixes; *ext != NULL; ext++)
879 printf (" .%s", *ext);
/* Terminate the line for this language. */
880 puts ("");
882 puts ("where `auto' means use default language for files based on file\n\
883 name suffix, and `none' means only do regexp processing on files.\n\
884 If no language is specified and no matching suffix is found,\n\
885 the first line of the file is read for a sharp-bang (#!) sequence\n\
886 followed by the name of an interpreter. If no such sequence is found,\n\
887 Fortran is tried first; if no tags are found, C is tried next.\n\
888 When parsing any C file, a \"class\" or \"template\" keyword\n\
889 switches to C++.");
890 puts ("Compressed files are supported using gzip and bzip2.\n\
892 For detailed help on a given language use, for example,\n\
893 etags --help --lang=ada.");
896 #ifndef EMACS_NAME
897 # define EMACS_NAME "standalone"
898 #endif
899 #ifndef VERSION
900 # define VERSION "17.38"
901 #endif
902 static void
903 print_version ()
905 /* Makes it easier to update automatically. */
906 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
908 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
909 puts (emacs_copyright);
910 puts ("This program is distributed under the terms in ETAGS.README");
912 exit (EXIT_SUCCESS);
/* When PRINT_UNDOCUMENTED_OPTIONS_HELP is true, print_help also
   describes a few options that are otherwise left out of the help
   output; it defaults to FALSE. */
915 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
916 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
917 #endif
/* Print help on standard output and exit with EXIT_SUCCESS.
   ARGBUFFER is the list of parsed command-line arguments, terminated
   by an entry of type at_end.  If it contains any at_language entry,
   only the help text of those languages is printed; otherwise the
   usage line, the description of every option (the set differs
   between ctags and etags, selected by CTAGS) and the list of
   supported languages are printed. */
919 static void
920 print_help (argbuffer)
921 argument *argbuffer;
923 bool help_for_lang = FALSE;
/* Language-specific help: one help text per --language argument,
   separated by an empty line. */
925 for (; argbuffer->arg_type != at_end; argbuffer++)
926 if (argbuffer->arg_type == at_language)
928 if (help_for_lang)
929 puts ("");
930 puts (argbuffer->lang->help);
931 help_for_lang = TRUE;
/* If at least one language help was printed, that is all we print. */
934 if (help_for_lang)
935 exit (EXIT_SUCCESS);
/* General help: usage line first, then the options one by one. */
937 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
939 These are the options accepted by %s.\n", progname, progname);
940 if (NO_LONG_OPTIONS)
941 puts ("WARNING: long option names do not work with this executable,\n\
942 as it is not linked with GNU getopt.");
943 else
944 puts ("You may use unambiguous abbreviations for the long option names.");
945 puts (" A - as file name means read names from stdin (one per line).\n\
946 Absolute names are stored in the output file as they are.\n\
947 Relative ones are stored relative to the output file's directory.\n");
949 puts ("-a, --append\n\
950 Append tag entries to existing tags file.");
952 puts ("--packages-only\n\
953 For Ada files, only generate tags for packages.");
955 if (CTAGS)
956 puts ("-B, --backward-search\n\
957 Write the search commands for the tag entries using '?', the\n\
958 backward-search command instead of '/', the forward-search command.");
960 /* This option is mostly obsolete, because etags can now automatically
961 detect C++. Retained for backward compatibility and for debugging and
962 experimentation. In principle, we could want to tag as C++ even
963 before any "class" or "template" keyword.
964 puts ("-C, --c++\n\
965 Treat files whose name suffix defaults to C language as C++ files.");
968 puts ("--declarations\n\
969 In C and derived languages, create tags for function declarations,");
970 if (CTAGS)
971 puts ("\tand create tags for extern variables if --globals is used.");
972 else
973 puts
974 ("\tand create tags for extern variables unless --no-globals is used.");
976 if (CTAGS)
977 puts ("-d, --defines\n\
978 Create tag entries for C #define constants and enum constants, too.");
979 else
980 puts ("-D, --no-defines\n\
981 Don't create tag entries for C #define constants and enum constants.\n\
982 This makes the tags file smaller.");
984 if (!CTAGS)
985 puts ("-i FILE, --include=FILE\n\
986 Include a note in tag file indicating that, when searching for\n\
987 a tag, one should also consult the tags file FILE after\n\
988 checking the current file.");
990 puts ("-l LANG, --language=LANG\n\
991 Force the following files to be considered as written in the\n\
992 named language up to the next --language=LANG option.");
994 if (CTAGS)
995 puts ("--globals\n\
996 Create tag entries for global variables in some languages.");
997 else
998 puts ("--no-globals\n\
999 Do not create tag entries for global variables in some\n\
1000 languages. This makes the tags file smaller.");
1002 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1003 puts ("--no-line-directive\n\
1004 Ignore #line preprocessor directives in C and derived languages.");
1006 if (CTAGS)
1007 puts ("--members\n\
1008 Create tag entries for members of structures in some languages.");
1009 else
1010 puts ("--no-members\n\
1011 Do not create tag entries for members of structures\n\
1012 in some languages.");
1014 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1015 Make a tag for each line matching a regular expression pattern\n\
1016 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1017 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1018 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1019 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1020 puts (" If TAGNAME/ is present, the tags created are named.\n\
1021 For example Tcl named tags can be created with:\n\
1022 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1023 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1024 `m' means to allow multi-line matches, `s' implies `m' and\n\
1025 causes dot to match any character, including newline.");
1027 puts ("-R, --no-regex\n\
1028 Don't create tags from regexps for the following files.");
1030 puts ("-I, --ignore-indentation\n\
1031 In C and C++ do not assume that a closing brace in the first\n\
1032 column is the final brace of a function or structure definition.");
1034 puts ("-o FILE, --output=FILE\n\
1035 Write the tags to FILE.");
1037 puts ("--parse-stdin=NAME\n\
1038 Read from standard input and record tags as belonging to file NAME.");
1040 if (CTAGS)
1042 puts ("-t, --typedefs\n\
1043 Generate tag entries for C and Ada typedefs.");
1044 puts ("-T, --typedefs-and-c++\n\
1045 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1046 and C++ member functions.");
1049 if (CTAGS)
1050 puts ("-u, --update\n\
1051 Update the tag entries for the given files, leaving tag\n\
1052 entries for other files in place. Currently, this is\n\
1053 implemented by deleting the existing entries for the given\n\
1054 files and then rewriting the new entries at the end of the\n\
1055 tags file. It is often faster to simply rebuild the entire\n\
1056 tag file than to use this.");
1058 if (CTAGS)
1060 puts ("-v, --vgrind\n\
1061 Print on the standard output an index of items intended for\n\
1062 human consumption, similar to the output of vgrind. The index\n\
1063 is sorted, and gives the page number of each item.");
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-duplicates\n\
1067 Do not create duplicate tag entries, for compatibility with\n\
1068 traditional ctags.");
1070 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1071 puts ("-w, --no-warn\n\
1072 Suppress warning messages about duplicate tag entries.");
1074 puts ("-x, --cxref\n\
1075 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1076 The output uses line numbers instead of page numbers, but\n\
1077 beyond that the differences are cosmetic; try both to see\n\
1078 which you like.");
1081 puts ("-V, --version\n\
1082 Print the version of the program.\n\
1083 -h, --help\n\
1084 Print this help message.\n\
1085 Followed by one or more `--language' options prints detailed\n\
1086 help about tag generation for the specified languages.");
1088 print_language_names ();
1090 puts ("");
1091 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1093 exit (EXIT_SUCCESS);
1097 #ifdef VMS /* VMS specific functions */
1099 #define EOS '\0'
1101 /* This is a BUG! ANY arbitrary limit is a BUG!
1102 Won't someone please fix this? */
1103 #define MAX_FILE_SPEC_LEN 255
1104 typedef struct {
1105 short curlen;
1106 char body[MAX_FILE_SPEC_LEN + 1];
1107 } vspec;
1110 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1111 returning in each successive call the next file name matching the input
1112 spec. The function expects that each in_spec passed
1113 to it will be processed to completion; in particular, up to and
1114 including the call following that in which the last matching name
1115 is returned, the function ignores the value of in_spec, and will
1116 only start processing a new spec with the following call.
1117 If an error occurs, on return out_spec contains the value
1118 of in_spec when the error occurred.
1120 With each successive file name returned in out_spec, the
1121 function's return value is one. When there are no more matching
1122 names the function returns zero. If on the first call no file
1123 matches in_spec, or there is any other error, -1 is returned.
1126 #include <rmsdef.h>
1127 #include <descrip.h>
1128 #define OUTSIZE MAX_FILE_SPEC_LEN
1129 static short
1130 fn_exp (out, in)
1131 vspec *out;
1132 char *in;
1134 static long context = 0;
1135 static struct dsc$descriptor_s o;
1136 static struct dsc$descriptor_s i;
1137 static bool pass1 = TRUE;
1138 long status;
1139 short retval;
1141 if (pass1)
1143 pass1 = FALSE;
1144 o.dsc$a_pointer = (char *) out;
1145 o.dsc$w_length = (short)OUTSIZE;
1146 i.dsc$a_pointer = in;
1147 i.dsc$w_length = (short)strlen(in);
1148 i.dsc$b_dtype = DSC$K_DTYPE_T;
1149 i.dsc$b_class = DSC$K_CLASS_S;
1150 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1151 o.dsc$b_class = DSC$K_CLASS_VS;
1153 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1155 out->body[out->curlen] = EOS;
1156 return 1;
1158 else if (status == RMS$_NMF)
1159 retval = 0;
1160 else
1162 strcpy(out->body, in);
1163 retval = -1;
1165 lib$find_file_end(&context);
1166 pass1 = TRUE;
1167 return retval;
1171 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1172 name of each file specified by the provided arg expanding wildcards.
1174 static char *
1175 gfnames (arg, p_error)
1176 char *arg;
1177 bool *p_error;
1179 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1181 switch (fn_exp (&filename, arg))
1183 case 1:
1184 *p_error = FALSE;
1185 return filename.body;
1186 case 0:
1187 *p_error = FALSE;
1188 return NULL;
1189 default:
1190 *p_error = TRUE;
1191 return filename.body;
1195 #ifndef OLD /* Newer versions of VMS do provide `system'. */
1196 system (cmd)
1197 char *cmd;
1199 error ("%s", "system() function not implemented under VMS");
1201 #endif
1203 #define VERSION_DELIM ';'
1204 char *massage_name (s)
1205 char *s;
1207 char *start = s;
1209 for ( ; *s; s++)
1210 if (*s == VERSION_DELIM)
1212 *s = EOS;
1213 break;
1215 else
1216 *s = lowcase (*s);
1217 return start;
1219 #endif /* VMS */
1223 main (argc, argv)
1224 int argc;
1225 char *argv[];
1227 int i;
1228 unsigned int nincluded_files;
1229 char **included_files;
1230 argument *argbuffer;
1231 int current_arg, file_count;
1232 linebuffer filename_lb;
1233 bool help_asked = FALSE;
1234 #ifdef VMS
1235 bool got_err;
1236 #endif
1237 char *optstring;
1238 int opt;
1241 #ifdef DOS_NT
1242 _fmode = O_BINARY; /* all of files are treated as binary files */
1243 #endif /* DOS_NT */
1245 progname = argv[0];
1246 nincluded_files = 0;
1247 included_files = xnew (argc, char *);
1248 current_arg = 0;
1249 file_count = 0;
1251 /* Allocate enough no matter what happens. Overkill, but each one
1252 is small. */
1253 argbuffer = xnew (argc, argument);
1256 * Always find typedefs and structure tags.
1257 * Also default to find macro constants, enum constants, struct
1258 * members and global variables. Do it for both etags and ctags.
1260 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1261 globals = members = TRUE;
1263 /* When the optstring begins with a '-' getopt_long does not rearrange the
1264 non-options arguments to be at the end, but leaves them alone. */
1265 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1266 "ac:Cf:Il:o:r:RSVhH",
1267 (CTAGS) ? "BxdtTuvw" : "Di:");
1269 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1270 switch (opt)
1272 case 0:
1273 /* If getopt returns 0, then it has already processed a
1274 long-named option. We should do nothing. */
1275 break;
1277 case 1:
1278 /* This means that a file name has been seen. Record it. */
1279 argbuffer[current_arg].arg_type = at_filename;
1280 argbuffer[current_arg].what = optarg;
1281 ++current_arg;
1282 ++file_count;
1283 break;
1285 case STDIN:
1286 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1287 argbuffer[current_arg].arg_type = at_stdin;
1288 argbuffer[current_arg].what = optarg;
1289 ++current_arg;
1290 ++file_count;
1291 if (parsing_stdin)
1292 fatal ("cannot parse standard input more than once", (char *)NULL);
1293 parsing_stdin = TRUE;
1294 break;
1296 /* Common options. */
1297 case 'a': append_to_tagfile = TRUE; break;
1298 case 'C': cplusplus = TRUE; break;
1299 case 'f': /* for compatibility with old makefiles */
1300 case 'o':
1301 if (tagfile)
1303 error ("-o option may only be given once.", (char *)NULL);
1304 suggest_asking_for_help ();
1305 /* NOTREACHED */
1307 tagfile = optarg;
1308 break;
1309 case 'I':
1310 case 'S': /* for backward compatibility */
1311 ignoreindent = TRUE;
1312 break;
1313 case 'l':
1315 language *lang = get_language_from_langname (optarg);
1316 if (lang != NULL)
1318 argbuffer[current_arg].lang = lang;
1319 argbuffer[current_arg].arg_type = at_language;
1320 ++current_arg;
1323 break;
1324 case 'c':
1325 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1326 optarg = concat (optarg, "i", ""); /* memory leak here */
1327 /* FALLTHRU */
1328 case 'r':
1329 argbuffer[current_arg].arg_type = at_regexp;
1330 argbuffer[current_arg].what = optarg;
1331 ++current_arg;
1332 break;
1333 case 'R':
1334 argbuffer[current_arg].arg_type = at_regexp;
1335 argbuffer[current_arg].what = NULL;
1336 ++current_arg;
1337 break;
1338 case 'V':
1339 print_version ();
1340 break;
1341 case 'h':
1342 case 'H':
1343 help_asked = TRUE;
1344 break;
1346 /* Etags options */
1347 case 'D': constantypedefs = FALSE; break;
1348 case 'i': included_files[nincluded_files++] = optarg; break;
1350 /* Ctags options. */
1351 case 'B': searchar = '?'; break;
1352 case 'd': constantypedefs = TRUE; break;
1353 case 't': typedefs = TRUE; break;
1354 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1355 case 'u': update = TRUE; break;
1356 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1357 case 'x': cxref_style = TRUE; break;
1358 case 'w': no_warnings = TRUE; break;
1359 default:
1360 suggest_asking_for_help ();
1361 /* NOTREACHED */
1364 /* No more options. Store the rest of arguments. */
1365 for (; optind < argc; optind++)
1367 argbuffer[current_arg].arg_type = at_filename;
1368 argbuffer[current_arg].what = argv[optind];
1369 ++current_arg;
1370 ++file_count;
1373 argbuffer[current_arg].arg_type = at_end;
1375 if (help_asked)
1376 print_help (argbuffer);
1377 /* NOTREACHED */
1379 if (nincluded_files == 0 && file_count == 0)
1381 error ("no input files specified.", (char *)NULL);
1382 suggest_asking_for_help ();
1383 /* NOTREACHED */
1386 if (tagfile == NULL)
1387 tagfile = CTAGS ? "tags" : "TAGS";
1388 cwd = etags_getcwd (); /* the current working directory */
1389 if (cwd[strlen (cwd) - 1] != '/')
1391 char *oldcwd = cwd;
1392 cwd = concat (oldcwd, "/", "");
1393 free (oldcwd);
1395 /* Relative file names are made relative to the current directory. */
1396 if (streq (tagfile, "-")
1397 || strneq (tagfile, "/dev/", 5))
1398 tagfiledir = cwd;
1399 else
1400 tagfiledir = absolute_dirname (tagfile, cwd);
1402 init (); /* set up boolean "functions" */
1404 linebuffer_init (&lb);
1405 linebuffer_init (&filename_lb);
1406 linebuffer_init (&filebuf);
1407 linebuffer_init (&token_name);
1409 if (!CTAGS)
1411 if (streq (tagfile, "-"))
1413 tagf = stdout;
1414 #ifdef DOS_NT
1415 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1416 doesn't take effect until after `stdout' is already open). */
1417 if (!isatty (fileno (stdout)))
1418 setmode (fileno (stdout), O_BINARY);
1419 #endif /* DOS_NT */
1421 else
1422 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1423 if (tagf == NULL)
1424 pfatal (tagfile);
1428 * Loop through files finding functions.
1430 for (i = 0; i < current_arg; i++)
1432 static language *lang; /* non-NULL if language is forced */
1433 char *this_file;
1435 switch (argbuffer[i].arg_type)
1437 case at_language:
1438 lang = argbuffer[i].lang;
1439 break;
1440 case at_regexp:
1441 analyse_regex (argbuffer[i].what);
1442 break;
1443 case at_filename:
1444 #ifdef VMS
1445 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1447 if (got_err)
1449 error ("can't find file %s\n", this_file);
1450 argc--, argv++;
1452 else
1454 this_file = massage_name (this_file);
1456 #else
1457 this_file = argbuffer[i].what;
1458 #endif
1459 /* Input file named "-" means read file names from stdin
1460 (one per line) and use them. */
1461 if (streq (this_file, "-"))
1463 if (parsing_stdin)
1464 fatal ("cannot parse standard input AND read file names from it",
1465 (char *)NULL);
1466 while (readline_internal (&filename_lb, stdin) > 0)
1467 process_file_name (filename_lb.buffer, lang);
1469 else
1470 process_file_name (this_file, lang);
1471 #ifdef VMS
1473 #endif
1474 break;
1475 case at_stdin:
1476 this_file = argbuffer[i].what;
1477 process_file (stdin, this_file, lang);
1478 break;
1482 free_regexps ();
1483 free (lb.buffer);
1484 free (filebuf.buffer);
1485 free (token_name.buffer);
1487 if (!CTAGS || cxref_style)
1489 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1490 put_entries (nodehead);
1491 free_tree (nodehead);
1492 nodehead = NULL;
1493 if (!CTAGS)
1495 fdesc *fdp;
1497 /* Output file entries that have no tags. */
1498 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1499 if (!fdp->written)
1500 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1502 while (nincluded_files-- > 0)
1503 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1505 if (fclose (tagf) == EOF)
1506 pfatal (tagfile);
1509 exit (EXIT_SUCCESS);
1512 /* From here on, we are in (CTAGS && !cxref_style) */
1513 if (update)
1515 char cmd[BUFSIZ];
1516 for (i = 0; i < current_arg; ++i)
1518 switch (argbuffer[i].arg_type)
1520 case at_filename:
1521 case at_stdin:
1522 break;
1523 default:
1524 continue; /* the for loop */
1526 sprintf (cmd,
1527 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1528 tagfile, argbuffer[i].what, tagfile);
1529 if (system (cmd) != EXIT_SUCCESS)
1530 fatal ("failed to execute shell command", (char *)NULL);
1532 append_to_tagfile = TRUE;
1535 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1536 if (tagf == NULL)
1537 pfatal (tagfile);
1538 put_entries (nodehead); /* write all the tags (CTAGS) */
1539 free_tree (nodehead);
1540 nodehead = NULL;
1541 if (fclose (tagf) == EOF)
1542 pfatal (tagfile);
1544 if (CTAGS)
1545 if (append_to_tagfile || update)
1547 char cmd[2*BUFSIZ+20];
1548 /* Maybe these should be used:
1549 setenv ("LC_COLLATE", "C", 1);
1550 setenv ("LC_ALL", "C", 1); */
1551 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1552 exit (system (cmd));
1554 return EXIT_SUCCESS;
1559 * Return a compressor given the file name. If EXTPTR is non-zero,
1560 * return a pointer into FILE where the compressor-specific
1561 * extension begins. If no compressor is found, NULL is returned
1562 * and EXTPTR is not significant.
1563 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1565 static compressor *
1566 get_compressor_from_suffix (file, extptr)
1567 char *file;
1568 char **extptr;
1570 compressor *compr;
1571 char *slash, *suffix;
1573 /* This relies on FN to be after canonicalize_filename,
1574 so we don't need to consider backslashes on DOS_NT. */
1575 slash = etags_strrchr (file, '/');
1576 suffix = etags_strrchr (file, '.');
1577 if (suffix == NULL || suffix < slash)
1578 return NULL;
1579 if (extptr != NULL)
1580 *extptr = suffix;
1581 suffix += 1;
1582 /* Let those poor souls who live with DOS 8+3 file name limits get
1583 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1584 Only the first do loop is run if not MSDOS */
1587 for (compr = compressors; compr->suffix != NULL; compr++)
1588 if (streq (compr->suffix, suffix))
1589 return compr;
1590 if (!MSDOS)
1591 break; /* do it only once: not really a loop */
1592 if (extptr != NULL)
1593 *extptr = ++suffix;
1594 } while (*suffix != '\0');
1595 return NULL;
1601 * Return a language given the name.
1603 static language *
1604 get_language_from_langname (name)
1605 const char *name;
1607 language *lang;
1609 if (name == NULL)
1610 error ("empty language name", (char *)NULL);
1611 else
1613 for (lang = lang_names; lang->name != NULL; lang++)
1614 if (streq (name, lang->name))
1615 return lang;
1616 error ("unknown language \"%s\"", name);
1619 return NULL;
1624 * Return a language given the interpreter name.
1626 static language *
1627 get_language_from_interpreter (interpreter)
1628 char *interpreter;
1630 language *lang;
1631 char **iname;
1633 if (interpreter == NULL)
1634 return NULL;
1635 for (lang = lang_names; lang->name != NULL; lang++)
1636 if (lang->interpreters != NULL)
1637 for (iname = lang->interpreters; *iname != NULL; iname++)
1638 if (streq (*iname, interpreter))
1639 return lang;
1641 return NULL;
1647 * Return a language given the file name.
1649 static language *
1650 get_language_from_filename (file, case_sensitive)
1651 char *file;
1652 bool case_sensitive;
1654 language *lang;
1655 char **name, **ext, *suffix;
1657 /* Try whole file name first. */
1658 for (lang = lang_names; lang->name != NULL; lang++)
1659 if (lang->filenames != NULL)
1660 for (name = lang->filenames; *name != NULL; name++)
1661 if ((case_sensitive)
1662 ? streq (*name, file)
1663 : strcaseeq (*name, file))
1664 return lang;
1666 /* If not found, try suffix after last dot. */
1667 suffix = etags_strrchr (file, '.');
1668 if (suffix == NULL)
1669 return NULL;
1670 suffix += 1;
1671 for (lang = lang_names; lang->name != NULL; lang++)
1672 if (lang->suffixes != NULL)
1673 for (ext = lang->suffixes; *ext != NULL; ext++)
1674 if ((case_sensitive)
1675 ? streq (*ext, suffix)
1676 : strcaseeq (*ext, suffix))
1677 return lang;
1678 return NULL;
1683 * This routine is called on each file argument.
1685 static void
1686 process_file_name (file, lang)
1687 char *file;
1688 language *lang;
1690 struct stat stat_buf;
1691 FILE *inf;
1692 fdesc *fdp;
1693 compressor *compr;
1694 char *compressed_name, *uncompressed_name;
1695 char *ext, *real_name;
1696 int retval;
1698 canonicalize_filename (file);
1699 if (streq (file, tagfile) && !streq (tagfile, "-"))
1701 error ("skipping inclusion of %s in self.", file);
1702 return;
1704 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1706 compressed_name = NULL;
1707 real_name = uncompressed_name = savestr (file);
1709 else
1711 real_name = compressed_name = savestr (file);
1712 uncompressed_name = savenstr (file, ext - file);
1715 /* If the canonicalized uncompressed name
1716 has already been dealt with, skip it silently. */
1717 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1719 assert (fdp->infname != NULL);
1720 if (streq (uncompressed_name, fdp->infname))
1721 goto cleanup;
1724 if (stat (real_name, &stat_buf) != 0)
1726 /* Reset real_name and try with a different name. */
1727 real_name = NULL;
1728 if (compressed_name != NULL) /* try with the given suffix */
1730 if (stat (uncompressed_name, &stat_buf) == 0)
1731 real_name = uncompressed_name;
1733 else /* try all possible suffixes */
1735 for (compr = compressors; compr->suffix != NULL; compr++)
1737 compressed_name = concat (file, ".", compr->suffix);
1738 if (stat (compressed_name, &stat_buf) != 0)
1740 if (MSDOS)
1742 char *suf = compressed_name + strlen (file);
1743 size_t suflen = strlen (compr->suffix) + 1;
1744 for ( ; suf[1]; suf++, suflen--)
1746 memmove (suf, suf + 1, suflen);
1747 if (stat (compressed_name, &stat_buf) == 0)
1749 real_name = compressed_name;
1750 break;
1753 if (real_name != NULL)
1754 break;
1755 } /* MSDOS */
1756 free (compressed_name);
1757 compressed_name = NULL;
1759 else
1761 real_name = compressed_name;
1762 break;
1766 if (real_name == NULL)
1768 perror (file);
1769 goto cleanup;
1771 } /* try with a different name */
1773 if (!S_ISREG (stat_buf.st_mode))
1775 error ("skipping %s: it is not a regular file.", real_name);
1776 goto cleanup;
1778 if (real_name == compressed_name)
1780 char *cmd = concat (compr->command, " ", real_name);
1781 inf = (FILE *) popen (cmd, "r");
1782 free (cmd);
1784 else
1785 inf = fopen (real_name, "r");
1786 if (inf == NULL)
1788 perror (real_name);
1789 goto cleanup;
1792 process_file (inf, uncompressed_name, lang);
1794 if (real_name == compressed_name)
1795 retval = pclose (inf);
1796 else
1797 retval = fclose (inf);
1798 if (retval < 0)
1799 pfatal (file);
1801 cleanup:
1802 free (compressed_name);
1803 free (uncompressed_name);
1804 last_node = NULL;
1805 curfdp = NULL;
1806 return;
1809 static void
1810 process_file (fh, fn, lang)
1811 FILE *fh;
1812 char *fn;
1813 language *lang;
1815 static const fdesc emptyfdesc;
1816 fdesc *fdp;
1818 /* Create a new input file description entry. */
1819 fdp = xnew (1, fdesc);
1820 *fdp = emptyfdesc;
1821 fdp->next = fdhead;
1822 fdp->infname = savestr (fn);
1823 fdp->lang = lang;
1824 fdp->infabsname = absolute_filename (fn, cwd);
1825 fdp->infabsdir = absolute_dirname (fn, cwd);
1826 if (filename_is_absolute (fn))
1828 /* An absolute file name. Canonicalize it. */
1829 fdp->taggedfname = absolute_filename (fn, NULL);
1831 else
1833 /* A file name relative to cwd. Make it relative
1834 to the directory of the tags file. */
1835 fdp->taggedfname = relative_filename (fn, tagfiledir);
1837 fdp->usecharno = TRUE; /* use char position when making tags */
1838 fdp->prop = NULL;
1839 fdp->written = FALSE; /* not written on tags file yet */
1841 fdhead = fdp;
1842 curfdp = fdhead; /* the current file description */
1844 find_entries (fh);
1846 /* If not Ctags, and if this is not metasource and if it contained no #line
1847 directives, we can write the tags and free all nodes pointing to
1848 curfdp. */
1849 if (!CTAGS
1850 && curfdp->usecharno /* no #line directives in this file */
1851 && !curfdp->lang->metasource)
1853 node *np, *prev;
1855 /* Look for the head of the sublist relative to this file. See add_node
1856 for the structure of the node tree. */
1857 prev = NULL;
1858 for (np = nodehead; np != NULL; prev = np, np = np->left)
1859 if (np->fdp == curfdp)
1860 break;
1862 /* If we generated tags for this file, write and delete them. */
1863 if (np != NULL)
1865 /* This is the head of the last sublist, if any. The following
1866 instructions depend on this being true. */
1867 assert (np->left == NULL);
1869 assert (fdhead == curfdp);
1870 assert (last_node->fdp == curfdp);
1871 put_entries (np); /* write tags for file curfdp->taggedfname */
1872 free_tree (np); /* remove the written nodes */
1873 if (prev == NULL)
1874 nodehead = NULL; /* no nodes left */
1875 else
1876 prev->left = NULL; /* delete the pointer to the sublist */
1882 * This routine sets up the boolean pseudo-functions which work
1883 * by setting boolean flags dependent upon the corresponding character.
1884 * Every char which is NOT in that string is not a white char. Therefore,
1885 * all of the array "_wht" is set to FALSE, and then the elements
1886 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1887 * of a char is TRUE if it is the string "white", else FALSE.
1889 static void
1890 init ()
1892 register char *sp;
1893 register int i;
1895 for (i = 0; i < CHARS; i++)
1896 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1897 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1898 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1899 notinname('\0') = notinname('\n');
1900 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1901 begtoken('\0') = begtoken('\n');
1902 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1903 intoken('\0') = intoken('\n');
1904 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1905 endtoken('\0') = endtoken('\n');
1909 * This routine opens the specified file and calls the function
1910 * which finds the function and type definitions.
1912 static void
1913 find_entries (inf)
1914 FILE *inf;
1916 char *cp;
1917 language *lang = curfdp->lang;
1918 Lang_function *parser = NULL;
1920 /* If user specified a language, use it. */
1921 if (lang != NULL && lang->function != NULL)
1923 parser = lang->function;
1926 /* Else try to guess the language given the file name. */
1927 if (parser == NULL)
1929 lang = get_language_from_filename (curfdp->infname, TRUE);
1930 if (lang != NULL && lang->function != NULL)
1932 curfdp->lang = lang;
1933 parser = lang->function;
1937 /* Else look for sharp-bang as the first two characters. */
1938 if (parser == NULL
1939 && readline_internal (&lb, inf) > 0
1940 && lb.len >= 2
1941 && lb.buffer[0] == '#'
1942 && lb.buffer[1] == '!')
1944 char *lp;
1946 /* Set lp to point at the first char after the last slash in the
1947 line or, if no slashes, at the first nonblank. Then set cp to
1948 the first successive blank and terminate the string. */
1949 lp = etags_strrchr (lb.buffer+2, '/');
1950 if (lp != NULL)
1951 lp += 1;
1952 else
1953 lp = skip_spaces (lb.buffer + 2);
1954 cp = skip_non_spaces (lp);
1955 *cp = '\0';
1957 if (strlen (lp) > 0)
1959 lang = get_language_from_interpreter (lp);
1960 if (lang != NULL && lang->function != NULL)
1962 curfdp->lang = lang;
1963 parser = lang->function;
1968 /* We rewind here, even if inf may be a pipe. We fail if the
1969 length of the first line is longer than the pipe block size,
1970 which is unlikely. */
1971 rewind (inf);
1973 /* Else try to guess the language given the case insensitive file name. */
1974 if (parser == NULL)
1976 lang = get_language_from_filename (curfdp->infname, FALSE);
1977 if (lang != NULL && lang->function != NULL)
1979 curfdp->lang = lang;
1980 parser = lang->function;
1984 /* Else try Fortran or C. */
1985 if (parser == NULL)
1987 node *old_last_node = last_node;
1989 curfdp->lang = get_language_from_langname ("fortran");
1990 find_entries (inf);
1992 if (old_last_node == last_node)
1993 /* No Fortran entries found. Try C. */
1995 /* We do not tag if rewind fails.
1996 Only the file name will be recorded in the tags file. */
1997 rewind (inf);
1998 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1999 find_entries (inf);
2001 return;
2004 if (!no_line_directive
2005 && curfdp->lang != NULL && curfdp->lang->metasource)
2006 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
2007 file, or anyway we parsed a file that is automatically generated from
2008 this one. If this is the case, the bingo.c file contained #line
2009 directives that generated tags pointing to this file. Let's delete
2010 them all before parsing this file, which is the real source. */
2012 fdesc **fdpp = &fdhead;
2013 while (*fdpp != NULL)
2014 if (*fdpp != curfdp
2015 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2016 /* We found one of those! We must delete both the file description
2017 and all tags referring to it. */
2019 fdesc *badfdp = *fdpp;
2021 /* Delete the tags referring to badfdp->taggedfname
2022 that were obtained from badfdp->infname. */
2023 invalidate_nodes (badfdp, &nodehead);
2025 *fdpp = badfdp->next; /* remove the bad description from the list */
2026 free_fdesc (badfdp);
2028 else
2029 fdpp = &(*fdpp)->next; /* advance the list pointer */
2032 assert (parser != NULL);
2034 /* Generic initialisations before reading from file. */
2035 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2037 /* Generic initialisations before parsing file with readline. */
2038 lineno = 0; /* reset global line number */
2039 charno = 0; /* reset global char number */
2040 linecharno = 0; /* reset global char number of line start */
2042 parser (inf);
2044 regex_tag_multiline ();
2049 * Check whether an implicitly named tag should be created,
2050 * then call `pfnote'.
2051 * NAME is a string that is internally copied by this function.
2053 * TAGS format specification
2054 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2055 * The following is explained in some more detail in etc/ETAGS.EBNF.
2057 * make_tag creates tags with "implicit tag names" (unnamed tags)
2058 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2059 * 1. NAME does not contain any of the characters in NONAM;
2060 * 2. LINESTART contains name as either a rightmost, or rightmost but
2061 * one character, substring;
2062 * 3. the character, if any, immediately before NAME in LINESTART must
2063 * be a character in NONAM;
2064 * 4. the character, if any, immediately after NAME in LINESTART must
2065 * also be a character in NONAM.
2067 * The implementation uses the notinname() macro, which recognises the
2068 * characters stored in the string `nonam'.
2069 * etags.el needs to use the same characters that are in NONAM.
2071 static void
2072 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2073 char *name; /* tag name, or NULL if unnamed */
2074 int namelen; /* tag length */
2075 bool is_func; /* tag is a function */
2076 char *linestart; /* start of the line where tag is */
2077 int linelen; /* length of the line where tag is */
2078 int lno; /* line number */
2079 long cno; /* character number */
2081 bool named = (name != NULL && namelen > 0);
2083 if (!CTAGS && named) /* maybe set named to false */
2084 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2085 such that etags.el can guess a name from it. */
2087 int i;
2088 register char *cp = name;
2090 for (i = 0; i < namelen; i++)
2091 if (notinname (*cp++))
2092 break;
2093 if (i == namelen) /* rule #1 */
2095 cp = linestart + linelen - namelen;
2096 if (notinname (linestart[linelen-1]))
2097 cp -= 1; /* rule #4 */
2098 if (cp >= linestart /* rule #2 */
2099 && (cp == linestart
2100 || notinname (cp[-1])) /* rule #3 */
2101 && strneq (name, cp, namelen)) /* rule #2 */
2102 named = FALSE; /* use implicit tag name */
2106 if (named)
2107 name = savenstr (name, namelen);
2108 else
2109 name = NULL;
2110 pfnote (name, is_func, linestart, linelen, lno, cno);
2113 /* Record a tag. */
2114 static void
2115 pfnote (name, is_func, linestart, linelen, lno, cno)
2116 char *name; /* tag name, or NULL if unnamed */
2117 bool is_func; /* tag is a function */
2118 char *linestart; /* start of the line where tag is */
2119 int linelen; /* length of the line where tag is */
2120 int lno; /* line number */
2121 long cno; /* character number */
2123 register node *np;
2125 assert (name == NULL || name[0] != '\0');
2126 if (CTAGS && name == NULL)
2127 return;
2129 np = xnew (1, node);
2131 /* If ctags mode, change name "main" to M<thisfilename>. */
2132 if (CTAGS && !cxref_style && streq (name, "main"))
2134 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2135 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2136 fp = etags_strrchr (np->name, '.');
2137 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2138 fp[0] = '\0';
2140 else
2141 np->name = name;
2142 np->valid = TRUE;
2143 np->been_warned = FALSE;
2144 np->fdp = curfdp;
2145 np->is_func = is_func;
2146 np->lno = lno;
2147 if (np->fdp->usecharno)
2148 /* Our char numbers are 0-base, because of C language tradition?
2149 ctags compatibility? old versions compatibility? I don't know.
2150 Anyway, since emacs's are 1-base we expect etags.el to take care
2151 of the difference. If we wanted to have 1-based numbers, we would
2152 uncomment the +1 below. */
2153 np->cno = cno /* + 1 */ ;
2154 else
2155 np->cno = invalidcharno;
2156 np->left = np->right = NULL;
2157 if (CTAGS && !cxref_style)
2159 if (strlen (linestart) < 50)
2160 np->regex = concat (linestart, "$", "");
2161 else
2162 np->regex = savenstr (linestart, 50);
2164 else
2165 np->regex = savenstr (linestart, linelen);
2167 add_node (np, &nodehead);
2171 * free_tree ()
2172 * recurse on left children, iterate on right children.
2174 static void
2175 free_tree (np)
2176 register node *np;
2178 while (np)
2180 register node *node_right = np->right;
2181 free_tree (np->left);
2182 free (np->name);
2183 free (np->regex);
2184 free (np);
2185 np = node_right;
2190 * free_fdesc ()
2191 * delete a file description
2193 static void
2194 free_fdesc (fdp)
2195 register fdesc *fdp;
2197 free (fdp->infname);
2198 free (fdp->infabsname);
2199 free (fdp->infabsdir);
2200 free (fdp->taggedfname);
2201 free (fdp->prop);
2202 free (fdp);
2206 * add_node ()
2207 * Adds a node to the tree of nodes. In etags mode, sort by file
2208 * name. In ctags mode, sort by tag name. Make no attempt at
2209 * balancing.
2211 * add_node is the only function allowed to add nodes, so it can
2212 * maintain state.
2214 static void
2215 add_node (np, cur_node_p)
2216 node *np, **cur_node_p;
2218 register int dif;
2219 register node *cur_node = *cur_node_p;
2221 if (cur_node == NULL)
2223 *cur_node_p = np;
2224 last_node = np;
2225 return;
2228 if (!CTAGS)
2229 /* Etags Mode */
2231 /* For each file name, tags are in a linked sublist on the right
2232 pointer. The first tags of different files are a linked list
2233 on the left pointer. last_node points to the end of the last
2234 used sublist. */
2235 if (last_node != NULL && last_node->fdp == np->fdp)
2237 /* Let's use the same sublist as the last added node. */
2238 assert (last_node->right == NULL);
2239 last_node->right = np;
2240 last_node = np;
2242 else if (cur_node->fdp == np->fdp)
2244 /* Scanning the list we found the head of a sublist which is
2245 good for us. Let's scan this sublist. */
2246 add_node (np, &cur_node->right);
2248 else
2249 /* The head of this sublist is not good for us. Let's try the
2250 next one. */
2251 add_node (np, &cur_node->left);
2252 } /* if ETAGS mode */
2254 else
2256 /* Ctags Mode */
2257 dif = strcmp (np->name, cur_node->name);
2260 * If this tag name matches an existing one, then
2261 * do not add the node, but maybe print a warning.
2263 if (no_duplicates && !dif)
2265 if (np->fdp == cur_node->fdp)
2267 if (!no_warnings)
2269 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2270 np->fdp->infname, lineno, np->name);
2271 fprintf (stderr, "Second entry ignored\n");
2274 else if (!cur_node->been_warned && !no_warnings)
2276 fprintf
2277 (stderr,
2278 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2279 np->fdp->infname, cur_node->fdp->infname, np->name);
2280 cur_node->been_warned = TRUE;
2282 return;
2285 /* Actually add the node */
2286 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2287 } /* if CTAGS mode */
2291 * invalidate_nodes ()
2292 * Scan the node tree and invalidate all nodes pointing to the
2293 * given file description (CTAGS case) or free them (ETAGS case).
2295 static void
2296 invalidate_nodes (badfdp, npp)
2297 fdesc *badfdp;
2298 node **npp;
2300 node *np = *npp;
2302 if (np == NULL)
2303 return;
2305 if (CTAGS)
2307 if (np->left != NULL)
2308 invalidate_nodes (badfdp, &np->left);
2309 if (np->fdp == badfdp)
2310 np->valid = FALSE;
2311 if (np->right != NULL)
2312 invalidate_nodes (badfdp, &np->right);
2314 else
2316 assert (np->fdp != NULL);
2317 if (np->fdp == badfdp)
2319 *npp = np->left; /* detach the sublist from the list */
2320 np->left = NULL; /* isolate it */
2321 free_tree (np); /* free it */
2322 invalidate_nodes (badfdp, npp);
2324 else
2325 invalidate_nodes (badfdp, &np->left);
/* Forward declarations of the two helpers that compute the size field
   written by put_entries in the header of each file section.  */
2330 static int total_size_of_entries __P((node *));
2331 static int number_len __P((long));
/* Length of a number's decimal representation, that is, the number of
   characters a "%ld" conversion prints for NUM.  A negative NUM counts
   one extra character for its minus sign.  */
static int
number_len (num)
     long num;
{
  int len = 1;

  if (num < 0)
    len += 1;			/* the '-' sign */
  /* Division truncates toward zero, so this also terminates for
     negative values.  */
  while ((num /= 10) != 0)
    len += 1;
  return len;
}
2345 * Return total number of characters that put_entries will output for
2346 * the nodes in the linked list at the right of the specified node.
2347 * This count is irrelevant with etags.el since emacs 19.34 at least,
2348 * but is still supplied for backward compatibility.
2350 static int
2351 total_size_of_entries (np)
2352 register node *np;
2354 register int total = 0;
2356 for (; np != NULL; np = np->right)
2357 if (np->valid)
2359 total += strlen (np->regex) + 1; /* pat\177 */
2360 if (np->name != NULL)
2361 total += strlen (np->name) + 1; /* name\001 */
2362 total += number_len ((long) np->lno) + 1; /* lno, */
2363 if (np->cno != invalidcharno) /* cno */
2364 total += number_len (np->cno);
2365 total += 1; /* newline */
2368 return total;
2371 static void
2372 put_entries (np)
2373 register node *np;
2375 register char *sp;
2376 static fdesc *fdp = NULL;
2378 if (np == NULL)
2379 return;
2381 /* Output subentries that precede this one */
2382 if (CTAGS)
2383 put_entries (np->left);
2385 /* Output this entry */
2386 if (np->valid)
2388 if (!CTAGS)
2390 /* Etags mode */
2391 if (fdp != np->fdp)
2393 fdp = np->fdp;
2394 fprintf (tagf, "\f\n%s,%d\n",
2395 fdp->taggedfname, total_size_of_entries (np));
2396 fdp->written = TRUE;
2398 fputs (np->regex, tagf);
2399 fputc ('\177', tagf);
2400 if (np->name != NULL)
2402 fputs (np->name, tagf);
2403 fputc ('\001', tagf);
2405 fprintf (tagf, "%d,", np->lno);
2406 if (np->cno != invalidcharno)
2407 fprintf (tagf, "%ld", np->cno);
2408 fputs ("\n", tagf);
2410 else
2412 /* Ctags mode */
2413 if (np->name == NULL)
2414 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2416 if (cxref_style)
2418 if (vgrind_style)
2419 fprintf (stdout, "%s %s %d\n",
2420 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2421 else
2422 fprintf (stdout, "%-16s %3d %-16s %s\n",
2423 np->name, np->lno, np->fdp->taggedfname, np->regex);
2425 else
2427 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2429 if (np->is_func)
2430 { /* function or #define macro with args */
2431 putc (searchar, tagf);
2432 putc ('^', tagf);
2434 for (sp = np->regex; *sp; sp++)
2436 if (*sp == '\\' || *sp == searchar)
2437 putc ('\\', tagf);
2438 putc (*sp, tagf);
2440 putc (searchar, tagf);
2442 else
2443 { /* anything else; text pattern inadequate */
2444 fprintf (tagf, "%d", np->lno);
2446 putc ('\n', tagf);
2449 } /* if this node contains a valid tag */
2451 /* Output subentries that follow this one */
2452 put_entries (np->right);
2453 if (!CTAGS)
2454 put_entries (np->left);
2458 /* C extensions. */
/* Language selection bits.  C_EXT masks the language part of a mask.
   C_STAR (0x3) and C_JAVA (0x5) both include the C_PLPL bit (0x1),
   which is why the keyword table below uses (C_JAVA & ~C_PLPL) for
   keywords that are not meant to match plain C++.  C_AUTO and YACC lie
   outside the C_EXT mask.  */
2459 #define C_EXT 0x00fff /* C extensions */
2460 #define C_PLAIN 0x00000 /* C */
2461 #define C_PLPL 0x00001 /* C++ */
2462 #define C_STAR 0x00003 /* C* */
2463 #define C_JAVA 0x00005 /* JAVA */
2464 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2465 #define YACC 0x10000 /* yacc file */
/* Classification of a token by the keyword table: st_none is returned
   by C_symtype for anything that is not a keyword enabled for the
   current language; the other values are the `type' members of the
   keyword table entries.  */
2468 * The C symbol tables.
2470 enum sym_type
2472 st_none,
2473 st_C_objprot, st_C_objimpl, st_C_objend,
2474 st_C_gnumacro,
2475 st_C_ignore, st_C_attribute,
2476 st_C_javastruct,
2477 st_C_operator,
2478 st_C_class, st_C_template,
2479 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
/* hash and in_word_set are the gperf-generated lookup functions;
   C_symtype is the hand-written wrapper around them.  */
2482 static unsigned int hash __P((const char *, unsigned int));
2483 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2484 static enum sym_type C_symtype __P((char *, int, int));
2486 /* Feed stuff between (but not including) %[ and %] lines to:
2487 gperf -m 5
2489 %compare-strncmp
2490 %enum
2491 %struct-type
2492 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2494 if, 0, st_C_ignore
2495 for, 0, st_C_ignore
2496 while, 0, st_C_ignore
2497 switch, 0, st_C_ignore
2498 return, 0, st_C_ignore
2499 __attribute__, 0, st_C_attribute
2500 GTY, 0, st_C_attribute
2501 @interface, 0, st_C_objprot
2502 @protocol, 0, st_C_objprot
2503 @implementation,0, st_C_objimpl
2504 @end, 0, st_C_objend
2505 import, (C_JAVA & ~C_PLPL), st_C_ignore
2506 package, (C_JAVA & ~C_PLPL), st_C_ignore
2507 friend, C_PLPL, st_C_ignore
2508 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2509 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2510 interface, (C_JAVA & ~C_PLPL), st_C_struct
2511 class, 0, st_C_class
2512 namespace, C_PLPL, st_C_struct
2513 domain, C_STAR, st_C_struct
2514 union, 0, st_C_struct
2515 struct, 0, st_C_struct
2516 extern, 0, st_C_extern
2517 enum, 0, st_C_enum
2518 typedef, 0, st_C_typedef
2519 define, 0, st_C_define
2520 undef, 0, st_C_define
2521 operator, C_PLPL, st_C_operator
2522 template, 0, st_C_template
2523 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2524 DEFUN, 0, st_C_gnumacro
2525 SYSCALL, 0, st_C_gnumacro
2526 ENTRY, 0, st_C_gnumacro
2527 PSEUDO, 0, st_C_gnumacro
2528 # These are defined inside C functions, so currently they are not met.
2529 # EXFUN used in glibc, DEFVAR_* in emacs.
2530 #EXFUN, 0, st_C_gnumacro
2531 #DEFVAR_, 0, st_C_gnumacro
2533 and replace lines between %< and %> with its output, then:
2534 - remove the #if characterset check
2535 - make in_word_set static and not inline. */
2536 /*%<*/
2537 /* C code produced by gperf version 3.0.1 */
2538 /* Command-line: gperf -m 5 */
2539 /* Computed positions: -k'2-3' */
/* One keyword table entry: the keyword, the language mask it is
   restricted to (0 means no restriction, see C_symtype), and its
   classification.  */
2541 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2542 /* maximum key range = 33, duplicates = 0 */
/* gperf-generated hash function for the keyword table.  The value is
   LEN plus the asso_values entry of the second character and, for any
   LEN other than 2, also that of the third character.  The caller
   (in_word_set) only calls it for LEN >= MIN_WORD_LENGTH, so str[1]
   is always within the token.  Regenerate with gperf instead of
   editing by hand.  */
2544 #ifdef __GNUC__
2545 __inline
2546 #else
2547 #ifdef __cplusplus
2548 inline
2549 #endif
2550 #endif
2551 static unsigned int
2552 hash (str, len)
2553 register const char *str;
2554 register unsigned int len;
/* Per-character contribution to the hash, indexed by unsigned char
   value (256 entries).  */
2556 static unsigned char asso_values[] =
2558 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2559 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2560 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2561 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2562 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2563 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2564 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2565 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2566 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2567 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2568 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2569 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2570 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2571 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2572 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2573 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2574 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2575 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2576 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2577 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2578 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2579 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2580 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2581 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2582 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2583 35, 35, 35, 35, 35, 35
2585 register int hval = len;
/* The switch is on the length: only a two-character token skips the
   third-character term.  */
2587 switch (hval)
2589 default:
2590 hval += asso_values[(unsigned char)str[2]];
2591 /*FALLTHROUGH*/
2592 case 2:
2593 hval += asso_values[(unsigned char)str[1]];
2594 break;
2596 return hval;
/* gperf-generated keyword lookup.  Returns a pointer to the wordlist
   entry whose name is exactly the LEN characters at STR, or 0 when
   there is none.  STR does not need to be NUL-terminated at LEN: the
   comparison is done with strncmp plus a check that the table entry
   ends at LEN.  Regenerate with gperf instead of editing by hand.  */
2599 static struct C_stab_entry *
2600 in_word_set (str, len)
2601 register const char *str;
2602 register unsigned int len;
2604 enum
2606 TOTAL_KEYWORDS = 33,
2607 MIN_WORD_LENGTH = 2,
2608 MAX_WORD_LENGTH = 15,
2609 MIN_HASH_VALUE = 2,
2610 MAX_HASH_VALUE = 34
/* Indexed by hash value; slots 0 and 1 are unused placeholders.  */
2613 static struct C_stab_entry wordlist[] =
2615 {""}, {""},
2616 {"if", 0, st_C_ignore},
2617 {"GTY", 0, st_C_attribute},
2618 {"@end", 0, st_C_objend},
2619 {"union", 0, st_C_struct},
2620 {"define", 0, st_C_define},
2621 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2622 {"template", 0, st_C_template},
2623 {"operator", C_PLPL, st_C_operator},
2624 {"@interface", 0, st_C_objprot},
2625 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2626 {"friend", C_PLPL, st_C_ignore},
2627 {"typedef", 0, st_C_typedef},
2628 {"return", 0, st_C_ignore},
2629 {"@implementation",0, st_C_objimpl},
2630 {"@protocol", 0, st_C_objprot},
2631 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2632 {"extern", 0, st_C_extern},
2633 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2634 {"struct", 0, st_C_struct},
2635 {"domain", C_STAR, st_C_struct},
2636 {"switch", 0, st_C_ignore},
2637 {"enum", 0, st_C_enum},
2638 {"for", 0, st_C_ignore},
2639 {"namespace", C_PLPL, st_C_struct},
2640 {"class", 0, st_C_class},
2641 {"while", 0, st_C_ignore},
2642 {"undef", 0, st_C_define},
2643 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2644 {"__attribute__", 0, st_C_attribute},
2645 {"SYSCALL", 0, st_C_gnumacro},
2646 {"ENTRY", 0, st_C_gnumacro},
2647 {"PSEUDO", 0, st_C_gnumacro},
2648 {"DEFUN", 0, st_C_gnumacro}
/* Only lengths that some keyword has can match.  */
2651 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2653 register int key = hash (str, len);
2655 if (key <= MAX_HASH_VALUE && key >= 0)
2657 register const char *s = wordlist[key].name;
/* The hash only selects a candidate; confirm it character by character.  */
2659 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2660 return &wordlist[key];
2663 return 0;
2665 /*%>*/
2667 static enum sym_type
2668 C_symtype (str, len, c_ext)
2669 char *str;
2670 int len;
2671 int c_ext;
2673 register struct C_stab_entry *se = in_word_set (str, len);
2675 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2676 return st_none;
2677 return se->type;
/* Set to TRUE by consider_token when it meets a token classified as
   st_C_attribute.  */
2682 * Ignoring __attribute__ ((list))
2684 static bool inattribute; /* looking at an __attribute__ construct */
/* State of the function/variable recogniser.  consider_token moves it
   to fdefunkey, fdefunname, foperator, fvnameseen and vignore, and
   resets it to fvnone; the remaining states are not assigned in
   consider_token.  */
2687 * C functions and variables are recognized using a simple
2688 * finite automaton. fvdef is its state variable.
2690 static enum
2692 fvnone, /* nothing seen */
2693 fdefunkey, /* Emacs DEFUN keyword seen */
2694 fdefunname, /* Emacs DEFUN name seen */
2695 foperator, /* func: operator keyword seen (cplpl) */
2696 fvnameseen, /* function or variable name seen */
2697 fstartlist, /* func: just after open parenthesis */
2698 finlist, /* func: in parameter list */
2699 flistseen, /* func: after parameter list */
2700 fignore, /* func: before open brace */
2701 vignore /* var-like: ignore until ';' */
2702 } fvdef;
2704 static bool fvextern; /* func or var: extern keyword seen; */
/* State of the typedef recogniser.  consider_token enters tkeyseen on
   the `typedef' keyword (only when the typedefs option is set) and
   ttypeseen on the token that follows; the other states are not
   assigned in consider_token.  */
2707 * typedefs are recognized using a simple finite automaton.
2708 * typdef is its state variable.
2710 static enum
2712 tnone, /* nothing seen */
2713 tkeyseen, /* typedef keyword seen */
2714 ttypeseen, /* defined type seen */
2715 tinbody, /* inside typedef body */
2716 tend, /* just before typedef tag */
2717 tignore /* junk after typedef tag */
2718 } typdef;
/* State of the struct/union/enum/class recogniser.  consider_token
   enters skeyseen on a struct-like keyword, stagseen on the token that
   follows it, and scolonseen on a Java `extends'/`implements' keyword
   met while in stagseen.  */
2721 * struct-like structures (enum, struct and union) are recognized
2722 * using another simple finite automaton. `structdef' is its state
2723 * variable.
2725 static enum
2727 snone, /* nothing seen yet,
2728 or in struct body if bracelev > 0 */
2729 skeyseen, /* struct-like keyword seen */
2730 stagseen, /* struct-like tag seen */
2731 scolonseen /* colon seen after struct-like tag */
2732 } structdef;
/* consider_token replaces objtag with a fresh heap copy of the class
   name each time an Objective C class tag is seen; the previous value
   is not freed there.  */
2735 * When objdef is different from onone, objtag is the name of the class.
2737 static char *objtag = "<uninited>";
/* State of the preprocessor-line recogniser.  In consider_token a
   token seen in dsharpseen leads to ddefineseen when it is classified
   as st_C_define and to dignorerest otherwise; the token seen in
   ddefineseen is the macro name.  The CNL macro resets the state to
   dnone.  */
2740 * Yet another little state machine to deal with preprocessor lines.
2742 static enum
2744 dnone, /* nothing seen */
2745 dsharpseen, /* '#' seen as first char on line */
2746 ddefineseen, /* '#' and 'define' seen */
2747 dignorerest /* ignore rest of line */
2748 } definedef;
/* State of the Objective C recogniser.  consider_token assigns
   oprotocol, oimplementation, otagseen, ocatseen, oinbody, omethodtag,
   omethodparm and, on @end while in oignore, onone; the remaining
   states are not assigned in consider_token.  */
2751 * State machine for Objective C protocols and implementations.
2752 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2754 static enum
2756 onone, /* nothing seen */
2757 oprotocol, /* @interface or @protocol seen */
2758 oimplementation, /* @implementation seen */
2759 otagseen, /* class name seen */
2760 oparenseen, /* parenthesis before category seen */
2761 ocatseen, /* category name seen */
2762 oinbody, /* in @implementation body */
2763 omethodsign, /* in @implementation body, after +/- */
2764 omethodtag, /* after method name */
2765 omethodcolon, /* after method colon */
2766 omethodparm, /* after method parameter */
2767 oignore /* wait for @end */
2768 } objdef;
/* Description of the most recently read token.  make_C_tag passes
   line, offset + length + 1, lineno and linepos to make_tag, and does
   so only when valid is set (outside DEBUG builds).  */
2772 * Use this structure to keep info about the token read, and how it
2773 * should be tagged. Used by the make_C_tag function to build a tag.
2775 static struct tok
2777 char *line; /* string containing the token */
2778 int offset; /* where the token starts in LINE */
2779 int length; /* token length */
2781 The previous members can be used to pass strings around for generic
2782 purposes. The following ones specifically refer to creating tags. In this
2783 case the token contained here is the pattern that will be used to create a
2784 tag.
2786 bool valid; /* when FALSE, do not create a tag; the token
                  should be invalidated whenever a state
                  machine is reset prematurely */
2789 bool named; /* create a named tag */
2790 int lineno; /* source line number of tag */
2791 long linepos; /* source char number of tag */
2792 } token; /* latest token read */
/* Stack of the enclosing struct-like scopes.  Entry i holds the name
   of the scope (NULL when pushclass_above was given no name) and the
   brace level it was pushed with; brace levels increase strictly
   towards the top of the stack (asserted in pushclass_above).  */
2795 * Variables and functions for dealing with nested structures.
2796 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2798 static void pushclass_above __P((int, char *, int));
2799 static void popclass_above __P((int));
2800 static void write_classname __P((linebuffer *, char *qualifier));
2802 static struct {
2803 char **cname; /* nested class names */
2804 int *bracelev; /* nested class brace level */
2805 int nl; /* class nesting level (elements used) */
2806 int size; /* length of the array */
2807 } cstack; /* stack for nested declaration tags */
2808 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2809 #define nestlev (cstack.nl)
2810 /* After struct keyword or in struct body, not inside a nested function. */
/* Note that `instruct' refers to a variable named bracelev that must
   be in scope where the macro is expanded.  */
2811 #define instruct (structdef == snone && nestlev > 0 \
2812 && bracelev == cstack.bracelev[nestlev-1] + 1)
2814 static void
2815 pushclass_above (bracelev, str, len)
2816 int bracelev;
2817 char *str;
2818 int len;
2820 int nl;
2822 popclass_above (bracelev);
2823 nl = cstack.nl;
2824 if (nl >= cstack.size)
2826 int size = cstack.size *= 2;
2827 xrnew (cstack.cname, size, char *);
2828 xrnew (cstack.bracelev, size, int);
2830 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2831 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2832 cstack.bracelev[nl] = bracelev;
2833 cstack.nl = nl + 1;
2836 static void
2837 popclass_above (bracelev)
2838 int bracelev;
2840 int nl;
2842 for (nl = cstack.nl - 1;
2843 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2844 nl--)
2846 free (cstack.cname[nl]);
2847 cstack.nl = nl;
2851 static void
2852 write_classname (cn, qualifier)
2853 linebuffer *cn;
2854 char *qualifier;
2856 int i, len;
2857 int qlen = strlen (qualifier);
2859 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2861 len = 0;
2862 cn->len = 0;
2863 cn->buffer[0] = '\0';
2865 else
2867 len = strlen (cstack.cname[0]);
2868 linebuffer_setlen (cn, len);
2869 strcpy (cn->buffer, cstack.cname[0]);
2871 for (i = 1; i < cstack.nl; i++)
2873 char *s;
2874 int slen;
2876 s = cstack.cname[i];
2877 if (s == NULL)
2878 continue;
2879 slen = strlen (s);
2880 len += slen + qlen;
2881 linebuffer_setlen (cn, len);
2882 strncat (cn->buffer, qualifier, qlen);
2883 strncat (cn->buffer, s, slen);
2888 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2889 static void make_C_tag __P((bool));
/* consider_token looks at one token and advances, in this order, the
   definedef, typdef, structdef, objdef and fvdef state machines.  It
   returns TRUE when the token is one the caller should tag and FALSE
   otherwise.  The order of the sections below matters: each one may
   return before the following ones are reached.  */
2892 * consider_token ()
2893 * checks to see if the current token is at the start of a
2894 * function or variable, or corresponds to a typedef, or
2895 * is a struct/union/enum tag, or #define, or an enum constant.
2897 * *IS_FUNC gets TRUE if the token is a function or #define macro
2898 * with args. C_EXTP points to which language we are looking at.
2900 * Globals
2901 * fvdef IN OUT
2902 * structdef IN OUT
2903 * definedef IN OUT
2904 * typdef IN OUT
2905 * objdef IN OUT
2908 static bool
2909 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2910 register char *str; /* IN: token pointer */
2911 register int len; /* IN: token length */
2912 register int c; /* IN: first char after the token */
2913 int *c_extp; /* IN, OUT: C extensions mask */
2914 int bracelev; /* IN: brace level */
2915 int parlev; /* IN: parenthesis level */
2916 bool *is_func_or_var; /* OUT: function or variable found */
2918 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2919 structtype is the type of the preceding struct-like keyword, and
2920 structbracelev is the brace level where it has been seen. */
2921 static enum sym_type structtype;
2922 static int structbracelev;
2923 static enum sym_type toktype;
/* Look the token up in the keyword table; anything that is not a
   keyword enabled for *c_extp is classified as st_none.  */
2926 toktype = C_symtype (str, len, *c_extp);
2929 * Skip __attribute__
2931 if (toktype == st_C_attribute)
2933 inattribute = TRUE;
2934 return FALSE;
2938 * Advance the definedef state machine.
2940 switch (definedef)
2942 case dnone:
2943 /* We're not on a preprocessor line. */
2944 if (toktype == st_C_gnumacro)
2946 fvdef = fdefunkey;
2947 return FALSE;
2949 break;
2950 case dsharpseen:
2951 if (toktype == st_C_define)
2953 definedef = ddefineseen;
2955 else
2957 definedef = dignorerest;
2959 return FALSE;
2960 case ddefineseen:
2962 * Make a tag for any macro, unless it is a constant
2963 * and constantypedefs is FALSE.
2965 definedef = dignorerest;
/* The macro takes arguments when '(' directly follows its name.  */
2966 *is_func_or_var = (c == '(');
2967 if (!*is_func_or_var && !constantypedefs)
2968 return FALSE;
2969 else
2970 return TRUE;
2971 case dignorerest:
2972 return FALSE;
2973 default:
2974 error ("internal error: definedef value.", (char *)NULL);
2978 * Now typedefs
2980 switch (typdef)
2982 case tnone:
2983 if (toktype == st_C_typedef)
2985 if (typedefs)
2986 typdef = tkeyseen;
2987 fvextern = FALSE;
2988 fvdef = fvnone;
2989 return FALSE;
2991 break;
2992 case tkeyseen:
2993 switch (toktype)
2995 case st_none:
2996 case st_C_class:
2997 case st_C_struct:
2998 case st_C_enum:
2999 typdef = ttypeseen;
3001 break;
3002 case ttypeseen:
3003 if (structdef == snone && fvdef == fvnone)
3005 fvdef = fvnameseen;
3006 return TRUE;
3008 break;
3009 case tend:
3010 switch (toktype)
3012 case st_C_class:
3013 case st_C_struct:
3014 case st_C_enum:
3015 return FALSE;
3017 return TRUE;
/* Struct-like keywords.  */
3020 switch (toktype)
3022 case st_C_javastruct:
3023 if (structdef == stagseen)
3024 structdef = scolonseen;
3025 return FALSE;
3026 case st_C_template:
3027 case st_C_class:
3028 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3029 && bracelev == 0
3030 && definedef == dnone && structdef == snone
3031 && typdef == tnone && fvdef == fvnone)
3032 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3033 if (toktype == st_C_template)
3034 break;
3035 /* FALLTHRU */
3036 case st_C_struct:
3037 case st_C_enum:
3038 if (parlev == 0
3039 && fvdef != vignore
3040 && (typdef == tkeyseen
3041 || (typedefs_or_cplusplus && structdef == snone)))
/* Remember which keyword opened the construct and at what brace
   level; the enum constant test further down relies on both.  */
3043 structdef = skeyseen;
3044 structtype = toktype;
3045 structbracelev = bracelev;
3046 if (fvdef == fvnameseen)
3047 fvdef = fvnone;
3049 return FALSE;
/* The token that follows a struct-like keyword is its tag.  */
3052 if (structdef == skeyseen)
3054 structdef = stagseen;
3055 return TRUE;
3058 if (typdef != tnone)
3059 definedef = dnone;
3061 /* Detect Objective C constructs. */
3062 switch (objdef)
3064 case onone:
3065 switch (toktype)
3067 case st_C_objprot:
3068 objdef = oprotocol;
3069 return FALSE;
3070 case st_C_objimpl:
3071 objdef = oimplementation;
3072 return FALSE;
3074 break;
3075 case oimplementation:
3076 /* Save the class tag for functions or variables defined inside. */
3077 objtag = savenstr (str, len);
3078 objdef = oinbody;
3079 return FALSE;
3080 case oprotocol:
3081 /* Save the class tag for categories. */
3082 objtag = savenstr (str, len);
3083 objdef = otagseen;
3084 *is_func_or_var = TRUE;
3085 return TRUE;
3086 case oparenseen:
3087 objdef = ocatseen;
3088 *is_func_or_var = TRUE;
3089 return TRUE;
3090 case oinbody:
3091 break;
3092 case omethodsign:
3093 if (parlev == 0)
3095 fvdef = fvnone;
3096 objdef = omethodtag;
/* Start the method name: copy this token into token_name.  */
3097 linebuffer_setlen (&token_name, len);
3098 strncpy (token_name.buffer, str, len);
3099 token_name.buffer[len] = '\0';
3100 return TRUE;
3102 return FALSE;
3103 case omethodcolon:
3104 if (parlev == 0)
3105 objdef = omethodparm;
3106 return FALSE;
3107 case omethodparm:
3108 if (parlev == 0)
3110 fvdef = fvnone;
3111 objdef = omethodtag;
/* Extend the method name: append this token to token_name.  */
3112 linebuffer_setlen (&token_name, token_name.len + len);
3113 strncat (token_name.buffer, str, len);
3114 return TRUE;
3116 return FALSE;
3117 case oignore:
3118 if (toktype == st_C_objend)
3120 /* Memory leakage here: the string pointed by objtag is
3121 never released, because many tests would be needed to
3122 avoid breaking on incorrect input code. The amount of
3123 memory leaked here is the sum of the lengths of the
3124 class tags.
3125 free (objtag); */
3126 objdef = onone;
3128 return FALSE;
3131 /* A function, variable or enum constant? */
3132 switch (toktype)
3134 case st_C_extern:
3135 fvextern = TRUE;
3136 switch (fvdef)
3138 case finlist:
3139 case flistseen:
3140 case fignore:
3141 case vignore:
3142 break;
3143 default:
3144 fvdef = fvnone;
3146 return FALSE;
3147 case st_C_ignore:
3148 fvextern = FALSE;
3149 fvdef = vignore;
3150 return FALSE;
3151 case st_C_operator:
3152 fvdef = foperator;
3153 *is_func_or_var = TRUE;
3154 return TRUE;
3155 case st_none:
3156 if (constantypedefs
3157 && structdef == snone
3158 && structtype == st_C_enum && bracelev > structbracelev)
3159 return TRUE; /* enum constant */
3160 switch (fvdef)
3162 case fdefunkey:
3163 if (bracelev > 0)
3164 break;
3165 fvdef = fdefunname; /* GNU macro */
3166 *is_func_or_var = TRUE;
3167 return TRUE;
3168 case fvnone:
3169 switch (typdef)
3171 case ttypeseen:
3172 return FALSE;
3173 case tnone:
/* An `asm' or `__asm__' token switches to vignore and is not tagged.  */
3174 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3175 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3177 fvdef = vignore;
3178 return FALSE;
3180 break;
3182 /* FALLTHRU */
3183 case fvnameseen:
/* A token ending in "::operator" is handled like the operator
   keyword and, under C_AUTO, switches the language to C++.  */
3184 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3186 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3187 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3188 fvdef = foperator;
3189 *is_func_or_var = TRUE;
3190 return TRUE;
3192 if (bracelev > 0 && !instruct)
3193 break;
3194 fvdef = fvnameseen; /* function or variable */
3195 *is_func_or_var = TRUE;
3196 return TRUE;
3198 break;
3201 return FALSE;
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
/* The two line buffers used by C_entries.  For each buffer `lb' holds
   the text of a line and `linepos' holds the value `charno' had when
   that line was read (see CNL_SAVE_DEFINEDEF below). */
3210 static struct
3212 long linepos;
3213 linebuffer lb;
3214 } lbs[2];
/* The macros below refer to names that are not file-scope objects here:
   curndx, newndx, lp, quotednl, savetoken, c_ext and inf are locals or
   parameters of C_entries, so they are only usable inside it.
   current_lb_is_new is true while the newest line lives in the current
   buffer; switch_line_buffers flips curndx between 0 and 1. */
3216 #define current_lb_is_new (newndx == curndx)
3217 #define switch_line_buffers() (curndx = 1 - curndx)
/* Shorthands for the current/newest buffer and their line positions. */
3219 #define curlb (lbs[curndx].lb)
3220 #define newlb (lbs[newndx].lb)
3221 #define curlinepos (lbs[curndx].linepos)
3222 #define newlinepos (lbs[newndx].linepos)
/* Dialect tests on the c_ext flag word. */
3224 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3225 #define cplpl (c_ext & C_PLPL)
3226 #define cjava ((c_ext & C_JAVA) == C_JAVA)
/* Read the next line into the current buffer: remember charno as the
   position of that line, point lp at its first character, clear
   quotednl and mark the current buffer as the newest one.  Unlike CNL,
   this leaves definedef and savetoken untouched. */
3228 #define CNL_SAVE_DEFINEDEF() \
3229 do { \
3230 curlinepos = charno; \
3231 readline (&curlb, inf); \
3232 lp = curlb.buffer; \
3233 quotednl = FALSE; \
3234 newndx = curndx; \
3235 } while (0)
/* Same as CNL_SAVE_DEFINEDEF, then put back a token that was saved in
   savetoken (if it is valid) and reset definedef to dnone. */
3237 #define CNL() \
3238 do { \
3239 CNL_SAVE_DEFINEDEF(); \
3240 if (savetoken.valid) \
3242 token = savetoken; \
3243 savetoken.valid = FALSE; \
3245 definedef = dnone; \
3246 } while (0)
3249 static void
3250 make_C_tag (isfun)
3251 bool isfun;
3253 /* This function is never called when token.valid is FALSE, but
3254 we must protect against invalid input or internal errors. */
3255 if (token.valid)
3256 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3257 token.offset+token.length+1, token.lineno, token.linepos);
3258 else if (DEBUG)
3259 { /* this branch is optimised away if !DEBUG */
3260 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3261 token_name.len + 17, isfun, token.line,
3262 token.offset+token.length+1, token.lineno, token.linepos);
3263 error ("INVALID TOKEN", NULL);
3266 token.valid = FALSE;
/*
 * C_entries ()
 *	This routine finds functions, variables, typedefs,
 *	#define's, enum constants and struct/union/enum definitions in
 *	C syntax and adds them to the list.
 */
/* Scan the C-like source read from INF and emit tags through make_C_tag.
   C_EXT is a word of language flags; the code below tests C_PLPL, C_JAVA
   (through cjava), C_AUTO and YACC, and replaces C_AUTO by C_PLPL as
   soon as a `::' qualified name is met.
   Input is read one line at a time with readline into one of the two
   buffers in lbs[].  Each loop iteration looks at one character: it
   first deals with backslash escapes, comments, strings, character
   constants and bracketed text, then collects identifier tokens and
   hands them to consider_token, and finally reacts to the delimiter
   just read.  The state variables fvdef, typdef, structdef, definedef
   and objdef are reset on entry.  The storage of both line buffers is
   freed before returning. */
3276 static void
3277 C_entries (c_ext, inf)
3278 int c_ext; /* extension of C */
3279 FILE *inf; /* input file */
3281 register char c; /* latest char read; '\0' for end of line */
3282 register char *lp; /* pointer one beyond the character `c' */
3283 int curndx, newndx; /* indices for current and new lb */
3284 register int tokoff; /* offset in line of start of current token */
3285 register int toklen; /* length of current token */
3286 char *qualifier; /* string used to qualify names */
3287 int qlen; /* length of qualifier */
3288 int bracelev; /* current brace level */
3289 int bracketlev; /* current bracket level */
3290 int parlev; /* current parenthesis level */
3291 int attrparlev; /* __attribute__ parenthesis level */
3292 int templatelev; /* current template level */
3293 int typdefbracelev; /* bracelev where a typedef struct body begun */
3294 bool incomm, inquote, inchar, quotednl, midtoken;
3295 bool yacc_rules; /* in the rules part of a yacc file */
3296 struct tok savetoken = {0}; /* token saved during preprocessor handling */
/* Set up both line buffers and, the first time through (cstack.size is
   still 0), allocate the cname and bracelev arrays of cstack. */
3299 linebuffer_init (&lbs[0].lb);
3300 linebuffer_init (&lbs[1].lb);
3301 if (cstack.size == 0)
3303 cstack.size = (DEBUG) ? 1 : 4;
3304 cstack.nl = 0;
3305 cstack.cname = xnew (cstack.size, char *);
3306 cstack.bracelev = xnew (cstack.size, int);
3309 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3310 curndx = newndx = 0;
3311 lp = curlb.buffer;
3312 *lp = 0;
/* Start from a clean parser state; the name qualifier is "." for Java
   and "::" otherwise. */
3314 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3315 structdef = snone; definedef = dnone; objdef = onone;
3316 yacc_rules = FALSE;
3317 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3318 token.valid = savetoken.valid = FALSE;
3319 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3320 if (cjava)
3321 { qualifier = "."; qlen = 1; }
3322 else
3323 { qualifier = "::"; qlen = 2; }
/* Main loop: one input character per iteration.  A '\0' character marks
   the end of the current line and triggers reading of the next one
   through CNL or CNL_SAVE_DEFINEDEF. */
3326 while (!feof (inf))
3328 c = *lp++;
3329 if (c == '\\')
3331 /* If we are at the end of the line, the next character is a
3332 '\0'; do not skip it, because it is what tells us
3333 to read the next line. */
3334 if (*lp == '\0')
3336 quotednl = TRUE;
3337 continue;
3339 lp++;
3340 c = ' ';
3342 else if (incomm)
3344 switch (c)
3346 case '*':
3347 if (*lp == '/')
3349 c = *lp++;
3350 incomm = FALSE;
3352 break;
3353 case '\0':
3354 /* Newlines inside comments do not end macro definitions in
3355 traditional cpp. */
3356 CNL_SAVE_DEFINEDEF ();
3357 break;
3359 continue;
3361 else if (inquote)
3363 switch (c)
3365 case '"':
3366 inquote = FALSE;
3367 break;
3368 case '\0':
3369 /* Newlines inside strings do not end macro definitions
3370 in traditional cpp, even though compilers don't
3371 usually accept them. */
3372 CNL_SAVE_DEFINEDEF ();
3373 break;
3375 continue;
3377 else if (inchar)
3379 switch (c)
3381 case '\0':
3382 /* Hmmm, something went wrong. */
3383 CNL ();
3384 /* FALLTHRU */
3385 case '\'':
3386 inchar = FALSE;
3387 break;
3389 continue;
3391 else if (bracketlev > 0)
3393 switch (c)
3395 case ']':
3396 if (--bracketlev > 0)
3397 continue;
3398 break;
3399 case '\0':
3400 CNL_SAVE_DEFINEDEF ();
3401 break;
3403 continue;
3405 else switch (c)
3407 case '"':
3408 inquote = TRUE;
3409 if (inattribute)
3410 break;
3411 switch (fvdef)
3413 case fdefunkey:
3414 case fstartlist:
3415 case finlist:
3416 case fignore:
3417 case vignore:
3418 break;
3419 default:
3420 fvextern = FALSE;
3421 fvdef = fvnone;
3423 continue;
3424 case '\'':
3425 inchar = TRUE;
3426 if (inattribute)
3427 break;
3428 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3430 fvextern = FALSE;
3431 fvdef = fvnone;
3433 continue;
3434 case '/':
3435 if (*lp == '*')
3437 incomm = TRUE;
3438 lp++;
3439 c = ' ';
3441 else if (/* cplpl && */ *lp == '/')
3443 c = '\0';
3445 break;
3446 case '%':
3447 if ((c_ext & YACC) && *lp == '%')
3449 /* Entering or exiting rules section in yacc file. */
3450 lp++;
3451 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3452 typdef = tnone; structdef = snone;
3453 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3454 bracelev = 0;
3455 yacc_rules = !yacc_rules;
3456 continue;
3458 else
3459 break;
3460 case '#':
3461 if (definedef == dnone)
3463 char *cp;
3464 bool cpptoken = TRUE;
3466 /* Look back on this line. If all blanks, or nonblanks
3467 followed by an end of comment, this is a preprocessor
3468 token. */
3469 for (cp = newlb.buffer; cp < lp-1; cp++)
3470 if (!iswhite (*cp))
3472 if (*cp == '*' && *(cp+1) == '/')
3474 cp++;
3475 cpptoken = TRUE;
3477 else
3478 cpptoken = FALSE;
3480 if (cpptoken)
3481 definedef = dsharpseen;
3482 } /* if (definedef == dnone) */
3483 continue;
3484 case '[':
3485 bracketlev++;
3486 continue;
3487 } /* switch (c) */
3490 /* Consider token only if some involved conditions are satisfied. */
3491 if (typdef != tignore
3492 && definedef != dignorerest
3493 && fvdef != finlist
3494 && templatelev == 0
3495 && (definedef != dnone
3496 || structdef != scolonseen)
3497 && !inattribute)
3499 if (midtoken)
3501 if (endtoken (c))
3503 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3504 /* This handles :: in the middle,
3505 but not at the beginning of an identifier.
3506 Also, space-separated :: is not recognised. */
3508 if (c_ext & C_AUTO) /* automatic detection of C++ */
3509 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3510 lp += 2;
3511 toklen += 2;
3512 c = lp[-1];
3513 goto still_in_token;
3515 else
3517 bool funorvar = FALSE;
3519 if (yacc_rules
3520 || consider_token (newlb.buffer + tokoff, toklen, c,
3521 &c_ext, bracelev, parlev,
3522 &funorvar))
3524 if (fvdef == foperator)
3526 char *oldlp = lp;
3527 lp = skip_spaces (lp-1);
3528 if (*lp != '\0')
3529 lp += 1;
3530 while (*lp != '\0'
3531 && !iswhite (*lp) && *lp != '(')
3532 lp += 1;
3533 c = *lp++;
3534 toklen += lp - oldlp;
3536 token.named = FALSE;
3537 if (!plainc
3538 && nestlev > 0 && definedef == dnone)
3539 /* in struct body */
3541 write_classname (&token_name, qualifier);
3542 linebuffer_setlen (&token_name,
3543 token_name.len+qlen+toklen);
3544 strcat (token_name.buffer, qualifier);
3545 strncat (token_name.buffer,
3546 newlb.buffer + tokoff, toklen);
3547 token.named = TRUE;
3549 else if (objdef == ocatseen)
3550 /* Objective C category */
3552 int len = strlen (objtag) + 2 + toklen;
3553 linebuffer_setlen (&token_name, len);
3554 strcpy (token_name.buffer, objtag);
3555 strcat (token_name.buffer, "(");
3556 strncat (token_name.buffer,
3557 newlb.buffer + tokoff, toklen);
3558 strcat (token_name.buffer, ")");
3559 token.named = TRUE;
3561 else if (objdef == omethodtag
3562 || objdef == omethodparm)
3563 /* Objective C method */
3565 token.named = TRUE;
3567 else if (fvdef == fdefunname)
3568 /* GNU DEFUN and similar macros */
3570 bool defun = (newlb.buffer[tokoff] == 'F');
3571 int off = tokoff;
3572 int len = toklen;
3574 /* Rewrite the tag so that emacs lisp DEFUNs
3575 can be found by their elisp name */
3576 if (defun)
3578 off += 1;
3579 len -= 1;
3581 linebuffer_setlen (&token_name, len);
3582 strncpy (token_name.buffer,
3583 newlb.buffer + off, len);
3584 token_name.buffer[len] = '\0';
3585 if (defun)
3586 while (--len >= 0)
3587 if (token_name.buffer[len] == '_')
3588 token_name.buffer[len] = '-';
3589 token.named = defun;
3591 else
3593 linebuffer_setlen (&token_name, toklen);
3594 strncpy (token_name.buffer,
3595 newlb.buffer + tokoff, toklen);
3596 token_name.buffer[toklen] = '\0';
3597 /* Name macros and members. */
3598 token.named = (structdef == stagseen
3599 || typdef == ttypeseen
3600 || typdef == tend
3601 || (funorvar
3602 && definedef == dignorerest)
3603 || (funorvar
3604 && definedef == dnone
3605 && structdef == snone
3606 && bracelev > 0));
3608 token.lineno = lineno;
3609 token.offset = tokoff;
3610 token.length = toklen;
3611 token.line = newlb.buffer;
3612 token.linepos = newlinepos;
3613 token.valid = TRUE;
3615 if (definedef == dnone
3616 && (fvdef == fvnameseen
3617 || fvdef == foperator
3618 || structdef == stagseen
3619 || typdef == tend
3620 || typdef == ttypeseen
3621 || objdef != onone))
3623 if (current_lb_is_new)
3624 switch_line_buffers ();
3626 else if (definedef != dnone
3627 || fvdef == fdefunname
3628 || instruct)
3629 make_C_tag (funorvar);
3631 else /* not yacc and consider_token failed */
3633 if (inattribute && fvdef == fignore)
3635 /* We have just met __attribute__ after a
3636 function parameter list: do not tag the
3637 function again. */
3638 fvdef = fvnone;
3641 midtoken = FALSE;
3643 } /* if (endtoken (c)) */
3644 else if (intoken (c))
3645 still_in_token:
3647 toklen++;
3648 continue;
3650 } /* if (midtoken) */
3651 else if (begtoken (c))
3653 switch (definedef)
3655 case dnone:
3656 switch (fvdef)
3658 case fstartlist:
3659 /* This prevents tagging fb in
3660 void (__attribute__((noreturn)) *fb) (void);
3661 Fixing this is not easy and not very important. */
3662 fvdef = finlist;
3663 continue;
3664 case flistseen:
3665 if (plainc || declarations)
3667 make_C_tag (TRUE); /* a function */
3668 fvdef = fignore;
3670 break;
3672 if (structdef == stagseen && !cjava)
3674 popclass_above (bracelev);
3675 structdef = snone;
3677 break;
3678 case dsharpseen:
3679 savetoken = token;
3680 break;
3682 if (!yacc_rules || lp == newlb.buffer + 1)
3684 tokoff = lp - 1 - newlb.buffer;
3685 toklen = 1;
3686 midtoken = TRUE;
3688 continue;
3689 } /* if (begtoken) */
3690 } /* if must look at token */
3693 /* Detect end of line, colon, comma, semicolon and various braces
3694 after having handled a token.*/
3695 switch (c)
3697 case ':':
3698 if (inattribute)
3699 break;
3700 if (yacc_rules && token.offset == 0 && token.valid)
3702 make_C_tag (FALSE); /* a yacc function */
3703 break;
3705 if (definedef != dnone)
3706 break;
3707 switch (objdef)
3709 case otagseen:
3710 objdef = oignore;
3711 make_C_tag (TRUE); /* an Objective C class */
3712 break;
3713 case omethodtag:
3714 case omethodparm:
3715 objdef = omethodcolon;
3716 linebuffer_setlen (&token_name, token_name.len + 1);
3717 strcat (token_name.buffer, ":");
3718 break;
3720 if (structdef == stagseen)
3722 structdef = scolonseen;
3723 break;
3725 /* Should be useless, but may be work as a safety net. */
3726 if (cplpl && fvdef == flistseen)
3728 make_C_tag (TRUE); /* a function */
3729 fvdef = fignore;
3730 break;
3732 break;
3733 case ';':
3734 if (definedef != dnone || inattribute)
3735 break;
3736 switch (typdef)
3738 case tend:
3739 case ttypeseen:
3740 make_C_tag (FALSE); /* a typedef */
3741 typdef = tnone;
3742 fvdef = fvnone;
3743 break;
3744 case tnone:
3745 case tinbody:
3746 case tignore:
3747 switch (fvdef)
3749 case fignore:
3750 if (typdef == tignore || cplpl)
3751 fvdef = fvnone;
3752 break;
3753 case fvnameseen:
3754 if ((globals && bracelev == 0 && (!fvextern || declarations))
3755 || (members && instruct))
3756 make_C_tag (FALSE); /* a variable */
3757 fvextern = FALSE;
3758 fvdef = fvnone;
3759 token.valid = FALSE;
3760 break;
3761 case flistseen:
3762 if ((declarations
3763 && (cplpl || !instruct)
3764 && (typdef == tnone || (typdef != tignore && instruct)))
3765 || (members
3766 && plainc && instruct))
3767 make_C_tag (TRUE); /* a function */
3768 /* FALLTHRU */
3769 default:
3770 fvextern = FALSE;
3771 fvdef = fvnone;
3772 if (declarations
3773 && cplpl && structdef == stagseen)
3774 make_C_tag (FALSE); /* forward declaration */
3775 else
3776 token.valid = FALSE;
3777 } /* switch (fvdef) */
3778 /* FALLTHRU */
3779 default:
3780 if (!instruct)
3781 typdef = tnone;
3783 if (structdef == stagseen)
3784 structdef = snone;
3785 break;
3786 case ',':
3787 if (definedef != dnone || inattribute)
3788 break;
3789 switch (objdef)
3791 case omethodtag:
3792 case omethodparm:
3793 make_C_tag (TRUE); /* an Objective C method */
3794 objdef = oinbody;
3795 break;
3797 switch (fvdef)
3799 case fdefunkey:
3800 case foperator:
3801 case fstartlist:
3802 case finlist:
3803 case fignore:
3804 case vignore:
3805 break;
3806 case fdefunname:
3807 fvdef = fignore;
3808 break;
3809 case fvnameseen:
3810 if (parlev == 0
3811 && ((globals
3812 && bracelev == 0
3813 && templatelev == 0
3814 && (!fvextern || declarations))
3815 || (members && instruct)))
3816 make_C_tag (FALSE); /* a variable */
3817 break;
3818 case flistseen:
3819 if ((declarations && typdef == tnone && !instruct)
3820 || (members && typdef != tignore && instruct))
3822 make_C_tag (TRUE); /* a function */
3823 fvdef = fvnameseen;
3825 else if (!declarations)
3826 fvdef = fvnone;
3827 token.valid = FALSE;
3828 break;
3829 default:
3830 fvdef = fvnone;
3832 if (structdef == stagseen)
3833 structdef = snone;
3834 break;
3835 case ']':
3836 if (definedef != dnone || inattribute)
3837 break;
3838 if (structdef == stagseen)
3839 structdef = snone;
3840 switch (typdef)
3842 case ttypeseen:
3843 case tend:
3844 typdef = tignore;
3845 make_C_tag (FALSE); /* a typedef */
3846 break;
3847 case tnone:
3848 case tinbody:
3849 switch (fvdef)
3851 case foperator:
3852 case finlist:
3853 case fignore:
3854 case vignore:
3855 break;
3856 case fvnameseen:
3857 if ((members && bracelev == 1)
3858 || (globals && bracelev == 0
3859 && (!fvextern || declarations)))
3860 make_C_tag (FALSE); /* a variable */
3861 /* FALLTHRU */
3862 default:
3863 fvdef = fvnone;
3865 break;
3867 break;
3868 case '(':
3869 if (inattribute)
3871 attrparlev++;
3872 break;
3874 if (definedef != dnone)
3875 break;
3876 if (objdef == otagseen && parlev == 0)
3877 objdef = oparenseen;
3878 switch (fvdef)
3880 case fvnameseen:
3881 if (typdef == ttypeseen
3882 && *lp != '*'
3883 && !instruct)
3885 /* This handles constructs like:
3886 typedef void OperatorFun (int fun); */
3887 make_C_tag (FALSE);
3888 typdef = tignore;
3889 fvdef = fignore;
3890 break;
3892 /* FALLTHRU */
3893 case foperator:
3894 fvdef = fstartlist;
3895 break;
3896 case flistseen:
3897 fvdef = finlist;
3898 break;
3900 parlev++;
3901 break;
3902 case ')':
3903 if (inattribute)
3905 if (--attrparlev == 0)
3906 inattribute = FALSE;
3907 break;
3909 if (definedef != dnone)
3910 break;
3911 if (objdef == ocatseen && parlev == 1)
3913 make_C_tag (TRUE); /* an Objective C category */
3914 objdef = oignore;
3916 if (--parlev == 0)
3918 switch (fvdef)
3920 case fstartlist:
3921 case finlist:
3922 fvdef = flistseen;
3923 break;
3925 if (!instruct
3926 && (typdef == tend
3927 || typdef == ttypeseen))
3929 typdef = tignore;
3930 make_C_tag (FALSE); /* a typedef */
3933 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3934 parlev = 0;
3935 break;
3936 case '{':
3937 if (definedef != dnone)
3938 break;
3939 if (typdef == ttypeseen)
3941 /* Whenever typdef is set to tinbody (currently only
3942 here), typdefbracelev should be set to bracelev. */
3943 typdef = tinbody;
3944 typdefbracelev = bracelev;
3946 switch (fvdef)
3948 case flistseen:
3949 make_C_tag (TRUE); /* a function */
3950 /* FALLTHRU */
3951 case fignore:
3952 fvdef = fvnone;
3953 break;
3954 case fvnone:
3955 switch (objdef)
3957 case otagseen:
3958 make_C_tag (TRUE); /* an Objective C class */
3959 objdef = oignore;
3960 break;
3961 case omethodtag:
3962 case omethodparm:
3963 make_C_tag (TRUE); /* an Objective C method */
3964 objdef = oinbody;
3965 break;
3966 default:
3967 /* Neutralize `extern "C" {' grot. */
3968 if (bracelev == 0 && structdef == snone && nestlev == 0
3969 && typdef == tnone)
3970 bracelev = -1;
3972 break;
3974 switch (structdef)
3976 case skeyseen: /* unnamed struct */
3977 pushclass_above (bracelev, NULL, 0);
3978 structdef = snone;
3979 break;
3980 case stagseen: /* named struct or enum */
3981 case scolonseen: /* a class */
3982 pushclass_above (bracelev,token.line+token.offset, token.length);
3983 structdef = snone;
3984 make_C_tag (FALSE); /* a struct or enum */
3985 break;
3987 bracelev += 1;
3988 break;
3989 case '*':
3990 if (definedef != dnone)
3991 break;
3992 if (fvdef == fstartlist)
3994 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3995 token.valid = FALSE;
3997 break;
3998 case '}':
3999 if (definedef != dnone)
4000 break;
4001 bracelev -= 1;
4002 if (!ignoreindent && lp == newlb.buffer + 1)
4004 if (bracelev != 0)
4005 token.valid = FALSE; /* unexpected value, token unreliable */
4006 bracelev = 0; /* reset brace level if first column */
4007 parlev = 0; /* also reset paren level, just in case... */
4009 else if (bracelev < 0)
4011 token.valid = FALSE; /* something gone amiss, token unreliable */
4012 bracelev = 0;
4014 if (bracelev == 0 && fvdef == vignore)
4015 fvdef = fvnone; /* end of function */
4016 popclass_above (bracelev);
4017 structdef = snone;
4018 /* Only if typdef == tinbody is typdefbracelev significant. */
4019 if (typdef == tinbody && bracelev <= typdefbracelev)
4021 assert (bracelev == typdefbracelev);
4022 typdef = tend;
4024 break;
4025 case '=':
4026 if (definedef != dnone)
4027 break;
4028 switch (fvdef)
4030 case foperator:
4031 case finlist:
4032 case fignore:
4033 case vignore:
4034 break;
4035 case fvnameseen:
4036 if ((members && bracelev == 1)
4037 || (globals && bracelev == 0 && (!fvextern || declarations)))
4038 make_C_tag (FALSE); /* a variable */
4039 /* FALLTHRU */
4040 default:
4041 fvdef = vignore;
4043 break;
4044 case '<':
4045 if (cplpl
4046 && (structdef == stagseen || fvdef == fvnameseen))
4048 templatelev++;
4049 break;
4051 goto resetfvdef;
4052 case '>':
4053 if (templatelev > 0)
4055 templatelev--;
4056 break;
4058 goto resetfvdef;
4059 case '+':
4060 case '-':
4061 if (objdef == oinbody && bracelev == 0)
4063 objdef = omethodsign;
4064 break;
4066 /* FALLTHRU */
4067 resetfvdef:
4068 case '#': case '~': case '&': case '%': case '/':
4069 case '|': case '^': case '!': case '.': case '?':
4070 if (definedef != dnone)
4071 break;
4072 /* These surely cannot follow a function tag in C. */
4073 switch (fvdef)
4075 case foperator:
4076 case finlist:
4077 case fignore:
4078 case vignore:
4079 break;
4080 default:
4081 fvdef = fvnone;
4083 break;
4084 case '\0':
4085 if (objdef == otagseen)
4087 make_C_tag (TRUE); /* an Objective C class */
4088 objdef = oignore;
4090 /* If a macro spans multiple lines don't reset its state. */
4091 if (quotednl)
4092 CNL_SAVE_DEFINEDEF ();
4093 else
4094 CNL ();
4095 break;
4096 } /* switch (c) */
4098 } /* while not eof */
/* Release the storage of both line buffers. */
4100 free (lbs[0].lb.buffer);
4101 free (lbs[1].lb.buffer);
4105 * Process either a C++ file or a C file depending on the setting
4106 * of a global flag.
4108 static void
4109 default_C_entries (inf)
4110 FILE *inf;
4112 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Scan INF as plain C: no language flag at all is handed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_flags = 0;

  C_entries (no_flags, inf);
}
4123 /* Always do C++. */
4124 static void
4125 Cplusplus_entries (inf)
4126 FILE *inf;
4128 C_entries (C_PLPL, inf);
4131 /* Always do Java. */
4132 static void
4133 Cjava_entries (inf)
4134 FILE *inf;
4136 C_entries (C_JAVA, inf);
4139 /* Always do C*. */
4140 static void
4141 Cstar_entries (inf)
4142 FILE *inf;
4144 C_entries (C_STAR, inf);
4147 /* Always do Yacc. */
4148 static void
4149 Yacc_entries (inf)
4150 FILE *inf;
4152 C_entries (YACC, inf);
4156 /* Useful macros. */
/* LOOP_ON_INPUT_LINES is the head of a `for' loop that runs until
   feof (file_pointer); at the start of every iteration it reads one
   line into line_buffer with readline and points char_pointer at the
   start of that line.  The loop body is the statement following the
   macro invocation.
   LOOKING_AT (cp, kw) is true when the text at cp starts with the
   literal string kw and the character after it satisfies notinname;
   on success cp is moved past kw and past any following spaces.  cp is
   evaluated several times, so it must be a plain variable.
   LOOKING_AT_NOCASE (cp, kw) compares with strncaseeq, does not look
   at the character after kw, and on success only moves cp past kw. */
4157 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4158 for (; /* loop initialization */ \
4159 !feof (file_pointer) /* loop test */ \
4160 && /* instructions at start of loop */ \
4161 (readline (&line_buffer, file_pointer), \
4162 char_pointer = line_buffer.buffer, \
4163 TRUE); \
4166 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4167 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4168 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4169 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4170 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4172 /* Similar to LOOKING_AT but does not use notinname, does not skip */
4173 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4174 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4175 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4176 && ((cp) += sizeof(kw)-1)) /* advance past kw only */
4179 * Read a file, but do no processing. This is used to do regexp
4180 * matching on files that have no language defined.
4182 static void
4183 just_read_file (inf)
4184 FILE *inf;
4186 register char *dummy;
4188 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4189 continue;
4193 /* Fortran parsing */
4195 static void F_takeprec __P((void));
4196 static void F_getit __P((FILE *));
4198 static void
4199 F_takeprec ()
4201 dbp = skip_spaces (dbp);
4202 if (*dbp != '*')
4203 return;
4204 dbp++;
4205 dbp = skip_spaces (dbp);
4206 if (strneq (dbp, "(*)", 3))
4208 dbp += 3;
4209 return;
4211 if (!ISDIGIT (*dbp))
4213 --dbp; /* force failure */
4214 return;
4217 dbp++;
4218 while (ISDIGIT (*dbp));
4221 static void
4222 F_getit (inf)
4223 FILE *inf;
4225 register char *cp;
4227 dbp = skip_spaces (dbp);
4228 if (*dbp == '\0')
4230 readline (&lb, inf);
4231 dbp = lb.buffer;
4232 if (dbp[5] != '&')
4233 return;
4234 dbp += 6;
4235 dbp = skip_spaces (dbp);
4237 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4238 return;
4239 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4240 continue;
4241 make_tag (dbp, cp-dbp, TRUE,
4242 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4246 static void
4247 Fortran_functions (inf)
4248 FILE *inf;
4250 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4252 if (*dbp == '%')
4253 dbp++; /* Ratfor escape to fortran */
4254 dbp = skip_spaces (dbp);
4255 if (*dbp == '\0')
4256 continue;
4257 switch (lowcase (*dbp))
4259 case 'i':
4260 if (nocase_tail ("integer"))
4261 F_takeprec ();
4262 break;
4263 case 'r':
4264 if (nocase_tail ("real"))
4265 F_takeprec ();
4266 break;
4267 case 'l':
4268 if (nocase_tail ("logical"))
4269 F_takeprec ();
4270 break;
4271 case 'c':
4272 if (nocase_tail ("complex") || nocase_tail ("character"))
4273 F_takeprec ();
4274 break;
4275 case 'd':
4276 if (nocase_tail ("double"))
4278 dbp = skip_spaces (dbp);
4279 if (*dbp == '\0')
4280 continue;
4281 if (nocase_tail ("precision"))
4282 break;
4283 continue;
4285 break;
4287 dbp = skip_spaces (dbp);
4288 if (*dbp == '\0')
4289 continue;
4290 switch (lowcase (*dbp))
4292 case 'f':
4293 if (nocase_tail ("function"))
4294 F_getit (inf);
4295 continue;
4296 case 's':
4297 if (nocase_tail ("subroutine"))
4298 F_getit (inf);
4299 continue;
4300 case 'e':
4301 if (nocase_tail ("entry"))
4302 F_getit (inf);
4303 continue;
4304 case 'b':
4305 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4307 dbp = skip_spaces (dbp);
4308 if (*dbp == '\0') /* assume un-named */
4309 make_tag ("blockdata", 9, TRUE,
4310 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4311 else
4312 F_getit (inf); /* look for name */
4314 continue;
4321 * Ada parsing
4322 * Original code by
4323 * Philippe Waroquiers (1998)
4326 static void Ada_getit __P((FILE *, char *));
4328 /* Once we are positioned after an "interesting" keyword, let's get
4329 the real tag value necessary. */
4330 static void
4331 Ada_getit (inf, name_qualifier)
4332 FILE *inf;
4333 char *name_qualifier;
4335 register char *cp;
4336 char *name;
4337 char c;
4339 while (!feof (inf))
4341 dbp = skip_spaces (dbp);
4342 if (*dbp == '\0'
4343 || (dbp[0] == '-' && dbp[1] == '-'))
4345 readline (&lb, inf);
4346 dbp = lb.buffer;
4348 switch (lowcase(*dbp))
4350 case 'b':
4351 if (nocase_tail ("body"))
4353 /* Skipping body of procedure body or package body or ....
4354 resetting qualifier to body instead of spec. */
4355 name_qualifier = "/b";
4356 continue;
4358 break;
4359 case 't':
4360 /* Skipping type of task type or protected type ... */
4361 if (nocase_tail ("type"))
4362 continue;
4363 break;
4365 if (*dbp == '"')
4367 dbp += 1;
4368 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4369 continue;
4371 else
4373 dbp = skip_spaces (dbp);
4374 for (cp = dbp;
4375 (*cp != '\0'
4376 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4377 cp++)
4378 continue;
4379 if (cp == dbp)
4380 return;
4382 c = *cp;
4383 *cp = '\0';
4384 name = concat (dbp, name_qualifier, "");
4385 *cp = c;
4386 make_tag (name, strlen (name), TRUE,
4387 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4388 free (name);
4389 if (c == '"')
4390 dbp = cp + 1;
4391 return;
4395 static void
4396 Ada_funcs (inf)
4397 FILE *inf;
4399 bool inquote = FALSE;
4400 bool skip_till_semicolumn = FALSE;
4402 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4404 while (*dbp != '\0')
4406 /* Skip a string i.e. "abcd". */
4407 if (inquote || (*dbp == '"'))
4409 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4410 if (dbp != NULL)
4412 inquote = FALSE;
4413 dbp += 1;
4414 continue; /* advance char */
4416 else
4418 inquote = TRUE;
4419 break; /* advance line */
4423 /* Skip comments. */
4424 if (dbp[0] == '-' && dbp[1] == '-')
4425 break; /* advance line */
4427 /* Skip character enclosed in single quote i.e. 'a'
4428 and skip single quote starting an attribute i.e. 'Image. */
4429 if (*dbp == '\'')
4431 dbp++ ;
4432 if (*dbp != '\0')
4433 dbp++;
4434 continue;
4437 if (skip_till_semicolumn)
4439 if (*dbp == ';')
4440 skip_till_semicolumn = FALSE;
4441 dbp++;
4442 continue; /* advance char */
4445 /* Search for beginning of a token. */
4446 if (!begtoken (*dbp))
4448 dbp++;
4449 continue; /* advance char */
4452 /* We are at the beginning of a token. */
4453 switch (lowcase(*dbp))
4455 case 'f':
4456 if (!packages_only && nocase_tail ("function"))
4457 Ada_getit (inf, "/f");
4458 else
4459 break; /* from switch */
4460 continue; /* advance char */
4461 case 'p':
4462 if (!packages_only && nocase_tail ("procedure"))
4463 Ada_getit (inf, "/p");
4464 else if (nocase_tail ("package"))
4465 Ada_getit (inf, "/s");
4466 else if (nocase_tail ("protected")) /* protected type */
4467 Ada_getit (inf, "/t");
4468 else
4469 break; /* from switch */
4470 continue; /* advance char */
4472 case 'u':
4473 if (typedefs && !packages_only && nocase_tail ("use"))
4475 /* when tagging types, avoid tagging use type Pack.Typename;
4476 for this, we will skip everything till a ; */
4477 skip_till_semicolumn = TRUE;
4478 continue; /* advance char */
4481 case 't':
4482 if (!packages_only && nocase_tail ("task"))
4483 Ada_getit (inf, "/k");
4484 else if (typedefs && !packages_only && nocase_tail ("type"))
4486 Ada_getit (inf, "/t");
4487 while (*dbp != '\0')
4488 dbp += 1;
4490 else
4491 break; /* from switch */
4492 continue; /* advance char */
4495 /* Look for the end of the token. */
4496 while (!endtoken (*dbp))
4497 dbp++;
4499 } /* advance char */
4500 } /* advance line */
4505 * Unix and microcontroller assembly tag handling
4506 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4507 * Idea by Bob Weiner, Motorola Inc. (1994)
4509 static void
4510 Asm_labels (inf)
4511 FILE *inf;
4513 register char *cp;
4515 LOOP_ON_INPUT_LINES (inf, lb, cp)
4517 /* If first char is alphabetic or one of [_.$], test for colon
4518 following identifier. */
4519 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4521 /* Read past label. */
4522 cp++;
4523 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4524 cp++;
4525 if (*cp == ':' || iswhite (*cp))
4526 /* Found end of label, so copy it and add it to the table. */
4527 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4528 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4535 * Perl support
4536 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4537 * Perl variable names: /^(my|local).../
4538 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4539 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4540 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4542 static void
4543 Perl_functions (inf)
4544 FILE *inf;
4546 char *package = savestr ("main"); /* current package name */
4547 register char *cp;
4549 LOOP_ON_INPUT_LINES (inf, lb, cp)
4551 cp = skip_spaces (cp);
4553 if (LOOKING_AT (cp, "package"))
4555 free (package);
4556 get_tag (cp, &package);
4558 else if (LOOKING_AT (cp, "sub"))
4560 char *pos;
4561 char *sp = cp;
4563 while (!notinname (*cp))
4564 cp++;
4565 if (cp == sp)
4566 continue; /* nothing found */
4567 if ((pos = etags_strchr (sp, ':')) != NULL
4568 && pos < cp && pos[1] == ':')
4569 /* The name is already qualified. */
4570 make_tag (sp, cp - sp, TRUE,
4571 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4572 else
4573 /* Qualify it. */
4575 char savechar, *name;
4577 savechar = *cp;
4578 *cp = '\0';
4579 name = concat (package, "::", sp);
4580 *cp = savechar;
4581 make_tag (name, strlen(name), TRUE,
4582 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4583 free (name);
4586 else if (globals) /* only if we are tagging global vars */
4588 /* Skip a qualifier, if any. */
4589 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4590 /* After "my" or "local", but before any following paren or space. */
4591 char *varstart = cp;
4593 if (qual /* should this be removed? If yes, how? */
4594 && (*cp == '$' || *cp == '@' || *cp == '%'))
4596 varstart += 1;
4598 cp++;
4599 while (ISALNUM (*cp) || *cp == '_');
4601 else if (qual)
4603 /* Should be examining a variable list at this point;
4604 could insist on seeing an open parenthesis. */
4605 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4606 cp++;
4608 else
4609 continue;
4611 make_tag (varstart, cp - varstart, FALSE,
4612 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4615 free (package);
4620 * Python support
4621 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4622 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4623 * More ideas by seb bacon <seb@jamkit.com> (2002)
4625 static void
4626 Python_functions (inf)
4627 FILE *inf;
4629 register char *cp;
4631 LOOP_ON_INPUT_LINES (inf, lb, cp)
4633 cp = skip_spaces (cp);
4634 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4636 char *name = cp;
4637 while (!notinname (*cp) && *cp != ':')
4638 cp++;
4639 make_tag (name, cp - name, TRUE,
4640 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4647 * PHP support
4648 * Look for:
4649 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4650 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4651 * - /^[ \t]*define\(\"[^\"]+/
4652 * Only with --members:
4653 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4654 * Idea by Diez B. Roggisch (2001)
4656 static void
4657 PHP_functions (inf)
4658 FILE *inf;
4660 register char *cp, *name;
4661 bool search_identifier = FALSE;
4663 LOOP_ON_INPUT_LINES (inf, lb, cp)
4665 cp = skip_spaces (cp);
4666 name = cp;
4667 if (search_identifier
4668 && *cp != '\0')
4670 while (!notinname (*cp))
4671 cp++;
4672 make_tag (name, cp - name, TRUE,
4673 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4674 search_identifier = FALSE;
4676 else if (LOOKING_AT (cp, "function"))
4678 if(*cp == '&')
4679 cp = skip_spaces (cp+1);
4680 if(*cp != '\0')
4682 name = cp;
4683 while (!notinname (*cp))
4684 cp++;
4685 make_tag (name, cp - name, TRUE,
4686 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4688 else
4689 search_identifier = TRUE;
4691 else if (LOOKING_AT (cp, "class"))
4693 if (*cp != '\0')
4695 name = cp;
4696 while (*cp != '\0' && !iswhite (*cp))
4697 cp++;
4698 make_tag (name, cp - name, FALSE,
4699 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4701 else
4702 search_identifier = TRUE;
4704 else if (strneq (cp, "define", 6)
4705 && (cp = skip_spaces (cp+6))
4706 && *cp++ == '('
4707 && (*cp == '"' || *cp == '\''))
4709 char quote = *cp++;
4710 name = cp;
4711 while (*cp != quote && *cp != '\0')
4712 cp++;
4713 make_tag (name, cp - name, FALSE,
4714 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4716 else if (members
4717 && LOOKING_AT (cp, "var")
4718 && *cp == '$')
4720 name = cp;
4721 while (!notinname(*cp))
4722 cp++;
4723 make_tag (name, cp - name, FALSE,
4724 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4731 * Cobol tag functions
4732 * We could look for anything that could be a paragraph name.
4733 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4734 * Idea by Corny de Souza (1993)
4736 static void
4737 Cobol_paragraphs (inf)
4738 FILE *inf;
4740 register char *bp, *ep;
4742 LOOP_ON_INPUT_LINES (inf, lb, bp)
4744 if (lb.len < 9)
4745 continue;
4746 bp += 8;
4748 /* If eoln, compiler option or comment ignore whole line. */
4749 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4750 continue;
4752 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4753 continue;
4754 if (*ep++ == '.')
4755 make_tag (bp, ep - bp, TRUE,
4756 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4762 * Makefile support
4763 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4765 static void
4766 Makefile_targets (inf)
4767 FILE *inf;
4769 register char *bp;
4771 LOOP_ON_INPUT_LINES (inf, lb, bp)
4773 if (*bp == '\t' || *bp == '#')
4774 continue;
4775 while (*bp != '\0' && *bp != '=' && *bp != ':')
4776 bp++;
4777 if (*bp == ':' || (globals && *bp == '='))
4779 /* We should detect if there is more than one tag, but we do not.
4780 We just skip initial and final spaces. */
4781 char * namestart = skip_spaces (lb.buffer);
4782 while (--bp > namestart)
4783 if (!notinname (*bp))
4784 break;
4785 make_tag (namestart, bp - namestart + 1, TRUE,
4786 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4793 * Pascal parsing
4794 * Original code by Mosur K. Mohan (1989)
4796 * Locates tags for procedures & functions. Doesn't do any type- or
4797 * var-definitions. It does look for the keyword "extern" or
4798 * "forward" immediately following the procedure statement; if found,
4799 * the tag is skipped.
4801 static void
4802 Pascal_functions (inf)
4803 FILE *inf;
4805 linebuffer tline; /* mostly copied from C_entries */
4806 long save_lcno;
4807 int save_lineno, namelen, taglen;
4808 char c, *name;
4810 bool /* each of these flags is TRUE if: */
4811 incomment, /* point is inside a comment */
4812 inquote, /* point is inside '..' string */
4813 get_tagname, /* point is after PROCEDURE/FUNCTION
4814 keyword, so next item = potential tag */
4815 found_tag, /* point is after a potential tag */
4816 inparms, /* point is within parameter-list */
4817 verify_tag; /* point has passed the parm-list, so the
4818 next token will determine whether this
4819 is a FORWARD/EXTERN to be ignored, or
4820 whether it is a real tag */
4822 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4823 name = NULL; /* keep compiler quiet */
4824 dbp = lb.buffer;
4825 *dbp = '\0';
4826 linebuffer_init (&tline);
4828 incomment = inquote = FALSE;
4829 found_tag = FALSE; /* have a proc name; check if extern */
4830 get_tagname = FALSE; /* found "procedure" keyword */
4831 inparms = FALSE; /* found '(' after "proc" */
4832 verify_tag = FALSE; /* check if "extern" is ahead */
4835 while (!feof (inf)) /* long main loop to get next char */
4837 c = *dbp++;
4838 if (c == '\0') /* if end of line */
4840 readline (&lb, inf);
4841 dbp = lb.buffer;
4842 if (*dbp == '\0')
4843 continue;
4844 if (!((found_tag && verify_tag)
4845 || get_tagname))
4846 c = *dbp++; /* only if don't need *dbp pointing
4847 to the beginning of the name of
4848 the procedure or function */
4850 if (incomment)
4852 if (c == '}') /* within { } comments */
4853 incomment = FALSE;
4854 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4856 dbp++;
4857 incomment = FALSE;
4859 continue;
4861 else if (inquote)
4863 if (c == '\'')
4864 inquote = FALSE;
4865 continue;
4867 else
4868 switch (c)
4870 case '\'':
4871 inquote = TRUE; /* found first quote */
4872 continue;
4873 case '{': /* found open { comment */
4874 incomment = TRUE;
4875 continue;
4876 case '(':
4877 if (*dbp == '*') /* found open (* comment */
4879 incomment = TRUE;
4880 dbp++;
4882 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4883 inparms = TRUE;
4884 continue;
4885 case ')': /* end of parms list */
4886 if (inparms)
4887 inparms = FALSE;
4888 continue;
4889 case ';':
4890 if (found_tag && !inparms) /* end of proc or fn stmt */
4892 verify_tag = TRUE;
4893 break;
4895 continue;
4897 if (found_tag && verify_tag && (*dbp != ' '))
4899 /* Check if this is an "extern" declaration. */
4900 if (*dbp == '\0')
4901 continue;
4902 if (lowcase (*dbp == 'e'))
4904 if (nocase_tail ("extern")) /* superfluous, really! */
4906 found_tag = FALSE;
4907 verify_tag = FALSE;
4910 else if (lowcase (*dbp) == 'f')
4912 if (nocase_tail ("forward")) /* check for forward reference */
4914 found_tag = FALSE;
4915 verify_tag = FALSE;
4918 if (found_tag && verify_tag) /* not external proc, so make tag */
4920 found_tag = FALSE;
4921 verify_tag = FALSE;
4922 make_tag (name, namelen, TRUE,
4923 tline.buffer, taglen, save_lineno, save_lcno);
4924 continue;
4927 if (get_tagname) /* grab name of proc or fn */
4929 char *cp;
4931 if (*dbp == '\0')
4932 continue;
4934 /* Find block name. */
4935 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4936 continue;
4938 /* Save all values for later tagging. */
4939 linebuffer_setlen (&tline, lb.len);
4940 strcpy (tline.buffer, lb.buffer);
4941 save_lineno = lineno;
4942 save_lcno = linecharno;
4943 name = tline.buffer + (dbp - lb.buffer);
4944 namelen = cp - dbp;
4945 taglen = cp - lb.buffer + 1;
4947 dbp = cp; /* set dbp to e-o-token */
4948 get_tagname = FALSE;
4949 found_tag = TRUE;
4950 continue;
4952 /* And proceed to check for "extern". */
4954 else if (!incomment && !inquote && !found_tag)
4956 /* Check for proc/fn keywords. */
4957 switch (lowcase (c))
4959 case 'p':
4960 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4961 get_tagname = TRUE;
4962 continue;
4963 case 'f':
4964 if (nocase_tail ("unction"))
4965 get_tagname = TRUE;
4966 continue;
4969 } /* while not eof */
4971 free (tline.buffer);
4976 * Lisp tag functions
4977 * look for (def or (DEF, quote or QUOTE
4980 static void L_getit __P((void));
4982 static void
4983 L_getit ()
4985 if (*dbp == '\'') /* Skip prefix quote */
4986 dbp++;
4987 else if (*dbp == '(')
4989 dbp++;
4990 /* Try to skip "(quote " */
4991 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4992 /* Ok, then skip "(" before name in (defstruct (foo)) */
4993 dbp = skip_spaces (dbp);
4995 get_tag (dbp, NULL);
4998 static void
4999 Lisp_functions (inf)
5000 FILE *inf;
5002 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5004 if (dbp[0] != '(')
5005 continue;
5007 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5009 dbp = skip_non_spaces (dbp);
5010 dbp = skip_spaces (dbp);
5011 L_getit ();
5013 else
5015 /* Check for (foo::defmumble name-defined ... */
5017 dbp++;
5018 while (!notinname (*dbp) && *dbp != ':');
5019 if (*dbp == ':')
5022 dbp++;
5023 while (*dbp == ':');
5025 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5027 dbp = skip_non_spaces (dbp);
5028 dbp = skip_spaces (dbp);
5029 L_getit ();
5038 * Lua script language parsing
5039 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5041 * "function" and "local function" are tags if they start at column 1.
5043 static void
5044 Lua_functions (inf)
5045 FILE *inf;
5047 register char *bp;
5049 LOOP_ON_INPUT_LINES (inf, lb, bp)
5051 if (bp[0] != 'f' && bp[0] != 'l')
5052 continue;
5054 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5056 if (LOOKING_AT (bp, "function"))
5057 get_tag (bp, NULL);
5063 * Postscript tags
5064 * Just look for lines where the first character is '/'
5065 * Also look at "defineps" for PSWrap
5066 * Ideas by:
5067 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5068 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5070 static void
5071 PS_functions (inf)
5072 FILE *inf;
5074 register char *bp, *ep;
5076 LOOP_ON_INPUT_LINES (inf, lb, bp)
5078 if (bp[0] == '/')
5080 for (ep = bp+1;
5081 *ep != '\0' && *ep != ' ' && *ep != '{';
5082 ep++)
5083 continue;
5084 make_tag (bp, ep - bp, TRUE,
5085 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5087 else if (LOOKING_AT (bp, "defineps"))
5088 get_tag (bp, NULL);
5094 * Forth tags
5095 * Ignore anything after \ followed by space or in ( )
5096 * Look for words defined by :
5097 * Look for constant, code, create, defer, value, and variable
5098 * OBP extensions: Look for buffer:, field,
5099 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5101 static void
5102 Forth_words (inf)
5103 FILE *inf;
5105 register char *bp;
5107 LOOP_ON_INPUT_LINES (inf, lb, bp)
5108 while ((bp = skip_spaces (bp))[0] != '\0')
5109 if (bp[0] == '\\' && iswhite(bp[1]))
5110 break; /* read next line */
5111 else if (bp[0] == '(' && iswhite(bp[1]))
5112 do /* skip to ) or eol */
5113 bp++;
5114 while (*bp != ')' && *bp != '\0');
5115 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5116 || LOOKING_AT_NOCASE (bp, "constant")
5117 || LOOKING_AT_NOCASE (bp, "code")
5118 || LOOKING_AT_NOCASE (bp, "create")
5119 || LOOKING_AT_NOCASE (bp, "defer")
5120 || LOOKING_AT_NOCASE (bp, "value")
5121 || LOOKING_AT_NOCASE (bp, "variable")
5122 || LOOKING_AT_NOCASE (bp, "buffer:")
5123 || LOOKING_AT_NOCASE (bp, "field"))
5124 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5125 else
5126 bp = skip_non_spaces (bp);
5131 * Scheme tag functions
5132 * look for (def... xyzzy
5133 * (def... (xyzzy
5134 * (def ... ((...(xyzzy ....
5135 * (set! xyzzy
5136 * Original code by Ken Haase (1985?)
5138 static void
5139 Scheme_functions (inf)
5140 FILE *inf;
5142 register char *bp;
5144 LOOP_ON_INPUT_LINES (inf, lb, bp)
5146 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5148 bp = skip_non_spaces (bp+4);
5149 /* Skip over open parens and white space */
5150 while (notinname (*bp))
5151 bp++;
5152 get_tag (bp, NULL);
5154 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5155 get_tag (bp, NULL);
5160 /* Find tags in TeX and LaTeX input files. */
5162 /* TEX_toktab is a table of TeX control sequences that define tags.
5163 * Each entry records one such control sequence.
5165 * Original code from who knows whom.
5166 * Ideas by:
5167 * Stefan Monnier (2002)
5170 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5172 /* Default set of control sequences to put into TEX_toktab.
5173 The value of environment var TEXTAGS is prepended to this. */
5174 static char *TEX_defenv = "\
5175 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5176 :part:appendix:entry:index:def\
5177 :newcommand:renewcommand:newenvironment:renewenvironment";
5179 static void TEX_mode __P((FILE *));
5180 static void TEX_decode_env __P((char *, char *));
5182 static char TEX_esc = '\\';
5183 static char TEX_opgrp = '{';
5184 static char TEX_clgrp = '}';
5187 * TeX/LaTeX scanning loop.
5189 static void
5190 TeX_commands (inf)
5191 FILE *inf;
5193 char *cp;
5194 linebuffer *key;
5196 /* Select either \ or ! as escape character. */
5197 TEX_mode (inf);
5199 /* Initialize token table once from environment. */
5200 if (TEX_toktab == NULL)
5201 TEX_decode_env ("TEXTAGS", TEX_defenv);
5203 LOOP_ON_INPUT_LINES (inf, lb, cp)
5205 /* Look at each TEX keyword in line. */
5206 for (;;)
5208 /* Look for a TEX escape. */
5209 while (*cp++ != TEX_esc)
5210 if (cp[-1] == '\0' || cp[-1] == '%')
5211 goto tex_next_line;
5213 for (key = TEX_toktab; key->buffer != NULL; key++)
5214 if (strneq (cp, key->buffer, key->len))
5216 register char *p;
5217 int namelen, linelen;
5218 bool opgrp = FALSE;
5220 cp = skip_spaces (cp + key->len);
5221 if (*cp == TEX_opgrp)
5223 opgrp = TRUE;
5224 cp++;
5226 for (p = cp;
5227 (!iswhite (*p) && *p != '#' &&
5228 *p != TEX_opgrp && *p != TEX_clgrp);
5229 p++)
5230 continue;
5231 namelen = p - cp;
5232 linelen = lb.len;
5233 if (!opgrp || *p == TEX_clgrp)
5235 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5236 p++;
5237 linelen = p - lb.buffer + 1;
5239 make_tag (cp, namelen, TRUE,
5240 lb.buffer, linelen, lineno, linecharno);
5241 goto tex_next_line; /* We only tag a line once */
5244 tex_next_line:
5249 #define TEX_LESC '\\'
5250 #define TEX_SESC '!'
5252 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5253 chars accordingly. */
5254 static void
5255 TEX_mode (inf)
5256 FILE *inf;
5258 int c;
5260 while ((c = getc (inf)) != EOF)
5262 /* Skip to next line if we hit the TeX comment char. */
5263 if (c == '%')
5264 while (c != '\n' && c != EOF)
5265 c = getc (inf);
5266 else if (c == TEX_LESC || c == TEX_SESC )
5267 break;
5270 if (c == TEX_LESC)
5272 TEX_esc = TEX_LESC;
5273 TEX_opgrp = '{';
5274 TEX_clgrp = '}';
5276 else
5278 TEX_esc = TEX_SESC;
5279 TEX_opgrp = '<';
5280 TEX_clgrp = '>';
5282 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5283 No attempt is made to correct the situation. */
5284 rewind (inf);
5287 /* Read environment and prepend it to the default string.
5288 Build token table. */
5289 static void
5290 TEX_decode_env (evarname, defenv)
5291 char *evarname;
5292 char *defenv;
5294 register char *env, *p;
5295 int i, len;
5297 /* Append default string to environment. */
5298 env = getenv (evarname);
5299 if (!env)
5300 env = defenv;
5301 else
5303 char *oldenv = env;
5304 env = concat (oldenv, defenv, "");
5307 /* Allocate a token table */
5308 for (len = 1, p = env; p;)
5309 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5310 len++;
5311 TEX_toktab = xnew (len, linebuffer);
5313 /* Unpack environment string into token table. Be careful about */
5314 /* zero-length strings (leading ':', "::" and trailing ':') */
5315 for (i = 0; *env != '\0';)
5317 p = etags_strchr (env, ':');
5318 if (!p) /* End of environment string. */
5319 p = env + strlen (env);
5320 if (p - env > 0)
5321 { /* Only non-zero strings. */
5322 TEX_toktab[i].buffer = savenstr (env, p - env);
5323 TEX_toktab[i].len = p - env;
5324 i++;
5326 if (*p)
5327 env = p + 1;
5328 else
5330 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5331 TEX_toktab[i].len = 0;
5332 break;
5338 /* Texinfo support. Dave Love, Mar. 2000. */
5339 static void
5340 Texinfo_nodes (inf)
5341 FILE * inf;
5343 char *cp, *start;
5344 LOOP_ON_INPUT_LINES (inf, lb, cp)
5345 if (LOOKING_AT (cp, "@node"))
5347 start = cp;
5348 while (*cp != '\0' && *cp != ',')
5349 cp++;
5350 make_tag (start, cp - start, TRUE,
5351 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5357 * HTML support.
5358 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5359 * Contents of <a name=xxx> are tags with name xxx.
5361 * Francesco Potortì, 2002.
5363 static void
5364 HTML_labels (inf)
5365 FILE * inf;
5367 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5368 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5369 bool intag = FALSE; /* inside an html tag, looking for ID= */
5370 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5371 char *end;
5374 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5376 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5377 for (;;) /* loop on the same line */
5379 if (skiptag) /* skip HTML tag */
5381 while (*dbp != '\0' && *dbp != '>')
5382 dbp++;
5383 if (*dbp == '>')
5385 dbp += 1;
5386 skiptag = FALSE;
5387 continue; /* look on the same line */
5389 break; /* go to next line */
5392 else if (intag) /* look for "name=" or "id=" */
5394 while (*dbp != '\0' && *dbp != '>'
5395 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5396 dbp++;
5397 if (*dbp == '\0')
5398 break; /* go to next line */
5399 if (*dbp == '>')
5401 dbp += 1;
5402 intag = FALSE;
5403 continue; /* look on the same line */
5405 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5406 || LOOKING_AT_NOCASE (dbp, "id="))
5408 bool quoted = (dbp[0] == '"');
5410 if (quoted)
5411 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5412 continue;
5413 else
5414 for (end = dbp; *end != '\0' && intoken (*end); end++)
5415 continue;
5416 linebuffer_setlen (&token_name, end - dbp);
5417 strncpy (token_name.buffer, dbp, end - dbp);
5418 token_name.buffer[end - dbp] = '\0';
5420 dbp = end;
5421 intag = FALSE; /* we found what we looked for */
5422 skiptag = TRUE; /* skip to the end of the tag */
5423 getnext = TRUE; /* then grab the text */
5424 continue; /* look on the same line */
5426 dbp += 1;
5429 else if (getnext) /* grab next tokens and tag them */
5431 dbp = skip_spaces (dbp);
5432 if (*dbp == '\0')
5433 break; /* go to next line */
5434 if (*dbp == '<')
5436 intag = TRUE;
5437 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5438 continue; /* look on the same line */
5441 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5442 continue;
5443 make_tag (token_name.buffer, token_name.len, TRUE,
5444 dbp, end - dbp, lineno, linecharno);
5445 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5446 getnext = FALSE;
5447 break; /* go to next line */
5450 else /* look for an interesting HTML tag */
5452 while (*dbp != '\0' && *dbp != '<')
5453 dbp++;
5454 if (*dbp == '\0')
5455 break; /* go to next line */
5456 intag = TRUE;
5457 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5459 inanchor = TRUE;
5460 continue; /* look on the same line */
5462 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5463 || LOOKING_AT_NOCASE (dbp, "<h1>")
5464 || LOOKING_AT_NOCASE (dbp, "<h2>")
5465 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5467 intag = FALSE;
5468 getnext = TRUE;
5469 continue; /* look on the same line */
5471 dbp += 1;
5478 * Prolog support
5480 * Assumes that the predicate or rule starts at column 0.
5481 * Only the first clause of a predicate or rule is added.
5482 * Original code by Sunichirou Sugou (1989)
5483 * Rewritten by Anders Lindgren (1996)
5485 static int prolog_pr __P((char *, char *));
5486 static void prolog_skip_comment __P((linebuffer *, FILE *));
5487 static int prolog_atom __P((char *, int));
5489 static void
5490 Prolog_functions (inf)
5491 FILE *inf;
5493 char *cp, *last;
5494 int len;
5495 int allocated;
5497 allocated = 0;
5498 len = 0;
5499 last = NULL;
5501 LOOP_ON_INPUT_LINES (inf, lb, cp)
5503 if (cp[0] == '\0') /* Empty line */
5504 continue;
5505 else if (iswhite (cp[0])) /* Not a predicate */
5506 continue;
5507 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5508 prolog_skip_comment (&lb, inf);
5509 else if ((len = prolog_pr (cp, last)) > 0)
5511 /* Predicate or rule. Store the function name so that we
5512 only generate a tag for the first clause. */
5513 if (last == NULL)
5514 last = xnew(len + 1, char);
5515 else if (len + 1 > allocated)
5516 xrnew (last, len + 1, char);
5517 allocated = len + 1;
5518 strncpy (last, cp, len);
5519 last[len] = '\0';
5522 free (last);
5526 static void
5527 prolog_skip_comment (plb, inf)
5528 linebuffer *plb;
5529 FILE *inf;
5531 char *cp;
5535 for (cp = plb->buffer; *cp != '\0'; cp++)
5536 if (cp[0] == '*' && cp[1] == '/')
5537 return;
5538 readline (plb, inf);
5540 while (!feof(inf));
5544 * A predicate or rule definition is added if it matches:
5545 * <beginning of line><Prolog Atom><whitespace>(
5546 * or <beginning of line><Prolog Atom><whitespace>:-
5548 * It is added to the tags database if it doesn't match the
5549 * name of the previous clause header.
5551 * Return the size of the name of the predicate or rule, or 0 if no
5552 * header was found.
5554 static int
5555 prolog_pr (s, last)
5556 char *s;
5557 char *last; /* Name of last clause. */
5559 int pos;
5560 int len;
5562 pos = prolog_atom (s, 0);
5563 if (pos < 1)
5564 return 0;
5566 len = pos;
5567 pos = skip_spaces (s + pos) - s;
5569 if ((s[pos] == '.'
5570 || (s[pos] == '(' && (pos += 1))
5571 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5572 && (last == NULL /* save only the first clause */
5573 || len != (int)strlen (last)
5574 || !strneq (s, last, len)))
5576 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5577 return len;
5579 else
5580 return 0;
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (or an underscore).
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;	/* that was the closing quote */
        p++;			/* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5644 * Support for Erlang
5646 * Generates tags for functions, defines, and records.
5647 * Assumes that Erlang functions start at column 0.
5648 * Original code by Anders Lindgren (1996)
5650 static int erlang_func __P((char *, char *));
5651 static void erlang_attribute __P((char *));
5652 static int erlang_atom __P((char *));
5654 static void
5655 Erlang_functions (inf)
5656 FILE *inf;
5658 char *cp, *last;
5659 int len;
5660 int allocated;
5662 allocated = 0;
5663 len = 0;
5664 last = NULL;
5666 LOOP_ON_INPUT_LINES (inf, lb, cp)
5668 if (cp[0] == '\0') /* Empty line */
5669 continue;
5670 else if (iswhite (cp[0])) /* Not function nor attribute */
5671 continue;
5672 else if (cp[0] == '%') /* comment */
5673 continue;
5674 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5675 continue;
5676 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5678 erlang_attribute (cp);
5679 if (last != NULL)
5681 free (last);
5682 last = NULL;
5685 else if ((len = erlang_func (cp, last)) > 0)
5688 * Function. Store the function name so that we only
5689 * generates a tag for the first clause.
5691 if (last == NULL)
5692 last = xnew (len + 1, char);
5693 else if (len + 1 > allocated)
5694 xrnew (last, len + 1, char);
5695 allocated = len + 1;
5696 strncpy (last, cp, len);
5697 last[len] = '\0';
5700 free (last);
5705 * A function definition is added if it matches:
5706 * <beginning of line><Erlang Atom><whitespace>(
5708 * It is added to the tags database if it doesn't match the
5709 * name of the previous clause header.
5711 * Return the size of the name of the function, or 0 if no function
5712 * was found.
5714 static int
5715 erlang_func (s, last)
5716 char *s;
5717 char *last; /* Name of last clause. */
5719 int pos;
5720 int len;
5722 pos = erlang_atom (s);
5723 if (pos < 1)
5724 return 0;
5726 len = pos;
5727 pos = skip_spaces (s + pos) - s;
5729 /* Save only the first clause. */
5730 if (s[pos++] == '('
5731 && (last == NULL
5732 || len != (int)strlen (last)
5733 || !strneq (s, last, len)))
5735 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5736 return len;
5739 return 0;
5744 * Handle attributes. Currently, tags are generated for defines
5745 * and records.
5747 * They are on the form:
5748 * -define(foo, bar).
5749 * -define(Foo(M, N), M+N).
5750 * -record(graph, {vtab = notable, cyclic = true}).
5752 static void
5753 erlang_attribute (s)
5754 char *s;
5756 char *cp = s;
5758 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5759 && *cp++ == '(')
5761 int len = erlang_atom (skip_spaces (cp));
5762 if (len > 0)
5763 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5765 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, or when a quoted atom is not closed on the
 * same line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted; a backslash protects the next character.  */
      for (p++; *p != '\''; p++)
        {
          if (*p == '\0')	/* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\' && *++p == '\0')
            return 0;
        }
      p++;			/* count the closing quote */
    }

  return p - s;
}
5799 static char *scan_separators __P((char *));
5800 static void add_regex __P((char *, language *));
5801 static char *substitute __P((char *, char *, struct re_registers *));
5804 * Take a string like "/blah/" and turn it into "blah", verifying
5805 * that the first and last characters are the same, and handling
5806 * quoted separator characters. Actually, stops on the occurrence of
5807 * an unquoted separator. Also process \t, \n, etc. and turn into
5808 * appropriate characters. Works in place. Null terminates name string.
5809 * Returns pointer to terminating separator, or NULL for
5810 * unterminated regexps.
5812 static char *
5813 scan_separators (name)
5814 char *name;
5816 char sep = name[0];
5817 char *copyto = name;
5818 bool quoted = FALSE;
5820 for (++name; *name != '\0'; ++name)
5822 if (quoted)
5824 switch (*name)
5826 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5827 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5828 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5829 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5830 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5831 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5832 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5833 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5834 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5835 default:
5836 if (*name == sep)
5837 *copyto++ = sep;
5838 else
5840 /* Something else is quoted, so preserve the quote. */
5841 *copyto++ = '\\';
5842 *copyto++ = *name;
5844 break;
5846 quoted = FALSE;
5848 else if (*name == '\\')
5849 quoted = TRUE;
5850 else if (*name == sep)
5851 break;
5852 else
5853 *copyto++ = *name;
5855 if (*name != sep)
5856 name = NULL; /* signal unterminated regexp */
5858 /* Terminate copied string. */
5859 *copyto = '\0';
5860 return name;
5863 /* Look at the argument of --regex or --no-regex and do the right
5864 thing. Same for each line of a regexp file. */
5865 static void
5866 analyse_regex (regex_arg)
5867 char *regex_arg;
5869 if (regex_arg == NULL)
5871 free_regexps (); /* --no-regex: remove existing regexps */
5872 return;
5875 /* A real --regexp option or a line in a regexp file. */
5876 switch (regex_arg[0])
5878 /* Comments in regexp file or null arg to --regex. */
5879 case '\0':
5880 case ' ':
5881 case '\t':
5882 break;
5884 /* Read a regex file. This is recursive and may result in a
5885 loop, which will stop when the file descriptors are exhausted. */
5886 case '@':
5888 FILE *regexfp;
5889 linebuffer regexbuf;
5890 char *regexfile = regex_arg + 1;
5892 /* regexfile is a file containing regexps, one per line. */
5893 regexfp = fopen (regexfile, "r");
5894 if (regexfp == NULL)
5896 pfatal (regexfile);
5897 return;
5899 linebuffer_init (&regexbuf);
5900 while (readline_internal (&regexbuf, regexfp) > 0)
5901 analyse_regex (regexbuf.buffer);
5902 free (regexbuf.buffer);
5903 fclose (regexfp);
5905 break;
5907 /* Regexp to be used for a specific language only. */
5908 case '{':
5910 language *lang;
5911 char *lang_name = regex_arg + 1;
5912 char *cp;
5914 for (cp = lang_name; *cp != '}'; cp++)
5915 if (*cp == '\0')
5917 error ("unterminated language name in regex: %s", regex_arg);
5918 return;
5920 *cp++ = '\0';
5921 lang = get_language_from_langname (lang_name);
5922 if (lang == NULL)
5923 return;
5924 add_regex (cp, lang);
5926 break;
5928 /* Regexp to be used for any language. */
5929 default:
5930 add_regex (regex_arg, NULL);
5931 break;
5935 /* Separate the regexp pattern, compile it,
5936 and care for optional name and modifiers. */
5937 static void
5938 add_regex (regexp_pattern, lang)
5939 char *regexp_pattern;
5940 language *lang;
5942 static struct re_pattern_buffer zeropattern;
5943 char sep, *pat, *name, *modifiers;
5944 const char *err;
5945 struct re_pattern_buffer *patbuf;
5946 regexp *rp;
5947 bool
5948 force_explicit_name = TRUE, /* do not use implicit tag names */
5949 ignore_case = FALSE, /* case is significant */
5950 multi_line = FALSE, /* matches are done one line at a time */
5951 single_line = FALSE; /* dot does not match newline */
5954 if (strlen(regexp_pattern) < 3)
5956 error ("null regexp", (char *)NULL);
5957 return;
5959 sep = regexp_pattern[0];
5960 name = scan_separators (regexp_pattern);
5961 if (name == NULL)
5963 error ("%s: unterminated regexp", regexp_pattern);
5964 return;
5966 if (name[1] == sep)
5968 error ("null name for regexp \"%s\"", regexp_pattern);
5969 return;
5971 modifiers = scan_separators (name);
5972 if (modifiers == NULL) /* no terminating separator --> no name */
5974 modifiers = name;
5975 name = "";
5977 else
5978 modifiers += 1; /* skip separator */
5980 /* Parse regex modifiers. */
5981 for (; modifiers[0] != '\0'; modifiers++)
5982 switch (modifiers[0])
5984 case 'N':
5985 if (modifiers == name)
5986 error ("forcing explicit tag name but no name, ignoring", NULL);
5987 force_explicit_name = TRUE;
5988 break;
5989 case 'i':
5990 ignore_case = TRUE;
5991 break;
5992 case 's':
5993 single_line = TRUE;
5994 /* FALLTHRU */
5995 case 'm':
5996 multi_line = TRUE;
5997 need_filebuf = TRUE;
5998 break;
5999 default:
6001 char wrongmod [2];
6002 wrongmod[0] = modifiers[0];
6003 wrongmod[1] = '\0';
6004 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6006 break;
6009 patbuf = xnew (1, struct re_pattern_buffer);
6010 *patbuf = zeropattern;
6011 if (ignore_case)
6013 static char lc_trans[CHARS];
6014 int i;
6015 for (i = 0; i < CHARS; i++)
6016 lc_trans[i] = lowcase (i);
6017 patbuf->translate = lc_trans; /* translation table to fold case */
6020 if (multi_line)
6021 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6022 else
6023 pat = regexp_pattern;
6025 if (single_line)
6026 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6027 else
6028 re_set_syntax (RE_SYNTAX_EMACS);
6030 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6031 if (multi_line)
6032 free (pat);
6033 if (err != NULL)
6035 error ("%s while compiling pattern", err);
6036 return;
6039 rp = p_head;
6040 p_head = xnew (1, regexp);
6041 p_head->pattern = savestr (regexp_pattern);
6042 p_head->p_next = rp;
6043 p_head->lang = lang;
6044 p_head->pat = patbuf;
6045 p_head->name = savestr (name);
6046 p_head->error_signaled = FALSE;
6047 p_head->force_explicit_name = force_explicit_name;
6048 p_head->ignore_case = ignore_case;
6049 p_head->multi_line = multi_line;
6053 * Do the substitutions indicated by the regular expression and
6054 * arguments.
6056 static char *
6057 substitute (in, out, regs)
6058 char *in, *out;
6059 struct re_registers *regs;
6061 char *result, *t;
6062 int size, dig, diglen;
6064 result = NULL;
6065 size = strlen (out);
6067 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6068 if (out[size - 1] == '\\')
6069 fatal ("pattern error in \"%s\"", out);
6070 for (t = etags_strchr (out, '\\');
6071 t != NULL;
6072 t = etags_strchr (t + 2, '\\'))
6073 if (ISDIGIT (t[1]))
6075 dig = t[1] - '0';
6076 diglen = regs->end[dig] - regs->start[dig];
6077 size += diglen - 2;
6079 else
6080 size -= 1;
6082 /* Allocate space and do the substitutions. */
6083 assert (size >= 0);
6084 result = xnew (size + 1, char);
6086 for (t = result; *out != '\0'; out++)
6087 if (*out == '\\' && ISDIGIT (*++out))
6089 dig = *out - '0';
6090 diglen = regs->end[dig] - regs->start[dig];
6091 strncpy (t, in + regs->start[dig], diglen);
6092 t += diglen;
6094 else
6095 *t++ = *out;
6096 *t = '\0';
6098 assert (t <= result + size);
6099 assert (t - result == (int)strlen (result));
6101 return result;
6104 /* Deallocate all regexps. */
6105 static void
6106 free_regexps ()
6108 regexp *rp;
6109 while (p_head != NULL)
6111 rp = p_head->p_next;
6112 free (p_head->pattern);
6113 free (p_head->name);
6114 free (p_head);
6115 p_head = rp;
6117 return;
6121 * Reads the whole file as a single string from `filebuf' and looks for
6122 * multi-line regular expressions, creating tags on matches.
6123 * readline already dealt with normal regexps.
6125 * Idea by Ben Wing <ben@666.com> (2002).
6127 static void
6128 regex_tag_multiline ()
6130 char *buffer = filebuf.buffer;
6131 regexp *rp;
6132 char *name;
6134 for (rp = p_head; rp != NULL; rp = rp->p_next)
6136 int match = 0;
6138 if (!rp->multi_line)
6139 continue; /* skip normal regexps */
6141 /* Generic initialisations before parsing file from memory. */
6142 lineno = 1; /* reset global line number */
6143 charno = 0; /* reset global char number */
6144 linecharno = 0; /* reset global char number of line start */
6146 /* Only use generic regexps or those for the current language. */
6147 if (rp->lang != NULL && rp->lang != curfdp->lang)
6148 continue;
6150 while (match >= 0 && match < filebuf.len)
6152 match = re_search (rp->pat, buffer, filebuf.len, charno,
6153 filebuf.len - match, &rp->regs);
6154 switch (match)
6156 case -2:
6157 /* Some error. */
6158 if (!rp->error_signaled)
6160 error ("regexp stack overflow while matching \"%s\"",
6161 rp->pattern);
6162 rp->error_signaled = TRUE;
6164 break;
6165 case -1:
6166 /* No match. */
6167 break;
6168 default:
6169 if (match == rp->regs.end[0])
6171 if (!rp->error_signaled)
6173 error ("regexp matches the empty string: \"%s\"",
6174 rp->pattern);
6175 rp->error_signaled = TRUE;
6177 match = -3; /* exit from while loop */
6178 break;
6181 /* Match occurred. Construct a tag. */
6182 while (charno < rp->regs.end[0])
6183 if (buffer[charno++] == '\n')
6184 lineno++, linecharno = charno;
6185 name = rp->name;
6186 if (name[0] == '\0')
6187 name = NULL;
6188 else /* make a named tag */
6189 name = substitute (buffer, rp->name, &rp->regs);
6190 if (rp->force_explicit_name)
6191 /* Force explicit tag name, if a name is there. */
6192 pfnote (name, TRUE, buffer + linecharno,
6193 charno - linecharno + 1, lineno, linecharno);
6194 else
6195 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6196 charno - linecharno + 1, lineno, linecharno);
6197 break;
6204 static bool
6205 nocase_tail (cp)
6206 char *cp;
6208 register int len = 0;
6210 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6211 cp++, len++;
6212 if (*cp == '\0' && !intoken (dbp[len]))
6214 dbp += len;
6215 return TRUE;
6217 return FALSE;
6220 static void
6221 get_tag (bp, namepp)
6222 register char *bp;
6223 char **namepp;
6225 register char *cp = bp;
6227 if (*bp != '\0')
6229 /* Go till you get to white space or a syntactic break */
6230 for (cp = bp + 1; !notinname (*cp); cp++)
6231 continue;
6232 make_tag (bp, cp - bp, TRUE,
6233 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6236 if (namepp != NULL)
6237 *namepp = savenstr (bp, cp - bp);
6241 * Read a line of text from `stream' into `lbp', excluding the
6242 * newline or CR-NL, if any. Return the number of characters read from
6243 * `stream', which is the length of the line including the newline.
6245 * On DOS or Windows we do not count the CR character, if any before the
6246 * NL, in the returned length; this mirrors the behavior of Emacs on those
6247 * platforms (for text files, it translates CR-NL to NL as it reads in the
6248 * file).
6250 * If multi-line regular expressions are requested, each line read is
6251 * appended to `filebuf'.
6253 static long
6254 readline_internal (lbp, stream)
6255 linebuffer *lbp;
6256 register FILE *stream;
6258 char *buffer = lbp->buffer;
6259 register char *p = lbp->buffer;
6260 register char *pend;
6261 int chars_deleted;
6263 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6265 for (;;)
6267 register int c = getc (stream);
6268 if (p == pend)
6270 /* We're at the end of linebuffer: expand it. */
6271 lbp->size *= 2;
6272 xrnew (buffer, lbp->size, char);
6273 p += buffer - lbp->buffer;
6274 pend = buffer + lbp->size;
6275 lbp->buffer = buffer;
6277 if (c == EOF)
6279 *p = '\0';
6280 chars_deleted = 0;
6281 break;
6283 if (c == '\n')
6285 if (p > buffer && p[-1] == '\r')
6287 p -= 1;
6288 #ifdef DOS_NT
6289 /* Assume CRLF->LF translation will be performed by Emacs
6290 when loading this file, so CRs won't appear in the buffer.
6291 It would be cleaner to compensate within Emacs;
6292 however, Emacs does not know how many CRs were deleted
6293 before any given point in the file. */
6294 chars_deleted = 1;
6295 #else
6296 chars_deleted = 2;
6297 #endif
6299 else
6301 chars_deleted = 1;
6303 *p = '\0';
6304 break;
6306 *p++ = c;
6308 lbp->len = p - buffer;
6310 if (need_filebuf /* we need filebuf for multi-line regexps */
6311 && chars_deleted > 0) /* not at EOF */
6313 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6315 /* Expand filebuf. */
6316 filebuf.size *= 2;
6317 xrnew (filebuf.buffer, filebuf.size, char);
6319 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6320 filebuf.len += lbp->len;
6321 filebuf.buffer[filebuf.len++] = '\n';
6322 filebuf.buffer[filebuf.len] = '\0';
6325 return lbp->len + chars_deleted;
6329 * Like readline_internal, above, but in addition try to match the
6330 * input line against relevant regular expressions and manage #line
6331 * directives.
6333 static void
6334 readline (lbp, stream)
6335 linebuffer *lbp;
6336 FILE *stream;
6338 long result;
6340 linecharno = charno; /* update global char number of line start */
6341 result = readline_internal (lbp, stream); /* read line */
6342 lineno += 1; /* increment global line number */
6343 charno += result; /* increment global char number */
6345 /* Honour #line directives. */
6346 if (!no_line_directive)
6348 static bool discard_until_line_directive;
6350 /* Check whether this is a #line directive. */
6351 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6353 unsigned int lno;
6354 int start = 0;
6356 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6357 && start > 0) /* double quote character found */
6359 char *endp = lbp->buffer + start;
6361 while ((endp = etags_strchr (endp, '"')) != NULL
6362 && endp[-1] == '\\')
6363 endp++;
6364 if (endp != NULL)
6365 /* Ok, this is a real #line directive. Let's deal with it. */
6367 char *taggedabsname; /* absolute name of original file */
6368 char *taggedfname; /* name of original file as given */
6369 char *name; /* temp var */
6371 discard_until_line_directive = FALSE; /* found it */
6372 name = lbp->buffer + start;
6373 *endp = '\0';
6374 canonicalize_filename (name); /* for DOS */
6375 taggedabsname = absolute_filename (name, tagfiledir);
6376 if (filename_is_absolute (name)
6377 || filename_is_absolute (curfdp->infname))
6378 taggedfname = savestr (taggedabsname);
6379 else
6380 taggedfname = relative_filename (taggedabsname,tagfiledir);
6382 if (streq (curfdp->taggedfname, taggedfname))
6383 /* The #line directive is only a line number change. We
6384 deal with this afterwards. */
6385 free (taggedfname);
6386 else
6387 /* The tags following this #line directive should be
6388 attributed to taggedfname. In order to do this, set
6389 curfdp accordingly. */
6391 fdesc *fdp; /* file description pointer */
6393 /* Go look for a file description already set up for the
6394 file indicated in the #line directive. If there is
6395 one, use it from now until the next #line
6396 directive. */
6397 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6398 if (streq (fdp->infname, curfdp->infname)
6399 && streq (fdp->taggedfname, taggedfname))
6400 /* If we remove the second test above (after the &&)
6401 then all entries pertaining to the same file are
6402 coalesced in the tags file. If we use it, then
6403 entries pertaining to the same file but generated
6404 from different files (via #line directives) will
6405 go into separate sections in the tags file. These
6406 alternatives look equivalent. The first one
6407 destroys some apparently useless information. */
6409 curfdp = fdp;
6410 free (taggedfname);
6411 break;
6413 /* Else, if we already tagged the real file, skip all
6414 input lines until the next #line directive. */
6415 if (fdp == NULL) /* not found */
6416 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6417 if (streq (fdp->infabsname, taggedabsname))
6419 discard_until_line_directive = TRUE;
6420 free (taggedfname);
6421 break;
6423 /* Else create a new file description and use that from
6424 now on, until the next #line directive. */
6425 if (fdp == NULL) /* not found */
6427 fdp = fdhead;
6428 fdhead = xnew (1, fdesc);
6429 *fdhead = *curfdp; /* copy curr. file description */
6430 fdhead->next = fdp;
6431 fdhead->infname = savestr (curfdp->infname);
6432 fdhead->infabsname = savestr (curfdp->infabsname);
6433 fdhead->infabsdir = savestr (curfdp->infabsdir);
6434 fdhead->taggedfname = taggedfname;
6435 fdhead->usecharno = FALSE;
6436 fdhead->prop = NULL;
6437 fdhead->written = FALSE;
6438 curfdp = fdhead;
6441 free (taggedabsname);
6442 lineno = lno - 1;
6443 readline (lbp, stream);
6444 return;
6445 } /* if a real #line directive */
6446 } /* if #line is followed by a a number */
6447 } /* if line begins with "#line " */
6449 /* If we are here, no #line directive was found. */
6450 if (discard_until_line_directive)
6452 if (result > 0)
6454 /* Do a tail recursion on ourselves, thus discarding the contents
6455 of the line buffer. */
6456 readline (lbp, stream);
6457 return;
6459 /* End of file. */
6460 discard_until_line_directive = FALSE;
6461 return;
6463 } /* if #line directives should be considered */
6466 int match;
6467 regexp *rp;
6468 char *name;
6470 /* Match against relevant regexps. */
6471 if (lbp->len > 0)
6472 for (rp = p_head; rp != NULL; rp = rp->p_next)
6474 /* Only use generic regexps or those for the current language.
6475 Also do not use multiline regexps, which is the job of
6476 regex_tag_multiline. */
6477 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6478 || rp->multi_line)
6479 continue;
6481 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6482 switch (match)
6484 case -2:
6485 /* Some error. */
6486 if (!rp->error_signaled)
6488 error ("regexp stack overflow while matching \"%s\"",
6489 rp->pattern);
6490 rp->error_signaled = TRUE;
6492 break;
6493 case -1:
6494 /* No match. */
6495 break;
6496 case 0:
6497 /* Empty string matched. */
6498 if (!rp->error_signaled)
6500 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6501 rp->error_signaled = TRUE;
6503 break;
6504 default:
6505 /* Match occurred. Construct a tag. */
6506 name = rp->name;
6507 if (name[0] == '\0')
6508 name = NULL;
6509 else /* make a named tag */
6510 name = substitute (lbp->buffer, rp->name, &rp->regs);
6511 if (rp->force_explicit_name)
6512 /* Force explicit tag name, if a name is there. */
6513 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6514 else
6515 make_tag (name, strlen (name), TRUE,
6516 lbp->buffer, match, lineno, linecharno);
6517 break;
/*
 * Duplicate the NUL-terminated string CP.  The copy is obtained from
 * savenstr, asked for strlen (CP) characters.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6536 * Return a pointer to a space of size LEN+1 allocated with xnew where
6537 * the string CP has been copied for at most the first LEN characters.
6539 static char *
6540 savenstr (cp, len)
6541 char *cp;
6542 int len;
6544 register char *dp;
6546 dp = xnew (len + 1, char);
6547 strncpy (dp, cp, len);
6548 dp[len] = '\0';
6549 return dp;
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL when C does not occur.  The terminating NUL takes
 * part in the search, so looking for '\0' yields the end of SP.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;;)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
      sp++;
    }
  return (char *)last;
}
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL when C does not occur.  The terminating NUL takes
 * part in the search, so looking for '\0' yields the end of SP.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters.  The result is zero when they are equal; otherwise it is
 * the difference between the first pair of characters that differ,
 * taken after lower-casing when both are alphabetic.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings in step until S1 ends or the characters differ.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.  The result is zero when
 * those characters are equal (always so when N is not positive);
 * otherwise it is the difference between the first pair of characters
 * that differ, taken after lower-casing when both are alphabetic.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is decremented
     only for characters that matched, so that N == 0 on exit always
     means "the first N characters are equal", even when S1 ends
     exactly there.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Return a pointer to the first character at or after CP for which
   `iswhite' is false.  The original comment notes that the end of the
   string does not count as a space.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is either
   the end of the string or a character for which `iswhite' is true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
/* Report an error through `error', passing it the printf control string
   S1 and its argument S2, then terminate the program with status
   EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print S1 together with the system error message for the current
   `errno' by means of perror, then terminate the program with status
   EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6677 static void
6678 suggest_asking_for_help ()
6680 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6681 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6682 exit (EXIT_FAILURE);
6685 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6686 static void
6687 error (s1, s2)
6688 const char *s1, *s2;
6690 fprintf (stderr, "%s: ", progname);
6691 fprintf (stderr, s1, s2);
6692 fprintf (stderr, "\n");
6695 /* Return a newly-allocated string whose contents
6696 concatenate those of s1, s2, s3. */
6697 static char *
6698 concat (s1, s2, s3)
6699 char *s1, *s2, *s3;
6701 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6702 char *result = xnew (len1 + len2 + len3 + 1, char);
6704 strcpy (result, s1);
6705 strcpy (result + len1, s2);
6706 strcpy (result + len1 + len2, s3);
6707 result[len1 + len2 + len3] = '\0';
6709 return result;
6713 /* Does the same work as the system V getcwd, but does not need to
6714 guess the buffer size in advance. */
6715 static char *
6716 etags_getcwd ()
6718 #ifdef HAVE_GETCWD
6719 int bufsize = 200;
6720 char *path = xnew (bufsize, char);
6722 while (getcwd (path, bufsize) == NULL)
6724 if (errno != ERANGE)
6725 pfatal ("getcwd");
6726 bufsize *= 2;
6727 free (path);
6728 path = xnew (bufsize, char);
6731 canonicalize_filename (path);
6732 return path;
6734 #else /* not HAVE_GETCWD */
6735 #if MSDOS
6737 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6739 getwd (path);
6741 for (p = path; *p != '\0'; p++)
6742 if (*p == '\\')
6743 *p = '/';
6744 else
6745 *p = lowcase (*p);
6747 return strdup (path);
6748 #else /* not MSDOS */
6749 linebuffer path;
6750 FILE *pipe;
6752 linebuffer_init (&path);
6753 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6754 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6755 pfatal ("pwd");
6756 pclose (pipe);
6758 return path.buffer;
6759 #endif /* not MSDOS */
6760 #endif /* not HAVE_GETCWD */
6763 /* Return a newly allocated string containing the file name of FILE
6764 relative to the absolute directory DIR (which should end with a slash). */
6765 static char *
6766 relative_filename (file, dir)
6767 char *file, *dir;
6769 char *fp, *dp, *afn, *res;
6770 int i;
6772 /* Find the common root of file and dir (with a trailing slash). */
6773 afn = absolute_filename (file, cwd);
6774 fp = afn;
6775 dp = dir;
6776 while (*fp++ == *dp++)
6777 continue;
6778 fp--, dp--; /* back to the first differing char */
6779 #ifdef DOS_NT
6780 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6781 return afn;
6782 #endif
6783 do /* look at the equal chars until '/' */
6784 fp--, dp--;
6785 while (*fp != '/');
6787 /* Build a sequence of "../" strings for the resulting relative file name. */
6788 i = 0;
6789 while (*dp == '/')
6790 ++dp;
6791 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6793 i += 1;
6794 while (*dp == '/')
6795 ++dp;
6797 res = xnew (3*i + strlen (fp + 1) + 1, char);
6798 res[0] = '\0';
6799 while (i-- > 0)
6800 strcat (res, "../");
6802 /* Add the file name relative to the common root of file and dir. */
6803 strcat (res, fp + 1);
6804 free (afn);
6806 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as it is, any other is appended to DIR.
   The "/dirname/.." and "/." substrings are then removed from the
   result.  On DOS_NT a relative FILE with a drive letter is a fatal
   error.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the slash that starts the
                 preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy must not be
                 used.  The count covers the tail that follows "/.."
                 and its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlap here: drop the "/." and keep the tail
                 with its terminating NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is a copy
   of DIR.  FILE is passed through canonicalize_filename in place.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call,
     then put the overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
/* Whether the argument string is an absolute file name, that is whether
   it begins with a slash or, on DOS_NT, with a drive letter followed by
   ":/".  The argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
/* Bring the file name FN to canonical form.  Works in place.
   On DOS_NT a lower-case drive letter is made upper-case and every
   backslash is translated into a slash; elsewhere FN is left alone.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6928 /* Initialize a linebuffer for use */
6929 static void
6930 linebuffer_init (lbp)
6931 linebuffer *lbp;
6933 lbp->size = (DEBUG) ? 3 : 200;
6934 lbp->buffer = xnew (lbp->size, char);
6935 lbp->buffer[0] = '\0';
6936 lbp->len = 0;
6939 /* Set the minimum size of a string contained in a linebuffer. */
6940 static void
6941 linebuffer_setlen (lbp, toksize)
6942 linebuffer *lbp;
6943 int toksize;
6945 while (lbp->size <= toksize)
6947 lbp->size *= 2;
6948 xrnew (lbp->buffer, lbp->size, char);
6950 lbp->len = toksize;
6953 /* Like malloc but get fatal error if memory is exhausted. */
6954 static PTR
6955 xmalloc (size)
6956 unsigned int size;
6958 PTR result = (PTR) malloc (size);
6959 if (result == NULL)
6960 fatal ("virtual memory exhausted", (char *)NULL);
6961 return result;
6964 static PTR
6965 xrealloc (ptr, size)
6966 char *ptr;
6967 unsigned int size;
6969 PTR result = (PTR) realloc (ptr, size);
6970 if (result == NULL)
6971 fatal ("virtual memory exhausted", (char *)NULL);
6972 return result;
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */
6985 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6986 (do not change this comment) */
6988 /* etags.c ends here */