(Reverting): Add anchor.
[emacs.git] / lib-src / etags.c
blob829fc97fbfdbae371413c27dac19896857b4f737
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Line-by-line regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
38 char pot_etags_version[] = "@(#) pot revision number is 16.56";
40 #define TRUE 1
41 #define FALSE 0
43 #ifdef DEBUG
44 # undef DEBUG
45 # define DEBUG TRUE
46 #else
47 # define DEBUG FALSE
48 # define NDEBUG /* disable assert */
49 #endif
51 #ifdef HAVE_CONFIG_H
52 # include <config.h>
53 /* On some systems, Emacs defines static as nothing for the sake
54 of unexec. We don't want that here since we don't use unexec. */
55 # undef static
56 # define ETAGS_REGEXPS /* use the regexp features */
57 # define LONG_OPTIONS /* accept long options */
58 # ifndef PTR /* for Xemacs */
59 # define PTR void *
60 # endif
61 # ifndef __P /* for Xemacs */
62 # define __P(args) args
63 # endif
64 #else
65 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
66 # define __P(args) args /* use prototypes */
67 # define PTR void * /* for generic pointers */
68 # else
69 # define __P(args) () /* no prototypes */
70 # define const /* remove const for old compilers' sake */
71 # define PTR long * /* don't use void* */
72 # endif
73 #endif /* !HAVE_CONFIG_H */
75 #ifndef _GNU_SOURCE
76 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
77 #endif
79 /* WIN32_NATIVE is for Xemacs.
80 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
81 #ifdef WIN32_NATIVE
82 # undef MSDOS
83 # undef WINDOWSNT
84 # define WINDOWSNT
85 #endif /* WIN32_NATIVE */
87 #ifdef MSDOS
88 # undef MSDOS
89 # define MSDOS TRUE
90 # include <fcntl.h>
91 # include <sys/param.h>
92 # include <io.h>
93 # ifndef HAVE_CONFIG_H
94 # define DOS_NT
95 # include <sys/config.h>
96 # endif
97 #else
98 # define MSDOS FALSE
99 #endif /* MSDOS */
101 #ifdef WINDOWSNT
102 # include <stdlib.h>
103 # include <fcntl.h>
104 # include <string.h>
105 # include <direct.h>
106 # include <io.h>
107 # define MAXPATHLEN _MAX_PATH
108 # undef HAVE_NTGUI
109 # undef DOS_NT
110 # define DOS_NT
111 # ifndef HAVE_GETCWD
112 # define HAVE_GETCWD
113 # endif /* undef HAVE_GETCWD */
114 #else /* !WINDOWSNT */
115 # ifdef STDC_HEADERS
116 # include <stdlib.h>
117 # include <string.h>
118 # else
119 extern char *getenv ();
120 # endif
121 #endif /* !WINDOWSNT */
123 #ifdef HAVE_UNISTD_H
124 # include <unistd.h>
125 #else
126 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
127 extern char *getcwd (char *buf, size_t size);
128 # endif
129 #endif /* HAVE_UNISTD_H */
131 #include <stdio.h>
132 #include <ctype.h>
133 #include <errno.h>
134 #ifndef errno
135 extern int errno;
136 #endif
137 #include <sys/types.h>
138 #include <sys/stat.h>
140 #include <assert.h>
141 #ifdef NDEBUG
142 # undef assert /* some systems have a buggy assert.h */
143 # define assert(x) ((void) 0)
144 #endif
146 #if !defined (S_ISREG) && defined (S_IFREG)
147 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
148 #endif
150 #ifdef LONG_OPTIONS
151 # include <getopt.h>
152 #else
153 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
154 extern char *optarg;
155 extern int optind, opterr;
156 #endif /* LONG_OPTIONS */
158 #ifdef ETAGS_REGEXPS
159 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
160 # ifdef __CYGWIN__ /* compiling on Cygwin */
161 !!! NOTICE !!!
162 the regex.h distributed with Cygwin is not compatible with etags, alas!
163 If you want regular expression support, you should delete this notice and
164 arrange to use the GNU regex.h and regex.c.
165 # endif
166 # endif
167 # include <regex.h>
168 #endif /* ETAGS_REGEXPS */
170 /* Define CTAGS to make the program "ctags" compatible with the usual one.
171 Leave it undefined to make the program "etags", which makes emacs-style
172 tag tables and tags typedefs, #defines and struct/union/enum by default. */
173 #ifdef CTAGS
174 # undef CTAGS
175 # define CTAGS TRUE
176 #else
177 # define CTAGS FALSE
178 #endif
/* String equality predicates: each expands to non-zero when the two
   strings compare equal.  streq and strneq use strcmp and strncmp; the
   `case' variants go through etags_strcasecmp and etags_strncasecmp.
   The assertion requires BOTH arguments to be non-NULL, because the
   comparison that follows reads each of them.  It disappears when NDEBUG
   is defined.  With assertions enabled every argument is evaluated
   twice, so arguments must not have side effects.  */
#define streq(s,t)        (assert ((s) != NULL && (t) != NULL), !strcmp ((s), (t)))
#define strcaseeq(s,t)    (assert ((s) != NULL && (t) != NULL), !etags_strcasecmp ((s), (t)))
#define strneq(s,t,n)     (assert ((s) != NULL && (t) != NULL), !strncmp ((s), (t), (n)))
#define strncaseeq(s,t,n) (assert ((s) != NULL && (t) != NULL), !etags_strncasecmp ((s), (t), (n)))
/* Character classification helpers.
   CHARS is the number of distinct values of an 8-bit char (2^8).  It has
   to remain a power of two: CHAR masks its argument with CHARS - 1, which
   turns a possibly negative char into an index in the range 0..CHARS-1.
   That index is what the _wht/_nin/_btk/_itk/_etk lookup tables and the
   <ctype.h> wrappers below are given.  */
#define CHARS 256
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
#define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that are safe to call with a plain (signed) char.  */
#define ISALNUM(c)      isalnum (CHAR(c))
#define ISALPHA(c)      isalpha (CHAR(c))
#define ISDIGIT(c)      isdigit (CHAR(c))
#define ISLOWER(c)      islower (CHAR(c))

#define lowcase(c)      tolower (CHAR(c))
#define upcase(c)       toupper (CHAR(c))
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew expands to an allocation of N objects of TYPE, cast to `Type *'.
 * xrnew resizes the storage OP points to so that it holds N objects of
 * TYPE and assigns the result back to OP, so OP must be an lvalue.
 * When DEBUG is non-zero both macros call trace_malloc/trace_realloc
 * (presumably declared in chkmalloc.h) and pass the call site's file and
 * line; otherwise they call xmalloc and xrealloc.
 * The product N * sizeof (Type) is not checked for overflow.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
/* Booleans are plain ints throughout this file.  */
#define bool int

/* Type of a language parse function: it is handed a FILE *.  */
typedef void Lang_function __P((FILE *));

/* A compressed-file suffix together with the command that expands it.  */
typedef struct
{
  char *suffix;                 /* file name suffix for this compressor */
  char *command;                /* takes one arg and decompresses to stdout */
} compressor;

/* Everything known about one supported language.  */
typedef struct
{
  char *name;                   /* language name */
  char *help;                   /* detailed help for the language */
  Lang_function *function;      /* parse function */
  char **suffixes;              /* name suffixes of this language's files */
  char **filenames;             /* names of this language's files */
  char **interpreters;          /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;

/* Description of one input file; descriptions form a singly linked list.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;

/* One tag, stored as a node of a binary tree.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of file to whom tag belongs */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;

/* Used to support mixing of --lang and file names.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression; entries are chained
   through p_next.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
#endif /* ETAGS_REGEXPS */
313 /* Many compilers barf on this:
314 Lang_function Ada_funcs;
315 so let's write it this way */
316 static void Ada_funcs __P((FILE *));
317 static void Asm_labels __P((FILE *));
318 static void C_entries __P((int c_ext, FILE *));
319 static void default_C_entries __P((FILE *));
320 static void plain_C_entries __P((FILE *));
321 static void Cjava_entries __P((FILE *));
322 static void Cobol_paragraphs __P((FILE *));
323 static void Cplusplus_entries __P((FILE *));
324 static void Cstar_entries __P((FILE *));
325 static void Erlang_functions __P((FILE *));
326 static void Fortran_functions __P((FILE *));
327 static void HTML_labels __P((FILE *));
328 static void Lisp_functions __P((FILE *));
329 static void Makefile_targets __P((FILE *));
330 static void Pascal_functions __P((FILE *));
331 static void Perl_functions __P((FILE *));
332 static void PHP_functions __P((FILE *));
333 static void PS_functions __P((FILE *));
334 static void Prolog_functions __P((FILE *));
335 static void Python_functions __P((FILE *));
336 static void Scheme_functions __P((FILE *));
337 static void TeX_commands __P((FILE *));
338 static void Texinfo_nodes __P((FILE *));
339 static void Yacc_entries __P((FILE *));
340 static void just_read_file __P((FILE *));
342 static void print_language_names __P((void));
343 static void print_version __P((void));
344 static void print_help __P((argument *));
345 int main __P((int, char **));
347 static compressor *get_compressor_from_suffix __P((char *, char **));
348 static language *get_language_from_langname __P((const char *));
349 static language *get_language_from_interpreter __P((char *));
350 static language *get_language_from_filename __P((char *, bool));
351 static void readline __P((linebuffer *, FILE *));
352 static long readline_internal __P((linebuffer *, FILE *));
353 static bool nocase_tail __P((char *));
354 static void get_tag __P((char *, char **));
356 #ifdef ETAGS_REGEXPS
357 static void analyse_regex __P((char *));
358 static void free_regexps __P((void));
359 static void regex_tag_multiline __P((void));
360 #endif /* ETAGS_REGEXPS */
361 static void error __P((const char *, const char *));
362 static void suggest_asking_for_help __P((void));
363 void fatal __P((char *, char *));
364 static void pfatal __P((char *));
365 static void add_node __P((node *, node **));
367 static void init __P((void));
368 static void process_file_name __P((char *, language *));
369 static void process_file __P((FILE *, char *, language *));
370 static void find_entries __P((FILE *));
371 static void free_tree __P((node *));
372 static void free_fdesc __P((fdesc *));
373 static void pfnote __P((char *, bool, char *, int, int, long));
374 static void make_tag __P((char *, int, bool, char *, int, int, long));
375 static void invalidate_nodes __P((fdesc *, node **));
376 static void put_entries __P((node *));
378 static char *concat __P((char *, char *, char *));
379 static char *skip_spaces __P((char *));
380 static char *skip_non_spaces __P((char *));
381 static char *savenstr __P((char *, int));
382 static char *savestr __P((char *));
383 static char *etags_strchr __P((const char *, int));
384 static char *etags_strrchr __P((const char *, int));
385 static int etags_strcasecmp __P((const char *, const char *));
386 static int etags_strncasecmp __P((const char *, const char *, int));
387 static char *etags_getcwd __P((void));
388 static char *relative_filename __P((char *, char *));
389 static char *absolute_filename __P((char *, char *));
390 static char *absolute_dirname __P((char *, char *));
391 static bool filename_is_absolute __P((char *f));
392 static void canonicalize_filename __P((char *));
393 static void linebuffer_init __P((linebuffer *));
394 static void linebuffer_setlen __P((linebuffer *, int));
395 static PTR xmalloc __P((unsigned int));
396 static PTR xrealloc __P((char *, unsigned int));
399 static char searchar = '/'; /* use /.../ searches */
401 static char *tagfile; /* output file */
402 static char *progname; /* name this program was invoked with */
403 static char *cwd; /* current working directory */
404 static char *tagfiledir; /* directory of tagfile */
405 static FILE *tagf; /* ioptr for tags file */
407 static fdesc *fdhead; /* head of file description list */
408 static fdesc *curfdp; /* current file description */
409 static int lineno; /* line number of current line */
410 static long charno; /* current character number */
411 static long linecharno; /* charno of start of current line */
412 static char *dbp; /* pointer to start of current tag */
414 static const int invalidcharno = -1;
416 static node *nodehead; /* the head of the binary tree of tags */
417 static node *last_node; /* the last node created */
419 static linebuffer lb; /* the current line */
420 static linebuffer filebuf; /* a buffer containing the whole file */
421 static linebuffer token_name; /* a buffer containing a tag name */
423 /* boolean "functions" (see init) */
424 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
425 static char
426 /* white chars */
427 *white = " \f\t\n\r\v",
428 /* not in a name */
429 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
430 /* token ending chars */
431 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
432 /* token starting chars */
433 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
434 /* valid in-token chars */
435 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
437 static bool append_to_tagfile; /* -a: append to tags */
438 /* The next four default to TRUE for etags, but to FALSE for ctags. */
439 static bool typedefs; /* -t: create tags for C and Ada typedefs */
440 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
441 /* 0 struct/enum/union decls, and C++ */
442 /* member functions. */
443 static bool constantypedefs; /* -d: create tags for C #define, enum */
444 /* constants and variables. */
445 /* -D: opposite of -d. Default under ctags. */
446 static bool globals; /* create tags for global variables */
447 static bool declarations; /* --declarations: tag them and extern in C&Co*/
448 static bool members; /* create tags for C member variables */
449 static bool no_line_directive; /* ignore #line directives (undocumented) */
450 static bool update; /* -u: update tags */
451 static bool vgrind_style; /* -v: create vgrind style index output */
452 static bool no_warnings; /* -w: suppress warnings */
453 static bool cxref_style; /* -x: create cxref style output */
454 static bool cplusplus; /* .[hc] means C++, not C */
455 static bool ignoreindent; /* -I: ignore indentation in C */
456 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
458 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
459 static bool parsing_stdin; /* --parse-stdin used */
461 #ifdef ETAGS_REGEXPS
462 static regexp *p_head; /* list of all regexps */
463 static bool need_filebuf; /* some regexes are multi-line */
464 #else
465 # define need_filebuf FALSE
466 #endif /* ETAGS_REGEXPS */
#ifdef LONG_OPTIONS
/* Table of long options.  Following the getopt_long convention, an
   entry whose third field is NULL makes getopt_long return the fourth
   field, while an entry with a non-NULL third field has the fourth field
   stored into the flag it points to.  The { NULL } entry ends the table.
   NOTE(review): "help" is listed twice (returning 'h' and 'H'); both
   entries carry the same name, so confirm the second one is intended.  */
static struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
#endif /* ETAGS_REGEXPS */
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "append",             no_argument,       NULL,               'a'   },
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
#endif /* LONG_OPTIONS */
/* Known compressed-file suffixes, each paired with the command that
   decompresses such a file to stdout.  Upper- and lower-case spellings
   of the gzip suffixes are listed separately.  The { NULL } entry is
   presumably the end-of-table sentinel -- confirm against the lookup
   code.  */
511 static compressor compressors[] =
513 { "z", "gzip -d -c"},
514 { "Z", "gzip -d -c"},
515 { "gz", "gzip -d -c"},
516 { "GZ", "gzip -d -c"},
517 { "bz2", "bzip2 -d -c" },
518 { NULL }
522 * Language stuff.
525 /* Ada code */
526 static char *Ada_suffixes [] =
527 { "ads", "adb", "ada", NULL };
528 static char Ada_help [] =
529 "In Ada code, functions, procedures, packages, tasks and types are\n\
530 tags. Use the `--packages-only' option to create tags for\n\
531 packages only.\n\
532 Ada tag names have suffixes indicating the type of entity:\n\
533 Entity type: Qualifier:\n\
534 ------------ ----------\n\
535 function /f\n\
536 procedure /p\n\
537 package spec /s\n\
538 package body /b\n\
539 type /t\n\
540 task /k\n\
541 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
542 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
543 will just search for any tag `bidule'.";
545 /* Assembly code */
546 static char *Asm_suffixes [] =
547 { "a", /* Unix assembler */
548 "asm", /* Microcontroller assembly */
549 "def", /* BSO/Tasking definition includes */
550 "inc", /* Microcontroller include files */
551 "ins", /* Microcontroller include files */
552 "s", "sa", /* Unix assembler */
553 "S", /* cpp-processed Unix assembler */
554 "src", /* BSO/Tasking C compiler output */
555 NULL
557 static char Asm_help [] =
558 "In assembler code, labels appearing at the beginning of a line,\n\
559 followed by a colon, are tags.";
562 /* Note that .c and .h can be considered C++, if the --c++ flag was
563 given, or if the `class' or `template' keywords are met inside the file.
564 That is why default_C_entries is called for these. */
565 static char *default_C_suffixes [] =
566 { "c", "h", NULL };
567 static char default_C_help [] =
568 "In C code, any C function or typedef is a tag, and so are\n\
569 definitions of `struct', `union' and `enum'. `#define' macro\n\
570 definitions and `enum' constants are tags unless you specify\n\
571 `--no-defines'. Global variables are tags unless you specify\n\
572 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
573 can make the tags table file much smaller.\n\
574 You can tag function declarations and external variables by\n\
575 using `--declarations', and struct members by using `--members'.";
577 static char *Cplusplus_suffixes [] =
578 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
579 "M", /* Objective C++ */
580 "pdb", /* Postscript with C syntax */
581 NULL };
582 static char Cplusplus_help [] =
583 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
584 --help --lang=c --lang=c++ for full help.)\n\
585 In addition to C tags, member functions are also recognized, and\n\
586 optionally member variables if you use the `--members' option.\n\
587 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
588 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
589 `operator+'.";
591 static char *Cjava_suffixes [] =
592 { "java", NULL };
593 static char Cjava_help [] =
594 "In Java code, all the tags constructs of C and C++ code are\n\
595 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
598 static char *Cobol_suffixes [] =
599 { "COB", "cob", NULL };
600 static char Cobol_help [] =
601 "In Cobol code, tags are paragraph names; that is, any word\n\
602 starting in column 8 and followed by a period.";
604 static char *Cstar_suffixes [] =
605 { "cs", "hs", NULL };
607 static char *Erlang_suffixes [] =
608 { "erl", "hrl", NULL };
609 static char Erlang_help [] =
610 "In Erlang code, the tags are the functions, records and macros\n\
611 defined in the file.";
613 static char *Fortran_suffixes [] =
614 { "F", "f", "f90", "for", NULL };
615 static char Fortran_help [] =
616 "In Fortran code, functions, subroutines and block data are tags.";
618 static char *HTML_suffixes [] =
619 { "htm", "html", "shtml", NULL };
620 static char HTML_help [] =
621 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
622 `h3' headers. Also, tags are `name=' in anchors and all\n\
623 occurrences of `id='.";
625 static char *Lisp_suffixes [] =
626 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
627 static char Lisp_help [] =
628 "In Lisp code, any function defined with `defun', any variable\n\
629 defined with `defvar' or `defconst', and in general the first\n\
630 argument of any expression that starts with `(def' in column zero\n\
631 is a tag.";
633 static char *Makefile_filenames [] =
634 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
635 static char Makefile_help [] =
636 "In makefiles, targets are tags; additionally, variables are tags\n\
637 unless you specify `--no-globals'.";
639 static char *Objc_suffixes [] =
640 { "lm", /* Objective lex file */
641 "m", /* Objective C file */
642 NULL };
643 static char Objc_help [] =
644 "In Objective C code, tags include Objective C definitions for classes,\n\
645 class categories, methods and protocols. Tags for variables and\n\
646 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
648 static char *Pascal_suffixes [] =
649 { "p", "pas", NULL };
650 static char Pascal_help [] =
651 "In Pascal code, the tags are the functions and procedures defined\n\
652 in the file.";
654 static char *Perl_suffixes [] =
655 { "pl", "pm", NULL };
656 static char *Perl_interpreters [] =
657 { "perl", "@PERL@", NULL };
658 static char Perl_help [] =
659 "In Perl code, the tags are the packages, subroutines and variables\n\
660 defined by the `package', `sub', `my' and `local' keywords. Use\n\
661 `--globals' if you want to tag global variables. Tags for\n\
662 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
663 defined in the default package is `main::SUB'.";
665 static char *PHP_suffixes [] =
666 { "php", "php3", "php4", NULL };
667 static char PHP_help [] =
668 "In PHP code, tags are functions, classes and defines. When using\n\
669 the `--members' option, vars are tags too.";
671 static char *plain_C_suffixes [] =
672 { "pc", /* Pro*C file */
673 NULL };
675 static char *PS_suffixes [] =
676 { "ps", "psw", NULL }; /* .psw is for PSWrap */
677 static char PS_help [] =
678 "In PostScript code, the tags are the functions.";
680 static char *Prolog_suffixes [] =
681 { "prolog", NULL };
682 static char Prolog_help [] =
683 "In Prolog code, tags are predicates and rules at the beginning of\n\
684 line.";
686 static char *Python_suffixes [] =
687 { "py", NULL };
688 static char Python_help [] =
689 "In Python code, `def' or `class' at the beginning of a line\n\
690 generate a tag.";
692 /* Can't do the `SCM' or `scm' prefix with a version number. */
693 static char *Scheme_suffixes [] =
694 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
695 static char Scheme_help [] =
696 "In Scheme code, tags include anything defined with `def' or with a\n\
697 construct whose name starts with `def'. They also include\n\
698 variables set with `set!' at top level in the file.";
700 static char *TeX_suffixes [] =
701 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
702 static char TeX_help [] =
703 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
704 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
705 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
706 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
707 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
709 Other commands can be specified by setting the environment variable\n\
710 `TEXTAGS' to a colon-separated list like, for example,\n\
711 TEXTAGS=\"mycommand:myothercommand\".";
714 static char *Texinfo_suffixes [] =
715 { "texi", "texinfo", "txi", NULL };
716 static char Texinfo_help [] =
717 "for texinfo files, lines starting with @node are tagged.";
719 static char *Yacc_suffixes [] =
720 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
721 static char Yacc_help [] =
722 "In Bison or Yacc input files, each rule defines as a tag the\n\
723 nonterminal it constructs. The portions of the file that contain\n\
724 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
725 for full help).";
727 static char auto_help [] =
728 "`auto' is not a real language, it indicates to use\n\
729 a default language for files base on file name suffix and file contents.";
731 static char none_help [] =
732 "`none' is not a real language, it indicates to only do\n\
733 regexp processing on files.";
735 static char no_lang_help [] =
736 "No detailed help available for this language.";
740 * Table of languages.
742 * It is ok for a given function to be listed under more than one
743 * name. I just didn't.
/* One entry per supported language.  The initializers follow the member
   order of `language': name, help text, parse function, suffixes, file
   names, interpreters, metasource flag.  Members left out at the end of
   an entry are zero-initialized, so most languages have no file names or
   interpreters, and "auto" has no parse function.
   plain_C_entries is shared by the "objc" and "proc" entries.
   The entry whose name is NULL ends the table; print_language_names
   stops there.  */
746 static language lang_names [] =
748 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
749 { "asm", Asm_help, Asm_labels, Asm_suffixes },
750 { "c", default_C_help, default_C_entries, default_C_suffixes },
751 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
752 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
753 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
754 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
755 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
756 { "html", HTML_help, HTML_labels, HTML_suffixes },
757 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
758 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
759 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
760 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
761 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
762 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
763 { "php", PHP_help, PHP_functions, PHP_suffixes },
764 { "postscript",PS_help, PS_functions, PS_suffixes },
765 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
766 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
767 { "python", Python_help, Python_functions, Python_suffixes },
768 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
769 { "tex", TeX_help, TeX_commands, TeX_suffixes },
770 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
771 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
772 { "auto", auto_help }, /* default guessing scheme */
773 { "none", none_help, just_read_file }, /* regexp matching only */
774 { NULL } /* end of list */
/* Print the list of supported languages on standard output.
   After a heading, each entry of lang_names (up to the one whose name is
   NULL) gets one line: the language name left-justified in a field of
   width 10, then its default file names, then its suffixes, each suffix
   preceded by a dot.  A language with no file names or no suffixes simply
   omits that part.  Two fixed paragraphs follow, describing how the
   language is guessed and noting compressed-file support.  */
778 static void
779 print_language_names ()
781 language *lang;
782 char **name, **ext;
784 puts ("\nThese are the currently supported languages, along with the\n\
785 default file names and dot suffixes:");
786 for (lang = lang_names; lang->name != NULL; lang++)
788 printf (" %-*s", 10, lang->name);
789 if (lang->filenames != NULL)
790 for (name = lang->filenames; *name != NULL; name++)
791 printf (" %s", *name);
792 if (lang->suffixes != NULL)
793 for (ext = lang->suffixes; *ext != NULL; ext++)
794 printf (" .%s", *ext);
795 puts (""); /* end the line for this language */
797 puts ("where `auto' means use default language for files based on file\n\
798 name suffix, and `none' means only do regexp processing on files.\n\
799 If no language is specified and no matching suffix is found,\n\
800 the first line of the file is read for a sharp-bang (#!) sequence\n\
801 followed by the name of an interpreter. If no such sequence is found,\n\
802 Fortran is tried first; if no tags are found, C is tried next.\n\
803 When parsing any C file, a \"class\" or \"template\" keyword\n\
804 switches to C++.");
805 puts ("Compressed files are supported using gzip and bzip2.\n\
807 For detailed help on a given language use, for example,\n\
808 etags --help --lang=ada.");
/* Fallbacks used in the version banner when the build does not define
   EMACS_NAME or VERSION.  */
811 #ifndef EMACS_NAME
812 # define EMACS_NAME "standalone"
813 #endif
814 #ifndef VERSION
815 # define VERSION "version"
816 #endif
/* Print the version banner on standard output and terminate the program.
   The banner names the program "ctags" when CTAGS is non-zero and
   "etags" otherwise, followed by EMACS_NAME and VERSION, then the
   copyright and licensing lines.  This function does not return: it
   calls exit with EXIT_SUCCESS.  */
817 static void
818 print_version ()
820 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
821 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
822 puts ("This program is distributed under the same terms as Emacs");
824 exit (EXIT_SUCCESS);
/*
 * print_help (ARGBUFFER)
 * Print usage information on standard output and exit with EXIT_SUCCESS.
 * ARGBUFFER is the argument list built by main, terminated by an entry
 * whose arg_type is at_end.  If it contains any at_language entries, only
 * the help text of those languages is printed; otherwise the general
 * option summary is printed, with the option list depending on whether
 * this is the ctags or the etags flavour (CTAGS).
 * NOTE(review): the text below is a line-numbered extraction of the
 * original; brace-only lines are missing and the leading white space of
 * the continued string literals has been collapsed.
 */
827 static void
828 print_help (argbuffer)
829 argument *argbuffer;
831 bool help_for_lang = FALSE;
/* Language-specific help: one paragraph per requested language,
   separated by an empty line. */
833 for (; argbuffer->arg_type != at_end; argbuffer++)
834 if (argbuffer->arg_type == at_language)
836 if (help_for_lang)
837 puts ("");
838 puts (argbuffer->lang->help);
839 help_for_lang = TRUE;
/* If any language help was printed, the general help is skipped. */
842 if (help_for_lang)
843 exit (EXIT_SUCCESS);
/* General help: synopsis first, then one entry per option. */
845 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
847 These are the options accepted by %s.\n", progname, progname);
848 #ifdef LONG_OPTIONS
849 puts ("You may use unambiguous abbreviations for the long option names.");
850 #else
851 puts ("Long option names do not work with this executable, as it is not\n\
852 linked with GNU getopt.");
853 #endif /* LONG_OPTIONS */
854 puts (" A - as file name means read names from stdin (one per line).\n\
855 Absolute names are stored in the output file as they are.\n\
856 Relative ones are stored relative to the output file's directory.\n");
858 if (!CTAGS)
859 puts ("-a, --append\n\
860 Append tag entries to existing tags file.");
862 puts ("--packages-only\n\
863 For Ada files, only generate tags for packages.");
865 if (CTAGS)
866 puts ("-B, --backward-search\n\
867 Write the search commands for the tag entries using '?', the\n\
868 backward-search command instead of '/', the forward-search command.");
870 /* This option is mostly obsolete, because etags can now automatically
871 detect C++. Retained for backward compatibility and for debugging and
872 experimentation. In principle, we could want to tag as C++ even
873 before any "class" or "template" keyword.
874 puts ("-C, --c++\n\
875 Treat files whose name suffix defaults to C language as C++ files.");
878 puts ("--declarations\n\
879 In C and derived languages, create tags for function declarations,");
880 if (CTAGS)
881 puts ("\tand create tags for extern variables if --globals is used.");
882 else
883 puts
884 ("\tand create tags for extern variables unless --no-globals is used.");
886 if (CTAGS)
887 puts ("-d, --defines\n\
888 Create tag entries for C #define constants and enum constants, too.");
889 else
890 puts ("-D, --no-defines\n\
891 Don't create tag entries for C #define constants and enum constants.\n\
892 This makes the tags file smaller.");
894 if (!CTAGS)
895 puts ("-i FILE, --include=FILE\n\
896 Include a note in tag file indicating that, when searching for\n\
897 a tag, one should also consult the tags file FILE after\n\
898 checking the current file.");
900 puts ("-l LANG, --language=LANG\n\
901 Force the following files to be considered as written in the\n\
902 named language up to the next --language=LANG option.");
904 if (CTAGS)
905 puts ("--globals\n\
906 Create tag entries for global variables in some languages.");
907 else
908 puts ("--no-globals\n\
909 Do not create tag entries for global variables in some\n\
910 languages. This makes the tags file smaller.");
911 puts ("--members\n\
912 Create tag entries for members of structures in some languages.");
914 #ifdef ETAGS_REGEXPS
915 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
916 Make a tag for each line matching a regular expression pattern\n\
917 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
918 files only. REGEXFILE is a file containing one REGEXP per line.\n\
919 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
920 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
921 puts (" If TAGNAME/ is present, the tags created are named.\n\
922 For example Tcl named tags can be created with:\n\
923 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
924 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
925 `m' means to allow multi-line matches, `s' implies `m' and\n\
926 causes dot to match any character, including newline.");
927 puts ("-R, --no-regex\n\
928 Don't create tags from regexps for the following files.");
929 #endif /* ETAGS_REGEXPS */
930 puts ("-I, --ignore-indentation\n\
931 In C and C++ do not assume that a closing brace in the first\n\
932 column is the final brace of a function or structure definition.");
933 puts ("-o FILE, --output=FILE\n\
934 Write the tags to FILE.");
935 puts ("--parse-stdin=NAME\n\
936 Read from standard input and record tags as belonging to file NAME.");
938 if (CTAGS)
940 puts ("-t, --typedefs\n\
941 Generate tag entries for C and Ada typedefs.");
942 puts ("-T, --typedefs-and-c++\n\
943 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
944 and C++ member functions.");
947 if (CTAGS)
948 puts ("-u, --update\n\
949 Update the tag entries for the given files, leaving tag\n\
950 entries for other files in place. Currently, this is\n\
951 implemented by deleting the existing entries for the given\n\
952 files and then rewriting the new entries at the end of the\n\
953 tags file. It is often faster to simply rebuild the entire\n\
954 tag file than to use this.");
956 if (CTAGS)
958 puts ("-v, --vgrind\n\
959 Generates an index of items intended for human consumption,\n\
960 similar to the output of vgrind. The index is sorted, and\n\
961 gives the page number of each item.");
962 puts ("-w, --no-warn\n\
963 Suppress warning messages about entries defined in multiple\n\
964 files.");
965 puts ("-x, --cxref\n\
966 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
967 The output uses line numbers instead of page numbers, but\n\
968 beyond that the differences are cosmetic; try both to see\n\
969 which you like.");
972 puts ("-V, --version\n\
973 Print the version of the program.\n\
974 -h, --help\n\
975 Print this help message.\n\
976 Followed by one or more `--language' options prints detailed\n\
977 help about tag generation for the specified languages.");
979 print_language_names ();
981 puts ("");
982 puts ("Report bugs to bug-gnu-emacs@gnu.org");
984 exit (EXIT_SUCCESS);
988 #ifdef VMS /* VMS specific functions */
/* End-of-string marker used by the VMS helpers. */
#define EOS	'\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define	MAX_FILE_SPEC_LEN	255

/* A counted file specification: fn_exp hands this buffer to
   lib$find_file and then NUL-terminates BODY at index CURLEN. */
typedef struct
{
  short curlen;				/* length of the text in BODY */
  char body[MAX_FILE_SPEC_LEN + 1];	/* the text, plus room for EOS */
} vspec;
1001 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1002 returning in each successive call the next file name matching the input
1003 spec. The function expects that each in_spec passed
1004 to it will be processed to completion; in particular, up to and
1005 including the call following that in which the last matching name
1006 is returned, the function ignores the value of in_spec, and will
1007 only start processing a new spec with the following call.
1008 If an error occurs, on return out_spec contains the value
1009 of in_spec when the error occurred.
1011 With each successive file name returned in out_spec, the
1012 function's return value is one. When there are no more matching
1013 names the function returns zero. If on the first call no file
1014 matches in_spec, or there is any other error, -1 is returned.
1017 #include <rmsdef.h>
1018 #include <descrip.h>
1019 #define OUTSIZE MAX_FILE_SPEC_LEN
1020 static short
1021 fn_exp (out, in)
1022 vspec *out;
1023 char *in;
1025 static long context = 0;
1026 static struct dsc$descriptor_s o;
1027 static struct dsc$descriptor_s i;
1028 static bool pass1 = TRUE;
1029 long status;
1030 short retval;
1032 if (pass1)
1034 pass1 = FALSE;
1035 o.dsc$a_pointer = (char *) out;
1036 o.dsc$w_length = (short)OUTSIZE;
1037 i.dsc$a_pointer = in;
1038 i.dsc$w_length = (short)strlen(in);
1039 i.dsc$b_dtype = DSC$K_DTYPE_T;
1040 i.dsc$b_class = DSC$K_CLASS_S;
1041 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1042 o.dsc$b_class = DSC$K_CLASS_VS;
1044 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1046 out->body[out->curlen] = EOS;
1047 return 1;
1049 else if (status == RMS$_NMF)
1050 retval = 0;
1051 else
1053 strcpy(out->body, in);
1054 retval = -1;
1056 lib$find_file_end(&context);
1057 pass1 = TRUE;
1058 return retval;
1062 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1063 name of each file specified by the provided arg expanding wildcards.
1065 static char *
1066 gfnames (arg, p_error)
1067 char *arg;
1068 bool *p_error;
1070 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1072 switch (fn_exp (&filename, arg))
1074 case 1:
1075 *p_error = FALSE;
1076 return filename.body;
1077 case 0:
1078 *p_error = FALSE;
1079 return NULL;
1080 default:
1081 *p_error = TRUE;
1082 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for the C library `system' on VMS versions that lack it.
 * CMD is ignored: an error message is printed and -1 is returned, so
 * that callers testing the result see a failure instead of an
 * indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1094 #define VERSION_DELIM ';'
1095 char *massage_name (s)
1096 char *s;
1098 char *start = s;
1100 for ( ; *s; s++)
1101 if (*s == VERSION_DELIM)
1103 *s = EOS;
1104 break;
1106 else
1107 *s = lowcase (*s);
1108 return start;
1110 #endif /* VMS */
1114 main (argc, argv)
1115 int argc;
1116 char *argv[];
1118 int i;
1119 unsigned int nincluded_files;
1120 char **included_files;
1121 argument *argbuffer;
1122 int current_arg, file_count;
1123 linebuffer filename_lb;
1124 bool help_asked = FALSE;
1125 #ifdef VMS
1126 bool got_err;
1127 #endif
1128 char *optstring;
1129 int opt;
1132 #ifdef DOS_NT
1133 _fmode = O_BINARY; /* all of files are treated as binary files */
1134 #endif /* DOS_NT */
1136 progname = argv[0];
1137 nincluded_files = 0;
1138 included_files = xnew (argc, char *);
1139 current_arg = 0;
1140 file_count = 0;
1142 /* Allocate enough no matter what happens. Overkill, but each one
1143 is small. */
1144 argbuffer = xnew (argc, argument);
1147 * If etags, always find typedefs and structure tags. Why not?
1148 * Also default to find macro constants, enum constants and
1149 * global variables.
1151 if (!CTAGS)
1153 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1154 globals = TRUE;
1157 optstring = "-";
1158 #ifdef ETAGS_REGEXPS
1159 optstring = "-r:Rc:";
1160 #endif /* ETAGS_REGEXPS */
1161 #ifndef LONG_OPTIONS
1162 optstring = optstring + 1;
1163 #endif /* LONG_OPTIONS */
1164 optstring = concat (optstring,
1165 "Cf:Il:o:SVhH",
1166 (CTAGS) ? "BxdtTuvw" : "aDi:");
1168 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1169 switch (opt)
1171 case 0:
1172 /* If getopt returns 0, then it has already processed a
1173 long-named option. We should do nothing. */
1174 break;
1176 case 1:
1177 /* This means that a file name has been seen. Record it. */
1178 argbuffer[current_arg].arg_type = at_filename;
1179 argbuffer[current_arg].what = optarg;
1180 ++current_arg;
1181 ++file_count;
1182 break;
1184 case STDIN:
1185 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1186 argbuffer[current_arg].arg_type = at_stdin;
1187 argbuffer[current_arg].what = optarg;
1188 ++current_arg;
1189 ++file_count;
1190 if (parsing_stdin)
1191 fatal ("cannot parse standard input more than once", (char *)NULL);
1192 parsing_stdin = TRUE;
1193 break;
1195 /* Common options. */
1196 case 'C': cplusplus = TRUE; break;
1197 case 'f': /* for compatibility with old makefiles */
1198 case 'o':
1199 if (tagfile)
1201 error ("-o option may only be given once.", (char *)NULL);
1202 suggest_asking_for_help ();
1203 /* NOTREACHED */
1205 tagfile = optarg;
1206 break;
1207 case 'I':
1208 case 'S': /* for backward compatibility */
1209 ignoreindent = TRUE;
1210 break;
1211 case 'l':
1213 language *lang = get_language_from_langname (optarg);
1214 if (lang != NULL)
1216 argbuffer[current_arg].lang = lang;
1217 argbuffer[current_arg].arg_type = at_language;
1218 ++current_arg;
1221 break;
1222 case 'c':
1223 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1224 optarg = concat (optarg, "i", ""); /* memory leak here */
1225 /* FALLTHRU */
1226 case 'r':
1227 argbuffer[current_arg].arg_type = at_regexp;
1228 argbuffer[current_arg].what = optarg;
1229 ++current_arg;
1230 break;
1231 case 'R':
1232 argbuffer[current_arg].arg_type = at_regexp;
1233 argbuffer[current_arg].what = NULL;
1234 ++current_arg;
1235 break;
1236 case 'V':
1237 print_version ();
1238 break;
1239 case 'h':
1240 case 'H':
1241 help_asked = TRUE;
1242 break;
1244 /* Etags options */
1245 case 'a': append_to_tagfile = TRUE; break;
1246 case 'D': constantypedefs = FALSE; break;
1247 case 'i': included_files[nincluded_files++] = optarg; break;
1249 /* Ctags options. */
1250 case 'B': searchar = '?'; break;
1251 case 'd': constantypedefs = TRUE; break;
1252 case 't': typedefs = TRUE; break;
1253 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1254 case 'u': update = TRUE; break;
1255 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1256 case 'x': cxref_style = TRUE; break;
1257 case 'w': no_warnings = TRUE; break;
1258 default:
1259 suggest_asking_for_help ();
1260 /* NOTREACHED */
1263 for (; optind < argc; optind++)
1265 argbuffer[current_arg].arg_type = at_filename;
1266 argbuffer[current_arg].what = argv[optind];
1267 ++current_arg;
1268 ++file_count;
1271 argbuffer[current_arg].arg_type = at_end;
1273 if (help_asked)
1274 print_help (argbuffer);
1275 /* NOTREACHED */
1277 if (nincluded_files == 0 && file_count == 0)
1279 error ("no input files specified.", (char *)NULL);
1280 suggest_asking_for_help ();
1281 /* NOTREACHED */
1284 if (tagfile == NULL)
1285 tagfile = CTAGS ? "tags" : "TAGS";
1286 cwd = etags_getcwd (); /* the current working directory */
1287 if (cwd[strlen (cwd) - 1] != '/')
1289 char *oldcwd = cwd;
1290 cwd = concat (oldcwd, "/", "");
1291 free (oldcwd);
1293 if (streq (tagfile, "-"))
1294 tagfiledir = cwd;
1295 else
1296 tagfiledir = absolute_dirname (tagfile, cwd);
1298 init (); /* set up boolean "functions" */
1300 linebuffer_init (&lb);
1301 linebuffer_init (&filename_lb);
1302 linebuffer_init (&filebuf);
1303 linebuffer_init (&token_name);
1305 if (!CTAGS)
1307 if (streq (tagfile, "-"))
1309 tagf = stdout;
1310 #ifdef DOS_NT
1311 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1312 doesn't take effect until after `stdout' is already open). */
1313 if (!isatty (fileno (stdout)))
1314 setmode (fileno (stdout), O_BINARY);
1315 #endif /* DOS_NT */
1317 else
1318 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1319 if (tagf == NULL)
1320 pfatal (tagfile);
1324 * Loop through files finding functions.
1326 for (i = 0; i < current_arg; i++)
1328 static language *lang; /* non-NULL if language is forced */
1329 char *this_file;
1331 switch (argbuffer[i].arg_type)
1333 case at_language:
1334 lang = argbuffer[i].lang;
1335 break;
1336 #ifdef ETAGS_REGEXPS
1337 case at_regexp:
1338 analyse_regex (argbuffer[i].what);
1339 break;
1340 #endif
1341 case at_filename:
1342 #ifdef VMS
1343 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1345 if (got_err)
1347 error ("can't find file %s\n", this_file);
1348 argc--, argv++;
1350 else
1352 this_file = massage_name (this_file);
1354 #else
1355 this_file = argbuffer[i].what;
1356 #endif
1357 /* Input file named "-" means read file names from stdin
1358 (one per line) and use them. */
1359 if (streq (this_file, "-"))
1361 if (parsing_stdin)
1362 fatal ("cannot parse standard input AND read file names from it",
1363 (char *)NULL);
1364 while (readline_internal (&filename_lb, stdin) > 0)
1365 process_file_name (filename_lb.buffer, lang);
1367 else
1368 process_file_name (this_file, lang);
1369 #ifdef VMS
1371 #endif
1372 break;
1373 case at_stdin:
1374 this_file = argbuffer[i].what;
1375 process_file (stdin, this_file, lang);
1376 break;
1380 #ifdef ETAGS_REGEXPS
1381 free_regexps ();
1382 #endif /* ETAGS_REGEXPS */
1383 free (lb.buffer);
1384 free (filebuf.buffer);
1385 free (token_name.buffer);
1387 if (!CTAGS || cxref_style)
1389 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1390 free_tree (nodehead);
1391 nodehead = NULL;
1392 if (!CTAGS)
1394 fdesc *fdp;
1396 /* Output file entries that have no tags. */
1397 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1398 if (!fdp->written)
1399 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1401 while (nincluded_files-- > 0)
1402 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1405 if (fclose (tagf) == EOF)
1406 pfatal (tagfile);
1407 exit (EXIT_SUCCESS);
1410 if (update)
1412 char cmd[BUFSIZ];
1413 for (i = 0; i < current_arg; ++i)
1415 switch (argbuffer[i].arg_type)
1417 case at_filename:
1418 case at_stdin:
1419 break;
1420 default:
1421 continue; /* the for loop */
1423 sprintf (cmd,
1424 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1425 tagfile, argbuffer[i].what, tagfile);
1426 if (system (cmd) != EXIT_SUCCESS)
1427 fatal ("failed to execute shell command", (char *)NULL);
1429 append_to_tagfile = TRUE;
1432 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1433 if (tagf == NULL)
1434 pfatal (tagfile);
1435 put_entries (nodehead); /* write all the tags (CTAGS) */
1436 free_tree (nodehead);
1437 nodehead = NULL;
1438 if (fclose (tagf) == EOF)
1439 pfatal (tagfile);
1441 if (update)
1443 char cmd[2*BUFSIZ+10];
1444 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1445 exit (system (cmd));
1447 return EXIT_SUCCESS;
1452 * Return a compressor given the file name. If EXTPTR is non-zero,
1453 * return a pointer into FILE where the compressor-specific
1454 * extension begins. If no compressor is found, NULL is returned
1455 * and EXTPTR is not significant.
1456 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1458 static compressor *
1459 get_compressor_from_suffix (file, extptr)
1460 char *file;
1461 char **extptr;
1463 compressor *compr;
1464 char *slash, *suffix;
1466 /* This relies on FN to be after canonicalize_filename,
1467 so we don't need to consider backslashes on DOS_NT. */
1468 slash = etags_strrchr (file, '/');
1469 suffix = etags_strrchr (file, '.');
1470 if (suffix == NULL || suffix < slash)
1471 return NULL;
1472 if (extptr != NULL)
1473 *extptr = suffix;
1474 suffix += 1;
1475 /* Let those poor souls who live with DOS 8+3 file name limits get
1476 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1477 Only the first do loop is run if not MSDOS */
1480 for (compr = compressors; compr->suffix != NULL; compr++)
1481 if (streq (compr->suffix, suffix))
1482 return compr;
1483 if (!MSDOS)
1484 break; /* do it only once: not really a loop */
1485 if (extptr != NULL)
1486 *extptr = ++suffix;
1487 } while (*suffix != '\0');
1488 return NULL;
1494 * Return a language given the name.
1496 static language *
1497 get_language_from_langname (name)
1498 const char *name;
1500 language *lang;
1502 if (name == NULL)
1503 error ("empty language name", (char *)NULL);
1504 else
1506 for (lang = lang_names; lang->name != NULL; lang++)
1507 if (streq (name, lang->name))
1508 return lang;
1509 error ("unknown language \"%s\"", name);
1512 return NULL;
1517 * Return a language given the interpreter name.
1519 static language *
1520 get_language_from_interpreter (interpreter)
1521 char *interpreter;
1523 language *lang;
1524 char **iname;
1526 if (interpreter == NULL)
1527 return NULL;
1528 for (lang = lang_names; lang->name != NULL; lang++)
1529 if (lang->interpreters != NULL)
1530 for (iname = lang->interpreters; *iname != NULL; iname++)
1531 if (streq (*iname, interpreter))
1532 return lang;
1534 return NULL;
1540 * Return a language given the file name.
1542 static language *
1543 get_language_from_filename (file, case_sensitive)
1544 char *file;
1545 bool case_sensitive;
1547 language *lang;
1548 char **name, **ext, *suffix;
1550 /* Try whole file name first. */
1551 for (lang = lang_names; lang->name != NULL; lang++)
1552 if (lang->filenames != NULL)
1553 for (name = lang->filenames; *name != NULL; name++)
1554 if ((case_sensitive)
1555 ? streq (*name, file)
1556 : strcaseeq (*name, file))
1557 return lang;
1559 /* If not found, try suffix after last dot. */
1560 suffix = etags_strrchr (file, '.');
1561 if (suffix == NULL)
1562 return NULL;
1563 suffix += 1;
1564 for (lang = lang_names; lang->name != NULL; lang++)
1565 if (lang->suffixes != NULL)
1566 for (ext = lang->suffixes; *ext != NULL; ext++)
1567 if ((case_sensitive)
1568 ? streq (*ext, suffix)
1569 : strcaseeq (*ext, suffix))
1570 return lang;
1571 return NULL;
1576 * This routine is called on each file argument.
1578 static void
1579 process_file_name (file, lang)
1580 char *file;
1581 language *lang;
1583 struct stat stat_buf;
1584 FILE *inf;
1585 fdesc *fdp;
1586 compressor *compr;
1587 char *compressed_name, *uncompressed_name;
1588 char *ext, *real_name;
1589 int retval;
1591 canonicalize_filename (file);
1592 if (streq (file, tagfile) && !streq (tagfile, "-"))
1594 error ("skipping inclusion of %s in self.", file);
1595 return;
1597 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1599 compressed_name = NULL;
1600 real_name = uncompressed_name = savestr (file);
1602 else
1604 real_name = compressed_name = savestr (file);
1605 uncompressed_name = savenstr (file, ext - file);
1608 /* If the canonicalized uncompressed name
1609 has already been dealt with, skip it silently. */
1610 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1612 assert (fdp->infname != NULL);
1613 if (streq (uncompressed_name, fdp->infname))
1614 goto cleanup;
1617 if (stat (real_name, &stat_buf) != 0)
1619 /* Reset real_name and try with a different name. */
1620 real_name = NULL;
1621 if (compressed_name != NULL) /* try with the given suffix */
1623 if (stat (uncompressed_name, &stat_buf) == 0)
1624 real_name = uncompressed_name;
1626 else /* try all possible suffixes */
1628 for (compr = compressors; compr->suffix != NULL; compr++)
1630 compressed_name = concat (file, ".", compr->suffix);
1631 if (stat (compressed_name, &stat_buf) != 0)
1633 if (MSDOS)
1635 char *suf = compressed_name + strlen (file);
1636 size_t suflen = strlen (compr->suffix) + 1;
1637 for ( ; suf[1]; suf++, suflen--)
1639 memmove (suf, suf + 1, suflen);
1640 if (stat (compressed_name, &stat_buf) == 0)
1642 real_name = compressed_name;
1643 break;
1646 if (real_name != NULL)
1647 break;
1648 } /* MSDOS */
1649 free (compressed_name);
1650 compressed_name = NULL;
1652 else
1654 real_name = compressed_name;
1655 break;
1659 if (real_name == NULL)
1661 perror (file);
1662 goto cleanup;
1664 } /* try with a different name */
1666 if (!S_ISREG (stat_buf.st_mode))
1668 error ("skipping %s: it is not a regular file.", real_name);
1669 goto cleanup;
1671 if (real_name == compressed_name)
1673 char *cmd = concat (compr->command, " ", real_name);
1674 inf = (FILE *) popen (cmd, "r");
1675 free (cmd);
1677 else
1678 inf = fopen (real_name, "r");
1679 if (inf == NULL)
1681 perror (real_name);
1682 goto cleanup;
1685 process_file (inf, uncompressed_name, lang);
1687 if (real_name == compressed_name)
1688 retval = pclose (inf);
1689 else
1690 retval = fclose (inf);
1691 if (retval < 0)
1692 pfatal (file);
1694 cleanup:
1695 if (compressed_name) free (compressed_name);
1696 if (uncompressed_name) free (uncompressed_name);
1697 last_node = NULL;
1698 curfdp = NULL;
1699 return;
1702 static void
1703 process_file (fh, fn, lang)
1704 FILE *fh;
1705 char *fn;
1706 language *lang;
1708 static const fdesc emptyfdesc;
1709 fdesc *fdp;
1711 /* Create a new input file description entry. */
1712 fdp = xnew (1, fdesc);
1713 *fdp = emptyfdesc;
1714 fdp->next = fdhead;
1715 fdp->infname = savestr (fn);
1716 fdp->lang = lang;
1717 fdp->infabsname = absolute_filename (fn, cwd);
1718 fdp->infabsdir = absolute_dirname (fn, cwd);
1719 if (filename_is_absolute (fn))
1721 /* An absolute file name. Canonicalize it. */
1722 fdp->taggedfname = absolute_filename (fn, NULL);
1724 else
1726 /* A file name relative to cwd. Make it relative
1727 to the directory of the tags file. */
1728 fdp->taggedfname = relative_filename (fn, tagfiledir);
1730 fdp->usecharno = TRUE; /* use char position when making tags */
1731 fdp->prop = NULL;
1732 fdp->written = FALSE; /* not written on tags file yet */
1734 fdhead = fdp;
1735 curfdp = fdhead; /* the current file description */
1737 find_entries (fh);
1739 /* If not Ctags, and if this is not metasource and if it contained no #line
1740 directives, we can write the tags and free all nodes pointing to
1741 curfdp. */
1742 if (!CTAGS
1743 && curfdp->usecharno /* no #line directives in this file */
1744 && !curfdp->lang->metasource)
1746 node *np, *prev;
1748 /* Look for the head of the sublist relative to this file. See add_node
1749 for the structure of the node tree. */
1750 prev = NULL;
1751 for (np = nodehead; np != NULL; prev = np, np = np->left)
1752 if (np->fdp == curfdp)
1753 break;
1755 /* If we generated tags for this file, write and delete them. */
1756 if (np != NULL)
1758 /* This is the head of the last sublist, if any. The following
1759 instructions depend on this being true. */
1760 assert (np->left == NULL);
1762 assert (fdhead == curfdp);
1763 assert (last_node->fdp == curfdp);
1764 put_entries (np); /* write tags for file curfdp->taggedfname */
1765 free_tree (np); /* remove the written nodes */
1766 if (prev == NULL)
1767 nodehead = NULL; /* no nodes left */
1768 else
1769 prev->left = NULL; /* delete the pointer to the sublist */
1775 * This routine sets up the boolean pseudo-functions which work
1776 * by setting boolean flags dependent upon the corresponding character.
1777 * Every char which is NOT in that string is not a white char. Therefore,
1778 * all of the array "_wht" is set to FALSE, and then the elements
1779 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1780 * of a char is TRUE if it is the string "white", else FALSE.
1782 static void
1783 init ()
1785 register char *sp;
1786 register int i;
1788 for (i = 0; i < CHARS; i++)
1789 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1790 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1791 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1792 notinname('\0') = notinname('\n');
1793 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1794 begtoken('\0') = begtoken('\n');
1795 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1796 intoken('\0') = intoken('\n');
1797 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1798 endtoken('\0') = endtoken('\n');
1802 * This routine opens the specified file and calls the function
1803 * which finds the function and type definitions.
1805 static void
1806 find_entries (inf)
1807 FILE *inf;
1809 char *cp;
1810 language *lang = curfdp->lang;
1811 Lang_function *parser = NULL;
1813 /* If user specified a language, use it. */
1814 if (lang != NULL && lang->function != NULL)
1816 parser = lang->function;
1819 /* Else try to guess the language given the file name. */
1820 if (parser == NULL)
1822 lang = get_language_from_filename (curfdp->infname, TRUE);
1823 if (lang != NULL && lang->function != NULL)
1825 curfdp->lang = lang;
1826 parser = lang->function;
1830 /* Else look for sharp-bang as the first two characters. */
1831 if (parser == NULL
1832 && readline_internal (&lb, inf) > 0
1833 && lb.len >= 2
1834 && lb.buffer[0] == '#'
1835 && lb.buffer[1] == '!')
1837 char *lp;
1839 /* Set lp to point at the first char after the last slash in the
1840 line or, if no slashes, at the first nonblank. Then set cp to
1841 the first successive blank and terminate the string. */
1842 lp = etags_strrchr (lb.buffer+2, '/');
1843 if (lp != NULL)
1844 lp += 1;
1845 else
1846 lp = skip_spaces (lb.buffer + 2);
1847 cp = skip_non_spaces (lp);
1848 *cp = '\0';
1850 if (strlen (lp) > 0)
1852 lang = get_language_from_interpreter (lp);
1853 if (lang != NULL && lang->function != NULL)
1855 curfdp->lang = lang;
1856 parser = lang->function;
1861 /* We rewind here, even if inf may be a pipe. We fail if the
1862 length of the first line is longer than the pipe block size,
1863 which is unlikely. */
1864 rewind (inf);
1866 /* Else try to guess the language given the case insensitive file name. */
1867 if (parser == NULL)
1869 lang = get_language_from_filename (curfdp->infname, FALSE);
1870 if (lang != NULL && lang->function != NULL)
1872 curfdp->lang = lang;
1873 parser = lang->function;
1877 /* Else try Fortran or C. */
1878 if (parser == NULL)
1880 node *old_last_node = last_node;
1882 curfdp->lang = get_language_from_langname ("fortran");
1883 find_entries (inf);
1885 if (old_last_node == last_node)
1886 /* No Fortran entries found. Try C. */
1888 /* We do not tag if rewind fails.
1889 Only the file name will be recorded in the tags file. */
1890 rewind (inf);
1891 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1892 find_entries (inf);
1894 return;
1897 if (!no_line_directive
1898 && curfdp->lang != NULL && curfdp->lang->metasource)
1899 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1900 file, or anyway we parsed a file that is automatically generated from
1901 this one. If this is the case, the bingo.c file contained #line
1902 directives that generated tags pointing to this file. Let's delete
1903 them all before parsing this file, which is the real source. */
1905 fdesc **fdpp = &fdhead;
1906 while (*fdpp != NULL)
1907 if (*fdpp != curfdp
1908 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1909 /* We found one of those! We must delete both the file description
1910 and all tags referring to it. */
1912 fdesc *badfdp = *fdpp;
1914 /* Delete the tags referring to badfdp->taggedfname
1915 that were obtained from badfdp->infname. */
1916 invalidate_nodes (badfdp, &nodehead);
1918 *fdpp = badfdp->next; /* remove the bad description from the list */
1919 free_fdesc (badfdp);
1921 else
1922 fdpp = &(*fdpp)->next; /* advance the list pointer */
1925 assert (parser != NULL);
1927 /* Generic initialisations before reading from file. */
1928 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1930 /* Generic initialisations before parsing file with readline. */
1931 lineno = 0; /* reset global line number */
1932 charno = 0; /* reset global char number */
1933 linecharno = 0; /* reset global char number of line start */
1935 parser (inf);
1937 #ifdef ETAGS_REGEXPS
1938 regex_tag_multiline ();
1939 #endif /* ETAGS_REGEXPS */
1944 * Check whether an implicitly named tag should be created,
1945 * then call `pfnote'.
1946 * NAME is a string that is internally copied by this function.
1948 * TAGS format specification
1949 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1950 * The following is explained in some more detail in etc/ETAGS.EBNF.
1952 * make_tag creates tags with "implicit tag names" (unnamed tags)
1953 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1954 * 1. NAME does not contain any of the characters in NONAM;
1955 * 2. LINESTART contains name as either a rightmost, or rightmost but
1956 * one character, substring;
1957 * 3. the character, if any, immediately before NAME in LINESTART must
1958 * be a character in NONAM;
1959 * 4. the character, if any, immediately after NAME in LINESTART must
1960 * also be a character in NONAM.
1962 * The implementation uses the notinname() macro, which recognises the
1963 * characters stored in the string `nonam'.
1964 * etags.el needs to use the same characters that are in NONAM.
1966 static void
1967 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1968 char *name; /* tag name, or NULL if unnamed */
1969 int namelen; /* tag length */
1970 bool is_func; /* tag is a function */
1971 char *linestart; /* start of the line where tag is */
1972 int linelen; /* length of the line where tag is */
1973 int lno; /* line number */
1974 long cno; /* character number */
1976 bool named = (name != NULL && namelen > 0);
1978 if (!CTAGS && named) /* maybe set named to false */
1979 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1980 such that etags.el can guess a name from it. */
1982 int i;
1983 register char *cp = name;
1985 for (i = 0; i < namelen; i++)
1986 if (notinname (*cp++))
1987 break;
1988 if (i == namelen) /* rule #1 */
1990 cp = linestart + linelen - namelen;
1991 if (notinname (linestart[linelen-1]))
1992 cp -= 1; /* rule #4 */
1993 if (cp >= linestart /* rule #2 */
1994 && (cp == linestart
1995 || notinname (cp[-1])) /* rule #3 */
1996 && strneq (name, cp, namelen)) /* rule #2 */
1997 named = FALSE; /* use implicit tag name */
2001 if (named)
2002 name = savenstr (name, namelen);
2003 else
2004 name = NULL;
2005 pfnote (name, is_func, linestart, linelen, lno, cno);
2008 /* Record a tag. */
2009 static void
2010 pfnote (name, is_func, linestart, linelen, lno, cno)
2011 char *name; /* tag name, or NULL if unnamed */
2012 bool is_func; /* tag is a function */
2013 char *linestart; /* start of the line where tag is */
2014 int linelen; /* length of the line where tag is */
2015 int lno; /* line number */
2016 long cno; /* character number */
2018 register node *np;
2020 assert (name == NULL || name[0] != '\0');
2021 if (CTAGS && name == NULL)
2022 return;
2024 np = xnew (1, node);
2026 /* If ctags mode, change name "main" to M<thisfilename>. */
2027 if (CTAGS && !cxref_style && streq (name, "main"))
2029 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2030 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2031 fp = etags_strrchr (np->name, '.');
2032 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2033 fp[0] = '\0';
2035 else
2036 np->name = name;
2037 np->valid = TRUE;
2038 np->been_warned = FALSE;
2039 np->fdp = curfdp;
2040 np->is_func = is_func;
2041 np->lno = lno;
2042 if (np->fdp->usecharno)
2043 /* Our char numbers are 0-base, because of C language tradition?
2044 ctags compatibility? old versions compatibility? I don't know.
2045 Anyway, since emacs's are 1-base we expect etags.el to take care
2046 of the difference. If we wanted to have 1-based numbers, we would
2047 uncomment the +1 below. */
2048 np->cno = cno /* + 1 */ ;
2049 else
2050 np->cno = invalidcharno;
2051 np->left = np->right = NULL;
2052 if (CTAGS && !cxref_style)
2054 if (strlen (linestart) < 50)
2055 np->regex = concat (linestart, "$", "");
2056 else
2057 np->regex = savenstr (linestart, 50);
2059 else
2060 np->regex = savenstr (linestart, linelen);
2062 add_node (np, &nodehead);
2066 * free_tree ()
2067 * recurse on left children, iterate on right children.
2069 static void
2070 free_tree (np)
2071 register node *np;
2073 while (np)
2075 register node *node_right = np->right;
2076 free_tree (np->left);
2077 if (np->name != NULL)
2078 free (np->name);
2079 free (np->regex);
2080 free (np);
2081 np = node_right;
2086 * free_fdesc ()
2087 * delete a file description
2089 static void
2090 free_fdesc (fdp)
2091 register fdesc *fdp;
2093 if (fdp->infname != NULL) free (fdp->infname);
2094 if (fdp->infabsname != NULL) free (fdp->infabsname);
2095 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2096 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2097 if (fdp->prop != NULL) free (fdp->prop);
2098 free (fdp);
2102 * add_node ()
2103 * Adds a node to the tree of nodes. In etags mode, sort by file
2104 * name. In ctags mode, sort by tag name. Make no attempt at
2105 * balancing.
2107 * add_node is the only function allowed to add nodes, so it can
2108 * maintain state.
2110 static void
2111 add_node (np, cur_node_p)
2112 node *np, **cur_node_p;
2114 register int dif;
2115 register node *cur_node = *cur_node_p;
2117 if (cur_node == NULL)
2119 *cur_node_p = np;
2120 last_node = np;
2121 return;
2124 if (!CTAGS)
2125 /* Etags Mode */
2127 /* For each file name, tags are in a linked sublist on the right
2128 pointer. The first tags of different files are a linked list
2129 on the left pointer. last_node points to the end of the last
2130 used sublist. */
2131 if (last_node != NULL && last_node->fdp == np->fdp)
2133 /* Let's use the same sublist as the last added node. */
2134 assert (last_node->right == NULL);
2135 last_node->right = np;
2136 last_node = np;
2138 else if (cur_node->fdp == np->fdp)
2140 /* Scanning the list we found the head of a sublist which is
2141 good for us. Let's scan this sublist. */
2142 add_node (np, &cur_node->right);
2144 else
2145 /* The head of this sublist is not good for us. Let's try the
2146 next one. */
2147 add_node (np, &cur_node->left);
2148 } /* if ETAGS mode */
2150 else
2152 /* Ctags Mode */
2153 dif = strcmp (np->name, cur_node->name);
2156 * If this tag name matches an existing one, then
2157 * do not add the node, but maybe print a warning.
2159 if (!dif)
2161 if (np->fdp == cur_node->fdp)
2163 if (!no_warnings)
2165 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2166 np->fdp->infname, lineno, np->name);
2167 fprintf (stderr, "Second entry ignored\n");
2170 else if (!cur_node->been_warned && !no_warnings)
2172 fprintf
2173 (stderr,
2174 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2175 np->fdp->infname, cur_node->fdp->infname, np->name);
2176 cur_node->been_warned = TRUE;
2178 return;
2181 /* Actually add the node */
2182 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2183 } /* if CTAGS mode */
2187 * invalidate_nodes ()
2188 * Scan the node tree and invalidate all nodes pointing to the
2189 * given file description (CTAGS case) or free them (ETAGS case).
2191 static void
2192 invalidate_nodes (badfdp, npp)
2193 fdesc *badfdp;
2194 node **npp;
2196 node *np = *npp;
2198 if (np == NULL)
2199 return;
2201 if (CTAGS)
2203 if (np->left != NULL)
2204 invalidate_nodes (badfdp, &np->left);
2205 if (np->fdp == badfdp)
2206 np->valid = FALSE;
2207 if (np->right != NULL)
2208 invalidate_nodes (badfdp, &np->right);
2210 else
2212 assert (np->fdp != NULL);
2213 if (np->fdp == badfdp)
2215 *npp = np->left; /* detach the sublist from the list */
2216 np->left = NULL; /* isolate it */
2217 free_tree (np); /* free it */
2218 invalidate_nodes (badfdp, npp);
2220 else
2221 invalidate_nodes (badfdp, &np->left);
2226 static int total_size_of_entries __P((node *));
2227 static int number_len __P((long));
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2241 * Return total number of characters that put_entries will output for
2242 * the nodes in the linked list at the right of the specified node.
2243 * This count is irrelevant with etags.el since emacs 19.34 at least,
2244 * but is still supplied for backward compatibility.
2246 static int
2247 total_size_of_entries (np)
2248 register node *np;
2250 register int total = 0;
2252 for (; np != NULL; np = np->right)
2253 if (np->valid)
2255 total += strlen (np->regex) + 1; /* pat\177 */
2256 if (np->name != NULL)
2257 total += strlen (np->name) + 1; /* name\001 */
2258 total += number_len ((long) np->lno) + 1; /* lno, */
2259 if (np->cno != invalidcharno) /* cno */
2260 total += number_len (np->cno);
2261 total += 1; /* newline */
2264 return total;
2267 static void
2268 put_entries (np)
2269 register node *np;
2271 register char *sp;
2272 static fdesc *fdp = NULL;
2274 if (np == NULL)
2275 return;
2277 /* Output subentries that precede this one */
2278 if (CTAGS)
2279 put_entries (np->left);
2281 /* Output this entry */
2282 if (np->valid)
2284 if (!CTAGS)
2286 /* Etags mode */
2287 if (fdp != np->fdp)
2289 fdp = np->fdp;
2290 fprintf (tagf, "\f\n%s,%d\n",
2291 fdp->taggedfname, total_size_of_entries (np));
2292 fdp->written = TRUE;
2294 fputs (np->regex, tagf);
2295 fputc ('\177', tagf);
2296 if (np->name != NULL)
2298 fputs (np->name, tagf);
2299 fputc ('\001', tagf);
2301 fprintf (tagf, "%d,", np->lno);
2302 if (np->cno != invalidcharno)
2303 fprintf (tagf, "%ld", np->cno);
2304 fputs ("\n", tagf);
2306 else
2308 /* Ctags mode */
2309 if (np->name == NULL)
2310 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2312 if (cxref_style)
2314 if (vgrind_style)
2315 fprintf (stdout, "%s %s %d\n",
2316 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2317 else
2318 fprintf (stdout, "%-16s %3d %-16s %s\n",
2319 np->name, np->lno, np->fdp->taggedfname, np->regex);
2321 else
2323 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2325 if (np->is_func)
2326 { /* function or #define macro with args */
2327 putc (searchar, tagf);
2328 putc ('^', tagf);
2330 for (sp = np->regex; *sp; sp++)
2332 if (*sp == '\\' || *sp == searchar)
2333 putc ('\\', tagf);
2334 putc (*sp, tagf);
2336 putc (searchar, tagf);
2338 else
2339 { /* anything else; text pattern inadequate */
2340 fprintf (tagf, "%d", np->lno);
2342 putc ('\n', tagf);
2345 } /* if this node contains a valid tag */
2347 /* Output subentries that follow this one */
2348 put_entries (np->right);
2349 if (!CTAGS)
2350 put_entries (np->left);
/* Bit masks describing which C dialect is being parsed. */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables: kinds of tokens recognised by C_symtype.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};
2378 static unsigned int hash __P((const char *, unsigned int));
2379 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2380 static enum sym_type C_symtype __P((char *, int, int));
2382 /* Feed stuff between (but not including) %[ and %] lines to:
2383 gperf -c -k 1,3 -o -p -r -t
2385 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2387 if, 0, st_C_ignore
2388 for, 0, st_C_ignore
2389 while, 0, st_C_ignore
2390 switch, 0, st_C_ignore
2391 return, 0, st_C_ignore
2392 @interface, 0, st_C_objprot
2393 @protocol, 0, st_C_objprot
2394 @implementation,0, st_C_objimpl
2395 @end, 0, st_C_objend
2396 import, C_JAVA, st_C_ignore
2397 package, C_JAVA, st_C_ignore
2398 friend, C_PLPL, st_C_ignore
2399 extends, C_JAVA, st_C_javastruct
2400 implements, C_JAVA, st_C_javastruct
2401 interface, C_JAVA, st_C_struct
2402 class, 0, st_C_class
2403 namespace, C_PLPL, st_C_struct
2404 domain, C_STAR, st_C_struct
2405 union, 0, st_C_struct
2406 struct, 0, st_C_struct
2407 extern, 0, st_C_extern
2408 enum, 0, st_C_enum
2409 typedef, 0, st_C_typedef
2410 define, 0, st_C_define
2411 operator, C_PLPL, st_C_operator
2412 template, 0, st_C_template
2413 bool, C_PLPL, st_C_typespec
2414 long, 0, st_C_typespec
2415 short, 0, st_C_typespec
2416 int, 0, st_C_typespec
2417 char, 0, st_C_typespec
2418 float, 0, st_C_typespec
2419 double, 0, st_C_typespec
2420 signed, 0, st_C_typespec
2421 unsigned, 0, st_C_typespec
2422 auto, 0, st_C_typespec
2423 void, 0, st_C_typespec
2424 static, 0, st_C_typespec
2425 const, 0, st_C_typespec
2426 volatile, 0, st_C_typespec
2427 explicit, C_PLPL, st_C_typespec
2428 mutable, C_PLPL, st_C_typespec
2429 typename, C_PLPL, st_C_typespec
2430 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2431 DEFUN, 0, st_C_gnumacro
2432 SYSCALL, 0, st_C_gnumacro
2433 ENTRY, 0, st_C_gnumacro
2434 PSEUDO, 0, st_C_gnumacro
2435 # These are defined inside C functions, so currently they are not met.
2436 # EXFUN used in glibc, DEFVAR_* in emacs.
2437 #EXFUN, 0, st_C_gnumacro
2438 #DEFVAR_, 0, st_C_gnumacro
2440 and replace lines between %< and %> with its output,
2441 then make in_word_set and C_stab_entry static. */
2442 /*%<*/
2443 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2444 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2445 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2447 #define TOTAL_KEYWORDS 47
2448 #define MIN_WORD_LENGTH 2
2449 #define MAX_WORD_LENGTH 15
2450 #define MIN_HASH_VALUE 18
2451 #define MAX_HASH_VALUE 138
2452 /* maximum key range = 121, duplicates = 0 */
/* gperf hash function for the C keyword table: the sum of LEN, the
   association value of the first character of STR and, unless LEN is 1
   or 2, that of its third character. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* Every length other than 1 and 2 also uses the third character. */
  if (len != 1 && len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2506 #ifdef __GNUC__
2507 __inline
2508 #endif
2509 static struct C_stab_entry *
2510 in_word_set (str, len)
2511 register const char *str;
2512 register unsigned int len;
2514 static struct C_stab_entry wordlist[] =
2516 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2517 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2518 {"if", 0, st_C_ignore},
2519 {""}, {""}, {""}, {""},
2520 {"int", 0, st_C_typespec},
2521 {""}, {""},
2522 {"void", 0, st_C_typespec},
2523 {""}, {""},
2524 {"interface", C_JAVA, st_C_struct},
2525 {""},
2526 {"SYSCALL", 0, st_C_gnumacro},
2527 {""},
2528 {"return", 0, st_C_ignore},
2529 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2530 {"while", 0, st_C_ignore},
2531 {"auto", 0, st_C_typespec},
2532 {""}, {""}, {""}, {""}, {""}, {""},
2533 {"float", 0, st_C_typespec},
2534 {"typedef", 0, st_C_typedef},
2535 {"typename", C_PLPL, st_C_typespec},
2536 {""}, {""}, {""},
2537 {"friend", C_PLPL, st_C_ignore},
2538 {"volatile", 0, st_C_typespec},
2539 {""}, {""},
2540 {"for", 0, st_C_ignore},
2541 {"const", 0, st_C_typespec},
2542 {"import", C_JAVA, st_C_ignore},
2543 {""},
2544 {"define", 0, st_C_define},
2545 {"long", 0, st_C_typespec},
2546 {"implements", C_JAVA, st_C_javastruct},
2547 {"signed", 0, st_C_typespec},
2548 {""},
2549 {"extern", 0, st_C_extern},
2550 {"extends", C_JAVA, st_C_javastruct},
2551 {""},
2552 {"mutable", C_PLPL, st_C_typespec},
2553 {"template", 0, st_C_template},
2554 {"short", 0, st_C_typespec},
2555 {"bool", C_PLPL, st_C_typespec},
2556 {"char", 0, st_C_typespec},
2557 {"class", 0, st_C_class},
2558 {"operator", C_PLPL, st_C_operator},
2559 {""},
2560 {"switch", 0, st_C_ignore},
2561 {""},
2562 {"ENTRY", 0, st_C_gnumacro},
2563 {""},
2564 {"package", C_JAVA, st_C_ignore},
2565 {"union", 0, st_C_struct},
2566 {"@end", 0, st_C_objend},
2567 {"struct", 0, st_C_struct},
2568 {"namespace", C_PLPL, st_C_struct},
2569 {""}, {""},
2570 {"domain", C_STAR, st_C_struct},
2571 {"@interface", 0, st_C_objprot},
2572 {"PSEUDO", 0, st_C_gnumacro},
2573 {"double", 0, st_C_typespec},
2574 {""},
2575 {"@protocol", 0, st_C_objprot},
2576 {""},
2577 {"static", 0, st_C_typespec},
2578 {""}, {""},
2579 {"DEFUN", 0, st_C_gnumacro},
2580 {""}, {""}, {""}, {""},
2581 {"explicit", C_PLPL, st_C_typespec},
2582 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2583 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2584 {""},
2585 {"enum", 0, st_C_enum},
2586 {""}, {""},
2587 {"unsigned", 0, st_C_typespec},
2588 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2589 {"@implementation",0, st_C_objimpl}
2592 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2594 register int key = hash (str, len);
2596 if (key <= MAX_HASH_VALUE && key >= 0)
2598 register const char *s = wordlist[key].name;
2600 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2601 return &wordlist[key];
2604 return 0;
2606 /*%>*/
2608 static enum sym_type
2609 C_symtype (str, len, c_ext)
2610 char *str;
2611 int len;
2612 int c_ext;
2614 register struct C_stab_entry *se = in_word_set (str, len);
2616 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2617 return st_none;
2618 return se->type;
/*
 * State of the finite automaton that recognises C functions and
 * variables.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */
/*
 * State of the finite automaton that recognises typedefs.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * State of the finite automaton that recognises struct-like
 * structures (enum, struct and union).
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if cblev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  sintemplate,                  /* inside template (ignore) */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * Name of the current Objective C class; meaningful whenever objdef is
 * different from onone.
 */
static char *objtag = "<uninited>";
/*
 * State of the little state machine that deals with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State of the machine that recognises Objective C protocols and
 * implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Information about the token read and how it should be tagged.
 * make_C_tag builds a tag from the contents of `token'.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The members above can be used to pass strings around for generic
    purposes.  The members below specifically refer to creating tags:
    in that case the token described here is the pattern that will be
    used to create a tag.
  */
  bool valid;                   /* a tag may be created from this token;
                                   must be made FALSE whenever a state
                                   machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2732 * Variables and functions for dealing with nested structures.
2733 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2735 static void pushclass_above __P((int, char *, int));
2736 static void popclass_above __P((int));
2737 static void write_classname __P((linebuffer *, char *qualifier));
2739 static struct {
2740 char **cname; /* nested class names */
2741 int *cblev; /* nested class curly brace level */
2742 int nl; /* class nesting level (elements used) */
2743 int size; /* length of the array */
2744 } cstack; /* stack for nested declaration tags */
2745 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2746 #define nestlev (cstack.nl)
2747 /* After struct keyword or in struct body, not inside a nested function. */
2748 #define instruct (structdef == snone && nestlev > 0 \
2749 && cblev == cstack.cblev[nestlev-1] + 1)
2751 static void
2752 pushclass_above (cblev, str, len)
2753 int cblev;
2754 char *str;
2755 int len;
2757 int nl;
2759 popclass_above (cblev);
2760 nl = cstack.nl;
2761 if (nl >= cstack.size)
2763 int size = cstack.size *= 2;
2764 xrnew (cstack.cname, size, char *);
2765 xrnew (cstack.cblev, size, int);
2767 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2768 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2769 cstack.cblev[nl] = cblev;
2770 cstack.nl = nl + 1;
2773 static void
2774 popclass_above (cblev)
2775 int cblev;
2777 int nl;
2779 for (nl = cstack.nl - 1;
2780 nl >= 0 && cstack.cblev[nl] >= cblev;
2781 nl--)
2783 if (cstack.cname[nl] != NULL)
2784 free (cstack.cname[nl]);
2785 cstack.nl = nl;
2789 static void
2790 write_classname (cn, qualifier)
2791 linebuffer *cn;
2792 char *qualifier;
2794 int i, len;
2795 int qlen = strlen (qualifier);
2797 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2799 len = 0;
2800 cn->len = 0;
2801 cn->buffer[0] = '\0';
2803 else
2805 len = strlen (cstack.cname[0]);
2806 linebuffer_setlen (cn, len);
2807 strcpy (cn->buffer, cstack.cname[0]);
2809 for (i = 1; i < cstack.nl; i++)
2811 char *s;
2812 int slen;
2814 s = cstack.cname[i];
2815 if (s == NULL)
2816 continue;
2817 slen = strlen (s);
2818 len += slen + qlen;
2819 linebuffer_setlen (cn, len);
2820 strncat (cn->buffer, qualifier, qlen);
2821 strncat (cn->buffer, s, slen);
2826 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2827 static void make_C_tag __P((bool));
2830 * consider_token ()
2831 * checks to see if the current token is at the start of a
2832 * function or variable, or corresponds to a typedef, or
2833 * is a struct/union/enum tag, or #define, or an enum constant.
2835 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2836 * with args. C_EXTP points to which language we are looking at.
2838 * Globals
2839 * fvdef IN OUT
2840 * structdef IN OUT
2841 * definedef IN OUT
2842 * typdef IN OUT
2843 * objdef IN OUT
2846 static bool
2847 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2848 register char *str; /* IN: token pointer */
2849 register int len; /* IN: token length */
2850 register int c; /* IN: first char after the token */
2851 int *c_extp; /* IN, OUT: C extensions mask */
2852 int cblev; /* IN: curly brace level */
2853 int parlev; /* IN: parenthesis level */
2854 bool *is_func_or_var; /* OUT: function or variable found */
2856 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2857 structtype is the type of the preceding struct-like keyword, and
2858 structcblev is the curly brace level where it has been seen. */
2859 static enum sym_type structtype;
2860 static int structcblev;
2861 static enum sym_type toktype;
2864 toktype = C_symtype (str, len, *c_extp);
2867 * Advance the definedef state machine.
2869 switch (definedef)
2871 case dnone:
2872 /* We're not on a preprocessor line. */
2873 if (toktype == st_C_gnumacro)
2875 fvdef = fdefunkey;
2876 return FALSE;
2878 break;
2879 case dsharpseen:
2880 if (toktype == st_C_define)
2882 definedef = ddefineseen;
2884 else
2886 definedef = dignorerest;
2888 return FALSE;
2889 case ddefineseen:
2891 * Make a tag for any macro, unless it is a constant
2892 * and constantypedefs is FALSE.
2894 definedef = dignorerest;
2895 *is_func_or_var = (c == '(');
2896 if (!*is_func_or_var && !constantypedefs)
2897 return FALSE;
2898 else
2899 return TRUE;
2900 case dignorerest:
2901 return FALSE;
2902 default:
2903 error ("internal error: definedef value.", (char *)NULL);
2907 * Now typedefs
2909 switch (typdef)
2911 case tnone:
2912 if (toktype == st_C_typedef)
2914 if (typedefs)
2915 typdef = tkeyseen;
2916 fvextern = FALSE;
2917 fvdef = fvnone;
2918 return FALSE;
2920 break;
2921 case tkeyseen:
2922 switch (toktype)
2924 case st_none:
2925 case st_C_typespec:
2926 case st_C_class:
2927 case st_C_struct:
2928 case st_C_enum:
2929 typdef = ttypeseen;
2930 break;
2932 break;
2933 case ttypeseen:
2934 if (structdef == snone && fvdef == fvnone)
2936 fvdef = fvnameseen;
2937 return TRUE;
2939 break;
2940 case tend:
2941 switch (toktype)
2943 case st_C_typespec:
2944 case st_C_class:
2945 case st_C_struct:
2946 case st_C_enum:
2947 return FALSE;
2949 return TRUE;
2953 * This structdef business is NOT invoked when we are ctags and the
2954 * file is plain C. This is because a struct tag may have the same
2955 * name as another tag, and this loses with ctags.
2957 switch (toktype)
2959 case st_C_javastruct:
2960 if (structdef == stagseen)
2961 structdef = scolonseen;
2962 return FALSE;
2963 case st_C_template:
2964 case st_C_class:
2965 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2966 && cblev == 0
2967 && definedef == dnone && structdef == snone
2968 && typdef == tnone && fvdef == fvnone)
2969 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2970 if (toktype == st_C_template)
2971 break;
2972 /* FALLTHRU */
2973 case st_C_struct:
2974 case st_C_enum:
2975 if (parlev == 0
2976 && fvdef != vignore
2977 && (typdef == tkeyseen
2978 || (typedefs_or_cplusplus && structdef == snone)))
2980 structdef = skeyseen;
2981 structtype = toktype;
2982 structcblev = cblev;
2984 return FALSE;
2987 if (structdef == skeyseen)
2989 structdef = stagseen;
2990 return TRUE;
2993 if (typdef != tnone)
2994 definedef = dnone;
2996 /* Detect Objective C constructs. */
2997 switch (objdef)
2999 case onone:
3000 switch (toktype)
3002 case st_C_objprot:
3003 objdef = oprotocol;
3004 return FALSE;
3005 case st_C_objimpl:
3006 objdef = oimplementation;
3007 return FALSE;
3009 break;
3010 case oimplementation:
3011 /* Save the class tag for functions or variables defined inside. */
3012 objtag = savenstr (str, len);
3013 objdef = oinbody;
3014 return FALSE;
3015 case oprotocol:
3016 /* Save the class tag for categories. */
3017 objtag = savenstr (str, len);
3018 objdef = otagseen;
3019 *is_func_or_var = TRUE;
3020 return TRUE;
3021 case oparenseen:
3022 objdef = ocatseen;
3023 *is_func_or_var = TRUE;
3024 return TRUE;
3025 case oinbody:
3026 break;
3027 case omethodsign:
3028 if (parlev == 0)
3030 objdef = omethodtag;
3031 linebuffer_setlen (&token_name, len);
3032 strncpy (token_name.buffer, str, len);
3033 token_name.buffer[len] = '\0';
3034 return TRUE;
3036 return FALSE;
3037 case omethodcolon:
3038 if (parlev == 0)
3039 objdef = omethodparm;
3040 return FALSE;
3041 case omethodparm:
3042 if (parlev == 0)
3044 objdef = omethodtag;
3045 linebuffer_setlen (&token_name, token_name.len + len);
3046 strncat (token_name.buffer, str, len);
3047 return TRUE;
3049 return FALSE;
3050 case oignore:
3051 if (toktype == st_C_objend)
3053 /* Memory leakage here: the string pointed by objtag is
3054 never released, because many tests would be needed to
3055 avoid breaking on incorrect input code. The amount of
3056 memory leaked here is the sum of the lengths of the
3057 class tags.
3058 free (objtag); */
3059 objdef = onone;
3061 return FALSE;
3064 /* A function, variable or enum constant? */
3065 switch (toktype)
3067 case st_C_extern:
3068 fvextern = TRUE;
3069 /* FALLTHRU */
3070 case st_C_typespec:
3071 switch (fvdef)
3073 case finlist:
3074 case flistseen:
3075 case fignore:
3076 case vignore:
3077 break;
3078 default:
3079 fvdef = fvnone;
3081 return FALSE;
3082 case st_C_ignore:
3083 fvextern = FALSE;
3084 fvdef = vignore;
3085 return FALSE;
3086 case st_C_operator:
3087 fvdef = foperator;
3088 *is_func_or_var = TRUE;
3089 return TRUE;
3090 case st_none:
3091 if (constantypedefs
3092 && structdef == snone
3093 && structtype == st_C_enum && cblev > structcblev)
3094 return TRUE; /* enum constant */
3095 switch (fvdef)
3097 case fdefunkey:
3098 if (cblev > 0)
3099 break;
3100 fvdef = fdefunname; /* GNU macro */
3101 *is_func_or_var = TRUE;
3102 return TRUE;
3103 case fvnone:
3104 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3105 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3107 fvdef = vignore;
3108 return FALSE;
3110 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3112 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3113 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3114 fvdef = foperator;
3115 *is_func_or_var = TRUE;
3116 return TRUE;
3118 if (cblev > 0 && !instruct)
3119 break;
3120 fvdef = fvnameseen; /* function or variable */
3121 *is_func_or_var = TRUE;
3122 return TRUE;
3124 break;
3127 return FALSE;
3132 * C_entries often keeps pointers to tokens or lines which are older than
3133 * the line currently read. By keeping two line buffers, and switching
3134 * them at end of line, it is possible to use those pointers.
3136 static struct
3138 long linepos;
3139 linebuffer lb;
3140 } lbs[2];
3142 #define current_lb_is_new (newndx == curndx)
3143 #define switch_line_buffers() (curndx = 1 - curndx)
3145 #define curlb (lbs[curndx].lb)
3146 #define newlb (lbs[newndx].lb)
3147 #define curlinepos (lbs[curndx].linepos)
3148 #define newlinepos (lbs[newndx].linepos)
3150 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3151 #define cplpl (c_ext & C_PLPL)
3152 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3154 #define CNL_SAVE_DEFINEDEF() \
3155 do { \
3156 curlinepos = charno; \
3157 readline (&curlb, inf); \
3158 lp = curlb.buffer; \
3159 quotednl = FALSE; \
3160 newndx = curndx; \
3161 } while (0)
3163 #define CNL() \
3164 do { \
3165 CNL_SAVE_DEFINEDEF(); \
3166 if (savetoken.valid) \
3168 token = savetoken; \
3169 savetoken.valid = FALSE; \
3171 definedef = dnone; \
3172 } while (0)
3175 static void
3176 make_C_tag (isfun)
3177 bool isfun;
3179 /* This function should never be called when token.valid is FALSE, but
3180 we must protect against invalid input or internal errors. */
3181 if (!DEBUG && !token.valid)
3182 return;
3184 if (token.valid)
3185 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3186 token.offset+token.length+1, token.lineno, token.linepos);
3187 else /* this case is optimised away if !DEBUG */
3188 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3189 token_name.len + 17, isfun, token.line,
3190 token.offset+token.length+1, token.lineno, token.linepos);
3192 token.valid = FALSE;
3197 * C_entries ()
3198 * This routine finds functions, variables, typedefs,
3199 * #define's, enum constants and struct/union/enum definitions in
3200 * C syntax and adds them to the list.
3202 static void
3203 C_entries (c_ext, inf)
3204 int c_ext; /* extension of C */
3205 FILE *inf; /* input file */
3207 register char c; /* latest char read; '\0' for end of line */
3208 register char *lp; /* pointer one beyond the character `c' */
3209 int curndx, newndx; /* indices for current and new lb */
3210 register int tokoff; /* offset in line of start of current token */
3211 register int toklen; /* length of current token */
3212 char *qualifier; /* string used to qualify names */
3213 int qlen; /* length of qualifier */
3214 int cblev; /* current curly brace level */
3215 int parlev; /* current parenthesis level */
3216 int typdefcblev; /* cblev where a typedef struct body begun */
3217 bool incomm, inquote, inchar, quotednl, midtoken;
3218 bool yacc_rules; /* in the rules part of a yacc file */
3219 struct tok savetoken; /* token saved during preprocessor handling */
3222 linebuffer_init (&lbs[0].lb);
3223 linebuffer_init (&lbs[1].lb);
3224 if (cstack.size == 0)
3226 cstack.size = (DEBUG) ? 1 : 4;
3227 cstack.nl = 0;
3228 cstack.cname = xnew (cstack.size, char *);
3229 cstack.cblev = xnew (cstack.size, int);
3232 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3233 curndx = newndx = 0;
3234 lp = curlb.buffer;
3235 *lp = 0;
3237 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3238 structdef = snone; definedef = dnone; objdef = onone;
3239 yacc_rules = FALSE;
3240 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3241 token.valid = savetoken.valid = FALSE;
3242 cblev = 0;
3243 parlev = 0;
3244 if (cjava)
3245 { qualifier = "."; qlen = 1; }
3246 else
3247 { qualifier = "::"; qlen = 2; }
3250 while (!feof (inf))
3252 c = *lp++;
3253 if (c == '\\')
3255 /* If we're at the end of the line, the next character is a
3256 '\0'; don't skip it, because it's the thing that tells us
3257 to read the next line. */
3258 if (*lp == '\0')
3260 quotednl = TRUE;
3261 continue;
3263 lp++;
3264 c = ' ';
3266 else if (incomm)
3268 switch (c)
3270 case '*':
3271 if (*lp == '/')
3273 c = *lp++;
3274 incomm = FALSE;
3276 break;
3277 case '\0':
3278 /* Newlines inside comments do not end macro definitions in
3279 traditional cpp. */
3280 CNL_SAVE_DEFINEDEF ();
3281 break;
3283 continue;
3285 else if (inquote)
3287 switch (c)
3289 case '"':
3290 inquote = FALSE;
3291 break;
3292 case '\0':
3293 /* Newlines inside strings do not end macro definitions
3294 in traditional cpp, even though compilers don't
3295 usually accept them. */
3296 CNL_SAVE_DEFINEDEF ();
3297 break;
3299 continue;
3301 else if (inchar)
3303 switch (c)
3305 case '\0':
3306 /* Hmmm, something went wrong. */
3307 CNL ();
3308 /* FALLTHRU */
3309 case '\'':
3310 inchar = FALSE;
3311 break;
3313 continue;
3315 else
3316 switch (c)
3318 case '"':
3319 inquote = TRUE;
3320 switch (fvdef)
3322 case fdefunkey:
3323 case fstartlist:
3324 case finlist:
3325 case fignore:
3326 case vignore:
3327 break;
3328 default:
3329 fvextern = FALSE;
3330 fvdef = fvnone;
3332 continue;
3333 case '\'':
3334 inchar = TRUE;
3335 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3337 fvextern = FALSE;
3338 fvdef = fvnone;
3340 continue;
3341 case '/':
3342 if (*lp == '*')
3344 lp++;
3345 incomm = TRUE;
3346 continue;
3348 else if (/* cplpl && */ *lp == '/')
3350 c = '\0';
3351 break;
3353 else
3354 break;
3355 case '%':
3356 if ((c_ext & YACC) && *lp == '%')
3358 /* Entering or exiting rules section in yacc file. */
3359 lp++;
3360 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3361 typdef = tnone; structdef = snone;
3362 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3363 cblev = 0;
3364 yacc_rules = !yacc_rules;
3365 continue;
3367 else
3368 break;
3369 case '#':
3370 if (definedef == dnone)
3372 char *cp;
3373 bool cpptoken = TRUE;
3375 /* Look back on this line. If all blanks, or nonblanks
3376 followed by an end of comment, this is a preprocessor
3377 token. */
3378 for (cp = newlb.buffer; cp < lp-1; cp++)
3379 if (!iswhite (*cp))
3381 if (*cp == '*' && *(cp+1) == '/')
3383 cp++;
3384 cpptoken = TRUE;
3386 else
3387 cpptoken = FALSE;
3389 if (cpptoken)
3390 definedef = dsharpseen;
3391 } /* if (definedef == dnone) */
3393 continue;
3394 } /* switch (c) */
3397 /* Consider token only if some involved conditions are satisfied. */
3398 if (typdef != tignore
3399 && definedef != dignorerest
3400 && fvdef != finlist
3401 && structdef != sintemplate
3402 && (definedef != dnone
3403 || structdef != scolonseen))
3405 if (midtoken)
3407 if (endtoken (c))
3409 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3410 /* This handles :: in the middle,
3411 but not at the beginning of an identifier.
3412 Also, space-separated :: is not recognised. */
3414 if (c_ext & C_AUTO) /* automatic detection of C++ */
3415 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3416 lp += 2;
3417 toklen += 2;
3418 c = lp[-1];
3419 goto still_in_token;
3421 else
3423 bool funorvar = FALSE;
3425 if (yacc_rules
3426 || consider_token (newlb.buffer + tokoff, toklen, c,
3427 &c_ext, cblev, parlev, &funorvar))
3429 if (fvdef == foperator)
3431 char *oldlp = lp;
3432 lp = skip_spaces (lp-1);
3433 if (*lp != '\0')
3434 lp += 1;
3435 while (*lp != '\0'
3436 && !iswhite (*lp) && *lp != '(')
3437 lp += 1;
3438 c = *lp++;
3439 toklen += lp - oldlp;
3441 token.named = FALSE;
3442 if (!plainc
3443 && nestlev > 0 && definedef == dnone)
3444 /* in struct body */
3446 write_classname (&token_name, qualifier);
3447 linebuffer_setlen (&token_name,
3448 token_name.len+qlen+toklen);
3449 strcat (token_name.buffer, qualifier);
3450 strncat (token_name.buffer,
3451 newlb.buffer + tokoff, toklen);
3452 token.named = TRUE;
3454 else if (objdef == ocatseen)
3455 /* Objective C category */
3457 int len = strlen (objtag) + 2 + toklen;
3458 linebuffer_setlen (&token_name, len);
3459 strcpy (token_name.buffer, objtag);
3460 strcat (token_name.buffer, "(");
3461 strncat (token_name.buffer,
3462 newlb.buffer + tokoff, toklen);
3463 strcat (token_name.buffer, ")");
3464 token.named = TRUE;
3466 else if (objdef == omethodtag
3467 || objdef == omethodparm)
3468 /* Objective C method */
3470 token.named = TRUE;
3472 else if (fvdef == fdefunname)
3473 /* GNU DEFUN and similar macros */
3475 bool defun = (newlb.buffer[tokoff] == 'F');
3476 int off = tokoff;
3477 int len = toklen;
3479 /* Rewrite the tag so that emacs lisp DEFUNs
3480 can be found by their elisp name */
3481 if (defun)
3483 off += 1;
3484 len -= 1;
3486 len = toklen;
3487 linebuffer_setlen (&token_name, len);
3488 strncpy (token_name.buffer,
3489 newlb.buffer + off, len);
3490 token_name.buffer[len] = '\0';
3491 if (defun)
3492 while (--len >= 0)
3493 if (token_name.buffer[len] == '_')
3494 token_name.buffer[len] = '-';
3495 token.named = defun;
3497 else
3499 linebuffer_setlen (&token_name, toklen);
3500 strncpy (token_name.buffer,
3501 newlb.buffer + tokoff, toklen);
3502 token_name.buffer[toklen] = '\0';
3503 /* Name macros and members. */
3504 token.named = (structdef == stagseen
3505 || typdef == ttypeseen
3506 || typdef == tend
3507 || (funorvar
3508 && definedef == dignorerest)
3509 || (funorvar
3510 && definedef == dnone
3511 && structdef == snone
3512 && cblev > 0));
3514 token.lineno = lineno;
3515 token.offset = tokoff;
3516 token.length = toklen;
3517 token.line = newlb.buffer;
3518 token.linepos = newlinepos;
3519 token.valid = TRUE;
3521 if (definedef == dnone
3522 && (fvdef == fvnameseen
3523 || fvdef == foperator
3524 || structdef == stagseen
3525 || typdef == tend
3526 || typdef == ttypeseen
3527 || objdef != onone))
3529 if (current_lb_is_new)
3530 switch_line_buffers ();
3532 else if (definedef != dnone
3533 || fvdef == fdefunname
3534 || instruct)
3535 make_C_tag (funorvar);
3537 midtoken = FALSE;
3539 } /* if (endtoken (c)) */
3540 else if (intoken (c))
3541 still_in_token:
3543 toklen++;
3544 continue;
3546 } /* if (midtoken) */
3547 else if (begtoken (c))
3549 switch (definedef)
3551 case dnone:
3552 switch (fvdef)
3554 case fstartlist:
3555 fvdef = finlist;
3556 continue;
3557 case flistseen:
3558 if (plainc || declarations)
3560 make_C_tag (TRUE); /* a function */
3561 fvdef = fignore;
3563 break;
3564 case fvnameseen:
3565 fvdef = fvnone;
3566 break;
3568 if (structdef == stagseen && !cjava)
3570 popclass_above (cblev);
3571 structdef = snone;
3573 break;
3574 case dsharpseen:
3575 savetoken = token;
3576 break;
3578 if (!yacc_rules || lp == newlb.buffer + 1)
3580 tokoff = lp - 1 - newlb.buffer;
3581 toklen = 1;
3582 midtoken = TRUE;
3584 continue;
3585 } /* if (begtoken) */
3586 } /* if must look at token */
3589 /* Detect end of line, colon, comma, semicolon and various braces
3590 after having handled a token.*/
3591 switch (c)
3593 case ':':
3594 if (yacc_rules && token.offset == 0 && token.valid)
3596 make_C_tag (FALSE); /* a yacc function */
3597 break;
3599 if (definedef != dnone)
3600 break;
3601 switch (objdef)
3603 case otagseen:
3604 objdef = oignore;
3605 make_C_tag (TRUE); /* an Objective C class */
3606 break;
3607 case omethodtag:
3608 case omethodparm:
3609 objdef = omethodcolon;
3610 linebuffer_setlen (&token_name, token_name.len + 1);
3611 strcat (token_name.buffer, ":");
3612 break;
3614 if (structdef == stagseen)
3616 structdef = scolonseen;
3617 break;
3619 /* Should be useless, but may be work as a safety net. */
3620 if (cplpl && fvdef == flistseen)
3622 make_C_tag (TRUE); /* a function */
3623 fvdef = fignore;
3624 break;
3626 break;
3627 case ';':
3628 if (definedef != dnone)
3629 break;
3630 switch (typdef)
3632 case tend:
3633 case ttypeseen:
3634 make_C_tag (FALSE); /* a typedef */
3635 typdef = tnone;
3636 fvdef = fvnone;
3637 break;
3638 case tnone:
3639 case tinbody:
3640 case tignore:
3641 switch (fvdef)
3643 case fignore:
3644 if (typdef == tignore || cplpl)
3645 fvdef = fvnone;
3646 break;
3647 case fvnameseen:
3648 if ((globals && cblev == 0 && (!fvextern || declarations))
3649 || (members && instruct))
3650 make_C_tag (FALSE); /* a variable */
3651 fvextern = FALSE;
3652 fvdef = fvnone;
3653 token.valid = FALSE;
3654 break;
3655 case flistseen:
3656 if (declarations
3657 && (typdef == tnone || (typdef != tignore && instruct)))
3658 make_C_tag (TRUE); /* a function declaration */
3659 /* FALLTHRU */
3660 default:
3661 fvextern = FALSE;
3662 fvdef = fvnone;
3663 if (declarations
3664 && cplpl && structdef == stagseen)
3665 make_C_tag (FALSE); /* forward declaration */
3666 else
3667 token.valid = FALSE;
3668 } /* switch (fvdef) */
3669 /* FALLTHRU */
3670 default:
3671 if (!instruct)
3672 typdef = tnone;
3674 if (structdef == stagseen)
3675 structdef = snone;
3676 break;
3677 case ',':
3678 if (definedef != dnone)
3679 break;
3680 switch (objdef)
3682 case omethodtag:
3683 case omethodparm:
3684 make_C_tag (TRUE); /* an Objective C method */
3685 objdef = oinbody;
3686 break;
3688 switch (fvdef)
3690 case fdefunkey:
3691 case foperator:
3692 case fstartlist:
3693 case finlist:
3694 case fignore:
3695 case vignore:
3696 break;
3697 case fdefunname:
3698 fvdef = fignore;
3699 break;
3700 case fvnameseen: /* a variable */
3701 if ((globals && cblev == 0 && (!fvextern || declarations))
3702 || (members && instruct))
3703 make_C_tag (FALSE);
3704 break;
3705 case flistseen: /* a function */
3706 if ((declarations && typdef == tnone && !instruct)
3707 || (members && typdef != tignore && instruct))
3709 make_C_tag (TRUE); /* a function declaration */
3710 fvdef = fvnameseen;
3712 else if (!declarations)
3713 fvdef = fvnone;
3714 token.valid = FALSE;
3715 break;
3716 default:
3717 fvdef = fvnone;
3719 if (structdef == stagseen)
3720 structdef = snone;
3721 break;
3722 case '[':
3723 if (definedef != dnone)
3724 break;
3725 if (structdef == stagseen)
3726 structdef = snone;
3727 switch (typdef)
3729 case ttypeseen:
3730 case tend:
3731 typdef = tignore;
3732 make_C_tag (FALSE); /* a typedef */
3733 break;
3734 case tnone:
3735 case tinbody:
3736 switch (fvdef)
3738 case foperator:
3739 case finlist:
3740 case fignore:
3741 case vignore:
3742 break;
3743 case fvnameseen:
3744 if ((members && cblev == 1)
3745 || (globals && cblev == 0
3746 && (!fvextern || declarations)))
3747 make_C_tag (FALSE); /* a variable */
3748 /* FALLTHRU */
3749 default:
3750 fvdef = fvnone;
3752 break;
3754 break;
3755 case '(':
3756 if (definedef != dnone)
3757 break;
3758 if (objdef == otagseen && parlev == 0)
3759 objdef = oparenseen;
3760 switch (fvdef)
3762 case fvnameseen:
3763 if (typdef == ttypeseen
3764 && *lp != '*'
3765 && !instruct)
3767 /* This handles constructs like:
3768 typedef void OperatorFun (int fun); */
3769 make_C_tag (FALSE);
3770 typdef = tignore;
3771 fvdef = fignore;
3772 break;
3774 /* FALLTHRU */
3775 case foperator:
3776 fvdef = fstartlist;
3777 break;
3778 case flistseen:
3779 fvdef = finlist;
3780 break;
3782 parlev++;
3783 break;
3784 case ')':
3785 if (definedef != dnone)
3786 break;
3787 if (objdef == ocatseen && parlev == 1)
3789 make_C_tag (TRUE); /* an Objective C category */
3790 objdef = oignore;
3792 if (--parlev == 0)
3794 switch (fvdef)
3796 case fstartlist:
3797 case finlist:
3798 fvdef = flistseen;
3799 break;
3801 if (!instruct
3802 && (typdef == tend
3803 || typdef == ttypeseen))
3805 typdef = tignore;
3806 make_C_tag (FALSE); /* a typedef */
3809 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3810 parlev = 0;
3811 break;
3812 case '{':
3813 if (definedef != dnone)
3814 break;
3815 if (typdef == ttypeseen)
3817 /* Whenever typdef is set to tinbody (currently only
3818 here), typdefcblev should be set to cblev. */
3819 typdef = tinbody;
3820 typdefcblev = cblev;
3822 switch (fvdef)
3824 case flistseen:
3825 make_C_tag (TRUE); /* a function */
3826 /* FALLTHRU */
3827 case fignore:
3828 fvdef = fvnone;
3829 break;
3830 case fvnone:
3831 switch (objdef)
3833 case otagseen:
3834 make_C_tag (TRUE); /* an Objective C class */
3835 objdef = oignore;
3836 break;
3837 case omethodtag:
3838 case omethodparm:
3839 make_C_tag (TRUE); /* an Objective C method */
3840 objdef = oinbody;
3841 break;
3842 default:
3843 /* Neutralize `extern "C" {' grot. */
3844 if (cblev == 0 && structdef == snone && nestlev == 0
3845 && typdef == tnone)
3846 cblev = -1;
3848 break;
3850 switch (structdef)
3852 case skeyseen: /* unnamed struct */
3853 pushclass_above (cblev, NULL, 0);
3854 structdef = snone;
3855 break;
3856 case stagseen: /* named struct or enum */
3857 case scolonseen: /* a class */
3858 pushclass_above (cblev, token.line+token.offset, token.length);
3859 structdef = snone;
3860 make_C_tag (FALSE); /* a struct or enum */
3861 break;
3863 cblev++;
3864 break;
3865 case '*':
3866 if (definedef != dnone)
3867 break;
3868 if (fvdef == fstartlist)
3870 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3871 token.valid = FALSE;
3873 break;
3874 case '}':
3875 if (definedef != dnone)
3876 break;
3877 if (!ignoreindent && lp == newlb.buffer + 1)
3879 if (cblev != 0)
3880 token.valid = FALSE;
3881 cblev = 0; /* reset curly brace level if first column */
3882 parlev = 0; /* also reset paren level, just in case... */
3884 else if (cblev > 0)
3885 cblev--;
3886 else
3887 token.valid = FALSE; /* something gone amiss, token unreliable */
3888 popclass_above (cblev);
3889 structdef = snone;
3890 /* Only if typdef == tinbody is typdefcblev significant. */
3891 if (typdef == tinbody && cblev <= typdefcblev)
3893 assert (cblev == typdefcblev);
3894 typdef = tend;
3896 break;
3897 case '=':
3898 if (definedef != dnone)
3899 break;
3900 switch (fvdef)
3902 case foperator:
3903 case finlist:
3904 case fignore:
3905 case vignore:
3906 break;
3907 case fvnameseen:
3908 if ((members && cblev == 1)
3909 || (globals && cblev == 0 && (!fvextern || declarations)))
3910 make_C_tag (FALSE); /* a variable */
3911 /* FALLTHRU */
3912 default:
3913 fvdef = vignore;
3915 break;
3916 case '<':
3917 if (cplpl && structdef == stagseen)
3919 structdef = sintemplate;
3920 break;
3922 goto resetfvdef;
3923 case '>':
3924 if (structdef == sintemplate)
3926 structdef = stagseen;
3927 break;
3929 goto resetfvdef;
3930 case '+':
3931 case '-':
3932 if (objdef == oinbody && cblev == 0)
3934 objdef = omethodsign;
3935 break;
3937 /* FALLTHRU */
3938 resetfvdef:
3939 case '#': case '~': case '&': case '%': case '/': case '|':
3940 case '^': case '!': case '.': case '?': case ']':
3941 if (definedef != dnone)
3942 break;
3943 /* These surely cannot follow a function tag in C. */
3944 switch (fvdef)
3946 case foperator:
3947 case finlist:
3948 case fignore:
3949 case vignore:
3950 break;
3951 default:
3952 fvdef = fvnone;
3954 break;
3955 case '\0':
3956 if (objdef == otagseen)
3958 make_C_tag (TRUE); /* an Objective C class */
3959 objdef = oignore;
3961 /* If a macro spans multiple lines don't reset its state. */
3962 if (quotednl)
3963 CNL_SAVE_DEFINEDEF ();
3964 else
3965 CNL ();
3966 break;
3967 } /* switch (c) */
3969 } /* while not eof */
3971 free (lbs[0].lb.buffer);
3972 free (lbs[1].lb.buffer);
3976 * Process either a C++ file or a C file depending on the setting
3977 * of a global flag.
3979 static void
3980 default_C_entries (inf)
3981 FILE *inf;
3983 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: no dialect bits are passed to C_entries.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3994 /* Always do C++. */
3995 static void
3996 Cplusplus_entries (inf)
3997 FILE *inf;
3999 C_entries (C_PLPL, inf);
4002 /* Always do Java. */
4003 static void
4004 Cjava_entries (inf)
4005 FILE *inf;
4007 C_entries (C_JAVA, inf);
4010 /* Always do C*. */
4011 static void
4012 Cstar_entries (inf)
4013 FILE *inf;
4015 C_entries (C_STAR, inf);
4018 /* Always do Yacc. */
4019 static void
4020 Yacc_entries (inf)
4021 FILE *inf;
4023 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read the next
   line into LINE_BUFFER and point CHAR_POINTER at its first character,
   then run the statement that follows the macro invocation.
   LINE_BUFFER and CHAR_POINTER are parenthesized in the expansion so
   that non-trivial argument expressions keep their meaning.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )

/* True if CP points at KEYWORD followed by a character that cannot be
   part of a name; in that case CP is advanced past the keyword and any
   following spaces.  CP is evaluated more than once and is assigned to,
   so it must be a side-effect-free lvalue.  */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1)))     /* skip spaces */
4042 * Read a file, but do no processing. This is used to do regexp
4043 * matching on files that have no language defined.
4045 static void
4046 just_read_file (inf)
4047 FILE *inf;
4049 register char *dummy;
4051 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4052 continue;
4056 /* Fortran parsing */
4058 static void F_takeprec __P((void));
4059 static void F_getit __P((FILE *));
4061 static void
4062 F_takeprec ()
4064 dbp = skip_spaces (dbp);
4065 if (*dbp != '*')
4066 return;
4067 dbp++;
4068 dbp = skip_spaces (dbp);
4069 if (strneq (dbp, "(*)", 3))
4071 dbp += 3;
4072 return;
4074 if (!ISDIGIT (*dbp))
4076 --dbp; /* force failure */
4077 return;
4080 dbp++;
4081 while (ISDIGIT (*dbp));
4084 static void
4085 F_getit (inf)
4086 FILE *inf;
4088 register char *cp;
4090 dbp = skip_spaces (dbp);
4091 if (*dbp == '\0')
4093 readline (&lb, inf);
4094 dbp = lb.buffer;
4095 if (dbp[5] != '&')
4096 return;
4097 dbp += 6;
4098 dbp = skip_spaces (dbp);
4100 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4101 return;
4102 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4103 continue;
4104 make_tag (dbp, cp-dbp, TRUE,
4105 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4109 static void
4110 Fortran_functions (inf)
4111 FILE *inf;
4113 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4115 if (*dbp == '%')
4116 dbp++; /* Ratfor escape to fortran */
4117 dbp = skip_spaces (dbp);
4118 if (*dbp == '\0')
4119 continue;
4120 switch (lowcase (*dbp))
4122 case 'i':
4123 if (nocase_tail ("integer"))
4124 F_takeprec ();
4125 break;
4126 case 'r':
4127 if (nocase_tail ("real"))
4128 F_takeprec ();
4129 break;
4130 case 'l':
4131 if (nocase_tail ("logical"))
4132 F_takeprec ();
4133 break;
4134 case 'c':
4135 if (nocase_tail ("complex") || nocase_tail ("character"))
4136 F_takeprec ();
4137 break;
4138 case 'd':
4139 if (nocase_tail ("double"))
4141 dbp = skip_spaces (dbp);
4142 if (*dbp == '\0')
4143 continue;
4144 if (nocase_tail ("precision"))
4145 break;
4146 continue;
4148 break;
4150 dbp = skip_spaces (dbp);
4151 if (*dbp == '\0')
4152 continue;
4153 switch (lowcase (*dbp))
4155 case 'f':
4156 if (nocase_tail ("function"))
4157 F_getit (inf);
4158 continue;
4159 case 's':
4160 if (nocase_tail ("subroutine"))
4161 F_getit (inf);
4162 continue;
4163 case 'e':
4164 if (nocase_tail ("entry"))
4165 F_getit (inf);
4166 continue;
4167 case 'b':
4168 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4170 dbp = skip_spaces (dbp);
4171 if (*dbp == '\0') /* assume un-named */
4172 make_tag ("blockdata", 9, TRUE,
4173 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4174 else
4175 F_getit (inf); /* look for name */
4177 continue;
4184 * Ada parsing
4185 * Original code by
4186 * Philippe Waroquiers (1998)
4189 static void Ada_getit __P((FILE *, char *));
4191 /* Once we are positioned after an "interesting" keyword, let's get
4192 the real tag value necessary. */
4193 static void
4194 Ada_getit (inf, name_qualifier)
4195 FILE *inf;
4196 char *name_qualifier;
4198 register char *cp;
4199 char *name;
4200 char c;
4202 while (!feof (inf))
4204 dbp = skip_spaces (dbp);
4205 if (*dbp == '\0'
4206 || (dbp[0] == '-' && dbp[1] == '-'))
4208 readline (&lb, inf);
4209 dbp = lb.buffer;
4211 switch (lowcase(*dbp))
4213 case 'b':
4214 if (nocase_tail ("body"))
4216 /* Skipping body of procedure body or package body or ....
4217 resetting qualifier to body instead of spec. */
4218 name_qualifier = "/b";
4219 continue;
4221 break;
4222 case 't':
4223 /* Skipping type of task type or protected type ... */
4224 if (nocase_tail ("type"))
4225 continue;
4226 break;
4228 if (*dbp == '"')
4230 dbp += 1;
4231 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4232 continue;
4234 else
4236 dbp = skip_spaces (dbp);
4237 for (cp = dbp;
4238 (*cp != '\0'
4239 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4240 cp++)
4241 continue;
4242 if (cp == dbp)
4243 return;
4245 c = *cp;
4246 *cp = '\0';
4247 name = concat (dbp, name_qualifier, "");
4248 *cp = c;
4249 make_tag (name, strlen (name), TRUE,
4250 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4251 free (name);
4252 if (c == '"')
4253 dbp = cp + 1;
4254 return;
4258 static void
4259 Ada_funcs (inf)
4260 FILE *inf;
4262 bool inquote = FALSE;
4263 bool skip_till_semicolumn = FALSE;
4265 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4267 while (*dbp != '\0')
4269 /* Skip a string i.e. "abcd". */
4270 if (inquote || (*dbp == '"'))
4272 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4273 if (dbp != NULL)
4275 inquote = FALSE;
4276 dbp += 1;
4277 continue; /* advance char */
4279 else
4281 inquote = TRUE;
4282 break; /* advance line */
4286 /* Skip comments. */
4287 if (dbp[0] == '-' && dbp[1] == '-')
4288 break; /* advance line */
4290 /* Skip character enclosed in single quote i.e. 'a'
4291 and skip single quote starting an attribute i.e. 'Image. */
4292 if (*dbp == '\'')
4294 dbp++ ;
4295 if (*dbp != '\0')
4296 dbp++;
4297 continue;
4300 if (skip_till_semicolumn)
4302 if (*dbp == ';')
4303 skip_till_semicolumn = FALSE;
4304 dbp++;
4305 continue; /* advance char */
4308 /* Search for beginning of a token. */
4309 if (!begtoken (*dbp))
4311 dbp++;
4312 continue; /* advance char */
4315 /* We are at the beginning of a token. */
4316 switch (lowcase(*dbp))
4318 case 'f':
4319 if (!packages_only && nocase_tail ("function"))
4320 Ada_getit (inf, "/f");
4321 else
4322 break; /* from switch */
4323 continue; /* advance char */
4324 case 'p':
4325 if (!packages_only && nocase_tail ("procedure"))
4326 Ada_getit (inf, "/p");
4327 else if (nocase_tail ("package"))
4328 Ada_getit (inf, "/s");
4329 else if (nocase_tail ("protected")) /* protected type */
4330 Ada_getit (inf, "/t");
4331 else
4332 break; /* from switch */
4333 continue; /* advance char */
4335 case 'u':
4336 if (typedefs && !packages_only && nocase_tail ("use"))
4338 /* when tagging types, avoid tagging use type Pack.Typename;
4339 for this, we will skip everything till a ; */
4340 skip_till_semicolumn = TRUE;
4341 continue; /* advance char */
4344 case 't':
4345 if (!packages_only && nocase_tail ("task"))
4346 Ada_getit (inf, "/k");
4347 else if (typedefs && !packages_only && nocase_tail ("type"))
4349 Ada_getit (inf, "/t");
4350 while (*dbp != '\0')
4351 dbp += 1;
4353 else
4354 break; /* from switch */
4355 continue; /* advance char */
4358 /* Look for the end of the token. */
4359 while (!endtoken (*dbp))
4360 dbp++;
4362 } /* advance char */
4363 } /* advance line */
4368 * Unix and microcontroller assembly tag handling
4369 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4370 * Idea by Bob Weiner, Motorola Inc. (1994)
4372 static void
4373 Asm_labels (inf)
4374 FILE *inf;
4376 register char *cp;
4378 LOOP_ON_INPUT_LINES (inf, lb, cp)
4380 /* If first char is alphabetic or one of [_.$], test for colon
4381 following identifier. */
4382 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4384 /* Read past label. */
4385 cp++;
4386 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4387 cp++;
4388 if (*cp == ':' || iswhite (*cp))
4389 /* Found end of label, so copy it and add it to the table. */
4390 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4391 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4398 * Perl support
4399 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4400 * Perl variable names: /^(my|local).../
4401 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4402 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4403 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4405 static void
4406 Perl_functions (inf)
4407 FILE *inf;
4409 char *package = savestr ("main"); /* current package name */
4410 register char *cp;
4412 LOOP_ON_INPUT_LINES (inf, lb, cp)
4414 skip_spaces(cp);
4416 if (LOOKING_AT (cp, "package"))
4418 free (package);
4419 get_tag (cp, &package);
4421 else if (LOOKING_AT (cp, "sub"))
4423 char *pos;
4424 char *sp = cp;
4426 while (!notinname (*cp))
4427 cp++;
4428 if (cp == sp)
4429 continue; /* nothing found */
4430 if ((pos = etags_strchr (sp, ':')) != NULL
4431 && pos < cp && pos[1] == ':')
4432 /* The name is already qualified. */
4433 make_tag (sp, cp - sp, TRUE,
4434 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4435 else
4436 /* Qualify it. */
4438 char savechar, *name;
4440 savechar = *cp;
4441 *cp = '\0';
4442 name = concat (package, "::", sp);
4443 *cp = savechar;
4444 make_tag (name, strlen(name), TRUE,
4445 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446 free (name);
4449 else if (globals) /* only if we are tagging global vars */
4451 /* Skip a qualifier, if any. */
4452 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4453 /* After "my" or "local", but before any following paren or space. */
4454 char *varstart = cp;
4456 if (qual /* should this be removed? If yes, how? */
4457 && (*cp == '$' || *cp == '@' || *cp == '%'))
4459 varstart += 1;
4461 cp++;
4462 while (ISALNUM (*cp) || *cp == '_');
4464 else if (qual)
4466 /* Should be examining a variable list at this point;
4467 could insist on seeing an open parenthesis. */
4468 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4469 cp++;
4471 else
4472 continue;
4474 make_tag (varstart, cp - varstart, FALSE,
4475 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4482 * Python support
4483 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4484 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4485 * More ideas by seb bacon <seb@jamkit.com> (2002)
4487 static void
4488 Python_functions (inf)
4489 FILE *inf;
4491 register char *cp;
4493 LOOP_ON_INPUT_LINES (inf, lb, cp)
4495 cp = skip_spaces (cp);
4496 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4498 char *name = cp;
4499 while (!notinname (*cp) && *cp != ':')
4500 cp++;
4501 make_tag (name, cp - name, TRUE,
4502 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4509 * PHP support
4510 * Look for:
4511 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4512 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4513 * - /^[ \t]*define\(\"[^\"]+/
4514 * Only with --members:
4515 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4516 * Idea by Diez B. Roggisch (2001)
4518 static void
4519 PHP_functions (inf)
4520 FILE *inf;
4522 register char *cp, *name;
4523 bool search_identifier = FALSE;
4525 LOOP_ON_INPUT_LINES (inf, lb, cp)
4527 cp = skip_spaces (cp);
4528 name = cp;
4529 if (search_identifier
4530 && *cp != '\0')
4532 while (!notinname (*cp))
4533 cp++;
4534 make_tag (name, cp - name, TRUE,
4535 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4536 search_identifier = FALSE;
4538 else if (LOOKING_AT (cp, "function"))
4540 if(*cp == '&')
4541 cp = skip_spaces (cp+1);
4542 if(*cp != '\0')
4544 name = cp;
4545 while (!notinname (*cp))
4546 cp++;
4547 make_tag (name, cp - name, TRUE,
4548 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4550 else
4551 search_identifier = TRUE;
4553 else if (LOOKING_AT (cp, "class"))
4555 if (*cp != '\0')
4557 name = cp;
4558 while (*cp != '\0' && !iswhite (*cp))
4559 cp++;
4560 make_tag (name, cp - name, FALSE,
4561 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4563 else
4564 search_identifier = TRUE;
4566 else if (strneq (cp, "define", 6)
4567 && (cp = skip_spaces (cp+6))
4568 && *cp++ == '('
4569 && (*cp == '"' || *cp == '\''))
4571 char quote = *cp++;
4572 name = cp;
4573 while (*cp != quote && *cp != '\0')
4574 cp++;
4575 make_tag (name, cp - name, FALSE,
4576 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578 else if (members
4579 && LOOKING_AT (cp, "var")
4580 && *cp == '$')
4582 name = cp;
4583 while (!notinname(*cp))
4584 cp++;
4585 make_tag (name, cp - name, FALSE,
4586 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4593 * Cobol tag functions
4594 * We could look for anything that could be a paragraph name.
4595 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4596 * Idea by Corny de Souza (1993)
4598 static void
4599 Cobol_paragraphs (inf)
4600 FILE *inf;
4602 register char *bp, *ep;
4604 LOOP_ON_INPUT_LINES (inf, lb, bp)
4606 if (lb.len < 9)
4607 continue;
4608 bp += 8;
4610 /* If eoln, compiler option or comment ignore whole line. */
4611 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4612 continue;
4614 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4615 continue;
4616 if (*ep++ == '.')
4617 make_tag (bp, ep - bp, TRUE,
4618 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4624 * Makefile support
4625 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4627 static void
4628 Makefile_targets (inf)
4629 FILE *inf;
4631 register char *bp;
4633 LOOP_ON_INPUT_LINES (inf, lb, bp)
4635 if (*bp == '\t' || *bp == '#')
4636 continue;
4637 while (*bp != '\0' && *bp != '=' && *bp != ':')
4638 bp++;
4639 if (*bp == ':' || (globals && *bp == '='))
4640 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4641 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4647 * Pascal parsing
4648 * Original code by Mosur K. Mohan (1989)
4650 * Locates tags for procedures & functions. Doesn't do any type- or
4651 * var-definitions. It does look for the keyword "extern" or
4652 * "forward" immediately following the procedure statement; if found,
4653 * the tag is skipped.
4655 static void
4656 Pascal_functions (inf)
4657 FILE *inf;
4659 linebuffer tline; /* mostly copied from C_entries */
4660 long save_lcno;
4661 int save_lineno, namelen, taglen;
4662 char c, *name;
4664 bool /* each of these flags is TRUE iff: */
4665 incomment, /* point is inside a comment */
4666 inquote, /* point is inside '..' string */
4667 get_tagname, /* point is after PROCEDURE/FUNCTION
4668 keyword, so next item = potential tag */
4669 found_tag, /* point is after a potential tag */
4670 inparms, /* point is within parameter-list */
4671 verify_tag; /* point has passed the parm-list, so the
4672 next token will determine whether this
4673 is a FORWARD/EXTERN to be ignored, or
4674 whether it is a real tag */
4676 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4677 name = NULL; /* keep compiler quiet */
4678 dbp = lb.buffer;
4679 *dbp = '\0';
4680 linebuffer_init (&tline);
4682 incomment = inquote = FALSE;
4683 found_tag = FALSE; /* have a proc name; check if extern */
4684 get_tagname = FALSE; /* found "procedure" keyword */
4685 inparms = FALSE; /* found '(' after "proc" */
4686 verify_tag = FALSE; /* check if "extern" is ahead */
4689 while (!feof (inf)) /* long main loop to get next char */
4691 c = *dbp++;
4692 if (c == '\0') /* if end of line */
4694 readline (&lb, inf);
4695 dbp = lb.buffer;
4696 if (*dbp == '\0')
4697 continue;
4698 if (!((found_tag && verify_tag)
4699 || get_tagname))
4700 c = *dbp++; /* only if don't need *dbp pointing
4701 to the beginning of the name of
4702 the procedure or function */
4704 if (incomment)
4706 if (c == '}') /* within { } comments */
4707 incomment = FALSE;
4708 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4710 dbp++;
4711 incomment = FALSE;
4713 continue;
4715 else if (inquote)
4717 if (c == '\'')
4718 inquote = FALSE;
4719 continue;
4721 else
4722 switch (c)
4724 case '\'':
4725 inquote = TRUE; /* found first quote */
4726 continue;
4727 case '{': /* found open { comment */
4728 incomment = TRUE;
4729 continue;
4730 case '(':
4731 if (*dbp == '*') /* found open (* comment */
4733 incomment = TRUE;
4734 dbp++;
4736 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4737 inparms = TRUE;
4738 continue;
4739 case ')': /* end of parms list */
4740 if (inparms)
4741 inparms = FALSE;
4742 continue;
4743 case ';':
4744 if (found_tag && !inparms) /* end of proc or fn stmt */
4746 verify_tag = TRUE;
4747 break;
4749 continue;
4751 if (found_tag && verify_tag && (*dbp != ' '))
4753 /* Check if this is an "extern" declaration. */
4754 if (*dbp == '\0')
4755 continue;
4756 if (lowcase (*dbp == 'e'))
4758 if (nocase_tail ("extern")) /* superfluous, really! */
4760 found_tag = FALSE;
4761 verify_tag = FALSE;
4764 else if (lowcase (*dbp) == 'f')
4766 if (nocase_tail ("forward")) /* check for forward reference */
4768 found_tag = FALSE;
4769 verify_tag = FALSE;
4772 if (found_tag && verify_tag) /* not external proc, so make tag */
4774 found_tag = FALSE;
4775 verify_tag = FALSE;
4776 make_tag (name, namelen, TRUE,
4777 tline.buffer, taglen, save_lineno, save_lcno);
4778 continue;
4781 if (get_tagname) /* grab name of proc or fn */
4783 char *cp;
4785 if (*dbp == '\0')
4786 continue;
4788 /* Find block name. */
4789 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4790 continue;
4792 /* Save all values for later tagging. */
4793 linebuffer_setlen (&tline, lb.len);
4794 strcpy (tline.buffer, lb.buffer);
4795 save_lineno = lineno;
4796 save_lcno = linecharno;
4797 name = tline.buffer + (dbp - lb.buffer);
4798 namelen = cp - dbp;
4799 taglen = cp - lb.buffer + 1;
4801 dbp = cp; /* set dbp to e-o-token */
4802 get_tagname = FALSE;
4803 found_tag = TRUE;
4804 continue;
4806 /* And proceed to check for "extern". */
4808 else if (!incomment && !inquote && !found_tag)
4810 /* Check for proc/fn keywords. */
4811 switch (lowcase (c))
4813 case 'p':
4814 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4815 get_tagname = TRUE;
4816 continue;
4817 case 'f':
4818 if (nocase_tail ("unction"))
4819 get_tagname = TRUE;
4820 continue;
4823 } /* while not eof */
4825 free (tline.buffer);
4830 * Lisp tag functions
4831 * look for (def or (DEF, quote or QUOTE
4834 static void L_getit __P((void));
4836 static void
4837 L_getit ()
4839 if (*dbp == '\'') /* Skip prefix quote */
4840 dbp++;
4841 else if (*dbp == '(')
4843 dbp++;
4844 /* Try to skip "(quote " */
4845 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4846 /* Ok, then skip "(" before name in (defstruct (foo)) */
4847 dbp = skip_spaces (dbp);
4849 get_tag (dbp, NULL);
4852 static void
4853 Lisp_functions (inf)
4854 FILE *inf;
4856 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4858 if (dbp[0] != '(')
4859 continue;
4861 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4863 dbp = skip_non_spaces (dbp);
4864 dbp = skip_spaces (dbp);
4865 L_getit ();
4867 else
4869 /* Check for (foo::defmumble name-defined ... */
4871 dbp++;
4872 while (!notinname (*dbp) && *dbp != ':');
4873 if (*dbp == ':')
4876 dbp++;
4877 while (*dbp == ':');
4879 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4881 dbp = skip_non_spaces (dbp);
4882 dbp = skip_spaces (dbp);
4883 L_getit ();
4892 * Postscript tag functions
4893 * Just look for lines where the first character is '/'
4894 * Also look at "defineps" for PSWrap
4895 * Ideas by:
4896 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4897 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4899 static void
4900 PS_functions (inf)
4901 FILE *inf;
4903 register char *bp, *ep;
4905 LOOP_ON_INPUT_LINES (inf, lb, bp)
4907 if (bp[0] == '/')
4909 for (ep = bp+1;
4910 *ep != '\0' && *ep != ' ' && *ep != '{';
4911 ep++)
4912 continue;
4913 make_tag (bp, ep - bp, TRUE,
4914 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4916 else if (LOOKING_AT (bp, "defineps"))
4917 get_tag (bp, NULL);
4923 * Scheme tag functions
4924 * look for (def... xyzzy
4925 * (def... (xyzzy
4926 * (def ... ((...(xyzzy ....
4927 * (set! xyzzy
4928 * Original code by Ken Haase (1985?)
4931 static void
4932 Scheme_functions (inf)
4933 FILE *inf;
4935 register char *bp;
4937 LOOP_ON_INPUT_LINES (inf, lb, bp)
4939 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4941 bp = skip_non_spaces (bp+4);
4942 /* Skip over open parens and white space */
4943 while (notinname (*bp))
4944 bp++;
4945 get_tag (bp, NULL);
4947 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4948 get_tag (bp, NULL);
4953 /* Find tags in TeX and LaTeX input files. */
4955 /* TEX_toktab is a table of TeX control sequences that define tags.
4956 * Each entry records one such control sequence.
4958 * Original code from who knows whom.
4959 * Ideas by:
4960 * Stefan Monnier (2002)
4963 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4965 /* Default set of control sequences to put into TEX_toktab.
4966 The value of environment var TEXTAGS is prepended to this. */
4967 static char *TEX_defenv = "\
4968 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4969 :part:appendix:entry:index:def\
4970 :newcommand:renewcommand:newenvironment:renewenvironment";
4972 static void TEX_mode __P((FILE *));
4973 static void TEX_decode_env __P((char *, char *));
4975 static char TEX_esc = '\\';
4976 static char TEX_opgrp = '{';
4977 static char TEX_clgrp = '}';
4980 * TeX/LaTeX scanning loop.
4982 static void
4983 TeX_commands (inf)
4984 FILE *inf;
4986 char *cp;
4987 linebuffer *key;
4989 /* Select either \ or ! as escape character. */
4990 TEX_mode (inf);
4992 /* Initialize token table once from environment. */
4993 if (TEX_toktab == NULL)
4994 TEX_decode_env ("TEXTAGS", TEX_defenv);
4996 LOOP_ON_INPUT_LINES (inf, lb, cp)
4998 /* Look at each TEX keyword in line. */
4999 for (;;)
5001 /* Look for a TEX escape. */
5002 while (*cp++ != TEX_esc)
5003 if (cp[-1] == '\0' || cp[-1] == '%')
5004 goto tex_next_line;
5006 for (key = TEX_toktab; key->buffer != NULL; key++)
5007 if (strneq (cp, key->buffer, key->len))
5009 register char *p;
5010 int namelen, linelen;
5011 bool opgrp = FALSE;
5013 cp = skip_spaces (cp + key->len);
5014 if (*cp == TEX_opgrp)
5016 opgrp = TRUE;
5017 cp++;
5019 for (p = cp;
5020 (!iswhite (*p) && *p != '#' &&
5021 *p != TEX_opgrp && *p != TEX_clgrp);
5022 p++)
5023 continue;
5024 namelen = p - cp;
5025 linelen = lb.len;
5026 if (!opgrp || *p == TEX_clgrp)
5028 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5029 *p++;
5030 linelen = p - lb.buffer + 1;
5032 make_tag (cp, namelen, TRUE,
5033 lb.buffer, linelen, lineno, linecharno);
5034 goto tex_next_line; /* We only tag a line once */
5037 tex_next_line:
5042 #define TEX_LESC '\\'
5043 #define TEX_SESC '!'
5045 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5046 chars accordingly. */
5047 static void
5048 TEX_mode (inf)
5049 FILE *inf;
5051 int c;
5053 while ((c = getc (inf)) != EOF)
5055 /* Skip to next line if we hit the TeX comment char. */
5056 if (c == '%')
5057 while (c != '\n')
5058 c = getc (inf);
5059 else if (c == TEX_LESC || c == TEX_SESC )
5060 break;
5063 if (c == TEX_LESC)
5065 TEX_esc = TEX_LESC;
5066 TEX_opgrp = '{';
5067 TEX_clgrp = '}';
5069 else
5071 TEX_esc = TEX_SESC;
5072 TEX_opgrp = '<';
5073 TEX_clgrp = '>';
5075 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5076 No attempt is made to correct the situation. */
5077 rewind (inf);
5080 /* Read environment and prepend it to the default string.
5081 Build token table. */
5082 static void
5083 TEX_decode_env (evarname, defenv)
5084 char *evarname;
5085 char *defenv;
5087 register char *env, *p;
5088 int i, len;
5090 /* Append default string to environment. */
5091 env = getenv (evarname);
5092 if (!env)
5093 env = defenv;
5094 else
5096 char *oldenv = env;
5097 env = concat (oldenv, defenv, "");
5100 /* Allocate a token table */
5101 for (len = 1, p = env; p;)
5102 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5103 len++;
5104 TEX_toktab = xnew (len, linebuffer);
5106 /* Unpack environment string into token table. Be careful about */
5107 /* zero-length strings (leading ':', "::" and trailing ':') */
5108 for (i = 0; *env != '\0';)
5110 p = etags_strchr (env, ':');
5111 if (!p) /* End of environment string. */
5112 p = env + strlen (env);
5113 if (p - env > 0)
5114 { /* Only non-zero strings. */
5115 TEX_toktab[i].buffer = savenstr (env, p - env);
5116 TEX_toktab[i].len = p - env;
5117 i++;
5119 if (*p)
5120 env = p + 1;
5121 else
5123 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5124 TEX_toktab[i].len = 0;
5125 break;
5131 /* Texinfo support. Dave Love, Mar. 2000. */
5132 static void
5133 Texinfo_nodes (inf)
5134 FILE * inf;
5136 char *cp, *start;
5137 LOOP_ON_INPUT_LINES (inf, lb, cp)
5138 if (LOOKING_AT (cp, "@node"))
5140 start = cp;
5141 while (*cp != '\0' && *cp != ',')
5142 cp++;
5143 make_tag (start, cp - start, TRUE,
5144 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/* Similar to LOOKING_AT but compares without regard to case, does not
   use notinname, and does not skip anything after the keyword: when
   CP points at KW the value is true and CP is left just past KW.  */
#define LOOKING_AT_NOCASE(cp, kw) /* kw is a constant string */         \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   && ((cp) += sizeof(kw)-1))           /* step over kw */
5155 * HTML support.
5156 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5157 * Contents of <a name=xxx> are tags with name xxx.
5159 * Francesco Potortì, 2002.
5161 static void
5162 HTML_labels (inf)
5163 FILE * inf;
5165 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5166 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5167 bool intag = FALSE; /* inside an html tag, looking for ID= */
5168 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5169 char *end;
5172 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5174 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5175 for (;;) /* loop on the same line */
5177 if (skiptag) /* skip HTML tag */
5179 while (*dbp != '\0' && *dbp != '>')
5180 dbp++;
5181 if (*dbp == '>')
5183 dbp += 1;
5184 skiptag = FALSE;
5185 continue; /* look on the same line */
5187 break; /* go to next line */
5190 else if (intag) /* look for "name=" or "id=" */
5192 while (*dbp != '\0' && *dbp != '>'
5193 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5194 dbp++;
5195 if (*dbp == '\0')
5196 break; /* go to next line */
5197 if (*dbp == '>')
5199 dbp += 1;
5200 intag = FALSE;
5201 continue; /* look on the same line */
5203 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5204 || LOOKING_AT_NOCASE (dbp, "id="))
5206 bool quoted = (dbp[0] == '"');
5208 if (quoted)
5209 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5210 continue;
5211 else
5212 for (end = dbp; *end != '\0' && intoken (*end); end++)
5213 continue;
5214 linebuffer_setlen (&token_name, end - dbp);
5215 strncpy (token_name.buffer, dbp, end - dbp);
5216 token_name.buffer[end - dbp] = '\0';
5218 dbp = end;
5219 intag = FALSE; /* we found what we looked for */
5220 skiptag = TRUE; /* skip to the end of the tag */
5221 getnext = TRUE; /* then grab the text */
5222 continue; /* look on the same line */
5224 dbp += 1;
5227 else if (getnext) /* grab next tokens and tag them */
5229 dbp = skip_spaces (dbp);
5230 if (*dbp == '\0')
5231 break; /* go to next line */
5232 if (*dbp == '<')
5234 intag = TRUE;
5235 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5236 continue; /* look on the same line */
5239 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5240 continue;
5241 make_tag (token_name.buffer, token_name.len, TRUE,
5242 dbp, end - dbp, lineno, linecharno);
5243 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5244 getnext = FALSE;
5245 break; /* go to next line */
5248 else /* look for an interesting HTML tag */
5250 while (*dbp != '\0' && *dbp != '<')
5251 dbp++;
5252 if (*dbp == '\0')
5253 break; /* go to next line */
5254 intag = TRUE;
5255 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5257 inanchor = TRUE;
5258 continue; /* look on the same line */
5260 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5261 || LOOKING_AT_NOCASE (dbp, "<h1>")
5262 || LOOKING_AT_NOCASE (dbp, "<h2>")
5263 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5265 intag = FALSE;
5266 getnext = TRUE;
5267 continue; /* look on the same line */
5269 dbp += 1;
5276 * Prolog support
5278 * Assumes that the predicate or rule starts at column 0.
5279 * Only the first clause of a predicate or rule is added.
5280 * Original code by Sunichirou Sugou (1989)
5281 * Rewritten by Anders Lindgren (1996)
5283 static int prolog_pr __P((char *, char *));
5284 static void prolog_skip_comment __P((linebuffer *, FILE *));
5285 static int prolog_atom __P((char *, int));
5287 static void
5288 Prolog_functions (inf)
5289 FILE *inf;
5291 char *cp, *last;
5292 int len;
5293 int allocated;
5295 allocated = 0;
5296 len = 0;
5297 last = NULL;
5299 LOOP_ON_INPUT_LINES (inf, lb, cp)
5301 if (cp[0] == '\0') /* Empty line */
5302 continue;
5303 else if (iswhite (cp[0])) /* Not a predicate */
5304 continue;
5305 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5306 prolog_skip_comment (&lb, inf);
5307 else if ((len = prolog_pr (cp, last)) > 0)
5309 /* Predicate or rule. Store the function name so that we
5310 only generate a tag for the first clause. */
5311 if (last == NULL)
5312 last = xnew(len + 1, char);
5313 else if (len + 1 > allocated)
5314 xrnew (last, len + 1, char);
5315 allocated = len + 1;
5316 strncpy (last, cp, len);
5317 last[len] = '\0';
5323 static void
5324 prolog_skip_comment (plb, inf)
5325 linebuffer *plb;
5326 FILE *inf;
5328 char *cp;
5332 for (cp = plb->buffer; *cp != '\0'; cp++)
5333 if (cp[0] == '*' && cp[1] == '/')
5334 return;
5335 readline (plb, inf);
5337 while (!feof(inf));
5341 * A predicate or rule definition is added if it matches:
5342 * <beginning of line><Prolog Atom><whitespace>(
5343 * or <beginning of line><Prolog Atom><whitespace>:-
5345 * It is added to the tags database if it doesn't match the
5346 * name of the previous clause header.
5348 * Return the size of the name of the predicate or rule, or 0 if no
5349 * header was found.
5351 static int
5352 prolog_pr (s, last)
5353 char *s;
5354 char *last; /* Name of last clause. */
5356 int pos;
5357 int len;
5359 pos = prolog_atom (s, 0);
5360 if (pos < 1)
5361 return 0;
5363 len = pos;
5364 pos = skip_spaces (s + pos) - s;
5366 if ((s[pos] == '.'
5367 || (s[pos] == '(' && (pos += 1))
5368 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5369 && (last == NULL /* save only the first clause */
5370 || len != strlen (last)
5371 || !strneq (s, last, len)))
5373 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5374 return len;
5376 else
5377 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* Quoted atom: scan up to the closing quote.  */
  pos++;
  for (;;)
    {
      switch (s[pos])
        {
        case '\0':
          /* Multiline quoted atoms are ignored. */
          return -1;

        case '\\':
          /* Backslash quotes the next character, which must exist.  */
          if (s[pos+1] == '\0')
            return -1;
          pos += 2;
          continue;

        case '\'':
          pos++;
          if (s[pos] != '\'')
            return pos - start; /* that was the closing quote */
          pos++;                /* A double quote */
          continue;

        default:
          pos++;
          continue;
        }
    }
}
5441 * Support for Erlang
5443 * Generates tags for functions, defines, and records.
5444 * Assumes that Erlang functions start at column 0.
5445 * Original code by Anders Lindgren (1996)
5447 static int erlang_func __P((char *, char *));
5448 static void erlang_attribute __P((char *));
5449 static int erlang_atom __P((char *));
5451 static void
5452 Erlang_functions (inf)
5453 FILE *inf;
5455 char *cp, *last;
5456 int len;
5457 int allocated;
5459 allocated = 0;
5460 len = 0;
5461 last = NULL;
5463 LOOP_ON_INPUT_LINES (inf, lb, cp)
5465 if (cp[0] == '\0') /* Empty line */
5466 continue;
5467 else if (iswhite (cp[0])) /* Not function nor attribute */
5468 continue;
5469 else if (cp[0] == '%') /* comment */
5470 continue;
5471 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5472 continue;
5473 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5475 erlang_attribute (cp);
5476 last = NULL;
5478 else if ((len = erlang_func (cp, last)) > 0)
5481 * Function. Store the function name so that we only
5482 * generates a tag for the first clause.
5484 if (last == NULL)
5485 last = xnew (len + 1, char);
5486 else if (len + 1 > allocated)
5487 xrnew (last, len + 1, char);
5488 allocated = len + 1;
5489 strncpy (last, cp, len);
5490 last[len] = '\0';
5497 * A function definition is added if it matches:
5498 * <beginning of line><Erlang Atom><whitespace>(
5500 * It is added to the tags database if it doesn't match the
5501 * name of the previous clause header.
5503 * Return the size of the name of the function, or 0 if no function
5504 * was found.
5506 static int
5507 erlang_func (s, last)
5508 char *s;
5509 char *last; /* Name of last clause. */
5511 int pos;
5512 int len;
5514 pos = erlang_atom (s);
5515 if (pos < 1)
5516 return 0;
5518 len = pos;
5519 pos = skip_spaces (s + pos) - s;
5521 /* Save only the first clause. */
5522 if (s[pos++] == '('
5523 && (last == NULL
5524 || len != (int)strlen (last)
5525 || !strneq (s, last, len)))
5527 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5528 return len;
5531 return 0;
5536 * Handle attributes. Currently, tags are generated for defines
5537 * and records.
5539 * They are on the form:
5540 * -define(foo, bar).
5541 * -define(Foo(M, N), M+N).
5542 * -record(graph, {vtab = notable, cyclic = true}).
5544 static void
5545 erlang_attribute (s)
5546 char *s;
5548 char *cp = s;
5550 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5551 && *cp++ == '(')
5553 int len = erlang_atom (skip_spaces (cp));
5554 if (len > 0)
5555 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5557 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* Quoted atom: look for the closing quote.  A backslash makes the
     scan step over the character after it.  */
  for (pos = 1; s[pos] != '\''; pos++)
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\' && s[++pos] == '\0')
        return 0;
    }
  return pos + 1;               /* count the closing quote too */
}
5591 #ifdef ETAGS_REGEXPS
5593 static char *scan_separators __P((char *));
5594 static void add_regex __P((char *, language *));
5595 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char delim = name[0];         /* the separator in use */
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* input cursor, always ahead of DST */
  char *result;
  int escaped = 0;              /* previous char was a backslash */

  for (src = name + 1; *src != '\0'; src++)
    {
      char ch = *src;

      if (escaped)
        {
          escaped = 0;
          switch (ch)
            {
            case 'a': ch = '\007'; break; /* BEL (bell) */
            case 'b': ch = '\b'; break;   /* BS (back space) */
            case 'd': ch = 0177; break;   /* DEL (delete) */
            case 'e': ch = 033; break;    /* ESC (escape) */
            case 'f': ch = '\f'; break;   /* FF (form feed) */
            case 'n': ch = '\n'; break;   /* NL (new line) */
            case 'r': ch = '\r'; break;   /* CR (carriage return) */
            case 't': ch = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': ch = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator stands for itself; for anything
                 else the quote is preserved.  */
              if (ch != delim)
                *dst++ = '\\';
              break;
            }
          *dst++ = ch;
        }
      else if (ch == '\\')
        escaped = 1;
      else if (ch == delim)
        break;
      else
        *dst++ = ch;
    }

  /* Running off the end of the string means the regexp was not
     terminated.  */
  result = (*src == delim) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5657 /* Look at the argument of --regex or --no-regex and do the right
5658 thing. Same for each line of a regexp file. */
5659 static void
5660 analyse_regex (regex_arg)
5661 char *regex_arg;
5663 if (regex_arg == NULL)
5665 free_regexps (); /* --no-regex: remove existing regexps */
5666 return;
5669 /* A real --regexp option or a line in a regexp file. */
5670 switch (regex_arg[0])
5672 /* Comments in regexp file or null arg to --regex. */
5673 case '\0':
5674 case ' ':
5675 case '\t':
5676 break;
5678 /* Read a regex file. This is recursive and may result in a
5679 loop, which will stop when the file descriptors are exhausted. */
5680 case '@':
5682 FILE *regexfp;
5683 linebuffer regexbuf;
5684 char *regexfile = regex_arg + 1;
5686 /* regexfile is a file containing regexps, one per line. */
5687 regexfp = fopen (regexfile, "r");
5688 if (regexfp == NULL)
5690 pfatal (regexfile);
5691 return;
5693 linebuffer_init (&regexbuf);
5694 while (readline_internal (&regexbuf, regexfp) > 0)
5695 analyse_regex (regexbuf.buffer);
5696 free (regexbuf.buffer);
5697 fclose (regexfp);
5699 break;
5701 /* Regexp to be used for a specific language only. */
5702 case '{':
5704 language *lang;
5705 char *lang_name = regex_arg + 1;
5706 char *cp;
5708 for (cp = lang_name; *cp != '}'; cp++)
5709 if (*cp == '\0')
5711 error ("unterminated language name in regex: %s", regex_arg);
5712 return;
5714 *cp++ = '\0';
5715 lang = get_language_from_langname (lang_name);
5716 if (lang == NULL)
5717 return;
5718 add_regex (cp, lang);
5720 break;
5722 /* Regexp to be used for any language. */
5723 default:
5724 add_regex (regex_arg, NULL);
5725 break;
5729 /* Separate the regexp pattern, compile it,
5730 and care for optional name and modifiers. */
5731 static void
5732 add_regex (regexp_pattern, lang)
5733 char *regexp_pattern;
5734 language *lang;
5736 static struct re_pattern_buffer zeropattern;
5737 char sep, *pat, *name, *modifiers;
5738 const char *err;
5739 struct re_pattern_buffer *patbuf;
5740 regexp *rp;
5741 bool
5742 force_explicit_name = TRUE, /* do not use implicit tag names */
5743 ignore_case = FALSE, /* case is significant */
5744 multi_line = FALSE, /* matches are done one line at a time */
5745 single_line = FALSE; /* dot does not match newline */
5748 if (strlen(regexp_pattern) < 3)
5750 error ("null regexp", (char *)NULL);
5751 return;
5753 sep = regexp_pattern[0];
5754 name = scan_separators (regexp_pattern);
5755 if (name == NULL)
5757 error ("%s: unterminated regexp", regexp_pattern);
5758 return;
5760 if (name[1] == sep)
5762 error ("null name for regexp \"%s\"", regexp_pattern);
5763 return;
5765 modifiers = scan_separators (name);
5766 if (modifiers == NULL) /* no terminating separator --> no name */
5768 modifiers = name;
5769 name = "";
5771 else
5772 modifiers += 1; /* skip separator */
5774 /* Parse regex modifiers. */
5775 for (; modifiers[0] != '\0'; modifiers++)
5776 switch (modifiers[0])
5778 case 'N':
5779 if (modifiers == name)
5780 error ("forcing explicit tag name but no name, ignoring", NULL);
5781 force_explicit_name = TRUE;
5782 break;
5783 case 'i':
5784 ignore_case = TRUE;
5785 break;
5786 case 's':
5787 single_line = TRUE;
5788 /* FALLTHRU */
5789 case 'm':
5790 multi_line = TRUE;
5791 need_filebuf = TRUE;
5792 break;
5793 default:
5795 char wrongmod [2];
5796 wrongmod[0] = modifiers[0];
5797 wrongmod[1] = '\0';
5798 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5800 break;
5803 patbuf = xnew (1, struct re_pattern_buffer);
5804 *patbuf = zeropattern;
5805 if (ignore_case)
5807 static char lc_trans[CHARS];
5808 int i;
5809 for (i = 0; i < CHARS; i++)
5810 lc_trans[i] = lowcase (i);
5811 patbuf->translate = lc_trans; /* translation table to fold case */
5814 if (multi_line)
5815 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5816 else
5817 pat = regexp_pattern;
5819 if (single_line)
5820 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5821 else
5822 re_set_syntax (RE_SYNTAX_EMACS);
5824 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5825 if (multi_line)
5826 free (pat);
5827 if (err != NULL)
5829 error ("%s while compiling pattern", err);
5830 return;
5833 rp = p_head;
5834 p_head = xnew (1, regexp);
5835 p_head->pattern = savestr (regexp_pattern);
5836 p_head->p_next = rp;
5837 p_head->lang = lang;
5838 p_head->pat = patbuf;
5839 p_head->name = savestr (name);
5840 p_head->error_signaled = FALSE;
5841 p_head->force_explicit_name = force_explicit_name;
5842 p_head->ignore_case = ignore_case;
5843 p_head->multi_line = multi_line;
5847 * Do the substitutions indicated by the regular expression and
5848 * arguments.
5850 static char *
5851 substitute (in, out, regs)
5852 char *in, *out;
5853 struct re_registers *regs;
5855 char *result, *t;
5856 int size, dig, diglen;
5858 result = NULL;
5859 size = strlen (out);
5861 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5862 if (out[size - 1] == '\\')
5863 fatal ("pattern error in \"%s\"", out);
5864 for (t = etags_strchr (out, '\\');
5865 t != NULL;
5866 t = etags_strchr (t + 2, '\\'))
5867 if (ISDIGIT (t[1]))
5869 dig = t[1] - '0';
5870 diglen = regs->end[dig] - regs->start[dig];
5871 size += diglen - 2;
5873 else
5874 size -= 1;
5876 /* Allocate space and do the substitutions. */
5877 assert (size >= 0);
5878 result = xnew (size + 1, char);
5880 for (t = result; *out != '\0'; out++)
5881 if (*out == '\\' && ISDIGIT (*++out))
5883 dig = *out - '0';
5884 diglen = regs->end[dig] - regs->start[dig];
5885 strncpy (t, in + regs->start[dig], diglen);
5886 t += diglen;
5888 else
5889 *t++ = *out;
5890 *t = '\0';
5892 assert (t <= result + size);
5893 assert (t - result == (int)strlen (result));
5895 return result;
5898 /* Deallocate all regexps. */
5899 static void
5900 free_regexps ()
5902 regexp *rp;
5903 while (p_head != NULL)
5905 rp = p_head->p_next;
5906 free (p_head->pattern);
5907 free (p_head->name);
5908 free (p_head);
5909 p_head = rp;
5911 return;
5915 * Reads the whole file as a single string from `filebuf' and looks for
5916 * multi-line regular expressions, creating tags on matches.
5917 * readline already dealt with normal regexps.
5919 * Idea by Ben Wing <ben@666.com> (2002).
5921 static void
5922 regex_tag_multiline ()
5924 char *buffer = filebuf.buffer;
5925 regexp *rp;
5926 char *name;
5928 for (rp = p_head; rp != NULL; rp = rp->p_next)
5930 int match = 0;
5932 if (!rp->multi_line)
5933 continue; /* skip normal regexps */
5935 /* Generic initialisations before parsing file from memory. */
5936 lineno = 1; /* reset global line number */
5937 charno = 0; /* reset global char number */
5938 linecharno = 0; /* reset global char number of line start */
5940 /* Only use generic regexps or those for the current language. */
5941 if (rp->lang != NULL && rp->lang != curfdp->lang)
5942 continue;
5944 while (match >= 0 && match < filebuf.len)
5946 match = re_search (rp->pat, buffer, filebuf.len, charno,
5947 filebuf.len - match, &rp->regs);
5948 switch (match)
5950 case -2:
5951 /* Some error. */
5952 if (!rp->error_signaled)
5954 error ("regexp stack overflow while matching \"%s\"",
5955 rp->pattern);
5956 rp->error_signaled = TRUE;
5958 break;
5959 case -1:
5960 /* No match. */
5961 break;
5962 default:
5963 if (match == rp->regs.end[0])
5965 if (!rp->error_signaled)
5967 error ("regexp matches the empty string: \"%s\"",
5968 rp->pattern);
5969 rp->error_signaled = TRUE;
5971 match = -3; /* exit from while loop */
5972 break;
5975 /* Match occurred. Construct a tag. */
5976 while (charno < rp->regs.end[0])
5977 if (buffer[charno++] == '\n')
5978 lineno++, linecharno = charno;
5979 name = rp->name;
5980 if (name[0] == '\0')
5981 name = NULL;
5982 else /* make a named tag */
5983 name = substitute (buffer, rp->name, &rp->regs);
5984 if (rp->force_explicit_name)
5985 /* Force explicit tag name, if a name is there. */
5986 pfnote (name, TRUE, buffer + linecharno,
5987 charno - linecharno + 1, lineno, linecharno);
5988 else
5989 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5990 charno - linecharno + 1, lineno, linecharno);
5991 break;
5997 #endif /* ETAGS_REGEXPS */
6000 static bool
6001 nocase_tail (cp)
6002 char *cp;
6004 register int len = 0;
6006 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6007 cp++, len++;
6008 if (*cp == '\0' && !intoken (dbp[len]))
6010 dbp += len;
6011 return TRUE;
6013 return FALSE;
6016 static void
6017 get_tag (bp, namepp)
6018 register char *bp;
6019 char **namepp;
6021 register char *cp = bp;
6023 if (*bp != '\0')
6025 /* Go till you get to white space or a syntactic break */
6026 for (cp = bp + 1; !notinname (*cp); cp++)
6027 continue;
6028 make_tag (bp, cp - bp, TRUE,
6029 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6032 if (namepp != NULL)
6033 *namepp = savenstr (bp, cp - bp);
6037 * Read a line of text from `stream' into `lbp', excluding the
6038 * newline or CR-NL, if any. Return the number of characters read from
6039 * `stream', which is the length of the line including the newline.
6041 * On DOS or Windows we do not count the CR character, if any before the
6042 * NL, in the returned length; this mirrors the behavior of Emacs on those
6043 * platforms (for text files, it translates CR-NL to NL as it reads in the
6044 * file).
6046 * If multi-line regular expressions are requested, each line read is
6047 * appended to `filebuf'.
6049 static long
6050 readline_internal (lbp, stream)
6051 linebuffer *lbp;
6052 register FILE *stream;
6054 char *buffer = lbp->buffer;
6055 register char *p = lbp->buffer;
6056 register char *pend;
6057 int chars_deleted;
6059 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6061 for (;;)
6063 register int c = getc (stream);
6064 if (p == pend)
6066 /* We're at the end of linebuffer: expand it. */
6067 lbp->size *= 2;
6068 xrnew (buffer, lbp->size, char);
6069 p += buffer - lbp->buffer;
6070 pend = buffer + lbp->size;
6071 lbp->buffer = buffer;
6073 if (c == EOF)
6075 *p = '\0';
6076 chars_deleted = 0;
6077 break;
6079 if (c == '\n')
6081 if (p > buffer && p[-1] == '\r')
6083 p -= 1;
6084 #ifdef DOS_NT
6085 /* Assume CRLF->LF translation will be performed by Emacs
6086 when loading this file, so CRs won't appear in the buffer.
6087 It would be cleaner to compensate within Emacs;
6088 however, Emacs does not know how many CRs were deleted
6089 before any given point in the file. */
6090 chars_deleted = 1;
6091 #else
6092 chars_deleted = 2;
6093 #endif
6095 else
6097 chars_deleted = 1;
6099 *p = '\0';
6100 break;
6102 *p++ = c;
6104 lbp->len = p - buffer;
6106 if (need_filebuf /* we need filebuf for multi-line regexps */
6107 && chars_deleted > 0) /* not at EOF */
6109 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6111 /* Expand filebuf. */
6112 filebuf.size *= 2;
6113 xrnew (filebuf.buffer, filebuf.size, char);
6115 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6116 filebuf.len += lbp->len;
6117 filebuf.buffer[filebuf.len++] = '\n';
6118 filebuf.buffer[filebuf.len] = '\0';
6121 return lbp->len + chars_deleted;
6125 * Like readline_internal, above, but in addition try to match the
6126 * input line against relevant regular expressions and manage #line
6127 * directives.
6129 static void
6130 readline (lbp, stream)
6131 linebuffer *lbp;
6132 FILE *stream;
6134 long result;
6136 linecharno = charno; /* update global char number of line start */
6137 result = readline_internal (lbp, stream); /* read line */
6138 lineno += 1; /* increment global line number */
6139 charno += result; /* increment global char number */
6141 /* Honour #line directives. */
6142 if (!no_line_directive)
6144 static bool discard_until_line_directive;
6146 /* Check whether this is a #line directive. */
6147 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6149 int start, lno;
6151 if (DEBUG) start = 0; /* shut up the compiler */
6152 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6154 char *endp = lbp->buffer + start;
6156 assert (start > 0);
6157 while ((endp = etags_strchr (endp, '"')) != NULL
6158 && endp[-1] == '\\')
6159 endp++;
6160 if (endp != NULL)
6161 /* Ok, this is a real #line directive. Let's deal with it. */
6163 char *taggedabsname; /* absolute name of original file */
6164 char *taggedfname; /* name of original file as given */
6165 char *name; /* temp var */
6167 discard_until_line_directive = FALSE; /* found it */
6168 name = lbp->buffer + start;
6169 *endp = '\0';
6170 canonicalize_filename (name); /* for DOS */
6171 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6172 if (filename_is_absolute (name)
6173 || filename_is_absolute (curfdp->infname))
6174 taggedfname = savestr (taggedabsname);
6175 else
6176 taggedfname = relative_filename (taggedabsname,tagfiledir);
6178 if (streq (curfdp->taggedfname, taggedfname))
6179 /* The #line directive is only a line number change. We
6180 deal with this afterwards. */
6181 free (taggedfname);
6182 else
6183 /* The tags following this #line directive should be
6184 attributed to taggedfname. In order to do this, set
6185 curfdp accordingly. */
6187 fdesc *fdp; /* file description pointer */
6189 /* Go look for a file description already set up for the
6190 file indicated in the #line directive. If there is
6191 one, use it from now until the next #line
6192 directive. */
6193 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6194 if (streq (fdp->infname, curfdp->infname)
6195 && streq (fdp->taggedfname, taggedfname))
6196 /* If we remove the second test above (after the &&)
6197 then all entries pertaining to the same file are
6198 coalesced in the tags file. If we use it, then
6199 entries pertaining to the same file but generated
6200 from different files (via #line directives) will
6201 go into separate sections in the tags file. These
6202 alternatives look equivalent. The first one
6203 destroys some apparently useless information. */
6205 curfdp = fdp;
6206 free (taggedfname);
6207 break;
6209 /* Else, if we already tagged the real file, skip all
6210 input lines until the next #line directive. */
6211 if (fdp == NULL) /* not found */
6212 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6213 if (streq (fdp->infabsname, taggedabsname))
6215 discard_until_line_directive = TRUE;
6216 free (taggedfname);
6217 break;
6219 /* Else create a new file description and use that from
6220 now on, until the next #line directive. */
6221 if (fdp == NULL) /* not found */
6223 fdp = fdhead;
6224 fdhead = xnew (1, fdesc);
6225 *fdhead = *curfdp; /* copy curr. file description */
6226 fdhead->next = fdp;
6227 fdhead->infname = savestr (curfdp->infname);
6228 fdhead->infabsname = savestr (curfdp->infabsname);
6229 fdhead->infabsdir = savestr (curfdp->infabsdir);
6230 fdhead->taggedfname = taggedfname;
6231 fdhead->usecharno = FALSE;
6232 fdhead->prop = NULL;
6233 fdhead->written = FALSE;
6234 curfdp = fdhead;
6237 free (taggedabsname);
6238 lineno = lno - 1;
6239 readline (lbp, stream);
6240 return;
6241 } /* if a real #line directive */
6242 } /* if #line is followed by a a number */
6243 } /* if line begins with "#line " */
6245 /* If we are here, no #line directive was found. */
6246 if (discard_until_line_directive)
6248 if (result > 0)
6250 /* Do a tail recursion on ourselves, thus discarding the contents
6251 of the line buffer. */
6252 readline (lbp, stream);
6253 return;
6255 /* End of file. */
6256 discard_until_line_directive = FALSE;
6257 return;
6259 } /* if #line directives should be considered */
6261 #ifdef ETAGS_REGEXPS
6263 int match;
6264 regexp *rp;
6265 char *name;
6267 /* Match against relevant regexps. */
6268 if (lbp->len > 0)
6269 for (rp = p_head; rp != NULL; rp = rp->p_next)
6271 /* Only use generic regexps or those for the current language.
6272 Also do not use multiline regexps, which is the job of
6273 regex_tag_multiline. */
6274 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6275 || rp->multi_line)
6276 continue;
6278 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6279 switch (match)
6281 case -2:
6282 /* Some error. */
6283 if (!rp->error_signaled)
6285 error ("regexp stack overflow while matching \"%s\"",
6286 rp->pattern);
6287 rp->error_signaled = TRUE;
6289 break;
6290 case -1:
6291 /* No match. */
6292 break;
6293 case 0:
6294 /* Empty string matched. */
6295 if (!rp->error_signaled)
6297 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6298 rp->error_signaled = TRUE;
6300 break;
6301 default:
6302 /* Match occurred. Construct a tag. */
6303 name = rp->name;
6304 if (name[0] == '\0')
6305 name = NULL;
6306 else /* make a named tag */
6307 name = substitute (lbp->buffer, rp->name, &rp->regs);
6308 if (rp->force_explicit_name)
6309 /* Force explicit tag name, if a name is there. */
6310 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6311 else
6312 make_tag (name, strlen (name), TRUE,
6313 lbp->buffer, match, lineno, linecharno);
6314 break;
6318 #endif /* ETAGS_REGEXPS */
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, asked for all strlen(CP) characters of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6334 * Return a pointer to a space of size LEN+1 allocated with xnew where
6335 * the string CP has been copied for at most the first LEN characters.
6337 static char *
6338 savenstr (cp, len)
6339 char *cp;
6340 int len;
6342 register char *dp;
6344 dp = xnew (len + 1, char);
6345 strncpy (dp, cp, len);
6346 dp[len] = '\0';
6347 return dp;
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;;)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent hit */
      if (*sp == '\0')
        break;
      sp++;
    }
  return (char *) last;
}
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL when C does not occur.  The
 * terminating NUL takes part in the search.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
/*
 * Compare the strings S1 and S2, treating upper and lower case letters
 * as equal.  The result is zero when the strings match, otherwise the
 * difference between the first pair of characters that do not match.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;;)
    {
      /* Case is folded only when both characters are alphabetic.  */
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      if (*s1 == '\0' || diff != 0)
        return diff;
      s1++, s2++;
    }
}
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * treating upper and lower case letters as equal.  The result is zero
 * when those characters match (always so for N <= 0), otherwise the
 * difference between the first pair of characters that do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* The count is tested before every comparison, so that a string
     ending exactly after N matching characters is not compared on its
     character number N+1.  */
  while (n > 0)
    {
      /* Case is folded only when both characters are alphabetic.  */
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      if (diff != 0 || *s1 == '\0')
        return diff;
      s1++, s2++, n--;
    }

  return 0;
}
/* Advance CP over the characters for which iswhite holds and return
   the resulting pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP up to the first character for which iswhite holds, or to
   the end of the string, and return the resulting pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate with EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print the system error message for errno, prefixed by S1, using
   perror; then terminate with EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6475 static void
6476 suggest_asking_for_help ()
6479 #ifdef LONG_OPTIONS
6480 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6481 progname, "--help");
6482 #else
6483 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6484 progname, "-h");
6485 #endif
6486 exit (EXIT_FAILURE);
6489 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6490 static void
6491 error (s1, s2)
6492 const char *s1, *s2;
6494 fprintf (stderr, "%s: ", progname);
6495 fprintf (stderr, s1, s2);
6496 fprintf (stderr, "\n");
6499 /* Return a newly-allocated string whose contents
6500 concatenate those of s1, s2, s3. */
6501 static char *
6502 concat (s1, s2, s3)
6503 char *s1, *s2, *s3;
6505 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6506 char *result = xnew (len1 + len2 + len3 + 1, char);
6508 strcpy (result, s1);
6509 strcpy (result + len1, s2);
6510 strcpy (result + len1 + len2, s3);
6511 result[len1 + len2 + len3] = '\0';
6513 return result;
6517 /* Does the same work as the system V getcwd, but does not need to
6518 guess the buffer size in advance. */
6519 static char *
6520 etags_getcwd ()
6522 #ifdef HAVE_GETCWD
6523 int bufsize = 200;
6524 char *path = xnew (bufsize, char);
6526 while (getcwd (path, bufsize) == NULL)
6528 if (errno != ERANGE)
6529 pfatal ("getcwd");
6530 bufsize *= 2;
6531 free (path);
6532 path = xnew (bufsize, char);
6535 canonicalize_filename (path);
6536 return path;
6538 #else /* not HAVE_GETCWD */
6539 #if MSDOS
6541 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6543 getwd (path);
6545 for (p = path; *p != '\0'; p++)
6546 if (*p == '\\')
6547 *p = '/';
6548 else
6549 *p = lowcase (*p);
6551 return strdup (path);
6552 #else /* not MSDOS */
6553 linebuffer path;
6554 FILE *pipe;
6556 linebuffer_init (&path);
6557 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6558 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6559 pfatal ("pwd");
6560 pclose (pipe);
6562 return path.buffer;
6563 #endif /* not MSDOS */
6564 #endif /* not HAVE_GETCWD */
6567 /* Return a newly allocated string containing the file name of FILE
6568 relative to the absolute directory DIR (which should end with a slash). */
6569 static char *
6570 relative_filename (file, dir)
6571 char *file, *dir;
6573 char *fp, *dp, *afn, *res;
6574 int i;
6576 /* Find the common root of file and dir (with a trailing slash). */
6577 afn = absolute_filename (file, cwd);
6578 fp = afn;
6579 dp = dir;
6580 while (*fp++ == *dp++)
6581 continue;
6582 fp--, dp--; /* back to the first differing char */
6583 #ifdef DOS_NT
6584 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6585 return afn;
6586 #endif
6587 do /* look at the equal chars until '/' */
6588 fp--, dp--;
6589 while (*fp != '/');
6591 /* Build a sequence of "../" strings for the resulting relative file name. */
6592 i = 0;
6593 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6594 i += 1;
6595 res = xnew (3*i + strlen (fp + 1) + 1, char);
6596 res[0] = '\0';
6597 while (i-- > 0)
6598 strcat (res, "../");
6600 /* Add the file name relative to the common root of file and dir. */
6601 strcat (res, fp + 1);
6602 free (afn);
6604 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/dirname/.."
   and "/." substrings are removed from the result; a name that reduces
   to nothing is returned as "/". */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the previous
                 component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap inside RES, which
                 rules strcpy out.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions here too.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Nothing is left of the name: release the empty string and
         hand back a fresh "/" in its place.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back.  */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    /* FILE has no directory part.  */
    dirname = savestr (dir);

  return dirname;
}
/* Tell whether FN is an absolute file name, that is whether it begins
   with a slash or, under DOS_NT, with a letter, a colon and a slash.
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  int absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is made upper case and backslashes become slashes;
   elsewhere FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes. */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
        *p = '/';
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6723 /* Initialize a linebuffer for use */
6724 static void
6725 linebuffer_init (lbp)
6726 linebuffer *lbp;
6728 lbp->size = (DEBUG) ? 3 : 200;
6729 lbp->buffer = xnew (lbp->size, char);
6730 lbp->buffer[0] = '\0';
6731 lbp->len = 0;
6734 /* Set the minimum size of a string contained in a linebuffer. */
6735 static void
6736 linebuffer_setlen (lbp, toksize)
6737 linebuffer *lbp;
6738 int toksize;
6740 while (lbp->size <= toksize)
6742 lbp->size *= 2;
6743 xrnew (lbp->buffer, lbp->size, char);
6745 lbp->len = toksize;
6748 /* Like malloc but get fatal error if memory is exhausted. */
6749 static PTR
6750 xmalloc (size)
6751 unsigned int size;
6753 PTR result = (PTR) malloc (size);
6754 if (result == NULL)
6755 fatal ("virtual memory exhausted", (char *)NULL);
6756 return result;
6759 static PTR
6760 xrealloc (ptr, size)
6761 char *ptr;
6762 unsigned int size;
6764 PTR result = (PTR) realloc (ptr, size);
6765 if (result == NULL)
6766 fatal ("virtual memory exhausted", (char *)NULL);
6767 return result;
6771 * Local Variables:
6772 * c-indentation-style: gnu
6773 * indent-tabs-mode: t
6774 * tab-width: 8
6775 * fill-column: 79
6776 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6777 * End:
6780 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6781 (do not change this comment) */
6783 /* etags.c ends here */