(regexp-opt-group): Don't cons uselessly.
[emacs.git] / lib-src / etags.c
blob5f46013153b6df1b157da9c34395fe707c5b86eb
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
34 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
37 char pot_etags_version[] = "@(#) pot revision number is 16.26";
39 #define TRUE 1
40 #define FALSE 0
42 #ifdef DEBUG
43 # undef DEBUG
44 # define DEBUG TRUE
45 #else
46 # define DEBUG FALSE
47 # define NDEBUG /* disable assert */
48 #endif
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 /* On some systems, Emacs defines static as nothing for the sake
53 of unexec. We don't want that here since we don't use unexec. */
54 # undef static
55 # define ETAGS_REGEXPS /* use the regexp features */
56 # define LONG_OPTIONS /* accept long options */
57 # ifndef PTR /* for Xemacs */
58 # define PTR void *
59 # endif
60 # ifndef __P /* for Xemacs */
61 # define __P(args) args
62 # endif
63 #else
64 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
65 # define __P(args) args /* use prototypes */
66 # define PTR void * /* for generic pointers */
67 # else
68 # define __P(args) () /* no prototypes */
69 # define const /* remove const for old compilers' sake */
70 # define PTR long * /* don't use void* */
71 # endif
72 #endif /* !HAVE_CONFIG_H */
74 #ifndef _GNU_SOURCE
75 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
76 #endif
78 /* WIN32_NATIVE is for Xemacs.
79 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
80 #ifdef WIN32_NATIVE
81 # undef MSDOS
82 # undef WINDOWSNT
83 # define WINDOWSNT
84 #endif /* WIN32_NATIVE */
86 #ifdef MSDOS
87 # undef MSDOS
88 # define MSDOS TRUE
89 # include <fcntl.h>
90 # include <sys/param.h>
91 # include <io.h>
92 # ifndef HAVE_CONFIG_H
93 # define DOS_NT
94 # include <sys/config.h>
95 # endif
96 #else
97 # define MSDOS FALSE
98 #endif /* MSDOS */
100 #ifdef WINDOWSNT
101 # include <stdlib.h>
102 # include <fcntl.h>
103 # include <string.h>
104 # include <direct.h>
105 # include <io.h>
106 # define MAXPATHLEN _MAX_PATH
107 # undef HAVE_NTGUI
108 # undef DOS_NT
109 # define DOS_NT
110 # ifndef HAVE_GETCWD
111 # define HAVE_GETCWD
112 # endif /* undef HAVE_GETCWD */
113 #else /* !WINDOWSNT */
114 # ifdef STDC_HEADERS
115 # include <stdlib.h>
116 # include <string.h>
117 # else
118 extern char *getenv ();
119 # endif
120 #endif /* !WINDOWSNT */
122 #ifdef HAVE_UNISTD_H
123 # include <unistd.h>
124 #else
125 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
126 extern char *getcwd (char *buf, size_t size);
127 # endif
128 #endif /* HAVE_UNISTD_H */
130 #include <stdio.h>
131 #include <ctype.h>
132 #include <errno.h>
133 #ifndef errno
134 extern int errno;
135 #endif
136 #include <sys/types.h>
137 #include <sys/stat.h>
139 #include <assert.h>
140 #ifdef NDEBUG
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
143 #endif
145 #if !defined (S_ISREG) && defined (S_IFREG)
146 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
147 #endif
149 #ifdef LONG_OPTIONS
150 # include <getopt.h>
151 #else
152 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
153 extern char *optarg;
154 extern int optind, opterr;
155 #endif /* LONG_OPTIONS */
157 #ifdef ETAGS_REGEXPS
158 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
159 # ifdef __CYGWIN__ /* compiling on Cygwin */
160 !!! NOTICE !!!
161 the regex.h distributed with Cygwin is not compatible with etags, alas!
162 If you want regular expression support, you should delete this notice and
163 arrange to use the GNU regex.h and regex.c.
164 # endif
165 # endif
166 # include <regex.h>
167 #endif /* ETAGS_REGEXPS */
169 /* Define CTAGS to make the program "ctags" compatible with the usual one.
170 Leave it undefined to make the program "etags", which makes emacs-style
171 tag tables and tags typedefs, #defines and struct/union/enum by default. */
172 #ifdef CTAGS
173 # undef CTAGS
174 # define CTAGS TRUE
175 #else
176 # define CTAGS FALSE
177 #endif
179 /* Exit codes for success and failure. */
180 #ifdef VMS
181 # define GOOD 1
182 # define BAD 0
183 #else
184 # define GOOD 0
185 # define BAD 1
186 #endif
/* String comparison helpers.  streq is true when S and T are equal;
   strneq is true when their first N characters are equal.
   Both operands must be non-null, because strcmp and strncmp read
   through each of them; the assertion therefore requires S *and* T to
   be non-null.  When assertions are enabled the operands are evaluated
   twice, so they must be free of side effects.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
191 #define CHARS 256 /* number of distinct char values: 2^CHAR_BIT with 8-bit chars */
192 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* reduce x to an index in 0..CHARS-1 */
193 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
194 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
195 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
196 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
197 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
199 #define ISALNUM(c) isalnum (CHAR(c))
200 #define ISALPHA(c) isalpha (CHAR(c))
201 #define ISDIGIT(c) isdigit (CHAR(c))
202 #define ISLOWER(c) islower (CHAR(c))
204 #define lowcase(c) tolower (CHAR(c))
205 #define upcase(c) toupper (CHAR(c))
209 * xnew, xrnew -- allocate, reallocate storage
211 * SYNOPSIS: Type *xnew (int n, Type);
212 * void xrnew (OldPointer, int n, Type);
214 #if DEBUG
215 # include "chkmalloc.h"
216 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
217 (n) * sizeof (Type)))
218 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
219 (char *) (op), (n) * sizeof (Type)))
220 #else
221 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
222 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
223 (char *) (op), (n) * sizeof (Type)))
224 #endif
226 #define bool int
228 typedef void Lang_function __P((FILE *));
230 typedef struct
232 char *suffix; /* file name suffix for this compressor */
233 char *command; /* takes one arg and decompresses to stdout */
234 } compressor;
236 typedef struct
238 char *name; /* language name */
239 bool metasource; /* source used to generate other sources */
240 Lang_function *function; /* parse function */
241 char **filenames; /* names of this language's files */
242 char **suffixes; /* name suffixes of this language's files */
243 char **interpreters; /* interpreters for this language */
244 } language;
246 typedef struct fdesc
248 struct fdesc *next; /* for the linked list */
249 char *infname; /* uncompressed input file name */
250 char *infabsname; /* absolute uncompressed input file name */
251 char *infabsdir; /* absolute dir of input file */
252 char *taggedfname; /* file name to write in tagfile */
253 language *lang; /* language of file */
254 char *prop; /* file properties to write in tagfile */
255 bool usecharno; /* etags tags shall contain char number */
256 } fdesc;
258 typedef struct node_st
259 { /* sorting structure */
260 struct node_st *left, *right; /* left and right sons */
261 fdesc *fdp; /* description of file to whom tag belongs */
262 char *name; /* tag name */
263 char *pat; /* search pattern */
264 bool valid; /* write this tag on the tag file */
265 bool is_func; /* function tag: use pattern in CTAGS mode */
266 bool been_warned; /* warning already given for duplicated tag */
267 int lno; /* line number tag is on */
268 long cno; /* character number line starts on */
269 } node;
272 * A `linebuffer' is a structure which holds a line of text.
273 * `readline_internal' reads a line from a stream into a linebuffer
274 * and works regardless of the length of the line.
275 * SIZE is the size of BUFFER, LEN is the length of the string in
276 * BUFFER after readline reads it.
278 typedef struct
280 long size;
281 int len;
282 char *buffer;
283 } linebuffer;
285 /* Used to support mixing of --lang and file names. */
286 typedef struct
288 enum {
289 at_language, /* a language specification */
290 at_regexp, /* a regular expression */
291 at_filename, /* a file name */
292 at_stdin /* read from stdin here */
293 } arg_type; /* argument type */
294 language *lang; /* language associated with the argument */
295 char *what; /* the argument itself */
296 } argument;
298 #ifdef ETAGS_REGEXPS
299 /* Structure defining a regular expression. */
300 typedef struct pattern
302 struct pattern *p_next;
303 language *lang;
304 char *regex;
305 struct re_pattern_buffer *pat;
306 struct re_registers regs;
307 char *name_pattern;
308 bool error_signaled;
309 bool ignore_case;
310 bool multi_line;
311 } pattern;
312 #endif /* ETAGS_REGEXPS */
315 /* Many compilers barf on this:
316 Lang_function Ada_funcs;
317 so let's write it this way */
318 static void Ada_funcs __P((FILE *));
319 static void Asm_labels __P((FILE *));
320 static void C_entries __P((int c_ext, FILE *));
321 static void default_C_entries __P((FILE *));
322 static void plain_C_entries __P((FILE *));
323 static void Cjava_entries __P((FILE *));
324 static void Cobol_paragraphs __P((FILE *));
325 static void Cplusplus_entries __P((FILE *));
326 static void Cstar_entries __P((FILE *));
327 static void Erlang_functions __P((FILE *));
328 static void Fortran_functions __P((FILE *));
329 static void Yacc_entries __P((FILE *));
330 static void Lisp_functions __P((FILE *));
331 static void Makefile_targets __P((FILE *));
332 static void Pascal_functions __P((FILE *));
333 static void Perl_functions __P((FILE *));
334 static void PHP_functions __P((FILE *));
335 static void Postscript_functions __P((FILE *));
336 static void Prolog_functions __P((FILE *));
337 static void Python_functions __P((FILE *));
338 static void Scheme_functions __P((FILE *));
339 static void TeX_commands __P((FILE *));
340 static void Texinfo_nodes __P((FILE *));
341 static void just_read_file __P((FILE *));
343 static void print_language_names __P((void));
344 static void print_version __P((void));
345 static void print_help __P((void));
346 int main __P((int, char **));
348 static compressor *get_compressor_from_suffix __P((char *, char **));
349 static language *get_language_from_langname __P((const char *));
350 static language *get_language_from_interpreter __P((char *));
351 static language *get_language_from_filename __P((char *, bool));
352 static void readline __P((linebuffer *, FILE *));
353 static long readline_internal __P((linebuffer *, FILE *));
354 static bool nocase_tail __P((char *));
355 static char *get_tag __P((char *));
357 #ifdef ETAGS_REGEXPS
358 static void analyse_regex __P((char *));
359 static void free_patterns __P((void));
360 static void regex_tag_multiline __P((void));
361 #endif /* ETAGS_REGEXPS */
362 static void error __P((const char *, const char *));
363 static void suggest_asking_for_help __P((void));
364 void fatal __P((char *, char *));
365 static void pfatal __P((char *));
366 static void add_node __P((node *, node **));
368 static void init __P((void));
369 static void initbuffer __P((linebuffer *));
370 static void process_file_name __P((char *, language *));
371 static void process_file __P((FILE *, char *, language *));
372 static void find_entries __P((FILE *));
373 static void free_tree __P((node *));
374 static void free_fdesc __P((fdesc *));
375 static void pfnote __P((char *, bool, char *, int, int, long));
376 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
377 static void invalidate_nodes __P((fdesc *, node **));
378 static void put_entries __P((node *));
380 static char *concat __P((char *, char *, char *));
381 static char *skip_spaces __P((char *));
382 static char *skip_non_spaces __P((char *));
383 static char *savenstr __P((char *, int));
384 static char *savestr __P((char *));
385 static char *etags_strchr __P((const char *, int));
386 static char *etags_strrchr __P((const char *, int));
387 static bool strcaseeq __P((const char *, const char *));
388 static char *etags_getcwd __P((void));
389 static char *relative_filename __P((char *, char *));
390 static char *absolute_filename __P((char *, char *));
391 static char *absolute_dirname __P((char *, char *));
392 static bool filename_is_absolute __P((char *f));
393 static void canonicalize_filename __P((char *));
394 static void linebuffer_setlen __P((linebuffer *, int));
395 static PTR xmalloc __P((unsigned int));
396 static PTR xrealloc __P((char *, unsigned int));
399 static char searchar = '/'; /* use /.../ searches */
401 static char *tagfile; /* output file */
402 static char *progname; /* name this program was invoked with */
403 static char *cwd; /* current working directory */
404 static char *tagfiledir; /* directory of tagfile */
405 static FILE *tagf; /* ioptr for tags file */
407 static fdesc *fdhead; /* head of file description list */
408 static fdesc *curfdp; /* current file description */
409 static int lineno; /* line number of current line */
410 static long charno; /* current character number */
411 static long linecharno; /* charno of start of current line */
412 static char *dbp; /* pointer to start of current tag */
414 static const int invalidcharno = -1;
416 static node *nodehead; /* the head of the binary tree of tags */
417 static node *last_node; /* the last node created */
419 static linebuffer lb; /* the current line */
420 static linebuffer filebuf; /* a buffer containing the whole file */
422 /* boolean "functions" (see init) */
423 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
424 static char
425 /* white chars */
426 *white = " \f\t\n\r\v",
427 /* not in a name */
428 *nonam = " \f\t\n\r()=,;",
429 /* token ending chars */
430 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
431 /* token starting chars */
432 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
433 /* valid in-token chars */
434 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
436 static bool append_to_tagfile; /* -a: append to tags */
437 /* The next four default to TRUE for etags, but to FALSE for ctags. */
438 static bool typedefs; /* -t: create tags for C and Ada typedefs */
439 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
440 /* 0 struct/enum/union decls, and C++ */
441 /* member functions. */
442 static bool constantypedefs; /* -d: create tags for C #define, enum */
443 /* constants and variables. */
444 /* -D: opposite of -d. Default under ctags. */
445 static bool globals; /* create tags for global variables */
446 static bool declarations; /* --declarations: tag them and extern in C&Co*/
447 static bool members; /* create tags for C member variables */
448 static bool no_line_directive; /* ignore #line directives (undocumented) */
449 static bool update; /* -u: update tags */
450 static bool vgrind_style; /* -v: create vgrind style index output */
451 static bool no_warnings; /* -w: suppress warnings */
452 static bool cxref_style; /* -x: create cxref style output */
453 static bool cplusplus; /* .[hc] means C++, not C */
454 static bool noindentypedefs; /* -I: ignore indentation in C */
455 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
457 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
458 static bool parsing_stdin; /* --parse-stdin used */
460 #ifdef ETAGS_REGEXPS
461 static pattern *p_head; /* list of all regexps */
462 static bool need_filebuf; /* some regexes are multi-line */
463 #else
464 # define need_filebuf FALSE
465 #endif /* ETAGS_REGEXPS */
467 #ifdef LONG_OPTIONS
/* Table of long option names, presumably the one handed to getopt_long.
   Each entry gives the option name, whether it takes an argument, and
   either the address of a flag variable together with the value to
   store in it, or NULL together with the code of the equivalent
   short option.  The table ends with an all-NULL entry.  */
468 static struct option longopts[] =
470 { "packages-only", no_argument, &packages_only, TRUE },
471 { "c++", no_argument, NULL, 'C' },
472 { "declarations", no_argument, &declarations, TRUE },
473 { "no-line-directive", no_argument, &no_line_directive, TRUE },
474 { "help", no_argument, NULL, 'h' },
475 { "help", no_argument, NULL, 'H' }, /* NOTE(review): same name as the entry above; presumably never matched -- confirm */
476 { "ignore-indentation", no_argument, NULL, 'I' },
477 { "language", required_argument, NULL, 'l' },
478 { "members", no_argument, &members, TRUE },
479 { "no-members", no_argument, &members, FALSE },
480 { "output", required_argument, NULL, 'o' },
481 #ifdef ETAGS_REGEXPS
482 { "regex", required_argument, NULL, 'r' },
483 { "no-regex", no_argument, NULL, 'R' },
484 { "ignore-case-regex", required_argument, NULL, 'c' },
485 #endif /* ETAGS_REGEXPS */
486 { "parse-stdin", required_argument, NULL, STDIN },
487 { "version", no_argument, NULL, 'V' },
489 #if CTAGS /* Ctags options */
490 { "backward-search", no_argument, NULL, 'B' },
491 { "cxref", no_argument, NULL, 'x' },
492 { "defines", no_argument, NULL, 'd' },
493 { "globals", no_argument, &globals, TRUE },
494 { "typedefs", no_argument, NULL, 't' },
495 { "typedefs-and-c++", no_argument, NULL, 'T' },
496 { "update", no_argument, NULL, 'u' },
497 { "vgrind", no_argument, NULL, 'v' },
498 { "no-warn", no_argument, NULL, 'w' },
500 #else /* Etags options */
501 { "append", no_argument, NULL, 'a' },
502 { "no-defines", no_argument, NULL, 'D' },
503 { "no-globals", no_argument, &globals, FALSE },
504 { "include", required_argument, NULL, 'i' },
505 #endif
506 { NULL }
508 #endif /* LONG_OPTIONS */
510 static compressor compressors[] =
512 { "z", "gzip -d -c"},
513 { "Z", "gzip -d -c"},
514 { "gz", "gzip -d -c"},
515 { "GZ", "gzip -d -c"},
516 { "bz2", "bzip2 -d -c" },
517 { NULL }
521 * Language stuff.
524 /* Ada code */
525 static char *Ada_suffixes [] =
526 { "ads", "adb", "ada", NULL };
528 /* Assembly code */
529 static char *Asm_suffixes [] =
530 { "a", /* Unix assembler */
531 "asm", /* Microcontroller assembly */
532 "def", /* BSO/Tasking definition includes */
533 "inc", /* Microcontroller include files */
534 "ins", /* Microcontroller include files */
535 "s", "sa", /* Unix assembler */
536 "S", /* cpp-processed Unix assembler */
537 "src", /* BSO/Tasking C compiler output */
538 NULL
541 /* Note that .c and .h can be considered C++, if the --c++ flag was
542 given, or if the `class' keyword is met inside the file.
543 That is why default_C_entries is called for these. */
544 static char *default_C_suffixes [] =
545 { "c", "h", NULL };
547 static char *Cplusplus_suffixes [] =
548 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
549 "M", /* Objective C++ */
550 "pdb", /* Postscript with C syntax */
551 NULL };
553 static char *Cjava_suffixes [] =
554 { "java", NULL };
556 static char *Cobol_suffixes [] =
557 { "COB", "cob", NULL };
559 static char *Cstar_suffixes [] =
560 { "cs", "hs", NULL };
562 static char *Erlang_suffixes [] =
563 { "erl", "hrl", NULL };
565 static char *Fortran_suffixes [] =
566 { "F", "f", "f90", "for", NULL };
568 static char *Lisp_suffixes [] =
569 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
571 static char *Makefile_filenames [] =
572 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
574 static char *Pascal_suffixes [] =
575 { "p", "pas", NULL };
577 static char *Perl_suffixes [] =
578 { "pl", "pm", NULL };
580 static char *Perl_interpreters [] =
581 { "perl", "@PERL@", NULL };
583 static char *PHP_suffixes [] =
584 { "php", "php3", "php4", NULL };
586 static char *plain_C_suffixes [] =
587 { "lm", /* Objective lex file */
588 "m", /* Objective C file */
589 "pc", /* Pro*C file */
590 NULL };
592 static char *Postscript_suffixes [] =
593 { "ps", "psw", NULL }; /* .psw is for PSWrap */
595 static char *Prolog_suffixes [] =
596 { "prolog", NULL };
598 static char *Python_suffixes [] =
599 { "py", NULL };
601 /* Can't do the `SCM' or `scm' prefix with a version number. */
602 static char *Scheme_suffixes [] =
603 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
605 static char *TeX_suffixes [] =
606 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
608 static char *Texinfo_suffixes [] =
609 { "texi", "texinfo", "txi", NULL };
611 static char *Yacc_suffixes [] =
612 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
615 * Table of languages.
617 * It is ok for a given function to be listed under more than one
618 * name. I just didn't.
621 static language lang_names [] =
623 { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
624 { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
625 { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
626 { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
627 { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
628 { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
629 { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
630 { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
631 { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
632 { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
633 { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
634 { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
635 { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
636 { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
637 { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
638 { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
639 { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
640 { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
641 { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
642 { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
643 { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
644 { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
645 { "auto", FALSE, NULL }, /* default guessing scheme */
646 { "none", FALSE, just_read_file }, /* regexp matching only */
647 { NULL, FALSE, NULL } /* end of list */
651 static void
652 print_language_names ()
654 language *lang;
655 char **name, **ext;
657 puts ("\nThese are the currently supported languages, along with the\n\
658 default file names and dot suffixes:");
659 for (lang = lang_names; lang->name != NULL; lang++)
661 printf (" %-*s", 10, lang->name);
662 if (lang->filenames != NULL)
663 for (name = lang->filenames; *name != NULL; name++)
664 printf (" %s", *name);
665 if (lang->suffixes != NULL)
666 for (ext = lang->suffixes; *ext != NULL; ext++)
667 printf (" .%s", *ext);
668 puts ("");
670 puts ("Where `auto' means use default language for files based on file\n\
671 name suffix, and `none' means only do regexp processing on files.\n\
672 If no language is specified and no matching suffix is found,\n\
673 the first line of the file is read for a sharp-bang (#!) sequence\n\
674 followed by the name of an interpreter. If no such sequence is found,\n\
675 Fortran is tried first; if no tags are found, C is tried next.\n\
676 When parsing any C file, a \"class\" keyword switches to C++.\n\
677 Compressed files are supported using gzip and bzip2.");
680 #ifndef EMACS_NAME
681 # define EMACS_NAME "standalone"
682 #endif
683 #ifndef VERSION
684 # define VERSION "version"
685 #endif
686 static void
687 print_version ()
689 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
690 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
691 puts ("This program is distributed under the same terms as Emacs");
693 exit (GOOD);
/* Print the usage message on standard output and exit with status GOOD.
   The message consists of a usage line naming progname, a description
   of each option, the language table printed by print_language_names,
   and the bug-report address.  Which options are described depends on
   CTAGS (true when the program is built as ctags) and, for the regexp
   options, on ETAGS_REGEXPS.
   NOTE(review): the -C, --c++ entry appears to be disabled on purpose
   by the comment that precedes it in the body -- confirm.  */
696 static void
697 print_help ()
699 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
701 These are the options accepted by %s.\n", progname, progname);
/* Say whether long option names are usable in this build.  */
702 #ifdef LONG_OPTIONS
703 puts ("You may use unambiguous abbreviations for the long option names.");
704 #else
705 puts ("Long option names do not work with this executable, as it is not\n\
706 linked with GNU getopt.");
707 #endif /* LONG_OPTIONS */
708 puts (" A - as file name means read names from stdin (one per line).\n\
709 Absolute names are stored in the output file as they are.\n\
710 Relative ones are stored relative to the output file's directory.\n");
/* -a is described only when CTAGS is false.  */
712 if (!CTAGS)
713 puts ("-a, --append\n\
714 Append tag entries to existing tags file.");
716 puts ("--packages-only\n\
717 For Ada files, only generate tags for packages.");
/* -B is described only when CTAGS is true.  */
719 if (CTAGS)
720 puts ("-B, --backward-search\n\
721 Write the search commands for the tag entries using '?', the\n\
722 backward-search command instead of '/', the forward-search command.");
724 /* This option is mostly obsolete, because etags can now automatically
725 detect C++. Retained for backward compatibility and for debugging and
726 experimentation. In principle, we could want to tag as C++ even
727 before any "class" keyword.
728 puts ("-C, --c++\n\
729 Treat files whose name suffix defaults to C language as C++ files.");
732 puts ("--declarations\n\
733 In C and derived languages, create tags for function declarations,");
734 if (CTAGS)
735 puts ("\tand create tags for extern variables if --globals is used.");
736 else
737 puts
738 ("\tand create tags for extern variables unless --no-globals is used.");
740 if (CTAGS)
741 puts ("-d, --defines\n\
742 Create tag entries for C #define constants and enum constants, too.");
743 else
744 puts ("-D, --no-defines\n\
745 Don't create tag entries for C #define constants and enum constants.\n\
746 This makes the tags file smaller.");
748 if (!CTAGS)
749 puts ("-i FILE, --include=FILE\n\
750 Include a note in tag file indicating that, when searching for\n\
751 a tag, one should also consult the tags file FILE after\n\
752 checking the current file.");
754 puts ("-l LANG, --language=LANG\n\
755 Force the following files to be considered as written in the\n\
756 named language up to the next --language=LANG option.");
758 if (CTAGS)
759 puts ("--globals\n\
760 Create tag entries for global variables in some languages.");
761 else
762 puts ("--no-globals\n\
763 Do not create tag entries for global variables in some\n\
764 languages. This makes the tags file smaller.");
765 puts ("--members\n\
766 Create tag entries for member variables in C and derived languages.");
768 #ifdef ETAGS_REGEXPS
769 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
770 Make a tag for each line matching the regular expression pattern\n\
771 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
772 files only. REGEXFILE is a file containing one REGEXP per line.\n\
773 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
774 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
775 puts (" If TAGNAME/ is present, the tags created are named.\n\
776 For example Tcl named tags can be created with:\n\
777 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
778 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
779 `m' means to allow multi-line matches, `s' implies `m' and\n\
780 causes dot to match the newline character as well.");
781 puts ("-R, --no-regex\n\
782 Don't create tags from regexps for the following files.");
783 #endif /* ETAGS_REGEXPS */
784 puts ("-I, --ignore-indentation\n\
785 Don't rely on indentation quite as much as normal. Currently,\n\
786 this means not to assume that a closing brace in the first\n\
787 column is the final brace of a function or structure\n\
788 definition in C and C++.");
789 puts ("-o FILE, --output=FILE\n\
790 Write the tags to FILE.");
791 puts ("--parse-stdin=NAME\n\
792 Read from standard input and record tags as belonging to file NAME.");
794 if (CTAGS)
796 puts ("-t, --typedefs\n\
797 Generate tag entries for C and Ada typedefs.");
798 puts ("-T, --typedefs-and-c++\n\
799 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
800 and C++ member functions.");
803 if (CTAGS)
804 puts ("-u, --update\n\
805 Update the tag entries for the given files, leaving tag\n\
806 entries for other files in place. Currently, this is\n\
807 implemented by deleting the existing entries for the given\n\
808 files and then rewriting the new entries at the end of the\n\
809 tags file. It is often faster to simply rebuild the entire\n\
810 tag file than to use this.");
812 if (CTAGS)
814 puts ("-v, --vgrind\n\
815 Generates an index of items intended for human consumption,\n\
816 similar to the output of vgrind. The index is sorted, and\n\
817 gives the page number of each item.");
818 puts ("-w, --no-warn\n\
819 Suppress warning messages about entries defined in multiple\n\
820 files.");
821 puts ("-x, --cxref\n\
822 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
823 The output uses line numbers instead of page numbers, but\n\
824 beyond that the differences are cosmetic; try both to see\n\
825 which you like.");
828 puts ("-V, --version\n\
829 Print the version of the program.\n\
830 -h, --help\n\
831 Print this help message.");
833 print_language_names ();
835 puts ("");
836 puts ("Report bugs to bug-gnu-emacs@gnu.org");
838 exit (GOOD);
842 #ifdef VMS /* VMS specific functions */
844 #define EOS '\0'
846 /* This is a BUG! ANY arbitrary limit is a BUG!
847 Won't someone please fix this? */
848 #define MAX_FILE_SPEC_LEN 255
849 typedef struct {
850 short curlen;
851 char body[MAX_FILE_SPEC_LEN + 1];
852 } vspec;
855 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
856 returning in each successive call the next file name matching the input
857 spec. The function expects that each in_spec passed
858 to it will be processed to completion; in particular, up to and
859 including the call following that in which the last matching name
860 is returned, the function ignores the value of in_spec, and will
861 only start processing a new spec with the following call.
862 If an error occurs, on return out_spec contains the value
863 of in_spec when the error occurred.
865 With each successive file name returned in out_spec, the
866 function's return value is one. When there are no more matching
867 names the function returns zero. If on the first call no file
868 matches in_spec, or there is any other error, -1 is returned.
871 #include <rmsdef.h>
872 #include <descrip.h>
873 #define OUTSIZE MAX_FILE_SPEC_LEN
874 static short
875 fn_exp (out, in)
876 vspec *out;
877 char *in;
879 static long context = 0;
880 static struct dsc$descriptor_s o;
881 static struct dsc$descriptor_s i;
882 static bool pass1 = TRUE;
883 long status;
884 short retval;
886 if (pass1)
888 pass1 = FALSE;
889 o.dsc$a_pointer = (char *) out;
890 o.dsc$w_length = (short)OUTSIZE;
891 i.dsc$a_pointer = in;
892 i.dsc$w_length = (short)strlen(in);
893 i.dsc$b_dtype = DSC$K_DTYPE_T;
894 i.dsc$b_class = DSC$K_CLASS_S;
895 o.dsc$b_dtype = DSC$K_DTYPE_VT;
896 o.dsc$b_class = DSC$K_CLASS_VS;
898 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
900 out->body[out->curlen] = EOS;
901 return 1;
903 else if (status == RMS$_NMF)
904 retval = 0;
905 else
907 strcpy(out->body, in);
908 retval = -1;
910 lib$find_file_end(&context);
911 pass1 = TRUE;
912 return retval;
916 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
917 name of each file specified by the provided arg expanding wildcards.
919 static char *
920 gfnames (arg, p_error)
921 char *arg;
922 bool *p_error;
924 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
926 switch (fn_exp (&filename, arg))
928 case 1:
929 *p_error = FALSE;
930 return filename.body;
931 case 0:
932 *p_error = FALSE;
933 return NULL;
934 default:
935 *p_error = TRUE;
936 return filename.body;
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Replacement for system(): no command is run.  An error is reported
   and -1 is returned, so that callers comparing the result with GOOD
   see a failure instead of an indeterminate value.
   NOTE(review): the guard is `#ifndef OLD' although the comment above
   suggests the stub is needed only on old systems -- confirm.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
948 #define VERSION_DELIM ';'
949 char *massage_name (s)
950 char *s;
952 char *start = s;
954 for ( ; *s; s++)
955 if (*s == VERSION_DELIM)
957 *s = EOS;
958 break;
960 else
961 *s = lowcase (*s);
962 return start;
964 #endif /* VMS */
968 main (argc, argv)
969 int argc;
970 char *argv[];
972 int i;
973 unsigned int nincluded_files;
974 char **included_files;
975 argument *argbuffer;
976 int current_arg, file_count;
977 linebuffer filename_lb;
978 #ifdef VMS
979 bool got_err;
980 #endif
981 char *optstring;
982 int opt;
985 #ifdef DOS_NT
986 _fmode = O_BINARY; /* all of files are treated as binary files */
987 #endif /* DOS_NT */
989 progname = argv[0];
990 nincluded_files = 0;
991 included_files = xnew (argc, char *);
992 current_arg = 0;
993 file_count = 0;
995 /* Allocate enough no matter what happens. Overkill, but each one
996 is small. */
997 argbuffer = xnew (argc, argument);
1000 * If etags, always find typedefs and structure tags. Why not?
1001 * Also default to find macro constants, enum constants and
1002 * global variables.
1004 if (!CTAGS)
1006 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1007 globals = TRUE;
1010 optstring = "-";
1011 #ifdef ETAGS_REGEXPS
1012 optstring = "-r:Rc:";
1013 #endif /* ETAGS_REGEXPS */
1014 #ifndef LONG_OPTIONS
1015 optstring = optstring + 1;
1016 #endif /* LONG_OPTIONS */
1017 optstring = concat (optstring,
1018 "Cf:Il:o:SVhH",
1019 (CTAGS) ? "BxdtTuvw" : "aDi:");
1021 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1022 switch (opt)
1024 case 0:
1025 /* If getopt returns 0, then it has already processed a
1026 long-named option. We should do nothing. */
1027 break;
1029 case 1:
1030 /* This means that a file name has been seen. Record it. */
1031 argbuffer[current_arg].arg_type = at_filename;
1032 argbuffer[current_arg].what = optarg;
1033 ++current_arg;
1034 ++file_count;
1035 break;
1037 case STDIN:
1038 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1039 argbuffer[current_arg].arg_type = at_stdin;
1040 argbuffer[current_arg].what = optarg;
1041 ++current_arg;
1042 ++file_count;
1043 if (parsing_stdin)
1044 fatal ("cannot parse standard input more than once", (char *)NULL);
1045 parsing_stdin = TRUE;
1046 break;
1048 /* Common options. */
1049 case 'C': cplusplus = TRUE; break;
1050 case 'f': /* for compatibility with old makefiles */
1051 case 'o':
1052 if (tagfile)
1054 error ("-o option may only be given once.", (char *)NULL);
1055 suggest_asking_for_help ();
1057 tagfile = optarg;
1058 break;
1059 case 'I':
1060 case 'S': /* for backward compatibility */
1061 noindentypedefs = TRUE;
1062 break;
1063 case 'l':
1065 language *lang = get_language_from_langname (optarg);
1066 if (lang != NULL)
1068 argbuffer[current_arg].lang = lang;
1069 argbuffer[current_arg].arg_type = at_language;
1070 ++current_arg;
1073 break;
1074 case 'c':
1075 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1076 optarg = concat (optarg, "i", ""); /* memory leak here */
1077 /* FALLTHRU */
1078 case 'r':
1079 argbuffer[current_arg].arg_type = at_regexp;
1080 argbuffer[current_arg].what = optarg;
1081 ++current_arg;
1082 break;
1083 case 'R':
1084 argbuffer[current_arg].arg_type = at_regexp;
1085 argbuffer[current_arg].what = NULL;
1086 ++current_arg;
1087 break;
1088 case 'V':
1089 print_version ();
1090 break;
1091 case 'h':
1092 case 'H':
1093 print_help ();
1094 break;
1096 /* Etags options */
1097 case 'a': append_to_tagfile = TRUE; break;
1098 case 'D': constantypedefs = FALSE; break;
1099 case 'i': included_files[nincluded_files++] = optarg; break;
1101 /* Ctags options. */
1102 case 'B': searchar = '?'; break;
1103 case 'd': constantypedefs = TRUE; break;
1104 case 't': typedefs = TRUE; break;
1105 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1106 case 'u': update = TRUE; break;
1107 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1108 case 'x': cxref_style = TRUE; break;
1109 case 'w': no_warnings = TRUE; break;
1110 default:
1111 suggest_asking_for_help ();
1114 for (; optind < argc; ++optind)
1116 argbuffer[current_arg].arg_type = at_filename;
1117 argbuffer[current_arg].what = argv[optind];
1118 ++current_arg;
1119 ++file_count;
1122 if (nincluded_files == 0 && file_count == 0)
1124 error ("no input files specified.", (char *)NULL);
1125 suggest_asking_for_help ();
1128 if (tagfile == NULL)
1129 tagfile = CTAGS ? "tags" : "TAGS";
1130 cwd = etags_getcwd (); /* the current working directory */
1131 if (cwd[strlen (cwd) - 1] != '/')
1133 char *oldcwd = cwd;
1134 cwd = concat (oldcwd, "/", "");
1135 free (oldcwd);
1137 if (streq (tagfile, "-"))
1138 tagfiledir = cwd;
1139 else
1140 tagfiledir = absolute_dirname (tagfile, cwd);
1142 init (); /* set up boolean "functions" */
1144 initbuffer (&lb);
1145 initbuffer (&filename_lb);
1146 initbuffer (&filebuf);
1148 if (!CTAGS)
1150 if (streq (tagfile, "-"))
1152 tagf = stdout;
1153 #ifdef DOS_NT
1154 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1155 doesn't take effect until after `stdout' is already open). */
1156 if (!isatty (fileno (stdout)))
1157 setmode (fileno (stdout), O_BINARY);
1158 #endif /* DOS_NT */
1160 else
1161 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1162 if (tagf == NULL)
1163 pfatal (tagfile);
1167 * Loop through files finding functions.
1169 for (i = 0; i < current_arg; ++i)
1171 static language *lang; /* non-NULL if language is forced */
1172 char *this_file;
1174 switch (argbuffer[i].arg_type)
1176 case at_language:
1177 lang = argbuffer[i].lang;
1178 break;
1179 #ifdef ETAGS_REGEXPS
1180 case at_regexp:
1181 analyse_regex (argbuffer[i].what);
1182 break;
1183 #endif
1184 case at_filename:
1185 #ifdef VMS
1186 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1188 if (got_err)
1190 error ("can't find file %s\n", this_file);
1191 argc--, argv++;
1193 else
1195 this_file = massage_name (this_file);
1197 #else
1198 this_file = argbuffer[i].what;
1199 #endif
1200 /* Input file named "-" means read file names from stdin
1201 (one per line) and use them. */
1202 if (streq (this_file, "-"))
1204 if (parsing_stdin)
1205 fatal ("cannot parse standard input AND read file names from it",
1206 (char *)NULL);
1207 while (readline_internal (&filename_lb, stdin) > 0)
1208 process_file_name (filename_lb.buffer, lang);
1210 else
1211 process_file_name (this_file, lang);
1212 #ifdef VMS
1214 #endif
1215 break;
1216 case at_stdin:
1217 this_file = argbuffer[i].what;
1218 process_file (stdin, this_file, lang);
1219 break;
1223 #ifdef ETAGS_REGEXPS
1224 free_patterns ();
1225 #endif /* ETAGS_REGEXPS */
1226 free (filebuf.buffer);
1228 if (!CTAGS || cxref_style)
1230 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1231 free_tree (nodehead);
1232 nodehead = NULL;
1233 if (!CTAGS)
1234 while (nincluded_files-- > 0)
1235 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1237 if (fclose (tagf) == EOF)
1238 pfatal (tagfile);
1239 exit (GOOD);
1242 if (update)
1244 char cmd[BUFSIZ];
1245 for (i = 0; i < current_arg; ++i)
1247 switch (argbuffer[i].arg_type)
1249 case at_filename:
1250 case at_stdin:
1251 break;
1252 default:
1253 continue; /* the for loop */
1255 sprintf (cmd,
1256 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1257 tagfile, argbuffer[i].what, tagfile);
1258 if (system (cmd) != GOOD)
1259 fatal ("failed to execute shell command", (char *)NULL);
1261 append_to_tagfile = TRUE;
1264 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1265 if (tagf == NULL)
1266 pfatal (tagfile);
1267 put_entries (nodehead); /* write all the tags (CTAGS) */
1268 free_tree (nodehead);
1269 nodehead = NULL;
1270 if (fclose (tagf) == EOF)
1271 pfatal (tagfile);
1273 if (update)
1275 char cmd[2*BUFSIZ+10];
1276 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1277 exit (system (cmd));
1279 return GOOD;
1284 * Return a compressor given the file name. If EXTPTR is non-zero,
1285 * return a pointer into FILE where the compressor-specific
1286 * extension begins. If no compressor is found, NULL is returned
1287 * and EXTPTR is not significant.
1288 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1290 static compressor *
1291 get_compressor_from_suffix (file, extptr)
1292 char *file;
1293 char **extptr;
1295 compressor *compr;
1296 char *slash, *suffix;
1298 /* This relies on FN to be after canonicalize_filename,
1299 so we don't need to consider backslashes on DOS_NT. */
1300 slash = etags_strrchr (file, '/');
1301 suffix = etags_strrchr (file, '.');
1302 if (suffix == NULL || suffix < slash)
1303 return NULL;
1304 if (extptr != NULL)
1305 *extptr = suffix;
1306 suffix += 1;
1307 /* Let those poor souls who live with DOS 8+3 file name limits get
1308 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1309 Only the first do loop is run if not MSDOS */
1312 for (compr = compressors; compr->suffix != NULL; compr++)
1313 if (streq (compr->suffix, suffix))
1314 return compr;
1315 if (!MSDOS)
1316 break; /* do it only once: not really a loop */
1317 if (extptr != NULL)
1318 *extptr = ++suffix;
1319 } while (*suffix != '\0');
1320 return NULL;
1326 * Return a language given the name.
1328 static language *
1329 get_language_from_langname (name)
1330 const char *name;
1332 language *lang;
1334 if (name == NULL)
1335 error ("empty language name", (char *)NULL);
1336 else
1338 for (lang = lang_names; lang->name != NULL; lang++)
1339 if (streq (name, lang->name))
1340 return lang;
1341 error ("unknown language \"%s\"", name);
1344 return NULL;
1349 * Return a language given the interpreter name.
1351 static language *
1352 get_language_from_interpreter (interpreter)
1353 char *interpreter;
1355 language *lang;
1356 char **iname;
1358 if (interpreter == NULL)
1359 return NULL;
1360 for (lang = lang_names; lang->name != NULL; lang++)
1361 if (lang->interpreters != NULL)
1362 for (iname = lang->interpreters; *iname != NULL; iname++)
1363 if (streq (*iname, interpreter))
1364 return lang;
1366 return NULL;
1372 * Return a language given the file name.
1374 static language *
1375 get_language_from_filename (file, case_sensitive)
1376 char *file;
1377 bool case_sensitive;
1379 language *lang;
1380 char **name, **ext, *suffix;
1382 /* Try whole file name first. */
1383 for (lang = lang_names; lang->name != NULL; lang++)
1384 if (lang->filenames != NULL)
1385 for (name = lang->filenames; *name != NULL; name++)
1386 if ((case_sensitive)
1387 ? streq (*name, file)
1388 : strcaseeq (*name, file))
1389 return lang;
1391 /* If not found, try suffix after last dot. */
1392 suffix = etags_strrchr (file, '.');
1393 if (suffix == NULL)
1394 return NULL;
1395 suffix += 1;
1396 for (lang = lang_names; lang->name != NULL; lang++)
1397 if (lang->suffixes != NULL)
1398 for (ext = lang->suffixes; *ext != NULL; ext++)
1399 if ((case_sensitive)
1400 ? streq (*ext, suffix)
1401 : strcaseeq (*ext, suffix))
1402 return lang;
1403 return NULL;
1408 * This routine is called on each file argument.
1410 static void
1411 process_file_name (file, lang)
1412 char *file;
1413 language *lang;
1415 struct stat stat_buf;
1416 FILE *inf;
1417 fdesc *fdp;
1418 compressor *compr;
1419 char *compressed_name, *uncompressed_name;
1420 char *ext, *real_name;
1421 int retval;
1423 canonicalize_filename (file);
1424 if (streq (file, tagfile) && !streq (tagfile, "-"))
1426 error ("skipping inclusion of %s in self.", file);
1427 return;
1429 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1431 compressed_name = NULL;
1432 real_name = uncompressed_name = savestr (file);
1434 else
1436 real_name = compressed_name = savestr (file);
1437 uncompressed_name = savenstr (file, ext - file);
1440 /* If the canonicalized uncompressed name
1441 has already been dealt with, skip it silently. */
1442 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1444 assert (fdp->infname != NULL);
1445 if (streq (uncompressed_name, fdp->infname))
1446 goto cleanup;
1449 if (stat (real_name, &stat_buf) != 0)
1451 /* Reset real_name and try with a different name. */
1452 real_name = NULL;
1453 if (compressed_name != NULL) /* try with the given suffix */
1455 if (stat (uncompressed_name, &stat_buf) == 0)
1456 real_name = uncompressed_name;
1458 else /* try all possible suffixes */
1460 for (compr = compressors; compr->suffix != NULL; compr++)
1462 compressed_name = concat (file, ".", compr->suffix);
1463 if (stat (compressed_name, &stat_buf) != 0)
1465 if (MSDOS)
1467 char *suf = compressed_name + strlen (file);
1468 size_t suflen = strlen (compr->suffix) + 1;
1469 for ( ; suf[1]; suf++, suflen--)
1471 memmove (suf, suf + 1, suflen);
1472 if (stat (compressed_name, &stat_buf) == 0)
1474 real_name = compressed_name;
1475 break;
1478 if (real_name != NULL)
1479 break;
1480 } /* MSDOS */
1481 free (compressed_name);
1482 compressed_name = NULL;
1484 else
1486 real_name = compressed_name;
1487 break;
1491 if (real_name == NULL)
1493 perror (file);
1494 goto cleanup;
1496 } /* try with a different name */
1498 if (!S_ISREG (stat_buf.st_mode))
1500 error ("skipping %s: it is not a regular file.", real_name);
1501 goto cleanup;
1503 if (real_name == compressed_name)
1505 char *cmd = concat (compr->command, " ", real_name);
1506 inf = (FILE *) popen (cmd, "r");
1507 free (cmd);
1509 else
1510 inf = fopen (real_name, "r");
1511 if (inf == NULL)
1513 perror (real_name);
1514 goto cleanup;
1517 process_file (inf, uncompressed_name, lang);
1519 if (real_name == compressed_name)
1520 retval = pclose (inf);
1521 else
1522 retval = fclose (inf);
1523 if (retval < 0)
1524 pfatal (file);
1526 cleanup:
1527 if (compressed_name) free (compressed_name);
1528 if (uncompressed_name) free (uncompressed_name);
1529 last_node = NULL;
1530 curfdp = NULL;
1531 return;
1534 static void
1535 process_file (fh, fn, lang)
1536 FILE *fh;
1537 char *fn;
1538 language *lang;
1540 static const fdesc emptyfdesc;
1541 fdesc *fdp;
1543 /* Create a new input file description entry. */
1544 fdp = xnew (1, fdesc);
1545 *fdp = emptyfdesc;
1546 fdp->next = fdhead;
1547 fdp->infname = savestr (fn);
1548 fdp->lang = lang;
1549 fdp->infabsname = absolute_filename (fn, cwd);
1550 fdp->infabsdir = absolute_dirname (fn, cwd);
1551 if (filename_is_absolute (fn))
1553 /* An absolute file name. Canonicalize it. */
1554 fdp->taggedfname = absolute_filename (fn, NULL);
1556 else
1558 /* A file name relative to cwd. Make it relative
1559 to the directory of the tags file. */
1560 fdp->taggedfname = relative_filename (fn, tagfiledir);
1562 fdp->usecharno = TRUE; /* use char position when making tags */
1563 fdp->prop = NULL;
1565 fdhead = fdp;
1566 curfdp = fdhead; /* the current file description */
1568 find_entries (fh);
1570 /* If not Ctags, and if this is not metasource and if it contained no #line
1571 directives, we can write the tags and free all nodes pointing to
1572 curfdp. */
1573 if (!CTAGS
1574 && curfdp->usecharno /* no #line directives in this file */
1575 && !curfdp->lang->metasource)
1577 node *np, *prev;
1579 /* Look for the head of the sublist relative to this file. See add_node
1580 for the structure of the node tree. */
1581 prev = NULL;
1582 for (np = nodehead; np != NULL; prev = np, np = np->left)
1583 if (np->fdp == curfdp)
1584 break;
1586 /* If we generated tags for this file, write and delete them. */
1587 if (np != NULL)
1589 /* This is the head of the last sublist, if any. The following
1590 instructions depend on this being true. */
1591 assert (np->left == NULL);
1593 assert (fdhead == curfdp);
1594 assert (last_node->fdp == curfdp);
1595 put_entries (np); /* write tags for file curfdp->taggedfname */
1596 free_tree (np); /* remove the written nodes */
1597 if (prev == NULL)
1598 nodehead = NULL; /* no nodes left */
1599 else
1600 prev->left = NULL; /* delete the pointer to the sublist */
1606 * This routine sets up the boolean pseudo-functions which work
1607 * by setting boolean flags dependent upon the corresponding character.
1608 * Every char which is NOT in that string is not a white char. Therefore,
1609 * all of the array "_wht" is set to FALSE, and then the elements
1610 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1611 * of a char is TRUE if it is the string "white", else FALSE.
1613 static void
1614 init ()
1616 register char *sp;
1617 register int i;
1619 for (i = 0; i < CHARS; i++)
1620 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1621 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1622 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1623 notinname('\0') = notinname('\n');
1624 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1625 begtoken('\0') = begtoken('\n');
1626 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1627 intoken('\0') = intoken('\n');
1628 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1629 endtoken('\0') = endtoken('\n');
1633 * This routine opens the specified file and calls the function
1634 * which finds the function and type definitions.
1636 static void
1637 find_entries (inf)
1638 FILE *inf;
1640 char *cp;
1641 language *lang = curfdp->lang;
1642 Lang_function *parser = NULL;
1644 /* If user specified a language, use it. */
1645 if (lang != NULL && lang->function != NULL)
1647 parser = lang->function;
1650 /* Else try to guess the language given the file name. */
1651 if (parser == NULL)
1653 lang = get_language_from_filename (curfdp->infname, TRUE);
1654 if (lang != NULL && lang->function != NULL)
1656 curfdp->lang = lang;
1657 parser = lang->function;
1661 /* Else look for sharp-bang as the first two characters. */
1662 if (parser == NULL
1663 && readline_internal (&lb, inf) > 0
1664 && lb.len >= 2
1665 && lb.buffer[0] == '#'
1666 && lb.buffer[1] == '!')
1668 char *lp;
1670 /* Set lp to point at the first char after the last slash in the
1671 line or, if no slashes, at the first nonblank. Then set cp to
1672 the first successive blank and terminate the string. */
1673 lp = etags_strrchr (lb.buffer+2, '/');
1674 if (lp != NULL)
1675 lp += 1;
1676 else
1677 lp = skip_spaces (lb.buffer + 2);
1678 cp = skip_non_spaces (lp);
1679 *cp = '\0';
1681 if (strlen (lp) > 0)
1683 lang = get_language_from_interpreter (lp);
1684 if (lang != NULL && lang->function != NULL)
1686 curfdp->lang = lang;
1687 parser = lang->function;
1692 /* We rewind here, even if inf may be a pipe. We fail if the
1693 length of the first line is longer than the pipe block size,
1694 which is unlikely. */
1695 rewind (inf);
1697 /* Else try to guess the language given the case insensitive file name. */
1698 if (parser == NULL)
1700 lang = get_language_from_filename (curfdp->infname, FALSE);
1701 if (lang != NULL && lang->function != NULL)
1703 curfdp->lang = lang;
1704 parser = lang->function;
1708 /* Else try Fortran or C. */
1709 if (parser == NULL)
1711 node *old_last_node = last_node;
1713 curfdp->lang = get_language_from_langname ("fortran");
1714 find_entries (inf);
1716 if (old_last_node == last_node)
1717 /* No Fortran entries found. Try C. */
1719 /* We do not tag if rewind fails.
1720 Only the file name will be recorded in the tags file. */
1721 rewind (inf);
1722 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1723 find_entries (inf);
1725 return;
1728 if (!no_line_directive
1729 && curfdp->lang != NULL && curfdp->lang->metasource)
1730 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1731 file, or anyway we parsed a file that is automatically generated from
1732 this one. If this is the case, the bingo.c file contained #line
1733 directives that generated tags pointing to this file. Let's delete
1734 them all before parsing this file, which is the real source. */
1736 fdesc **fdpp = &fdhead;
1737 while (*fdpp != NULL)
1738 if (*fdpp != curfdp
1739 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1740 /* We found one of those! We must delete both the file description
1741 and all tags referring to it. */
1743 fdesc *badfdp = *fdpp;
1745 if (DEBUG)
1746 fprintf (stderr,
1747 "Removing references to \"%s\" obtained from \"%s\"\n",
1748 badfdp->taggedfname, badfdp->infname);
1750 /* Delete the tags referring to badfdp. */
1751 invalidate_nodes (badfdp, &nodehead);
1753 *fdpp = badfdp->next; /* remove the bad description from the list */
1754 free_fdesc (badfdp);
1756 else
1757 fdpp = &(*fdpp)->next; /* advance the list pointer */
1760 assert (parser != NULL);
1762 /* Generic initialisations before reading from file. */
1763 filebuf.len = 0; /* reset the file buffer */
1765 /* Generic initialisations before parsing file with readline. */
1766 lineno = 0; /* reset global line number */
1767 charno = 0; /* reset global char number */
1768 linecharno = 0; /* reset global char number of line start */
1770 parser (inf);
1772 #ifdef ETAGS_REGEXPS
1773 regex_tag_multiline ();
1774 #endif /* ETAGS_REGEXPS */
1778 /* Record a tag. */
1779 static void
1780 pfnote (name, is_func, linestart, linelen, lno, cno)
1781 char *name; /* tag name, or NULL if unnamed */
1782 bool is_func; /* tag is a function */
1783 char *linestart; /* start of the line where tag is */
1784 int linelen; /* length of the line where tag is */
1785 int lno; /* line number */
1786 long cno; /* character number */
1788 register node *np;
1790 if (CTAGS && name == NULL)
1791 return;
1793 np = xnew (1, node);
1795 /* If ctags mode, change name "main" to M<thisfilename>. */
1796 if (CTAGS && !cxref_style && streq (name, "main"))
1798 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1799 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1800 fp = etags_strrchr (np->name, '.');
1801 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1802 fp[0] = '\0';
1804 else
1805 np->name = name;
1806 np->valid = TRUE;
1807 np->been_warned = FALSE;
1808 np->fdp = curfdp;
1809 np->is_func = is_func;
1810 np->lno = lno;
1811 if (np->fdp->usecharno)
1812 /* Our char numbers are 0-base, because of C language tradition?
1813 ctags compatibility? old versions compatibility? I don't know.
1814 Anyway, since emacs's are 1-base we expect etags.el to take care
1815 of the difference. If we wanted to have 1-based numbers, we would
1816 uncomment the +1 below. */
1817 np->cno = cno /* + 1 */ ;
1818 else
1819 np->cno = invalidcharno;
1820 np->left = np->right = NULL;
1821 if (CTAGS && !cxref_style)
1823 if (strlen (linestart) < 50)
1824 np->pat = concat (linestart, "$", "");
1825 else
1826 np->pat = savenstr (linestart, 50);
1828 else
1829 np->pat = savenstr (linestart, linelen);
1831 add_node (np, &nodehead);
1835 * TAGS format specification
1836 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1838 * pfnote should emit the optimized form [unnamed tag] only if:
1839 * 1. name does not contain any of the characters " \t\r\n(),;";
1840 * 2. linestart contains name as either a rightmost, or rightmost but
1841 * one character, substring;
1842 * 3. the character, if any, immediately before name in linestart must
1843 * be one of the characters " \t(),;";
1844 * 4. the character, if any, immediately after name in linestart must
1845 * also be one of the characters " \t(),;".
1847 * The real implementation uses the notinname() macro, which recognises
1848 * characters slightly different from " \t\r\n(),;". See the variable
1849 * `nonam'.
1851 #define traditional_tag_style TRUE
1852 static void
1853 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1854 char *name; /* tag name, or NULL if unnamed */
1855 int namelen; /* tag length */
1856 bool is_func; /* tag is a function */
1857 char *linestart; /* start of the line where tag is */
1858 int linelen; /* length of the line where tag is */
1859 int lno; /* line number */
1860 long cno; /* character number */
1862 register char *cp;
1863 bool named;
1865 named = TRUE;
1866 if (!CTAGS)
1868 for (cp = name; !notinname (*cp); cp++)
1869 continue;
1870 if (*cp == '\0') /* rule #1 */
1872 cp = linestart + linelen - namelen;
1873 if (notinname (linestart[linelen-1]))
1874 cp -= 1; /* rule #4 */
1875 if (cp >= linestart /* rule #2 */
1876 && (cp == linestart
1877 || notinname (cp[-1])) /* rule #3 */
1878 && strneq (name, cp, namelen)) /* rule #2 */
1879 named = FALSE; /* use unnamed tag */
1883 if (named)
1884 name = savenstr (name, namelen);
1885 else
1886 name = NULL;
1887 pfnote (name, is_func, linestart, linelen, lno, cno);
1891 * free_tree ()
1892 * recurse on left children, iterate on right children.
1894 static void
1895 free_tree (np)
1896 register node *np;
1898 while (np)
1900 register node *node_right = np->right;
1901 free_tree (np->left);
1902 if (np->name != NULL)
1903 free (np->name);
1904 free (np->pat);
1905 free (np);
1906 np = node_right;
1911 * free_fdesc ()
1912 * delete a file description
1914 static void
1915 free_fdesc (fdp)
1916 register fdesc *fdp;
1918 if (fdp->infname != NULL) free (fdp->infname);
1919 if (fdp->infabsname != NULL) free (fdp->infabsname);
1920 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1921 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1922 if (fdp->prop != NULL) free (fdp->prop);
1923 free (fdp);
1927 * add_node ()
1928 * Adds a node to the tree of nodes. In etags mode, sort by file
1929 * name. In ctags mode, sort by tag name. Make no attempt at
1930 * balancing.
1932 * add_node is the only function allowed to add nodes, so it can
1933 * maintain state.
1935 static void
1936 add_node (np, cur_node_p)
1937 node *np, **cur_node_p;
1939 register int dif;
1940 register node *cur_node = *cur_node_p;
1942 if (cur_node == NULL)
1944 *cur_node_p = np;
1945 last_node = np;
1946 return;
1949 if (!CTAGS)
1950 /* Etags Mode */
1952 /* For each file name, tags are in a linked sublist on the right
1953 pointer. The first tags of different files are a linked list
1954 on the left pointer. last_node points to the end of the last
1955 used sublist. */
1956 if (last_node != NULL && last_node->fdp == np->fdp)
1958 /* Let's use the same sublist as the last added node. */
1959 assert (last_node->right == NULL);
1960 last_node->right = np;
1961 last_node = np;
1963 else if (cur_node->fdp == np->fdp)
1965 /* Scanning the list we found the head of a sublist which is
1966 good for us. Let's scan this sublist. */
1967 add_node (np, &cur_node->right);
1969 else
1970 /* The head of this sublist is not good for us. Let's try the
1971 next one. */
1972 add_node (np, &cur_node->left);
1973 } /* if ETAGS mode */
1975 else
1977 /* Ctags Mode */
1978 dif = strcmp (np->name, cur_node->name);
1981 * If this tag name matches an existing one, then
1982 * do not add the node, but maybe print a warning.
1984 if (!dif)
1986 if (np->fdp == cur_node->fdp)
1988 if (!no_warnings)
1990 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1991 np->fdp->infname, lineno, np->name);
1992 fprintf (stderr, "Second entry ignored\n");
1995 else if (!cur_node->been_warned && !no_warnings)
1997 fprintf
1998 (stderr,
1999 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2000 np->fdp->infname, cur_node->fdp->infname, np->name);
2001 cur_node->been_warned = TRUE;
2003 return;
2006 /* Actually add the node */
2007 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2008 } /* if CTAGS mode */
2012 * invalidate_nodes ()
2013 * Scan the node tree and invalidate all nodes pointing to the
2014 * given file description (CTAGS case) or free them (ETAGS case).
2016 static void
2017 invalidate_nodes (badfdp, npp)
2018 fdesc *badfdp;
2019 node **npp;
2021 node *np = *npp;
2023 if (np == NULL)
2024 return;
2026 if (CTAGS)
2028 if (np->left != NULL)
2029 invalidate_nodes (badfdp, &np->left);
2030 if (np->fdp == badfdp)
2031 np->valid = FALSE;
2032 if (np->right != NULL)
2033 invalidate_nodes (badfdp, &np->right);
2035 else
2037 assert (np->fdp != NULL);
2038 if (np->fdp == badfdp)
2040 *npp = np->left; /* detach the sublist from the list */
2041 np->left = NULL; /* isolate it */
2042 free_tree (np); /* free it */
2043 invalidate_nodes (badfdp, npp);
2045 else
2046 invalidate_nodes (badfdp, &np->left);
2051 static int total_size_of_entries __P((node *));
2052 static int number_len __P((long));
/* Return how many decimal digits are needed to write NUM, which is
   expected to be non-negative.  Zero takes one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2066 * Return total number of characters that put_entries will output for
2067 * the nodes in the linked list at the right of the specified node.
2068 * This count is irrelevant with etags.el since emacs 19.34 at least,
2069 * but is still supplied for backward compatibility.
2071 static int
2072 total_size_of_entries (np)
2073 register node *np;
2075 register int total = 0;
2077 for (; np != NULL; np = np->right)
2079 total += strlen (np->pat) + 1; /* pat\177 */
2080 if (np->name != NULL)
2081 total += strlen (np->name) + 1; /* name\001 */
2082 total += number_len ((long) np->lno) + 1; /* lno, */
2083 if (np->cno != invalidcharno) /* cno */
2084 total += number_len (np->cno);
2085 total += 1; /* newline */
2088 return total;
2091 static void
2092 put_entries (np)
2093 register node *np;
2095 register char *sp;
2096 static fdesc *fdp = NULL;
2098 if (np == NULL)
2099 return;
2101 /* Output subentries that precede this one */
2102 if (CTAGS)
2103 put_entries (np->left);
2105 /* Output this entry */
2106 if (np->valid)
2108 if (!CTAGS)
2110 /* Etags mode */
2111 if (fdp != np->fdp)
2113 fdp = np->fdp;
2114 fprintf (tagf, "\f\n%s,%d\n",
2115 fdp->taggedfname, total_size_of_entries (np));
2117 fputs (np->pat, tagf);
2118 fputc ('\177', tagf);
2119 if (np->name != NULL)
2121 fputs (np->name, tagf);
2122 fputc ('\001', tagf);
2124 fprintf (tagf, "%d,", np->lno);
2125 if (np->cno != invalidcharno)
2126 fprintf (tagf, "%ld", np->cno);
2127 fputs ("\n", tagf);
2129 else
2131 /* Ctags mode */
2132 if (np->name == NULL)
2133 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2135 if (cxref_style)
2137 if (vgrind_style)
2138 fprintf (stdout, "%s %s %d\n",
2139 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2140 else
2141 fprintf (stdout, "%-16s %3d %-16s %s\n",
2142 np->name, np->lno, np->fdp->taggedfname, np->pat);
2144 else
2146 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2148 if (np->is_func)
2149 { /* function or #define macro with args */
2150 putc (searchar, tagf);
2151 putc ('^', tagf);
2153 for (sp = np->pat; *sp; sp++)
2155 if (*sp == '\\' || *sp == searchar)
2156 putc ('\\', tagf);
2157 putc (*sp, tagf);
2159 putc (searchar, tagf);
2161 else
2162 { /* anything else; text pattern inadequate */
2163 fprintf (tagf, "%d", np->lno);
2165 putc ('\n', tagf);
2168 } /* if this node contains a valid tag */
2170 /* Output subentries that follow this one */
2171 put_entries (np->right);
2172 if (!CTAGS)
2173 put_entries (np->left);
/* C extensions: flags telling which dialect of C is being parsed.
   Note that the values of C_STAR and C_JAVA both include the C_PLPL
   bit, and that all three fall within the C_EXT mask. */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN	0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO	0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables.
 *
 * Kinds of symbols known to the C parser.  The order of the
 * enumerators determines their values and must not be changed
 * casually.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef,
  st_C_typespec
};
2201 static unsigned int hash __P((const char *, unsigned int));
2202 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2203 static enum sym_type C_symtype __P((char *, int, int));
2205 /* Feed stuff between (but not including) %[ and %] lines to:
2206 gperf -c -k 1,3 -o -p -r -t
2208 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2210 if, 0, st_C_ignore
2211 for, 0, st_C_ignore
2212 while, 0, st_C_ignore
2213 switch, 0, st_C_ignore
2214 return, 0, st_C_ignore
2215 @interface, 0, st_C_objprot
2216 @protocol, 0, st_C_objprot
2217 @implementation,0, st_C_objimpl
2218 @end, 0, st_C_objend
2219 import, C_JAVA, st_C_ignore
2220 package, C_JAVA, st_C_ignore
2221 friend, C_PLPL, st_C_ignore
2222 extends, C_JAVA, st_C_javastruct
2223 implements, C_JAVA, st_C_javastruct
2224 interface, C_JAVA, st_C_struct
2225 class, 0, st_C_class
2226 namespace, C_PLPL, st_C_struct
2227 domain, C_STAR, st_C_struct
2228 union, 0, st_C_struct
2229 struct, 0, st_C_struct
2230 extern, 0, st_C_extern
2231 enum, 0, st_C_enum
2232 typedef, 0, st_C_typedef
2233 define, 0, st_C_define
2234 operator, C_PLPL, st_C_operator
2235 template, 0, st_C_template
2236 bool, C_PLPL, st_C_typespec
2237 long, 0, st_C_typespec
2238 short, 0, st_C_typespec
2239 int, 0, st_C_typespec
2240 char, 0, st_C_typespec
2241 float, 0, st_C_typespec
2242 double, 0, st_C_typespec
2243 signed, 0, st_C_typespec
2244 unsigned, 0, st_C_typespec
2245 auto, 0, st_C_typespec
2246 void, 0, st_C_typespec
2247 static, 0, st_C_typespec
2248 const, 0, st_C_typespec
2249 volatile, 0, st_C_typespec
2250 explicit, C_PLPL, st_C_typespec
2251 mutable, C_PLPL, st_C_typespec
2252 typename, C_PLPL, st_C_typespec
2253 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2254 DEFUN, 0, st_C_gnumacro
2255 SYSCALL, 0, st_C_gnumacro
2256 ENTRY, 0, st_C_gnumacro
2257 PSEUDO, 0, st_C_gnumacro
2258 # These are defined inside C functions, so currently they are not met.
2259 # EXFUN used in glibc, DEFVAR_* in emacs.
2260 #EXFUN, 0, st_C_gnumacro
2261 #DEFVAR_, 0, st_C_gnumacro
2263 and replace lines between %< and %> with its output,
2264 then make in_word_set and C_stab_entry static. */
2265 /*%<*/
2266 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2267 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2268 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2270 #define TOTAL_KEYWORDS 47
2271 #define MIN_WORD_LENGTH 2
2272 #define MAX_WORD_LENGTH 15
2273 #define MIN_HASH_VALUE 18
2274 #define MAX_HASH_VALUE 138
2275 /* maximum key range = 121, duplicates = 0 */
/* gperf-generated hash: the token length plus the association values
   of its first character and, except for lengths 1 and 2, of its
   third character.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* Only lengths 1 and 2 skip the third character.  */
  if (len != 1 && len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2329 #ifdef __GNUC__
2330 __inline
2331 #endif
2332 static struct C_stab_entry *
2333 in_word_set (str, len)
2334 register const char *str;
2335 register unsigned int len;
2337 static struct C_stab_entry wordlist[] =
2339 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2340 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2341 {"if", 0, st_C_ignore},
2342 {""}, {""}, {""}, {""},
2343 {"int", 0, st_C_typespec},
2344 {""}, {""},
2345 {"void", 0, st_C_typespec},
2346 {""}, {""},
2347 {"interface", C_JAVA, st_C_struct},
2348 {""},
2349 {"SYSCALL", 0, st_C_gnumacro},
2350 {""},
2351 {"return", 0, st_C_ignore},
2352 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2353 {"while", 0, st_C_ignore},
2354 {"auto", 0, st_C_typespec},
2355 {""}, {""}, {""}, {""}, {""}, {""},
2356 {"float", 0, st_C_typespec},
2357 {"typedef", 0, st_C_typedef},
2358 {"typename", C_PLPL, st_C_typespec},
2359 {""}, {""}, {""},
2360 {"friend", C_PLPL, st_C_ignore},
2361 {"volatile", 0, st_C_typespec},
2362 {""}, {""},
2363 {"for", 0, st_C_ignore},
2364 {"const", 0, st_C_typespec},
2365 {"import", C_JAVA, st_C_ignore},
2366 {""},
2367 {"define", 0, st_C_define},
2368 {"long", 0, st_C_typespec},
2369 {"implements", C_JAVA, st_C_javastruct},
2370 {"signed", 0, st_C_typespec},
2371 {""},
2372 {"extern", 0, st_C_extern},
2373 {"extends", C_JAVA, st_C_javastruct},
2374 {""},
2375 {"mutable", C_PLPL, st_C_typespec},
2376 {"template", 0, st_C_template},
2377 {"short", 0, st_C_typespec},
2378 {"bool", C_PLPL, st_C_typespec},
2379 {"char", 0, st_C_typespec},
2380 {"class", 0, st_C_class},
2381 {"operator", C_PLPL, st_C_operator},
2382 {""},
2383 {"switch", 0, st_C_ignore},
2384 {""},
2385 {"ENTRY", 0, st_C_gnumacro},
2386 {""},
2387 {"package", C_JAVA, st_C_ignore},
2388 {"union", 0, st_C_struct},
2389 {"@end", 0, st_C_objend},
2390 {"struct", 0, st_C_struct},
2391 {"namespace", C_PLPL, st_C_struct},
2392 {""}, {""},
2393 {"domain", C_STAR, st_C_struct},
2394 {"@interface", 0, st_C_objprot},
2395 {"PSEUDO", 0, st_C_gnumacro},
2396 {"double", 0, st_C_typespec},
2397 {""},
2398 {"@protocol", 0, st_C_objprot},
2399 {""},
2400 {"static", 0, st_C_typespec},
2401 {""}, {""},
2402 {"DEFUN", 0, st_C_gnumacro},
2403 {""}, {""}, {""}, {""},
2404 {"explicit", C_PLPL, st_C_typespec},
2405 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2406 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2407 {""},
2408 {"enum", 0, st_C_enum},
2409 {""}, {""},
2410 {"unsigned", 0, st_C_typespec},
2411 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2412 {"@implementation",0, st_C_objimpl}
2415 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2417 register int key = hash (str, len);
2419 if (key <= MAX_HASH_VALUE && key >= 0)
2421 register const char *s = wordlist[key].name;
2423 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2424 return &wordlist[key];
2427 return 0;
2429 /*%>*/
2431 static enum sym_type
2432 C_symtype (str, len, c_ext)
2433 char *str;
2434 int len;
2435 int c_ext;
2437 register struct C_stab_entry *se = in_word_set (str, len);
2439 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2440 return st_none;
2441 return se->type;
/*
 * Functions and variables are recognised by a small finite automaton
 * whose current state is kept in fvdef.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen.  */
static bool fvextern;
/*
 * A second finite automaton recognises typedefs; typdef holds its
 * current state.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * Struct-like constructs (enum, struct and union) have their own
 * finite automaton, whose state variable is `structdef'.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if cblev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  sintemplate,                  /* inside template (ignore) */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * Name of the current Objective C class; meaningful only while objdef
 * is different from onone.
 */
static char *objtag = "<uninited>";
/*
 * State machine following preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2532 * Use this structure to keep info about the token read, and how it
2533 * should be tagged. Used by the make_C_tag function to build a tag.
2535 static struct tok
2537 bool valid;
2538 bool named;
2539 int offset;
2540 int length;
2541 int lineno;
2542 long linepos;
2543 char *line;
2544 } token; /* latest token read */
2545 static linebuffer token_name; /* its name */
2548 * Variables and functions for dealing with nested structures.
2549 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2551 static void pushclass_above __P((int, char *, int));
2552 static void popclass_above __P((int));
2553 static void write_classname __P((linebuffer *, char *qualifier));
2555 static struct {
2556 char **cname; /* nested class names */
2557 int *cblev; /* nested class curly brace level */
2558 int nl; /* class nesting level (elements used) */
2559 int size; /* length of the array */
2560 } cstack; /* stack for nested declaration tags */
2561 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2562 #define nestlev (cstack.nl)
2563 /* After struct keyword or in struct body, not inside an nested function. */
2564 #define instruct (structdef == snone && nestlev > 0 \
2565 && cblev == cstack.cblev[nestlev-1] + 1)
2567 static void
2568 pushclass_above (cblev, str, len)
2569 int cblev;
2570 char *str;
2571 int len;
2573 int nl;
2575 popclass_above (cblev);
2576 nl = cstack.nl;
2577 if (nl >= cstack.size)
2579 int size = cstack.size *= 2;
2580 xrnew (cstack.cname, size, char *);
2581 xrnew (cstack.cblev, size, int);
2583 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2584 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2585 cstack.cblev[nl] = cblev;
2586 cstack.nl = nl + 1;
2589 static void
2590 popclass_above (cblev)
2591 int cblev;
2593 int nl;
2595 for (nl = cstack.nl - 1;
2596 nl >= 0 && cstack.cblev[nl] >= cblev;
2597 nl--)
2599 if (cstack.cname[nl] != NULL)
2600 free (cstack.cname[nl]);
2601 cstack.nl = nl;
2605 static void
2606 write_classname (cn, qualifier)
2607 linebuffer *cn;
2608 char *qualifier;
2610 int i, len;
2611 int qlen = strlen (qualifier);
2613 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2615 len = 0;
2616 cn->len = 0;
2617 cn->buffer[0] = '\0';
2619 else
2621 len = strlen (cstack.cname[0]);
2622 linebuffer_setlen (cn, len);
2623 strcpy (cn->buffer, cstack.cname[0]);
2625 for (i = 1; i < cstack.nl; i++)
2627 char *s;
2628 int slen;
2630 s = cstack.cname[i];
2631 if (s == NULL)
2632 continue;
2633 slen = strlen (s);
2634 len += slen + qlen;
2635 linebuffer_setlen (cn, len);
2636 strncat (cn->buffer, qualifier, qlen);
2637 strncat (cn->buffer, s, slen);
2642 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2643 static void make_C_tag __P((bool));
2646 * consider_token ()
2647 * checks to see if the current token is at the start of a
2648 * function or variable, or corresponds to a typedef, or
2649 * is a struct/union/enum tag, or #define, or an enum constant.
2651 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2652 * with args. C_EXTP points to which language we are looking at.
2654 * Globals
2655 * fvdef IN OUT
2656 * structdef IN OUT
2657 * definedef IN OUT
2658 * typdef IN OUT
2659 * objdef IN OUT
2662 static bool
2663 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2664 register char *str; /* IN: token pointer */
2665 register int len; /* IN: token length */
2666 register int c; /* IN: first char after the token */
2667 int *c_extp; /* IN, OUT: C extensions mask */
2668 int cblev; /* IN: curly brace level */
2669 int parlev; /* IN: parenthesis level */
2670 bool *is_func_or_var; /* OUT: function or variable found */
2672 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2673 structtype is the type of the preceding struct-like keyword, and
2674 structcblev is the curly brace level where it has been seen. */
2675 static enum sym_type structtype;
2676 static int structcblev;
2677 static enum sym_type toktype;
2680 toktype = C_symtype (str, len, *c_extp);
2683 * Advance the definedef state machine.
2685 switch (definedef)
2687 case dnone:
2688 /* We're not on a preprocessor line. */
2689 if (toktype == st_C_gnumacro)
2691 fvdef = fdefunkey;
2692 return FALSE;
2694 break;
2695 case dsharpseen:
2696 if (toktype == st_C_define)
2698 definedef = ddefineseen;
2700 else
2702 definedef = dignorerest;
2704 return FALSE;
2705 case ddefineseen:
2707 * Make a tag for any macro, unless it is a constant
2708 * and constantypedefs is FALSE.
2710 definedef = dignorerest;
2711 *is_func_or_var = (c == '(');
2712 if (!*is_func_or_var && !constantypedefs)
2713 return FALSE;
2714 else
2715 return TRUE;
2716 case dignorerest:
2717 return FALSE;
2718 default:
2719 error ("internal error: definedef value.", (char *)NULL);
2723 * Now typedefs
2725 switch (typdef)
2727 case tnone:
2728 if (toktype == st_C_typedef)
2730 if (typedefs)
2731 typdef = tkeyseen;
2732 fvextern = FALSE;
2733 fvdef = fvnone;
2734 return FALSE;
2736 break;
2737 case tkeyseen:
2738 switch (toktype)
2740 case st_none:
2741 case st_C_typespec:
2742 case st_C_class:
2743 case st_C_struct:
2744 case st_C_enum:
2745 typdef = ttypeseen;
2746 break;
2748 break;
2749 case ttypeseen:
2750 if (structdef == snone && fvdef == fvnone)
2752 fvdef = fvnameseen;
2753 return TRUE;
2755 break;
2756 case tend:
2757 switch (toktype)
2759 case st_C_typespec:
2760 case st_C_class:
2761 case st_C_struct:
2762 case st_C_enum:
2763 return FALSE;
2765 return TRUE;
2769 * This structdef business is NOT invoked when we are ctags and the
2770 * file is plain C. This is because a struct tag may have the same
2771 * name as another tag, and this loses with ctags.
2773 switch (toktype)
2775 case st_C_javastruct:
2776 if (structdef == stagseen)
2777 structdef = scolonseen;
2778 return FALSE;
2779 case st_C_template:
2780 case st_C_class:
2781 if (cblev == 0
2782 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2783 && definedef == dnone && structdef == snone
2784 && typdef == tnone && fvdef == fvnone)
2785 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2786 if (toktype == st_C_template)
2787 break;
2788 /* FALLTHRU */
2789 case st_C_struct:
2790 case st_C_enum:
2791 if (parlev == 0
2792 && fvdef != vignore
2793 && (typdef == tkeyseen
2794 || (typedefs_or_cplusplus && structdef == snone)))
2796 structdef = skeyseen;
2797 structtype = toktype;
2798 structcblev = cblev;
2800 return FALSE;
2803 if (structdef == skeyseen)
2805 structdef = stagseen;
2806 return TRUE;
2809 if (typdef != tnone)
2810 definedef = dnone;
2812 /* Detect Objective C constructs. */
2813 switch (objdef)
2815 case onone:
2816 switch (toktype)
2818 case st_C_objprot:
2819 objdef = oprotocol;
2820 return FALSE;
2821 case st_C_objimpl:
2822 objdef = oimplementation;
2823 return FALSE;
2825 break;
2826 case oimplementation:
2827 /* Save the class tag for functions or variables defined inside. */
2828 objtag = savenstr (str, len);
2829 objdef = oinbody;
2830 return FALSE;
2831 case oprotocol:
2832 /* Save the class tag for categories. */
2833 objtag = savenstr (str, len);
2834 objdef = otagseen;
2835 *is_func_or_var = TRUE;
2836 return TRUE;
2837 case oparenseen:
2838 objdef = ocatseen;
2839 *is_func_or_var = TRUE;
2840 return TRUE;
2841 case oinbody:
2842 break;
2843 case omethodsign:
2844 if (parlev == 0)
2846 objdef = omethodtag;
2847 linebuffer_setlen (&token_name, len);
2848 strncpy (token_name.buffer, str, len);
2849 token_name.buffer[len] = '\0';
2850 return TRUE;
2852 return FALSE;
2853 case omethodcolon:
2854 if (parlev == 0)
2855 objdef = omethodparm;
2856 return FALSE;
2857 case omethodparm:
2858 if (parlev == 0)
2860 objdef = omethodtag;
2861 linebuffer_setlen (&token_name, token_name.len + len);
2862 strncat (token_name.buffer, str, len);
2863 return TRUE;
2865 return FALSE;
2866 case oignore:
2867 if (toktype == st_C_objend)
2869 /* Memory leakage here: the string pointed by objtag is
2870 never released, because many tests would be needed to
2871 avoid breaking on incorrect input code. The amount of
2872 memory leaked here is the sum of the lengths of the
2873 class tags.
2874 free (objtag); */
2875 objdef = onone;
2877 return FALSE;
2880 /* A function, variable or enum constant? */
2881 switch (toktype)
2883 case st_C_extern:
2884 fvextern = TRUE;
2885 /* FALLTHRU */
2886 case st_C_typespec:
2887 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2888 fvdef = fvnone; /* should be useless */
2889 return FALSE;
2890 case st_C_ignore:
2891 fvextern = FALSE;
2892 fvdef = vignore;
2893 return FALSE;
2894 case st_C_operator:
2895 fvdef = foperator;
2896 *is_func_or_var = TRUE;
2897 return TRUE;
2898 case st_none:
2899 if (constantypedefs
2900 && structdef == snone
2901 && structtype == st_C_enum && cblev > structcblev)
2902 return TRUE; /* enum constant */
2903 switch (fvdef)
2905 case fdefunkey:
2906 if (cblev > 0)
2907 break;
2908 fvdef = fdefunname; /* GNU macro */
2909 *is_func_or_var = TRUE;
2910 return TRUE;
2911 case fvnone:
2912 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2913 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2915 fvdef = vignore;
2916 return FALSE;
2918 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2920 fvdef = foperator;
2921 *is_func_or_var = TRUE;
2922 return TRUE;
2924 if (cblev > 0 && !instruct)
2925 break;
2926 fvdef = fvnameseen; /* function or variable */
2927 *is_func_or_var = TRUE;
2928 return TRUE;
2930 break;
2933 return FALSE;
2938 * C_entries often keeps pointers to tokens or lines which are older than
2939 * the line currently read. By keeping two line buffers, and switching
2940 * them at end of line, it is possible to use those pointers.
2942 static struct
2944 long linepos;
2945 linebuffer lb;
2946 } lbs[2];
2948 #define current_lb_is_new (newndx == curndx)
2949 #define switch_line_buffers() (curndx = 1 - curndx)
2951 #define curlb (lbs[curndx].lb)
2952 #define newlb (lbs[newndx].lb)
2953 #define curlinepos (lbs[curndx].linepos)
2954 #define newlinepos (lbs[newndx].linepos)
2956 #define CNL_SAVE_DEFINEDEF() \
2957 do { \
2958 curlinepos = charno; \
2959 readline (&curlb, inf); \
2960 lp = curlb.buffer; \
2961 quotednl = FALSE; \
2962 newndx = curndx; \
2963 } while (0)
2965 #define CNL() \
2966 do { \
2967 CNL_SAVE_DEFINEDEF(); \
2968 if (savetoken.valid) \
2970 token = savetoken; \
2971 savetoken.valid = FALSE; \
2973 definedef = dnone; \
2974 } while (0)
2977 static void
2978 make_C_tag (isfun)
2979 bool isfun;
2981 /* This function should never be called when token.valid is FALSE, but
2982 we must protect against invalid input or internal errors. */
2983 if (DEBUG || token.valid)
2985 if (traditional_tag_style)
2987 /* This was the original code. Now we call new_pfnote instead,
2988 which uses the new method for naming tags (see new_pfnote). */
2989 char *name = NULL;
2991 if (CTAGS || token.named)
2992 name = savestr (token_name.buffer);
2993 if (DEBUG && !token.valid)
2995 if (token.named)
2996 name = concat (name, "##invalid##", "");
2997 else
2998 name = savestr ("##invalid##");
3000 pfnote (name, isfun, token.line,
3001 token.offset+token.length+1, token.lineno, token.linepos);
3003 else
3004 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
3005 token.offset+token.length+1, token.lineno, token.linepos);
3006 token.valid = FALSE;
3012 * C_entries ()
3013 * This routine finds functions, variables, typedefs,
3014 * #define's, enum constants and struct/union/enum definitions in
3015 * C syntax and adds them to the list.
3017 static void
3018 C_entries (c_ext, inf)
3019 int c_ext; /* extension of C */
3020 FILE *inf; /* input file */
3022 register char c; /* latest char read; '\0' for end of line */
3023 register char *lp; /* pointer one beyond the character `c' */
3024 int curndx, newndx; /* indices for current and new lb */
3025 register int tokoff; /* offset in line of start of current token */
3026 register int toklen; /* length of current token */
3027 char *qualifier; /* string used to qualify names */
3028 int qlen; /* length of qualifier */
3029 int cblev; /* current curly brace level */
3030 int parlev; /* current parenthesis level */
3031 int typdefcblev; /* cblev where a typedef struct body begun */
3032 bool incomm, inquote, inchar, quotednl, midtoken;
3033 bool cplpl, cjava;
3034 bool yacc_rules; /* in the rules part of a yacc file */
3035 struct tok savetoken; /* token saved during preprocessor handling */
3038 initbuffer (&token_name);
3039 initbuffer (&lbs[0].lb);
3040 initbuffer (&lbs[1].lb);
3041 if (cstack.size == 0)
3043 cstack.size = (DEBUG) ? 1 : 4;
3044 cstack.nl = 0;
3045 cstack.cname = xnew (cstack.size, char *);
3046 cstack.cblev = xnew (cstack.size, int);
3049 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3050 curndx = newndx = 0;
3051 lp = curlb.buffer;
3052 *lp = 0;
3054 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3055 structdef = snone; definedef = dnone; objdef = onone;
3056 yacc_rules = FALSE;
3057 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3058 token.valid = savetoken.valid = FALSE;
3059 cblev = 0;
3060 parlev = 0;
3061 cplpl = (c_ext & C_PLPL) == C_PLPL;
3062 cjava = (c_ext & C_JAVA) == C_JAVA;
3063 if (cjava)
3064 { qualifier = "."; qlen = 1; }
3065 else
3066 { qualifier = "::"; qlen = 2; }
3069 while (!feof (inf))
3071 c = *lp++;
3072 if (c == '\\')
3074 /* If we're at the end of the line, the next character is a
3075 '\0'; don't skip it, because it's the thing that tells us
3076 to read the next line. */
3077 if (*lp == '\0')
3079 quotednl = TRUE;
3080 continue;
3082 lp++;
3083 c = ' ';
3085 else if (incomm)
3087 switch (c)
3089 case '*':
3090 if (*lp == '/')
3092 c = *lp++;
3093 incomm = FALSE;
3095 break;
3096 case '\0':
3097 /* Newlines inside comments do not end macro definitions in
3098 traditional cpp. */
3099 CNL_SAVE_DEFINEDEF ();
3100 break;
3102 continue;
3104 else if (inquote)
3106 switch (c)
3108 case '"':
3109 inquote = FALSE;
3110 break;
3111 case '\0':
3112 /* Newlines inside strings do not end macro definitions
3113 in traditional cpp, even though compilers don't
3114 usually accept them. */
3115 CNL_SAVE_DEFINEDEF ();
3116 break;
3118 continue;
3120 else if (inchar)
3122 switch (c)
3124 case '\0':
3125 /* Hmmm, something went wrong. */
3126 CNL ();
3127 /* FALLTHRU */
3128 case '\'':
3129 inchar = FALSE;
3130 break;
3132 continue;
3134 else
3135 switch (c)
3137 case '"':
3138 inquote = TRUE;
3139 switch (fvdef)
3141 case fdefunkey:
3142 case fstartlist:
3143 case finlist:
3144 case fignore:
3145 case vignore:
3146 break;
3147 default:
3148 fvextern = FALSE;
3149 fvdef = fvnone;
3151 continue;
3152 case '\'':
3153 inchar = TRUE;
3154 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3156 fvextern = FALSE;
3157 fvdef = fvnone;
3159 continue;
3160 case '/':
3161 if (*lp == '*')
3163 lp++;
3164 incomm = TRUE;
3165 continue;
3167 else if (/* cplpl && */ *lp == '/')
3169 c = '\0';
3170 break;
3172 else
3173 break;
3174 case '%':
3175 if ((c_ext & YACC) && *lp == '%')
3177 /* Entering or exiting rules section in yacc file. */
3178 lp++;
3179 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3180 typdef = tnone; structdef = snone;
3181 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3182 cblev = 0;
3183 yacc_rules = !yacc_rules;
3184 continue;
3186 else
3187 break;
3188 case '#':
3189 if (definedef == dnone)
3191 char *cp;
3192 bool cpptoken = TRUE;
3194 /* Look back on this line. If all blanks, or nonblanks
3195 followed by an end of comment, this is a preprocessor
3196 token. */
3197 for (cp = newlb.buffer; cp < lp-1; cp++)
3198 if (!iswhite (*cp))
3200 if (*cp == '*' && *(cp+1) == '/')
3202 cp++;
3203 cpptoken = TRUE;
3205 else
3206 cpptoken = FALSE;
3208 if (cpptoken)
3209 definedef = dsharpseen;
3210 } /* if (definedef == dnone) */
3212 continue;
3213 } /* switch (c) */
3216 /* Consider token only if some involved conditions are satisfied. */
3217 if (typdef != tignore
3218 && definedef != dignorerest
3219 && fvdef != finlist
3220 && structdef != sintemplate
3221 && (definedef != dnone
3222 || structdef != scolonseen))
3224 if (midtoken)
3226 if (endtoken (c))
3228 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3231 * This handles :: in the middle, but not at the
3232 * beginning of an identifier. Also, space-separated
3233 * :: is not recognised.
3235 lp += 2;
3236 toklen += 2;
3237 c = lp[-1];
3238 goto still_in_token;
3240 else
3242 bool funorvar = FALSE;
3244 if (yacc_rules
3245 || consider_token (newlb.buffer + tokoff, toklen, c,
3246 &c_ext, cblev, parlev, &funorvar))
3248 if (fvdef == foperator)
3250 char *oldlp = lp;
3251 lp = skip_spaces (lp-1);
3252 if (*lp != '\0')
3253 lp += 1;
3254 while (*lp != '\0'
3255 && !iswhite (*lp) && *lp != '(')
3256 lp += 1;
3257 c = *lp++;
3258 toklen += lp - oldlp;
3260 token.named = FALSE;
3261 if ((c_ext & C_EXT) /* not pure C */
3262 && nestlev > 0 && definedef == dnone)
3263 /* in struct body */
3265 write_classname (&token_name, qualifier);
3266 linebuffer_setlen (&token_name,
3267 token_name.len+qlen+toklen);
3268 strcat (token_name.buffer, qualifier);
3269 strncat (token_name.buffer,
3270 newlb.buffer + tokoff, toklen);
3271 token.named = TRUE;
3273 else if (objdef == ocatseen)
3274 /* Objective C category */
3276 int len = strlen (objtag) + 2 + toklen;
3277 linebuffer_setlen (&token_name, len);
3278 strcpy (token_name.buffer, objtag);
3279 strcat (token_name.buffer, "(");
3280 strncat (token_name.buffer,
3281 newlb.buffer + tokoff, toklen);
3282 strcat (token_name.buffer, ")");
3283 token.named = TRUE;
3285 else if (objdef == omethodtag
3286 || objdef == omethodparm)
3287 /* Objective C method */
3289 token.named = TRUE;
3291 else if (fvdef == fdefunname)
3292 /* GNU DEFUN and similar macros */
3294 bool defun = (newlb.buffer[tokoff] == 'F');
3295 int off = tokoff;
3296 int len = toklen;
3298 /* Rewrite the tag so that emacs lisp DEFUNs
3299 can be found by their elisp name */
3300 if (defun)
3302 off += 1;
3303 len -= 1;
3305 len = toklen;
3306 linebuffer_setlen (&token_name, len);
3307 strncpy (token_name.buffer,
3308 newlb.buffer + off, len);
3309 token_name.buffer[len] = '\0';
3310 if (defun)
3311 while (--len >= 0)
3312 if (token_name.buffer[len] == '_')
3313 token_name.buffer[len] = '-';
3314 token.named = defun;
3316 else
3318 linebuffer_setlen (&token_name, toklen);
3319 strncpy (token_name.buffer,
3320 newlb.buffer + tokoff, toklen);
3321 token_name.buffer[toklen] = '\0';
3322 /* Name macros and members. */
3323 token.named = (structdef == stagseen
3324 || typdef == ttypeseen
3325 || typdef == tend
3326 || (funorvar
3327 && definedef == dignorerest)
3328 || (funorvar
3329 && definedef == dnone
3330 && structdef == snone
3331 && cblev > 0));
3333 token.lineno = lineno;
3334 token.offset = tokoff;
3335 token.length = toklen;
3336 token.line = newlb.buffer;
3337 token.linepos = newlinepos;
3338 token.valid = TRUE;
3340 if (definedef == dnone
3341 && (fvdef == fvnameseen
3342 || fvdef == foperator
3343 || structdef == stagseen
3344 || typdef == tend
3345 || typdef == ttypeseen
3346 || objdef != onone))
3348 if (current_lb_is_new)
3349 switch_line_buffers ();
3351 else if (definedef != dnone
3352 || fvdef == fdefunname
3353 || instruct)
3354 make_C_tag (funorvar);
3356 midtoken = FALSE;
3358 } /* if (endtoken (c)) */
3359 else if (intoken (c))
3360 still_in_token:
3362 toklen++;
3363 continue;
3365 } /* if (midtoken) */
3366 else if (begtoken (c))
3368 switch (definedef)
3370 case dnone:
3371 switch (fvdef)
3373 case fstartlist:
3374 fvdef = finlist;
3375 continue;
3376 case flistseen:
3377 make_C_tag (TRUE); /* a function */
3378 fvdef = fignore;
3379 break;
3380 case fvnameseen:
3381 fvdef = fvnone;
3382 break;
3384 if (structdef == stagseen && !cjava)
3386 popclass_above (cblev);
3387 structdef = snone;
3389 break;
3390 case dsharpseen:
3391 savetoken = token;
3392 break;
3394 if (!yacc_rules || lp == newlb.buffer + 1)
3396 tokoff = lp - 1 - newlb.buffer;
3397 toklen = 1;
3398 midtoken = TRUE;
3400 continue;
3401 } /* if (begtoken) */
3402 } /* if must look at token */
3405 /* Detect end of line, colon, comma, semicolon and various braces
3406 after having handled a token.*/
3407 switch (c)
3409 case ':':
3410 if (yacc_rules && token.offset == 0 && token.valid)
3412 make_C_tag (FALSE); /* a yacc function */
3413 break;
3415 if (definedef != dnone)
3416 break;
3417 switch (objdef)
3419 case otagseen:
3420 objdef = oignore;
3421 make_C_tag (TRUE); /* an Objective C class */
3422 break;
3423 case omethodtag:
3424 case omethodparm:
3425 objdef = omethodcolon;
3426 linebuffer_setlen (&token_name, token_name.len + 1);
3427 strcat (token_name.buffer, ":");
3428 break;
3430 if (structdef == stagseen)
3431 structdef = scolonseen;
3432 break;
3433 case ';':
3434 if (definedef != dnone)
3435 break;
3436 switch (typdef)
3438 case tend:
3439 case ttypeseen:
3440 make_C_tag (FALSE); /* a typedef */
3441 typdef = tnone;
3442 fvdef = fvnone;
3443 break;
3444 case tnone:
3445 case tinbody:
3446 case tignore:
3447 switch (fvdef)
3449 case fignore:
3450 if (typdef == tignore)
3451 fvdef = fvnone;
3452 break;
3453 case fvnameseen:
3454 if ((globals && cblev == 0 && (!fvextern || declarations))
3455 || (members && instruct))
3456 make_C_tag (FALSE); /* a variable */
3457 fvextern = FALSE;
3458 fvdef = fvnone;
3459 token.valid = FALSE;
3460 break;
3461 case flistseen:
3462 if ((declarations && typdef == tnone && !instruct)
3463 || (members && typdef != tignore && instruct))
3464 make_C_tag (TRUE); /* a function declaration */
3465 /* FALLTHRU */
3466 default:
3467 fvextern = FALSE;
3468 fvdef = fvnone;
3469 if (declarations
3470 && structdef == stagseen && (c_ext & C_PLPL))
3471 make_C_tag (FALSE); /* forward declaration */
3472 else
3473 /* The following instruction invalidates the token.
3474 Probably the token should be invalidated in all other
3475 cases where some state machine is reset prematurely. */
3476 token.valid = FALSE;
3477 } /* switch (fvdef) */
3478 /* FALLTHRU */
3479 default:
3480 if (!instruct)
3481 typdef = tnone;
3483 if (structdef == stagseen)
3484 structdef = snone;
3485 break;
3486 case ',':
3487 if (definedef != dnone)
3488 break;
3489 switch (objdef)
3491 case omethodtag:
3492 case omethodparm:
3493 make_C_tag (TRUE); /* an Objective C method */
3494 objdef = oinbody;
3495 break;
3497 switch (fvdef)
3499 case fdefunkey:
3500 case foperator:
3501 case fstartlist:
3502 case finlist:
3503 case fignore:
3504 case vignore:
3505 break;
3506 case fdefunname:
3507 fvdef = fignore;
3508 break;
3509 case fvnameseen: /* a variable */
3510 if ((globals && cblev == 0 && (!fvextern || declarations))
3511 || (members && instruct))
3512 make_C_tag (FALSE);
3513 break;
3514 case flistseen: /* a function */
3515 if ((declarations && typdef == tnone && !instruct)
3516 || (members && typdef != tignore && instruct))
3518 make_C_tag (TRUE); /* a function declaration */
3519 fvdef = fvnameseen;
3521 else if (!declarations)
3522 fvdef = fvnone;
3523 token.valid = FALSE;
3524 break;
3525 default:
3526 fvdef = fvnone;
3528 if (structdef == stagseen)
3529 structdef = snone;
3530 break;
3531 case '[':
3532 if (definedef != dnone)
3533 break;
3534 if (structdef == stagseen)
3535 structdef = snone;
3536 switch (typdef)
3538 case ttypeseen:
3539 case tend:
3540 typdef = tignore;
3541 make_C_tag (FALSE); /* a typedef */
3542 break;
3543 case tnone:
3544 case tinbody:
3545 switch (fvdef)
3547 case foperator:
3548 case finlist:
3549 case fignore:
3550 case vignore:
3551 break;
3552 case fvnameseen:
3553 if ((members && cblev == 1)
3554 || (globals && cblev == 0
3555 && (!fvextern || declarations)))
3556 make_C_tag (FALSE); /* a variable */
3557 /* FALLTHRU */
3558 default:
3559 fvdef = fvnone;
3561 break;
3563 break;
3564 case '(':
3565 if (definedef != dnone)
3566 break;
3567 if (objdef == otagseen && parlev == 0)
3568 objdef = oparenseen;
3569 switch (fvdef)
3571 case fvnameseen:
3572 if (typdef == ttypeseen
3573 && *lp != '*'
3574 && !instruct)
3576 /* This handles constructs like:
3577 typedef void OperatorFun (int fun); */
3578 make_C_tag (FALSE);
3579 typdef = tignore;
3580 fvdef = fignore;
3581 break;
3583 /* FALLTHRU */
3584 case foperator:
3585 fvdef = fstartlist;
3586 break;
3587 case flistseen:
3588 fvdef = finlist;
3589 break;
3591 parlev++;
3592 break;
3593 case ')':
3594 if (definedef != dnone)
3595 break;
3596 if (objdef == ocatseen && parlev == 1)
3598 make_C_tag (TRUE); /* an Objective C category */
3599 objdef = oignore;
3601 if (--parlev == 0)
3603 switch (fvdef)
3605 case fstartlist:
3606 case finlist:
3607 fvdef = flistseen;
3608 break;
3610 if (!instruct
3611 && (typdef == tend
3612 || typdef == ttypeseen))
3614 typdef = tignore;
3615 make_C_tag (FALSE); /* a typedef */
3618 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3619 parlev = 0;
3620 break;
3621 case '{':
3622 if (definedef != dnone)
3623 break;
3624 if (typdef == ttypeseen)
3626 /* Whenever typdef is set to tinbody (currently only
3627 here), typdefcblev should be set to cblev. */
3628 typdef = tinbody;
3629 typdefcblev = cblev;
3631 switch (fvdef)
3633 case flistseen:
3634 make_C_tag (TRUE); /* a function */
3635 /* FALLTHRU */
3636 case fignore:
3637 fvdef = fvnone;
3638 break;
3639 case fvnone:
3640 switch (objdef)
3642 case otagseen:
3643 make_C_tag (TRUE); /* an Objective C class */
3644 objdef = oignore;
3645 break;
3646 case omethodtag:
3647 case omethodparm:
3648 make_C_tag (TRUE); /* an Objective C method */
3649 objdef = oinbody;
3650 break;
3651 default:
3652 /* Neutralize `extern "C" {' grot. */
3653 if (cblev == 0 && structdef == snone && nestlev == 0
3654 && typdef == tnone)
3655 cblev = -1;
3657 break;
3659 switch (structdef)
3661 case skeyseen: /* unnamed struct */
3662 pushclass_above (cblev, NULL, 0);
3663 structdef = snone;
3664 break;
3665 case stagseen: /* named struct or enum */
3666 case scolonseen: /* a class */
3667 pushclass_above (cblev, token.line+token.offset, token.length);
3668 structdef = snone;
3669 make_C_tag (FALSE); /* a struct or enum */
3670 break;
3672 cblev++;
3673 break;
3674 case '*':
3675 if (definedef != dnone)
3676 break;
3677 if (fvdef == fstartlist)
3678 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3679 break;
3680 case '}':
3681 if (definedef != dnone)
3682 break;
3683 if (!noindentypedefs && lp == newlb.buffer + 1)
3685 cblev = 0; /* reset curly brace level if first column */
3686 parlev = 0; /* also reset paren level, just in case... */
3688 else if (cblev > 0)
3689 cblev--;
3690 popclass_above (cblev);
3691 structdef = snone;
3692 /* Only if typdef == tinbody is typdefcblev significant. */
3693 if (typdef == tinbody && cblev <= typdefcblev)
3695 assert (cblev == typdefcblev);
3696 typdef = tend;
3698 break;
3699 case '=':
3700 if (definedef != dnone)
3701 break;
3702 switch (fvdef)
3704 case foperator:
3705 case finlist:
3706 case fignore:
3707 case vignore:
3708 break;
3709 case fvnameseen:
3710 if ((members && cblev == 1)
3711 || (globals && cblev == 0 && (!fvextern || declarations)))
3712 make_C_tag (FALSE); /* a variable */
3713 /* FALLTHRU */
3714 default:
3715 fvdef = vignore;
3717 break;
3718 case '<':
3719 if (cplpl && structdef == stagseen)
3721 structdef = sintemplate;
3722 break;
3724 goto resetfvdef;
3725 case '>':
3726 if (structdef == sintemplate)
3728 structdef = stagseen;
3729 break;
3731 goto resetfvdef;
3732 case '+':
3733 case '-':
3734 if (objdef == oinbody && cblev == 0)
3736 objdef = omethodsign;
3737 break;
3739 /* FALLTHRU */
3740 resetfvdef:
3741 case '#': case '~': case '&': case '%': case '/': case '|':
3742 case '^': case '!': case '.': case '?': case ']':
3743 if (definedef != dnone)
3744 break;
3745 /* These surely cannot follow a function tag in C. */
3746 switch (fvdef)
3748 case foperator:
3749 case finlist:
3750 case fignore:
3751 case vignore:
3752 break;
3753 default:
3754 fvdef = fvnone;
3756 break;
3757 case '\0':
3758 if (objdef == otagseen)
3760 make_C_tag (TRUE); /* an Objective C class */
3761 objdef = oignore;
3763 /* If a macro spans multiple lines don't reset its state. */
3764 if (quotednl)
3765 CNL_SAVE_DEFINEDEF ();
3766 else
3767 CNL ();
3768 break;
3769 } /* switch (c) */
3771 } /* while not eof */
3773 free (token_name.buffer);
3774 free (lbs[0].lb.buffer);
3775 free (lbs[1].lb.buffer);
3779 * Process either a C++ file or a C file depending on the setting
3780 * of a global flag.
3782 static void
3783 default_C_entries (inf)
3784 FILE *inf;
3786 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Scan INF as plain C: C_entries is called with no language-extension
   flags set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3797 /* Always do C++. */
3798 static void
3799 Cplusplus_entries (inf)
3800 FILE *inf;
3802 C_entries (C_PLPL, inf);
3805 /* Always do Java. */
3806 static void
3807 Cjava_entries (inf)
3808 FILE *inf;
3810 C_entries (C_JAVA, inf);
3813 /* Always do C*. */
3814 static void
3815 Cstar_entries (inf)
3816 FILE *inf;
3818 C_entries (C_STAR, inf);
3821 /* Always do Yacc. */
3822 static void
3823 Yacc_entries (inf)
3824 FILE *inf;
3826 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that iterates over the lines of FILE_POINTER.  Before
   each pass of the loop body the next line is read into LINE_BUFFER
   with readline, and CHAR_POINTER is set to the start of that line.

   CHAR_POINTER is assigned from LINE_BUFFER itself (not from the global
   `lb'), and only after readline has returned, so that it cannot be
   left pointing at an old buffer should readline move the storage.
   NOTE(review): readline is defined elsewhere; confirm it may
   reallocate the buffer.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  for (;			/* loop initialization */		\
       !feof (file_pointer)	/* loop test */				\
       &&			/* instructions at start of loop */	\
	  (readline (&line_buffer, file_pointer),			\
	   char_pointer = line_buffer.buffer,				\
	   TRUE);							\
      )
/* True if CP points at the literal string KW followed by a character
   for which notinname is true.  On a match CP is advanced past KW and
   past whatever skip_spaces skips; otherwise CP is left unchanged.
   KW must be a string constant, since its length is taken with sizeof.  */
#define LOOKING_AT(cp, kw)						\
  (strneq ((cp), kw, sizeof (kw) - 1)		/* cp points at kw */	\
   && notinname ((cp)[sizeof (kw) - 1])		/* end of kw */		\
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))) /* skip spaces */
3844 * Read a file, but do no processing. This is used to do regexp
3845 * matching on files that have no language defined.
3847 static void
3848 just_read_file (inf)
3849 FILE *inf;
3851 register char *dummy;
3853 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3854 continue;
3858 /* Fortran parsing */
3860 static void F_takeprec __P((void));
3861 static void F_getit __P((FILE *));
3863 static void
3864 F_takeprec ()
3866 dbp = skip_spaces (dbp);
3867 if (*dbp != '*')
3868 return;
3869 dbp++;
3870 dbp = skip_spaces (dbp);
3871 if (strneq (dbp, "(*)", 3))
3873 dbp += 3;
3874 return;
3876 if (!ISDIGIT (*dbp))
3878 --dbp; /* force failure */
3879 return;
3882 dbp++;
3883 while (ISDIGIT (*dbp));
3886 static void
3887 F_getit (inf)
3888 FILE *inf;
3890 register char *cp;
3892 dbp = skip_spaces (dbp);
3893 if (*dbp == '\0')
3895 readline (&lb, inf);
3896 dbp = lb.buffer;
3897 if (dbp[5] != '&')
3898 return;
3899 dbp += 6;
3900 dbp = skip_spaces (dbp);
3902 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3903 return;
3904 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3905 continue;
3906 pfnote (savenstr (dbp, cp-dbp), TRUE,
3907 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3911 static void
3912 Fortran_functions (inf)
3913 FILE *inf;
3915 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3917 if (*dbp == '%')
3918 dbp++; /* Ratfor escape to fortran */
3919 dbp = skip_spaces (dbp);
3920 if (*dbp == '\0')
3921 continue;
3922 switch (lowcase (*dbp))
3924 case 'i':
3925 if (nocase_tail ("integer"))
3926 F_takeprec ();
3927 break;
3928 case 'r':
3929 if (nocase_tail ("real"))
3930 F_takeprec ();
3931 break;
3932 case 'l':
3933 if (nocase_tail ("logical"))
3934 F_takeprec ();
3935 break;
3936 case 'c':
3937 if (nocase_tail ("complex") || nocase_tail ("character"))
3938 F_takeprec ();
3939 break;
3940 case 'd':
3941 if (nocase_tail ("double"))
3943 dbp = skip_spaces (dbp);
3944 if (*dbp == '\0')
3945 continue;
3946 if (nocase_tail ("precision"))
3947 break;
3948 continue;
3950 break;
3952 dbp = skip_spaces (dbp);
3953 if (*dbp == '\0')
3954 continue;
3955 switch (lowcase (*dbp))
3957 case 'f':
3958 if (nocase_tail ("function"))
3959 F_getit (inf);
3960 continue;
3961 case 's':
3962 if (nocase_tail ("subroutine"))
3963 F_getit (inf);
3964 continue;
3965 case 'e':
3966 if (nocase_tail ("entry"))
3967 F_getit (inf);
3968 continue;
3969 case 'b':
3970 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3972 dbp = skip_spaces (dbp);
3973 if (*dbp == '\0') /* assume un-named */
3974 pfnote (savestr ("blockdata"), TRUE,
3975 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3976 else
3977 F_getit (inf); /* look for name */
3979 continue;
/*
 * Ada parsing
 * Original code by
 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
 */
3991 static void Ada_getit __P((FILE *, char *));
3993 /* Once we are positioned after an "interesting" keyword, let's get
3994 the real tag value necessary. */
3995 static void
3996 Ada_getit (inf, name_qualifier)
3997 FILE *inf;
3998 char *name_qualifier;
4000 register char *cp;
4001 char *name;
4002 char c;
4004 while (!feof (inf))
4006 dbp = skip_spaces (dbp);
4007 if (*dbp == '\0'
4008 || (dbp[0] == '-' && dbp[1] == '-'))
4010 readline (&lb, inf);
4011 dbp = lb.buffer;
4013 switch (lowcase(*dbp))
4015 case 'b':
4016 if (nocase_tail ("body"))
4018 /* Skipping body of procedure body or package body or ....
4019 resetting qualifier to body instead of spec. */
4020 name_qualifier = "/b";
4021 continue;
4023 break;
4024 case 't':
4025 /* Skipping type of task type or protected type ... */
4026 if (nocase_tail ("type"))
4027 continue;
4028 break;
4030 if (*dbp == '"')
4032 dbp += 1;
4033 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4034 continue;
4036 else
4038 dbp = skip_spaces (dbp);
4039 for (cp = dbp;
4040 (*cp != '\0'
4041 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4042 cp++)
4043 continue;
4044 if (cp == dbp)
4045 return;
4047 c = *cp;
4048 *cp = '\0';
4049 name = concat (dbp, name_qualifier, "");
4050 *cp = c;
4051 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4052 if (c == '"')
4053 dbp = cp + 1;
4054 return;
4058 static void
4059 Ada_funcs (inf)
4060 FILE *inf;
4062 bool inquote = FALSE;
4064 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4066 while (*dbp != '\0')
4068 /* Skip a string i.e. "abcd". */
4069 if (inquote || (*dbp == '"'))
4071 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4072 if (dbp != NULL)
4074 inquote = FALSE;
4075 dbp += 1;
4076 continue; /* advance char */
4078 else
4080 inquote = TRUE;
4081 break; /* advance line */
4085 /* Skip comments. */
4086 if (dbp[0] == '-' && dbp[1] == '-')
4087 break; /* advance line */
4089 /* Skip character enclosed in single quote i.e. 'a'
4090 and skip single quote starting an attribute i.e. 'Image. */
4091 if (*dbp == '\'')
4093 dbp++ ;
4094 if (*dbp != '\0')
4095 dbp++;
4096 continue;
4099 /* Search for beginning of a token. */
4100 if (!begtoken (*dbp))
4102 dbp++;
4103 continue; /* advance char */
4106 /* We are at the beginning of a token. */
4107 switch (lowcase(*dbp))
4109 case 'f':
4110 if (!packages_only && nocase_tail ("function"))
4111 Ada_getit (inf, "/f");
4112 else
4113 break; /* from switch */
4114 continue; /* advance char */
4115 case 'p':
4116 if (!packages_only && nocase_tail ("procedure"))
4117 Ada_getit (inf, "/p");
4118 else if (nocase_tail ("package"))
4119 Ada_getit (inf, "/s");
4120 else if (nocase_tail ("protected")) /* protected type */
4121 Ada_getit (inf, "/t");
4122 else
4123 break; /* from switch */
4124 continue; /* advance char */
4125 case 't':
4126 if (!packages_only && nocase_tail ("task"))
4127 Ada_getit (inf, "/k");
4128 else if (typedefs && !packages_only && nocase_tail ("type"))
4130 Ada_getit (inf, "/t");
4131 while (*dbp != '\0')
4132 dbp += 1;
4134 else
4135 break; /* from switch */
4136 continue; /* advance char */
4139 /* Look for the end of the token. */
4140 while (!endtoken (*dbp))
4141 dbp++;
4143 } /* advance char */
4144 } /* advance line */
4149 * Unix and microcontroller assembly tag handling
4150 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4151 * Idea by Bob Weiner, Motorola Inc. (1994)
4153 static void
4154 Asm_labels (inf)
4155 FILE *inf;
4157 register char *cp;
4159 LOOP_ON_INPUT_LINES (inf, lb, cp)
4161 /* If first char is alphabetic or one of [_.$], test for colon
4162 following identifier. */
4163 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4165 /* Read past label. */
4166 cp++;
4167 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4168 cp++;
4169 if (*cp == ':' || iswhite (*cp))
4171 /* Found end of label, so copy it and add it to the table. */
4172 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4173 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4181 * Perl support
4182 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4183 * Perl variable names: /^(my|local).../
4184 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4185 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4186 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4188 static void
4189 Perl_functions (inf)
4190 FILE *inf;
4192 char *package = savestr ("main"); /* current package name */
4193 register char *cp;
4195 LOOP_ON_INPUT_LINES (inf, lb, cp)
4197 skip_spaces(cp);
4199 if (LOOKING_AT (cp, "package"))
4201 free (package);
4202 package = get_tag (cp);
4203 if (package == NULL) /* can't parse package name */
4204 package = savestr ("");
4205 else
4206 package = savestr(package); /* make a copy */
4208 else if (LOOKING_AT (cp, "sub"))
4210 char *name, *fullname, *pos;
4211 char *sp = cp;
4213 while (!notinname (*cp))
4214 cp++;
4215 if (cp == sp)
4216 continue;
4217 name = savenstr (sp, cp-sp);
4218 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4219 fullname = name;
4220 else
4221 fullname = concat (package, "::", name);
4222 pfnote (fullname, TRUE,
4223 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4224 if (name != fullname)
4225 free (name);
4227 else if (globals /* only if tagging global vars is enabled */
4228 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4230 /* After "my" or "local", but before any following paren or space. */
4231 char *varname = NULL;
4233 if (*cp == '$' || *cp == '@' || *cp == '%')
4235 char* varstart = ++cp;
4236 while (ISALNUM (*cp) || *cp == '_')
4237 cp++;
4238 varname = savenstr (varstart, cp-varstart);
4240 else
4242 /* Should be examining a variable list at this point;
4243 could insist on seeing an open parenthesis. */
4244 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4245 cp++;
4248 /* Perhaps I should back cp up one character, so the TAGS table
4249 doesn't mention (and so depend upon) the following char. */
4250 pfnote (varname, FALSE,
4251 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4258 * Python support
4259 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4260 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4261 * More ideas by seb bacon <seb@jamkit.com> (2002)
4263 static void
4264 Python_functions (inf)
4265 FILE *inf;
4267 register char *cp;
4269 LOOP_ON_INPUT_LINES (inf, lb, cp)
4271 cp = skip_spaces (cp);
4272 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4274 char *name = cp;
4275 while (!notinname (*cp) && *cp != ':')
4276 cp++;
4277 pfnote (savenstr (name, cp-name), TRUE,
4278 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4285 * PHP support
4286 * Look for:
4287 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4288 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4289 * - /^[ \t]*define\(\"[^\"]+/
4290 * Only with --members:
4291 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4292 * Idea by Diez B. Roggisch (2001)
4294 static void
4295 PHP_functions (inf)
4296 FILE *inf;
4298 register char *cp, *name;
4299 bool search_identifier = FALSE;
4301 LOOP_ON_INPUT_LINES (inf, lb, cp)
4303 cp = skip_spaces (cp);
4304 name = cp;
4305 if (search_identifier
4306 && *cp != '\0')
4308 while (!notinname (*cp))
4309 cp++;
4310 pfnote (savenstr (name, cp-name), TRUE,
4311 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312 search_identifier = FALSE;
4314 else if (LOOKING_AT (cp, "function"))
4316 if(*cp == '&')
4317 cp = skip_spaces (cp+1);
4318 if(*cp != '\0')
4320 name = cp;
4321 while (!notinname (*cp))
4322 cp++;
4323 pfnote (savenstr (name, cp-name), TRUE,
4324 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4326 else
4327 search_identifier = TRUE;
4329 else if (LOOKING_AT (cp, "class"))
4331 if (*cp != '\0')
4333 name = cp;
4334 while (*cp != '\0' && !iswhite (*cp))
4335 cp++;
4336 pfnote (savenstr (name, cp-name), FALSE,
4337 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4339 else
4340 search_identifier = TRUE;
4342 else if (strneq (cp, "define", 6)
4343 && (cp = skip_spaces (cp+6))
4344 && *cp++ == '('
4345 && (*cp == '"' || *cp == '\''))
4347 char quote = *cp++;
4348 name = cp;
4349 while (*cp != quote && *cp != '\0')
4350 cp++;
4351 pfnote (savenstr (name, cp-name), FALSE,
4352 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4354 else if (members
4355 && LOOKING_AT (cp, "var")
4356 && *cp == '$')
4358 name = cp;
4359 while (!notinname(*cp))
4360 cp++;
4361 pfnote (savenstr (name, cp-name), FALSE,
4362 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4369 * Cobol tag functions
4370 * We could look for anything that could be a paragraph name.
4371 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4372 * Idea by Corny de Souza (1993)
4374 static void
4375 Cobol_paragraphs (inf)
4376 FILE *inf;
4378 register char *bp, *ep;
4380 LOOP_ON_INPUT_LINES (inf, lb, bp)
4382 if (lb.len < 9)
4383 continue;
4384 bp += 8;
4386 /* If eoln, compiler option or comment ignore whole line. */
4387 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4388 continue;
4390 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4391 continue;
4392 if (*ep++ == '.')
4393 pfnote (savenstr (bp, ep-bp), TRUE,
4394 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4400 * Makefile support
4401 * Idea by Assar Westerlund <assar@sics.se> (2001)
4403 static void
4404 Makefile_targets (inf)
4405 FILE *inf;
4407 register char *bp;
4409 LOOP_ON_INPUT_LINES (inf, lb, bp)
4411 if (*bp == '\t' || *bp == '#')
4412 continue;
4413 while (*bp != '\0' && *bp != '=' && *bp != ':')
4414 bp++;
4415 if (*bp == ':')
4416 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4417 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4423 * Pascal parsing
4424 * Original code by Mosur K. Mohan (1989)
4426 * Locates tags for procedures & functions. Doesn't do any type- or
4427 * var-definitions. It does look for the keyword "extern" or
4428 * "forward" immediately following the procedure statement; if found,
4429 * the tag is skipped.
4431 static void
4432 Pascal_functions (inf)
4433 FILE *inf;
4435 linebuffer tline; /* mostly copied from C_entries */
4436 long save_lcno;
4437 int save_lineno, save_len;
4438 char c, *cp, *namebuf;
4440 bool /* each of these flags is TRUE iff: */
4441 incomment, /* point is inside a comment */
4442 inquote, /* point is inside '..' string */
4443 get_tagname, /* point is after PROCEDURE/FUNCTION
4444 keyword, so next item = potential tag */
4445 found_tag, /* point is after a potential tag */
4446 inparms, /* point is within parameter-list */
4447 verify_tag; /* point has passed the parm-list, so the
4448 next token will determine whether this
4449 is a FORWARD/EXTERN to be ignored, or
4450 whether it is a real tag */
4452 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4453 namebuf = NULL; /* keep compiler quiet */
4454 dbp = lb.buffer;
4455 *dbp = '\0';
4456 initbuffer (&tline);
4458 incomment = inquote = FALSE;
4459 found_tag = FALSE; /* have a proc name; check if extern */
4460 get_tagname = FALSE; /* have found "procedure" keyword */
4461 inparms = FALSE; /* found '(' after "proc" */
4462 verify_tag = FALSE; /* check if "extern" is ahead */
4465 while (!feof (inf)) /* long main loop to get next char */
4467 c = *dbp++;
4468 if (c == '\0') /* if end of line */
4470 readline (&lb, inf);
4471 dbp = lb.buffer;
4472 if (*dbp == '\0')
4473 continue;
4474 if (!((found_tag && verify_tag)
4475 || get_tagname))
4476 c = *dbp++; /* only if don't need *dbp pointing
4477 to the beginning of the name of
4478 the procedure or function */
4480 if (incomment)
4482 if (c == '}') /* within { } comments */
4483 incomment = FALSE;
4484 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4486 dbp++;
4487 incomment = FALSE;
4489 continue;
4491 else if (inquote)
4493 if (c == '\'')
4494 inquote = FALSE;
4495 continue;
4497 else
4498 switch (c)
4500 case '\'':
4501 inquote = TRUE; /* found first quote */
4502 continue;
4503 case '{': /* found open { comment */
4504 incomment = TRUE;
4505 continue;
4506 case '(':
4507 if (*dbp == '*') /* found open (* comment */
4509 incomment = TRUE;
4510 dbp++;
4512 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4513 inparms = TRUE;
4514 continue;
4515 case ')': /* end of parms list */
4516 if (inparms)
4517 inparms = FALSE;
4518 continue;
4519 case ';':
4520 if (found_tag && !inparms) /* end of proc or fn stmt */
4522 verify_tag = TRUE;
4523 break;
4525 continue;
4527 if (found_tag && verify_tag && (*dbp != ' '))
4529 /* check if this is an "extern" declaration */
4530 if (*dbp == '\0')
4531 continue;
4532 if (lowcase (*dbp == 'e'))
4534 if (nocase_tail ("extern")) /* superfluous, really! */
4536 found_tag = FALSE;
4537 verify_tag = FALSE;
4540 else if (lowcase (*dbp) == 'f')
4542 if (nocase_tail ("forward")) /* check for forward reference */
4544 found_tag = FALSE;
4545 verify_tag = FALSE;
4548 if (found_tag && verify_tag) /* not external proc, so make tag */
4550 found_tag = FALSE;
4551 verify_tag = FALSE;
4552 pfnote (namebuf, TRUE,
4553 tline.buffer, save_len, save_lineno, save_lcno);
4554 continue;
4557 if (get_tagname) /* grab name of proc or fn */
4559 if (*dbp == '\0')
4560 continue;
4562 /* save all values for later tagging */
4563 linebuffer_setlen (&tline, lb.len);
4564 strcpy (tline.buffer, lb.buffer);
4565 save_lineno = lineno;
4566 save_lcno = linecharno;
4568 /* grab block name */
4569 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4570 continue;
4571 namebuf = savenstr (dbp, cp-dbp);
4572 dbp = cp; /* set dbp to e-o-token */
4573 save_len = dbp - lb.buffer + 1;
4574 get_tagname = FALSE;
4575 found_tag = TRUE;
4576 continue;
4578 /* and proceed to check for "extern" */
4580 else if (!incomment && !inquote && !found_tag)
4582 /* check for proc/fn keywords */
4583 switch (lowcase (c))
4585 case 'p':
4586 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4587 get_tagname = TRUE;
4588 continue;
4589 case 'f':
4590 if (nocase_tail ("unction"))
4591 get_tagname = TRUE;
4592 continue;
4595 } /* while not eof */
4597 free (tline.buffer);
/*
 * Lisp tag functions
 *  look for (def or (DEF, quote or QUOTE
 */
4606 static void L_getit __P((void));
4608 static void
4609 L_getit ()
4611 if (*dbp == '\'') /* Skip prefix quote */
4612 dbp++;
4613 else if (*dbp == '(')
4615 dbp++;
4616 /* Try to skip "(quote " */
4617 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4618 /* Ok, then skip "(" before name in (defstruct (foo)) */
4619 dbp = skip_spaces (dbp);
4621 get_tag (dbp);
4624 static void
4625 Lisp_functions (inf)
4626 FILE *inf;
4628 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4630 if (dbp[0] != '(')
4631 continue;
4633 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4635 dbp = skip_non_spaces (dbp);
4636 dbp = skip_spaces (dbp);
4637 L_getit ();
4639 else
4641 /* Check for (foo::defmumble name-defined ... */
4643 dbp++;
4644 while (!notinname (*dbp) && *dbp != ':');
4645 if (*dbp == ':')
4648 dbp++;
4649 while (*dbp == ':');
4651 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4653 dbp = skip_non_spaces (dbp);
4654 dbp = skip_spaces (dbp);
4655 L_getit ();
4664 * Postscript tag functions
4665 * Just look for lines where the first character is '/'
4666 * Also look at "defineps" for PSWrap
4667 * Ideas by:
4668 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4669 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4671 static void
4672 Postscript_functions (inf)
4673 FILE *inf;
4675 register char *bp, *ep;
4677 LOOP_ON_INPUT_LINES (inf, lb, bp)
4679 if (bp[0] == '/')
4681 for (ep = bp+1;
4682 *ep != '\0' && *ep != ' ' && *ep != '{';
4683 ep++)
4684 continue;
4685 pfnote (savenstr (bp, ep-bp), TRUE,
4686 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4688 else if (LOOKING_AT (bp, "defineps"))
4689 get_tag (bp);
4695 * Scheme tag functions
4696 * look for (def... xyzzy
4697 * (def... (xyzzy
4698 * (def ... ((...(xyzzy ....
4699 * (set! xyzzy
4700 * Original code by Ken Haase (1985?)
4703 static void
4704 Scheme_functions (inf)
4705 FILE *inf;
4707 register char *bp;
4709 LOOP_ON_INPUT_LINES (inf, lb, bp)
4711 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4713 bp = skip_non_spaces (bp+4);
4714 /* Skip over open parens and white space */
4715 while (notinname (*bp))
4716 bp++;
4717 get_tag (bp);
4719 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4720 get_tag (bp);
4725 /* Find tags in TeX and LaTeX input files. */
/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */
4735 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4737 /* Default set of control sequences to put into TEX_toktab.
4738 The value of environment var TEXTAGS is prepended to this. */
4739 static char *TEX_defenv = "\
4740 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4741 :part:appendix:entry:index:def\
4742 :newcommand:renewcommand:newenvironment:renewenvironment";
4744 static void TEX_mode __P((FILE *));
4745 static void TEX_decode_env __P((char *, char *));
4747 static char TEX_esc = '\\';
4748 static char TEX_opgrp = '{';
4749 static char TEX_clgrp = '}';
4752 * TeX/LaTeX scanning loop.
4754 static void
4755 TeX_commands (inf)
4756 FILE *inf;
4758 char *cp;
4759 linebuffer *key;
4761 /* Select either \ or ! as escape character. */
4762 TEX_mode (inf);
4764 /* Initialize token table once from environment. */
4765 if (TEX_toktab == NULL)
4766 TEX_decode_env ("TEXTAGS", TEX_defenv);
4768 LOOP_ON_INPUT_LINES (inf, lb, cp)
4770 /* Look at each TEX keyword in line. */
4771 for (;;)
4773 /* Look for a TEX escape. */
4774 while (*cp++ != TEX_esc)
4775 if (cp[-1] == '\0' || cp[-1] == '%')
4776 goto tex_next_line;
4778 for (key = TEX_toktab; key->buffer != NULL; key++)
4779 if (strneq (cp, key->buffer, key->len))
4781 register char *p;
4782 char *name;
4783 int linelen;
4784 bool opgrp = FALSE;
4786 cp = skip_spaces (cp + key->len);
4787 if (*cp == TEX_opgrp)
4789 opgrp = TRUE;
4790 cp++;
4792 for (p = cp;
4793 (!iswhite (*p) && *p != '#' &&
4794 *p != TEX_opgrp && *p != TEX_clgrp);
4795 p++)
4796 continue;
4797 name = savenstr (cp, p-cp);
4798 linelen = lb.len;
4799 if (!opgrp || *p == TEX_clgrp)
4801 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4802 *p++;
4803 linelen = p - lb.buffer + 1;
4805 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4806 goto tex_next_line; /* We only tag a line once */
4809 tex_next_line:
4814 #define TEX_LESC '\\'
4815 #define TEX_SESC '!'
4817 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4818 chars accordingly. */
4819 static void
4820 TEX_mode (inf)
4821 FILE *inf;
4823 int c;
4825 while ((c = getc (inf)) != EOF)
4827 /* Skip to next line if we hit the TeX comment char. */
4828 if (c == '%')
4829 while (c != '\n')
4830 c = getc (inf);
4831 else if (c == TEX_LESC || c == TEX_SESC )
4832 break;
4835 if (c == TEX_LESC)
4837 TEX_esc = TEX_LESC;
4838 TEX_opgrp = '{';
4839 TEX_clgrp = '}';
4841 else
4843 TEX_esc = TEX_SESC;
4844 TEX_opgrp = '<';
4845 TEX_clgrp = '>';
4847 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4848 No attempt is made to correct the situation. */
4849 rewind (inf);
4852 /* Read environment and prepend it to the default string.
4853 Build token table. */
4854 static void
4855 TEX_decode_env (evarname, defenv)
4856 char *evarname;
4857 char *defenv;
4859 register char *env, *p;
4860 int i, len;
4862 /* Append default string to environment. */
4863 env = getenv (evarname);
4864 if (!env)
4865 env = defenv;
4866 else
4868 char *oldenv = env;
4869 env = concat (oldenv, defenv, "");
4872 /* Allocate a token table */
4873 for (len = 1, p = env; p;)
4874 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4875 len++;
4876 TEX_toktab = xnew (len, linebuffer);
4878 /* Unpack environment string into token table. Be careful about */
4879 /* zero-length strings (leading ':', "::" and trailing ':') */
4880 for (i = 0; *env != '\0';)
4882 p = etags_strchr (env, ':');
4883 if (!p) /* End of environment string. */
4884 p = env + strlen (env);
4885 if (p - env > 0)
4886 { /* Only non-zero strings. */
4887 TEX_toktab[i].buffer = savenstr (env, p - env);
4888 TEX_toktab[i].len = p - env;
4889 i++;
4891 if (*p)
4892 env = p + 1;
4893 else
4895 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4896 TEX_toktab[i].len = 0;
4897 break;
4903 /* Texinfo support. Dave Love, Mar. 2000. */
4904 static void
4905 Texinfo_nodes (inf)
4906 FILE * inf;
4908 char *cp, *start;
4909 LOOP_ON_INPUT_LINES (inf, lb, cp)
4910 if (LOOKING_AT (cp, "@node"))
4912 start = cp;
4913 while (*cp != '\0' && *cp != ',')
4914 cp++;
4915 pfnote (savenstr (start, cp - start), TRUE,
4916 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/*
 * Prolog support
 *
 * Assumes that the predicate or rule starts at column 0.
 * Only the first clause of a predicate or rule is added.
 * Original code by Sunichirou Sugou (1989)
 * Rewritten by Anders Lindgren (1996)
 */
4929 static int prolog_pr __P((char *, char *));
4930 static void prolog_skip_comment __P((linebuffer *, FILE *));
4931 static int prolog_atom __P((char *, int));
4933 static void
4934 Prolog_functions (inf)
4935 FILE *inf;
4937 char *cp, *last;
4938 int len;
4939 int allocated;
4941 allocated = 0;
4942 len = 0;
4943 last = NULL;
4945 LOOP_ON_INPUT_LINES (inf, lb, cp)
4947 if (cp[0] == '\0') /* Empty line */
4948 continue;
4949 else if (iswhite (cp[0])) /* Not a predicate */
4950 continue;
4951 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4952 prolog_skip_comment (&lb, inf);
4953 else if ((len = prolog_pr (cp, last)) > 0)
4955 /* Predicate or rule. Store the function name so that we
4956 only generate a tag for the first clause. */
4957 if (last == NULL)
4958 last = xnew(len + 1, char);
4959 else if (len + 1 > allocated)
4960 xrnew (last, len + 1, char);
4961 allocated = len + 1;
4962 strncpy (last, cp, len);
4963 last[len] = '\0';
4969 static void
4970 prolog_skip_comment (plb, inf)
4971 linebuffer *plb;
4972 FILE *inf;
4974 char *cp;
4978 for (cp = plb->buffer; *cp != '\0'; cp++)
4979 if (cp[0] == '*' && cp[1] == '/')
4980 return;
4981 readline (plb, inf);
4983 while (!feof(inf));
4987 * A predicate or rule definition is added if it matches:
4988 * <beginning of line><Prolog Atom><whitespace>(
4989 * or <beginning of line><Prolog Atom><whitespace>:-
4991 * It is added to the tags database if it doesn't match the
4992 * name of the previous clause header.
4994 * Return the size of the name of the predicate or rule, or 0 if no
4995 * header was found.
4997 static int
4998 prolog_pr (s, last)
4999 char *s;
5000 char *last; /* Name of last clause. */
5002 int pos;
5003 int len;
5005 pos = prolog_atom (s, 0);
5006 if (pos < 1)
5007 return 0;
5009 len = pos;
5010 pos = skip_spaces (s + pos) - s;
5012 if ((s[pos] == '.'
5013 || (s[pos] == '(' && (pos += 1))
5014 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5015 && (last == NULL /* save only the first clause */
5016 || len != strlen (last)
5017 || !strneq (s, last, len)))
5019 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5020 return len;
5022 else
5023 return 0;
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *cur = start;

  if (ISLOWER (*cur) || *cur == '_')
    {
      /* The atom is unquoted. */
      do
        cur++;
      while (ISALNUM (*cur) || *cur == '_');
      return cur - start;
    }

  if (*cur != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  cur++;
  for (;;)
    switch (*cur)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (cur[1] == '\0')
          return -1;
        cur += 2;
        break;
      case '\'':
        if (cur[1] != '\'')
          return cur + 1 - start;       /* closing quote */
        cur += 2;                       /* A double quote */
        break;
      default:
        cur++;
        break;
      }
}
/*
 * Support for Erlang
 *
 * Generates tags for functions, defines, and records.
 * Assumes that Erlang functions start at column 0.
 * Original code by Anders Lindgren (1996)
 */
5093 static int erlang_func __P((char *, char *));
5094 static void erlang_attribute __P((char *));
5095 static int erlang_atom __P((char *));
5097 static void
5098 Erlang_functions (inf)
5099 FILE *inf;
5101 char *cp, *last;
5102 int len;
5103 int allocated;
5105 allocated = 0;
5106 len = 0;
5107 last = NULL;
5109 LOOP_ON_INPUT_LINES (inf, lb, cp)
5111 if (cp[0] == '\0') /* Empty line */
5112 continue;
5113 else if (iswhite (cp[0])) /* Not function nor attribute */
5114 continue;
5115 else if (cp[0] == '%') /* comment */
5116 continue;
5117 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5118 continue;
5119 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5121 erlang_attribute (cp);
5122 last = NULL;
5124 else if ((len = erlang_func (cp, last)) > 0)
5127 * Function. Store the function name so that we only
5128 * generates a tag for the first clause.
5130 if (last == NULL)
5131 last = xnew (len + 1, char);
5132 else if (len + 1 > allocated)
5133 xrnew (last, len + 1, char);
5134 allocated = len + 1;
5135 strncpy (last, cp, len);
5136 last[len] = '\0';
5143 * A function definition is added if it matches:
5144 * <beginning of line><Erlang Atom><whitespace>(
5146 * It is added to the tags database if it doesn't match the
5147 * name of the previous clause header.
5149 * Return the size of the name of the function, or 0 if no function
5150 * was found.
5152 static int
5153 erlang_func (s, last)
5154 char *s;
5155 char *last; /* Name of last clause. */
5157 int pos;
5158 int len;
5160 pos = erlang_atom (s);
5161 if (pos < 1)
5162 return 0;
5164 len = pos;
5165 pos = skip_spaces (s + pos) - s;
5167 /* Save only the first clause. */
5168 if (s[pos++] == '('
5169 && (last == NULL
5170 || len != (int)strlen (last)
5171 || !strneq (s, last, len)))
5173 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5174 return len;
5177 return 0;
5182 * Handle attributes. Currently, tags are generated for defines
5183 * and records.
5185 * They are on the form:
5186 * -define(foo, bar).
5187 * -define(Foo(M, N), M+N).
5188 * -record(graph, {vtab = notable, cyclic = true}).
5190 static void
5191 erlang_attribute (s)
5192 char *s;
5194 char *cp = s;
5196 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5197 && *cp++ == '(')
5199 int len = erlang_atom (skip_spaces (cp));
5200 if (len > 0)
5201 pfnote (savenstr (cp, len), TRUE,
5202 s, cp + len - s, lineno, linecharno);
5204 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom or when a quoted atom is not closed on this
 * line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *cur = s;

  if (ISALPHA (*cur) || *cur == '_')
    {
      /* The atom is unquoted. */
      cur++;
      while (ISALNUM (*cur) || *cur == '_')
        cur++;
    }
  else if (*cur == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      cur++;
      while (*cur != '\'')
        {
          if (*cur == '\\')     /* backslash quotes the next character */
            cur++;
          if (*cur == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          cur++;
        }
      cur++;                    /* step over the closing quote */
    }

  return cur - s;
}
5238 #ifdef ETAGS_REGEXPS
5240 static char *scan_separators __P((char *));
5241 static void add_regex __P((char *, language *));
5242 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the next
 * unquoted occurrence of it.  A backslash quotes the following
 * character: \a, \b, \d, \e, \f, \n, \r, \t and \v become BEL, BS,
 * DEL, ESC, FF, NL, CR, TAB and VT, a quoted separator becomes the
 * separator itself, and any other quoted character is copied together
 * with its backslash.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  /* Escape letters and, at the same index, the characters they denote. */
  static const char esc_letters[] = "abdefnrtv";
  static const char esc_values[] =
    { '\007', '\b', 0177, 033, '\f', '\n', '\r', '\t', '\v' };
  char sep = name[0];
  char *copyto = name;
  int k;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Step onto the quoted character.  A backslash at the very
             end of the string leaves the regexp unterminated. */
          if (*++name == '\0')
            break;

          for (k = 0; esc_letters[k] != '\0' && esc_letters[k] != *name; k++)
            continue;

          if (esc_letters[k] != '\0')
            *copyto++ = esc_values[k];
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5304 /* Look at the argument of --regex or --no-regex and do the right
5305 thing. Same for each line of a regexp file. */
5306 static void
5307 analyse_regex (regex_arg)
5308 char *regex_arg;
5310 if (regex_arg == NULL)
5312 free_patterns (); /* --no-regex: remove existing regexps */
5313 return;
5316 /* A real --regexp option or a line in a regexp file. */
5317 switch (regex_arg[0])
5319 /* Comments in regexp file or null arg to --regex. */
5320 case '\0':
5321 case ' ':
5322 case '\t':
5323 break;
5325 /* Read a regex file. This is recursive and may result in a
5326 loop, which will stop when the file descriptors are exhausted. */
5327 case '@':
5329 FILE *regexfp;
5330 linebuffer regexbuf;
5331 char *regexfile = regex_arg + 1;
5333 /* regexfile is a file containing regexps, one per line. */
5334 regexfp = fopen (regexfile, "r");
5335 if (regexfp == NULL)
5337 pfatal (regexfile);
5338 return;
5340 initbuffer (&regexbuf);
5341 while (readline_internal (&regexbuf, regexfp) > 0)
5342 analyse_regex (regexbuf.buffer);
5343 free (regexbuf.buffer);
5344 fclose (regexfp);
5346 break;
5348 /* Regexp to be used for a specific language only. */
5349 case '{':
5351 language *lang;
5352 char *lang_name = regex_arg + 1;
5353 char *cp;
5355 for (cp = lang_name; *cp != '}'; cp++)
5356 if (*cp == '\0')
5358 error ("unterminated language name in regex: %s", regex_arg);
5359 return;
5361 *cp++ = '\0';
5362 lang = get_language_from_langname (lang_name);
5363 if (lang == NULL)
5364 return;
5365 add_regex (cp, lang);
5367 break;
5369 /* Regexp to be used for any language. */
5370 default:
5371 add_regex (regex_arg, NULL);
5372 break;
5376 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5377 expression, into a real regular expression by compiling it. */
5378 static void
5379 add_regex (regexp_pattern, lang)
5380 char *regexp_pattern;
5381 language *lang;
5383 static struct re_pattern_buffer zeropattern;
5384 char sep, *pat, *name, *modifiers;
5385 const char *err;
5386 struct re_pattern_buffer *patbuf;
5387 pattern *pp;
5388 bool ignore_case, multi_line, single_line;
5391 if (strlen(regexp_pattern) < 3)
5393 error ("null regexp", (char *)NULL);
5394 return;
5396 sep = regexp_pattern[0];
5397 name = scan_separators (regexp_pattern);
5398 if (name == NULL)
5400 error ("%s: unterminated regexp", regexp_pattern);
5401 return;
5403 if (name[1] == sep)
5405 error ("null name for regexp \"%s\"", regexp_pattern);
5406 return;
5408 modifiers = scan_separators (name);
5409 if (modifiers == NULL) /* no terminating separator --> no name */
5411 modifiers = name;
5412 name = "";
5414 else
5415 modifiers += 1; /* skip separator */
5417 /* Parse regex modifiers. */
5418 ignore_case = FALSE; /* case is significant */
5419 multi_line = FALSE; /* matches are done one line at a time */
5420 single_line = FALSE; /* dot does not match newline */
5421 for (; modifiers[0] != '\0'; modifiers++)
5422 switch (modifiers[0])
5424 case 'i':
5425 ignore_case = TRUE;
5426 break;
5427 case 's':
5428 single_line = TRUE;
5429 /* FALLTHRU */
5430 case 'm':
5431 multi_line = TRUE;
5432 need_filebuf = TRUE;
5433 break;
5434 default:
5435 modifiers[1] = '\0';
5436 error ("invalid regexp modifier `%s'", modifiers);
5437 return;
5440 patbuf = xnew (1, struct re_pattern_buffer);
5441 *patbuf = zeropattern;
5442 if (ignore_case)
5444 static char lc_trans[CHARS];
5445 int i;
5446 for (i = 0; i < CHARS; i++)
5447 lc_trans[i] = lowcase (i);
5448 patbuf->translate = lc_trans; /* translation table to fold case */
5451 if (multi_line)
5452 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5453 else
5454 pat = regexp_pattern;
5456 if (single_line)
5457 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5458 else
5459 re_set_syntax (RE_SYNTAX_EMACS);
5461 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5462 if (multi_line)
5463 free (pat);
5464 if (err != NULL)
5466 error ("%s while compiling pattern", err);
5467 return;
5470 pp = p_head;
5471 p_head = xnew (1, pattern);
5472 p_head->regex = savestr (regexp_pattern);
5473 p_head->p_next = pp;
5474 p_head->lang = lang;
5475 p_head->pat = patbuf;
5476 p_head->name_pattern = savestr (name);
5477 p_head->error_signaled = FALSE;
5478 p_head->ignore_case = ignore_case;
5479 p_head->multi_line = multi_line;
5483 * Do the substitutions indicated by the regular expression and
5484 * arguments.
5486 static char *
5487 substitute (in, out, regs)
5488 char *in, *out;
5489 struct re_registers *regs;
5491 char *result, *t;
5492 int size, dig, diglen;
5494 result = NULL;
5495 size = strlen (out);
5497 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5498 if (out[size - 1] == '\\')
5499 fatal ("pattern error in \"%s\"", out);
5500 for (t = etags_strchr (out, '\\');
5501 t != NULL;
5502 t = etags_strchr (t + 2, '\\'))
5503 if (ISDIGIT (t[1]))
5505 dig = t[1] - '0';
5506 diglen = regs->end[dig] - regs->start[dig];
5507 size += diglen - 2;
5509 else
5510 size -= 1;
5512 /* Allocate space and do the substitutions. */
5513 result = xnew (size + 1, char);
5515 for (t = result; *out != '\0'; out++)
5516 if (*out == '\\' && ISDIGIT (*++out))
5518 dig = *out - '0';
5519 diglen = regs->end[dig] - regs->start[dig];
5520 strncpy (t, in + regs->start[dig], diglen);
5521 t += diglen;
5523 else
5524 *t++ = *out;
5525 *t = '\0';
5527 assert (t <= result + size && t - result == (int)strlen (result));
5529 return result;
5532 /* Deallocate all patterns. */
5533 static void
5534 free_patterns ()
5536 pattern *pp;
5537 while (p_head != NULL)
5539 pp = p_head->p_next;
5540 free (p_head->regex);
5541 free (p_head->name_pattern);
5542 free (p_head);
5543 p_head = pp;
5545 return;
5549 * Reads the whole file as a single string from `filebuf' and looks for
5550 * multi-line regular expressions, creating tags on matches.
5551 * readline already dealt with normal regexps.
5553 * Idea by Ben Wing <ben@666.com> (2002).
5555 static void
5556 regex_tag_multiline ()
5558 char *buffer = filebuf.buffer;
5559 pattern *pp;
5561 for (pp = p_head; pp != NULL; pp = pp->p_next)
5563 int match = 0;
5565 if (!pp->multi_line)
5566 continue; /* skip normal regexps */
5568 /* Generic initialisations before parsing file from memory. */
5569 lineno = 1; /* reset global line number */
5570 charno = 0; /* reset global char number */
5571 linecharno = 0; /* reset global char number of line start */
5573 /* Only use generic regexps or those for the current language. */
5574 if (pp->lang != NULL && pp->lang != curfdp->lang)
5575 continue;
5577 while (match >= 0 && match < filebuf.len)
5579 match = re_search (pp->pat, buffer, filebuf.len, charno,
5580 filebuf.len - match, &pp->regs);
5581 switch (match)
5583 case -2:
5584 /* Some error. */
5585 if (!pp->error_signaled)
5587 error ("regexp stack overflow while matching \"%s\"",
5588 pp->regex);
5589 pp->error_signaled = TRUE;
5591 break;
5592 case -1:
5593 /* No match. */
5594 break;
5595 default:
5596 if (match == pp->regs.end[0])
5598 if (!pp->error_signaled)
5600 error ("regexp matches the empty string: \"%s\"",
5601 pp->regex);
5602 pp->error_signaled = TRUE;
5604 match = -3; /* exit from while loop */
5605 break;
5608 /* Match occurred. Construct a tag. */
5609 while (charno < pp->regs.end[0])
5610 if (buffer[charno++] == '\n')
5611 lineno++, linecharno = charno;
5612 if (pp->name_pattern[0] != '\0')
5614 /* Make a named tag. */
5615 char *name = substitute (buffer,
5616 pp->name_pattern, &pp->regs);
5617 if (name != NULL)
5618 pfnote (name, TRUE, buffer + linecharno,
5619 charno - linecharno + 1, lineno, linecharno);
5621 else
5623 /* Make an unnamed tag. */
5624 pfnote ((char *)NULL, TRUE, buffer + linecharno,
5625 charno - linecharno + 1, lineno, linecharno);
5627 break;
5633 #endif /* ETAGS_REGEXPS */
5636 static bool
5637 nocase_tail (cp)
5638 char *cp;
5640 register int len = 0;
5642 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5643 cp++, len++;
5644 if (*cp == '\0' && !intoken (dbp[len]))
5646 dbp += len;
5647 return TRUE;
5649 return FALSE;
5652 static char *
5653 get_tag (bp)
5654 register char *bp;
5656 register char *cp, *name;
5658 if (*bp == '\0')
5659 return NULL;
5660 /* Go till you get to white space or a syntactic break */
5661 for (cp = bp + 1; !notinname (*cp); cp++)
5662 continue;
5663 name = savenstr (bp, cp-bp);
5664 pfnote (name, TRUE,
5665 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5666 return name;
5669 /* Initialize a linebuffer for use */
5670 static void
5671 initbuffer (lbp)
5672 linebuffer *lbp;
5674 lbp->size = (DEBUG) ? 3 : 200;
5675 lbp->buffer = xnew (lbp->size, char);
5676 lbp->buffer[0] = '\0';
5677 lbp->len = 0;
5681 * Read a line of text from `stream' into `lbp', excluding the
5682 * newline or CR-NL, if any. Return the number of characters read from
5683 * `stream', which is the length of the line including the newline.
5685 * On DOS or Windows we do not count the CR character, if any before the
5686 * NL, in the returned length; this mirrors the behavior of Emacs on those
5687 * platforms (for text files, it translates CR-NL to NL as it reads in the
5688 * file).
5690 * If multi-line regular expressions are requested, each line read is
5691 * appended to `filebuf'.
5693 static long
5694 readline_internal (lbp, stream)
5695 linebuffer *lbp;
5696 register FILE *stream;
5698 char *buffer = lbp->buffer;
5699 register char *p = lbp->buffer;
5700 register char *pend;
5701 int chars_deleted;
5703 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5705 for (;;)
5707 register int c = getc (stream);
5708 if (p == pend)
5710 /* We're at the end of linebuffer: expand it. */
5711 lbp->size *= 2;
5712 xrnew (buffer, lbp->size, char);
5713 p += buffer - lbp->buffer;
5714 pend = buffer + lbp->size;
5715 lbp->buffer = buffer;
5717 if (c == EOF)
5719 *p = '\0';
5720 chars_deleted = 0;
5721 break;
5723 if (c == '\n')
5725 if (p > buffer && p[-1] == '\r')
5727 p -= 1;
5728 #ifdef DOS_NT
5729 /* Assume CRLF->LF translation will be performed by Emacs
5730 when loading this file, so CRs won't appear in the buffer.
5731 It would be cleaner to compensate within Emacs;
5732 however, Emacs does not know how many CRs were deleted
5733 before any given point in the file. */
5734 chars_deleted = 1;
5735 #else
5736 chars_deleted = 2;
5737 #endif
5739 else
5741 chars_deleted = 1;
5743 *p = '\0';
5744 break;
5746 *p++ = c;
5748 lbp->len = p - buffer;
5750 if (need_filebuf /* we need filebuf for multi-line regexps */
5751 && chars_deleted > 0) /* not at EOF */
5753 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
5755 /* Expand filebuf. */
5756 filebuf.size *= 2;
5757 xrnew (filebuf.buffer, filebuf.size, char);
5759 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
5760 filebuf.len += lbp->len;
5761 filebuf.buffer[filebuf.len++] = '\n';
5762 filebuf.buffer[filebuf.len] = '\0';
5765 return lbp->len + chars_deleted;
5769 * Like readline_internal, above, but in addition try to match the
5770 * input line against relevant regular expressions and manage #line
5771 * directives.
5773 static void
5774 readline (lbp, stream)
5775 linebuffer *lbp;
5776 FILE *stream;
5778 long result;
5780 linecharno = charno; /* update global char number of line start */
5781 result = readline_internal (lbp, stream); /* read line */
5782 lineno += 1; /* increment global line number */
5783 charno += result; /* increment global char number */
5785 /* Honour #line directives. */
5786 if (!no_line_directive)
5788 static bool discard_until_line_directive;
5790 /* Check whether this is a #line directive. */
5791 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5793 int start, lno;
5795 if (DEBUG) start = 0; /* shut up the compiler */
5796 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5798 char *endp = lbp->buffer + start;
5800 assert (start > 0);
5801 while ((endp = etags_strchr (endp, '"')) != NULL
5802 && endp[-1] == '\\')
5803 endp++;
5804 if (endp != NULL)
5805 /* Ok, this is a real #line directive. Let's deal with it. */
5807 char *taggedabsname; /* absolute name of original file */
5808 char *taggedfname; /* name of original file as given */
5809 char *name; /* temp var */
5811 discard_until_line_directive = FALSE; /* found it */
5812 name = lbp->buffer + start;
5813 *endp = '\0';
5814 canonicalize_filename (name); /* for DOS */
5815 taggedabsname = absolute_filename (name, curfdp->infabsdir);
5816 if (filename_is_absolute (name)
5817 || filename_is_absolute (curfdp->infname))
5818 taggedfname = savestr (taggedabsname);
5819 else
5820 taggedfname = relative_filename (taggedabsname,tagfiledir);
5822 if (streq (curfdp->taggedfname, taggedfname))
5823 /* The #line directive is only a line number change. We
5824 deal with this afterwards. */
5825 free (taggedfname);
5826 else
5827 /* The tags following this #line directive should be
5828 attributed to taggedfname. In order to do this, set
5829 curfdp accordingly. */
5831 fdesc *fdp; /* file description pointer */
5833 /* Go look for a file description already set up for the
5834 file indicated in the #line directive. If there is
5835 one, use it from now until the next #line
5836 directive. */
5837 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5838 if (streq (fdp->infname, curfdp->infname)
5839 && streq (fdp->taggedfname, taggedfname))
5840 /* If we remove the second test above (after the &&)
5841 then all entries pertaining to the same file are
5842 coalesced in the tags file. If we use it, then
5843 entries pertaining to the same file but generated
5844 from different files (via #line directives) will
5845 go into separate sections in the tags file. These
5846 alternatives look equivalent. The first one
5847 destroys some apparently useless information. */
5849 curfdp = fdp;
5850 free (taggedfname);
5851 break;
5853 /* Else, if we already tagged the real file, skip all
5854 input lines until the next #line directive. */
5855 if (fdp == NULL) /* not found */
5856 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5857 if (streq (fdp->infabsname, taggedabsname))
5859 discard_until_line_directive = TRUE;
5860 free (taggedfname);
5861 break;
5863 /* Else create a new file description and use that from
5864 now on, until the next #line directive. */
5865 if (fdp == NULL) /* not found */
5867 fdp = fdhead;
5868 fdhead = xnew (1, fdesc);
5869 *fdhead = *curfdp; /* copy curr. file description */
5870 fdhead->next = fdp;
5871 fdhead->infname = savestr (curfdp->infname);
5872 fdhead->infabsname = savestr (curfdp->infabsname);
5873 fdhead->infabsdir = savestr (curfdp->infabsdir);
5874 fdhead->taggedfname = taggedfname;
5875 fdhead->usecharno = FALSE;
5876 curfdp = fdhead;
5879 free (taggedabsname);
5880 lineno = lno - 1;
5881 readline (lbp, stream);
5882 return;
5883 } /* if a real #line directive */
5884 } /* if #line is followed by a a number */
5885 } /* if line begins with "#line " */
5887 /* If we are here, no #line directive was found. */
5888 if (discard_until_line_directive)
5890 if (result > 0)
5892 /* Do a tail recursion on ourselves, thus discarding the contents
5893 of the line buffer. */
5894 readline (lbp, stream);
5895 return;
5897 /* End of file. */
5898 discard_until_line_directive = FALSE;
5899 return;
5901 } /* if #line directives should be considered */
5903 #ifdef ETAGS_REGEXPS
5905 int match;
5906 pattern *pp;
5908 /* Match against relevant patterns. */
5909 if (lbp->len > 0)
5910 for (pp = p_head; pp != NULL; pp = pp->p_next)
5912 /* Only use generic regexps or those for the current language.
5913 Also do not use multiline regexps, which is the job of
5914 regex_tag_multiline. */
5915 if ((pp->lang != NULL && pp->lang != fdhead->lang)
5916 || pp->multi_line)
5917 continue;
5919 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5920 switch (match)
5922 case -2:
5923 /* Some error. */
5924 if (!pp->error_signaled)
5926 error ("regexp stack overflow while matching \"%s\"",
5927 pp->regex);
5928 pp->error_signaled = TRUE;
5930 break;
5931 case -1:
5932 /* No match. */
5933 break;
5934 case 0:
5935 /* Empty string matched. */
5936 if (!pp->error_signaled)
5938 error ("regexp matches the empty string: \"%s\"",
5939 pp->regex);
5940 pp->error_signaled = TRUE;
5942 break;
5943 default:
5944 /* Match occurred. Construct a tag. */
5945 if (pp->name_pattern[0] != '\0')
5947 /* Make a named tag. */
5948 char *name = substitute (lbp->buffer,
5949 pp->name_pattern, &pp->regs);
5950 if (name != NULL)
5951 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5953 else
5955 /* Make an unnamed tag. */
5956 pfnote ((char *)NULL, TRUE,
5957 lbp->buffer, match, lineno, linecharno);
5959 break;
5963 #endif /* ETAGS_REGEXPS */
/*
 * Return a pointer to a newly allocated copy of the string CP.
 * The copy is made by savenstr using the full length of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5979 * Return a pointer to a space of size LEN+1 allocated with xnew where
5980 * the string CP has been copied for at most the first LEN characters.
5982 static char *
5983 savenstr (cp, len)
5984 char *cp;
5985 int len;
5987 register char *dp;
5989 dp = xnew (len + 1, char);
5990 strncpy (dp, cp, len);
5991 dp[len] = '\0';
5992 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * part of the string searched.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * part of the string searched.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
6037 * Return TRUE if the two strings are equal, ignoring case for alphabetic
6038 * characters.
6040 * Analogous to BSD's strcasecmp, included for portability.
6042 static bool
6043 strcaseeq (s1, s2)
6044 register const char *s1;
6045 register const char *s2;
6047 while (*s1 != '\0'
6048 && (ISALPHA (*s1) && ISALPHA (*s2)
6049 ? lowcase (*s1) == lowcase (*s2)
6050 : *s1 == *s2))
6051 s1++, s2++;
6053 return (*s1 == *s2);
/* Skip spaces, return new pointer.  The result points to the first
   character of CP for which iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Skip non spaces, return new pointer.  The result points to the
   first character of CP for which iswhite is true, or to the end of
   the string.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
6076 /* Print error message and exit. */
6077 void
6078 fatal (s1, s2)
6079 char *s1, *s2;
6081 error (s1, s2);
6082 exit (BAD);
6085 static void
6086 pfatal (s1)
6087 char *s1;
6089 perror (s1);
6090 exit (BAD);
6093 static void
6094 suggest_asking_for_help ()
6096 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6097 progname,
6098 #ifdef LONG_OPTIONS
6099 "--help"
6100 #else
6101 "-h"
6102 #endif
6104 exit (BAD);
6107 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6108 static void
6109 error (s1, s2)
6110 const char *s1, *s2;
6112 fprintf (stderr, "%s: ", progname);
6113 fprintf (stderr, s1, s2);
6114 fprintf (stderr, "\n");
6117 /* Return a newly-allocated string whose contents
6118 concatenate those of s1, s2, s3. */
6119 static char *
6120 concat (s1, s2, s3)
6121 char *s1, *s2, *s3;
6123 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6124 char *result = xnew (len1 + len2 + len3 + 1, char);
6126 strcpy (result, s1);
6127 strcpy (result + len1, s2);
6128 strcpy (result + len1 + len2, s3);
6129 result[len1 + len2 + len3] = '\0';
6131 return result;
6135 /* Does the same work as the system V getcwd, but does not need to
6136 guess the buffer size in advance. */
6137 static char *
6138 etags_getcwd ()
6140 #ifdef HAVE_GETCWD
6141 int bufsize = 200;
6142 char *path = xnew (bufsize, char);
6144 while (getcwd (path, bufsize) == NULL)
6146 if (errno != ERANGE)
6147 pfatal ("getcwd");
6148 bufsize *= 2;
6149 free (path);
6150 path = xnew (bufsize, char);
6153 canonicalize_filename (path);
6154 return path;
6156 #else /* not HAVE_GETCWD */
6157 #if MSDOS
6159 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6161 getwd (path);
6163 for (p = path; *p != '\0'; p++)
6164 if (*p == '\\')
6165 *p = '/';
6166 else
6167 *p = lowcase (*p);
6169 return strdup (path);
6170 #else /* not MSDOS */
6171 linebuffer path;
6172 FILE *pipe;
6174 initbuffer (&path);
6175 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6176 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6177 pfatal ("pwd");
6178 pclose (pipe);
6180 return path.buffer;
6181 #endif /* not MSDOS */
6182 #endif /* not HAVE_GETCWD */
6185 /* Return a newly allocated string containing the file name of FILE
6186 relative to the absolute directory DIR (which should end with a slash). */
6187 static char *
6188 relative_filename (file, dir)
6189 char *file, *dir;
6191 char *fp, *dp, *afn, *res;
6192 int i;
6194 /* Find the common root of file and dir (with a trailing slash). */
6195 afn = absolute_filename (file, cwd);
6196 fp = afn;
6197 dp = dir;
6198 while (*fp++ == *dp++)
6199 continue;
6200 fp--, dp--; /* back to the first differing char */
6201 #ifdef DOS_NT
6202 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6203 return afn;
6204 #endif
6205 do /* look at the equal chars until '/' */
6206 fp--, dp--;
6207 while (*fp != '/');
6209 /* Build a sequence of "../" strings for the resulting relative file name. */
6210 i = 0;
6211 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6212 i += 1;
6213 res = xnew (3*i + strlen (fp + 1) + 1, char);
6214 res[0] = '\0';
6215 while (i-- > 0)
6216 strcat (res, "../");
6218 /* Add the file name relative to the common root of file and dir. */
6219 strcat (res, fp + 1);
6220 free (afn);
6222 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/." and
   "/dirname/.." components are removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          char *to = NULL;      /* where the tail must be moved to */
          char *from = NULL;    /* tail that follows the deleted part */

          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": go back to the start of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              to = cp;
              from = slashp + 3;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": just drop it. */
              to = slashp;
              from = slashp + 2;
            }

          if (from != NULL)
            {
              /* Source and destination overlap, which strcpy does not
                 allow.  The destination is below the source, so a
                 plain forward copy is safe. */
              slashp = to;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      free (res);               /* nothing left: the name is the root */
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string holding the absolute name of the
   directory in which FILE lives, given DIR (which should end with a
   slash).  FILE is canonicalized in place; when it contains no slash
   a copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after_slash, *dirname;
  char saved;

  canonicalize_filename (file);

  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE just after its last slash, so that only the
     directory part gets resolved, then put the saved byte back.  */
  after_slash = last_slash + 1;
  saved = *after_slash;
  *after_slash = '\0';
  dirname = absolute_filename (file, dir);
  *after_slash = saved;

  return dirname;
}
/* Tell whether FN is an absolute file name.  FN must already have
   been passed through canonicalize_filename, so that only forward
   slashes need to be recognised.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  /* A drive letter followed by ":/" is absolute as well.  */
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is converted to upper case and every backslash becomes
   a slash; elsewhere FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *scan;

  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes. */
  scan = fn;
  while (*scan != '\0')
    {
      if (*scan == '\\')
        *scan = '/';
      scan++;
    }
#else
  /* No action: just mark the argument as used to shut up the compiler. */
  (void) fn;
#endif
}
6340 /* Set the minimum size of a string contained in a linebuffer. */
6341 static void
6342 linebuffer_setlen (lbp, toksize)
6343 linebuffer *lbp;
6344 int toksize;
6346 while (lbp->size <= toksize)
6348 lbp->size *= 2;
6349 xrnew (lbp->buffer, lbp->size, char);
6351 lbp->len = toksize;
6354 /* Like malloc but get fatal error if memory is exhausted. */
6355 static PTR
6356 xmalloc (size)
6357 unsigned int size;
6359 PTR result = (PTR) malloc (size);
6360 if (result == NULL)
6361 fatal ("virtual memory exhausted", (char *)NULL);
6362 return result;
6365 static PTR
6366 xrealloc (ptr, size)
6367 char *ptr;
6368 unsigned int size;
6370 PTR result = (PTR) realloc (ptr, size);
6371 if (result == NULL)
6372 fatal ("virtual memory exhausted", (char *)NULL);
6373 return result;
6377 * Local Variables:
6378 * c-indentation-style: gnu
6379 * indent-tabs-mode: t
6380 * tab-width: 8
6381 * fill-column: 79
6382 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern")
6383 * End: