Use AREF, ASET and ASIZE macros.
[emacs.git] / lib-src / etags.c
blob4a24bbfa375446a5fb4d2dde66a2163f10ce678e
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * Sam Kendall added C++.
28 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
29 * Regexp tags by Tom Tromey.
31 * Francesco Potorti` (pot@gnu.org) is the current maintainer.
34 char pot_etags_version[] = "@(#) pot revision number is 13.44";
36 #define TRUE 1
37 #define FALSE 0
39 #ifndef DEBUG
40 # define DEBUG FALSE
41 #endif
43 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
44 # define P_(proto) proto
45 #else
46 # define P_(proto) ()
47 #endif
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 #endif /* HAVE_CONFIG_H */
58 #ifndef _GNU_SOURCE
59 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
60 #endif
62 #ifdef MSDOS
63 # undef MSDOS
64 # define MSDOS TRUE
65 # include <fcntl.h>
66 # include <sys/param.h>
67 # include <io.h>
68 # ifndef HAVE_CONFIG_H
69 # define DOS_NT
70 # include <sys/config.h>
71 # endif
72 #else
73 # define MSDOS FALSE
74 #endif /* MSDOS */
76 #ifdef WINDOWSNT
77 # include <stdlib.h>
78 # include <fcntl.h>
79 # include <string.h>
80 # include <direct.h>
81 # include <io.h>
82 # define MAXPATHLEN _MAX_PATH
83 # ifdef HAVE_CONFIG_H
84 # undef HAVE_NTGUI
85 # else
86 # define DOS_NT
87 # endif /* not HAVE_CONFIG_H */
88 # ifndef HAVE_GETCWD
89 # define HAVE_GETCWD
90 # endif /* undef HAVE_GETCWD */
91 #else /* !WINDOWSNT */
92 # ifdef STDC_HEADERS
93 # include <stdlib.h>
94 # include <string.h>
95 # else
96 extern char *getenv ();
97 # endif
98 #endif /* !WINDOWSNT */
100 #ifdef HAVE_UNISTD_H
101 # include <unistd.h>
102 #else
103 # if defined (HAVE_GETCWD) && !WINDOWSNT
104 extern char *getcwd (char *buf, size_t size);
105 # endif
106 #endif /* HAVE_UNISTD_H */
108 #include <stdio.h>
109 #include <ctype.h>
110 #include <errno.h>
111 #ifndef errno
112 extern int errno;
113 #endif
114 #include <sys/types.h>
115 #include <sys/stat.h>
117 #if !defined (S_ISREG) && defined (S_IFREG)
118 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
119 #endif
121 #ifdef LONG_OPTIONS
122 # include <getopt.h>
123 #else
124 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
125 extern char *optarg;
126 extern int optind, opterr;
127 #endif /* LONG_OPTIONS */
129 #ifdef ETAGS_REGEXPS
130 # include <regex.h>
131 #endif /* ETAGS_REGEXPS */
133 /* Define CTAGS to make the program "ctags" compatible with the usual one.
134 Leave it undefined to make the program "etags", which makes emacs-style
135 tag tables and tags typedefs, #defines and struct/union/enum by default. */
136 #ifdef CTAGS
137 # undef CTAGS
138 # define CTAGS TRUE
139 #else
140 # define CTAGS FALSE
141 #endif
143 /* Exit codes for success and failure. */
144 #ifdef VMS
145 # define GOOD 1
146 # define BAD 0
147 #else
148 # define GOOD 0
149 # define BAD 1
150 #endif
152 /* C extensions. */
153 #define C_PLPL 0x00001 /* C++ */
154 #define C_STAR 0x00003 /* C* */
155 #define C_JAVA 0x00005 /* JAVA */
156 #define YACC 0x10000 /* yacc file */
/* String equality tests.  When DEBUG is on, abort if both arguments
   are NULL instead of passing them on to strcmp/strncmp.  */
#define streq(s,t)      ((DEBUG && (s) == NULL && (t) == NULL   \
                          && (abort (), 1)) || !strcmp (s, t))
#define strneq(s,t,n)   ((DEBUG && (s) == NULL && (t) == NULL   \
                          && (abort (), 1)) || !strncmp (s, t, n))

/* Lower-case C.  The argument is converted to unsigned char first:
   giving tolower a negative value other than EOF is undefined, and a
   plain (char) cast produces exactly that for bytes above 127 on
   platforms where char is signed.  */
#define lowcase(c)      tolower ((unsigned char) (c))

#define CHARS 256               /* number of distinct character codes;
                                   size of the _wht ... _etk tables */
/* Reduce X to a table index in [0, CHARS).  X is parenthesized so that
   the cast applies to the whole argument expression.  */
#define CHAR(x)         ((unsigned int) (x) & (CHARS - 1))
#define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
#define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
#define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
#define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
#define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
/*
 * xnew (N, Type)       -- get storage for N objects of type Type.
 * xrnew (OP, N, Type)  -- resize the storage at OP to N objects of
 *                         type Type.
 *
 * Both expand to an expression of type `Type *'.  When chkmalloc is
 * defined the requests go to trace_malloc/trace_realloc together with
 * the file and line of the call; otherwise they go to xmalloc/xrealloc.
 */
#ifndef chkmalloc
# define xnew(n,Type)     ((Type *) xmalloc (sizeof (Type) * (n)))
# define xrnew(op,n,Type) ((Type *) xrealloc ((op), sizeof (Type) * (n)))
#else
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  sizeof (Type) * (n)))
# define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
                                                   (op), sizeof (Type) * (n)))
#endif
191 typedef int bool;
193 typedef void Lang_function P_((FILE *));
/* Pairs a file name suffix with the command that expands files
   carrying that suffix.  */
typedef struct
{
  char *suffix;                 /* suffix text, e.g. "gz" */
  char *command;                /* takes one arg and decompresses to stdout */
} compressor;
201 typedef struct
203 char *name;
204 Lang_function *function;
205 char **suffixes;
206 char **interpreters;
207 } language;
209 typedef struct node_st
210 { /* sorting structure */
211 char *name; /* function or type name */
212 char *file; /* file name */
213 bool is_func; /* use pattern or line no */
214 bool been_warned; /* set if noticed dup */
215 int lno; /* line number tag is on */
216 long cno; /* character number line starts on */
217 char *pat; /* search pattern */
218 struct node_st *left, *right; /* left and right sons */
219 } node;
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills
 * it from a stream and copes with lines of any length.
 *
 * SIZE is how big BUFFER is; LEN is the length of the string found in
 * BUFFER once readline has read it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
235 /* Many compilers barf on this:
236 Lang_function Ada_funcs;
237 so let's write it this way */
238 static void Ada_funcs P_((FILE *));
239 static void Asm_labels P_((FILE *));
240 static void C_entries P_((int c_ext, FILE *));
241 static void default_C_entries P_((FILE *));
242 static void plain_C_entries P_((FILE *));
243 static void Cjava_entries P_((FILE *));
244 static void Cobol_paragraphs P_((FILE *));
245 static void Cplusplus_entries P_((FILE *));
246 static void Cstar_entries P_((FILE *));
247 static void Erlang_functions P_((FILE *));
248 static void Fortran_functions P_((FILE *));
249 static void Yacc_entries P_((FILE *));
250 static void Lisp_functions P_((FILE *));
251 static void Pascal_functions P_((FILE *));
252 static void Perl_functions P_((FILE *));
253 static void Postscript_functions P_((FILE *));
254 static void Prolog_functions P_((FILE *));
255 static void Python_functions P_((FILE *));
256 static void Scheme_functions P_((FILE *));
257 static void TeX_functions P_((FILE *));
258 static void Texinfo_functions P_ ((FILE *));
259 static void just_read_file P_((FILE *));
261 static void print_language_names P_((void));
262 static void print_version P_((void));
263 static void print_help P_((void));
264 int main P_((int, char **));
265 static int number_len P_((long));
267 static compressor *get_compressor_from_suffix P_((char *, char **));
268 static language *get_language_from_name P_((char *));
269 static language *get_language_from_interpreter P_((char *));
270 static language *get_language_from_suffix P_((char *));
271 static int total_size_of_entries P_((node *));
272 static long readline P_((linebuffer *, FILE *));
273 static long readline_internal P_((linebuffer *, FILE *));
274 static void get_tag P_((char *));
276 #ifdef ETAGS_REGEXPS
277 static void analyse_regex P_((char *, bool));
278 static void add_regex P_((char *, bool, language *));
279 static void free_patterns P_((void));
280 #endif /* ETAGS_REGEXPS */
281 static void error P_((const char *, const char *));
282 static void suggest_asking_for_help P_((void));
283 static void fatal P_((char *, char *));
284 static void pfatal P_((char *));
285 static void add_node P_((node *, node **));
287 static void init P_((void));
288 static void initbuffer P_((linebuffer *));
289 static void find_entries P_((char *, FILE *));
290 static void free_tree P_((node *));
291 static void pfnote P_((char *, bool, char *, int, int, long));
292 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
293 static void process_file P_((char *));
294 static void put_entries P_((node *));
295 static void takeprec P_((void));
297 static char *concat P_((char *, char *, char *));
298 static char *skip_spaces P_((char *));
299 static char *skip_non_spaces P_((char *));
300 static char *savenstr P_((char *, int));
301 static char *savestr P_((char *));
302 static char *etags_strchr P_((const char *, int));
303 static char *etags_strrchr P_((const char *, int));
304 static char *etags_getcwd P_((void));
305 static char *relative_filename P_((char *, char *));
306 static char *absolute_filename P_((char *, char *));
307 static char *absolute_dirname P_((char *, char *));
308 static bool filename_is_absolute P_((char *f));
309 static void canonicalize_filename P_((char *));
310 static void grow_linebuffer P_((linebuffer *, int));
311 long *xmalloc P_((unsigned int));
312 long *xrealloc P_((char *, unsigned int));
315 char searchar = '/'; /* use /.../ searches */
317 char *tagfile; /* output file */
318 char *progname; /* name this program was invoked with */
319 char *cwd; /* current working directory */
320 char *tagfiledir; /* directory of tagfile */
321 FILE *tagf; /* ioptr for tags file */
323 char *curfile; /* current input file name */
324 language *curlang; /* current language */
326 int lineno; /* line number of current line */
327 long charno; /* current character number */
328 long linecharno; /* charno of start of current line */
329 char *dbp; /* pointer to start of current tag */
331 node *head; /* the head of the binary tree of tags */
333 linebuffer lb; /* the current line */
334 linebuffer token_name; /* used by C_entries as a temporary area */
335 struct
337 long linepos;
338 linebuffer lb; /* used by C_entries instead of lb */
339 } lbs[2];
341 /* boolean "functions" (see init) */
342 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* Each string lists the characters that belong to one class. */
char *white = " \f\t\n\r\v";                    /* white chars */
char *nonam = " \f\t\n\r(=,[;";                 /* not in a name */
char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?"; /* token ending chars */
/* token starting chars */
char *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";
/* valid in-token chars */
char *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
355 bool append_to_tagfile; /* -a: append to tags */
356 /* The following four default to TRUE for etags, but to FALSE for ctags. */
357 bool typedefs; /* -t: create tags for C and Ada typedefs */
358 bool typedefs_and_cplusplus; /* -T: create tags for C typedefs, level */
359 /* 0 struct/enum/union decls, and C++ */
360 /* member functions. */
361 bool constantypedefs; /* -d: create tags for C #define, enum */
362 /* constants and variables. */
363 /* -D: opposite of -d. Default under ctags. */
364 bool declarations; /* --declarations: tag them and extern in C&Co*/
365 bool globals; /* create tags for global variables */
366 bool members; /* create tags for C member variables */
367 bool update; /* -u: update tags */
368 bool vgrind_style; /* -v: create vgrind style index output */
369 bool no_warnings; /* -w: suppress warnings */
370 bool cxref_style; /* -x: create cxref style output */
371 bool cplusplus; /* .[hc] means C++, not C */
372 bool noindentypedefs; /* -I: ignore indentation in C */
373 bool packages_only; /* --packages-only: in Ada, only tag packages*/
#ifdef LONG_OPTIONS
/* Long option table given to getopt_long.  An entry either maps the
   long name onto a short option character, or names a flag variable
   and the value to store in it.
   NOTE(review): "help" is listed twice (for 'h' and for 'H'). */
struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only, TRUE  },
  { "append",             no_argument,       NULL,           'a'   },
  { "backward-search",    no_argument,       NULL,           'B'   },
  { "c++",                no_argument,       NULL,           'C'   },
  { "cxref",              no_argument,       NULL,           'x'   },
  { "defines",            no_argument,       NULL,           'd'   },
  { "declarations",       no_argument,       &declarations,  TRUE  },
  { "no-defines",         no_argument,       NULL,           'D'   },
  { "globals",            no_argument,       &globals,       TRUE  },
  { "no-globals",         no_argument,       &globals,       FALSE },
  { "help",               no_argument,       NULL,           'h'   },
  { "help",               no_argument,       NULL,           'H'   },
  { "ignore-indentation", no_argument,       NULL,           'I'   },
  { "include",            required_argument, NULL,           'i'   },
  { "language",           required_argument, NULL,           'l'   },
  { "members",            no_argument,       &members,       TRUE  },
  { "no-members",         no_argument,       &members,       FALSE },
  { "no-warn",            no_argument,       NULL,           'w'   },
  { "output",             required_argument, NULL,           'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,           'r'   },
  { "no-regex",           no_argument,       NULL,           'R'   },
  { "ignore-case-regex",  required_argument, NULL,           'c'   },
#endif /* ETAGS_REGEXPS */
  { "typedefs",           no_argument,       NULL,           't'   },
  { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
  { "update",             no_argument,       NULL,           'u'   },
  { "version",            no_argument,       NULL,           'V'   },
  { "vgrind",             no_argument,       NULL,           'v'   },
  { NULL,                 0,                 NULL,           0     }
};
#endif /* LONG_OPTIONS */
#ifdef ETAGS_REGEXPS
/* A regular expression given by the user.  Holds, among other
   things, the compiled pattern and the name string. */
typedef struct pattern
{
  struct pattern *p_next;               /* next element of the list */
  language *language;
  char *regex;
  struct re_pattern_buffer *pattern;    /* the compiled pattern */
  struct re_registers regs;
  char *name_pattern;                   /* the name string */
  bool error_signaled;
} pattern;

/* Head of the list of all regexps. */
pattern *p_head = NULL;

/* Size of the character set.  (From regex.c.) */
#define CHAR_SET_SIZE 256
/* Translation table for case-insensitive matching; main fills it with
   the lower-case form of each index. */
char lc_trans[CHAR_SET_SIZE];
#endif /* ETAGS_REGEXPS */
434 compressor compressors[] =
436 { "z", "gzip -d -c"},
437 { "Z", "gzip -d -c"},
438 { "gz", "gzip -d -c"},
439 { "GZ", "gzip -d -c"},
440 { "bz2", "bzip2 -d -c" },
441 { NULL }
445 * Language stuff.
448 /* Non-NULL if language fixed. */
449 language *forced_lang = NULL;
/* File name suffixes (and, for Perl, interpreter names) of each
   language.  Every list ends with NULL. */

/* Ada code */
char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };

/* Assembly code */
char *Asm_suffixes [] =
{
  "a",                  /* Unix assembler */
  "asm",                /* Microcontroller assembly */
  "def",                /* BSO/Tasking definition includes */
  "inc",                /* Microcontroller include files */
  "ins",                /* Microcontroller include files */
  "s",                  /* Unix assembler */
  "sa",                 /* Unix assembler */
  "S",                  /* cpp-processed Unix assembler */
  "src",                /* BSO/Tasking C compiler output */
  NULL
};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given.  That is why default_C_entries is called here. */
char *default_C_suffixes [] = { "c", "h", NULL };

char *Cplusplus_suffixes [] =
{
  "C", "H",
  "c++", "cc", "cpp", "cxx",
  "h++", "hh", "hpp", "hxx",
  "M",                  /* Objective C++ */
  "pdb",                /* Postscript with C syntax */
  NULL
};

char *Cjava_suffixes [] = { "java", NULL };

char *Cobol_suffixes [] = { "COB", "cob", NULL };

char *Cstar_suffixes [] = { "cs", "hs", NULL };

char *Erlang_suffixes [] = { "erl", "hrl", NULL };

char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };

char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "lsp", "ml", "LSP", NULL
};

char *Pascal_suffixes [] = { "p", "pas", NULL };

char *Perl_suffixes [] = { "pl", "pm", NULL };
char *Perl_interpreters [] = { "perl", "@PERL@", NULL };

char *plain_C_suffixes [] =
{
  "pc",                 /* Pro*C file */
  "m",                  /* Objective C file */
  "lm",                 /* Objective lex file */
  NULL
};

/* .psw is for PSWrap */
char *Postscript_suffixes [] = { "ps", "psw", NULL };

char *Prolog_suffixes [] = { "prolog", NULL };

char *Python_suffixes [] = { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number. */
char *Scheme_suffixes [] =
{
  "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL
};

char *TeX_suffixes [] =
{
  "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL
};

char *Texinfo_suffixes [] = { "texi", "txi", "texinfo", NULL };

/* .ym is Objective yacc file */
char *Yacc_suffixes [] = { "y", "ym", "yy", "yxx", "y++", NULL };
533 * Table of languages.
535 * It is ok for a given function to be listed under more than one
536 * name. I just didn't.
539 language lang_names [] =
541 { "ada", Ada_funcs, Ada_suffixes, NULL },
542 { "asm", Asm_labels, Asm_suffixes, NULL },
543 { "c", default_C_entries, default_C_suffixes, NULL },
544 { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL },
545 { "c*", Cstar_entries, Cstar_suffixes, NULL },
546 { "cobol", Cobol_paragraphs, Cobol_suffixes, NULL },
547 { "erlang", Erlang_functions, Erlang_suffixes, NULL },
548 { "fortran", Fortran_functions, Fortran_suffixes, NULL },
549 { "java", Cjava_entries, Cjava_suffixes, NULL },
550 { "lisp", Lisp_functions, Lisp_suffixes, NULL },
551 { "pascal", Pascal_functions, Pascal_suffixes, NULL },
552 { "perl", Perl_functions, Perl_suffixes, Perl_interpreters },
553 { "postscript", Postscript_functions, Postscript_suffixes, NULL },
554 { "proc", plain_C_entries, plain_C_suffixes, NULL },
555 { "prolog", Prolog_functions, Prolog_suffixes, NULL },
556 { "python", Python_functions, Python_suffixes, NULL },
557 { "scheme", Scheme_functions, Scheme_suffixes, NULL },
558 { "tex", TeX_functions, TeX_suffixes, NULL },
559 { "texinfo", Texinfo_functions, Texinfo_suffixes, NULL },
560 { "yacc", Yacc_entries, Yacc_suffixes, NULL },
561 { "auto", NULL }, /* default guessing scheme */
562 { "none", just_read_file }, /* regexp matching only */
563 { NULL, NULL } /* end of list */
566 static void
567 print_language_names ()
569 language *lang;
570 char **ext;
572 puts ("\nThese are the currently supported languages, along with the\n\
573 default file name suffixes:");
574 for (lang = lang_names; lang->name != NULL; lang++)
576 printf ("\t%s\t", lang->name);
577 if (lang->suffixes != NULL)
578 for (ext = lang->suffixes; *ext != NULL; ext++)
579 printf (" .%s", *ext);
580 puts ("");
582 puts ("Where `auto' means use default language for files based on file\n\
583 name suffix, and `none' means only do regexp processing on files.\n\
584 If no language is specified and no matching suffix is found,\n\
585 the first line of the file is read for a sharp-bang (#!) sequence\n\
586 followed by the name of an interpreter. If no such sequence is found,\n\
587 Fortran is tried first; if no tags are found, C is tried next.\n\
588 Compressed files are supported using gzip and bzip2.");
591 #ifndef EMACS_NAME
592 # define EMACS_NAME "GNU Emacs"
593 #endif
594 #ifndef VERSION
595 # define VERSION "21"
596 #endif
597 static void
598 print_version ()
600 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
601 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
602 puts ("This program is distributed under the same terms as Emacs");
604 exit (GOOD);
607 static void
608 print_help ()
610 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
612 These are the options accepted by %s.\n", progname, progname);
613 #ifdef LONG_OPTIONS
614 puts ("You may use unambiguous abbreviations for the long option names.");
615 #else
616 puts ("Long option names do not work with this executable, as it is not\n\
617 linked with GNU getopt.");
618 #endif /* LONG_OPTIONS */
619 puts ("A - as file name means read names from stdin (one per line).");
620 if (!CTAGS)
621 printf (" Absolute names are stored in the output file as they are.\n\
622 Relative ones are stored relative to the output file's directory.");
623 puts ("\n");
625 puts ("-a, --append\n\
626 Append tag entries to existing tags file.");
628 puts ("--packages-only\n\
629 For Ada files, only generate tags for packages .");
631 if (CTAGS)
632 puts ("-B, --backward-search\n\
633 Write the search commands for the tag entries using '?', the\n\
634 backward-search command instead of '/', the forward-search command.");
636 puts ("-C, --c++\n\
637 Treat files whose name suffix defaults to C language as C++ files.");
639 puts ("--declarations\n\
640 In C and derived languages, create tags for function declarations,");
641 if (CTAGS)
642 puts ("\tand create tags for extern variables if --globals is used.");
643 else
644 puts
645 ("\tand create tags for extern variables unless --no-globals is used.");
647 if (CTAGS)
648 puts ("-d, --defines\n\
649 Create tag entries for C #define constants and enum constants, too.");
650 else
651 puts ("-D, --no-defines\n\
652 Don't create tag entries for C #define constants and enum constants.\n\
653 This makes the tags file smaller.");
655 if (!CTAGS)
657 puts ("-i FILE, --include=FILE\n\
658 Include a note in tag file indicating that, when searching for\n\
659 a tag, one should also consult the tags file FILE after\n\
660 checking the current file.");
661 puts ("-l LANG, --language=LANG\n\
662 Force the following files to be considered as written in the\n\
663 named language up to the next --language=LANG option.");
666 if (CTAGS)
667 puts ("--globals\n\
668 Create tag entries for global variables in some languages.");
669 else
670 puts ("--no-globals\n\
671 Do not create tag entries for global variables in some\n\
672 languages. This makes the tags file smaller.");
673 puts ("--members\n\
674 Create tag entries for member variables in C and derived languages.");
676 #ifdef ETAGS_REGEXPS
677 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
678 Make a tag for each line matching pattern REGEXP in the following\n\
679 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
680 regexfile is a file containing one REGEXP per line.\n\
681 REGEXP is anchored (as if preceded by ^).\n\
682 The form /REGEXP/NAME/ creates a named tag.\n\
683 For example Tcl named tags can be created with:\n\
684 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
685 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
686 Like -r, --regex but ignore case when matching expressions.");
687 puts ("-R, --no-regex\n\
688 Don't create tags from regexps for the following files.");
689 #endif /* ETAGS_REGEXPS */
690 puts ("-o FILE, --output=FILE\n\
691 Write the tags to FILE.");
692 puts ("-I, --ignore-indentation\n\
693 Don't rely on indentation quite as much as normal. Currently,\n\
694 this means not to assume that a closing brace in the first\n\
695 column is the final brace of a function or structure\n\
696 definition in C and C++.");
698 if (CTAGS)
700 puts ("-t, --typedefs\n\
701 Generate tag entries for C and Ada typedefs.");
702 puts ("-T, --typedefs-and-c++\n\
703 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
704 and C++ member functions.");
705 puts ("-u, --update\n\
706 Update the tag entries for the given files, leaving tag\n\
707 entries for other files in place. Currently, this is\n\
708 implemented by deleting the existing entries for the given\n\
709 files and then rewriting the new entries at the end of the\n\
710 tags file. It is often faster to simply rebuild the entire\n\
711 tag file than to use this.");
712 puts ("-v, --vgrind\n\
713 Generates an index of items intended for human consumption,\n\
714 similar to the output of vgrind. The index is sorted, and\n\
715 gives the page number of each item.");
716 puts ("-w, --no-warn\n\
717 Suppress warning messages about entries defined in multiple\n\
718 files.");
719 puts ("-x, --cxref\n\
720 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
721 The output uses line numbers instead of page numbers, but\n\
722 beyond that the differences are cosmetic; try both to see\n\
723 which you like.");
726 puts ("-V, --version\n\
727 Print the version of the program.\n\
728 -h, --help\n\
729 Print this help message.");
731 print_language_names ();
733 puts ("");
734 puts ("Report bugs to bug-gnu-emacs@gnu.org");
736 exit (GOOD);
/* Kinds of entries main records while reading the command line. */
enum argument_type
{
  at_language = 0,              /* a language selection */
  at_regexp,                    /* a regexp */
  at_filename,                  /* a file name */
  at_icregexp                   /* a regexp to be matched ignoring case */
};
748 /* This structure helps us allow mixing of --lang and file names. */
749 typedef struct
751 enum argument_type arg_type;
752 char *what;
753 language *lang; /* language of the regexp */
754 } argument;
756 #ifdef VMS /* VMS specific functions */
758 #define EOS '\0'
/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this? */
#define MAX_FILE_SPEC_LEN 255

/* A file specification: its current length and its text, with one
   spare byte in BODY beyond MAX_FILE_SPEC_LEN. */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
769 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
770 returning in each successive call the next file name matching the input
771 spec. The function expects that each in_spec passed
772 to it will be processed to completion; in particular, up to and
773 including the call following that in which the last matching name
774 is returned, the function ignores the value of in_spec, and will
775 only start processing a new spec with the following call.
776 If an error occurs, on return out_spec contains the value
777 of in_spec when the error occurred.
779 With each successive file name returned in out_spec, the
780 function's return value is one. When there are no more matching
781 names the function returns zero. If on the first call no file
782 matches in_spec, or there is any other error, -1 is returned.
785 #include <rmsdef.h>
786 #include <descrip.h>
787 #define OUTSIZE MAX_FILE_SPEC_LEN
788 static short
789 fn_exp (out, in)
790 vspec *out;
791 char *in;
793 static long context = 0;
794 static struct dsc$descriptor_s o;
795 static struct dsc$descriptor_s i;
796 static bool pass1 = TRUE;
797 long status;
798 short retval;
800 if (pass1)
802 pass1 = FALSE;
803 o.dsc$a_pointer = (char *) out;
804 o.dsc$w_length = (short)OUTSIZE;
805 i.dsc$a_pointer = in;
806 i.dsc$w_length = (short)strlen(in);
807 i.dsc$b_dtype = DSC$K_DTYPE_T;
808 i.dsc$b_class = DSC$K_CLASS_S;
809 o.dsc$b_dtype = DSC$K_DTYPE_VT;
810 o.dsc$b_class = DSC$K_CLASS_VS;
812 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
814 out->body[out->curlen] = EOS;
815 return 1;
817 else if (status == RMS$_NMF)
818 retval = 0;
819 else
821 strcpy(out->body, in);
822 retval = -1;
824 lib$find_file_end(&context);
825 pass1 = TRUE;
826 return retval;
830 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
831 name of each file specified by the provided arg expanding wildcards.
833 static char *
834 gfnames (arg, p_error)
835 char *arg;
836 bool *p_error;
838 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
840 switch (fn_exp (&filename, arg))
842 case 1:
843 *p_error = FALSE;
844 return filename.body;
845 case 0:
846 *p_error = FALSE;
847 return NULL;
848 default:
849 *p_error = TRUE;
850 return filename.body;
#ifndef OLD     /* Newer versions of VMS do provide `system'. */
/* Stand-in for system: CMD is not run.  An error message is issued
   and -1 is returned, so that a caller looking at the result sees a
   definite failure value rather than an indeterminate one. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
862 #define VERSION_DELIM ';'
863 char *massage_name (s)
864 char *s;
866 char *start = s;
868 for ( ; *s; s++)
869 if (*s == VERSION_DELIM)
871 *s = EOS;
872 break;
874 else
875 *s = lowcase (*s);
876 return start;
878 #endif /* VMS */
882 main (argc, argv)
883 int argc;
884 char *argv[];
886 int i;
887 unsigned int nincluded_files;
888 char **included_files;
889 char *this_file;
890 argument *argbuffer;
891 int current_arg, file_count;
892 linebuffer filename_lb;
893 #ifdef VMS
894 bool got_err;
895 #endif
897 #ifdef DOS_NT
898 _fmode = O_BINARY; /* all of files are treated as binary files */
899 #endif /* DOS_NT */
901 progname = argv[0];
902 nincluded_files = 0;
903 included_files = xnew (argc, char *);
904 current_arg = 0;
905 file_count = 0;
907 /* Allocate enough no matter what happens. Overkill, but each one
908 is small. */
909 argbuffer = xnew (argc, argument);
911 #ifdef ETAGS_REGEXPS
912 /* Set syntax for regular expression routines. */
913 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
914 /* Translation table for case-insensitive search. */
915 for (i = 0; i < CHAR_SET_SIZE; i++)
916 lc_trans[i] = lowcase (i);
917 #endif /* ETAGS_REGEXPS */
920 * If etags, always find typedefs and structure tags. Why not?
921 * Also default is to find macro constants, enum constants and
922 * global variables.
924 if (!CTAGS)
926 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
927 globals = TRUE;
928 members = FALSE;
931 while (1)
933 int opt;
934 char *optstring;
936 #ifdef ETAGS_REGEXPS
937 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
938 #else
939 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
940 #endif /* ETAGS_REGEXPS */
942 #ifndef LONG_OPTIONS
943 optstring = optstring + 1;
944 #endif /* LONG_OPTIONS */
946 opt = getopt_long (argc, argv, optstring, longopts, 0);
947 if (opt == EOF)
948 break;
950 switch (opt)
952 case 0:
953 /* If getopt returns 0, then it has already processed a
954 long-named option. We should do nothing. */
955 break;
957 case 1:
958 /* This means that a file name has been seen. Record it. */
959 argbuffer[current_arg].arg_type = at_filename;
960 argbuffer[current_arg].what = optarg;
961 ++current_arg;
962 ++file_count;
963 break;
965 /* Common options. */
966 case 'a': append_to_tagfile = TRUE; break;
967 case 'C': cplusplus = TRUE; break;
968 case 'd': constantypedefs = TRUE; break;
969 case 'D': constantypedefs = FALSE; break;
970 case 'f': /* for compatibility with old makefiles */
971 case 'o':
972 if (tagfile)
974 error ("-o option may only be given once.", (char *)NULL);
975 suggest_asking_for_help ();
977 tagfile = optarg;
978 break;
979 case 'I':
980 case 'S': /* for backward compatibility */
981 noindentypedefs = TRUE;
982 break;
983 case 'l':
985 language *lang = get_language_from_name (optarg);
986 if (lang != NULL)
988 argbuffer[current_arg].lang = lang;
989 argbuffer[current_arg].arg_type = at_language;
990 ++current_arg;
993 break;
994 #ifdef ETAGS_REGEXPS
995 case 'r':
996 argbuffer[current_arg].arg_type = at_regexp;
997 argbuffer[current_arg].what = optarg;
998 ++current_arg;
999 break;
1000 case 'R':
1001 argbuffer[current_arg].arg_type = at_regexp;
1002 argbuffer[current_arg].what = NULL;
1003 ++current_arg;
1004 break;
1005 case 'c':
1006 argbuffer[current_arg].arg_type = at_icregexp;
1007 argbuffer[current_arg].what = optarg;
1008 ++current_arg;
1009 break;
1010 #endif /* ETAGS_REGEXPS */
1011 case 'V':
1012 print_version ();
1013 break;
1014 case 'h':
1015 case 'H':
1016 print_help ();
1017 break;
1018 case 't':
1019 typedefs = TRUE;
1020 break;
1021 case 'T':
1022 typedefs = typedefs_and_cplusplus = TRUE;
1023 break;
1024 #if (!CTAGS)
1025 /* Etags options */
1026 case 'i':
1027 included_files[nincluded_files++] = optarg;
1028 break;
1029 #else /* CTAGS */
1030 /* Ctags options. */
1031 case 'B': searchar = '?'; break;
1032 case 'u': update = TRUE; break;
1033 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1034 case 'x': cxref_style = TRUE; break;
1035 case 'w': no_warnings = TRUE; break;
1036 #endif /* CTAGS */
1037 default:
1038 suggest_asking_for_help ();
1042 for (; optind < argc; ++optind)
1044 argbuffer[current_arg].arg_type = at_filename;
1045 argbuffer[current_arg].what = argv[optind];
1046 ++current_arg;
1047 ++file_count;
1050 if (nincluded_files == 0 && file_count == 0)
1052 error ("no input files specified.", (char *)NULL);
1053 suggest_asking_for_help ();
1056 if (tagfile == NULL)
1057 tagfile = CTAGS ? "tags" : "TAGS";
1058 cwd = etags_getcwd (); /* the current working directory */
1059 if (cwd[strlen (cwd) - 1] != '/')
1061 char *oldcwd = cwd;
1062 cwd = concat (oldcwd, "/", "");
1063 free (oldcwd);
1065 if (streq (tagfile, "-"))
1066 tagfiledir = cwd;
1067 else
1068 tagfiledir = absolute_dirname (tagfile, cwd);
1070 init (); /* set up boolean "functions" */
1072 initbuffer (&lb);
1073 initbuffer (&token_name);
1074 initbuffer (&lbs[0].lb);
1075 initbuffer (&lbs[1].lb);
1076 initbuffer (&filename_lb);
1078 if (!CTAGS)
1080 if (streq (tagfile, "-"))
1082 tagf = stdout;
1083 #ifdef DOS_NT
1084 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1085 doesn't take effect until after `stdout' is already open). */
1086 if (!isatty (fileno (stdout)))
1087 setmode (fileno (stdout), O_BINARY);
1088 #endif /* DOS_NT */
1090 else
1091 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1092 if (tagf == NULL)
1093 pfatal (tagfile);
1097 * Loop through files finding functions.
1099 for (i = 0; i < current_arg; ++i)
1101 switch (argbuffer[i].arg_type)
1103 case at_language:
1104 forced_lang = argbuffer[i].lang;
1105 break;
1106 #ifdef ETAGS_REGEXPS
1107 case at_regexp:
1108 analyse_regex (argbuffer[i].what, FALSE);
1109 break;
1110 case at_icregexp:
1111 analyse_regex (argbuffer[i].what, TRUE);
1112 break;
1113 #endif
1114 case at_filename:
1115 #ifdef VMS
1116 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1118 if (got_err)
1120 error ("can't find file %s\n", this_file);
1121 argc--, argv++;
1123 else
1125 this_file = massage_name (this_file);
1127 #else
1128 this_file = argbuffer[i].what;
1129 #endif
1130 /* Input file named "-" means read file names from stdin
1131 (one per line) and use them. */
1132 if (streq (this_file, "-"))
1133 while (readline_internal (&filename_lb, stdin) > 0)
1134 process_file (filename_lb.buffer);
1135 else
1136 process_file (this_file);
1137 #ifdef VMS
1139 #endif
1140 break;
1144 #ifdef ETAGS_REGEXPS
1145 free_patterns ();
1146 #endif /* ETAGS_REGEXPS */
1148 if (!CTAGS)
1150 while (nincluded_files-- > 0)
1151 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1153 fclose (tagf);
1154 exit (GOOD);
1157 /* If CTAGS, we are here. process_file did not write the tags yet,
1158 because we want them ordered. Let's do it now. */
1159 if (cxref_style)
1161 put_entries (head);
1162 free_tree (head);
1163 head = NULL;
1164 exit (GOOD);
1167 if (update)
1169 char cmd[BUFSIZ];
1170 for (i = 0; i < current_arg; ++i)
1172 if (argbuffer[i].arg_type != at_filename)
1173 continue;
1174 sprintf (cmd,
1175 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1176 tagfile, argbuffer[i].what, tagfile);
1177 if (system (cmd) != GOOD)
1178 fatal ("failed to execute shell command", (char *)NULL);
1180 append_to_tagfile = TRUE;
1183 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1184 if (tagf == NULL)
1185 pfatal (tagfile);
1186 put_entries (head);
1187 free_tree (head);
1188 head = NULL;
1189 fclose (tagf);
1191 if (update)
1193 char cmd[BUFSIZ];
1194 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1195 exit (system (cmd));
1197 return GOOD;
1203 * Return a compressor given the file name. If EXTPTR is non-zero,
1204 * return a pointer into FILE where the compressor-specific
1205 * extension begins. If no compressor is found, NULL is returned
1206 * and EXTPTR is not significant.
1207 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1209 static compressor *
1210 get_compressor_from_suffix (file, extptr)
1211 char *file;
1212 char **extptr;
1214 compressor *compr;
1215 char *slash, *suffix;
1217 /* This relies on FN to be after canonicalize_filename,
1218 so we don't need to consider backslashes on DOS_NT. */
1219 slash = etags_strrchr (file, '/');
1220 suffix = etags_strrchr (file, '.');
1221 if (suffix == NULL || suffix < slash)
1222 return NULL;
1223 if (extptr != NULL)
1224 *extptr = suffix;
1225 suffix += 1;
1226 /* Let those poor souls who live with DOS 8+3 file name limits get
1227 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1228 Only the first do loop is run if not MSDOS */
1231 for (compr = compressors; compr->suffix != NULL; compr++)
1232 if (streq (compr->suffix, suffix))
1233 return compr;
1234 if (!MSDOS)
1235 break; /* do it only once: not really a loop */
1236 if (extptr != NULL)
1237 *extptr = ++suffix;
1238 } while (*suffix != '\0');
1239 return NULL;
1245 * Return a language given the name.
1247 static language *
1248 get_language_from_name (name)
1249 char *name;
1251 language *lang;
1253 if (name == NULL)
1254 error ("empty language name", (char *)NULL);
1255 else
1257 for (lang = lang_names; lang->name != NULL; lang++)
1258 if (streq (name, lang->name))
1259 return lang;
1260 error ("unknown language \"%s\"", name);
1263 return NULL;
1268 * Return a language given the interpreter name.
1270 static language *
1271 get_language_from_interpreter (interpreter)
1272 char *interpreter;
1274 language *lang;
1275 char **iname;
1277 if (interpreter == NULL)
1278 return NULL;
1279 for (lang = lang_names; lang->name != NULL; lang++)
1280 if (lang->interpreters != NULL)
1281 for (iname = lang->interpreters; *iname != NULL; iname++)
1282 if (streq (*iname, interpreter))
1283 return lang;
1285 return NULL;
1291 * Return a language given the file name.
1293 static language *
1294 get_language_from_suffix (file)
1295 char *file;
1297 language *lang;
1298 char **ext, *suffix;
1300 suffix = etags_strrchr (file, '.');
1301 if (suffix == NULL)
1302 return NULL;
1303 suffix += 1;
1304 for (lang = lang_names; lang->name != NULL; lang++)
1305 if (lang->suffixes != NULL)
1306 for (ext = lang->suffixes; *ext != NULL; ext++)
1307 if (streq (*ext, suffix))
1308 return lang;
1309 return NULL;
1315 * This routine is called on each file argument.
1317 static void
1318 process_file (file)
1319 char *file;
1321 struct stat stat_buf;
1322 FILE *inf;
1323 compressor *compr;
1324 char *compressed_name, *uncompressed_name;
1325 char *ext, *real_name;
1328 canonicalize_filename (file);
1329 if (streq (file, tagfile) && !streq (tagfile, "-"))
1331 error ("skipping inclusion of %s in self.", file);
1332 return;
1334 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1336 compressed_name = NULL;
1337 real_name = uncompressed_name = savestr (file);
1339 else
1341 real_name = compressed_name = savestr (file);
1342 uncompressed_name = savenstr (file, ext - file);
1345 /* If the canonicalised uncompressed name has already be dealt with,
1346 skip it silently, else add it to the list. */
1348 typedef struct processed_file
1350 char *filename;
1351 struct processed_file *next;
1352 } processed_file;
1353 static processed_file *pf_head = NULL;
1354 register processed_file *fnp;
1356 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1357 if (streq (uncompressed_name, fnp->filename))
1358 goto exit;
1359 fnp = pf_head;
1360 pf_head = xnew (1, struct processed_file);
1361 pf_head->filename = savestr (uncompressed_name);
1362 pf_head->next = fnp;
1365 if (stat (real_name, &stat_buf) != 0)
1367 /* Reset real_name and try with a different name. */
1368 real_name = NULL;
1369 if (compressed_name != NULL) /* try with the given suffix */
1371 if (stat (uncompressed_name, &stat_buf) == 0)
1372 real_name = uncompressed_name;
1374 else /* try all possible suffixes */
1376 for (compr = compressors; compr->suffix != NULL; compr++)
1378 compressed_name = concat (file, ".", compr->suffix);
1379 if (stat (compressed_name, &stat_buf) != 0)
1381 if (MSDOS)
1383 char *suf = compressed_name + strlen (file);
1384 size_t suflen = strlen (compr->suffix) + 1;
1385 for ( ; suf[1]; suf++, suflen--)
1387 memmove (suf, suf + 1, suflen);
1388 if (stat (compressed_name, &stat_buf) == 0)
1390 real_name = compressed_name;
1391 break;
1394 if (real_name != NULL)
1395 break;
1396 } /* MSDOS */
1397 free (compressed_name);
1398 compressed_name = NULL;
1400 else
1402 real_name = compressed_name;
1403 break;
1407 if (real_name == NULL)
1409 perror (file);
1410 goto exit;
1412 } /* try with a different name */
1414 if (!S_ISREG (stat_buf.st_mode))
1416 error ("skipping %s: it is not a regular file.", real_name);
1417 goto exit;
1419 if (real_name == compressed_name)
1421 char *cmd = concat (compr->command, " ", real_name);
1422 inf = popen (cmd, "r");
1423 free (cmd);
1425 else
1426 inf = fopen (real_name, "r");
1427 if (inf == NULL)
1429 perror (real_name);
1430 goto exit;
1433 find_entries (uncompressed_name, inf);
1435 if (real_name == compressed_name)
1436 pclose (inf);
1437 else
1438 fclose (inf);
1440 if (!CTAGS)
1442 char *filename;
1444 if (filename_is_absolute (uncompressed_name))
1446 /* file is an absolute file name. Canonicalise it. */
1447 filename = absolute_filename (uncompressed_name, cwd);
1449 else
1451 /* file is a file name relative to cwd. Make it relative
1452 to the directory of the tags file. */
1453 filename = relative_filename (uncompressed_name, tagfiledir);
1455 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1456 free (filename);
1457 put_entries (head);
1458 free_tree (head);
1459 head = NULL;
1462 exit:
1463 if (compressed_name) free(compressed_name);
1464 if (uncompressed_name) free(uncompressed_name);
1465 return;
1469 * This routine sets up the boolean pseudo-functions which work
1470 * by setting boolean flags dependent upon the corresponding character.
1471 * Every char which is NOT in that string is not a white char. Therefore,
1472 * all of the array "_wht" is set to FALSE, and then the elements
1473 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1474 * of a char is TRUE if it is the string "white", else FALSE.
1476 static void
1477 init ()
1479 register char *sp;
1480 register int i;
1482 for (i = 0; i < CHARS; i++)
1483 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1484 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1485 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1486 notinname('\0') = notinname('\n');
1487 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1488 begtoken('\0') = begtoken('\n');
1489 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1490 intoken('\0') = intoken('\n');
1491 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1492 endtoken('\0') = endtoken('\n');
1496 * This routine opens the specified file and calls the function
1497 * which finds the function and type definitions.
1499 node *last_node = NULL;
1501 static void
1502 find_entries (file, inf)
1503 char *file;
1504 FILE *inf;
1506 char *cp;
1507 language *lang;
1508 node *old_last_node;
1510 /* Memory leakage here: the string pointed by curfile is
1511 never released, because curfile is copied into np->file
1512 for each node, to be used in CTAGS mode. The amount of
1513 memory leaked here is the sum of the lengths of the
1514 file names. */
1515 curfile = savestr (file);
1517 /* If user specified a language, use it. */
1518 lang = forced_lang;
1519 if (lang != NULL && lang->function != NULL)
1521 curlang = lang;
1522 lang->function (inf);
1523 return;
1526 /* Try to guess the language given the file name. */
1527 lang = get_language_from_suffix (file);
1528 if (lang != NULL && lang->function != NULL)
1530 curlang = lang;
1531 lang->function (inf);
1532 return;
1535 /* Look for sharp-bang as the first two characters. */
1536 if (readline_internal (&lb, inf) > 0
1537 && lb.len >= 2
1538 && lb.buffer[0] == '#'
1539 && lb.buffer[1] == '!')
1541 char *lp;
1543 /* Set lp to point at the first char after the last slash in the
1544 line or, if no slashes, at the first nonblank. Then set cp to
1545 the first successive blank and terminate the string. */
1546 lp = etags_strrchr (lb.buffer+2, '/');
1547 if (lp != NULL)
1548 lp += 1;
1549 else
1550 lp = skip_spaces (lb.buffer + 2);
1551 cp = skip_non_spaces (lp);
1552 *cp = '\0';
1554 if (strlen (lp) > 0)
1556 lang = get_language_from_interpreter (lp);
1557 if (lang != NULL && lang->function != NULL)
1559 curlang = lang;
1560 lang->function (inf);
1561 return;
1565 /* We rewind here, even if inf may be a pipe. We fail if the
1566 length of the first line is longer than the pipe block size,
1567 which is unlikely. */
1568 rewind (inf);
1570 /* Try Fortran. */
1571 old_last_node = last_node;
1572 curlang = get_language_from_name ("fortran");
1573 Fortran_functions (inf);
1575 /* No Fortran entries found. Try C. */
1576 if (old_last_node == last_node)
1578 /* We do not tag if rewind fails.
1579 Only the file name will be recorded in the tags file. */
1580 rewind (inf);
1581 curlang = get_language_from_name (cplusplus ? "c++" : "c");
1582 default_C_entries (inf);
1584 return;
1587 /* Record a tag. */
1588 static void
1589 pfnote (name, is_func, linestart, linelen, lno, cno)
1590 char *name; /* tag name, or NULL if unnamed */
1591 bool is_func; /* tag is a function */
1592 char *linestart; /* start of the line where tag is */
1593 int linelen; /* length of the line where tag is */
1594 int lno; /* line number */
1595 long cno; /* character number */
1597 register node *np;
1599 if (CTAGS && name == NULL)
1600 return;
1602 np = xnew (1, node);
1604 /* If ctags mode, change name "main" to M<thisfilename>. */
1605 if (CTAGS && !cxref_style && streq (name, "main"))
1607 register char *fp = etags_strrchr (curfile, '/');
1608 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1609 fp = etags_strrchr (np->name, '.');
1610 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1611 fp[0] = '\0';
1613 else
1614 np->name = name;
1615 np->been_warned = FALSE;
1616 np->file = curfile;
1617 np->is_func = is_func;
1618 np->lno = lno;
1619 /* Our char numbers are 0-base, because of C language tradition?
1620 ctags compatibility? old versions compatibility? I don't know.
1621 Anyway, since emacs's are 1-base we expect etags.el to take care
1622 of the difference. If we wanted to have 1-based numbers, we would
1623 uncomment the +1 below. */
1624 np->cno = cno /* + 1 */ ;
1625 np->left = np->right = NULL;
1626 if (CTAGS && !cxref_style)
1628 if (strlen (linestart) < 50)
1629 np->pat = concat (linestart, "$", "");
1630 else
1631 np->pat = savenstr (linestart, 50);
1633 else
1634 np->pat = savenstr (linestart, linelen);
1636 add_node (np, &head);
1639 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1640 * From: Sam Kendall <kendall@mv.mv.com>
1641 * Subject: Proposal for firming up the TAGS format specification
1642 * To: F.Potorti@cnuce.cnr.it
1644 * pfnote should emit the optimized form [unnamed tag] only if:
1645 * 1. name does not contain any of the characters " \t\r\n(),;";
1646 * 2. linestart contains name as either a rightmost, or rightmost but
1647 * one character, substring;
1648 * 3. the character, if any, immediately before name in linestart must
1649 * be one of the characters " \t(),;";
1650 * 4. the character, if any, immediately after name in linestart must
1651 * also be one of the characters " \t(),;".
1653 * The real implementation uses the notinname() macro, which recognises
1654 * characters slightly different form " \t\r\n(),;". See the variable
1655 * `nonam'.
1657 #define traditional_tag_style TRUE
1658 static void
1659 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1660 char *name; /* tag name, or NULL if unnamed */
1661 int namelen; /* tag length */
1662 bool is_func; /* tag is a function */
1663 char *linestart; /* start of the line where tag is */
1664 int linelen; /* length of the line where tag is */
1665 int lno; /* line number */
1666 long cno; /* character number */
1668 register char *cp;
1669 bool named;
1671 named = TRUE;
1672 if (!CTAGS)
1674 for (cp = name; !notinname (*cp); cp++)
1675 continue;
1676 if (*cp == '\0') /* rule #1 */
1678 cp = linestart + linelen - namelen;
1679 if (notinname (linestart[linelen-1]))
1680 cp -= 1; /* rule #4 */
1681 if (cp >= linestart /* rule #2 */
1682 && (cp == linestart
1683 || notinname (cp[-1])) /* rule #3 */
1684 && strneq (name, cp, namelen)) /* rule #2 */
1685 named = FALSE; /* use unnamed tag */
1689 if (named)
1690 name = savenstr (name, namelen);
1691 else
1692 name = NULL;
1693 pfnote (name, is_func, linestart, linelen, lno, cno);
1697 * free_tree ()
1698 * recurse on left children, iterate on right children.
1700 static void
1701 free_tree (np)
1702 register node *np;
1704 while (np)
1706 register node *node_right = np->right;
1707 free_tree (np->left);
1708 if (np->name != NULL)
1709 free (np->name);
1710 free (np->pat);
1711 free (np);
1712 np = node_right;
1717 * add_node ()
1718 * Adds a node to the tree of nodes. In etags mode, we don't keep
1719 * it sorted; we just keep a linear list. In ctags mode, maintain
1720 * an ordered tree, with no attempt at balancing.
1722 * add_node is the only function allowed to add nodes, so it can
1723 * maintain state.
1725 static void
1726 add_node (np, cur_node_p)
1727 node *np, **cur_node_p;
1729 register int dif;
1730 register node *cur_node = *cur_node_p;
1732 if (cur_node == NULL)
1734 *cur_node_p = np;
1735 last_node = np;
1736 return;
1739 if (!CTAGS)
1741 /* Etags Mode */
1742 if (last_node == NULL)
1743 fatal ("internal error in add_node", (char *)NULL);
1744 last_node->right = np;
1745 last_node = np;
1747 else
1749 /* Ctags Mode */
1750 dif = strcmp (np->name, cur_node->name);
1753 * If this tag name matches an existing one, then
1754 * do not add the node, but maybe print a warning.
1756 if (!dif)
1758 if (streq (np->file, cur_node->file))
1760 if (!no_warnings)
1762 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1763 np->file, lineno, np->name);
1764 fprintf (stderr, "Second entry ignored\n");
1767 else if (!cur_node->been_warned && !no_warnings)
1769 fprintf
1770 (stderr,
1771 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1772 np->file, cur_node->file, np->name);
1773 cur_node->been_warned = TRUE;
1775 return;
1778 /* Actually add the node */
1779 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1783 static void
1784 put_entries (np)
1785 register node *np;
1787 register char *sp;
1789 if (np == NULL)
1790 return;
1792 /* Output subentries that precede this one */
1793 put_entries (np->left);
1795 /* Output this entry */
1797 if (!CTAGS)
1799 if (np->name != NULL)
1800 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1801 np->pat, np->name, np->lno, np->cno);
1802 else
1803 fprintf (tagf, "%s\177%d,%ld\n",
1804 np->pat, np->lno, np->cno);
1806 else
1808 if (np->name == NULL)
1809 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1811 if (cxref_style)
1813 if (vgrind_style)
1814 fprintf (stdout, "%s %s %d\n",
1815 np->name, np->file, (np->lno + 63) / 64);
1816 else
1817 fprintf (stdout, "%-16s %3d %-16s %s\n",
1818 np->name, np->lno, np->file, np->pat);
1820 else
1822 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1824 if (np->is_func)
1825 { /* a function */
1826 putc (searchar, tagf);
1827 putc ('^', tagf);
1829 for (sp = np->pat; *sp; sp++)
1831 if (*sp == '\\' || *sp == searchar)
1832 putc ('\\', tagf);
1833 putc (*sp, tagf);
1835 putc (searchar, tagf);
1837 else
1838 { /* a typedef; text pattern inadequate */
1839 fprintf (tagf, "%d", np->lno);
1841 putc ('\n', tagf);
1845 /* Output subentries that follow this one */
1846 put_entries (np->right);
/* Length of a number's decimal representation: the number of decimal
   digits of NUM, which is 1 for any NUM less than 10.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, one more for each division by ten that
     leaves something positive.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
1861 * Return total number of characters that put_entries will output for
1862 * the nodes in the subtree of the specified node. Works only if
1863 * we are not ctags, but called only in that case. This count
1864 * is irrelevant with the new tags.el, but is still supplied for
1865 * backward compatibility.
1867 static int
1868 total_size_of_entries (np)
1869 register node *np;
1871 register int total;
1873 if (np == NULL)
1874 return 0;
1876 for (total = 0; np != NULL; np = np->right)
1878 /* Count left subentries. */
1879 total += total_size_of_entries (np->left);
1881 /* Count this entry */
1882 total += strlen (np->pat) + 1;
1883 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1884 if (np->name != NULL)
1885 total += 1 + strlen (np->name); /* \001name */
1888 return total;
/*
 * The C symbol tables.
 *
 * Kinds of keyword that the keyword table can assign to a token;
 * st_none is returned by C_symtype for anything that is not a keyword
 * of the language being parsed.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef,
  st_C_typespec
};
/* Forward declarations of the keyword lookup functions defined below;
   P_ keeps the parameter lists only when prototypes are supported. */
1905 static unsigned int hash P_((const char *, unsigned int));
1906 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1907 static enum sym_type C_symtype P_((char *, int, int));
1909 /* Feed stuff between (but not including) %[ and %] lines to:
1910 gperf -c -k 1,3 -o -p -r -t
1912 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1914 if, 0, st_C_ignore
1915 for, 0, st_C_ignore
1916 while, 0, st_C_ignore
1917 switch, 0, st_C_ignore
1918 return, 0, st_C_ignore
1919 @interface, 0, st_C_objprot
1920 @protocol, 0, st_C_objprot
1921 @implementation,0, st_C_objimpl
1922 @end, 0, st_C_objend
1923 import, C_JAVA, st_C_ignore
1924 package, C_JAVA, st_C_ignore
1925 friend, C_PLPL, st_C_ignore
1926 extends, C_JAVA, st_C_javastruct
1927 implements, C_JAVA, st_C_javastruct
1928 interface, C_JAVA, st_C_struct
1929 class, C_PLPL, st_C_struct
1930 namespace, C_PLPL, st_C_struct
1931 domain, C_STAR, st_C_struct
1932 union, 0, st_C_struct
1933 struct, 0, st_C_struct
1934 extern, 0, st_C_extern
1935 enum, 0, st_C_enum
1936 typedef, 0, st_C_typedef
1937 define, 0, st_C_define
1938 operator, C_PLPL, st_C_operator
1939 bool, C_PLPL, st_C_typespec
1940 long, 0, st_C_typespec
1941 short, 0, st_C_typespec
1942 int, 0, st_C_typespec
1943 char, 0, st_C_typespec
1944 float, 0, st_C_typespec
1945 double, 0, st_C_typespec
1946 signed, 0, st_C_typespec
1947 unsigned, 0, st_C_typespec
1948 auto, 0, st_C_typespec
1949 void, 0, st_C_typespec
1950 static, 0, st_C_typespec
1951 const, 0, st_C_typespec
1952 volatile, 0, st_C_typespec
1953 explicit, C_PLPL, st_C_typespec
1954 mutable, C_PLPL, st_C_typespec
1955 typename, C_PLPL, st_C_typespec
1956 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1957 DEFUN, 0, st_C_gnumacro
1958 SYSCALL, 0, st_C_gnumacro
1959 ENTRY, 0, st_C_gnumacro
1960 PSEUDO, 0, st_C_gnumacro
1961 # These are defined inside C functions, so currently they are not met.
1962 # EXFUN used in glibc, DEFVAR_* in emacs.
1963 #EXFUN, 0, st_C_gnumacro
1964 #DEFVAR_, 0, st_C_gnumacro
1966 and replace lines between %< and %> with its output. */
1967 /*%<*/
1968 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1969 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
1970 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1972 #define TOTAL_KEYWORDS 46
1973 #define MIN_WORD_LENGTH 2
1974 #define MAX_WORD_LENGTH 15
1975 #define MIN_HASH_VALUE 13
1976 #define MAX_HASH_VALUE 123
1977 /* maximum key range = 111, duplicates = 0 */
/* Generated hash function: the result is LEN plus the asso_values
   entry for the first character of STR and, for any LEN other than 1
   or 2, the entry for the third character as well.  Do not edit by
   hand: regenerate with the gperf command line above. */
1979 #ifdef __GNUC__
1980 __inline
1981 #endif
1982 static unsigned int
1983 hash (str, len)
1984 register const char *str;
1985 register unsigned int len;
1987 static unsigned char asso_values[] =
1989 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1990 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1991 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1992 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1993 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1994 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1995 124, 124, 124, 124, 3, 124, 124, 124, 43, 6,
1996 11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1997 11, 124, 124, 58, 7, 124, 124, 124, 124, 124,
1998 124, 124, 124, 124, 124, 124, 124, 57, 7, 42,
1999 4, 14, 52, 0, 124, 53, 124, 124, 29, 11,
2000 6, 35, 32, 124, 29, 34, 59, 58, 51, 24,
2001 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2002 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2003 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2004 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2005 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2006 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2007 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2008 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2009 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2010 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2011 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2012 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2013 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2014 124, 124, 124, 124, 124, 124
2016 register int hval = len;
2018 switch (hval)
2020 default:
2021 case 3:
2022 hval += asso_values[(unsigned char)str[2]];
/* deliberate fall through: the first character always contributes */
2023 case 2:
2024 case 1:
2025 hval += asso_values[(unsigned char)str[0]];
2026 break;
2028 return hval;
/* Generated lookup function: return the wordlist entry matching the
   first LEN characters of STR, or 0 if there is none.  Only lengths
   from MIN_WORD_LENGTH to MAX_WORD_LENGTH are considered; the slot is
   selected with hash () and confirmed by comparing the first
   character and then, with strncmp, the remaining LEN - 1 ones.  Do
   not edit by hand: regenerate with gperf. */
2031 #ifdef __GNUC__
2032 __inline
2033 #endif
2034 static struct C_stab_entry *
2035 in_word_set (str, len)
2036 register const char *str;
2037 register unsigned int len;
2039 static struct C_stab_entry wordlist[] =
2041 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2042 {""}, {""}, {""}, {""},
2043 {"@end", 0, st_C_objend},
2044 {""}, {""}, {""}, {""},
2045 {"ENTRY", 0, st_C_gnumacro},
2046 {"@interface", 0, st_C_objprot},
2047 {""},
2048 {"domain", C_STAR, st_C_struct},
2049 {""},
2050 {"PSEUDO", 0, st_C_gnumacro},
2051 {""}, {""},
2052 {"namespace", C_PLPL, st_C_struct},
2053 {""}, {""},
2054 {"@implementation",0, st_C_objimpl},
2055 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2056 {"long", 0, st_C_typespec},
2057 {"signed", 0, st_C_typespec},
2058 {"@protocol", 0, st_C_objprot},
2059 {""}, {""}, {""}, {""},
2060 {"bool", C_PLPL, st_C_typespec},
2061 {""}, {""}, {""}, {""}, {""}, {""},
2062 {"const", 0, st_C_typespec},
2063 {"explicit", C_PLPL, st_C_typespec},
2064 {"if", 0, st_C_ignore},
2065 {""},
2066 {"operator", C_PLPL, st_C_operator},
2067 {""},
2068 {"DEFUN", 0, st_C_gnumacro},
2069 {""}, {""},
2070 {"define", 0, st_C_define},
2071 {""}, {""}, {""}, {""}, {""},
2072 {"double", 0, st_C_typespec},
2073 {"struct", 0, st_C_struct},
2074 {""}, {""}, {""}, {""},
2075 {"short", 0, st_C_typespec},
2076 {""},
2077 {"enum", 0, st_C_enum},
2078 {"mutable", C_PLPL, st_C_typespec},
2079 {""},
2080 {"extern", 0, st_C_extern},
2081 {"extends", C_JAVA, st_C_javastruct},
2082 {"package", C_JAVA, st_C_ignore},
2083 {"while", 0, st_C_ignore},
2084 {""},
2085 {"for", 0, st_C_ignore},
2086 {""}, {""}, {""},
2087 {"volatile", 0, st_C_typespec},
2088 {""}, {""},
2089 {"import", C_JAVA, st_C_ignore},
2090 {"float", 0, st_C_typespec},
2091 {"switch", 0, st_C_ignore},
2092 {"return", 0, st_C_ignore},
2093 {"implements", C_JAVA, st_C_javastruct},
2094 {""},
2095 {"static", 0, st_C_typespec},
2096 {"typedef", 0, st_C_typedef},
2097 {"typename", C_PLPL, st_C_typespec},
2098 {"unsigned", 0, st_C_typespec},
2099 {""}, {""},
2100 {"char", 0, st_C_typespec},
2101 {"class", C_PLPL, st_C_struct},
2102 {""}, {""}, {""},
2103 {"void", 0, st_C_typespec},
2104 {""}, {""},
2105 {"friend", C_PLPL, st_C_ignore},
2106 {""}, {""}, {""},
2107 {"int", 0, st_C_typespec},
2108 {"union", 0, st_C_struct},
2109 {""}, {""}, {""},
2110 {"auto", 0, st_C_typespec},
2111 {"interface", C_JAVA, st_C_struct},
2112 {""},
2113 {"SYSCALL", 0, st_C_gnumacro}
2116 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2118 register int key = hash (str, len);
2120 if (key <= MAX_HASH_VALUE && key >= 0)
2122 register const char *s = wordlist[key].name;
2124 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2125 return &wordlist[key];
2128 return 0;
2130 /*%>*/
2132 static enum sym_type
2133 C_symtype (str, len, c_ext)
2134 char *str;
2135 int len;
2136 int c_ext;
2138 register struct C_stab_entry *se = in_word_set (str, len);
2140 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2141 return st_none;
2142 return se->type;
2146 * C functions and variables are recognized using a simple
2147 * finite automaton. fvdef is its state variable.
2149 enum
2151 fvnone, /* nothing seen */
2152 foperator, /* func: operator keyword seen (cplpl) */
2153 fvnameseen, /* function or variable name seen */
2154 fstartlist, /* func: just after open parenthesis */
2155 finlist, /* func: in parameter list */
2156 flistseen, /* func: after parameter list */
2157 fignore, /* func: before open brace */
2158 vignore /* var-like: ignore until ';' */
2159 } fvdef;
2161 bool fvextern; /* func or var: extern keyword seen; */
2164 * typedefs are recognized using a simple finite automaton.
2165 * typdef is its state variable.
2167 enum
2169 tnone, /* nothing seen */
2170 tkeyseen, /* typedef keyword seen */
2171 ttypeseen, /* defined type seen */
2172 tinbody, /* inside typedef body */
2173 tend, /* just before typedef tag */
2174 tignore /* junk after typedef tag */
2175 } typdef;
2179 * struct-like structures (enum, struct and union) are recognized
2180 * using another simple finite automaton. `structdef' is its state
2181 * variable.
2183 enum
2185 snone, /* nothing seen yet */
2186 skeyseen, /* struct-like keyword seen */
2187 stagseen, /* struct-like tag seen */
2188 scolonseen, /* colon seen after struct-like tag */
2189 sinbody /* in struct body: recognize member func defs*/
2190 } structdef;
2193 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
2194 * struct tag, and structtype is the type of the preceding struct-like
2195 * keyword.
2197 char *structtag = "<uninited>";
2198 enum sym_type structtype;
2201 * When objdef is different from onone, objtag is the name of the class.
2203 char *objtag = "<uninited>";
2206 * Yet another little state machine to deal with preprocessor lines.
2208 enum
2210 dnone, /* nothing seen */
2211 dsharpseen, /* '#' seen as first char on line */
2212 ddefineseen, /* '#' and 'define' seen */
2213 dignorerest /* ignore rest of line */
2214 } definedef;
2217 * State machine for Objective C protocols and implementations.
2218 * Tom R.Hageman <tom@basil.icce.rug.nl>
2220 enum
2222 onone, /* nothing seen */
2223 oprotocol, /* @interface or @protocol seen */
2224 oimplementation, /* @implementations seen */
2225 otagseen, /* class name seen */
2226 oparenseen, /* parenthesis before category seen */
2227 ocatseen, /* category name seen */
2228 oinbody, /* in @implementation body */
2229 omethodsign, /* in @implementation body, after +/- */
2230 omethodtag, /* after method name */
2231 omethodcolon, /* after method colon */
2232 omethodparm, /* after method parameter */
2233 oignore /* wait for @end */
2234 } objdef;
2238 * Use this structure to keep info about the token read, and how it
2239 * should be tagged. Used by the make_C_tag function to build a tag.
2241 typedef struct
2243 bool valid;
2244 char *str;
2245 bool named;
2246 int linelen;
2247 int lineno;
2248 long linepos;
2249 char *buffer;
2250 } token;
2252 token tok; /* latest token read */
2255 * Set this to TRUE, and the next token considered is called a function.
2256 * Used only for GNU emacs's function-defining macros.
2258 bool next_token_is_func;
2261 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
2263 bool yacc_rules;
2266 * methodlen is the length of the method name stored in token_name.
2268 int methodlen;
2270 static bool consider_token P_((char *, int, int, int, int, int, bool *));
2271 static void make_C_tag P_((bool));
2274 * consider_token ()
2275 * checks to see if the current token is at the start of a
2276 * function or variable, or corresponds to a typedef, or
2277 * is a struct/union/enum tag, or #define, or an enum constant.
2279 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2280 * with args. C_EXT is which language we are looking at.
2282 * Globals
2283 * fvdef IN OUT
2284 * structdef IN OUT
2285 * definedef IN OUT
2286 * typdef IN OUT
2287 * objdef IN OUT
2288 * next_token_is_func IN OUT
2291 static bool
2292 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2293 register char *str; /* IN: token pointer */
2294 register int len; /* IN: token length */
2295 register int c; /* IN: first char after the token */
2296 int c_ext; /* IN: C extensions mask */
2297 int cblev; /* IN: curly brace level */
2298 int parlev; /* IN: parenthesis level */
2299 bool *is_func_or_var; /* OUT: function or variable found */
2301 enum sym_type toktype = C_symtype (str, len, c_ext);
2304 * Advance the definedef state machine.
2306 switch (definedef)
2308 case dnone:
2309 /* We're not on a preprocessor line. */
2310 break;
2311 case dsharpseen:
2312 if (toktype == st_C_define)
2314 definedef = ddefineseen;
2316 else
2318 definedef = dignorerest;
2320 return FALSE;
2321 case ddefineseen:
2323 * Make a tag for any macro, unless it is a constant
2324 * and constantypedefs is FALSE.
2326 definedef = dignorerest;
2327 *is_func_or_var = (c == '(');
2328 if (!*is_func_or_var && !constantypedefs)
2329 return FALSE;
2330 else
2331 return TRUE;
2332 case dignorerest:
2333 return FALSE;
2334 default:
2335 error ("internal error: definedef value.", (char *)NULL);
2339 * Now typedefs
2341 switch (typdef)
2343 case tnone:
2344 if (toktype == st_C_typedef)
2346 if (typedefs)
2347 typdef = tkeyseen;
2348 fvextern = FALSE;
2349 fvdef = fvnone;
2350 return FALSE;
2352 break;
2353 case tkeyseen:
2354 switch (toktype)
2356 case st_none:
2357 case st_C_typespec:
2358 case st_C_struct:
2359 case st_C_enum:
2360 typdef = ttypeseen;
2361 break;
2363 /* Do not return here, so the structdef stuff has a chance. */
2364 break;
2365 case tend:
2366 switch (toktype)
2368 case st_C_typespec:
2369 case st_C_struct:
2370 case st_C_enum:
2371 return FALSE;
2373 return TRUE;
2377 * This structdef business is currently only invoked when cblev==0.
2378 * It should be recursively invoked whatever the curly brace level,
2379 * and a stack of states kept, to allow for definitions of structs
2380 * within structs.
2382 * This structdef business is NOT invoked when we are ctags and the
2383 * file is plain C. This is because a struct tag may have the same
2384 * name as another tag, and this loses with ctags.
2386 switch (toktype)
2388 case st_C_javastruct:
2389 if (structdef == stagseen)
2390 structdef = scolonseen;
2391 return FALSE;
2392 case st_C_struct:
2393 case st_C_enum:
2394 if (typdef == tkeyseen
2395 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2397 structdef = skeyseen;
2398 structtype = toktype;
2400 return FALSE;
2403 if (structdef == skeyseen)
2405 /* Save the tag for struct/union/class, for functions and variables
2406 that may be defined inside. */
2407 if (structtype == st_C_struct)
2408 structtag = savenstr (str, len);
2409 else
2410 structtag = "<enum>";
2411 structdef = stagseen;
2412 return TRUE;
2415 if (typdef != tnone)
2416 definedef = dnone;
2418 /* Detect GNU macros.
2420 Writers of emacs code are recommended to put the
2421 first two args of a DEFUN on the same line.
2423 The DEFUN macro, used in emacs C source code, has a first arg
2424 that is a string (the lisp function name), and a second arg that
2425 is a C function name. Since etags skips strings, the second arg
2426 is tagged. This is unfortunate, as it would be better to tag the
2427 first arg. The simplest way to deal with this problem would be
2428 to name the tag with a name built from the function name, by
2429 removing the initial 'F' character and substituting '-' for '_'.
2430 Anyway, this assumes that the conventions of naming lisp
2431 functions will never change. Currently, this method is not
2432 implemented. */
2433 if (definedef == dnone && toktype == st_C_gnumacro)
2435 next_token_is_func = TRUE;
2436 return FALSE;
2438 if (next_token_is_func)
2440 next_token_is_func = FALSE;
2441 fvdef = fignore;
2442 *is_func_or_var = TRUE;
2443 return TRUE;
2446 /* Detect Objective C constructs. */
2447 switch (objdef)
2449 case onone:
2450 switch (toktype)
2452 case st_C_objprot:
2453 objdef = oprotocol;
2454 return FALSE;
2455 case st_C_objimpl:
2456 objdef = oimplementation;
2457 return FALSE;
2459 break;
2460 case oimplementation:
2461 /* Save the class tag for functions or variables defined inside. */
2462 objtag = savenstr (str, len);
2463 objdef = oinbody;
2464 return FALSE;
2465 case oprotocol:
2466 /* Save the class tag for categories. */
2467 objtag = savenstr (str, len);
2468 objdef = otagseen;
2469 *is_func_or_var = TRUE;
2470 return TRUE;
2471 case oparenseen:
2472 objdef = ocatseen;
2473 *is_func_or_var = TRUE;
2474 return TRUE;
2475 case oinbody:
2476 break;
2477 case omethodsign:
2478 if (parlev == 0)
2480 objdef = omethodtag;
2481 methodlen = len;
2482 grow_linebuffer (&token_name, methodlen + 1);
2483 strncpy (token_name.buffer, str, len);
2484 token_name.buffer[methodlen] = '\0';
2485 token_name.len = methodlen;
2486 return TRUE;
2488 return FALSE;
2489 case omethodcolon:
2490 if (parlev == 0)
2491 objdef = omethodparm;
2492 return FALSE;
2493 case omethodparm:
2494 if (parlev == 0)
2496 objdef = omethodtag;
2497 methodlen += len;
2498 grow_linebuffer (&token_name, methodlen + 1);
2499 strncat (token_name.buffer, str, len);
2500 token_name.len = methodlen;
2501 return TRUE;
2503 return FALSE;
2504 case oignore:
2505 if (toktype == st_C_objend)
2507 /* Memory leakage here: the string pointed by objtag is
2508 never released, because many tests would be needed to
2509 avoid breaking on incorrect input code. The amount of
2510 memory leaked here is the sum of the lengths of the
2511 class tags.
2512 free (objtag); */
2513 objdef = onone;
2515 return FALSE;
2518 /* A function, variable or enum constant? */
2519 switch (toktype)
2521 case st_C_extern:
2522 fvextern = TRUE;
2523 /* FALLTHRU */
2524 case st_C_typespec:
2525 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2526 fvdef = fvnone; /* should be useless */
2527 return FALSE;
2528 case st_C_ignore:
2529 fvextern = FALSE;
2530 fvdef = vignore;
2531 return FALSE;
2532 case st_C_operator:
2533 fvdef = foperator;
2534 *is_func_or_var = TRUE;
2535 return TRUE;
2536 case st_none:
2537 if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2539 fvdef = foperator;
2540 *is_func_or_var = TRUE;
2541 return TRUE;
2543 if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2544 return TRUE;
2545 if (fvdef == fvnone)
2547 fvdef = fvnameseen; /* function or variable */
2548 *is_func_or_var = TRUE;
2549 return TRUE;
2551 break;
2554 return FALSE;
/*
 * C_entries ()
 *      This routine finds functions, variables, typedefs,
 *      #define's, enum constants and struct/union/enum definitions in
 *      C syntax and adds them to the list.
 *
 * The macros below are helpers for C_entries and rely on its local
 * variables (curndx, newndx, lp, quotednl, savetok, inf).
 */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define othlb (lbs[1-curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define othlinepos (lbs[1-curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current line buffer, leaving the
   preprocessor state (definedef) and any saved token untouched.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  lineno++;                                                             \
  linecharno = charno;                                                  \
  charno += readline (&curlb, inf);                                     \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Read the next input line, restore the token saved when a
   preprocessor line started (if any) and leave preprocessor state.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetok.valid)                                                    \
    {                                                                   \
      tok = savetok;                                                    \
      savetok.valid = FALSE;                                            \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2596 static void
2597 make_C_tag (isfun)
2598 bool isfun;
2600 /* This function should never be called when tok.valid is FALSE, but
2601 we must protect against invalid input or internal errors. */
2602 if (tok.valid)
2604 if (traditional_tag_style)
2606 /* This was the original code. Now we call new_pfnote instead,
2607 which uses the new method for naming tags (see new_pfnote). */
2608 char *name = NULL;
2610 if (CTAGS || tok.named)
2611 name = savestr (token_name.buffer);
2612 pfnote (name, isfun,
2613 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2615 else
2616 new_pfnote (token_name.buffer, token_name.len, isfun,
2617 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2618 tok.valid = FALSE;
2620 else if (DEBUG)
2621 abort ();
2625 static void
2626 C_entries (c_ext, inf)
2627 int c_ext; /* extension of C */
2628 FILE *inf; /* input file */
2630 register char c; /* latest char read; '\0' for end of line */
2631 register char *lp; /* pointer one beyond the character `c' */
2632 int curndx, newndx; /* indices for current and new lb */
2633 register int tokoff; /* offset in line of start of current token */
2634 register int toklen; /* length of current token */
2635 char *qualifier; /* string used to qualify names */
2636 int qlen; /* length of qualifier */
2637 int cblev; /* current curly brace level */
2638 int parlev; /* current parenthesis level */
2639 bool incomm, inquote, inchar, quotednl, midtoken;
2640 bool purec, cplpl, cjava;
2641 token savetok; /* token saved during preprocessor handling */
2644 tokoff = toklen = 0; /* keep compiler quiet */
2645 curndx = newndx = 0;
2646 lineno = 0;
2647 charno = 0;
2648 lp = curlb.buffer;
2649 *lp = 0;
2651 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2652 structdef = snone; definedef = dnone; objdef = onone;
2653 next_token_is_func = yacc_rules = FALSE;
2654 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2655 tok.valid = savetok.valid = FALSE;
2656 cblev = 0;
2657 parlev = 0;
2658 purec = !(c_ext & ~YACC); /* no extensions (apart from possibly yacc) */
2659 cplpl = (c_ext & C_PLPL) == C_PLPL;
2660 cjava = (c_ext & C_JAVA) == C_JAVA;
2661 if (cjava)
2662 { qualifier = "."; qlen = 1; }
2663 else
2664 { qualifier = "::"; qlen = 2; }
2666 while (!feof (inf))
2668 c = *lp++;
2669 if (c == '\\')
2671 /* If we're at the end of the line, the next character is a
2672 '\0'; don't skip it, because it's the thing that tells us
2673 to read the next line. */
2674 if (*lp == '\0')
2676 quotednl = TRUE;
2677 continue;
2679 lp++;
2680 c = ' ';
2682 else if (incomm)
2684 switch (c)
2686 case '*':
2687 if (*lp == '/')
2689 c = *lp++;
2690 incomm = FALSE;
2692 break;
2693 case '\0':
2694 /* Newlines inside comments do not end macro definitions in
2695 traditional cpp. */
2696 CNL_SAVE_DEFINEDEF ();
2697 break;
2699 continue;
2701 else if (inquote)
2703 switch (c)
2705 case '"':
2706 inquote = FALSE;
2707 break;
2708 case '\0':
2709 /* Newlines inside strings do not end macro definitions
2710 in traditional cpp, even though compilers don't
2711 usually accept them. */
2712 CNL_SAVE_DEFINEDEF ();
2713 break;
2715 continue;
2717 else if (inchar)
2719 switch (c)
2721 case '\0':
2722 /* Hmmm, something went wrong. */
2723 CNL ();
2724 /* FALLTHRU */
2725 case '\'':
2726 inchar = FALSE;
2727 break;
2729 continue;
2731 else
2732 switch (c)
2734 case '"':
2735 inquote = TRUE;
2736 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2738 fvextern = FALSE;
2739 fvdef = fvnone;
2741 continue;
2742 case '\'':
2743 inchar = TRUE;
2744 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2746 fvextern = FALSE;
2747 fvdef = fvnone;
2749 continue;
2750 case '/':
2751 if (*lp == '*')
2753 lp++;
2754 incomm = TRUE;
2755 continue;
2757 else if (/* cplpl && */ *lp == '/')
2759 c = '\0';
2760 break;
2762 else
2763 break;
2764 case '%':
2765 if ((c_ext & YACC) && *lp == '%')
2767 /* entering or exiting rules section in yacc file */
2768 lp++;
2769 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2770 typdef = tnone; structdef = snone;
2771 next_token_is_func = FALSE;
2772 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2773 cblev = 0;
2774 yacc_rules = !yacc_rules;
2775 continue;
2777 else
2778 break;
2779 case '#':
2780 if (definedef == dnone)
2782 char *cp;
2783 bool cpptoken = TRUE;
2785 /* Look back on this line. If all blanks, or nonblanks
2786 followed by an end of comment, this is a preprocessor
2787 token. */
2788 for (cp = newlb.buffer; cp < lp-1; cp++)
2789 if (!iswhite (*cp))
2791 if (*cp == '*' && *(cp+1) == '/')
2793 cp++;
2794 cpptoken = TRUE;
2796 else
2797 cpptoken = FALSE;
2799 if (cpptoken)
2800 definedef = dsharpseen;
2801 } /* if (definedef == dnone) */
2803 continue;
2804 } /* switch (c) */
2807 /* Consider token only if some complicated conditions are satisfied. */
2808 if ((definedef != dnone
2809 || (cblev == 0 && structdef != scolonseen)
2810 || (cblev == 1 && cplpl && structdef == sinbody)
2811 || (structdef == sinbody && purec))
2812 && typdef != tignore
2813 && definedef != dignorerest
2814 && fvdef != finlist)
2816 if (midtoken)
2818 if (endtoken (c))
2820 bool funorvar = FALSE;
2822 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2825 * This handles :: in the middle, but not at the
2826 * beginning of an identifier. Also, space-separated
2827 * :: is not recognised.
2829 lp += 2;
2830 toklen += 2;
2831 c = lp[-1];
2832 goto intok;
2834 else
2836 if (yacc_rules
2837 || consider_token (newlb.buffer + tokoff, toklen, c,
2838 c_ext, cblev, parlev, &funorvar))
2840 if (fvdef == foperator)
2842 char *oldlp = lp;
2843 lp = skip_spaces (lp-1);
2844 if (*lp != '\0')
2845 lp += 1;
2846 while (*lp != '\0'
2847 && !iswhite (*lp) && *lp != '(')
2848 lp += 1;
2849 c = *lp++;
2850 toklen += lp - oldlp;
2852 tok.named = FALSE;
2853 if (!purec
2854 && funorvar
2855 && definedef == dnone
2856 && structdef == sinbody)
2857 /* function or var defined in C++ class body */
2859 int len = strlen (structtag) + qlen + toklen;
2860 grow_linebuffer (&token_name, len + 1);
2861 strcpy (token_name.buffer, structtag);
2862 strcat (token_name.buffer, qualifier);
2863 strncat (token_name.buffer,
2864 newlb.buffer + tokoff, toklen);
2865 token_name.len = len;
2866 tok.named = TRUE;
2868 else if (objdef == ocatseen)
2869 /* Objective C category */
2871 int len = strlen (objtag) + 2 + toklen;
2872 grow_linebuffer (&token_name, len + 1);
2873 strcpy (token_name.buffer, objtag);
2874 strcat (token_name.buffer, "(");
2875 strncat (token_name.buffer,
2876 newlb.buffer + tokoff, toklen);
2877 strcat (token_name.buffer, ")");
2878 token_name.len = len;
2879 tok.named = TRUE;
2881 else if (objdef == omethodtag
2882 || objdef == omethodparm)
2883 /* Objective C method */
2885 tok.named = TRUE;
2887 else
2889 grow_linebuffer (&token_name, toklen + 1);
2890 strncpy (token_name.buffer,
2891 newlb.buffer + tokoff, toklen);
2892 token_name.buffer[toklen] = '\0';
2893 token_name.len = toklen;
2894 /* Name macros and members. */
2895 tok.named = (structdef == stagseen
2896 || typdef == ttypeseen
2897 || typdef == tend
2898 || (funorvar
2899 && definedef == dignorerest)
2900 || (funorvar
2901 && definedef == dnone
2902 && structdef == sinbody));
2904 tok.lineno = lineno;
2905 tok.linelen = tokoff + toklen + 1;
2906 tok.buffer = newlb.buffer;
2907 tok.linepos = newlinepos;
2908 tok.valid = TRUE;
2910 if (definedef == dnone
2911 && (fvdef == fvnameseen
2912 || fvdef == foperator
2913 || structdef == stagseen
2914 || typdef == tend
2915 || objdef != onone))
2917 if (current_lb_is_new)
2918 switch_line_buffers ();
2920 else
2921 make_C_tag (funorvar);
2923 midtoken = FALSE;
2925 } /* if (endtoken (c)) */
2926 else if (intoken (c))
2927 intok:
2929 toklen++;
2930 continue;
2932 } /* if (midtoken) */
2933 else if (begtoken (c))
2935 switch (definedef)
2937 case dnone:
2938 switch (fvdef)
2940 case fstartlist:
2941 fvdef = finlist;
2942 continue;
2943 case flistseen:
2944 make_C_tag (TRUE); /* a function */
2945 fvdef = fignore;
2946 break;
2947 case fvnameseen:
2948 fvdef = fvnone;
2949 break;
2951 if (structdef == stagseen && !cjava)
2952 structdef = snone;
2953 break;
2954 case dsharpseen:
2955 savetok = tok;
2957 if (!yacc_rules || lp == newlb.buffer + 1)
2959 tokoff = lp - 1 - newlb.buffer;
2960 toklen = 1;
2961 midtoken = TRUE;
2963 continue;
2964 } /* if (begtoken) */
2965 } /* if must look at token */
2968 /* Detect end of line, colon, comma, semicolon and various braces
2969 after having handled a token.*/
2970 switch (c)
2972 case ':':
2973 if (definedef != dnone)
2974 break;
2975 switch (objdef)
2977 case otagseen:
2978 objdef = oignore;
2979 make_C_tag (TRUE); /* an Objective C class */
2980 break;
2981 case omethodtag:
2982 case omethodparm:
2983 objdef = omethodcolon;
2984 methodlen += 1;
2985 grow_linebuffer (&token_name, methodlen + 1);
2986 strcat (token_name.buffer, ":");
2987 token_name.len = methodlen;
2988 break;
2990 if (structdef == stagseen)
2991 structdef = scolonseen;
2992 else
2993 switch (fvdef)
2995 case fvnameseen:
2996 if (yacc_rules)
2998 make_C_tag (FALSE); /* a yacc function */
2999 fvdef = fignore;
3001 break;
3002 case fstartlist:
3003 fvextern = FALSE;
3004 fvdef = fvnone;
3005 break;
3007 break;
3008 case ';':
3009 if (definedef != dnone)
3010 break;
3011 if (cblev == 0)
3012 switch (typdef)
3014 case tend:
3015 make_C_tag (FALSE); /* a typedef */
3016 /* FALLTHRU */
3017 default:
3018 typdef = tnone;
3020 switch (fvdef)
3022 case fignore:
3023 break;
3024 case fvnameseen:
3025 if ((members && cblev == 1)
3026 || (globals && cblev == 0 && (!fvextern || declarations)))
3027 make_C_tag (FALSE); /* a variable */
3028 fvextern = FALSE;
3029 fvdef = fvnone;
3030 tok.valid = FALSE;
3031 break;
3032 case flistseen:
3033 if (declarations && (cblev == 0 || cblev == 1))
3034 make_C_tag (TRUE); /* a function declaration */
3035 /* FALLTHRU */
3036 default:
3037 fvextern = FALSE;
3038 fvdef = fvnone;
3039 /* The following instruction invalidates the token.
3040 Probably the token should be invalidated in all
3041 other cases where some state machine is reset. */
3042 tok.valid = FALSE;
3044 if (structdef == stagseen)
3045 structdef = snone;
3046 break;
3047 case ',':
3048 if (definedef != dnone)
3049 break;
3050 switch (objdef)
3052 case omethodtag:
3053 case omethodparm:
3054 make_C_tag (TRUE); /* an Objective C method */
3055 objdef = oinbody;
3056 break;
3058 switch (fvdef)
3060 case foperator:
3061 case finlist:
3062 case fignore:
3063 case vignore:
3064 break;
3065 case fvnameseen:
3066 if ((members && cblev == 1)
3067 || (globals && cblev == 0 && (!fvextern || declarations)))
3068 make_C_tag (FALSE); /* a variable */
3069 break;
3070 default:
3071 fvdef = fvnone;
3073 if (structdef == stagseen)
3074 structdef = snone;
3075 break;
3076 case '[':
3077 if (definedef != dnone)
3078 break;
3079 if (cblev == 0 && typdef == tend)
3081 typdef = tignore;
3082 make_C_tag (FALSE); /* a typedef */
3083 break;
3085 switch (fvdef)
3087 case foperator:
3088 case finlist:
3089 case fignore:
3090 case vignore:
3091 break;
3092 case fvnameseen:
3093 if ((members && cblev == 1)
3094 || (globals && cblev == 0 && (!fvextern || declarations)))
3095 make_C_tag (FALSE); /* a variable */
3096 /* FALLTHRU */
3097 default:
3098 fvdef = fvnone;
3100 if (structdef == stagseen)
3101 structdef = snone;
3102 break;
3103 case '(':
3104 if (definedef != dnone)
3105 break;
3106 if (objdef == otagseen && parlev == 0)
3107 objdef = oparenseen;
3108 switch (fvdef)
3110 case fvnameseen:
3111 if (typdef == ttypeseen
3112 && tok.valid
3113 && *lp != '*'
3114 && structdef != sinbody)
3116 /* This handles constructs like:
3117 typedef void OperatorFun (int fun); */
3118 make_C_tag (FALSE);
3119 typdef = tignore;
3121 /* FALLTHRU */
3122 case foperator:
3123 fvdef = fstartlist;
3124 break;
3125 case flistseen:
3126 fvdef = finlist;
3127 break;
3129 parlev++;
3130 break;
3131 case ')':
3132 if (definedef != dnone)
3133 break;
3134 if (objdef == ocatseen && parlev == 1)
3136 make_C_tag (TRUE); /* an Objective C category */
3137 objdef = oignore;
3139 if (--parlev == 0)
3141 switch (fvdef)
3143 case fstartlist:
3144 case finlist:
3145 fvdef = flistseen;
3146 break;
3148 if (cblev == 0 && (typdef == tend))
3150 typdef = tignore;
3151 make_C_tag (FALSE); /* a typedef */
3154 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3155 parlev = 0;
3156 break;
3157 case '{':
3158 if (definedef != dnone)
3159 break;
3160 if (typdef == ttypeseen)
3161 typdef = tinbody;
3162 switch (structdef)
3164 case skeyseen: /* unnamed struct */
3165 structdef = sinbody;
3166 structtag = "_anonymous_";
3167 break;
3168 case stagseen:
3169 case scolonseen: /* named struct */
3170 structdef = sinbody;
3171 make_C_tag (FALSE); /* a struct */
3172 break;
3174 switch (fvdef)
3176 case flistseen:
3177 make_C_tag (TRUE); /* a function */
3178 /* FALLTHRU */
3179 case fignore:
3180 fvdef = fvnone;
3181 break;
3182 case fvnone:
3183 switch (objdef)
3185 case otagseen:
3186 make_C_tag (TRUE); /* an Objective C class */
3187 objdef = oignore;
3188 break;
3189 case omethodtag:
3190 case omethodparm:
3191 make_C_tag (TRUE); /* an Objective C method */
3192 objdef = oinbody;
3193 break;
3194 default:
3195 /* Neutralize `extern "C" {' grot. */
3196 if (cblev == 0 && structdef == snone && typdef == tnone)
3197 cblev = -1;
3200 cblev++;
3201 break;
3202 case '*':
3203 if (definedef != dnone)
3204 break;
3205 if (fvdef == fstartlist)
3206 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3207 break;
3208 case '}':
3209 if (definedef != dnone)
3210 break;
3211 if (!noindentypedefs && lp == newlb.buffer + 1)
3213 cblev = 0; /* reset curly brace level if first column */
3214 parlev = 0; /* also reset paren level, just in case... */
3216 else if (cblev > 0)
3217 cblev--;
3218 if (cblev == 0)
3220 if (typdef == tinbody)
3221 typdef = tend;
3222 /* Memory leakage here: the string pointed by structtag is
3223 never released, because I fear to miss something and
3224 break things while freeing the area. The amount of
3225 memory leaked here is the sum of the lengths of the
3226 struct tags.
3227 if (structdef == sinbody)
3228 free (structtag); */
3230 structdef = snone;
3231 structtag = "<error>";
3233 break;
3234 case '=':
3235 if (definedef != dnone)
3236 break;
3237 switch (fvdef)
3239 case foperator:
3240 case finlist:
3241 case fignore:
3242 case vignore:
3243 break;
3244 case fvnameseen:
3245 if ((members && cblev == 1)
3246 || (globals && cblev == 0 && (!fvextern || declarations)))
3247 make_C_tag (FALSE); /* a variable */
3248 /* FALLTHRU */
3249 default:
3250 fvdef = vignore;
3252 break;
3253 case '+':
3254 case '-':
3255 if (objdef == oinbody && cblev == 0)
3257 objdef = omethodsign;
3258 break;
3260 /* FALLTHRU */
3261 case '#': case '~': case '&': case '%': case '/': case '|':
3262 case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3263 if (definedef != dnone)
3264 break;
3265 /* These surely cannot follow a function tag in C. */
3266 switch (fvdef)
3268 case foperator:
3269 case finlist:
3270 case fignore:
3271 case vignore:
3272 break;
3273 default:
3274 fvdef = fvnone;
3276 break;
3277 case '\0':
3278 if (objdef == otagseen)
3280 make_C_tag (TRUE); /* an Objective C class */
3281 objdef = oignore;
3283 /* If a macro spans multiple lines don't reset its state. */
3284 if (quotednl)
3285 CNL_SAVE_DEFINEDEF ();
3286 else
3287 CNL ();
3288 break;
3289 } /* switch (c) */
3291 } /* while not eof */
3295 * Process either a C++ file or a C file depending on the setting
3296 * of a global flag.
3298 static void
3299 default_C_entries (inf)
3300 FILE *inf;
3302 C_entries (cplusplus ? C_PLPL : 0, inf);
/* Always do plain ANSI C: run C_entries on INF with no extensions. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3313 /* Always do C++. */
3314 static void
3315 Cplusplus_entries (inf)
3316 FILE *inf;
3318 C_entries (C_PLPL, inf);
3321 /* Always do Java. */
3322 static void
3323 Cjava_entries (inf)
3324 FILE *inf;
3326 C_entries (C_JAVA, inf);
3329 /* Always do C*. */
3330 static void
3331 Cstar_entries (inf)
3332 FILE *inf;
3334 C_entries (C_STAR, inf);
3337 /* Always do Yacc. */
3338 static void
3339 Yacc_entries (inf)
3340 FILE *inf;
3342 C_entries (YACC, inf);
/* A useful macro.  It expands to the header of a `for' loop that reads
   FILE_POINTER one line at a time into LINE_BUFFER, keeps lineno,
   linecharno and charno up to date, and before each iteration points
   CHAR_POINTER at the start of the line just read.  The statement
   that follows the macro is the loop body.  CHAR_POINTER is taken
   from LINE_BUFFER itself, so the macro works with any line buffer.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3358 * Read a file, but do no processing. This is used to do regexp
3359 * matching on files that have no language defined.
3361 static void
3362 just_read_file (inf)
3363 FILE *inf;
3365 register char *dummy;
3367 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3368 continue;
3371 /* Fortran parsing */
3373 static bool tail P_((char *));
3374 static void takeprec P_((void));
3375 static void getit P_((FILE *));
3377 static bool
3378 tail (cp)
3379 char *cp;
3381 register int len = 0;
3383 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3384 cp++, len++;
3385 if (*cp == '\0' && !intoken (dbp[len]))
3387 dbp += len;
3388 return TRUE;
3390 return FALSE;
3393 static void
3394 takeprec ()
3396 dbp = skip_spaces (dbp);
3397 if (*dbp != '*')
3398 return;
3399 dbp++;
3400 dbp = skip_spaces (dbp);
3401 if (strneq (dbp, "(*)", 3))
3403 dbp += 3;
3404 return;
3406 if (!isdigit (*dbp))
3408 --dbp; /* force failure */
3409 return;
3412 dbp++;
3413 while (isdigit (*dbp));
3416 static void
3417 getit (inf)
3418 FILE *inf;
3420 register char *cp;
3422 dbp = skip_spaces (dbp);
3423 if (*dbp == '\0')
3425 lineno++;
3426 linecharno = charno;
3427 charno += readline (&lb, inf);
3428 dbp = lb.buffer;
3429 if (dbp[5] != '&')
3430 return;
3431 dbp += 6;
3432 dbp = skip_spaces (dbp);
3434 if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3435 return;
3436 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3437 continue;
3438 pfnote (savenstr (dbp, cp-dbp), TRUE,
3439 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3443 static void
3444 Fortran_functions (inf)
3445 FILE *inf;
3447 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3449 if (*dbp == '%')
3450 dbp++; /* Ratfor escape to fortran */
3451 dbp = skip_spaces (dbp);
3452 if (*dbp == '\0')
3453 continue;
3454 switch (lowcase (*dbp))
3456 case 'i':
3457 if (tail ("integer"))
3458 takeprec ();
3459 break;
3460 case 'r':
3461 if (tail ("real"))
3462 takeprec ();
3463 break;
3464 case 'l':
3465 if (tail ("logical"))
3466 takeprec ();
3467 break;
3468 case 'c':
3469 if (tail ("complex") || tail ("character"))
3470 takeprec ();
3471 break;
3472 case 'd':
3473 if (tail ("double"))
3475 dbp = skip_spaces (dbp);
3476 if (*dbp == '\0')
3477 continue;
3478 if (tail ("precision"))
3479 break;
3480 continue;
3482 break;
3484 dbp = skip_spaces (dbp);
3485 if (*dbp == '\0')
3486 continue;
3487 switch (lowcase (*dbp))
3489 case 'f':
3490 if (tail ("function"))
3491 getit (inf);
3492 continue;
3493 case 's':
3494 if (tail ("subroutine"))
3495 getit (inf);
3496 continue;
3497 case 'e':
3498 if (tail ("entry"))
3499 getit (inf);
3500 continue;
3501 case 'b':
3502 if (tail ("blockdata") || tail ("block data"))
3504 dbp = skip_spaces (dbp);
3505 if (*dbp == '\0') /* assume un-named */
3506 pfnote (savestr ("blockdata"), TRUE,
3507 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3508 else
3509 getit (inf); /* look for name */
3511 continue;
3517 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3518 * Ada parsing
3521 static void adagetit P_((FILE *, char *));
3523 /* Once we are positioned after an "interesting" keyword, let's get
3524 the real tag value necessary. */
3525 static void
3526 adagetit (inf, name_qualifier)
3527 FILE *inf;
3528 char *name_qualifier;
3530 register char *cp;
3531 char *name;
3532 char c;
3534 while (!feof (inf))
3536 dbp = skip_spaces (dbp);
3537 if (*dbp == '\0'
3538 || (dbp[0] == '-' && dbp[1] == '-'))
3540 lineno++;
3541 linecharno = charno;
3542 charno += readline (&lb, inf);
3543 dbp = lb.buffer;
3545 switch (*dbp)
3547 case 'b':
3548 case 'B':
3549 if (tail ("body"))
3551 /* Skipping body of procedure body or package body or ....
3552 resetting qualifier to body instead of spec. */
3553 name_qualifier = "/b";
3554 continue;
3556 break;
3557 case 't':
3558 case 'T':
3559 /* Skipping type of task type or protected type ... */
3560 if (tail ("type"))
3561 continue;
3562 break;
3564 if (*dbp == '"')
3566 dbp += 1;
3567 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3568 continue;
3570 else
3572 dbp = skip_spaces (dbp);
3573 for (cp = dbp;
3574 (*cp != '\0'
3575 && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.'));
3576 cp++)
3577 continue;
3578 if (cp == dbp)
3579 return;
3581 c = *cp;
3582 *cp = '\0';
3583 name = concat (dbp, name_qualifier, "");
3584 *cp = c;
3585 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3586 if (c == '"')
3587 dbp = cp + 1;
3588 return;
3592 static void
3593 Ada_funcs (inf)
3594 FILE *inf;
3596 bool inquote = FALSE;
3598 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3600 while (*dbp != '\0')
3602 /* Skip a string i.e. "abcd". */
3603 if (inquote || (*dbp == '"'))
3605 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3606 if (dbp != NULL)
3608 inquote = FALSE;
3609 dbp += 1;
3610 continue; /* advance char */
3612 else
3614 inquote = TRUE;
3615 break; /* advance line */
3619 /* Skip comments. */
3620 if (dbp[0] == '-' && dbp[1] == '-')
3621 break; /* advance line */
3623 /* Skip character enclosed in single quote i.e. 'a'
3624 and skip single quote starting an attribute i.e. 'Image. */
3625 if (*dbp == '\'')
3627 dbp++ ;
3628 if (*dbp != '\0')
3629 dbp++;
3630 continue;
3633 /* Search for beginning of a token. */
3634 if (!begtoken (*dbp))
3636 dbp++;
3637 continue; /* advance char */
3640 /* We are at the beginning of a token. */
3641 switch (*dbp)
3643 case 'f':
3644 case 'F':
3645 if (!packages_only && tail ("function"))
3646 adagetit (inf, "/f");
3647 else
3648 break; /* from switch */
3649 continue; /* advance char */
3650 case 'p':
3651 case 'P':
3652 if (!packages_only && tail ("procedure"))
3653 adagetit (inf, "/p");
3654 else if (tail ("package"))
3655 adagetit (inf, "/s");
3656 else if (tail ("protected")) /* protected type */
3657 adagetit (inf, "/t");
3658 else
3659 break; /* from switch */
3660 continue; /* advance char */
3661 case 't':
3662 case 'T':
3663 if (!packages_only && tail ("task"))
3664 adagetit (inf, "/k");
3665 else if (typedefs && !packages_only && tail ("type"))
3667 adagetit (inf, "/t");
3668 while (*dbp != '\0')
3669 dbp += 1;
3671 else
3672 break; /* from switch */
3673 continue; /* advance char */
3676 /* Look for the end of the token. */
3677 while (!endtoken (*dbp))
3678 dbp++;
3680 } /* advance char */
3681 } /* advance line */
3685 * Bob Weiner, Motorola Inc., 4/3/94
3686 * Unix and microcontroller assembly tag handling
3687 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3689 static void
3690 Asm_labels (inf)
3691 FILE *inf;
3693 register char *cp;
3695 LOOP_ON_INPUT_LINES (inf, lb, cp)
3697 /* If first char is alphabetic or one of [_.$], test for colon
3698 following identifier. */
3699 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3701 /* Read past label. */
3702 cp++;
3703 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3704 cp++;
3705 if (*cp == ':' || iswhite (*cp))
3707 /* Found end of label, so copy it and add it to the table. */
3708 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3709 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3716 * Perl support by Bart Robinson <lomew@cs.utah.edu>
3717 * enhanced by Michael Ernst <mernst@alum.mit.edu>
3718 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3719 * Perl variable names: /^(my|local).../
3721 static void
3722 Perl_functions (inf)
3723 FILE *inf;
3725 register char *cp;
3727 LOOP_ON_INPUT_LINES (inf, lb, cp)
3729 if (*cp++ == 's'
3730 && *cp++ == 'u'
3731 && *cp++ == 'b' && iswhite (*cp++))
3733 cp = skip_spaces (cp);
3734 if (*cp != '\0')
3736 char *sp = cp;
3737 while (*cp != '\0'
3738 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3739 cp++;
3740 pfnote (savenstr (sp, cp-sp), TRUE,
3741 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3744 else if (globals /* only if tagging global vars is enabled */
3745 && ((cp = lb.buffer,
3746 *cp++ == 'm'
3747 && *cp++ == 'y')
3748 || (cp = lb.buffer,
3749 *cp++ == 'l'
3750 && *cp++ == 'o'
3751 && *cp++ == 'c'
3752 && *cp++ == 'a'
3753 && *cp++ == 'l'))
3754 && (*cp == '(' || iswhite (*cp)))
3756 /* After "my" or "local", but before any following paren or space. */
3757 char *varname = NULL;
3759 cp = skip_spaces (cp);
3760 if (*cp == '$' || *cp == '@' || *cp == '%')
3762 char* varstart = ++cp;
3763 while (isalnum (*cp) || *cp == '_')
3764 cp++;
3765 varname = savenstr (varstart, cp-varstart);
3767 else
3769 /* Should be examining a variable list at this point;
3770 could insist on seeing an open parenthesis. */
3771 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
3772 cp++;
3775 /* Perhaps I should back cp up one character, so the TAGS table
3776 doesn't mention (and so depend upon) the following char. */
3777 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3778 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3784 * Python support by Eric S. Raymond <esr@thyrsus.com>
3785 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3787 static void
3788 Python_functions (inf)
3789 FILE *inf;
3791 register char *cp;
3793 LOOP_ON_INPUT_LINES (inf, lb, cp)
3795 if (*cp++ == 'd'
3796 && *cp++ == 'e'
3797 && *cp++ == 'f' && iswhite (*cp++))
3799 cp = skip_spaces (cp);
3800 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3801 cp++;
3802 pfnote (NULL, TRUE,
3803 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3806 cp = lb.buffer;
3807 if (*cp++ == 'c'
3808 && *cp++ == 'l'
3809 && *cp++ == 'a'
3810 && *cp++ == 's'
3811 && *cp++ == 's' && iswhite (*cp++))
3813 cp = skip_spaces (cp);
3814 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3815 cp++;
3816 pfnote (NULL, TRUE,
3817 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3822 /* Idea by Corny de Souza
3823 * Cobol tag functions
3824 * We could look for anything that could be a paragraph name.
3825 * i.e. anything that starts in column 8 is one word and ends in a full stop.
3827 static void
3828 Cobol_paragraphs (inf)
3829 FILE *inf;
3831 register char *bp, *ep;
3833 LOOP_ON_INPUT_LINES (inf, lb, bp)
3835 if (lb.len < 9)
3836 continue;
3837 bp += 8;
3839 /* If eoln, compiler option or comment ignore whole line. */
3840 if (bp[-1] != ' ' || !isalnum (bp[0]))
3841 continue;
3843 for (ep = bp; isalnum (*ep) || *ep == '-'; ep++)
3844 continue;
3845 if (*ep++ == '.')
3846 pfnote (savenstr (bp, ep-bp), TRUE,
3847 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3851 /* Added by Mosur Mohan, 4/22/88 */
3852 /* Pascal parsing */
3855 * Locates tags for procedures & functions. Doesn't do any type- or
3856 * var-definitions. It does look for the keyword "extern" or
3857 * "forward" immediately following the procedure statement; if found,
3858 * the tag is skipped.
3860 static void
3861 Pascal_functions (inf)
3862 FILE *inf;
3864 linebuffer tline; /* mostly copied from C_entries */
3865 long save_lcno;
3866 int save_lineno, save_len;
3867 char c, *cp, *namebuf;
3869 bool /* each of these flags is TRUE iff: */
3870 incomment, /* point is inside a comment */
3871 inquote, /* point is inside '..' string */
3872 get_tagname, /* point is after PROCEDURE/FUNCTION
3873 keyword, so next item = potential tag */
3874 found_tag, /* point is after a potential tag */
3875 inparms, /* point is within parameter-list */
3876 verify_tag; /* point has passed the parm-list, so the
3877 next token will determine whether this
3878 is a FORWARD/EXTERN to be ignored, or
3879 whether it is a real tag */
3881 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3882 namebuf = NULL; /* keep compiler quiet */
3883 lineno = 0;
3884 charno = 0;
3885 dbp = lb.buffer;
3886 *dbp = '\0';
3887 initbuffer (&tline);
3889 incomment = inquote = FALSE;
3890 found_tag = FALSE; /* have a proc name; check if extern */
3891 get_tagname = FALSE; /* have found "procedure" keyword */
3892 inparms = FALSE; /* found '(' after "proc" */
3893 verify_tag = FALSE; /* check if "extern" is ahead */
3896 while (!feof (inf)) /* long main loop to get next char */
3898 c = *dbp++;
3899 if (c == '\0') /* if end of line */
3901 lineno++;
3902 linecharno = charno;
3903 charno += readline (&lb, inf);
3904 dbp = lb.buffer;
3905 if (*dbp == '\0')
3906 continue;
3907 if (!((found_tag && verify_tag)
3908 || get_tagname))
3909 c = *dbp++; /* only if don't need *dbp pointing
3910 to the beginning of the name of
3911 the procedure or function */
3913 if (incomment)
3915 if (c == '}') /* within { } comments */
3916 incomment = FALSE;
3917 else if (c == '*' && *dbp == ')') /* within (* *) comments */
3919 dbp++;
3920 incomment = FALSE;
3922 continue;
3924 else if (inquote)
3926 if (c == '\'')
3927 inquote = FALSE;
3928 continue;
3930 else
3931 switch (c)
3933 case '\'':
3934 inquote = TRUE; /* found first quote */
3935 continue;
3936 case '{': /* found open { comment */
3937 incomment = TRUE;
3938 continue;
3939 case '(':
3940 if (*dbp == '*') /* found open (* comment */
3942 incomment = TRUE;
3943 dbp++;
3945 else if (found_tag) /* found '(' after tag, i.e., parm-list */
3946 inparms = TRUE;
3947 continue;
3948 case ')': /* end of parms list */
3949 if (inparms)
3950 inparms = FALSE;
3951 continue;
3952 case ';':
3953 if (found_tag && !inparms) /* end of proc or fn stmt */
3955 verify_tag = TRUE;
3956 break;
3958 continue;
3960 if (found_tag && verify_tag && (*dbp != ' '))
3962 /* check if this is an "extern" declaration */
3963 if (*dbp == '\0')
3964 continue;
3965 if (lowcase (*dbp == 'e'))
3967 if (tail ("extern")) /* superfluous, really! */
3969 found_tag = FALSE;
3970 verify_tag = FALSE;
3973 else if (lowcase (*dbp) == 'f')
3975 if (tail ("forward")) /* check for forward reference */
3977 found_tag = FALSE;
3978 verify_tag = FALSE;
3981 if (found_tag && verify_tag) /* not external proc, so make tag */
3983 found_tag = FALSE;
3984 verify_tag = FALSE;
3985 pfnote (namebuf, TRUE,
3986 tline.buffer, save_len, save_lineno, save_lcno);
3987 continue;
3990 if (get_tagname) /* grab name of proc or fn */
3992 if (*dbp == '\0')
3993 continue;
3995 /* save all values for later tagging */
3996 grow_linebuffer (&tline, lb.len + 1);
3997 strcpy (tline.buffer, lb.buffer);
3998 save_lineno = lineno;
3999 save_lcno = linecharno;
4001 /* grab block name */
4002 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4003 continue;
4004 namebuf = savenstr (dbp, cp-dbp);
4005 dbp = cp; /* set dbp to e-o-token */
4006 save_len = dbp - lb.buffer + 1;
4007 get_tagname = FALSE;
4008 found_tag = TRUE;
4009 continue;
4011 /* and proceed to check for "extern" */
4013 else if (!incomment && !inquote && !found_tag)
4015 /* check for proc/fn keywords */
4016 switch (lowcase (c))
4018 case 'p':
4019 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4020 get_tagname = TRUE;
4021 continue;
4022 case 'f':
4023 if (tail ("unction"))
4024 get_tagname = TRUE;
4025 continue;
4028 } /* while not eof */
4030 free (tline.buffer);
4034 * lisp tag functions
4035 * look for (def or (DEF, quote or QUOTE
4038 static int L_isdef P_((char *));
4039 static int L_isquote P_((char *));
4040 static void L_getit P_((void));
/* Return 1 if the three characters following STRP[0] spell "def" in
   any mixture of upper and lower case, else 0.  STRP[0] itself is not
   examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  if (strp[3] != 'f' && strp[3] != 'F')
    return 0;
  return 1;
}
/* Return non-zero if the five characters following STRP[0] spell
   "quote" in any mixture of case and are followed by a white space
   character.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  if (strp[1] != 'q' && strp[1] != 'Q')
    return 0;
  if (strp[2] != 'u' && strp[2] != 'U')
    return 0;
  if (strp[3] != 'o' && strp[3] != 'O')
    return 0;
  if (strp[4] != 't' && strp[4] != 'T')
    return 0;
  if (strp[5] != 'e' && strp[5] != 'E')
    return 0;
  return iswhite (strp[6]);
}
4063 static void
4064 L_getit ()
4066 register char *cp;
4068 if (*dbp == '\'') /* Skip prefix quote */
4069 dbp++;
4070 else if (*dbp == '(')
4072 if (L_isquote (dbp))
4073 dbp += 7; /* Skip "(quote " */
4074 else
4075 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4076 dbp = skip_spaces (dbp);
4079 for (cp = dbp /*+1*/;
4080 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4081 cp++)
4082 continue;
4083 if (cp == dbp)
4084 return;
4086 pfnote (savenstr (dbp, cp-dbp), TRUE,
4087 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4090 static void
4091 Lisp_functions (inf)
4092 FILE *inf;
4094 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4096 if (dbp[0] == '(')
4098 if (L_isdef (dbp))
4100 dbp = skip_non_spaces (dbp);
4101 dbp = skip_spaces (dbp);
4102 L_getit ();
4104 else
4106 /* Check for (foo::defmumble name-defined ... */
4108 dbp++;
4109 while (*dbp != '\0' && !iswhite (*dbp)
4110 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4111 if (*dbp == ':')
4114 dbp++;
4115 while (*dbp == ':');
4117 if (L_isdef (dbp - 1))
4119 dbp = skip_non_spaces (dbp);
4120 dbp = skip_spaces (dbp);
4121 L_getit ();
4130 * Postscript tag functions
4131 * Just look for lines where the first character is '/'
4132 * Richard Mlynarik <mly@adoc.xerox.com>
4133 * Also look at "defineps" for PSWrap
4134 * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4136 static void
4137 Postscript_functions (inf)
4138 FILE *inf;
4140 register char *bp, *ep;
4142 LOOP_ON_INPUT_LINES (inf, lb, bp)
4144 if (bp[0] == '/')
4146 for (ep = bp+1;
4147 *ep != '\0' && *ep != ' ' && *ep != '{';
4148 ep++)
4149 continue;
4150 pfnote (savenstr (bp, ep-bp), TRUE,
4151 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4153 else if (strneq (bp, "defineps", 8))
4155 bp = skip_non_spaces (bp);
4156 bp = skip_spaces (bp);
4157 get_tag (bp);
4164 * Scheme tag functions
4165 * look for (def... xyzzy
4166 * look for (def... (xyzzy
4167 * look for (def ... ((...(xyzzy ....
4168 * look for (set! xyzzy
4171 static void
4172 Scheme_functions (inf)
4173 FILE *inf;
4175 register char *bp;
4177 LOOP_ON_INPUT_LINES (inf, lb, bp)
4179 if (bp[0] == '('
4180 && (bp[1] == 'D' || bp[1] == 'd')
4181 && (bp[2] == 'E' || bp[2] == 'e')
4182 && (bp[3] == 'F' || bp[3] == 'f'))
4184 bp = skip_non_spaces (bp);
4185 /* Skip over open parens and white space */
4186 while (iswhite (*bp) || *bp == '(')
4187 bp++;
4188 get_tag (bp);
4190 if (bp[0] == '('
4191 && (bp[1] == 'S' || bp[1] == 's')
4192 && (bp[2] == 'E' || bp[2] == 'e')
4193 && (bp[3] == 'T' || bp[3] == 't')
4194 && (bp[4] == '!' || bp[4] == '!')
4195 && (iswhite (bp[5])))
4197 bp = skip_non_spaces (bp);
4198 bp = skip_spaces (bp);
4199 get_tag (bp);
4204 /* Find tags in TeX and LaTeX input files. */
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* text of the control sequence */
  int len;                      /* length of NAME */
};
4215 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4217 /* Default set of control sequences to put into TEX_toktab.
4218 The value of environment var TEXTAGS is prepended to this. */
4220 char *TEX_defenv = "\
4221 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4222 :part:appendix:entry:index";
4224 static void TEX_mode P_((FILE *));
4225 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4226 static int TEX_Token P_((char *));
4228 char TEX_esc = '\\';
4229 char TEX_opgrp = '{';
4230 char TEX_clgrp = '}';
4233 * TeX/LaTeX scanning loop.
4235 static void
4236 TeX_functions (inf)
4237 FILE *inf;
4239 char *cp, *lasthit;
4240 register int i;
4242 /* Select either \ or ! as escape character. */
4243 TEX_mode (inf);
4245 /* Initialize token table once from environment. */
4246 if (!TEX_toktab)
4247 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4249 LOOP_ON_INPUT_LINES (inf, lb, cp)
4251 lasthit = cp;
4252 /* Look at each esc in line. */
4253 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4255 if (*++cp == '\0')
4256 break;
4257 linecharno += cp - lasthit;
4258 lasthit = cp;
4259 i = TEX_Token (lasthit);
4260 if (i >= 0)
4262 /* We seem to include the TeX command in the tag name.
4263 register char *p;
4264 for (p = lasthit + TEX_toktab[i].len;
4265 *p != '\0' && *p != TEX_clgrp;
4266 p++)
4267 continue; */
4268 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4269 lb.buffer, lb.len, lineno, linecharno);
4270 break; /* We only tag a line once */
4276 #define TEX_LESC '\\'
4277 #define TEX_SESC '!'
4278 #define TEX_cmt '%'
4280 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4281 chars accordingly. */
4282 static void
4283 TEX_mode (inf)
4284 FILE *inf;
4286 int c;
4288 while ((c = getc (inf)) != EOF)
4290 /* Skip to next line if we hit the TeX comment char. */
4291 if (c == TEX_cmt)
4292 while (c != '\n')
4293 c = getc (inf);
4294 else if (c == TEX_LESC || c == TEX_SESC )
4295 break;
4298 if (c == TEX_LESC)
4300 TEX_esc = TEX_LESC;
4301 TEX_opgrp = '{';
4302 TEX_clgrp = '}';
4304 else
4306 TEX_esc = TEX_SESC;
4307 TEX_opgrp = '<';
4308 TEX_clgrp = '>';
4310 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4311 No attempt is made to correct the situation. */
4312 rewind (inf);
4315 /* Read environment and prepend it to the default string.
4316 Build token table. */
4317 static struct TEX_tabent *
4318 TEX_decode_env (evarname, defenv)
4319 char *evarname;
4320 char *defenv;
4322 register char *env, *p;
4324 struct TEX_tabent *tab;
4325 int size, i;
4327 /* Append default string to environment. */
4328 env = getenv (evarname);
4329 if (!env)
4330 env = defenv;
4331 else
4333 char *oldenv = env;
4334 env = concat (oldenv, defenv, "");
4337 /* Allocate a token table */
4338 for (size = 1, p = env; p;)
4339 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4340 size++;
4341 /* Add 1 to leave room for null terminator. */
4342 tab = xnew (size + 1, struct TEX_tabent);
4344 /* Unpack environment string into token table. Be careful about */
4345 /* zero-length strings (leading ':', "::" and trailing ':') */
4346 for (i = 0; *env;)
4348 p = etags_strchr (env, ':');
4349 if (!p) /* End of environment string. */
4350 p = env + strlen (env);
4351 if (p - env > 0)
4352 { /* Only non-zero strings. */
4353 tab[i].name = savenstr (env, p - env);
4354 tab[i].len = strlen (tab[i].name);
4355 i++;
4357 if (*p)
4358 env = p + 1;
4359 else
4361 tab[i].name = NULL; /* Mark end of table. */
4362 tab[i].len = 0;
4363 break;
4366 return tab;
4369 /* If the text at CP matches one of the tag-defining TeX command names,
4370 return the pointer to the first occurrence of that command in TEX_toktab.
4371 Otherwise return -1.
4372 Keep the capital `T' in `token' for dumb truncating compilers
4373 (this distinguishes it from `TEX_toktab' */
4374 static int
4375 TEX_Token (cp)
4376 char *cp;
4378 int i;
4380 for (i = 0; TEX_toktab[i].len > 0; i++)
4381 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4382 return i;
4383 return -1;
4386 /* Texinfo support. Dave Love, Mar. 2000. */
4387 static void
4388 Texinfo_functions (inf)
4389 FILE * inf;
4391 char *cp, *start;
4392 LOOP_ON_INPUT_LINES (inf, lb, cp)
4394 if ((*cp++ == '@' && *cp++ == 'n' && *cp++ == 'o' && *cp++ == 'd'
4395 && *cp++ == 'e' && iswhite (*cp++)))
4397 while (iswhite (*cp))
4398 cp++;
4399 start = cp;
4400 while (*cp != '\0' && *cp != ',')
4401 cp++;
4402 pfnote (savenstr (start, cp - start), TRUE,
4403 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4409 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4411 * Assumes that the predicate starts at column 0.
4412 * Only the first clause of a predicate is added.
4414 static int prolog_pred P_((char *, char *));
4415 static void prolog_skip_comment P_((linebuffer *, FILE *));
4416 static int prolog_atom P_((char *, int));
4418 static void
4419 Prolog_functions (inf)
4420 FILE *inf;
4422 char *cp, *last;
4423 int len;
4424 int allocated;
4426 allocated = 0;
4427 len = 0;
4428 last = NULL;
4430 LOOP_ON_INPUT_LINES (inf, lb, cp)
4432 if (cp[0] == '\0') /* Empty line */
4433 continue;
4434 else if (iswhite (cp[0])) /* Not a predicate */
4435 continue;
4436 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4437 prolog_skip_comment (&lb, inf);
4438 else if ((len = prolog_pred (cp, last)) > 0)
4440 /* Predicate. Store the function name so that we only
4441 generate a tag for the first clause. */
4442 if (last == NULL)
4443 last = xnew(len + 1, char);
4444 else if (len + 1 > allocated)
4445 last = xrnew (last, len + 1, char);
4446 allocated = len + 1;
4447 strncpy (last, cp, len);
4448 last[len] = '\0';
4454 static void
4455 prolog_skip_comment (plb, inf)
4456 linebuffer *plb;
4457 FILE *inf;
4459 char *cp;
4463 for (cp = plb->buffer; *cp != '\0'; cp++)
4464 if (cp[0] == '*' && cp[1] == '/')
4465 return;
4466 lineno++;
4467 linecharno += readline (plb, inf);
4469 while (!feof(inf));
4473 * A predicate definition is added if it matches:
4474 * <beginning of line><Prolog Atom><whitespace>(
4476 * It is added to the tags database if it doesn't match the
4477 * name of the previous clause header.
4479 * Return the size of the name of the predicate, or 0 if no header
4480 * was found.
4482 static int
4483 prolog_pred (s, last)
4484 char *s;
4485 char *last; /* Name of last clause. */
4487 int pos;
4488 int len;
4490 pos = prolog_atom (s, 0);
4491 if (pos < 1)
4492 return 0;
4494 len = pos;
4495 pos = skip_spaces (s + pos) - s;
4497 if ((s[pos] == '(') || (s[pos] == '.'))
4499 if (s[pos] == '(')
4500 pos++;
4502 /* Save only the first clause. */
4503 if (last == NULL
4504 || len != (int)strlen (last)
4505 || !strneq (s, last, len))
4507 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4508 return len;
4511 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; isalnum (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    {
      switch (*p)
        {
        case '\0':
          /* Multiline quoted atoms are ignored. */
          return -1;

        case '\\':
          /* A backslash takes the next character with it.  */
          if (p[1] == '\0')
            return -1;
          p += 2;
          break;

        case '\'':
          p++;
          if (*p != '\'')
            return p - start;
          p++;                  /* A double quote */
          break;

        default:
          p++;
          break;
        }
    }
}
4574 * Support for Erlang -- Anders Lindgren, Feb 1996.
4576 * Generates tags for functions, defines, and records.
4578 * Assumes that Erlang functions start at column 0.
4580 static int erlang_func P_((char *, char *));
4581 static void erlang_attribute P_((char *));
4582 static int erlang_atom P_((char *, int));
4584 static void
4585 Erlang_functions (inf)
4586 FILE *inf;
4588 char *cp, *last;
4589 int len;
4590 int allocated;
4592 allocated = 0;
4593 len = 0;
4594 last = NULL;
4596 LOOP_ON_INPUT_LINES (inf, lb, cp)
4598 if (cp[0] == '\0') /* Empty line */
4599 continue;
4600 else if (iswhite (cp[0])) /* Not function nor attribute */
4601 continue;
4602 else if (cp[0] == '%') /* comment */
4603 continue;
4604 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4605 continue;
4606 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4608 erlang_attribute (cp);
4609 last = NULL;
4611 else if ((len = erlang_func (cp, last)) > 0)
4614 * Function. Store the function name so that we only
4615 * generates a tag for the first clause.
4617 if (last == NULL)
4618 last = xnew (len + 1, char);
4619 else if (len + 1 > allocated)
4620 last = xrnew (last, len + 1, char);
4621 allocated = len + 1;
4622 strncpy (last, cp, len);
4623 last[len] = '\0';
4630 * A function definition is added if it matches:
4631 * <beginning of line><Erlang Atom><whitespace>(
4633 * It is added to the tags database if it doesn't match the
4634 * name of the previous clause header.
4636 * Return the size of the name of the function, or 0 if no function
4637 * was found.
4639 static int
4640 erlang_func (s, last)
4641 char *s;
4642 char *last; /* Name of last clause. */
4644 int pos;
4645 int len;
4647 pos = erlang_atom (s, 0);
4648 if (pos < 1)
4649 return 0;
4651 len = pos;
4652 pos = skip_spaces (s + pos) - s;
4654 /* Save only the first clause. */
4655 if (s[pos++] == '('
4656 && (last == NULL
4657 || len != (int)strlen (last)
4658 || !strneq (s, last, len)))
4660 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4661 return len;
4664 return 0;
4669 * Handle attributes. Currently, tags are generated for defines
4670 * and records.
4672 * They are on the form:
4673 * -define(foo, bar).
4674 * -define(Foo(M, N), M+N).
4675 * -record(graph, {vtab = notable, cyclic = true}).
4677 static void
4678 erlang_attribute (s)
4679 char *s;
4681 int pos;
4682 int len;
4684 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4686 pos = skip_spaces (s + 7) - s;
4687 if (s[pos++] == '(')
4689 pos = skip_spaces (s + pos) - s;
4690 len = erlang_atom (s, pos);
4691 if (len != 0)
4692 pfnote (savenstr (& s[pos], len), TRUE,
4693 s, pos + len, lineno, linecharno);
4696 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name starts with a letter or underscore and continues
 * with alphanumerics and underscores.  A quoted name runs up to the
 * next single quote; a backslash takes the following character with it.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (isalpha (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; isalnum (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  for (p++; *p != '\''; )
    {
      if (*p == '\0')
        /* Multiline quoted atoms are ignored. */
        return -1;
      if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
  p++;                          /* count the closing quote */
  return p - start;
}
4750 #ifdef ETAGS_REGEXPS
4752 static char *scan_separators P_((char *));
4753 static void analyse_regex P_((char *, bool));
4754 static void add_regex P_((char *, bool, language *));
4755 static char *substitute P_((char *, char *, struct re_registers *));
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Returns pointer to terminating separator.  Works in place.  Null
   terminates name string. */
static char *
scan_separators (name)
     char *name;
{
  char delim = name[0];
  char *dst = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character; one at the very
             end of the string is dropped.  */
          if (*++name == '\0')
            break;
          if (*name == 't')
            *dst++ = '\t';
          else if (*name == delim)
            *dst++ = delim;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *name;
            }
        }
      else if (*name == delim)
        break;
      else
        *dst++ = *name;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return name;
}
4800 /* Look at the argument of --regex or --no-regex and do the right
4801 thing. Same for each line of a regexp file. */
4802 static void
4803 analyse_regex (regex_arg, ignore_case)
4804 char *regex_arg;
4805 bool ignore_case;
4807 if (regex_arg == NULL)
4808 free_patterns (); /* --no-regex: remove existing regexps */
4810 /* A real --regexp option or a line in a regexp file. */
4811 switch (regex_arg[0])
4813 /* Comments in regexp file or null arg to --regex. */
4814 case '\0':
4815 case ' ':
4816 case '\t':
4817 break;
4819 /* Read a regex file. This is recursive and may result in a
4820 loop, which will stop when the file descriptors are exhausted. */
4821 case '@':
4823 FILE *regexfp;
4824 linebuffer regexbuf;
4825 char *regexfile = regex_arg + 1;
4827 /* regexfile is a file containing regexps, one per line. */
4828 regexfp = fopen (regexfile, "r");
4829 if (regexfp == NULL)
4831 pfatal (regexfile);
4832 return;
4834 initbuffer (&regexbuf);
4835 while (readline_internal (&regexbuf, regexfp) > 0)
4836 analyse_regex (regexbuf.buffer, ignore_case);
4837 free (regexbuf.buffer);
4838 fclose (regexfp);
4840 break;
4842 /* Regexp to be used for a specific language only. */
4843 case '{':
4845 language *lang;
4846 char *lang_name = regex_arg + 1;
4847 char *cp;
4849 for (cp = lang_name; *cp != '}'; cp++)
4850 if (*cp == '\0')
4852 error ("unterminated language name in regex: %s", regex_arg);
4853 return;
4855 *cp = '\0';
4856 lang = get_language_from_name (lang_name);
4857 if (lang == NULL)
4858 return;
4859 add_regex (cp + 1, ignore_case, lang);
4861 break;
4863 /* Regexp to be used for any language. */
4864 default:
4865 add_regex (regex_arg, ignore_case, NULL);
4866 break;
4870 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4871 expression, into a real regular expression by compiling it. */
4872 static void
4873 add_regex (regexp_pattern, ignore_case, lang)
4874 char *regexp_pattern;
4875 bool ignore_case;
4876 language *lang;
4878 char *name;
4879 const char *err;
4880 struct re_pattern_buffer *patbuf;
4881 pattern *pp;
4884 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4886 error ("%s: unterminated regexp", regexp_pattern);
4887 return;
4889 name = scan_separators (regexp_pattern);
4890 if (regexp_pattern[0] == '\0')
4892 error ("null regexp", (char *)NULL);
4893 return;
4895 (void) scan_separators (name);
4897 patbuf = xnew (1, struct re_pattern_buffer);
4898 /* Translation table to fold case if appropriate. */
4899 patbuf->translate = (ignore_case) ? lc_trans : NULL;
4900 patbuf->fastmap = NULL;
4901 patbuf->buffer = NULL;
4902 patbuf->allocated = 0;
4904 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4905 if (err != NULL)
4907 error ("%s while compiling pattern", err);
4908 return;
4911 pp = p_head;
4912 p_head = xnew (1, pattern);
4913 p_head->regex = savestr (regexp_pattern);
4914 p_head->p_next = pp;
4915 p_head->language = lang;
4916 p_head->pattern = patbuf;
4917 p_head->name_pattern = savestr (name);
4918 p_head->error_signaled = FALSE;
4922 * Do the substitutions indicated by the regular expression and
4923 * arguments.
4925 static char *
4926 substitute (in, out, regs)
4927 char *in, *out;
4928 struct re_registers *regs;
4930 char *result, *t;
4931 int size, dig, diglen;
4933 result = NULL;
4934 size = strlen (out);
4936 /* Pass 1: figure out how much to allocate by finding all \N strings. */
4937 if (out[size - 1] == '\\')
4938 fatal ("pattern error in \"%s\"", out);
4939 for (t = etags_strchr (out, '\\');
4940 t != NULL;
4941 t = etags_strchr (t + 2, '\\'))
4942 if (isdigit (t[1]))
4944 dig = t[1] - '0';
4945 diglen = regs->end[dig] - regs->start[dig];
4946 size += diglen - 2;
4948 else
4949 size -= 1;
4951 /* Allocate space and do the substitutions. */
4952 result = xnew (size + 1, char);
4954 for (t = result; *out != '\0'; out++)
4955 if (*out == '\\' && isdigit (*++out))
4957 /* Using "dig2" satisfies my debugger. Bleah. */
4958 dig = *out - '0';
4959 diglen = regs->end[dig] - regs->start[dig];
4960 strncpy (t, in + regs->start[dig], diglen);
4961 t += diglen;
4963 else
4964 *t++ = *out;
4965 *t = '\0';
4967 if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
4968 abort ();
4970 return result;
4973 /* Deallocate all patterns. */
4974 static void
4975 free_patterns ()
4977 pattern *pp;
4978 while (p_head != NULL)
4980 pp = p_head->p_next;
4981 free (p_head->regex);
4982 free (p_head->name_pattern);
4983 free (p_head);
4984 p_head = pp;
4986 return;
4989 static void
4990 get_tag (bp)
4991 register char *bp;
4993 register char *cp;
4995 if (*bp == '\0')
4996 return;
4997 /* Go till you get to white space or a syntactic break */
4998 for (cp = bp + 1;
4999 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5000 cp++)
5001 continue;
5002 pfnote (savenstr (bp, cp-bp), TRUE,
5003 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5006 #endif /* ETAGS_REGEXPS */
5007 /* Initialize a linebuffer for use */
5008 static void
5009 initbuffer (lbp)
5010 linebuffer *lbp;
5012 lbp->size = 200;
5013 lbp->buffer = xnew (200, char);
5017 * Read a line of text from `stream' into `lbp', excluding the
5018 * newline or CR-NL, if any. Return the number of characters read from
5019 * `stream', which is the length of the line including the newline.
5021 * On DOS or Windows we do not count the CR character, if any, before the
5022 * NL, in the returned length; this mirrors the behavior of emacs on those
5023 * platforms (for text files, it translates CR-NL to NL as it reads in the
5024 * file).
5026 static long
5027 readline_internal (lbp, stream)
5028 linebuffer *lbp;
5029 register FILE *stream;
5031 char *buffer = lbp->buffer;
5032 register char *p = lbp->buffer;
5033 register char *pend;
5034 int chars_deleted;
5036 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5038 while (1)
5040 register int c = getc (stream);
5041 if (p == pend)
5043 /* We're at the end of linebuffer: expand it. */
5044 lbp->size *= 2;
5045 buffer = xrnew (buffer, lbp->size, char);
5046 p += buffer - lbp->buffer;
5047 pend = buffer + lbp->size;
5048 lbp->buffer = buffer;
5050 if (c == EOF)
5052 *p = '\0';
5053 chars_deleted = 0;
5054 break;
5056 if (c == '\n')
5058 if (p > buffer && p[-1] == '\r')
5060 p -= 1;
5061 #ifdef DOS_NT
5062 /* Assume CRLF->LF translation will be performed by Emacs
5063 when loading this file, so CRs won't appear in the buffer.
5064 It would be cleaner to compensate within Emacs;
5065 however, Emacs does not know how many CRs were deleted
5066 before any given point in the file. */
5067 chars_deleted = 1;
5068 #else
5069 chars_deleted = 2;
5070 #endif
5072 else
5074 chars_deleted = 1;
5076 *p = '\0';
5077 break;
5079 *p++ = c;
5081 lbp->len = p - buffer;
5083 return lbp->len + chars_deleted;
5087 * Like readline_internal, above, but in addition try to match the
5088 * input line against relevant regular expressions.
5090 static long
5091 readline (lbp, stream)
5092 linebuffer *lbp;
5093 FILE *stream;
5095 /* Read new line. */
5096 long result = readline_internal (lbp, stream);
5097 #ifdef ETAGS_REGEXPS
5098 int match;
5099 pattern *pp;
5101 /* Match against relevant patterns. */
5102 if (lbp->len > 0)
5103 for (pp = p_head; pp != NULL; pp = pp->p_next)
5105 /* Only use generic regexps or those for the current language. */
5106 if (pp->language != NULL && pp->language != curlang)
5107 continue;
5109 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5110 switch (match)
5112 case -2:
5113 /* Some error. */
5114 if (!pp->error_signaled)
5116 error ("error while matching \"%s\"", pp->regex);
5117 pp->error_signaled = TRUE;
5119 break;
5120 case -1:
5121 /* No match. */
5122 break;
5123 default:
5124 /* Match occurred. Construct a tag. */
5125 if (pp->name_pattern[0] != '\0')
5127 /* Make a named tag. */
5128 char *name = substitute (lbp->buffer,
5129 pp->name_pattern, &pp->regs);
5130 if (name != NULL)
5131 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5133 else
5135 /* Make an unnamed tag. */
5136 pfnote ((char *)NULL, TRUE,
5137 lbp->buffer, match, lineno, linecharno);
5139 break;
5142 #endif /* ETAGS_REGEXPS */
5144 return result;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5159 * Return a pointer to a space of size LEN+1 allocated with xnew where
5160 * the string CP has been copied for at most the first LEN characters.
5162 static char *
5163 savenstr (cp, len)
5164 char *cp;
5165 int len;
5167 register char *dp;
5169 dp = xnew (len + 1, char);
5170 strncpy (dp, cp, len);
5171 dp[len] = '\0';
5172 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' yields a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;
  /* Like POSIX strrchr, search for C converted to char.  Comparing
     against the raw int would never match a value such as 0xE9 on
     platforms where plain char is signed.  */
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        last = sp;              /* remember the most recent hit */
    } while (*sp++);            /* the terminator itself is examined too */
  return (char *)last;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' yields a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Like POSIX strchr, search for C converted to char.  Comparing
     against the raw int would never match a value such as 0xE9 on
     platforms where plain char is signed.  */
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        return (char *)sp;
    } while (*sp++);            /* the terminator itself is examined too */
  return NULL;
}
/* Advance CP over every leading character for which iswhite holds and
   return the resulting pointer.
   NOTE(review): there is no explicit end-of-string test here, so this
   presumably relies on iswhite being false for '\0' -- confirm against
   the definition of iswhite.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP up to the first character for which iswhite holds, or to
   the terminating NUL if there is none, and return that pointer.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5237 /* Print error message and exit. */
5238 static void
5239 fatal (s1, s2)
5240 char *s1, *s2;
5242 error (s1, s2);
5243 exit (BAD);
5246 static void
5247 pfatal (s1)
5248 char *s1;
5250 perror (s1);
5251 exit (BAD);
5254 static void
5255 suggest_asking_for_help ()
5257 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5258 progname,
5259 #ifdef LONG_OPTIONS
5260 "--help"
5261 #else
5262 "-h"
5263 #endif
5265 exit (BAD);
5268 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5269 static void
5270 error (s1, s2)
5271 const char *s1, *s2;
5273 fprintf (stderr, "%s: ", progname);
5274 fprintf (stderr, s1, s2);
5275 fprintf (stderr, "\n");
5278 /* Return a newly-allocated string whose contents
5279 concatenate those of s1, s2, s3. */
5280 static char *
5281 concat (s1, s2, s3)
5282 char *s1, *s2, *s3;
5284 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5285 char *result = xnew (len1 + len2 + len3 + 1, char);
5287 strcpy (result, s1);
5288 strcpy (result + len1, s2);
5289 strcpy (result + len1 + len2, s3);
5290 result[len1 + len2 + len3] = '\0';
5292 return result;
5295 /* Does the same work as the system V getcwd, but does not need to
5296 guess the buffer size in advance. */
5297 static char *
5298 etags_getcwd ()
5300 #ifdef HAVE_GETCWD
5301 int bufsize = 200;
5302 char *path = xnew (bufsize, char);
5304 while (getcwd (path, bufsize) == NULL)
5306 if (errno != ERANGE)
5307 pfatal ("getcwd");
5308 bufsize *= 2;
5309 free (path);
5310 path = xnew (bufsize, char);
5313 canonicalize_filename (path);
5314 return path;
5316 #else /* not HAVE_GETCWD */
5317 #ifdef MSDOS
5318 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5320 getwd (path);
5322 for (p = path; *p != '\0'; p++)
5323 if (*p == '\\')
5324 *p = '/';
5325 else
5326 *p = lowcase (*p);
5328 return strdup (path);
5329 #else /* not MSDOS */
5330 linebuffer path;
5331 FILE *pipe;
5333 initbuffer (&path);
5334 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5335 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5336 pfatal ("pwd");
5337 pclose (pipe);
5339 return path.buffer;
5340 #endif /* not MSDOS */
5341 #endif /* not HAVE_GETCWD */
5344 /* Return a newly allocated string containing the file name of FILE
5345 relative to the absolute directory DIR (which should end with a slash). */
5346 static char *
5347 relative_filename (file, dir)
5348 char *file, *dir;
5350 char *fp, *dp, *afn, *res;
5351 int i;
5353 /* Find the common root of file and dir (with a trailing slash). */
5354 afn = absolute_filename (file, cwd);
5355 fp = afn;
5356 dp = dir;
5357 while (*fp++ == *dp++)
5358 continue;
5359 fp--, dp--; /* back to the first differing char */
5360 #ifdef DOS_NT
5361 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5362 return afn;
5363 #endif
5364 do /* look at the equal chars until '/' */
5365 fp--, dp--;
5366 while (*fp != '/');
5368 /* Build a sequence of "../" strings for the resulting relative file name. */
5369 i = 0;
5370 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5371 i += 1;
5372 res = xnew (3*i + strlen (fp + 1) + 1, char);
5373 res[0] = '\0';
5374 while (i-- > 0)
5375 strcat (res, "../");
5377 /* Add the file name relative to the common root of file and dir. */
5378 strcat (res, fp + 1);
5379 free (afn);
5381 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  An absolute
   FILE is copied as is; otherwise DIR is prepended.  The "/dirname/.."
   and "/." components are then removed in place, and an empty result
   is replaced by "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": walk back to the start of the preceding
                 component, i.e. to where an absolute name begins.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does
                 not permit; memmove does.  The +1 moves the NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": drop it, again with an overlap-safe move.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    return savestr ("/");
  else
    return res;
}
5445 /* Return a newly allocated string containing the absolute
5446 file name of dir where FILE resides given DIR (which should
5447 end with a slash). */
5448 static char *
5449 absolute_dirname (file, dir)
5450 char *file, *dir;
5452 char *slashp, *res;
5453 char save;
5455 canonicalize_filename (file);
5456 slashp = etags_strrchr (file, '/');
5457 if (slashp == NULL)
5458 return savestr (dir);
5459 save = slashp[1];
5460 slashp[1] = '\0';
5461 res = absolute_filename (file, dir);
5462 slashp[1] = save;
5464 return res;
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  return (fn[0] == '/'
#ifdef DOS_NT
          /* <ctype.h> functions need an unsigned char value; a plain
             char may be negative for non-ASCII bytes.  */
          || (isalpha ((unsigned char) fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Translate backslashes into slashes.  Works in place.  Under DOS_NT
   a lower-case drive letter is also converted to upper case; on other
   systems the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  <ctype.h> functions need an
     unsigned char value; a plain char may be negative.  */
  if (fn[0] && fn[1] == ':' && islower ((unsigned char) fn[0]))
    fn[0] = toupper ((unsigned char) fn[0]);
  /* Convert backslashes to slashes.  */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5499 /* Increase the size of a linebuffer. */
5500 static void
5501 grow_linebuffer (lbp, toksize)
5502 linebuffer *lbp;
5503 int toksize;
5505 while (lbp->size < toksize)
5506 lbp->size *= 2;
5507 lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
/* Like malloc but get fatal error if memory is exhausted: a NULL
   result from malloc is reported through fatal.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem = (long *) malloc (size);

  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
/* Like realloc but get fatal error if memory is exhausted: a NULL
   result from realloc is reported through fatal.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *mem = (long *) realloc (ptr, size);

  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}