* viper-cmd.el (viper-envelop-ESC-key): added the option to
[emacs.git] / lib-src / etags.c
blobe0902077822fc3b6e571708e1ce905629d89f44b
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * Sam Kendall added C++.
28 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
29 * Regexp tags by Tom Tromey.
31 * Francesco Potorti` (pot@gnu.org) is the current maintainer.
34 char pot_etags_version[] = "@(#) pot revision number is 13.44";
36 #define TRUE 1
37 #define FALSE 0
39 #ifndef DEBUG
40 # define DEBUG FALSE
41 #endif
43 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
44 # define P_(proto) proto
45 #else
46 # define P_(proto) ()
47 #endif
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 #endif /* HAVE_CONFIG_H */
58 #ifndef _GNU_SOURCE
59 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
60 #endif
62 #ifdef MSDOS
63 # undef MSDOS
64 # define MSDOS TRUE
65 # include <fcntl.h>
66 # include <sys/param.h>
67 # include <io.h>
68 # ifndef HAVE_CONFIG_H
69 # define DOS_NT
70 # include <sys/config.h>
71 # endif
72 #else
73 # define MSDOS FALSE
74 #endif /* MSDOS */
76 #ifdef WINDOWSNT
77 # include <stdlib.h>
78 # include <fcntl.h>
79 # include <string.h>
80 # include <direct.h>
81 # include <io.h>
82 # define MAXPATHLEN _MAX_PATH
83 # ifdef HAVE_CONFIG_H
84 # undef HAVE_NTGUI
85 # else
86 # define DOS_NT
87 # endif /* not HAVE_CONFIG_H */
88 # ifndef HAVE_GETCWD
89 # define HAVE_GETCWD
90 # endif /* undef HAVE_GETCWD */
91 #else /* !WINDOWSNT */
92 # ifdef STDC_HEADERS
93 # include <stdlib.h>
94 # include <string.h>
95 # else
96 extern char *getenv ();
97 # endif
98 #endif /* !WINDOWSNT */
100 #ifdef HAVE_UNISTD_H
101 # include <unistd.h>
102 #else
103 # if defined (HAVE_GETCWD) && !WINDOWSNT
104 extern char *getcwd (char *buf, size_t size);
105 # endif
106 #endif /* HAVE_UNISTD_H */
108 #include <stdio.h>
109 #include <ctype.h>
110 #include <errno.h>
111 #ifndef errno
112 extern int errno;
113 #endif
114 #include <sys/types.h>
115 #include <sys/stat.h>
117 #if !defined (S_ISREG) && defined (S_IFREG)
118 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
119 #endif
121 #ifdef LONG_OPTIONS
122 # include <getopt.h>
123 #else
124 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
125 extern char *optarg;
126 extern int optind, opterr;
127 #endif /* LONG_OPTIONS */
129 #ifdef ETAGS_REGEXPS
130 # include <regex.h>
131 #endif /* ETAGS_REGEXPS */
133 /* Define CTAGS to make the program "ctags" compatible with the usual one.
134 Leave it undefined to make the program "etags", which makes emacs-style
135 tag tables and tags typedefs, #defines and struct/union/enum by default. */
136 #ifdef CTAGS
137 # undef CTAGS
138 # define CTAGS TRUE
139 #else
140 # define CTAGS FALSE
141 #endif
143 /* Exit codes for success and failure. */
144 #ifdef VMS
145 # define GOOD 1
146 # define BAD 0
147 #else
148 # define GOOD 0
149 # define BAD 1
150 #endif
152 /* C extensions. */
153 #define C_PLPL 0x00001 /* C++ */
154 #define C_STAR 0x00003 /* C* */
155 #define C_JAVA 0x00005 /* JAVA */
156 #define YACC 0x10000 /* yacc file */
158 #define streq(s,t) ((DEBUG && (s) == NULL && (t) == NULL \
159 && (abort (), 1)) || !strcmp (s, t))
160 #define strneq(s,t,n) ((DEBUG && (s) == NULL && (t) == NULL \
161 && (abort (), 1)) || !strncmp (s, t, n))
/* Lower-case the character C.  The <ctype.h> functions require an
   argument that is either EOF or representable as unsigned char, so
   convert through unsigned char rather than through (possibly signed)
   char: a byte >= 0x80 would otherwise reach tolower as a negative
   value, which is undefined behavior.  The argument is parenthesized
   so that an expression can be passed safely.  */
#define lowcase(c)	tolower ((unsigned char) (c))
#define CHARS 256		/* number of distinct 8-bit char values */
/* Map X onto a valid index into the CHARS-sized lookup tables.  X is
   parenthesized so that the cast applies to the whole argument even
   when it is an expression such as `a | b'.  */
#define CHAR(x)		((unsigned int) (x) & (CHARS - 1))
#define	iswhite(c)	(_wht[CHAR (c)]) /* c is white */
#define notinname(c)	(_nin[CHAR (c)]) /* c is not in a name */
#define	begtoken(c)	(_btk[CHAR (c)]) /* c can start token */
#define	intoken(c)	(_itk[CHAR (c)]) /* c can be in token */
#define	endtoken(c)	(_etk[CHAR (c)]) /* c ends tokens */
175 * xnew, xrnew -- allocate, reallocate storage
177 * SYNOPSIS: Type *xnew (int n, Type);
178 * Type *xrnew (OldPointer, int n, Type);
180 #ifdef chkmalloc
181 # include "chkmalloc.h"
182 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
183 (n) * sizeof (Type)))
184 # define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
185 (op), (n) * sizeof (Type)))
186 #else
187 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
188 # define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type)))
189 #endif
191 typedef int bool;
193 typedef void Lang_function P_((FILE *));
/* Pairs a file name suffix with the command that decompresses files
   carrying it.  The `compressors' table is a list of these, closed by
   an entry whose suffix is NULL.  */
195 typedef struct
197 char *suffix; /* suffix without the leading dot, e.g. "gz" */
198 char *command; /* Takes one arg and decompresses to stdout */
199 } compressor;
/* Description of one language known to the program; `lang_names' is
   the table of these, closed by an entry whose name is NULL.  */
201 typedef struct
203 char *name; /* language name, as listed by print_language_names */
204 Lang_function *function; /* tag-finding routine; NULL for the "auto" entry */
205 char **suffixes; /* NULL-terminated list of default suffixes, or NULL */
206 char **interpreters; /* NULL-terminated list of interpreter names, or NULL
   (presumably matched against a #! line -- confirm in the lookup code) */
207 } language;
209 typedef struct node_st
210 { /* sorting structure */
211 char *name; /* function or type name */
212 char *file; /* file name */
213 bool is_func; /* use pattern or line no */
214 bool been_warned; /* set if noticed dup */
215 int lno; /* line number tag is on */
216 long cno; /* character number line starts on */
217 char *pat; /* search pattern */
218 struct node_st *left, *right; /* left and right sons */
219 } node;
222 * A `linebuffer' is a structure which holds a line of text.
223 * `readline_internal' reads a line from a stream into a linebuffer
224 * and works regardless of the length of the line.
225 * SIZE is the size of BUFFER, LEN is the length of the string in
226 * BUFFER after readline reads it.
228 typedef struct
230 long size;
231 int len;
232 char *buffer;
233 } linebuffer;
235 /* Many compilers barf on this:
236 Lang_function Ada_funcs;
237 so let's write it this way */
238 static void Ada_funcs P_((FILE *));
239 static void Asm_labels P_((FILE *));
240 static void C_entries P_((int c_ext, FILE *));
241 static void default_C_entries P_((FILE *));
242 static void plain_C_entries P_((FILE *));
243 static void Cjava_entries P_((FILE *));
244 static void Cobol_paragraphs P_((FILE *));
245 static void Cplusplus_entries P_((FILE *));
246 static void Cstar_entries P_((FILE *));
247 static void Erlang_functions P_((FILE *));
248 static void Fortran_functions P_((FILE *));
249 static void Yacc_entries P_((FILE *));
250 static void Lisp_functions P_((FILE *));
251 static void Pascal_functions P_((FILE *));
252 static void Perl_functions P_((FILE *));
253 static void Postscript_functions P_((FILE *));
254 static void Prolog_functions P_((FILE *));
255 static void Python_functions P_((FILE *));
256 static void Scheme_functions P_((FILE *));
257 static void TeX_functions P_((FILE *));
258 static void just_read_file P_((FILE *));
260 static void print_language_names P_((void));
261 static void print_version P_((void));
262 static void print_help P_((void));
263 int main P_((int, char **));
264 static int number_len P_((long));
266 static compressor *get_compressor_from_suffix P_((char *, char **));
267 static language *get_language_from_name P_((char *));
268 static language *get_language_from_interpreter P_((char *));
269 static language *get_language_from_suffix P_((char *));
270 static int total_size_of_entries P_((node *));
271 static long readline P_((linebuffer *, FILE *));
272 static long readline_internal P_((linebuffer *, FILE *));
273 static void get_tag P_((char *));
275 #ifdef ETAGS_REGEXPS
276 static void analyse_regex P_((char *, bool));
277 static void add_regex P_((char *, bool, language *));
278 static void free_patterns P_((void));
279 #endif /* ETAGS_REGEXPS */
280 static void error P_((const char *, const char *));
281 static void suggest_asking_for_help P_((void));
282 static void fatal P_((char *, char *));
283 static void pfatal P_((char *));
284 static void add_node P_((node *, node **));
286 static void init P_((void));
287 static void initbuffer P_((linebuffer *));
288 static void find_entries P_((char *, FILE *));
289 static void free_tree P_((node *));
290 static void pfnote P_((char *, bool, char *, int, int, long));
291 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
292 static void process_file P_((char *));
293 static void put_entries P_((node *));
294 static void takeprec P_((void));
296 static char *concat P_((char *, char *, char *));
297 static char *skip_spaces P_((char *));
298 static char *skip_non_spaces P_((char *));
299 static char *savenstr P_((char *, int));
300 static char *savestr P_((char *));
301 static char *etags_strchr P_((const char *, int));
302 static char *etags_strrchr P_((const char *, int));
303 static char *etags_getcwd P_((void));
304 static char *relative_filename P_((char *, char *));
305 static char *absolute_filename P_((char *, char *));
306 static char *absolute_dirname P_((char *, char *));
307 static bool filename_is_absolute P_((char *f));
308 static void canonicalize_filename P_((char *));
309 static void grow_linebuffer P_((linebuffer *, int));
310 static long *xmalloc P_((unsigned int));
311 static long *xrealloc P_((char *, unsigned int));
314 char searchar = '/'; /* use /.../ searches */
316 char *tagfile; /* output file */
317 char *progname; /* name this program was invoked with */
318 char *cwd; /* current working directory */
319 char *tagfiledir; /* directory of tagfile */
320 FILE *tagf; /* ioptr for tags file */
322 char *curfile; /* current input file name */
323 language *curlang; /* current language */
325 int lineno; /* line number of current line */
326 long charno; /* current character number */
327 long linecharno; /* charno of start of current line */
328 char *dbp; /* pointer to start of current tag */
330 node *head; /* the head of the binary tree of tags */
332 linebuffer lb; /* the current line */
333 linebuffer token_name; /* used by C_entries as a temporary area */
334 struct
336 long linepos;
337 linebuffer lb; /* used by C_entries instead of lb */
338 } lbs[2];
340 /* boolean "functions" (see init) */
341 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
342 char
343 /* white chars */
344 *white = " \f\t\n\r\v",
345 /* not in a name */
346 *nonam = " \f\t\n\r(=,[;",
347 /* token ending chars */
348 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
349 /* token starting chars */
350 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
351 /* valid in-token chars */
352 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
354 bool append_to_tagfile; /* -a: append to tags */
355 /* The following four default to TRUE for etags, but to FALSE for ctags. */
356 bool typedefs; /* -t: create tags for C and Ada typedefs */
357 bool typedefs_and_cplusplus; /* -T: create tags for C typedefs, level */
358 /* 0 struct/enum/union decls, and C++ */
359 /* member functions. */
360 bool constantypedefs; /* -d: create tags for C #define, enum */
361 /* constants and variables. */
362 /* -D: opposite of -d. Default under ctags. */
363 bool declarations; /* --declarations: tag them and extern in C&Co*/
364 bool globals; /* create tags for global variables */
365 bool members; /* create tags for C member variables */
366 bool update; /* -u: update tags */
367 bool vgrind_style; /* -v: create vgrind style index output */
368 bool no_warnings; /* -w: suppress warnings */
369 bool cxref_style; /* -x: create cxref style output */
370 bool cplusplus; /* .[hc] means C++, not C */
371 bool noindentypedefs; /* -I: ignore indentation in C */
372 bool packages_only; /* --packages-only: in Ada, only tag packages*/
374 #ifdef LONG_OPTIONS
375 struct option longopts[] =
377 { "packages-only", no_argument, &packages_only, TRUE },
378 { "append", no_argument, NULL, 'a' },
379 { "backward-search", no_argument, NULL, 'B' },
380 { "c++", no_argument, NULL, 'C' },
381 { "cxref", no_argument, NULL, 'x' },
382 { "defines", no_argument, NULL, 'd' },
383 { "declarations", no_argument, &declarations, TRUE },
384 { "no-defines", no_argument, NULL, 'D' },
385 { "globals", no_argument, &globals, TRUE },
386 { "no-globals", no_argument, &globals, FALSE },
387 { "help", no_argument, NULL, 'h' },
388 { "help", no_argument, NULL, 'H' },
389 { "ignore-indentation", no_argument, NULL, 'I' },
390 { "include", required_argument, NULL, 'i' },
391 { "language", required_argument, NULL, 'l' },
392 { "members", no_argument, &members, TRUE },
393 { "no-members", no_argument, &members, FALSE },
394 { "no-warn", no_argument, NULL, 'w' },
395 { "output", required_argument, NULL, 'o' },
396 #ifdef ETAGS_REGEXPS
397 { "regex", required_argument, NULL, 'r' },
398 { "no-regex", no_argument, NULL, 'R' },
399 { "ignore-case-regex", required_argument, NULL, 'c' },
400 #endif /* ETAGS_REGEXPS */
401 { "typedefs", no_argument, NULL, 't' },
402 { "typedefs-and-c++", no_argument, NULL, 'T' },
403 { "update", no_argument, NULL, 'u' },
404 { "version", no_argument, NULL, 'V' },
405 { "vgrind", no_argument, NULL, 'v' },
406 { NULL }
408 #endif /* LONG_OPTIONS */
410 #ifdef ETAGS_REGEXPS
411 /* Structure defining a regular expression. Elements are
412 the compiled pattern, and the name string. */
413 typedef struct pattern
415 struct pattern *p_next;
416 language *language;
417 char *regex;
418 struct re_pattern_buffer *pattern;
419 struct re_registers regs;
420 char *name_pattern;
421 bool error_signaled;
422 } pattern;
424 /* List of all regexps. */
425 pattern *p_head = NULL;
427 /* How many characters in the character set. (From regex.c.) */
428 #define CHAR_SET_SIZE 256
429 /* Translation table for case-insensitive matching. */
430 char lc_trans[CHAR_SET_SIZE];
431 #endif /* ETAGS_REGEXPS */
433 compressor compressors[] =
435 { "z", "gzip -d -c"},
436 { "Z", "gzip -d -c"},
437 { "gz", "gzip -d -c"},
438 { "GZ", "gzip -d -c"},
439 { "bz2", "bzip2 -d -c" },
440 { NULL }
444 * Language stuff.
447 /* Non-NULL if language fixed. */
448 language *forced_lang = NULL;
450 /* Ada code */
451 char *Ada_suffixes [] =
452 { "ads", "adb", "ada", NULL };
454 /* Assembly code */
455 char *Asm_suffixes [] = { "a", /* Unix assembler */
456 "asm", /* Microcontroller assembly */
457 "def", /* BSO/Tasking definition includes */
458 "inc", /* Microcontroller include files */
459 "ins", /* Microcontroller include files */
460 "s", "sa", /* Unix assembler */
461 "S", /* cpp-processed Unix assembler */
462 "src", /* BSO/Tasking C compiler output */
463 NULL
466 /* Note that .c and .h can be considered C++, if the --c++ flag was
467 given. That is why default_C_entries is called here. */
468 char *default_C_suffixes [] =
469 { "c", "h", NULL };
471 char *Cplusplus_suffixes [] =
472 { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
473 "M", /* Objective C++ */
474 "pdb", /* Postscript with C syntax */
475 NULL };
477 char *Cjava_suffixes [] =
478 { "java", NULL };
480 char *Cobol_suffixes [] =
481 { "COB", "cob", NULL };
483 char *Cstar_suffixes [] =
484 { "cs", "hs", NULL };
486 char *Erlang_suffixes [] =
487 { "erl", "hrl", NULL };
489 char *Fortran_suffixes [] =
490 { "F", "f", "f90", "for", NULL };
492 char *Lisp_suffixes [] =
493 { "cl", "clisp", "el", "l", "lisp", "lsp", "ml", NULL };
495 char *Pascal_suffixes [] =
496 { "p", "pas", NULL };
498 char *Perl_suffixes [] =
499 { "pl", "pm", NULL };
500 char *Perl_interpreters [] =
501 { "perl", "@PERL@", NULL };
503 char *plain_C_suffixes [] =
504 { "pc", /* Pro*C file */
505 "m", /* Objective C file */
506 "lm", /* Objective lex file */
507 NULL };
509 char *Postscript_suffixes [] =
510 { "ps", "psw", NULL }; /* .psw is for PSWrap */
512 char *Prolog_suffixes [] =
513 { "prolog", NULL };
515 char *Python_suffixes [] =
516 { "py", NULL };
518 /* Can't do the `SCM' or `scm' prefix with a version number. */
519 char *Scheme_suffixes [] =
520 { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL };
522 char *TeX_suffixes [] =
523 { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };
525 char *Yacc_suffixes [] =
526 { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */
529 * Table of languages.
531 * It is ok for a given function to be listed under more than one
532 * name. I just didn't.
535 language lang_names [] =
537 { "ada", Ada_funcs, Ada_suffixes, NULL },
538 { "asm", Asm_labels, Asm_suffixes, NULL },
539 { "c", default_C_entries, default_C_suffixes, NULL },
540 { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL },
541 { "c*", Cstar_entries, Cstar_suffixes, NULL },
542 { "cobol", Cobol_paragraphs, Cobol_suffixes, NULL },
543 { "erlang", Erlang_functions, Erlang_suffixes, NULL },
544 { "fortran", Fortran_functions, Fortran_suffixes, NULL },
545 { "java", Cjava_entries, Cjava_suffixes, NULL },
546 { "lisp", Lisp_functions, Lisp_suffixes, NULL },
547 { "pascal", Pascal_functions, Pascal_suffixes, NULL },
548 { "perl", Perl_functions, Perl_suffixes, Perl_interpreters },
549 { "postscript", Postscript_functions, Postscript_suffixes, NULL },
550 { "proc", plain_C_entries, plain_C_suffixes, NULL },
551 { "prolog", Prolog_functions, Prolog_suffixes, NULL },
552 { "python", Python_functions, Python_suffixes, NULL },
553 { "scheme", Scheme_functions, Scheme_suffixes, NULL },
554 { "tex", TeX_functions, TeX_suffixes, NULL },
555 { "yacc", Yacc_entries, Yacc_suffixes, NULL },
556 { "auto", NULL }, /* default guessing scheme */
557 { "none", just_read_file }, /* regexp matching only */
558 { NULL, NULL } /* end of list */
/* Print the name of every entry in lang_names, each followed by its
   default file name suffixes when it has any, and then a paragraph
   explaining the `auto' and `none' entries and how the language is
   guessed when none is given.  Everything goes to standard output.  */
561 static void
562 print_language_names ()
564 language *lang;
565 char **ext;
567 puts ("\nThese are the currently supported languages, along with the\n\
568 default file name suffixes:");
/* The table ends at the first entry whose name is NULL.  */
569 for (lang = lang_names; lang->name != NULL; lang++)
571 printf ("\t%s\t", lang->name);
/* Entries such as "auto" and "none" have no suffix list.  */
572 if (lang->suffixes != NULL)
573 for (ext = lang->suffixes; *ext != NULL; ext++)
574 printf (" .%s", *ext);
/* Terminate the output line.  */
575 puts ("");
577 puts ("Where `auto' means use default language for files based on file\n\
578 name suffix, and `none' means only do regexp processing on files.\n\
579 If no language is specified and no matching suffix is found,\n\
580 the first line of the file is read for a sharp-bang (#!) sequence\n\
581 followed by the name of an interpreter. If no such sequence is found,\n\
582 Fortran is tried first; if no tags are found, C is tried next.\n\
583 Compressed files are supported using gzip and bzip2.");
586 #ifndef EMACS_NAME
587 # define EMACS_NAME "GNU Emacs"
588 #endif
589 #ifndef VERSION
590 # define VERSION "21"
591 #endif
592 static void
593 print_version ()
595 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
596 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
597 puts ("This program is distributed under the same terms as Emacs");
599 exit (GOOD);
/* Print the usage line, a description of each command line option and
   the list of supported languages on standard output, then exit with
   status GOOD.  Which options are described depends on CTAGS (ctags
   versus etags), on LONG_OPTIONS and on ETAGS_REGEXPS.  Does not
   return.  */
602 static void
603 print_help ()
605 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
607 These are the options accepted by %s.\n", progname, progname);
/* Tell the user whether long option names are usable in this build.  */
608 #ifdef LONG_OPTIONS
609 puts ("You may use unambiguous abbreviations for the long option names.");
610 #else
611 puts ("Long option names do not work with this executable, as it is not\n\
612 linked with GNU getopt.");
613 #endif /* LONG_OPTIONS */
614 puts ("A - as file name means read names from stdin (one per line).");
615 if (!CTAGS)
616 printf (" Absolute names are stored in the output file as they are.\n\
617 Relative ones are stored relative to the output file's directory.");
618 puts ("\n");
620 puts ("-a, --append\n\
621 Append tag entries to existing tags file.");
623 puts ("--packages-only\n\
624 For Ada files, only generate tags for packages .");
626 if (CTAGS)
627 puts ("-B, --backward-search\n\
628 Write the search commands for the tag entries using '?', the\n\
629 backward-search command instead of '/', the forward-search command.");
631 puts ("-C, --c++\n\
632 Treat files whose name suffix defaults to C language as C++ files.");
/* The second half of the --declarations text depends on the default
   for global variables, which differs between ctags and etags.  */
634 puts ("--declarations\n\
635 In C and derived languages, create tags for function declarations,");
636 if (CTAGS)
637 puts ("\tand create tags for extern variables if --globals is used.");
638 else
639 puts
640 ("\tand create tags for extern variables unless --no-globals is used.");
/* Only the option that changes the default is documented: -d for
   ctags, -D for etags.  */
642 if (CTAGS)
643 puts ("-d, --defines\n\
644 Create tag entries for C #define constants and enum constants, too.");
645 else
646 puts ("-D, --no-defines\n\
647 Don't create tag entries for C #define constants and enum constants.\n\
648 This makes the tags file smaller.");
/* NOTE(review): brace-only lines are not visible in this copy of the
   file, so whether this test covers only the -i text or also the -l
   text that follows cannot be confirmed from here.  */
650 if (!CTAGS)
652 puts ("-i FILE, --include=FILE\n\
653 Include a note in tag file indicating that, when searching for\n\
654 a tag, one should also consult the tags file FILE after\n\
655 checking the current file.");
656 puts ("-l LANG, --language=LANG\n\
657 Force the following files to be considered as written in the\n\
658 named language up to the next --language=LANG option.");
661 if (CTAGS)
662 puts ("--globals\n\
663 Create tag entries for global variables in some languages.");
664 else
665 puts ("--no-globals\n\
666 Do not create tag entries for global variables in some\n\
667 languages. This makes the tags file smaller.");
668 puts ("--members\n\
669 Create tag entries for member variables in C and derived languages.");
/* The regexp options exist only when regexp support is compiled in.  */
671 #ifdef ETAGS_REGEXPS
672 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
673 Make a tag for each line matching pattern REGEXP in the following\n\
674 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
675 regexfile is a file containing one REGEXP per line.\n\
676 REGEXP is anchored (as if preceded by ^).\n\
677 The form /REGEXP/NAME/ creates a named tag.\n\
678 For example Tcl named tags can be created with:\n\
679 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
680 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
681 Like -r, --regex but ignore case when matching expressions.");
682 puts ("-R, --no-regex\n\
683 Don't create tags from regexps for the following files.");
684 #endif /* ETAGS_REGEXPS */
685 puts ("-o FILE, --output=FILE\n\
686 Write the tags to FILE.");
687 puts ("-I, --ignore-indentation\n\
688 Don't rely on indentation quite as much as normal. Currently,\n\
689 this means not to assume that a closing brace in the first\n\
690 column is the final brace of a function or structure\n\
691 definition in C and C++.");
/* NOTE(review): the options from -t through -x below appear to be the
   ctags-only group, but the braces that delimit this test are not
   visible in this copy of the file -- confirm against the original.  */
693 if (CTAGS)
695 puts ("-t, --typedefs\n\
696 Generate tag entries for C and Ada typedefs.");
697 puts ("-T, --typedefs-and-c++\n\
698 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
699 and C++ member functions.");
700 puts ("-u, --update\n\
701 Update the tag entries for the given files, leaving tag\n\
702 entries for other files in place. Currently, this is\n\
703 implemented by deleting the existing entries for the given\n\
704 files and then rewriting the new entries at the end of the\n\
705 tags file. It is often faster to simply rebuild the entire\n\
706 tag file than to use this.");
707 puts ("-v, --vgrind\n\
708 Generates an index of items intended for human consumption,\n\
709 similar to the output of vgrind. The index is sorted, and\n\
710 gives the page number of each item.");
711 puts ("-w, --no-warn\n\
712 Suppress warning messages about entries defined in multiple\n\
713 files.");
714 puts ("-x, --cxref\n\
715 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
716 The output uses line numbers instead of page numbers, but\n\
717 beyond that the differences are cosmetic; try both to see\n\
718 which you like.");
721 puts ("-V, --version\n\
722 Print the version of the program.\n\
723 -h, --help\n\
724 Print this help message.");
/* Finish with the language table and the bug reporting address.  */
726 print_language_names ();
728 puts ("");
729 puts ("Report bugs to bug-gnu-emacs@gnu.org");
731 exit (GOOD);
/* Kind of a command line item recorded by main in its argument
   buffer, so that the items can be replayed in order afterwards.  */
735 enum argument_type
737 at_language, /* -l: the `lang' member holds the language */
738 at_regexp, /* -r: `what' is the regexp; -R: `what' is NULL */
739 at_filename, /* an input file name, stored in `what' */
740 at_icregexp /* -c: like at_regexp, but analysed ignoring case */
743 /* This structure helps us allow mixing of --lang and file names. */
744 typedef struct
746 enum argument_type arg_type; /* which of the members below is meaningful */
747 char *what; /* file name or regexp text; see enum argument_type */
748 language *lang; /* language of the regexp */
/* In the option loop of main, `lang' is assigned only for at_language
   entries, from the result of get_language_from_name.  */
749 } argument;
751 #ifdef VMS /* VMS specific functions */
753 #define EOS '\0'
755 /* This is a BUG! ANY arbitrary limit is a BUG!
756 Won't someone please fix this? */
757 #define MAX_FILE_SPEC_LEN 255
758 typedef struct {
759 short curlen;
760 char body[MAX_FILE_SPEC_LEN + 1];
761 } vspec;
764 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
765 returning in each successive call the next file name matching the input
766 spec. The function expects that each in_spec passed
767 to it will be processed to completion; in particular, up to and
768 including the call following that in which the last matching name
769 is returned, the function ignores the value of in_spec, and will
770 only start processing a new spec with the following call.
771 If an error occurs, on return out_spec contains the value
772 of in_spec when the error occurred.
774 With each successive file name returned in out_spec, the
775 function's return value is one. When there are no more matching
776 names the function returns zero. If on the first call no file
777 matches in_spec, or there is any other error, -1 is returned.
780 #include <rmsdef.h>
781 #include <descrip.h>
782 #define OUTSIZE MAX_FILE_SPEC_LEN
783 static short
784 fn_exp (out, in)
785 vspec *out;
786 char *in;
788 static long context = 0;
789 static struct dsc$descriptor_s o;
790 static struct dsc$descriptor_s i;
791 static bool pass1 = TRUE;
792 long status;
793 short retval;
795 if (pass1)
797 pass1 = FALSE;
798 o.dsc$a_pointer = (char *) out;
799 o.dsc$w_length = (short)OUTSIZE;
800 i.dsc$a_pointer = in;
801 i.dsc$w_length = (short)strlen(in);
802 i.dsc$b_dtype = DSC$K_DTYPE_T;
803 i.dsc$b_class = DSC$K_CLASS_S;
804 o.dsc$b_dtype = DSC$K_DTYPE_VT;
805 o.dsc$b_class = DSC$K_CLASS_VS;
807 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
809 out->body[out->curlen] = EOS;
810 return 1;
812 else if (status == RMS$_NMF)
813 retval = 0;
814 else
816 strcpy(out->body, in);
817 retval = -1;
819 lib$find_file_end(&context);
820 pass1 = TRUE;
821 return retval;
825 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
826 name of each file specified by the provided arg expanding wildcards.
828 static char *
829 gfnames (arg, p_error)
830 char *arg;
831 bool *p_error;
833 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
835 switch (fn_exp (&filename, arg))
837 case 1:
838 *p_error = FALSE;
839 return filename.body;
840 case 0:
841 *p_error = FALSE;
842 return NULL;
843 default:
844 *p_error = TRUE;
845 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/* Stand-in for the C library's `system': runs nothing, reports through
   `error' that the function is unavailable, and returns -1 so that a
   caller checking the status sees a failure.  The return type is
   spelled out because implicit int is no longer valid C, and a value
   is returned because falling off the end of a non-void function
   leaves the result undefined.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
857 #define VERSION_DELIM ';'
858 char *massage_name (s)
859 char *s;
861 char *start = s;
863 for ( ; *s; s++)
864 if (*s == VERSION_DELIM)
866 *s = EOS;
867 break;
869 else
870 *s = lowcase (*s);
871 return start;
873 #endif /* VMS */
877 main (argc, argv)
878 int argc;
879 char *argv[];
881 int i;
882 unsigned int nincluded_files;
883 char **included_files;
884 char *this_file;
885 argument *argbuffer;
886 int current_arg, file_count;
887 linebuffer filename_lb;
888 #ifdef VMS
889 bool got_err;
890 #endif
892 #ifdef DOS_NT
893 _fmode = O_BINARY; /* all of files are treated as binary files */
894 #endif /* DOS_NT */
896 progname = argv[0];
897 nincluded_files = 0;
898 included_files = xnew (argc, char *);
899 current_arg = 0;
900 file_count = 0;
902 /* Allocate enough no matter what happens. Overkill, but each one
903 is small. */
904 argbuffer = xnew (argc, argument);
906 #ifdef ETAGS_REGEXPS
907 /* Set syntax for regular expression routines. */
908 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
909 /* Translation table for case-insensitive search. */
910 for (i = 0; i < CHAR_SET_SIZE; i++)
911 lc_trans[i] = lowcase (i);
912 #endif /* ETAGS_REGEXPS */
915 * If etags, always find typedefs and structure tags. Why not?
916 * Also default is to find macro constants, enum constants and
917 * global variables.
919 if (!CTAGS)
921 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
922 globals = TRUE;
923 members = FALSE;
926 while (1)
928 int opt;
929 char *optstring;
931 #ifdef ETAGS_REGEXPS
932 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
933 #else
934 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
935 #endif /* ETAGS_REGEXPS */
937 #ifndef LONG_OPTIONS
938 optstring = optstring + 1;
939 #endif /* LONG_OPTIONS */
941 opt = getopt_long (argc, argv, optstring, longopts, 0);
942 if (opt == EOF)
943 break;
945 switch (opt)
947 case 0:
948 /* If getopt returns 0, then it has already processed a
949 long-named option. We should do nothing. */
950 break;
952 case 1:
953 /* This means that a file name has been seen. Record it. */
954 argbuffer[current_arg].arg_type = at_filename;
955 argbuffer[current_arg].what = optarg;
956 ++current_arg;
957 ++file_count;
958 break;
960 /* Common options. */
961 case 'a': append_to_tagfile = TRUE; break;
962 case 'C': cplusplus = TRUE; break;
963 case 'd': constantypedefs = TRUE; break;
964 case 'D': constantypedefs = FALSE; break;
965 case 'f': /* for compatibility with old makefiles */
966 case 'o':
967 if (tagfile)
969 error ("-o option may only be given once.", (char *)NULL);
970 suggest_asking_for_help ();
972 tagfile = optarg;
973 break;
974 case 'I':
975 case 'S': /* for backward compatibility */
976 noindentypedefs = TRUE;
977 break;
978 case 'l':
980 language *lang = get_language_from_name (optarg);
981 if (lang != NULL)
983 argbuffer[current_arg].lang = lang;
984 argbuffer[current_arg].arg_type = at_language;
985 ++current_arg;
988 break;
989 #ifdef ETAGS_REGEXPS
990 case 'r':
991 argbuffer[current_arg].arg_type = at_regexp;
992 argbuffer[current_arg].what = optarg;
993 ++current_arg;
994 break;
995 case 'R':
996 argbuffer[current_arg].arg_type = at_regexp;
997 argbuffer[current_arg].what = NULL;
998 ++current_arg;
999 break;
1000 case 'c':
1001 argbuffer[current_arg].arg_type = at_icregexp;
1002 argbuffer[current_arg].what = optarg;
1003 ++current_arg;
1004 break;
1005 #endif /* ETAGS_REGEXPS */
1006 case 'V':
1007 print_version ();
1008 break;
1009 case 'h':
1010 case 'H':
1011 print_help ();
1012 break;
1013 case 't':
1014 typedefs = TRUE;
1015 break;
1016 case 'T':
1017 typedefs = typedefs_and_cplusplus = TRUE;
1018 break;
1019 #if (!CTAGS)
1020 /* Etags options */
1021 case 'i':
1022 included_files[nincluded_files++] = optarg;
1023 break;
1024 #else /* CTAGS */
1025 /* Ctags options. */
1026 case 'B': searchar = '?'; break;
1027 case 'u': update = TRUE; break;
1028 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1029 case 'x': cxref_style = TRUE; break;
1030 case 'w': no_warnings = TRUE; break;
1031 #endif /* CTAGS */
1032 default:
1033 suggest_asking_for_help ();
1037 for (; optind < argc; ++optind)
1039 argbuffer[current_arg].arg_type = at_filename;
1040 argbuffer[current_arg].what = argv[optind];
1041 ++current_arg;
1042 ++file_count;
1045 if (nincluded_files == 0 && file_count == 0)
1047 error ("no input files specified.", (char *)NULL);
1048 suggest_asking_for_help ();
1051 if (tagfile == NULL)
1052 tagfile = CTAGS ? "tags" : "TAGS";
1053 cwd = etags_getcwd (); /* the current working directory */
1054 if (cwd[strlen (cwd) - 1] != '/')
1056 char *oldcwd = cwd;
1057 cwd = concat (oldcwd, "/", "");
1058 free (oldcwd);
1060 if (streq (tagfile, "-"))
1061 tagfiledir = cwd;
1062 else
1063 tagfiledir = absolute_dirname (tagfile, cwd);
1065 init (); /* set up boolean "functions" */
1067 initbuffer (&lb);
1068 initbuffer (&token_name);
1069 initbuffer (&lbs[0].lb);
1070 initbuffer (&lbs[1].lb);
1071 initbuffer (&filename_lb);
1073 if (!CTAGS)
1075 if (streq (tagfile, "-"))
1077 tagf = stdout;
1078 #ifdef DOS_NT
1079 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1080 doesn't take effect until after `stdout' is already open). */
1081 if (!isatty (fileno (stdout)))
1082 setmode (fileno (stdout), O_BINARY);
1083 #endif /* DOS_NT */
1085 else
1086 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1087 if (tagf == NULL)
1088 pfatal (tagfile);
1092 * Loop through files finding functions.
1094 for (i = 0; i < current_arg; ++i)
1096 switch (argbuffer[i].arg_type)
1098 case at_language:
1099 forced_lang = argbuffer[i].lang;
1100 break;
1101 #ifdef ETAGS_REGEXPS
1102 case at_regexp:
1103 analyse_regex (argbuffer[i].what, FALSE);
1104 break;
1105 case at_icregexp:
1106 analyse_regex (argbuffer[i].what, TRUE);
1107 break;
1108 #endif
1109 case at_filename:
1110 #ifdef VMS
1111 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1113 if (got_err)
1115 error ("can't find file %s\n", this_file);
1116 argc--, argv++;
1118 else
1120 this_file = massage_name (this_file);
1122 #else
1123 this_file = argbuffer[i].what;
1124 #endif
1125 /* Input file named "-" means read file names from stdin
1126 (one per line) and use them. */
1127 if (streq (this_file, "-"))
1128 while (readline_internal (&filename_lb, stdin) > 0)
1129 process_file (filename_lb.buffer);
1130 else
1131 process_file (this_file);
1132 #ifdef VMS
1134 #endif
1135 break;
1139 #ifdef ETAGS_REGEXPS
1140 free_patterns ();
1141 #endif /* ETAGS_REGEXPS */
1143 if (!CTAGS)
1145 while (nincluded_files-- > 0)
1146 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1148 fclose (tagf);
1149 exit (GOOD);
1152 /* If CTAGS, we are here. process_file did not write the tags yet,
1153 because we want them ordered. Let's do it now. */
1154 if (cxref_style)
1156 put_entries (head);
1157 free_tree (head);
1158 head = NULL;
1159 exit (GOOD);
1162 if (update)
1164 char cmd[BUFSIZ];
1165 for (i = 0; i < current_arg; ++i)
1167 if (argbuffer[i].arg_type != at_filename)
1168 continue;
1169 sprintf (cmd,
1170 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1171 tagfile, argbuffer[i].what, tagfile);
1172 if (system (cmd) != GOOD)
1173 fatal ("failed to execute shell command", (char *)NULL);
1175 append_to_tagfile = TRUE;
1178 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1179 if (tagf == NULL)
1180 pfatal (tagfile);
1181 put_entries (head);
1182 free_tree (head);
1183 head = NULL;
1184 fclose (tagf);
1186 if (update)
1188 char cmd[BUFSIZ];
1189 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1190 exit (system (cmd));
1192 return GOOD;
1198 * Return a compressor given the file name. If EXTPTR is non-zero,
1199 * return a pointer into FILE where the compressor-specific
1200 * extension begins. If no compressor is found, NULL is returned
1201 * and EXTPTR is not significant.
1202 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1204 static compressor *
1205 get_compressor_from_suffix (file, extptr)
1206 char *file;
1207 char **extptr;
1209 compressor *compr;
1210 char *slash, *suffix;
1212 /* This relies on FN to be after canonicalize_filename,
1213 so we don't need to consider backslashes on DOS_NT. */
1214 slash = etags_strrchr (file, '/');
1215 suffix = etags_strrchr (file, '.');
1216 if (suffix == NULL || suffix < slash)
1217 return NULL;
1218 if (extptr != NULL)
1219 *extptr = suffix;
1220 suffix += 1;
1221 /* Let those poor souls who live with DOS 8+3 file name limits get
1222 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1223 Only the first do loop is run if not MSDOS */
1226 for (compr = compressors; compr->suffix != NULL; compr++)
1227 if (streq (compr->suffix, suffix))
1228 return compr;
1229 if (!MSDOS)
1230 break; /* do it only once: not really a loop */
1231 if (extptr != NULL)
1232 *extptr = ++suffix;
1233 } while (*suffix != '\0');
1234 return NULL;
1240 * Return a language given the name.
1242 static language *
1243 get_language_from_name (name)
1244 char *name;
1246 language *lang;
1248 if (name == NULL)
1249 error ("empty language name", (char *)NULL);
1250 else
1252 for (lang = lang_names; lang->name != NULL; lang++)
1253 if (streq (name, lang->name))
1254 return lang;
1255 error ("unknown language \"%s\"", name);
1258 return NULL;
1263 * Return a language given the interpreter name.
1265 static language *
1266 get_language_from_interpreter (interpreter)
1267 char *interpreter;
1269 language *lang;
1270 char **iname;
1272 if (interpreter == NULL)
1273 return NULL;
1274 for (lang = lang_names; lang->name != NULL; lang++)
1275 if (lang->interpreters != NULL)
1276 for (iname = lang->interpreters; *iname != NULL; iname++)
1277 if (streq (*iname, interpreter))
1278 return lang;
1280 return NULL;
1286 * Return a language given the file name.
1288 static language *
1289 get_language_from_suffix (file)
1290 char *file;
1292 language *lang;
1293 char **ext, *suffix;
1295 suffix = etags_strrchr (file, '.');
1296 if (suffix == NULL)
1297 return NULL;
1298 suffix += 1;
1299 for (lang = lang_names; lang->name != NULL; lang++)
1300 if (lang->suffixes != NULL)
1301 for (ext = lang->suffixes; *ext != NULL; ext++)
1302 if (streq (*ext, suffix))
1303 return lang;
1304 return NULL;
1310 * This routine is called on each file argument.
1312 static void
1313 process_file (file)
1314 char *file;
1316 struct stat stat_buf;
1317 FILE *inf;
1318 compressor *compr;
1319 char *compressed_name, *uncompressed_name;
1320 char *ext, *real_name;
1323 canonicalize_filename (file);
1324 if (streq (file, tagfile) && !streq (tagfile, "-"))
1326 error ("skipping inclusion of %s in self.", file);
1327 return;
1329 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1331 compressed_name = NULL;
1332 real_name = uncompressed_name = savestr (file);
1334 else
1336 real_name = compressed_name = savestr (file);
1337 uncompressed_name = savenstr (file, ext - file);
1340 /* If the canonicalised uncompressed name has already be dealt with,
1341 skip it silently, else add it to the list. */
1343 typedef struct processed_file
1345 char *filename;
1346 struct processed_file *next;
1347 } processed_file;
1348 static processed_file *pf_head = NULL;
1349 register processed_file *fnp;
1351 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1352 if (streq (uncompressed_name, fnp->filename))
1353 goto exit;
1354 fnp = pf_head;
1355 pf_head = xnew (1, struct processed_file);
1356 pf_head->filename = savestr (uncompressed_name);
1357 pf_head->next = fnp;
1360 if (stat (real_name, &stat_buf) != 0)
1362 /* Reset real_name and try with a different name. */
1363 real_name = NULL;
1364 if (compressed_name != NULL) /* try with the given suffix */
1366 if (stat (uncompressed_name, &stat_buf) == 0)
1367 real_name = uncompressed_name;
1369 else /* try all possible suffixes */
1371 for (compr = compressors; compr->suffix != NULL; compr++)
1373 compressed_name = concat (file, ".", compr->suffix);
1374 if (stat (compressed_name, &stat_buf) != 0)
1376 if (MSDOS)
1378 char *suf = compressed_name + strlen (file);
1379 size_t suflen = strlen (compr->suffix) + 1;
1380 for ( ; suf[1]; suf++, suflen--)
1382 memmove (suf, suf + 1, suflen);
1383 if (stat (compressed_name, &stat_buf) == 0)
1385 real_name = compressed_name;
1386 break;
1389 if (real_name != NULL)
1390 break;
1391 } /* MSDOS */
1392 free (compressed_name);
1393 compressed_name = NULL;
1395 else
1397 real_name = compressed_name;
1398 break;
1402 if (real_name == NULL)
1404 perror (file);
1405 goto exit;
1407 } /* try with a different name */
1409 if (!S_ISREG (stat_buf.st_mode))
1411 error ("skipping %s: it is not a regular file.", real_name);
1412 goto exit;
1414 if (real_name == compressed_name)
1416 char *cmd = concat (compr->command, " ", real_name);
1417 inf = popen (cmd, "r");
1418 free (cmd);
1420 else
1421 inf = fopen (real_name, "r");
1422 if (inf == NULL)
1424 perror (real_name);
1425 goto exit;
1428 find_entries (uncompressed_name, inf);
1430 if (real_name == compressed_name)
1431 pclose (inf);
1432 else
1433 fclose (inf);
1435 if (!CTAGS)
1437 char *filename;
1439 if (filename_is_absolute (uncompressed_name))
1441 /* file is an absolute file name. Canonicalise it. */
1442 filename = absolute_filename (uncompressed_name, cwd);
1444 else
1446 /* file is a file name relative to cwd. Make it relative
1447 to the directory of the tags file. */
1448 filename = relative_filename (uncompressed_name, tagfiledir);
1450 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1451 free (filename);
1452 put_entries (head);
1453 free_tree (head);
1454 head = NULL;
1457 exit:
1458 if (compressed_name) free(compressed_name);
1459 if (uncompressed_name) free(uncompressed_name);
1460 return;
1464 * This routine sets up the boolean pseudo-functions which work
1465 * by setting boolean flags dependent upon the corresponding character.
1466 * Every char which is NOT in that string is not a white char. Therefore,
1467 * all of the array "_wht" is set to FALSE, and then the elements
1468 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1469 * of a char is TRUE if it is the string "white", else FALSE.
1471 static void
1472 init ()
1474 register char *sp;
1475 register int i;
1477 for (i = 0; i < CHARS; i++)
1478 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1479 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1480 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1481 notinname('\0') = notinname('\n');
1482 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1483 begtoken('\0') = begtoken('\n');
1484 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1485 intoken('\0') = intoken('\n');
1486 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1487 endtoken('\0') = endtoken('\n');
1491 * This routine opens the specified file and calls the function
1492 * which finds the function and type definitions.
1494 node *last_node = NULL;
1496 static void
1497 find_entries (file, inf)
1498 char *file;
1499 FILE *inf;
1501 char *cp;
1502 language *lang;
1503 node *old_last_node;
1505 /* Memory leakage here: the string pointed by curfile is
1506 never released, because curfile is copied into np->file
1507 for each node, to be used in CTAGS mode. The amount of
1508 memory leaked here is the sum of the lengths of the
1509 file names. */
1510 curfile = savestr (file);
1512 /* If user specified a language, use it. */
1513 lang = forced_lang;
1514 if (lang != NULL && lang->function != NULL)
1516 curlang = lang;
1517 lang->function (inf);
1518 return;
1521 /* Try to guess the language given the file name. */
1522 lang = get_language_from_suffix (file);
1523 if (lang != NULL && lang->function != NULL)
1525 curlang = lang;
1526 lang->function (inf);
1527 return;
1530 /* Look for sharp-bang as the first two characters. */
1531 if (readline_internal (&lb, inf) > 0
1532 && lb.len >= 2
1533 && lb.buffer[0] == '#'
1534 && lb.buffer[1] == '!')
1536 char *lp;
1538 /* Set lp to point at the first char after the last slash in the
1539 line or, if no slashes, at the first nonblank. Then set cp to
1540 the first successive blank and terminate the string. */
1541 lp = etags_strrchr (lb.buffer+2, '/');
1542 if (lp != NULL)
1543 lp += 1;
1544 else
1545 lp = skip_spaces (lb.buffer + 2);
1546 cp = skip_non_spaces (lp);
1547 *cp = '\0';
1549 if (strlen (lp) > 0)
1551 lang = get_language_from_interpreter (lp);
1552 if (lang != NULL && lang->function != NULL)
1554 curlang = lang;
1555 lang->function (inf);
1556 return;
1560 /* We rewind here, even if inf may be a pipe. We fail if the
1561 length of the first line is longer than the pipe block size,
1562 which is unlikely. */
1563 rewind (inf);
1565 /* Try Fortran. */
1566 old_last_node = last_node;
1567 curlang = get_language_from_name ("fortran");
1568 Fortran_functions (inf);
1570 /* No Fortran entries found. Try C. */
1571 if (old_last_node == last_node)
1573 /* We do not tag if rewind fails.
1574 Only the file name will be recorded in the tags file. */
1575 rewind (inf);
1576 curlang = get_language_from_name (cplusplus ? "c++" : "c");
1577 default_C_entries (inf);
1579 return;
1582 /* Record a tag. */
1583 static void
1584 pfnote (name, is_func, linestart, linelen, lno, cno)
1585 char *name; /* tag name, or NULL if unnamed */
1586 bool is_func; /* tag is a function */
1587 char *linestart; /* start of the line where tag is */
1588 int linelen; /* length of the line where tag is */
1589 int lno; /* line number */
1590 long cno; /* character number */
1592 register node *np;
1594 if (CTAGS && name == NULL)
1595 return;
1597 np = xnew (1, node);
1599 /* If ctags mode, change name "main" to M<thisfilename>. */
1600 if (CTAGS && !cxref_style && streq (name, "main"))
1602 register char *fp = etags_strrchr (curfile, '/');
1603 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1604 fp = etags_strrchr (np->name, '.');
1605 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1606 fp[0] = '\0';
1608 else
1609 np->name = name;
1610 np->been_warned = FALSE;
1611 np->file = curfile;
1612 np->is_func = is_func;
1613 np->lno = lno;
1614 /* Our char numbers are 0-base, because of C language tradition?
1615 ctags compatibility? old versions compatibility? I don't know.
1616 Anyway, since emacs's are 1-base we expect etags.el to take care
1617 of the difference. If we wanted to have 1-based numbers, we would
1618 uncomment the +1 below. */
1619 np->cno = cno /* + 1 */ ;
1620 np->left = np->right = NULL;
1621 if (CTAGS && !cxref_style)
1623 if (strlen (linestart) < 50)
1624 np->pat = concat (linestart, "$", "");
1625 else
1626 np->pat = savenstr (linestart, 50);
1628 else
1629 np->pat = savenstr (linestart, linelen);
1631 add_node (np, &head);
1634 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1635 * From: Sam Kendall <kendall@mv.mv.com>
1636 * Subject: Proposal for firming up the TAGS format specification
1637 * To: F.Potorti@cnuce.cnr.it
1639 * pfnote should emit the optimized form [unnamed tag] only if:
1640 * 1. name does not contain any of the characters " \t\r\n(),;";
1641 * 2. linestart contains name as either a rightmost, or rightmost but
1642 * one character, substring;
1643 * 3. the character, if any, immediately before name in linestart must
1644 * be one of the characters " \t(),;";
1645 * 4. the character, if any, immediately after name in linestart must
1646 * also be one of the characters " \t(),;".
1648 * The real implementation uses the notinname() macro, which recognises
1649 * characters slightly different form " \t\r\n(),;". See the variable
1650 * `nonam'.
1652 #define traditional_tag_style TRUE
1653 static void
1654 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1655 char *name; /* tag name, or NULL if unnamed */
1656 int namelen; /* tag length */
1657 bool is_func; /* tag is a function */
1658 char *linestart; /* start of the line where tag is */
1659 int linelen; /* length of the line where tag is */
1660 int lno; /* line number */
1661 long cno; /* character number */
1663 register char *cp;
1664 bool named;
1666 named = TRUE;
1667 if (!CTAGS)
1669 for (cp = name; !notinname (*cp); cp++)
1670 continue;
1671 if (*cp == '\0') /* rule #1 */
1673 cp = linestart + linelen - namelen;
1674 if (notinname (linestart[linelen-1]))
1675 cp -= 1; /* rule #4 */
1676 if (cp >= linestart /* rule #2 */
1677 && (cp == linestart
1678 || notinname (cp[-1])) /* rule #3 */
1679 && strneq (name, cp, namelen)) /* rule #2 */
1680 named = FALSE; /* use unnamed tag */
1684 if (named)
1685 name = savenstr (name, namelen);
1686 else
1687 name = NULL;
1688 pfnote (name, is_func, linestart, linelen, lno, cno);
1692 * free_tree ()
1693 * recurse on left children, iterate on right children.
1695 static void
1696 free_tree (np)
1697 register node *np;
1699 while (np)
1701 register node *node_right = np->right;
1702 free_tree (np->left);
1703 if (np->name != NULL)
1704 free (np->name);
1705 free (np->pat);
1706 free (np);
1707 np = node_right;
1712 * add_node ()
1713 * Adds a node to the tree of nodes. In etags mode, we don't keep
1714 * it sorted; we just keep a linear list. In ctags mode, maintain
1715 * an ordered tree, with no attempt at balancing.
1717 * add_node is the only function allowed to add nodes, so it can
1718 * maintain state.
1720 static void
1721 add_node (np, cur_node_p)
1722 node *np, **cur_node_p;
1724 register int dif;
1725 register node *cur_node = *cur_node_p;
1727 if (cur_node == NULL)
1729 *cur_node_p = np;
1730 last_node = np;
1731 return;
1734 if (!CTAGS)
1736 /* Etags Mode */
1737 if (last_node == NULL)
1738 fatal ("internal error in add_node", (char *)NULL);
1739 last_node->right = np;
1740 last_node = np;
1742 else
1744 /* Ctags Mode */
1745 dif = strcmp (np->name, cur_node->name);
1748 * If this tag name matches an existing one, then
1749 * do not add the node, but maybe print a warning.
1751 if (!dif)
1753 if (streq (np->file, cur_node->file))
1755 if (!no_warnings)
1757 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1758 np->file, lineno, np->name);
1759 fprintf (stderr, "Second entry ignored\n");
1762 else if (!cur_node->been_warned && !no_warnings)
1764 fprintf
1765 (stderr,
1766 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1767 np->file, cur_node->file, np->name);
1768 cur_node->been_warned = TRUE;
1770 return;
1773 /* Actually add the node */
1774 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1778 static void
1779 put_entries (np)
1780 register node *np;
1782 register char *sp;
1784 if (np == NULL)
1785 return;
1787 /* Output subentries that precede this one */
1788 put_entries (np->left);
1790 /* Output this entry */
1792 if (!CTAGS)
1794 if (np->name != NULL)
1795 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1796 np->pat, np->name, np->lno, np->cno);
1797 else
1798 fprintf (tagf, "%s\177%d,%ld\n",
1799 np->pat, np->lno, np->cno);
1801 else
1803 if (np->name == NULL)
1804 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1806 if (cxref_style)
1808 if (vgrind_style)
1809 fprintf (stdout, "%s %s %d\n",
1810 np->name, np->file, (np->lno + 63) / 64);
1811 else
1812 fprintf (stdout, "%-16s %3d %-16s %s\n",
1813 np->name, np->lno, np->file, np->pat);
1815 else
1817 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1819 if (np->is_func)
1820 { /* a function */
1821 putc (searchar, tagf);
1822 putc ('^', tagf);
1824 for (sp = np->pat; *sp; sp++)
1826 if (*sp == '\\' || *sp == searchar)
1827 putc ('\\', tagf);
1828 putc (*sp, tagf);
1830 putc (searchar, tagf);
1832 else
1833 { /* a typedef; text pattern inadequate */
1834 fprintf (tagf, "%d", np->lno);
1836 putc ('\n', tagf);
1840 /* Output subentries that follow this one */
1841 put_entries (np->right);
/* Length of a number's decimal representation, that is the number of
   characters that a %ld conversion emits for NUM.  A leading '-' is
   counted for negative values.  */
static int
number_len (num)
     long num;
{
  int len = 1;
  unsigned long magnitude;

  if (num < 0)
    {
      len += 1;                 /* the minus sign */
      /* Negate in unsigned arithmetic: well defined even for the
         most negative value.  */
      magnitude = 0UL - (unsigned long) num;
    }
  else
    magnitude = (unsigned long) num;

  while ((magnitude /= 10) > 0)
    len += 1;
  return len;
}
1856 * Return total number of characters that put_entries will output for
1857 * the nodes in the subtree of the specified node. Works only if
1858 * we are not ctags, but called only in that case. This count
1859 * is irrelevant with the new tags.el, but is still supplied for
1860 * backward compatibility.
1862 static int
1863 total_size_of_entries (np)
1864 register node *np;
1866 register int total;
1868 if (np == NULL)
1869 return 0;
1871 for (total = 0; np != NULL; np = np->right)
1873 /* Count left subentries. */
1874 total += total_size_of_entries (np->left);
1876 /* Count this entry */
1877 total += strlen (np->pat) + 1;
1878 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1879 if (np->name != NULL)
1880 total += 1 + strlen (np->name); /* \001name */
1883 return total;
/*
 * The C symbol tables.
 *
 * sym_type is the class that the keyword table below assigns to a
 * token; st_none is what C_symtype returns for a non-keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};
1900 static unsigned int hash P_((const char *, unsigned int));
1901 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1902 static enum sym_type C_symtype P_((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
      gperf -c -k 1,3 -o -p -r -t
%[
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,      st_C_ignore
for,            0,      st_C_ignore
while,          0,      st_C_ignore
switch,         0,      st_C_ignore
return,         0,      st_C_ignore
@interface,     0,      st_C_objprot
@protocol,      0,      st_C_objprot
@implementation,0,      st_C_objimpl
@end,           0,      st_C_objend
import,         C_JAVA, st_C_ignore
package,        C_JAVA, st_C_ignore
friend,         C_PLPL, st_C_ignore
extends,        C_JAVA, st_C_javastruct
implements,     C_JAVA, st_C_javastruct
interface,      C_JAVA, st_C_struct
class,          C_PLPL, st_C_struct
namespace,      C_PLPL, st_C_struct
domain,         C_STAR, st_C_struct
union,          0,      st_C_struct
struct,         0,      st_C_struct
extern,         0,      st_C_extern
enum,           0,      st_C_enum
typedef,        0,      st_C_typedef
define,         0,      st_C_define
operator,       C_PLPL, st_C_operator
bool,           C_PLPL, st_C_typespec
long,           0,      st_C_typespec
short,          0,      st_C_typespec
int,            0,      st_C_typespec
char,           0,      st_C_typespec
float,          0,      st_C_typespec
double,         0,      st_C_typespec
signed,         0,      st_C_typespec
unsigned,       0,      st_C_typespec
auto,           0,      st_C_typespec
void,           0,      st_C_typespec
static,         0,      st_C_typespec
const,          0,      st_C_typespec
volatile,       0,      st_C_typespec
explicit,       C_PLPL, st_C_typespec
mutable,        C_PLPL, st_C_typespec
typename,       C_PLPL, st_C_typespec
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,      st_C_gnumacro
SYSCALL,        0,      st_C_gnumacro
ENTRY,          0,      st_C_gnumacro
PSEUDO,         0,      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,      st_C_gnumacro
#DEFVAR_,       0,      st_C_gnumacro
%]
and replace lines between %< and %> with its output. */
1962 /*%<*/
1963 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1964 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
1965 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1967 #define TOTAL_KEYWORDS 46
1968 #define MIN_WORD_LENGTH 2
1969 #define MAX_WORD_LENGTH 15
1970 #define MIN_HASH_VALUE 13
1971 #define MAX_HASH_VALUE 123
1972 /* maximum key range = 111, duplicates = 0 */
/* Generated hash function: the length of the word plus the values
   associated with its first character and, for words of three or more
   characters, its third one.  The default label also reads str[2], so
   LEN must not be 0.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
       11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
       11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
        4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
        6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124
    };
  register int hval = len;

  switch (hval)
    {
      default:
      case 3:
        hval += asso_values[(unsigned char)str[2]];
        /*FALLTHRU*/
      case 2:
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
2026 #ifdef __GNUC__
2027 __inline
2028 #endif
2029 static struct C_stab_entry *
2030 in_word_set (str, len)
2031 register const char *str;
2032 register unsigned int len;
2034 static struct C_stab_entry wordlist[] =
2036 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2037 {""}, {""}, {""}, {""},
2038 {"@end", 0, st_C_objend},
2039 {""}, {""}, {""}, {""},
2040 {"ENTRY", 0, st_C_gnumacro},
2041 {"@interface", 0, st_C_objprot},
2042 {""},
2043 {"domain", C_STAR, st_C_struct},
2044 {""},
2045 {"PSEUDO", 0, st_C_gnumacro},
2046 {""}, {""},
2047 {"namespace", C_PLPL, st_C_struct},
2048 {""}, {""},
2049 {"@implementation",0, st_C_objimpl},
2050 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2051 {"long", 0, st_C_typespec},
2052 {"signed", 0, st_C_typespec},
2053 {"@protocol", 0, st_C_objprot},
2054 {""}, {""}, {""}, {""},
2055 {"bool", C_PLPL, st_C_typespec},
2056 {""}, {""}, {""}, {""}, {""}, {""},
2057 {"const", 0, st_C_typespec},
2058 {"explicit", C_PLPL, st_C_typespec},
2059 {"if", 0, st_C_ignore},
2060 {""},
2061 {"operator", C_PLPL, st_C_operator},
2062 {""},
2063 {"DEFUN", 0, st_C_gnumacro},
2064 {""}, {""},
2065 {"define", 0, st_C_define},
2066 {""}, {""}, {""}, {""}, {""},
2067 {"double", 0, st_C_typespec},
2068 {"struct", 0, st_C_struct},
2069 {""}, {""}, {""}, {""},
2070 {"short", 0, st_C_typespec},
2071 {""},
2072 {"enum", 0, st_C_enum},
2073 {"mutable", C_PLPL, st_C_typespec},
2074 {""},
2075 {"extern", 0, st_C_extern},
2076 {"extends", C_JAVA, st_C_javastruct},
2077 {"package", C_JAVA, st_C_ignore},
2078 {"while", 0, st_C_ignore},
2079 {""},
2080 {"for", 0, st_C_ignore},
2081 {""}, {""}, {""},
2082 {"volatile", 0, st_C_typespec},
2083 {""}, {""},
2084 {"import", C_JAVA, st_C_ignore},
2085 {"float", 0, st_C_typespec},
2086 {"switch", 0, st_C_ignore},
2087 {"return", 0, st_C_ignore},
2088 {"implements", C_JAVA, st_C_javastruct},
2089 {""},
2090 {"static", 0, st_C_typespec},
2091 {"typedef", 0, st_C_typedef},
2092 {"typename", C_PLPL, st_C_typespec},
2093 {"unsigned", 0, st_C_typespec},
2094 {""}, {""},
2095 {"char", 0, st_C_typespec},
2096 {"class", C_PLPL, st_C_struct},
2097 {""}, {""}, {""},
2098 {"void", 0, st_C_typespec},
2099 {""}, {""},
2100 {"friend", C_PLPL, st_C_ignore},
2101 {""}, {""}, {""},
2102 {"int", 0, st_C_typespec},
2103 {"union", 0, st_C_struct},
2104 {""}, {""}, {""},
2105 {"auto", 0, st_C_typespec},
2106 {"interface", C_JAVA, st_C_struct},
2107 {""},
2108 {"SYSCALL", 0, st_C_gnumacro}
2111 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2113 register int key = hash (str, len);
2115 if (key <= MAX_HASH_VALUE && key >= 0)
2117 register const char *s = wordlist[key].name;
2119 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2120 return &wordlist[key];
2123 return 0;
2125 /*%>*/
2127 static enum sym_type
2128 C_symtype (str, len, c_ext)
2129 char *str;
2130 int len;
2131 int c_ext;
2133 register struct C_stab_entry *se = in_word_set (str, len);
2135 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2136 return st_none;
2137 return se->type;
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
enum
{
  fvnone,                       /* nothing seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

bool fvextern;                  /* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
enum
{
  snone,                        /* nothing seen yet */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen,                   /* colon seen after struct-like tag */
  sinbody                       /* in struct body: recognize member func defs*/
} structdef;
2188 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
2189 * struct tag, and structtype is the type of the preceding struct-like
2190 * keyword.
2192 char *structtag = "<uninited>";
2193 enum sym_type structtype;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Tom R.Hageman <tom@basil.icce.rug.nl>
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
typedef struct
{
  bool valid;
  char *str;
  bool named;
  int linelen;
  int lineno;
  long linepos;
  char *buffer;
} token;

token tok;                      /* latest token read */
2250 * Set this to TRUE, and the next token considered is called a function.
2251 * Used only for GNU emacs's function-defining macros.
2253 bool next_token_is_func;
2256 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
2258 bool yacc_rules;
2261 * methodlen is the length of the method name stored in token_name.
2263 int methodlen;
2265 static bool consider_token P_((char *, int, int, int, int, int, bool *));
2266 static void make_C_tag P_((bool));
2269 * consider_token ()
2270 * checks to see if the current token is at the start of a
2271 * function or variable, or corresponds to a typedef, or
2272 * is a struct/union/enum tag, or #define, or an enum constant.
2274 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2275 * with args. C_EXT is which language we are looking at.
2277 * Globals
2278 * fvdef IN OUT
2279 * structdef IN OUT
2280 * definedef IN OUT
2281 * typdef IN OUT
2282 * objdef IN OUT
2283 * next_token_is_func IN OUT
2286 static bool
2287 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2288 register char *str; /* IN: token pointer */
2289 register int len; /* IN: token length */
2290 register int c; /* IN: first char after the token */
2291 int c_ext; /* IN: C extensions mask */
2292 int cblev; /* IN: curly brace level */
2293 int parlev; /* IN: parenthesis level */
2294 bool *is_func_or_var; /* OUT: function or variable found */
2296 enum sym_type toktype = C_symtype (str, len, c_ext);
2299 * Advance the definedef state machine.
2301 switch (definedef)
2303 case dnone:
2304 /* We're not on a preprocessor line. */
2305 break;
2306 case dsharpseen:
2307 if (toktype == st_C_define)
2309 definedef = ddefineseen;
2311 else
2313 definedef = dignorerest;
2315 return FALSE;
2316 case ddefineseen:
2318 * Make a tag for any macro, unless it is a constant
2319 * and constantypedefs is FALSE.
2321 definedef = dignorerest;
2322 *is_func_or_var = (c == '(');
2323 if (!*is_func_or_var && !constantypedefs)
2324 return FALSE;
2325 else
2326 return TRUE;
2327 case dignorerest:
2328 return FALSE;
2329 default:
2330 error ("internal error: definedef value.", (char *)NULL);
2334 * Now typedefs
2336 switch (typdef)
2338 case tnone:
2339 if (toktype == st_C_typedef)
2341 if (typedefs)
2342 typdef = tkeyseen;
2343 fvextern = FALSE;
2344 fvdef = fvnone;
2345 return FALSE;
2347 break;
2348 case tkeyseen:
2349 switch (toktype)
2351 case st_none:
2352 case st_C_typespec:
2353 case st_C_struct:
2354 case st_C_enum:
2355 typdef = ttypeseen;
2356 break;
2358 /* Do not return here, so the structdef stuff has a chance. */
2359 break;
2360 case tend:
2361 switch (toktype)
2363 case st_C_typespec:
2364 case st_C_struct:
2365 case st_C_enum:
2366 return FALSE;
2368 return TRUE;
2372 * This structdef business is currently only invoked when cblev==0.
2373 * It should be recursively invoked whatever the curly brace level,
2374 * and a stack of states kept, to allow for definitions of structs
2375 * within structs.
2377 * This structdef business is NOT invoked when we are ctags and the
2378 * file is plain C. This is because a struct tag may have the same
2379 * name as another tag, and this loses with ctags.
2381 switch (toktype)
2383 case st_C_javastruct:
2384 if (structdef == stagseen)
2385 structdef = scolonseen;
2386 return FALSE;
2387 case st_C_struct:
2388 case st_C_enum:
2389 if (typdef == tkeyseen
2390 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2392 structdef = skeyseen;
2393 structtype = toktype;
2395 return FALSE;
2398 if (structdef == skeyseen)
2400 /* Save the tag for struct/union/class, for functions and variables
2401 that may be defined inside. */
2402 if (structtype == st_C_struct)
2403 structtag = savenstr (str, len);
2404 else
2405 structtag = "<enum>";
2406 structdef = stagseen;
2407 return TRUE;
2410 if (typdef != tnone)
2411 definedef = dnone;
2413 /* Detect GNU macros.
2415 Writers of emacs code are recommended to put the
2416 first two args of a DEFUN on the same line.
2418 The DEFUN macro, used in emacs C source code, has a first arg
2419 that is a string (the lisp function name), and a second arg that
2420 is a C function name. Since etags skips strings, the second arg
2421 is tagged. This is unfortunate, as it would be better to tag the
2422 first arg. The simplest way to deal with this problem would be
2423 to name the tag with a name built from the function name, by
2424 removing the initial 'F' character and substituting '-' for '_'.
2425 Anyway, this assumes that the conventions of naming lisp
2426 functions will never change. Currently, this method is not
2427 implemented. */
2428 if (definedef == dnone && toktype == st_C_gnumacro)
2430 next_token_is_func = TRUE;
2431 return FALSE;
2433 if (next_token_is_func)
2435 next_token_is_func = FALSE;
2436 fvdef = fignore;
2437 *is_func_or_var = TRUE;
2438 return TRUE;
2441 /* Detect Objective C constructs. */
2442 switch (objdef)
2444 case onone:
2445 switch (toktype)
2447 case st_C_objprot:
2448 objdef = oprotocol;
2449 return FALSE;
2450 case st_C_objimpl:
2451 objdef = oimplementation;
2452 return FALSE;
2454 break;
2455 case oimplementation:
2456 /* Save the class tag for functions or variables defined inside. */
2457 objtag = savenstr (str, len);
2458 objdef = oinbody;
2459 return FALSE;
2460 case oprotocol:
2461 /* Save the class tag for categories. */
2462 objtag = savenstr (str, len);
2463 objdef = otagseen;
2464 *is_func_or_var = TRUE;
2465 return TRUE;
2466 case oparenseen:
2467 objdef = ocatseen;
2468 *is_func_or_var = TRUE;
2469 return TRUE;
2470 case oinbody:
2471 break;
2472 case omethodsign:
2473 if (parlev == 0)
2475 objdef = omethodtag;
2476 methodlen = len;
2477 grow_linebuffer (&token_name, methodlen + 1);
2478 strncpy (token_name.buffer, str, len);
2479 token_name.buffer[methodlen] = '\0';
2480 token_name.len = methodlen;
2481 return TRUE;
2483 return FALSE;
2484 case omethodcolon:
2485 if (parlev == 0)
2486 objdef = omethodparm;
2487 return FALSE;
2488 case omethodparm:
2489 if (parlev == 0)
2491 objdef = omethodtag;
2492 methodlen += len;
2493 grow_linebuffer (&token_name, methodlen + 1);
2494 strncat (token_name.buffer, str, len);
2495 token_name.len = methodlen;
2496 return TRUE;
2498 return FALSE;
2499 case oignore:
2500 if (toktype == st_C_objend)
2502 /* Memory leakage here: the string pointed by objtag is
2503 never released, because many tests would be needed to
2504 avoid breaking on incorrect input code. The amount of
2505 memory leaked here is the sum of the lengths of the
2506 class tags.
2507 free (objtag); */
2508 objdef = onone;
2510 return FALSE;
2513 /* A function, variable or enum constant? */
2514 switch (toktype)
2516 case st_C_extern:
2517 fvextern = TRUE;
2518 /* FALLTHRU */
2519 case st_C_typespec:
2520 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2521 fvdef = fvnone; /* should be useless */
2522 return FALSE;
2523 case st_C_ignore:
2524 fvextern = FALSE;
2525 fvdef = vignore;
2526 return FALSE;
2527 case st_C_operator:
2528 fvdef = foperator;
2529 *is_func_or_var = TRUE;
2530 return TRUE;
2531 case st_none:
2532 if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2534 fvdef = foperator;
2535 *is_func_or_var = TRUE;
2536 return TRUE;
2538 if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2539 return TRUE;
2540 if (fvdef == fvnone)
2542 fvdef = fvnameseen; /* function or variable */
2543 *is_func_or_var = TRUE;
2544 return TRUE;
2546 break;
2549 return FALSE;
/*
 * C_entries ()
 *      This routine finds functions, variables, typedefs,
 *      #define's, enum constants and struct/union/enum definitions in
 *      C syntax and adds them to the list.
 */

/* The macros below are only meaningful inside C_entries: they use its
   locals curndx, newndx, lp, quotednl, savetok and inf. */

/* TRUE when the pending token lies on the line currently being read. */
#define current_lb_is_new (newndx == curndx)
/* Make the other of the two line buffers the current one. */
#define switch_line_buffers() (curndx = 1 - curndx)

/* Line buffer accessors: current, other, and the one holding the token. */
#define curlb (lbs[curndx].lb)
#define othlb (lbs[1-curndx].lb)
#define newlb (lbs[newndx].lb)
/* File position at which each of those lines starts. */
#define curlinepos (lbs[curndx].linepos)
#define othlinepos (lbs[1-curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current buffer, leaving the
   definedef state untouched. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  lineno++;                                                             \
  linecharno = charno;                                                  \
  charno += readline (&curlb, inf);                                     \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Read the next input line, restore any token that was saved while a
   preprocessor line was being handled, and leave preprocessor state. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetok.valid)                                                    \
    {                                                                   \
      tok = savetok;                                                    \
      savetok.valid = FALSE;                                            \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2591 static void
2592 make_C_tag (isfun)
2593 bool isfun;
2595 /* This function should never be called when tok.valid is FALSE, but
2596 we must protect against invalid input or internal errors. */
2597 if (tok.valid)
2599 if (traditional_tag_style)
2601 /* This was the original code. Now we call new_pfnote instead,
2602 which uses the new method for naming tags (see new_pfnote). */
2603 char *name = NULL;
2605 if (CTAGS || tok.named)
2606 name = savestr (token_name.buffer);
2607 pfnote (name, isfun,
2608 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2610 else
2611 new_pfnote (token_name.buffer, token_name.len, isfun,
2612 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2613 tok.valid = FALSE;
2615 else if (DEBUG)
2616 abort ();
/* C_entries: scan the C-like input file INF one character at a time and
   emit tags through make_C_tag.  C_EXT is a bit mask of language
   extensions; the code below tests it against YACC, C_PLPL and C_JAVA.

   The scanner first disposes of backslashes, comments, string and
   character constants, yacc `%%' separators and preprocessor `#' lines.
   It then collects identifiers and hands each one to consider_token,
   which advances the definedef/typdef/structdef/objdef/fvdef state
   machines.  Finally the character that follows is examined in the big
   punctuation switch, which decides when a pending token becomes a tag.

   Two line buffers are used (see curlb/newlb) so that a token found on
   one line can still be tagged after the following line has been read. */
2620 static void
2621 C_entries (c_ext, inf)
2622 int c_ext; /* extension of C */
2623 FILE *inf; /* input file */
2625 register char c; /* latest char read; '\0' for end of line */
2626 register char *lp; /* pointer one beyond the character `c' */
2627 int curndx, newndx; /* indices for current and new lb */
2628 register int tokoff; /* offset in line of start of current token */
2629 register int toklen; /* length of current token */
2630 char *qualifier; /* string used to qualify names */
2631 int qlen; /* length of qualifier */
2632 int cblev; /* current curly brace level */
2633 int parlev; /* current parenthesis level */
2634 bool incomm, inquote, inchar, quotednl, midtoken;
2635 bool purec, cplpl, cjava;
2636 token savetok; /* token saved during preprocessor handling */
/* Reset the scanner position and every state machine before reading. */
2639 tokoff = toklen = 0; /* keep compiler quiet */
2640 curndx = newndx = 0;
2641 lineno = 0;
2642 charno = 0;
2643 lp = curlb.buffer;
2644 *lp = 0;
2646 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2647 structdef = snone; definedef = dnone; objdef = onone;
2648 next_token_is_func = yacc_rules = FALSE;
2649 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2650 tok.valid = savetok.valid = FALSE;
2651 cblev = 0;
2652 parlev = 0;
2653 purec = !(c_ext & ~YACC); /* no extensions (apart from possibly yacc) */
2654 cplpl = (c_ext & C_PLPL) == C_PLPL;
2655 cjava = (c_ext & C_JAVA) == C_JAVA;
/* Names of members are qualified with "." for Java and "::" otherwise. */
2656 if (cjava)
2657 { qualifier = "."; qlen = 1; }
2658 else
2659 { qualifier = "::"; qlen = 2; }
/* Main loop: one character per iteration; a '\0' means end of line and
   makes the final switch read the next line. */
2661 while (!feof (inf))
2663 c = *lp++;
2664 if (c == '\\')
2666 /* If we're at the end of the line, the next character is a
2667 '\0'; don't skip it, because it's the thing that tells us
2668 to read the next line. */
2669 if (*lp == '\0')
2671 quotednl = TRUE;
2672 continue;
2674 lp++;
2675 c = ' ';
2677 else if (incomm)
2679 switch (c)
2681 case '*':
2682 if (*lp == '/')
2684 c = *lp++;
2685 incomm = FALSE;
2687 break;
2688 case '\0':
2689 /* Newlines inside comments do not end macro definitions in
2690 traditional cpp. */
2691 CNL_SAVE_DEFINEDEF ();
2692 break;
2694 continue;
2696 else if (inquote)
2698 switch (c)
2700 case '"':
2701 inquote = FALSE;
2702 break;
2703 case '\0':
2704 /* Newlines inside strings do not end macro definitions
2705 in traditional cpp, even though compilers don't
2706 usually accept them. */
2707 CNL_SAVE_DEFINEDEF ();
2708 break;
2710 continue;
2712 else if (inchar)
2714 switch (c)
2716 case '\0':
2717 /* Hmmm, something went wrong. */
2718 CNL ();
2719 /* FALLTHRU */
2720 case '\'':
2721 inchar = FALSE;
2722 break;
2724 continue;
2726 else
2727 switch (c)
2729 case '"':
2730 inquote = TRUE;
2731 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2733 fvextern = FALSE;
2734 fvdef = fvnone;
2736 continue;
2737 case '\'':
2738 inchar = TRUE;
2739 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2741 fvextern = FALSE;
2742 fvdef = fvnone;
2744 continue;
2745 case '/':
2746 if (*lp == '*')
2748 lp++;
2749 incomm = TRUE;
2750 continue;
2752 else if (/* cplpl && */ *lp == '/')
2754 c = '\0';
2755 break;
2757 else
2758 break;
2759 case '%':
2760 if ((c_ext & YACC) && *lp == '%')
2762 /* entering or exiting rules section in yacc file */
2763 lp++;
2764 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2765 typdef = tnone; structdef = snone;
2766 next_token_is_func = FALSE;
2767 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2768 cblev = 0;
2769 yacc_rules = !yacc_rules;
2770 continue;
2772 else
2773 break;
2774 case '#':
2775 if (definedef == dnone)
2777 char *cp;
2778 bool cpptoken = TRUE;
2780 /* Look back on this line. If all blanks, or nonblanks
2781 followed by an end of comment, this is a preprocessor
2782 token. */
2783 for (cp = newlb.buffer; cp < lp-1; cp++)
2784 if (!iswhite (*cp))
2786 if (*cp == '*' && *(cp+1) == '/')
2788 cp++;
2789 cpptoken = TRUE;
2791 else
2792 cpptoken = FALSE;
2794 if (cpptoken)
2795 definedef = dsharpseen;
2796 } /* if (definedef == dnone) */
2798 continue;
2799 } /* switch (c) */
2802 /* Consider token only if some complicated conditions are satisfied. */
2803 if ((definedef != dnone
2804 || (cblev == 0 && structdef != scolonseen)
2805 || (cblev == 1 && cplpl && structdef == sinbody)
2806 || (structdef == sinbody && purec))
2807 && typdef != tignore
2808 && definedef != dignorerest
2809 && fvdef != finlist)
2811 if (midtoken)
2813 if (endtoken (c))
2815 bool funorvar = FALSE;
2817 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2820 * This handles :: in the middle, but not at the
2821 * beginning of an identifier. Also, space-separated
2822 * :: is not recognised.
2824 lp += 2;
2825 toklen += 2;
2826 c = lp[-1];
2827 goto intok;
2829 else
2831 if (yacc_rules
2832 || consider_token (newlb.buffer + tokoff, toklen, c,
2833 c_ext, cblev, parlev, &funorvar))
2835 if (fvdef == foperator)
2837 char *oldlp = lp;
2838 lp = skip_spaces (lp-1);
2839 if (*lp != '\0')
2840 lp += 1;
2841 while (*lp != '\0'
2842 && !iswhite (*lp) && *lp != '(')
2843 lp += 1;
2844 c = *lp++;
2845 toklen += lp - oldlp;
2847 tok.named = FALSE;
2848 if (!purec
2849 && funorvar
2850 && definedef == dnone
2851 && structdef == sinbody)
2852 /* function or var defined in C++ class body */
2854 int len = strlen (structtag) + qlen + toklen;
2855 grow_linebuffer (&token_name, len + 1);
2856 strcpy (token_name.buffer, structtag);
2857 strcat (token_name.buffer, qualifier);
2858 strncat (token_name.buffer,
2859 newlb.buffer + tokoff, toklen);
2860 token_name.len = len;
2861 tok.named = TRUE;
2863 else if (objdef == ocatseen)
2864 /* Objective C category */
2866 int len = strlen (objtag) + 2 + toklen;
2867 grow_linebuffer (&token_name, len + 1);
2868 strcpy (token_name.buffer, objtag);
2869 strcat (token_name.buffer, "(");
2870 strncat (token_name.buffer,
2871 newlb.buffer + tokoff, toklen);
2872 strcat (token_name.buffer, ")");
2873 token_name.len = len;
2874 tok.named = TRUE;
2876 else if (objdef == omethodtag
2877 || objdef == omethodparm)
2878 /* Objective C method */
2880 tok.named = TRUE;
2882 else
2884 grow_linebuffer (&token_name, toklen + 1);
2885 strncpy (token_name.buffer,
2886 newlb.buffer + tokoff, toklen);
2887 token_name.buffer[toklen] = '\0';
2888 token_name.len = toklen;
2889 /* Name macros and members. */
2890 tok.named = (structdef == stagseen
2891 || typdef == ttypeseen
2892 || typdef == tend
2893 || (funorvar
2894 && definedef == dignorerest)
2895 || (funorvar
2896 && definedef == dnone
2897 && structdef == sinbody));
/* Record where the token was found so that make_C_tag can emit it. */
2899 tok.lineno = lineno;
2900 tok.linelen = tokoff + toklen + 1;
2901 tok.buffer = newlb.buffer;
2902 tok.linepos = newlinepos;
2903 tok.valid = TRUE;
/* Either keep the token pending (protecting its line from being
   overwritten by switching buffers) or tag it right away. */
2905 if (definedef == dnone
2906 && (fvdef == fvnameseen
2907 || fvdef == foperator
2908 || structdef == stagseen
2909 || typdef == tend
2910 || objdef != onone))
2912 if (current_lb_is_new)
2913 switch_line_buffers ();
2915 else
2916 make_C_tag (funorvar);
2918 midtoken = FALSE;
2920 } /* if (endtoken (c)) */
2921 else if (intoken (c))
2922 intok:
2924 toklen++;
2925 continue;
2927 } /* if (midtoken) */
2928 else if (begtoken (c))
2930 switch (definedef)
2932 case dnone:
2933 switch (fvdef)
2935 case fstartlist:
2936 fvdef = finlist;
2937 continue;
2938 case flistseen:
2939 make_C_tag (TRUE); /* a function */
2940 fvdef = fignore;
2941 break;
2942 case fvnameseen:
2943 fvdef = fvnone;
2944 break;
2946 if (structdef == stagseen && !cjava)
2947 structdef = snone;
2948 break;
2949 case dsharpseen:
2950 savetok = tok;
2952 if (!yacc_rules || lp == newlb.buffer + 1)
2954 tokoff = lp - 1 - newlb.buffer;
2955 toklen = 1;
2956 midtoken = TRUE;
2958 continue;
2959 } /* if (begtoken) */
2960 } /* if must look at token */
2963 /* Detect end of line, colon, comma, semicolon and various braces
2964 after having handled a token.*/
2965 switch (c)
2967 case ':':
2968 if (definedef != dnone)
2969 break;
2970 switch (objdef)
2972 case otagseen:
2973 objdef = oignore;
2974 make_C_tag (TRUE); /* an Objective C class */
2975 break;
2976 case omethodtag:
2977 case omethodparm:
2978 objdef = omethodcolon;
2979 methodlen += 1;
2980 grow_linebuffer (&token_name, methodlen + 1);
2981 strcat (token_name.buffer, ":");
2982 token_name.len = methodlen;
2983 break;
2985 if (structdef == stagseen)
2986 structdef = scolonseen;
2987 else
2988 switch (fvdef)
2990 case fvnameseen:
2991 if (yacc_rules)
2993 make_C_tag (FALSE); /* a yacc function */
2994 fvdef = fignore;
2996 break;
2997 case fstartlist:
2998 fvextern = FALSE;
2999 fvdef = fvnone;
3000 break;
3002 break;
3003 case ';':
3004 if (definedef != dnone)
3005 break;
3006 if (cblev == 0)
3007 switch (typdef)
3009 case tend:
3010 make_C_tag (FALSE); /* a typedef */
3011 /* FALLTHRU */
3012 default:
3013 typdef = tnone;
3015 switch (fvdef)
3017 case fignore:
3018 break;
3019 case fvnameseen:
3020 if ((members && cblev == 1)
3021 || (globals && cblev == 0 && (!fvextern || declarations)))
3022 make_C_tag (FALSE); /* a variable */
3023 fvextern = FALSE;
3024 fvdef = fvnone;
3025 tok.valid = FALSE;
3026 break;
3027 case flistseen:
3028 if (declarations && (cblev == 0 || cblev == 1))
3029 make_C_tag (TRUE); /* a function declaration */
3030 /* FALLTHRU */
3031 default:
3032 fvextern = FALSE;
3033 fvdef = fvnone;
3034 /* The following instruction invalidates the token.
3035 Probably the token should be invalidated in all
3036 other cases where some state machine is reset. */
3037 tok.valid = FALSE;
3039 if (structdef == stagseen)
3040 structdef = snone;
3041 break;
3042 case ',':
3043 if (definedef != dnone)
3044 break;
3045 switch (objdef)
3047 case omethodtag:
3048 case omethodparm:
3049 make_C_tag (TRUE); /* an Objective C method */
3050 objdef = oinbody;
3051 break;
3053 switch (fvdef)
3055 case foperator:
3056 case finlist:
3057 case fignore:
3058 case vignore:
3059 break;
3060 case fvnameseen:
3061 if ((members && cblev == 1)
3062 || (globals && cblev == 0 && (!fvextern || declarations)))
3063 make_C_tag (FALSE); /* a variable */
3064 break;
3065 default:
3066 fvdef = fvnone;
3068 if (structdef == stagseen)
3069 structdef = snone;
3070 break;
3071 case '[':
3072 if (definedef != dnone)
3073 break;
3074 if (cblev == 0 && typdef == tend)
3076 typdef = tignore;
3077 make_C_tag (FALSE); /* a typedef */
3078 break;
3080 switch (fvdef)
3082 case foperator:
3083 case finlist:
3084 case fignore:
3085 case vignore:
3086 break;
3087 case fvnameseen:
3088 if ((members && cblev == 1)
3089 || (globals && cblev == 0 && (!fvextern || declarations)))
3090 make_C_tag (FALSE); /* a variable */
3091 /* FALLTHRU */
3092 default:
3093 fvdef = fvnone;
3095 if (structdef == stagseen)
3096 structdef = snone;
3097 break;
3098 case '(':
3099 if (definedef != dnone)
3100 break;
3101 if (objdef == otagseen && parlev == 0)
3102 objdef = oparenseen;
3103 switch (fvdef)
3105 case fvnameseen:
3106 if (typdef == ttypeseen
3107 && tok.valid
3108 && *lp != '*'
3109 && structdef != sinbody)
3111 /* This handles constructs like:
3112 typedef void OperatorFun (int fun); */
3113 make_C_tag (FALSE);
3114 typdef = tignore;
3116 /* FALLTHRU */
3117 case foperator:
3118 fvdef = fstartlist;
3119 break;
3120 case flistseen:
3121 fvdef = finlist;
3122 break;
3124 parlev++;
3125 break;
3126 case ')':
3127 if (definedef != dnone)
3128 break;
3129 if (objdef == ocatseen && parlev == 1)
3131 make_C_tag (TRUE); /* an Objective C category */
3132 objdef = oignore;
3134 if (--parlev == 0)
3136 switch (fvdef)
3138 case fstartlist:
3139 case finlist:
3140 fvdef = flistseen;
3141 break;
3143 if (cblev == 0 && (typdef == tend))
3145 typdef = tignore;
3146 make_C_tag (FALSE); /* a typedef */
3149 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3150 parlev = 0;
3151 break;
3152 case '{':
3153 if (definedef != dnone)
3154 break;
3155 if (typdef == ttypeseen)
3156 typdef = tinbody;
3157 switch (structdef)
3159 case skeyseen: /* unnamed struct */
3160 structdef = sinbody;
3161 structtag = "_anonymous_";
3162 break;
3163 case stagseen:
3164 case scolonseen: /* named struct */
3165 structdef = sinbody;
3166 make_C_tag (FALSE); /* a struct */
3167 break;
3169 switch (fvdef)
3171 case flistseen:
3172 make_C_tag (TRUE); /* a function */
3173 /* FALLTHRU */
3174 case fignore:
3175 fvdef = fvnone;
3176 break;
3177 case fvnone:
3178 switch (objdef)
3180 case otagseen:
3181 make_C_tag (TRUE); /* an Objective C class */
3182 objdef = oignore;
3183 break;
3184 case omethodtag:
3185 case omethodparm:
3186 make_C_tag (TRUE); /* an Objective C method */
3187 objdef = oinbody;
3188 break;
3189 default:
3190 /* Neutralize `extern "C" {' grot. */
3191 if (cblev == 0 && structdef == snone && typdef == tnone)
3192 cblev = -1;
3195 cblev++;
3196 break;
3197 case '*':
3198 if (definedef != dnone)
3199 break;
3200 if (fvdef == fstartlist)
3201 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3202 break;
3203 case '}':
3204 if (definedef != dnone)
3205 break;
3206 if (!noindentypedefs && lp == newlb.buffer + 1)
3208 cblev = 0; /* reset curly brace level if first column */
3209 parlev = 0; /* also reset paren level, just in case... */
3211 else if (cblev > 0)
3212 cblev--;
3213 if (cblev == 0)
3215 if (typdef == tinbody)
3216 typdef = tend;
3217 /* Memory leakage here: the string pointed by structtag is
3218 never released, because I fear to miss something and
3219 break things while freeing the area. The amount of
3220 memory leaked here is the sum of the lengths of the
3221 struct tags.
3222 if (structdef == sinbody)
3223 free (structtag); */
3225 structdef = snone;
3226 structtag = "<error>";
3228 break;
3229 case '=':
3230 if (definedef != dnone)
3231 break;
3232 switch (fvdef)
3234 case foperator:
3235 case finlist:
3236 case fignore:
3237 case vignore:
3238 break;
3239 case fvnameseen:
3240 if ((members && cblev == 1)
3241 || (globals && cblev == 0 && (!fvextern || declarations)))
3242 make_C_tag (FALSE); /* a variable */
3243 /* FALLTHRU */
3244 default:
3245 fvdef = vignore;
3247 break;
3248 case '+':
3249 case '-':
3250 if (objdef == oinbody && cblev == 0)
3252 objdef = omethodsign;
3253 break;
3255 /* FALLTHRU */
3256 case '#': case '~': case '&': case '%': case '/': case '|':
3257 case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3258 if (definedef != dnone)
3259 break;
3260 /* These surely cannot follow a function tag in C. */
3261 switch (fvdef)
3263 case foperator:
3264 case finlist:
3265 case fignore:
3266 case vignore:
3267 break;
3268 default:
3269 fvdef = fvnone;
3271 break;
3272 case '\0':
3273 if (objdef == otagseen)
3275 make_C_tag (TRUE); /* an Objective C class */
3276 objdef = oignore;
3278 /* If a macro spans multiple lines don't reset its state. */
3279 if (quotednl)
3280 CNL_SAVE_DEFINEDEF ();
3281 else
3282 CNL ();
3283 break;
3284 } /* switch (c) */
3286 } /* while not eof */
3290 * Process either a C++ file or a C file depending on the setting
3291 * of a global flag.
3293 static void
3294 default_C_entries (inf)
3295 FILE *inf;
3297 C_entries (cplusplus ? C_PLPL : 0, inf);
/* Always do plain ANSI C: call C_entries with an empty extension mask. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3308 /* Always do C++. */
3309 static void
3310 Cplusplus_entries (inf)
3311 FILE *inf;
3313 C_entries (C_PLPL, inf);
3316 /* Always do Java. */
3317 static void
3318 Cjava_entries (inf)
3319 FILE *inf;
3321 C_entries (C_JAVA, inf);
3324 /* Always do C*. */
3325 static void
3326 Cstar_entries (inf)
3327 FILE *inf;
3329 C_entries (C_STAR, inf);
3332 /* Always do Yacc. */
3333 static void
3334 Yacc_entries (inf)
3335 FILE *inf;
3337 C_entries (YACC, inf);
/* A useful macro.

   Expands to a `for' header that runs the statement following it once
   per line of FILE_POINTER.  Before each iteration it bumps `lineno',
   sets `linecharno' to the offset of the line, reads the line into
   LINE_BUFFER with readline, advances `charno' by readline's return
   value, and points CHAR_POINTER at the start of the line just read.
   `lineno' and `charno' are reset to 0 when the loop starts.

   CHAR_POINTER is taken from LINE_BUFFER itself, so the macro works
   with any line buffer, not only the global `lb'.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), (file_pointer)),         \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3353 * Read a file, but do no processing. This is used to do regexp
3354 * matching on files that have no language defined.
3356 static void
3357 just_read_file (inf)
3358 FILE *inf;
3360 register char *dummy;
3362 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3363 continue;
3366 /* Fortran parsing */
3368 static bool tail P_((char *));
3369 static void takeprec P_((void));
3370 static void getit P_((FILE *));
3372 static bool
3373 tail (cp)
3374 char *cp;
3376 register int len = 0;
3378 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3379 cp++, len++;
3380 if (*cp == '\0' && !intoken (dbp[len]))
3382 dbp += len;
3383 return TRUE;
3385 return FALSE;
3388 static void
3389 takeprec ()
3391 dbp = skip_spaces (dbp);
3392 if (*dbp != '*')
3393 return;
3394 dbp++;
3395 dbp = skip_spaces (dbp);
3396 if (strneq (dbp, "(*)", 3))
3398 dbp += 3;
3399 return;
3401 if (!isdigit (*dbp))
3403 --dbp; /* force failure */
3404 return;
3407 dbp++;
3408 while (isdigit (*dbp));
3411 static void
3412 getit (inf)
3413 FILE *inf;
3415 register char *cp;
3417 dbp = skip_spaces (dbp);
3418 if (*dbp == '\0')
3420 lineno++;
3421 linecharno = charno;
3422 charno += readline (&lb, inf);
3423 dbp = lb.buffer;
3424 if (dbp[5] != '&')
3425 return;
3426 dbp += 6;
3427 dbp = skip_spaces (dbp);
3429 if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3430 return;
3431 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3432 continue;
3433 pfnote (savenstr (dbp, cp-dbp), TRUE,
3434 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3438 static void
3439 Fortran_functions (inf)
3440 FILE *inf;
3442 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3444 if (*dbp == '%')
3445 dbp++; /* Ratfor escape to fortran */
3446 dbp = skip_spaces (dbp);
3447 if (*dbp == '\0')
3448 continue;
3449 switch (lowcase (*dbp))
3451 case 'i':
3452 if (tail ("integer"))
3453 takeprec ();
3454 break;
3455 case 'r':
3456 if (tail ("real"))
3457 takeprec ();
3458 break;
3459 case 'l':
3460 if (tail ("logical"))
3461 takeprec ();
3462 break;
3463 case 'c':
3464 if (tail ("complex") || tail ("character"))
3465 takeprec ();
3466 break;
3467 case 'd':
3468 if (tail ("double"))
3470 dbp = skip_spaces (dbp);
3471 if (*dbp == '\0')
3472 continue;
3473 if (tail ("precision"))
3474 break;
3475 continue;
3477 break;
3479 dbp = skip_spaces (dbp);
3480 if (*dbp == '\0')
3481 continue;
3482 switch (lowcase (*dbp))
3484 case 'f':
3485 if (tail ("function"))
3486 getit (inf);
3487 continue;
3488 case 's':
3489 if (tail ("subroutine"))
3490 getit (inf);
3491 continue;
3492 case 'e':
3493 if (tail ("entry"))
3494 getit (inf);
3495 continue;
3496 case 'b':
3497 if (tail ("blockdata") || tail ("block data"))
3499 dbp = skip_spaces (dbp);
3500 if (*dbp == '\0') /* assume un-named */
3501 pfnote (savestr ("blockdata"), TRUE,
3502 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3503 else
3504 getit (inf); /* look for name */
3506 continue;
3512 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3513 * Ada parsing
3516 static void adagetit P_((FILE *, char *));
3518 /* Once we are positioned after an "interesting" keyword, let's get
3519 the real tag value necessary. */
3520 static void
3521 adagetit (inf, name_qualifier)
3522 FILE *inf;
3523 char *name_qualifier;
3525 register char *cp;
3526 char *name;
3527 char c;
3529 while (!feof (inf))
3531 dbp = skip_spaces (dbp);
3532 if (*dbp == '\0'
3533 || (dbp[0] == '-' && dbp[1] == '-'))
3535 lineno++;
3536 linecharno = charno;
3537 charno += readline (&lb, inf);
3538 dbp = lb.buffer;
3540 switch (*dbp)
3542 case 'b':
3543 case 'B':
3544 if (tail ("body"))
3546 /* Skipping body of procedure body or package body or ....
3547 resetting qualifier to body instead of spec. */
3548 name_qualifier = "/b";
3549 continue;
3551 break;
3552 case 't':
3553 case 'T':
3554 /* Skipping type of task type or protected type ... */
3555 if (tail ("type"))
3556 continue;
3557 break;
3559 if (*dbp == '"')
3561 dbp += 1;
3562 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3563 continue;
3565 else
3567 dbp = skip_spaces (dbp);
3568 for (cp = dbp;
3569 (*cp != '\0'
3570 && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.'));
3571 cp++)
3572 continue;
3573 if (cp == dbp)
3574 return;
3576 c = *cp;
3577 *cp = '\0';
3578 name = concat (dbp, name_qualifier, "");
3579 *cp = c;
3580 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3581 if (c == '"')
3582 dbp = cp + 1;
3583 return;
3587 static void
3588 Ada_funcs (inf)
3589 FILE *inf;
3591 bool inquote = FALSE;
3593 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3595 while (*dbp != '\0')
3597 /* Skip a string i.e. "abcd". */
3598 if (inquote || (*dbp == '"'))
3600 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3601 if (dbp != NULL)
3603 inquote = FALSE;
3604 dbp += 1;
3605 continue; /* advance char */
3607 else
3609 inquote = TRUE;
3610 break; /* advance line */
3614 /* Skip comments. */
3615 if (dbp[0] == '-' && dbp[1] == '-')
3616 break; /* advance line */
3618 /* Skip character enclosed in single quote i.e. 'a'
3619 and skip single quote starting an attribute i.e. 'Image. */
3620 if (*dbp == '\'')
3622 dbp++ ;
3623 if (*dbp != '\0')
3624 dbp++;
3625 continue;
3628 /* Search for beginning of a token. */
3629 if (!begtoken (*dbp))
3631 dbp++;
3632 continue; /* advance char */
3635 /* We are at the beginning of a token. */
3636 switch (*dbp)
3638 case 'f':
3639 case 'F':
3640 if (!packages_only && tail ("function"))
3641 adagetit (inf, "/f");
3642 else
3643 break; /* from switch */
3644 continue; /* advance char */
3645 case 'p':
3646 case 'P':
3647 if (!packages_only && tail ("procedure"))
3648 adagetit (inf, "/p");
3649 else if (tail ("package"))
3650 adagetit (inf, "/s");
3651 else if (tail ("protected")) /* protected type */
3652 adagetit (inf, "/t");
3653 else
3654 break; /* from switch */
3655 continue; /* advance char */
3656 case 't':
3657 case 'T':
3658 if (!packages_only && tail ("task"))
3659 adagetit (inf, "/k");
3660 else if (typedefs && !packages_only && tail ("type"))
3662 adagetit (inf, "/t");
3663 while (*dbp != '\0')
3664 dbp += 1;
3666 else
3667 break; /* from switch */
3668 continue; /* advance char */
3671 /* Look for the end of the token. */
3672 while (!endtoken (*dbp))
3673 dbp++;
3675 } /* advance char */
3676 } /* advance line */
3680 * Bob Weiner, Motorola Inc., 4/3/94
3681 * Unix and microcontroller assembly tag handling
3682 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3684 static void
3685 Asm_labels (inf)
3686 FILE *inf;
3688 register char *cp;
3690 LOOP_ON_INPUT_LINES (inf, lb, cp)
3692 /* If first char is alphabetic or one of [_.$], test for colon
3693 following identifier. */
3694 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3696 /* Read past label. */
3697 cp++;
3698 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3699 cp++;
3700 if (*cp == ':' || iswhite (*cp))
3702 /* Found end of label, so copy it and add it to the table. */
3703 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3704 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3711 * Perl support by Bart Robinson <lomew@cs.utah.edu>
3712 * enhanced by Michael Ernst <mernst@alum.mit.edu>
3713 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3714 * Perl variable names: /^(my|local).../
3716 static void
3717 Perl_functions (inf)
3718 FILE *inf;
3720 register char *cp;
3722 LOOP_ON_INPUT_LINES (inf, lb, cp)
3724 if (*cp++ == 's'
3725 && *cp++ == 'u'
3726 && *cp++ == 'b' && iswhite (*cp++))
3728 cp = skip_spaces (cp);
3729 if (*cp != '\0')
3731 char *sp = cp;
3732 while (*cp != '\0'
3733 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3734 cp++;
3735 pfnote (savenstr (sp, cp-sp), TRUE,
3736 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3739 else if (globals /* only if tagging global vars is enabled */
3740 && ((cp = lb.buffer,
3741 *cp++ == 'm'
3742 && *cp++ == 'y')
3743 || (cp = lb.buffer,
3744 *cp++ == 'l'
3745 && *cp++ == 'o'
3746 && *cp++ == 'c'
3747 && *cp++ == 'a'
3748 && *cp++ == 'l'))
3749 && (*cp == '(' || iswhite (*cp)))
3751 /* After "my" or "local", but before any following paren or space. */
3752 char *varname = NULL;
3754 cp = skip_spaces (cp);
3755 if (*cp == '$' || *cp == '@' || *cp == '%')
3757 char* varstart = ++cp;
3758 while (isalnum (*cp) || *cp == '_')
3759 cp++;
3760 varname = savenstr (varstart, cp-varstart);
3762 else
3764 /* Should be examining a variable list at this point;
3765 could insist on seeing an open parenthesis. */
3766 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
3767 cp++;
3770 /* Perhaps I should back cp up one character, so the TAGS table
3771 doesn't mention (and so depend upon) the following char. */
3772 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3773 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3779 * Python support by Eric S. Raymond <esr@thyrsus.com>
3780 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3782 static void
3783 Python_functions (inf)
3784 FILE *inf;
3786 register char *cp;
3788 LOOP_ON_INPUT_LINES (inf, lb, cp)
3790 if (*cp++ == 'd'
3791 && *cp++ == 'e'
3792 && *cp++ == 'f' && iswhite (*cp++))
3794 cp = skip_spaces (cp);
3795 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3796 cp++;
3797 pfnote (NULL, TRUE,
3798 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3801 cp = lb.buffer;
3802 if (*cp++ == 'c'
3803 && *cp++ == 'l'
3804 && *cp++ == 'a'
3805 && *cp++ == 's'
3806 && *cp++ == 's' && iswhite (*cp++))
3808 cp = skip_spaces (cp);
3809 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3810 cp++;
3811 pfnote (NULL, TRUE,
3812 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3817 /* Idea by Corny de Souza
3818 * Cobol tag functions
3819 * We could look for anything that could be a paragraph name.
3820 * i.e. anything that starts in column 8 is one word and ends in a full stop.
3822 static void
3823 Cobol_paragraphs (inf)
3824 FILE *inf;
3826 register char *bp, *ep;
3828 LOOP_ON_INPUT_LINES (inf, lb, bp)
3830 if (lb.len < 9)
3831 continue;
3832 bp += 8;
3834 /* If eoln, compiler option or comment ignore whole line. */
3835 if (bp[-1] != ' ' || !isalnum (bp[0]))
3836 continue;
3838 for (ep = bp; isalnum (*ep) || *ep == '-'; ep++)
3839 continue;
3840 if (*ep++ == '.')
3841 pfnote (savenstr (bp, ep-bp), TRUE,
3842 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3846 /* Added by Mosur Mohan, 4/22/88 */
3847 /* Pascal parsing */
3850 * Locates tags for procedures & functions. Doesn't do any type- or
3851 * var-definitions. It does look for the keyword "extern" or
3852 * "forward" immediately following the procedure statement; if found,
3853 * the tag is skipped.
3855 static void
3856 Pascal_functions (inf)
3857 FILE *inf;
3859 linebuffer tline; /* mostly copied from C_entries */
3860 long save_lcno;
3861 int save_lineno, save_len;
3862 char c, *cp, *namebuf;
3864 bool /* each of these flags is TRUE iff: */
3865 incomment, /* point is inside a comment */
3866 inquote, /* point is inside '..' string */
3867 get_tagname, /* point is after PROCEDURE/FUNCTION
3868 keyword, so next item = potential tag */
3869 found_tag, /* point is after a potential tag */
3870 inparms, /* point is within parameter-list */
3871 verify_tag; /* point has passed the parm-list, so the
3872 next token will determine whether this
3873 is a FORWARD/EXTERN to be ignored, or
3874 whether it is a real tag */
3876 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3877 namebuf = NULL; /* keep compiler quiet */
3878 lineno = 0;
3879 charno = 0;
3880 dbp = lb.buffer;
3881 *dbp = '\0';
3882 initbuffer (&tline);
3884 incomment = inquote = FALSE;
3885 found_tag = FALSE; /* have a proc name; check if extern */
3886 get_tagname = FALSE; /* have found "procedure" keyword */
3887 inparms = FALSE; /* found '(' after "proc" */
3888 verify_tag = FALSE; /* check if "extern" is ahead */
3891 while (!feof (inf)) /* long main loop to get next char */
3893 c = *dbp++;
3894 if (c == '\0') /* if end of line */
3896 lineno++;
3897 linecharno = charno;
3898 charno += readline (&lb, inf);
3899 dbp = lb.buffer;
3900 if (*dbp == '\0')
3901 continue;
3902 if (!((found_tag && verify_tag)
3903 || get_tagname))
3904 c = *dbp++; /* only if don't need *dbp pointing
3905 to the beginning of the name of
3906 the procedure or function */
3908 if (incomment)
3910 if (c == '}') /* within { } comments */
3911 incomment = FALSE;
3912 else if (c == '*' && *dbp == ')') /* within (* *) comments */
3914 dbp++;
3915 incomment = FALSE;
3917 continue;
3919 else if (inquote)
3921 if (c == '\'')
3922 inquote = FALSE;
3923 continue;
3925 else
3926 switch (c)
3928 case '\'':
3929 inquote = TRUE; /* found first quote */
3930 continue;
3931 case '{': /* found open { comment */
3932 incomment = TRUE;
3933 continue;
3934 case '(':
3935 if (*dbp == '*') /* found open (* comment */
3937 incomment = TRUE;
3938 dbp++;
3940 else if (found_tag) /* found '(' after tag, i.e., parm-list */
3941 inparms = TRUE;
3942 continue;
3943 case ')': /* end of parms list */
3944 if (inparms)
3945 inparms = FALSE;
3946 continue;
3947 case ';':
3948 if (found_tag && !inparms) /* end of proc or fn stmt */
3950 verify_tag = TRUE;
3951 break;
3953 continue;
3955 if (found_tag && verify_tag && (*dbp != ' '))
3957 /* check if this is an "extern" declaration */
3958 if (*dbp == '\0')
3959 continue;
3960 if (lowcase (*dbp == 'e'))
3962 if (tail ("extern")) /* superfluous, really! */
3964 found_tag = FALSE;
3965 verify_tag = FALSE;
3968 else if (lowcase (*dbp) == 'f')
3970 if (tail ("forward")) /* check for forward reference */
3972 found_tag = FALSE;
3973 verify_tag = FALSE;
3976 if (found_tag && verify_tag) /* not external proc, so make tag */
3978 found_tag = FALSE;
3979 verify_tag = FALSE;
3980 pfnote (namebuf, TRUE,
3981 tline.buffer, save_len, save_lineno, save_lcno);
3982 continue;
3985 if (get_tagname) /* grab name of proc or fn */
3987 if (*dbp == '\0')
3988 continue;
3990 /* save all values for later tagging */
3991 grow_linebuffer (&tline, lb.len + 1);
3992 strcpy (tline.buffer, lb.buffer);
3993 save_lineno = lineno;
3994 save_lcno = linecharno;
3996 /* grab block name */
3997 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
3998 continue;
3999 namebuf = savenstr (dbp, cp-dbp);
4000 dbp = cp; /* set dbp to e-o-token */
4001 save_len = dbp - lb.buffer + 1;
4002 get_tagname = FALSE;
4003 found_tag = TRUE;
4004 continue;
4006 /* and proceed to check for "extern" */
4008 else if (!incomment && !inquote && !found_tag)
4010 /* check for proc/fn keywords */
4011 switch (lowcase (c))
4013 case 'p':
4014 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4015 get_tagname = TRUE;
4016 continue;
4017 case 'f':
4018 if (tail ("unction"))
4019 get_tagname = TRUE;
4020 continue;
4023 } /* while not eof */
4025 free (tline.buffer);
4029 * lisp tag functions
4030 * look for (def or (DEF, quote or QUOTE
4033 static int L_isdef P_((char *));
4034 static int L_isquote P_((char *));
4035 static void L_getit P_((void));
/* Return 1 if the three characters following STRP[0] spell "def" in any
   mix of upper and lower case, 0 otherwise.  STRP[0] itself is not
   examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
/* Return 1 if the characters following STRP[0] spell "quote" in any mix
   of upper and lower case and are followed by a white space character,
   0 otherwise.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4058 static void
4059 L_getit ()
4061 register char *cp;
4063 if (*dbp == '\'') /* Skip prefix quote */
4064 dbp++;
4065 else if (*dbp == '(')
4067 if (L_isquote (dbp))
4068 dbp += 7; /* Skip "(quote " */
4069 else
4070 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4071 dbp = skip_spaces (dbp);
4074 for (cp = dbp /*+1*/;
4075 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4076 cp++)
4077 continue;
4078 if (cp == dbp)
4079 return;
4081 pfnote (savenstr (dbp, cp-dbp), TRUE,
4082 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4085 static void
4086 Lisp_functions (inf)
4087 FILE *inf;
4089 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4091 if (dbp[0] == '(')
4093 if (L_isdef (dbp))
4095 dbp = skip_non_spaces (dbp);
4096 dbp = skip_spaces (dbp);
4097 L_getit ();
4099 else
4101 /* Check for (foo::defmumble name-defined ... */
4103 dbp++;
4104 while (*dbp != '\0' && !iswhite (*dbp)
4105 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4106 if (*dbp == ':')
4109 dbp++;
4110 while (*dbp == ':');
4112 if (L_isdef (dbp - 1))
4114 dbp = skip_non_spaces (dbp);
4115 dbp = skip_spaces (dbp);
4116 L_getit ();
4125 * Postscript tag functions
4126 * Just look for lines where the first character is '/'
4127 * Richard Mlynarik <mly@adoc.xerox.com>
4128 * Also look at "defineps" for PSWrap
4129 * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4131 static void
4132 Postscript_functions (inf)
4133 FILE *inf;
4135 register char *bp, *ep;
4137 LOOP_ON_INPUT_LINES (inf, lb, bp)
4139 if (bp[0] == '/')
4141 for (ep = bp+1;
4142 *ep != '\0' && *ep != ' ' && *ep != '{';
4143 ep++)
4144 continue;
4145 pfnote (savenstr (bp, ep-bp), TRUE,
4146 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4148 else if (strneq (bp, "defineps", 8))
4150 bp = skip_non_spaces (bp);
4151 bp = skip_spaces (bp);
4152 get_tag (bp);
4159 * Scheme tag functions
4160 * look for (def... xyzzy
4161 * look for (def... (xyzzy
4162 * look for (def ... ((...(xyzzy ....
4163 * look for (set! xyzzy
4166 static void
4167 Scheme_functions (inf)
4168 FILE *inf;
4170 register char *bp;
4172 LOOP_ON_INPUT_LINES (inf, lb, bp)
4174 if (bp[0] == '('
4175 && (bp[1] == 'D' || bp[1] == 'd')
4176 && (bp[2] == 'E' || bp[2] == 'e')
4177 && (bp[3] == 'F' || bp[3] == 'f'))
4179 bp = skip_non_spaces (bp);
4180 /* Skip over open parens and white space */
4181 while (iswhite (*bp) || *bp == '(')
4182 bp++;
4183 get_tag (bp);
4185 if (bp[0] == '('
4186 && (bp[1] == 'S' || bp[1] == 's')
4187 && (bp[2] == 'E' || bp[2] == 'e')
4188 && (bp[3] == 'T' || bp[3] == 't')
4189 && (bp[4] == '!' || bp[4] == '!')
4190 && (iswhite (bp[5])))
4192 bp = skip_non_spaces (bp);
4193 bp = skip_spaces (bp);
4194 get_tag (bp);
4199 /* Find tags in TeX and LaTeX input files. */
4201 /* TEX_toktab is a table of TeX control sequences that define tags.
4202 Each TEX_tabent records one such control sequence.
4203 CONVERT THIS TO USE THE Stab TYPE!! */
4204 struct TEX_tabent
4206 char *name;
4207 int len;
4210 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4212 /* Default set of control sequences to put into TEX_toktab.
4213 The value of environment var TEXTAGS is prepended to this. */
4215 char *TEX_defenv = "\
4216 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4217 :part:appendix:entry:index";
4219 static void TEX_mode P_((FILE *));
4220 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4221 static int TEX_Token P_((char *));
4223 char TEX_esc = '\\';
4224 char TEX_opgrp = '{';
4225 char TEX_clgrp = '}';
4228 * TeX/LaTeX scanning loop.
4230 static void
4231 TeX_functions (inf)
4232 FILE *inf;
4234 char *cp, *lasthit;
4235 register int i;
4237 /* Select either \ or ! as escape character. */
4238 TEX_mode (inf);
4240 /* Initialize token table once from environment. */
4241 if (!TEX_toktab)
4242 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4244 LOOP_ON_INPUT_LINES (inf, lb, cp)
4246 lasthit = cp;
4247 /* Look at each esc in line. */
4248 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4250 if (*++cp == '\0')
4251 break;
4252 linecharno += cp - lasthit;
4253 lasthit = cp;
4254 i = TEX_Token (lasthit);
4255 if (i >= 0)
4257 /* We seem to include the TeX command in the tag name.
4258 register char *p;
4259 for (p = lasthit + TEX_toktab[i].len;
4260 *p != '\0' && *p != TEX_clgrp;
4261 p++)
4262 continue; */
4263 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4264 lb.buffer, lb.len, lineno, linecharno);
4265 break; /* We only tag a line once */
4271 #define TEX_LESC '\\'
4272 #define TEX_SESC '!'
4273 #define TEX_cmt '%'
4275 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4276 chars accordingly. */
4277 static void
4278 TEX_mode (inf)
4279 FILE *inf;
4281 int c;
4283 while ((c = getc (inf)) != EOF)
4285 /* Skip to next line if we hit the TeX comment char. */
4286 if (c == TEX_cmt)
4287 while (c != '\n')
4288 c = getc (inf);
4289 else if (c == TEX_LESC || c == TEX_SESC )
4290 break;
4293 if (c == TEX_LESC)
4295 TEX_esc = TEX_LESC;
4296 TEX_opgrp = '{';
4297 TEX_clgrp = '}';
4299 else
4301 TEX_esc = TEX_SESC;
4302 TEX_opgrp = '<';
4303 TEX_clgrp = '>';
4305 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4306 No attempt is made to correct the situation. */
4307 rewind (inf);
4310 /* Read environment and prepend it to the default string.
4311 Build token table. */
4312 static struct TEX_tabent *
4313 TEX_decode_env (evarname, defenv)
4314 char *evarname;
4315 char *defenv;
4317 register char *env, *p;
4319 struct TEX_tabent *tab;
4320 int size, i;
4322 /* Append default string to environment. */
4323 env = getenv (evarname);
4324 if (!env)
4325 env = defenv;
4326 else
4328 char *oldenv = env;
4329 env = concat (oldenv, defenv, "");
4332 /* Allocate a token table */
4333 for (size = 1, p = env; p;)
4334 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4335 size++;
4336 /* Add 1 to leave room for null terminator. */
4337 tab = xnew (size + 1, struct TEX_tabent);
4339 /* Unpack environment string into token table. Be careful about */
4340 /* zero-length strings (leading ':', "::" and trailing ':') */
4341 for (i = 0; *env;)
4343 p = etags_strchr (env, ':');
4344 if (!p) /* End of environment string. */
4345 p = env + strlen (env);
4346 if (p - env > 0)
4347 { /* Only non-zero strings. */
4348 tab[i].name = savenstr (env, p - env);
4349 tab[i].len = strlen (tab[i].name);
4350 i++;
4352 if (*p)
4353 env = p + 1;
4354 else
4356 tab[i].name = NULL; /* Mark end of table. */
4357 tab[i].len = 0;
4358 break;
4361 return tab;
4364 /* If the text at CP matches one of the tag-defining TeX command names,
4365 return the pointer to the first occurrence of that command in TEX_toktab.
4366 Otherwise return -1.
4367 Keep the capital `T' in `token' for dumb truncating compilers
4368 (this distinguishes it from `TEX_toktab' */
4369 static int
4370 TEX_Token (cp)
4371 char *cp;
4373 int i;
4375 for (i = 0; TEX_toktab[i].len > 0; i++)
4376 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4377 return i;
4378 return -1;
4382 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4384 * Assumes that the predicate starts at column 0.
4385 * Only the first clause of a predicate is added.
4387 static int prolog_pred P_((char *, char *));
4388 static void prolog_skip_comment P_((linebuffer *, FILE *));
4389 static int prolog_atom P_((char *, int));
4391 static void
4392 Prolog_functions (inf)
4393 FILE *inf;
4395 char *cp, *last;
4396 int len;
4397 int allocated;
4399 allocated = 0;
4400 len = 0;
4401 last = NULL;
4403 LOOP_ON_INPUT_LINES (inf, lb, cp)
4405 if (cp[0] == '\0') /* Empty line */
4406 continue;
4407 else if (iswhite (cp[0])) /* Not a predicate */
4408 continue;
4409 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4410 prolog_skip_comment (&lb, inf);
4411 else if ((len = prolog_pred (cp, last)) > 0)
4413 /* Predicate. Store the function name so that we only
4414 generate a tag for the first clause. */
4415 if (last == NULL)
4416 last = xnew(len + 1, char);
4417 else if (len + 1 > allocated)
4418 last = xrnew (last, len + 1, char);
4419 allocated = len + 1;
4420 strncpy (last, cp, len);
4421 last[len] = '\0';
4427 static void
4428 prolog_skip_comment (plb, inf)
4429 linebuffer *plb;
4430 FILE *inf;
4432 char *cp;
4436 for (cp = plb->buffer; *cp != '\0'; cp++)
4437 if (cp[0] == '*' && cp[1] == '/')
4438 return;
4439 lineno++;
4440 linecharno += readline (plb, inf);
4442 while (!feof(inf));
4446 * A predicate definition is added if it matches:
4447 * <beginning of line><Prolog Atom><whitespace>(
4449 * It is added to the tags database if it doesn't match the
4450 * name of the previous clause header.
4452 * Return the size of the name of the predicate, or 0 if no header
4453 * was found.
4455 static int
4456 prolog_pred (s, last)
4457 char *s;
4458 char *last; /* Name of last clause. */
4460 int pos;
4461 int len;
4463 pos = prolog_atom (s, 0);
4464 if (pos < 1)
4465 return 0;
4467 len = pos;
4468 pos = skip_spaces (s + pos) - s;
4470 if ((s[pos] == '(') || (s[pos] == '.'))
4472 if (s[pos] == '(')
4473 pos++;
4475 /* Save only the first clause. */
4476 if (last == NULL
4477 || len != (int)strlen (last)
4478 || !strneq (s, last, len))
4480 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4481 return len;
4484 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in S at which the atom starts.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* The <ctype.h> functions are only defined for values representable
     as unsigned char (and EOF), hence the casts.  */
  if (islower ((unsigned char) s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      pos++;
      while (isalnum ((unsigned char) s[pos]) || (s[pos] == '_'))
        {
          pos++;
        }
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      while (1)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
4547 * Support for Erlang -- Anders Lindgren, Feb 1996.
4549 * Generates tags for functions, defines, and records.
4551 * Assumes that Erlang functions start at column 0.
4553 static int erlang_func P_((char *, char *));
4554 static void erlang_attribute P_((char *));
4555 static int erlang_atom P_((char *, int));
4557 static void
4558 Erlang_functions (inf)
4559 FILE *inf;
4561 char *cp, *last;
4562 int len;
4563 int allocated;
4565 allocated = 0;
4566 len = 0;
4567 last = NULL;
4569 LOOP_ON_INPUT_LINES (inf, lb, cp)
4571 if (cp[0] == '\0') /* Empty line */
4572 continue;
4573 else if (iswhite (cp[0])) /* Not function nor attribute */
4574 continue;
4575 else if (cp[0] == '%') /* comment */
4576 continue;
4577 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4578 continue;
4579 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4581 erlang_attribute (cp);
4582 last = NULL;
4584 else if ((len = erlang_func (cp, last)) > 0)
4587 * Function. Store the function name so that we only
4588 * generates a tag for the first clause.
4590 if (last == NULL)
4591 last = xnew (len + 1, char);
4592 else if (len + 1 > allocated)
4593 last = xrnew (last, len + 1, char);
4594 allocated = len + 1;
4595 strncpy (last, cp, len);
4596 last[len] = '\0';
4603 * A function definition is added if it matches:
4604 * <beginning of line><Erlang Atom><whitespace>(
4606 * It is added to the tags database if it doesn't match the
4607 * name of the previous clause header.
4609 * Return the size of the name of the function, or 0 if no function
4610 * was found.
4612 static int
4613 erlang_func (s, last)
4614 char *s;
4615 char *last; /* Name of last clause. */
4617 int pos;
4618 int len;
4620 pos = erlang_atom (s, 0);
4621 if (pos < 1)
4622 return 0;
4624 len = pos;
4625 pos = skip_spaces (s + pos) - s;
4627 /* Save only the first clause. */
4628 if (s[pos++] == '('
4629 && (last == NULL
4630 || len != (int)strlen (last)
4631 || !strneq (s, last, len)))
4633 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4634 return len;
4637 return 0;
4642 * Handle attributes. Currently, tags are generated for defines
4643 * and records.
4645 * They are on the form:
4646 * -define(foo, bar).
4647 * -define(Foo(M, N), M+N).
4648 * -record(graph, {vtab = notable, cyclic = true}).
4650 static void
4651 erlang_attribute (s)
4652 char *s;
4654 int pos;
4655 int len;
4657 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4659 pos = skip_spaces (s + 7) - s;
4660 if (s[pos++] == '(')
4662 pos = skip_spaces (s + pos) - s;
4663 len = erlang_atom (s, pos);
4664 if (len != 0)
4665 pfnote (savenstr (& s[pos], len), TRUE,
4666 s, pos + len, lineno, linecharno);
4669 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * S is the text and POS the offset in S at which the atom starts.  An
 * unquoted atom starts with a letter or underscore and continues with
 * letters, digits and underscores; a quoted atom runs to the next single
 * quote, a backslash quoting the character that follows it.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* The <ctype.h> functions are only defined for values representable
     as unsigned char (and EOF), hence the casts.  */
  if (isalpha ((unsigned char) s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      pos++;
      while (isalnum ((unsigned char) s[pos]) || s[pos] == '_')
        pos++;
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      while (1)
        {
          if (s[pos] == '\'')
            {
              pos++;
              break;
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
4723 #ifdef ETAGS_REGEXPS
4725 static char *scan_separators P_((char *));
4726 static void analyse_regex P_((char *, bool));
4727 static void add_regex P_((char *, bool, language *));
4728 static char *substitute P_((char *, char *, struct re_registers *));
/* Take a string like "/blah/" and turn it into "blah", handling quoted
   separator characters.  NAME[0] is the separator; scanning stops on the
   first unquoted occurrence of it, or at the end of the string.  A
   backslash quotes the next character: "\t" becomes a Tab, a quoted
   separator becomes the separator itself, and any other quoted character
   is kept together with its backslash.  Works in place and null
   terminates the result, which starts at NAME.  Returns a pointer to the
   character on which scanning stopped.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character; a backslash that ends the
             string is dropped.  */
          ++name;
          if (*name == '\0')
            break;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
          continue;
        }
      if (*name == sep)
        break;
      *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
4773 /* Look at the argument of --regex or --no-regex and do the right
4774 thing. Same for each line of a regexp file. */
4775 static void
4776 analyse_regex (regex_arg, ignore_case)
4777 char *regex_arg;
4778 bool ignore_case;
4780 if (regex_arg == NULL)
4781 free_patterns (); /* --no-regex: remove existing regexps */
4783 /* A real --regexp option or a line in a regexp file. */
4784 switch (regex_arg[0])
4786 /* Comments in regexp file or null arg to --regex. */
4787 case '\0':
4788 case ' ':
4789 case '\t':
4790 break;
4792 /* Read a regex file. This is recursive and may result in a
4793 loop, which will stop when the file descriptors are exhausted. */
4794 case '@':
4796 FILE *regexfp;
4797 linebuffer regexbuf;
4798 char *regexfile = regex_arg + 1;
4800 /* regexfile is a file containing regexps, one per line. */
4801 regexfp = fopen (regexfile, "r");
4802 if (regexfp == NULL)
4804 pfatal (regexfile);
4805 return;
4807 initbuffer (&regexbuf);
4808 while (readline_internal (&regexbuf, regexfp) > 0)
4809 analyse_regex (regexbuf.buffer, ignore_case);
4810 free (regexbuf.buffer);
4811 fclose (regexfp);
4813 break;
4815 /* Regexp to be used for a specific language only. */
4816 case '{':
4818 language *lang;
4819 char *lang_name = regex_arg + 1;
4820 char *cp;
4822 for (cp = lang_name; *cp != '}'; cp++)
4823 if (*cp == '\0')
4825 error ("unterminated language name in regex: %s", regex_arg);
4826 return;
4828 *cp = '\0';
4829 lang = get_language_from_name (lang_name);
4830 if (lang == NULL)
4831 return;
4832 add_regex (cp + 1, ignore_case, lang);
4834 break;
4836 /* Regexp to be used for any language. */
4837 default:
4838 add_regex (regex_arg, ignore_case, NULL);
4839 break;
4843 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4844 expression, into a real regular expression by compiling it. */
4845 static void
4846 add_regex (regexp_pattern, ignore_case, lang)
4847 char *regexp_pattern;
4848 bool ignore_case;
4849 language *lang;
4851 char *name;
4852 const char *err;
4853 struct re_pattern_buffer *patbuf;
4854 pattern *pp;
4857 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4859 error ("%s: unterminated regexp", regexp_pattern);
4860 return;
4862 name = scan_separators (regexp_pattern);
4863 if (regexp_pattern[0] == '\0')
4865 error ("null regexp", (char *)NULL);
4866 return;
4868 (void) scan_separators (name);
4870 patbuf = xnew (1, struct re_pattern_buffer);
4871 /* Translation table to fold case if appropriate. */
4872 patbuf->translate = (ignore_case) ? lc_trans : NULL;
4873 patbuf->fastmap = NULL;
4874 patbuf->buffer = NULL;
4875 patbuf->allocated = 0;
4877 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4878 if (err != NULL)
4880 error ("%s while compiling pattern", err);
4881 return;
4884 pp = p_head;
4885 p_head = xnew (1, pattern);
4886 p_head->regex = savestr (regexp_pattern);
4887 p_head->p_next = pp;
4888 p_head->language = lang;
4889 p_head->pattern = patbuf;
4890 p_head->name_pattern = savestr (name);
4891 p_head->error_signaled = FALSE;
4895 * Do the substitutions indicated by the regular expression and
4896 * arguments.
4898 static char *
4899 substitute (in, out, regs)
4900 char *in, *out;
4901 struct re_registers *regs;
4903 char *result, *t;
4904 int size, dig, diglen;
4906 result = NULL;
4907 size = strlen (out);
4909 /* Pass 1: figure out how much to allocate by finding all \N strings. */
4910 if (out[size - 1] == '\\')
4911 fatal ("pattern error in \"%s\"", out);
4912 for (t = etags_strchr (out, '\\');
4913 t != NULL;
4914 t = etags_strchr (t + 2, '\\'))
4915 if (isdigit (t[1]))
4917 dig = t[1] - '0';
4918 diglen = regs->end[dig] - regs->start[dig];
4919 size += diglen - 2;
4921 else
4922 size -= 1;
4924 /* Allocate space and do the substitutions. */
4925 result = xnew (size + 1, char);
4927 for (t = result; *out != '\0'; out++)
4928 if (*out == '\\' && isdigit (*++out))
4930 /* Using "dig2" satisfies my debugger. Bleah. */
4931 dig = *out - '0';
4932 diglen = regs->end[dig] - regs->start[dig];
4933 strncpy (t, in + regs->start[dig], diglen);
4934 t += diglen;
4936 else
4937 *t++ = *out;
4938 *t = '\0';
4940 if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
4941 abort ();
4943 return result;
4946 /* Deallocate all patterns. */
4947 static void
4948 free_patterns ()
4950 pattern *pp;
4951 while (p_head != NULL)
4953 pp = p_head->p_next;
4954 free (p_head->regex);
4955 free (p_head->name_pattern);
4956 free (p_head);
4957 p_head = pp;
4959 return;
4962 static void
4963 get_tag (bp)
4964 register char *bp;
4966 register char *cp;
4968 if (*bp == '\0')
4969 return;
4970 /* Go till you get to white space or a syntactic break */
4971 for (cp = bp + 1;
4972 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
4973 cp++)
4974 continue;
4975 pfnote (savenstr (bp, cp-bp), TRUE,
4976 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4979 #endif /* ETAGS_REGEXPS */
4980 /* Initialize a linebuffer for use */
4981 static void
4982 initbuffer (lbp)
4983 linebuffer *lbp;
4985 lbp->size = 200;
4986 lbp->buffer = xnew (200, char);
4990 * Read a line of text from `stream' into `lbp', excluding the
4991 * newline or CR-NL, if any. Return the number of characters read from
4992 * `stream', which is the length of the line including the newline.
4994 * On DOS or Windows we do not count the CR character, if any, before the
4995 * NL, in the returned length; this mirrors the behavior of emacs on those
4996 * platforms (for text files, it translates CR-NL to NL as it reads in the
4997 * file).
4999 static long
5000 readline_internal (lbp, stream)
5001 linebuffer *lbp;
5002 register FILE *stream;
5004 char *buffer = lbp->buffer;
5005 register char *p = lbp->buffer;
5006 register char *pend;
5007 int chars_deleted;
5009 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5011 while (1)
5013 register int c = getc (stream);
5014 if (p == pend)
5016 /* We're at the end of linebuffer: expand it. */
5017 lbp->size *= 2;
5018 buffer = xrnew (buffer, lbp->size, char);
5019 p += buffer - lbp->buffer;
5020 pend = buffer + lbp->size;
5021 lbp->buffer = buffer;
5023 if (c == EOF)
5025 *p = '\0';
5026 chars_deleted = 0;
5027 break;
5029 if (c == '\n')
5031 if (p > buffer && p[-1] == '\r')
5033 p -= 1;
5034 #ifdef DOS_NT
5035 /* Assume CRLF->LF translation will be performed by Emacs
5036 when loading this file, so CRs won't appear in the buffer.
5037 It would be cleaner to compensate within Emacs;
5038 however, Emacs does not know how many CRs were deleted
5039 before any given point in the file. */
5040 chars_deleted = 1;
5041 #else
5042 chars_deleted = 2;
5043 #endif
5045 else
5047 chars_deleted = 1;
5049 *p = '\0';
5050 break;
5052 *p++ = c;
5054 lbp->len = p - buffer;
5056 return lbp->len + chars_deleted;
5060 * Like readline_internal, above, but in addition try to match the
5061 * input line against relevant regular expressions.
5063 static long
5064 readline (lbp, stream)
5065 linebuffer *lbp;
5066 FILE *stream;
5068 /* Read new line. */
5069 long result = readline_internal (lbp, stream);
5070 #ifdef ETAGS_REGEXPS
5071 int match;
5072 pattern *pp;
5074 /* Match against relevant patterns. */
5075 if (lbp->len > 0)
5076 for (pp = p_head; pp != NULL; pp = pp->p_next)
5078 /* Only use generic regexps or those for the current language. */
5079 if (pp->language != NULL && pp->language != curlang)
5080 continue;
5082 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5083 switch (match)
5085 case -2:
5086 /* Some error. */
5087 if (!pp->error_signaled)
5089 error ("error while matching \"%s\"", pp->regex);
5090 pp->error_signaled = TRUE;
5092 break;
5093 case -1:
5094 /* No match. */
5095 break;
5096 default:
5097 /* Match occurred. Construct a tag. */
5098 if (pp->name_pattern[0] != '\0')
5100 /* Make a named tag. */
5101 char *name = substitute (lbp->buffer,
5102 pp->name_pattern, &pp->regs);
5103 if (name != NULL)
5104 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5106 else
5108 /* Make an unnamed tag. */
5109 pfnote ((char *)NULL, TRUE,
5110 lbp->buffer, match, lineno, linecharno);
5112 break;
5115 #endif /* ETAGS_REGEXPS */
5117 return result;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5132 * Return a pointer to a space of size LEN+1 allocated with xnew where
5133 * the string CP has been copied for at most the first LEN characters.
5135 static char *
5136 savenstr (cp, len)
5137 char *cp;
5138 int len;
5140 register char *dp;
5142 dp = xnew (len + 1, char);
5143 strncpy (dp, cp, len);
5144 dp[len] = '\0';
5145 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to char before comparing and the
 * terminating null is considered part of the string.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Compare as char: with a signed char type, a C above 127 would never
     equal any element of the string if compared as int.  */
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * part of the search, so C == '\0' yields a pointer to the terminator.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
/* Advance CP past every leading character for which iswhite is true
   and return the resulting pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
/* Advance CP until it points at either the terminating NUL or a
   character for which iswhite is true; return the resulting pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;

  return cp;
}
5210 /* Print error message and exit. */
5211 static void
5212 fatal (s1, s2)
5213 char *s1, *s2;
5215 error (s1, s2);
5216 exit (BAD);
5219 static void
5220 pfatal (s1)
5221 char *s1;
5223 perror (s1);
5224 exit (BAD);
5227 static void
5228 suggest_asking_for_help ()
5230 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5231 progname,
5232 #ifdef LONG_OPTIONS
5233 "--help"
5234 #else
5235 "-h"
5236 #endif
5238 exit (BAD);
5241 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5242 static void
5243 error (s1, s2)
5244 const char *s1, *s2;
5246 fprintf (stderr, "%s: ", progname);
5247 fprintf (stderr, s1, s2);
5248 fprintf (stderr, "\n");
5251 /* Return a newly-allocated string whose contents
5252 concatenate those of s1, s2, s3. */
5253 static char *
5254 concat (s1, s2, s3)
5255 char *s1, *s2, *s3;
5257 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5258 char *result = xnew (len1 + len2 + len3 + 1, char);
5260 strcpy (result, s1);
5261 strcpy (result + len1, s2);
5262 strcpy (result + len1 + len2, s3);
5263 result[len1 + len2 + len3] = '\0';
5265 return result;
5268 /* Does the same work as the system V getcwd, but does not need to
5269 guess the buffer size in advance. */
5270 static char *
5271 etags_getcwd ()
5273 #ifdef HAVE_GETCWD
5274 int bufsize = 200;
5275 char *path = xnew (bufsize, char);
5277 while (getcwd (path, bufsize) == NULL)
5279 if (errno != ERANGE)
5280 pfatal ("getcwd");
5281 bufsize *= 2;
5282 free (path);
5283 path = xnew (bufsize, char);
5286 canonicalize_filename (path);
5287 return path;
5289 #else /* not HAVE_GETCWD */
5290 #ifdef MSDOS
5291 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5293 getwd (path);
5295 for (p = path; *p != '\0'; p++)
5296 if (*p == '\\')
5297 *p = '/';
5298 else
5299 *p = lowcase (*p);
5301 return strdup (path);
5302 #else /* not MSDOS */
5303 linebuffer path;
5304 FILE *pipe;
5306 initbuffer (&path);
5307 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5308 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5309 pfatal ("pwd");
5310 pclose (pipe);
5312 return path.buffer;
5313 #endif /* not MSDOS */
5314 #endif /* not HAVE_GETCWD */
5317 /* Return a newly allocated string containing the file name of FILE
5318 relative to the absolute directory DIR (which should end with a slash). */
5319 static char *
5320 relative_filename (file, dir)
5321 char *file, *dir;
5323 char *fp, *dp, *afn, *res;
5324 int i;
5326 /* Find the common root of file and dir (with a trailing slash). */
5327 afn = absolute_filename (file, cwd);
5328 fp = afn;
5329 dp = dir;
5330 while (*fp++ == *dp++)
5331 continue;
5332 fp--, dp--; /* back to the first differing char */
5333 #ifdef DOS_NT
5334 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5335 return afn;
5336 #endif
5337 do /* look at the equal chars until '/' */
5338 fp--, dp--;
5339 while (*fp != '/');
5341 /* Build a sequence of "../" strings for the resulting relative file name. */
5342 i = 0;
5343 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5344 i += 1;
5345 res = xnew (3*i + strlen (fp + 1) + 1, char);
5346 res[0] = '\0';
5347 while (i-- > 0)
5348 strcat (res, "../");
5350 /* Add the file name relative to the common root of file and dir. */
5351 strcat (res, fp + 1);
5352 free (afn);
5354 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then removed in
   place.  If nothing is left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle). */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'. */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy would be
                 undefined here; memmove copies the tail and its NUL. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the regions overlap here as well. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Nothing left of the name: release the empty buffer and
         return the root directory instead. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is passed to canonicalize_filename and is briefly truncated
   after its last slash while the result is computed; the overwritten
   character is restored before returning.  If FILE has no slash,
   a copy of DIR is returned. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);

  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after the last slash so that only its directory
     part is made absolute, then undo the cut. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.

   A name is absolute when it starts with a slash; under DOS_NT a
   letter followed by ":/" is accepted as well. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  return (fn[0] == '/'
#ifdef DOS_NT
          /* The cast keeps the <ctype.h> argument within the range of
             unsigned char even when plain char is signed. */
          || (isalpha ((unsigned char) fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Translate backslashes into slashes.  Works in place.

   Under DOS_NT a leading drive letter is also converted to upper
   case.  Without DOS_NT the name is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  Only a letter followed by ':' is
     a drive specification; the first character of any other name must
     be preserved.  The casts keep the <ctype.h> arguments within the
     range of unsigned char. */
  if (fn[0] != '\0' && fn[1] == ':' && islower ((unsigned char) fn[0]))
    fn[0] = toupper ((unsigned char) fn[0]);
  /* Convert backslashes to slashes. */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5472 /* Increase the size of a linebuffer. */
5473 static void
5474 grow_linebuffer (lbp, toksize)
5475 linebuffer *lbp;
5476 int toksize;
5478 while (lbp->size < toksize)
5479 lbp->size *= 2;
5480 lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
/* Like malloc but get fatal error if memory is exhausted.
   Return the SIZE bytes obtained from malloc as a `long *'. */
static long *
xmalloc (size)
     unsigned int size;
{
  long *block;

  block = (long *) malloc (size);
  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
/* Like realloc but get fatal error if memory is exhausted.
   Resize the block PTR to SIZE bytes and return it as a `long *'. */
static long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *block;

  block = (long *) realloc (ptr, size);
  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}