(uninstall): Ignore exit code of `rm'.
[emacs.git] / lib-src / etags.c
blob7ae810d96fc0443b562def274b45a2dfcf8fbb9e
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
29 * 1994 Regexp tags by Tom Tromey.
30 * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
32 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
35 char pot_etags_version[] = "@(#) pot revision number is 14.15";
37 #define TRUE 1
38 #define FALSE 0
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #endif /* HAVE_CONFIG_H */
63 #ifndef _GNU_SOURCE
64 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
65 #endif
67 /* WIN32_NATIVE is for Xemacs.
68 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
69 #ifdef WIN32_NATIVE
70 # undef MSDOS
71 # undef WINDOWSNT
72 # define WINDOWSNT
73 #endif /* WIN32_NATIVE */
75 #ifdef MSDOS
76 # undef MSDOS
77 # define MSDOS TRUE
78 # include <fcntl.h>
79 # include <sys/param.h>
80 # include <io.h>
81 # ifndef HAVE_CONFIG_H
82 # define DOS_NT
83 # include <sys/config.h>
84 # endif
85 #else
86 # define MSDOS FALSE
87 #endif /* MSDOS */
89 #ifdef WINDOWSNT
90 # include <stdlib.h>
91 # include <fcntl.h>
92 # include <string.h>
93 # include <direct.h>
94 # include <io.h>
95 # define MAXPATHLEN _MAX_PATH
96 # undef HAVE_NTGUI
97 # undef DOS_NT
98 # define DOS_NT
99 # ifndef HAVE_GETCWD
100 # define HAVE_GETCWD
101 # endif /* undef HAVE_GETCWD */
102 #else /* !WINDOWSNT */
103 # ifdef STDC_HEADERS
104 # include <stdlib.h>
105 # include <string.h>
106 # else
107 extern char *getenv ();
108 # endif
109 #endif /* !WINDOWSNT */
111 #ifdef HAVE_UNISTD_H
112 # include <unistd.h>
113 #else
114 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
115 extern char *getcwd (char *buf, size_t size);
116 # endif
117 #endif /* HAVE_UNISTD_H */
119 #include <stdio.h>
120 #include <ctype.h>
121 #include <errno.h>
122 #ifndef errno
123 extern int errno;
124 #endif
125 #include <sys/types.h>
126 #include <sys/stat.h>
128 #include <assert.h>
129 #ifdef NDEBUG
130 # undef assert /* some systems have a buggy assert.h */
131 # define assert(x) ((void) 0)
132 #endif
134 #if !defined (S_ISREG) && defined (S_IFREG)
135 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
136 #endif
138 #ifdef LONG_OPTIONS
139 # include <getopt.h>
140 #else
141 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
142 extern char *optarg;
143 extern int optind, opterr;
144 #endif /* LONG_OPTIONS */
146 #ifdef ETAGS_REGEXPS
147 # include <regex.h>
148 #endif /* ETAGS_REGEXPS */
150 /* Define CTAGS to make the program "ctags" compatible with the usual one.
151 Leave it undefined to make the program "etags", which makes emacs-style
152 tag tables and tags typedefs, #defines and struct/union/enum by default. */
153 #ifdef CTAGS
154 # undef CTAGS
155 # define CTAGS TRUE
156 #else
157 # define CTAGS FALSE
158 #endif
160 /* Exit codes for success and failure. */
161 #ifdef VMS
162 # define GOOD 1
163 # define BAD 0
164 #else
165 # define GOOD 0
166 # define BAD 1
167 #endif
/*
 * streq, strneq -- string equality predicates
 *
 * Both yield nonzero when the two strings compare equal; strneq compares
 * at most N characters.  When assertions are enabled, BOTH arguments are
 * checked to be non-NULL before they are handed to strcmp/strncmp (the
 * check used to pass when only one of them was non-NULL).
 * The arguments may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
/* Character classification helpers.

   CHARS is the number of entries in each classification table, and CHAR
   folds any char value, signed or not, into the range 0..CHARS-1 so it
   can safely index those tables and be passed to the <ctype.h>
   functions.  */
#define CHARS 256
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Table lookups; the tables themselves are filled in by init.  */
#define	iswhite(c)	(_wht[CHAR(c)])	/* c is white */
#define notinname(c)	(_nin[CHAR(c)])	/* c is not in a name */
#define	begtoken(c)	(_btk[CHAR(c)])	/* c can start token */
#define	intoken(c)	(_itk[CHAR(c)])	/* c can be in token */
#define	endtoken(c)	(_etk[CHAR(c)])	/* c ends tokens */

/* Wrappers around <ctype.h> that never pass a negative value.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* Case conversion with the same protection.  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew asks for room for N objects of type Type.  xrnew resizes the
 * storage OldPointer refers to so that it holds N such objects and
 * assigns the result back to OldPointer.  Debug builds go through the
 * tracing allocator declared in chkmalloc.h, which also records the
 * source position of the request.
 */
#if !DEBUG
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#else
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#endif
207 typedef int bool;
209 typedef void Lang_function P_((FILE *));
/* One entry of the table of known compression schemes.  */
typedef struct
{
  char *suffix;			/* suffix string for this entry */
  char *command;		/* Takes one arg and decompresses to stdout */
} compressor;
217 typedef struct
219 char *name;
220 Lang_function *function;
221 char **filenames;
222 char **suffixes;
223 char **interpreters;
224 } language;
226 typedef struct node_st
227 { /* sorting structure */
228 char *name; /* function or type name */
229 char *file; /* file name */
230 bool is_func; /* use pattern or line no */
231 bool been_warned; /* set if noticed dup */
232 int lno; /* line number tag is on */
233 long cno; /* character number line starts on */
234 char *pat; /* search pattern */
235 struct node_st *left, *right; /* left and right sons */
236 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
252 /* Many compilers barf on this:
253 Lang_function Ada_funcs;
254 so let's write it this way */
255 static void Ada_funcs P_((FILE *));
256 static void Asm_labels P_((FILE *));
257 static void C_entries P_((int c_ext, FILE *));
258 static void default_C_entries P_((FILE *));
259 static void plain_C_entries P_((FILE *));
260 static void Cjava_entries P_((FILE *));
261 static void Cobol_paragraphs P_((FILE *));
262 static void Cplusplus_entries P_((FILE *));
263 static void Cstar_entries P_((FILE *));
264 static void Erlang_functions P_((FILE *));
265 static void Fortran_functions P_((FILE *));
266 static void Yacc_entries P_((FILE *));
267 static void Lisp_functions P_((FILE *));
268 static void Makefile_targets P_((FILE *));
269 static void Pascal_functions P_((FILE *));
270 static void Perl_functions P_((FILE *));
271 static void Postscript_functions P_((FILE *));
272 static void Prolog_functions P_((FILE *));
273 static void Python_functions P_((FILE *));
274 static void Scheme_functions P_((FILE *));
275 static void TeX_commands P_((FILE *));
276 static void Texinfo_nodes P_((FILE *));
277 static void just_read_file P_((FILE *));
279 static void print_language_names P_((void));
280 static void print_version P_((void));
281 static void print_help P_((void));
282 int main P_((int, char **));
283 static int number_len P_((long));
285 static compressor *get_compressor_from_suffix P_((char *, char **));
286 static language *get_language_from_langname P_((char *));
287 static language *get_language_from_interpreter P_((char *));
288 static language *get_language_from_filename P_((char *));
289 static int total_size_of_entries P_((node *));
290 static long readline P_((linebuffer *, FILE *));
291 static long readline_internal P_((linebuffer *, FILE *));
292 static void get_tag P_((char *));
294 #ifdef ETAGS_REGEXPS
295 static void analyse_regex P_((char *, bool));
296 static void add_regex P_((char *, bool, language *));
297 static void free_patterns P_((void));
298 #endif /* ETAGS_REGEXPS */
299 static void error P_((const char *, const char *));
300 static void suggest_asking_for_help P_((void));
301 void fatal P_((char *, char *));
302 static void pfatal P_((char *));
303 static void add_node P_((node *, node **));
305 static void init P_((void));
306 static void initbuffer P_((linebuffer *));
307 static void find_entries P_((char *, FILE *));
308 static void free_tree P_((node *));
309 static void pfnote P_((char *, bool, char *, int, int, long));
310 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
311 static void process_file P_((char *));
312 static void put_entries P_((node *));
313 static void takeprec P_((void));
315 static char *concat P_((char *, char *, char *));
316 static char *skip_spaces P_((char *));
317 static char *skip_non_spaces P_((char *));
318 static char *savenstr P_((char *, int));
319 static char *savestr P_((char *));
320 static char *etags_strchr P_((const char *, int));
321 static char *etags_strrchr P_((const char *, int));
322 static char *etags_getcwd P_((void));
323 static char *relative_filename P_((char *, char *));
324 static char *absolute_filename P_((char *, char *));
325 static char *absolute_dirname P_((char *, char *));
326 static bool filename_is_absolute P_((char *f));
327 static void canonicalize_filename P_((char *));
328 static void linebuffer_setlen P_((linebuffer *, int));
329 long *xmalloc P_((unsigned int));
330 long *xrealloc P_((char *, unsigned int));
333 char searchar = '/'; /* use /.../ searches */
335 char *tagfile; /* output file */
336 char *progname; /* name this program was invoked with */
337 char *cwd; /* current working directory */
338 char *tagfiledir; /* directory of tagfile */
339 FILE *tagf; /* ioptr for tags file */
341 char *curfile; /* current input file name */
342 language *curlang; /* current language */
344 int lineno; /* line number of current line */
345 long charno; /* current character number */
346 long linecharno; /* charno of start of current line */
347 char *dbp; /* pointer to start of current tag */
349 node *head; /* the head of the binary tree of tags */
351 linebuffer lb; /* the current line */
353 /* boolean "functions" (see init) */
354 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
355 char
356 /* white chars */
357 *white = " \f\t\n\r\v",
358 /* not in a name */
359 *nonam = " \f\t\n\r(=,[;",
360 /* token ending chars */
361 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
362 /* token starting chars */
363 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
364 /* valid in-token chars */
365 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
367 bool append_to_tagfile; /* -a: append to tags */
368 /* The following four default to TRUE for etags, but to FALSE for ctags. */
369 bool typedefs; /* -t: create tags for C and Ada typedefs */
370 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
371 /* 0 struct/enum/union decls, and C++ */
372 /* member functions. */
373 bool constantypedefs; /* -d: create tags for C #define, enum */
374 /* constants and variables. */
375 /* -D: opposite of -d. Default under ctags. */
376 bool declarations; /* --declarations: tag them and extern in C&Co*/
377 bool globals; /* create tags for global variables */
378 bool members; /* create tags for C member variables */
379 bool update; /* -u: update tags */
380 bool vgrind_style; /* -v: create vgrind style index output */
381 bool no_warnings; /* -w: suppress warnings */
382 bool cxref_style; /* -x: create cxref style output */
383 bool cplusplus; /* .[hc] means C++, not C */
384 bool noindentypedefs; /* -I: ignore indentation in C */
385 bool packages_only; /* --packages-only: in Ada, only tag packages*/
#ifdef LONG_OPTIONS
/* Table of long options for getopt_long.  An entry with a non-NULL flag
   pointer stores its value straight into the named variable (main sees
   a return of 0 for those); every other entry maps the long name to the
   short option letter handled in main's switch.  */
struct option longopts[] =
{
  { "packages-only",      no_argument,	     &packages_only,	 TRUE  },
  { "append",		  no_argument,	     NULL,		 'a'   },
  { "backward-search",	  no_argument,	     NULL,		 'B'   },
  { "c++",		  no_argument,	     NULL,		 'C'   },
  { "cxref",		  no_argument,	     NULL,		 'x'   },
  { "defines",		  no_argument,	     NULL,		 'd'   },
  { "declarations",	  no_argument,	     &declarations,	 TRUE  },
  { "no-defines",	  no_argument,	     NULL,		 'D'   },
  { "globals",		  no_argument,	     &globals,		 TRUE  },
  { "no-globals",	  no_argument,	     &globals,		 FALSE },
  { "help",		  no_argument,	     NULL,		 'h'   },
  { "help",		  no_argument,	     NULL,		 'H'   },
  { "ignore-indentation", no_argument,	     NULL,		 'I'   },
  { "include",		  required_argument, NULL,		 'i'   },
  { "language",           required_argument, NULL,		 'l'   },
  { "members",		  no_argument,	     &members,		 TRUE  },
  { "no-members",	  no_argument,	     &members,		 FALSE },
  { "no-warn",		  no_argument,	     NULL,		 'w'   },
  { "output",		  required_argument, NULL,		 'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",		  required_argument, NULL,		 'r'   },
  { "no-regex",		  no_argument,	     NULL,		 'R'   },
  { "ignore-case-regex",  required_argument, NULL,		 'c'   },
#endif /* ETAGS_REGEXPS */
  { "typedefs",		  no_argument,	     NULL,		 't'   },
  { "typedefs-and-c++",	  no_argument,	     NULL,		 'T'   },
  { "update",		  no_argument,	     NULL,		 'u'   },
  { "version",		  no_argument,	     NULL,		 'V'   },
  { "vgrind",		  no_argument,	     NULL,		 'v'   },
  { NULL }			/* end of table */
};
#endif /* LONG_OPTIONS */
#ifdef ETAGS_REGEXPS
/* One user-supplied regular expression.  Besides the compiled pattern
   and the name string, each element records the language it is tied
   to, the regexp source text, the match registers and an error flag.
   Elements are chained through P_NEXT.  */
typedef struct pattern
{
  struct pattern *p_next;		/* next element of the list */
  language *language;			/* language of this regexp */
  char *regex;				/* the regexp text */
  struct re_pattern_buffer *pattern;	/* the compiled pattern */
  struct re_registers regs;		/* registers for matching */
  char *name_pattern;			/* the name string */
  bool error_signaled;			/* error flag for this pattern */
} pattern;

/* List of all regexps.  */
pattern *p_head = NULL;

/* How many characters in the character set.  (From regex.c.)  */
#define CHAR_SET_SIZE 256
/* Translation table for case-insensitive matching.  */
char lc_trans[CHAR_SET_SIZE];
#endif /* ETAGS_REGEXPS */
446 compressor compressors[] =
448 { "z", "gzip -d -c"},
449 { "Z", "gzip -d -c"},
450 { "gz", "gzip -d -c"},
451 { "GZ", "gzip -d -c"},
452 { "bz2", "bzip2 -d -c" },
453 { NULL }
457 * Language stuff.
460 /* Non-NULL if language fixed. */
461 language *forced_lang = NULL;
/* Per-language lists of file name suffixes (without the dot), file
   names and interpreter names.  Every list is NULL-terminated.  */

/* Ada code */
char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };

/* Assembly code */
char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes  */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s", "sa",			/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' keyword is met inside the file.
   That is why default_C_entries is called for these.  */
char *default_C_suffixes [] = { "c", "h", NULL };

char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
  "M",				/* Objective C++ */
  "pdb",			/* Postscript with C syntax */
  NULL
};

char *Cjava_suffixes [] = { "java", NULL };

char *Cobol_suffixes [] = { "COB", "cob", NULL };

char *Cstar_suffixes [] = { "cs", "hs", NULL };

char *Erlang_suffixes [] = { "erl", "hrl", NULL };

char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };

char *Lisp_suffixes [] =
{ "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };

char *Makefile_filenames [] =
{ "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL };

char *Pascal_suffixes [] = { "p", "pas", NULL };

char *Perl_suffixes [] = { "pl", "pm", NULL };
char *Perl_interpreters [] = { "perl", "@PERL@", NULL };

char *plain_C_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  "pc",				/* Pro*C file */
  NULL
};

/* .psw is for PSWrap */
char *Postscript_suffixes [] = { "ps", "psw", NULL };

char *Prolog_suffixes [] = { "prolog", NULL };

char *Python_suffixes [] = { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number.  */
char *Scheme_suffixes [] =
{ "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };

char *TeX_suffixes [] =
{ "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };

char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL };

/* .ym is Objective yacc file */
char *Yacc_suffixes [] = { "y", "y++", "ym", "yxx", "yy", NULL };
549 * Table of languages.
551 * It is ok for a given function to be listed under more than one
552 * name. I just didn't.
555 language lang_names [] =
557 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
558 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
559 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
560 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
561 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
562 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
563 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
564 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
565 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
566 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
567 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
568 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
569 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
570 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
571 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
572 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
573 { "python", Python_functions, NULL, Python_suffixes, NULL },
574 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
575 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
576 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
577 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
578 { "auto", NULL }, /* default guessing scheme */
579 { "none", just_read_file }, /* regexp matching only */
580 { NULL, NULL } /* end of list */
584 static void
585 print_language_names ()
587 language *lang;
588 char **name, **ext;
590 puts ("\nThese are the currently supported languages, along with the\n\
591 default file names and dot suffixes:");
592 for (lang = lang_names; lang->name != NULL; lang++)
594 printf (" %-*s", 10, lang->name);
595 if (lang->filenames != NULL)
596 for (name = lang->filenames; *name != NULL; name++)
597 printf (" %s", *name);
598 if (lang->suffixes != NULL)
599 for (ext = lang->suffixes; *ext != NULL; ext++)
600 printf (" .%s", *ext);
601 puts ("");
603 puts ("Where `auto' means use default language for files based on file\n\
604 name suffix, and `none' means only do regexp processing on files.\n\
605 If no language is specified and no matching suffix is found,\n\
606 the first line of the file is read for a sharp-bang (#!) sequence\n\
607 followed by the name of an interpreter. If no such sequence is found,\n\
608 Fortran is tried first; if no tags are found, C is tried next.\n\
609 When parsing any C file, a \"class\" keyword switches to C++.\n\
610 Compressed files are supported using gzip and bzip2.");
613 #ifndef EMACS_NAME
614 # define EMACS_NAME "GNU Emacs"
615 #endif
616 #ifndef VERSION
617 # define VERSION "21"
618 #endif
619 static void
620 print_version ()
622 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
623 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
624 puts ("This program is distributed under the same terms as Emacs");
626 exit (GOOD);
629 static void
630 print_help ()
632 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
634 These are the options accepted by %s.\n", progname, progname);
635 #ifdef LONG_OPTIONS
636 puts ("You may use unambiguous abbreviations for the long option names.");
637 #else
638 puts ("Long option names do not work with this executable, as it is not\n\
639 linked with GNU getopt.");
640 #endif /* LONG_OPTIONS */
641 puts ("A - as file name means read names from stdin (one per line).");
642 if (!CTAGS)
643 printf (" Absolute names are stored in the output file as they are.\n\
644 Relative ones are stored relative to the output file's directory.");
645 puts ("\n");
647 puts ("-a, --append\n\
648 Append tag entries to existing tags file.");
650 puts ("--packages-only\n\
651 For Ada files, only generate tags for packages .");
653 if (CTAGS)
654 puts ("-B, --backward-search\n\
655 Write the search commands for the tag entries using '?', the\n\
656 backward-search command instead of '/', the forward-search command.");
658 /* This option is mostly obsolete, because etags can now automatically
659 detect C++. Retained for backward compatibility and for debugging and
660 experimentation. In principle, we could want to tag as C++ even
661 before any "class" keyword.
662 puts ("-C, --c++\n\
663 Treat files whose name suffix defaults to C language as C++ files.");
666 puts ("--declarations\n\
667 In C and derived languages, create tags for function declarations,");
668 if (CTAGS)
669 puts ("\tand create tags for extern variables if --globals is used.");
670 else
671 puts
672 ("\tand create tags for extern variables unless --no-globals is used.");
674 if (CTAGS)
675 puts ("-d, --defines\n\
676 Create tag entries for C #define constants and enum constants, too.");
677 else
678 puts ("-D, --no-defines\n\
679 Don't create tag entries for C #define constants and enum constants.\n\
680 This makes the tags file smaller.");
682 if (!CTAGS)
684 puts ("-i FILE, --include=FILE\n\
685 Include a note in tag file indicating that, when searching for\n\
686 a tag, one should also consult the tags file FILE after\n\
687 checking the current file.");
688 puts ("-l LANG, --language=LANG\n\
689 Force the following files to be considered as written in the\n\
690 named language up to the next --language=LANG option.");
693 if (CTAGS)
694 puts ("--globals\n\
695 Create tag entries for global variables in some languages.");
696 else
697 puts ("--no-globals\n\
698 Do not create tag entries for global variables in some\n\
699 languages. This makes the tags file smaller.");
700 puts ("--members\n\
701 Create tag entries for member variables in C and derived languages.");
703 #ifdef ETAGS_REGEXPS
704 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
705 Make a tag for each line matching pattern REGEXP in the following\n\
706 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
707 regexfile is a file containing one REGEXP per line.\n\
708 REGEXP is anchored (as if preceded by ^).\n\
709 The form /REGEXP/NAME/ creates a named tag.\n\
710 For example Tcl named tags can be created with:\n\
711 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
712 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
713 Like -r, --regex but ignore case when matching expressions.");
714 puts ("-R, --no-regex\n\
715 Don't create tags from regexps for the following files.");
716 #endif /* ETAGS_REGEXPS */
717 puts ("-o FILE, --output=FILE\n\
718 Write the tags to FILE.");
719 puts ("-I, --ignore-indentation\n\
720 Don't rely on indentation quite as much as normal. Currently,\n\
721 this means not to assume that a closing brace in the first\n\
722 column is the final brace of a function or structure\n\
723 definition in C and C++.");
725 if (CTAGS)
727 puts ("-t, --typedefs\n\
728 Generate tag entries for C and Ada typedefs.");
729 puts ("-T, --typedefs-and-c++\n\
730 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
731 and C++ member functions.");
732 puts ("-u, --update\n\
733 Update the tag entries for the given files, leaving tag\n\
734 entries for other files in place. Currently, this is\n\
735 implemented by deleting the existing entries for the given\n\
736 files and then rewriting the new entries at the end of the\n\
737 tags file. It is often faster to simply rebuild the entire\n\
738 tag file than to use this.");
739 puts ("-v, --vgrind\n\
740 Generates an index of items intended for human consumption,\n\
741 similar to the output of vgrind. The index is sorted, and\n\
742 gives the page number of each item.");
743 puts ("-w, --no-warn\n\
744 Suppress warning messages about entries defined in multiple\n\
745 files.");
746 puts ("-x, --cxref\n\
747 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
748 The output uses line numbers instead of page numbers, but\n\
749 beyond that the differences are cosmetic; try both to see\n\
750 which you like.");
753 puts ("-V, --version\n\
754 Print the version of the program.\n\
755 -h, --help\n\
756 Print this help message.");
758 print_language_names ();
760 puts ("");
761 puts ("Report bugs to bug-gnu-emacs@gnu.org");
763 exit (GOOD);
/* Kinds of command-line item that main records in its argument buffer.  */
enum argument_type
{
  at_language,			/* a language specification */
  at_regexp,			/* a regular expression */
  at_filename,			/* a file name */
  at_icregexp			/* a regular expression ignoring case */
};
775 /* This structure helps us allow mixing of --lang and file names. */
776 typedef struct
778 enum argument_type arg_type;
779 char *what;
780 language *lang; /* language of the regexp */
781 } argument;
783 #ifdef VMS /* VMS specific functions */
/* String terminator.  */
#define EOS	'\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define	MAX_FILE_SPEC_LEN	255

/* A file specification: a length word followed by the characters,
   with room for a terminating EOS.  */
typedef struct {
  short   curlen;			/* number of characters in BODY */
  char    body[MAX_FILE_SPEC_LEN + 1];	/* the text of the specification */
} vspec;
796 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
797 returning in each successive call the next file name matching the input
798 spec. The function expects that each in_spec passed
799 to it will be processed to completion; in particular, up to and
800 including the call following that in which the last matching name
801 is returned, the function ignores the value of in_spec, and will
802 only start processing a new spec with the following call.
803 If an error occurs, on return out_spec contains the value
804 of in_spec when the error occurred.
806 With each successive file name returned in out_spec, the
807 function's return value is one. When there are no more matching
808 names the function returns zero. If on the first call no file
809 matches in_spec, or there is any other error, -1 is returned.
812 #include <rmsdef.h>
813 #include <descrip.h>
814 #define OUTSIZE MAX_FILE_SPEC_LEN
815 static short
816 fn_exp (out, in)
817 vspec *out;
818 char *in;
820 static long context = 0;
821 static struct dsc$descriptor_s o;
822 static struct dsc$descriptor_s i;
823 static bool pass1 = TRUE;
824 long status;
825 short retval;
827 if (pass1)
829 pass1 = FALSE;
830 o.dsc$a_pointer = (char *) out;
831 o.dsc$w_length = (short)OUTSIZE;
832 i.dsc$a_pointer = in;
833 i.dsc$w_length = (short)strlen(in);
834 i.dsc$b_dtype = DSC$K_DTYPE_T;
835 i.dsc$b_class = DSC$K_CLASS_S;
836 o.dsc$b_dtype = DSC$K_DTYPE_VT;
837 o.dsc$b_class = DSC$K_CLASS_VS;
839 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
841 out->body[out->curlen] = EOS;
842 return 1;
844 else if (status == RMS$_NMF)
845 retval = 0;
846 else
848 strcpy(out->body, in);
849 retval = -1;
851 lib$find_file_end(&context);
852 pass1 = TRUE;
853 return retval;
857 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
858 name of each file specified by the provided arg expanding wildcards.
860 static char *
861 gfnames (arg, p_error)
862 char *arg;
863 bool *p_error;
865 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
867 switch (fn_exp (&filename, arg))
869 case 1:
870 *p_error = FALSE;
871 return filename.body;
872 case 0:
873 *p_error = FALSE;
874 return NULL;
875 default:
876 *p_error = TRUE;
877 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/* Stand-in for the C library's `system': report through `error' that
   the function is not implemented under VMS, and return -1 so that a
   caller inspecting the result sees a failure instead of an
   indeterminate value.  CMD is not used.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
889 #define VERSION_DELIM ';'
890 char *massage_name (s)
891 char *s;
893 char *start = s;
895 for ( ; *s; s++)
896 if (*s == VERSION_DELIM)
898 *s = EOS;
899 break;
901 else
902 *s = lowcase (*s);
903 return start;
905 #endif /* VMS */
909 main (argc, argv)
910 int argc;
911 char *argv[];
913 int i;
914 unsigned int nincluded_files;
915 char **included_files;
916 char *this_file;
917 argument *argbuffer;
918 int current_arg, file_count;
919 linebuffer filename_lb;
920 #ifdef VMS
921 bool got_err;
922 #endif
924 #ifdef DOS_NT
925 _fmode = O_BINARY; /* all of files are treated as binary files */
926 #endif /* DOS_NT */
928 progname = argv[0];
929 nincluded_files = 0;
930 included_files = xnew (argc, char *);
931 current_arg = 0;
932 file_count = 0;
934 /* Allocate enough no matter what happens. Overkill, but each one
935 is small. */
936 argbuffer = xnew (argc, argument);
938 #ifdef ETAGS_REGEXPS
939 /* Set syntax for regular expression routines. */
940 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
941 /* Translation table for case-insensitive search. */
942 for (i = 0; i < CHAR_SET_SIZE; i++)
943 lc_trans[i] = lowcase (i);
944 #endif /* ETAGS_REGEXPS */
947 * If etags, always find typedefs and structure tags. Why not?
948 * Also default to find macro constants, enum constants and
949 * global variables.
951 if (!CTAGS)
953 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
954 globals = TRUE;
955 declarations = FALSE;
956 members = FALSE;
959 while (1)
961 int opt;
962 char *optstring;
964 #ifdef ETAGS_REGEXPS
965 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
966 #else
967 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
968 #endif /* ETAGS_REGEXPS */
970 #ifndef LONG_OPTIONS
971 optstring = optstring + 1;
972 #endif /* LONG_OPTIONS */
974 opt = getopt_long (argc, argv, optstring, longopts, 0);
975 if (opt == EOF)
976 break;
978 switch (opt)
980 case 0:
981 /* If getopt returns 0, then it has already processed a
982 long-named option. We should do nothing. */
983 break;
985 case 1:
986 /* This means that a file name has been seen. Record it. */
987 argbuffer[current_arg].arg_type = at_filename;
988 argbuffer[current_arg].what = optarg;
989 ++current_arg;
990 ++file_count;
991 break;
993 /* Common options. */
994 case 'a': append_to_tagfile = TRUE; break;
995 case 'C': cplusplus = TRUE; break;
996 case 'd': constantypedefs = TRUE; break;
997 case 'D': constantypedefs = FALSE; break;
998 case 'f': /* for compatibility with old makefiles */
999 case 'o':
1000 if (tagfile)
1002 error ("-o option may only be given once.", (char *)NULL);
1003 suggest_asking_for_help ();
1005 tagfile = optarg;
1006 break;
1007 case 'I':
1008 case 'S': /* for backward compatibility */
1009 noindentypedefs = TRUE;
1010 break;
1011 case 'l':
1013 language *lang = get_language_from_langname (optarg);
1014 if (lang != NULL)
1016 argbuffer[current_arg].lang = lang;
1017 argbuffer[current_arg].arg_type = at_language;
1018 ++current_arg;
1021 break;
1022 #ifdef ETAGS_REGEXPS
1023 case 'r':
1024 argbuffer[current_arg].arg_type = at_regexp;
1025 argbuffer[current_arg].what = optarg;
1026 ++current_arg;
1027 break;
1028 case 'R':
1029 argbuffer[current_arg].arg_type = at_regexp;
1030 argbuffer[current_arg].what = NULL;
1031 ++current_arg;
1032 break;
1033 case 'c':
1034 argbuffer[current_arg].arg_type = at_icregexp;
1035 argbuffer[current_arg].what = optarg;
1036 ++current_arg;
1037 break;
1038 #endif /* ETAGS_REGEXPS */
1039 case 'V':
1040 print_version ();
1041 break;
1042 case 'h':
1043 case 'H':
1044 print_help ();
1045 break;
1046 case 't':
1047 typedefs = TRUE;
1048 break;
1049 case 'T':
1050 typedefs = typedefs_or_cplusplus = TRUE;
1051 break;
1052 #if (!CTAGS)
1053 /* Etags options */
1054 case 'i':
1055 included_files[nincluded_files++] = optarg;
1056 break;
1057 #else /* CTAGS */
1058 /* Ctags options. */
1059 case 'B': searchar = '?'; break;
1060 case 'u': update = TRUE; break;
1061 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1062 case 'x': cxref_style = TRUE; break;
1063 case 'w': no_warnings = TRUE; break;
1064 #endif /* CTAGS */
1065 default:
1066 suggest_asking_for_help ();
1070 for (; optind < argc; ++optind)
1072 argbuffer[current_arg].arg_type = at_filename;
1073 argbuffer[current_arg].what = argv[optind];
1074 ++current_arg;
1075 ++file_count;
1078 if (nincluded_files == 0 && file_count == 0)
1080 error ("no input files specified.", (char *)NULL);
1081 suggest_asking_for_help ();
1084 if (tagfile == NULL)
1085 tagfile = CTAGS ? "tags" : "TAGS";
1086 cwd = etags_getcwd (); /* the current working directory */
1087 if (cwd[strlen (cwd) - 1] != '/')
1089 char *oldcwd = cwd;
1090 cwd = concat (oldcwd, "/", "");
1091 free (oldcwd);
1093 if (streq (tagfile, "-"))
1094 tagfiledir = cwd;
1095 else
1096 tagfiledir = absolute_dirname (tagfile, cwd);
1098 init (); /* set up boolean "functions" */
1100 initbuffer (&lb);
1101 initbuffer (&filename_lb);
1103 if (!CTAGS)
1105 if (streq (tagfile, "-"))
1107 tagf = stdout;
1108 #ifdef DOS_NT
1109 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1110 doesn't take effect until after `stdout' is already open). */
1111 if (!isatty (fileno (stdout)))
1112 setmode (fileno (stdout), O_BINARY);
1113 #endif /* DOS_NT */
1115 else
1116 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1117 if (tagf == NULL)
1118 pfatal (tagfile);
1122 * Loop through files finding functions.
1124 for (i = 0; i < current_arg; ++i)
1126 switch (argbuffer[i].arg_type)
1128 case at_language:
1129 forced_lang = argbuffer[i].lang;
1130 break;
1131 #ifdef ETAGS_REGEXPS
1132 case at_regexp:
1133 analyse_regex (argbuffer[i].what, FALSE);
1134 break;
1135 case at_icregexp:
1136 analyse_regex (argbuffer[i].what, TRUE);
1137 break;
1138 #endif
1139 case at_filename:
1140 #ifdef VMS
1141 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1143 if (got_err)
1145 error ("can't find file %s\n", this_file);
1146 argc--, argv++;
1148 else
1150 this_file = massage_name (this_file);
1152 #else
1153 this_file = argbuffer[i].what;
1154 #endif
1155 /* Input file named "-" means read file names from stdin
1156 (one per line) and use them. */
1157 if (streq (this_file, "-"))
1158 while (readline_internal (&filename_lb, stdin) > 0)
1159 process_file (filename_lb.buffer);
1160 else
1161 process_file (this_file);
1162 #ifdef VMS
1164 #endif
1165 break;
1169 #ifdef ETAGS_REGEXPS
1170 free_patterns ();
1171 #endif /* ETAGS_REGEXPS */
1173 if (!CTAGS)
1175 while (nincluded_files-- > 0)
1176 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1178 fclose (tagf);
1179 exit (GOOD);
1182 /* If CTAGS, we are here. process_file did not write the tags yet,
1183 because we want them ordered. Let's do it now. */
1184 if (cxref_style)
1186 put_entries (head);
1187 free_tree (head);
1188 head = NULL;
1189 exit (GOOD);
1192 if (update)
1194 char cmd[BUFSIZ];
1195 for (i = 0; i < current_arg; ++i)
1197 if (argbuffer[i].arg_type != at_filename)
1198 continue;
1199 sprintf (cmd,
1200 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1201 tagfile, argbuffer[i].what, tagfile);
1202 if (system (cmd) != GOOD)
1203 fatal ("failed to execute shell command", (char *)NULL);
1205 append_to_tagfile = TRUE;
1208 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1209 if (tagf == NULL)
1210 pfatal (tagfile);
1211 put_entries (head);
1212 free_tree (head);
1213 head = NULL;
1214 fclose (tagf);
1216 if (update)
1218 char cmd[BUFSIZ];
1219 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1220 exit (system (cmd));
1222 return GOOD;
1228 * Return a compressor given the file name. If EXTPTR is non-zero,
1229 * return a pointer into FILE where the compressor-specific
1230 * extension begins. If no compressor is found, NULL is returned
1231 * and EXTPTR is not significant.
1232 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1234 static compressor *
1235 get_compressor_from_suffix (file, extptr)
1236 char *file;
1237 char **extptr;
1239 compressor *compr;
1240 char *slash, *suffix;
1242 /* This relies on FN to be after canonicalize_filename,
1243 so we don't need to consider backslashes on DOS_NT. */
1244 slash = etags_strrchr (file, '/');
1245 suffix = etags_strrchr (file, '.');
1246 if (suffix == NULL || suffix < slash)
1247 return NULL;
1248 if (extptr != NULL)
1249 *extptr = suffix;
1250 suffix += 1;
1251 /* Let those poor souls who live with DOS 8+3 file name limits get
1252 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1253 Only the first do loop is run if not MSDOS */
1256 for (compr = compressors; compr->suffix != NULL; compr++)
1257 if (streq (compr->suffix, suffix))
1258 return compr;
1259 if (!MSDOS)
1260 break; /* do it only once: not really a loop */
1261 if (extptr != NULL)
1262 *extptr = ++suffix;
1263 } while (*suffix != '\0');
1264 return NULL;
1270 * Return a language given the name.
1272 static language *
1273 get_language_from_langname (name)
1274 char *name;
1276 language *lang;
1278 if (name == NULL)
1279 error ("empty language name", (char *)NULL);
1280 else
1282 for (lang = lang_names; lang->name != NULL; lang++)
1283 if (streq (name, lang->name))
1284 return lang;
1285 error ("unknown language \"%s\"", name);
1288 return NULL;
1293 * Return a language given the interpreter name.
1295 static language *
1296 get_language_from_interpreter (interpreter)
1297 char *interpreter;
1299 language *lang;
1300 char **iname;
1302 if (interpreter == NULL)
1303 return NULL;
1304 for (lang = lang_names; lang->name != NULL; lang++)
1305 if (lang->interpreters != NULL)
1306 for (iname = lang->interpreters; *iname != NULL; iname++)
1307 if (streq (*iname, interpreter))
1308 return lang;
1310 return NULL;
1316 * Return a language given the file name.
1318 static language *
1319 get_language_from_filename (file)
1320 char *file;
1322 language *lang;
1323 char **name, **ext, *suffix;
1325 /* Try whole file name first. */
1326 for (lang = lang_names; lang->name != NULL; lang++)
1327 if (lang->filenames != NULL)
1328 for (name = lang->filenames; *name != NULL; name++)
1329 if (streq (*name, file))
1330 return lang;
1332 /* If not found, try suffix after last dot. */
1333 suffix = etags_strrchr (file, '.');
1334 if (suffix == NULL)
1335 return NULL;
1336 suffix += 1;
1337 for (lang = lang_names; lang->name != NULL; lang++)
1338 if (lang->suffixes != NULL)
1339 for (ext = lang->suffixes; *ext != NULL; ext++)
1340 if (streq (*ext, suffix))
1341 return lang;
1342 return NULL;
1348 * This routine is called on each file argument.
1350 static void
1351 process_file (file)
1352 char *file;
1354 struct stat stat_buf;
1355 FILE *inf;
1356 compressor *compr;
1357 char *compressed_name, *uncompressed_name;
1358 char *ext, *real_name;
1361 canonicalize_filename (file);
1362 if (streq (file, tagfile) && !streq (tagfile, "-"))
1364 error ("skipping inclusion of %s in self.", file);
1365 return;
1367 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1369 compressed_name = NULL;
1370 real_name = uncompressed_name = savestr (file);
1372 else
1374 real_name = compressed_name = savestr (file);
1375 uncompressed_name = savenstr (file, ext - file);
1378 /* If the canonicalised uncompressed name has already be dealt with,
1379 skip it silently, else add it to the list. */
1381 typedef struct processed_file
1383 char *filename;
1384 struct processed_file *next;
1385 } processed_file;
1386 static processed_file *pf_head = NULL;
1387 register processed_file *fnp;
1389 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1390 if (streq (uncompressed_name, fnp->filename))
1391 goto exit;
1392 fnp = pf_head;
1393 pf_head = xnew (1, struct processed_file);
1394 pf_head->filename = savestr (uncompressed_name);
1395 pf_head->next = fnp;
1398 if (stat (real_name, &stat_buf) != 0)
1400 /* Reset real_name and try with a different name. */
1401 real_name = NULL;
1402 if (compressed_name != NULL) /* try with the given suffix */
1404 if (stat (uncompressed_name, &stat_buf) == 0)
1405 real_name = uncompressed_name;
1407 else /* try all possible suffixes */
1409 for (compr = compressors; compr->suffix != NULL; compr++)
1411 compressed_name = concat (file, ".", compr->suffix);
1412 if (stat (compressed_name, &stat_buf) != 0)
1414 if (MSDOS)
1416 char *suf = compressed_name + strlen (file);
1417 size_t suflen = strlen (compr->suffix) + 1;
1418 for ( ; suf[1]; suf++, suflen--)
1420 memmove (suf, suf + 1, suflen);
1421 if (stat (compressed_name, &stat_buf) == 0)
1423 real_name = compressed_name;
1424 break;
1427 if (real_name != NULL)
1428 break;
1429 } /* MSDOS */
1430 free (compressed_name);
1431 compressed_name = NULL;
1433 else
1435 real_name = compressed_name;
1436 break;
1440 if (real_name == NULL)
1442 perror (file);
1443 goto exit;
1445 } /* try with a different name */
1447 if (!S_ISREG (stat_buf.st_mode))
1449 error ("skipping %s: it is not a regular file.", real_name);
1450 goto exit;
1452 if (real_name == compressed_name)
1454 char *cmd = concat (compr->command, " ", real_name);
1455 inf = (FILE *) popen (cmd, "r");
1456 free (cmd);
1458 else
1459 inf = fopen (real_name, "r");
1460 if (inf == NULL)
1462 perror (real_name);
1463 goto exit;
1466 find_entries (uncompressed_name, inf);
1468 if (real_name == compressed_name)
1469 pclose (inf);
1470 else
1471 fclose (inf);
1473 if (!CTAGS)
1475 char *filename;
1477 if (filename_is_absolute (uncompressed_name))
1479 /* file is an absolute file name. Canonicalise it. */
1480 filename = absolute_filename (uncompressed_name, cwd);
1482 else
1484 /* file is a file name relative to cwd. Make it relative
1485 to the directory of the tags file. */
1486 filename = relative_filename (uncompressed_name, tagfiledir);
1488 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1489 free (filename);
1490 put_entries (head);
1491 free_tree (head);
1492 head = NULL;
1495 exit:
1496 if (compressed_name) free(compressed_name);
1497 if (uncompressed_name) free(uncompressed_name);
1498 return;
1502 * This routine sets up the boolean pseudo-functions which work
1503 * by setting boolean flags dependent upon the corresponding character.
1504 * Every char which is NOT in that string is not a white char. Therefore,
1505 * all of the array "_wht" is set to FALSE, and then the elements
1506 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1507 * of a char is TRUE if it is the string "white", else FALSE.
1509 static void
1510 init ()
1512 register char *sp;
1513 register int i;
1515 for (i = 0; i < CHARS; i++)
1516 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1517 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1518 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1519 notinname('\0') = notinname('\n');
1520 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1521 begtoken('\0') = begtoken('\n');
1522 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1523 intoken('\0') = intoken('\n');
1524 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1525 endtoken('\0') = endtoken('\n');
1529 * This routine opens the specified file and calls the function
1530 * which finds the function and type definitions.
1532 node *last_node = NULL;
1534 static void
1535 find_entries (file, inf)
1536 char *file;
1537 FILE *inf;
1539 char *cp;
1540 language *lang;
1541 node *old_last_node;
1543 /* Memory leakage here: the string pointed by curfile is
1544 never released, because curfile is copied into np->file
1545 for each node, to be used in CTAGS mode. The amount of
1546 memory leaked here is the sum of the lengths of the
1547 file names. */
1548 curfile = savestr (file);
1550 /* If user specified a language, use it. */
1551 lang = forced_lang;
1552 if (lang != NULL && lang->function != NULL)
1554 curlang = lang;
1555 lang->function (inf);
1556 return;
1559 /* Try to guess the language given the file name. */
1560 lang = get_language_from_filename (file);
1561 if (lang != NULL && lang->function != NULL)
1563 curlang = lang;
1564 lang->function (inf);
1565 return;
1568 /* Look for sharp-bang as the first two characters. */
1569 if (readline_internal (&lb, inf) > 0
1570 && lb.len >= 2
1571 && lb.buffer[0] == '#'
1572 && lb.buffer[1] == '!')
1574 char *lp;
1576 /* Set lp to point at the first char after the last slash in the
1577 line or, if no slashes, at the first nonblank. Then set cp to
1578 the first successive blank and terminate the string. */
1579 lp = etags_strrchr (lb.buffer+2, '/');
1580 if (lp != NULL)
1581 lp += 1;
1582 else
1583 lp = skip_spaces (lb.buffer + 2);
1584 cp = skip_non_spaces (lp);
1585 *cp = '\0';
1587 if (strlen (lp) > 0)
1589 lang = get_language_from_interpreter (lp);
1590 if (lang != NULL && lang->function != NULL)
1592 curlang = lang;
1593 lang->function (inf);
1594 return;
1598 /* We rewind here, even if inf may be a pipe. We fail if the
1599 length of the first line is longer than the pipe block size,
1600 which is unlikely. */
1601 rewind (inf);
1603 /* Try Fortran. */
1604 old_last_node = last_node;
1605 curlang = get_language_from_langname ("fortran");
1606 Fortran_functions (inf);
1608 /* No Fortran entries found. Try C. */
1609 if (old_last_node == last_node)
1611 /* We do not tag if rewind fails.
1612 Only the file name will be recorded in the tags file. */
1613 rewind (inf);
1614 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1615 default_C_entries (inf);
1617 return;
1621 /* Record a tag. */
1622 static void
1623 pfnote (name, is_func, linestart, linelen, lno, cno)
1624 char *name; /* tag name, or NULL if unnamed */
1625 bool is_func; /* tag is a function */
1626 char *linestart; /* start of the line where tag is */
1627 int linelen; /* length of the line where tag is */
1628 int lno; /* line number */
1629 long cno; /* character number */
1631 register node *np;
1633 if (CTAGS && name == NULL)
1634 return;
1636 np = xnew (1, node);
1638 /* If ctags mode, change name "main" to M<thisfilename>. */
1639 if (CTAGS && !cxref_style && streq (name, "main"))
1641 register char *fp = etags_strrchr (curfile, '/');
1642 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1643 fp = etags_strrchr (np->name, '.');
1644 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1645 fp[0] = '\0';
1647 else
1648 np->name = name;
1649 np->been_warned = FALSE;
1650 np->file = curfile;
1651 np->is_func = is_func;
1652 np->lno = lno;
1653 /* Our char numbers are 0-base, because of C language tradition?
1654 ctags compatibility? old versions compatibility? I don't know.
1655 Anyway, since emacs's are 1-base we expect etags.el to take care
1656 of the difference. If we wanted to have 1-based numbers, we would
1657 uncomment the +1 below. */
1658 np->cno = cno /* + 1 */ ;
1659 np->left = np->right = NULL;
1660 if (CTAGS && !cxref_style)
1662 if (strlen (linestart) < 50)
1663 np->pat = concat (linestart, "$", "");
1664 else
1665 np->pat = savenstr (linestart, 50);
1667 else
1668 np->pat = savenstr (linestart, linelen);
1670 add_node (np, &head);
1674 * TAGS format specification
1675 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1677 * pfnote should emit the optimized form [unnamed tag] only if:
1678 * 1. name does not contain any of the characters " \t\r\n(),;";
1679 * 2. linestart contains name as either a rightmost, or rightmost but
1680 * one character, substring;
1681 * 3. the character, if any, immediately before name in linestart must
1682 * be one of the characters " \t(),;";
1683 * 4. the character, if any, immediately after name in linestart must
1684 * also be one of the characters " \t(),;".
1686 * The real implementation uses the notinname() macro, which recognises
1687 * characters slightly different from " \t\r\n(),;". See the variable
1688 * `nonam'.
1690 #define traditional_tag_style TRUE
1691 static void
1692 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1693 char *name; /* tag name, or NULL if unnamed */
1694 int namelen; /* tag length */
1695 bool is_func; /* tag is a function */
1696 char *linestart; /* start of the line where tag is */
1697 int linelen; /* length of the line where tag is */
1698 int lno; /* line number */
1699 long cno; /* character number */
1701 register char *cp;
1702 bool named;
1704 named = TRUE;
1705 if (!CTAGS)
1707 for (cp = name; !notinname (*cp); cp++)
1708 continue;
1709 if (*cp == '\0') /* rule #1 */
1711 cp = linestart + linelen - namelen;
1712 if (notinname (linestart[linelen-1]))
1713 cp -= 1; /* rule #4 */
1714 if (cp >= linestart /* rule #2 */
1715 && (cp == linestart
1716 || notinname (cp[-1])) /* rule #3 */
1717 && strneq (name, cp, namelen)) /* rule #2 */
1718 named = FALSE; /* use unnamed tag */
1722 if (named)
1723 name = savenstr (name, namelen);
1724 else
1725 name = NULL;
1726 pfnote (name, is_func, linestart, linelen, lno, cno);
1730 * free_tree ()
1731 * recurse on left children, iterate on right children.
1733 static void
1734 free_tree (np)
1735 register node *np;
1737 while (np)
1739 register node *node_right = np->right;
1740 free_tree (np->left);
1741 if (np->name != NULL)
1742 free (np->name);
1743 free (np->pat);
1744 free (np);
1745 np = node_right;
1750 * add_node ()
1751 * Adds a node to the tree of nodes. In etags mode, we don't keep
1752 * it sorted; we just keep a linear list. In ctags mode, maintain
1753 * an ordered tree, with no attempt at balancing.
1755 * add_node is the only function allowed to add nodes, so it can
1756 * maintain state.
1758 static void
1759 add_node (np, cur_node_p)
1760 node *np, **cur_node_p;
1762 register int dif;
1763 register node *cur_node = *cur_node_p;
1765 if (cur_node == NULL)
1767 *cur_node_p = np;
1768 last_node = np;
1769 return;
1772 if (!CTAGS)
1774 /* Etags Mode */
1775 if (last_node == NULL)
1776 fatal ("internal error in add_node", (char *)NULL);
1777 last_node->right = np;
1778 last_node = np;
1780 else
1782 /* Ctags Mode */
1783 dif = strcmp (np->name, cur_node->name);
1786 * If this tag name matches an existing one, then
1787 * do not add the node, but maybe print a warning.
1789 if (!dif)
1791 if (streq (np->file, cur_node->file))
1793 if (!no_warnings)
1795 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1796 np->file, lineno, np->name);
1797 fprintf (stderr, "Second entry ignored\n");
1800 else if (!cur_node->been_warned && !no_warnings)
1802 fprintf
1803 (stderr,
1804 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1805 np->file, cur_node->file, np->name);
1806 cur_node->been_warned = TRUE;
1808 return;
1811 /* Actually add the node */
1812 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1817 static void
1818 put_entries (np)
1819 register node *np;
1821 register char *sp;
1823 if (np == NULL)
1824 return;
1826 /* Output subentries that precede this one */
1827 put_entries (np->left);
1829 /* Output this entry */
1831 if (!CTAGS)
1833 if (np->name != NULL)
1834 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1835 np->pat, np->name, np->lno, np->cno);
1836 else
1837 fprintf (tagf, "%s\177%d,%ld\n",
1838 np->pat, np->lno, np->cno);
1840 else
1842 if (np->name == NULL)
1843 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1845 if (cxref_style)
1847 if (vgrind_style)
1848 fprintf (stdout, "%s %s %d\n",
1849 np->name, np->file, (np->lno + 63) / 64);
1850 else
1851 fprintf (stdout, "%-16s %3d %-16s %s\n",
1852 np->name, np->lno, np->file, np->pat);
1854 else
1856 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1858 if (np->is_func)
1859 { /* a function */
1860 putc (searchar, tagf);
1861 putc ('^', tagf);
1863 for (sp = np->pat; *sp; sp++)
1865 if (*sp == '\\' || *sp == searchar)
1866 putc ('\\', tagf);
1867 putc (*sp, tagf);
1869 putc (searchar, tagf);
1871 else
1872 { /* a typedef; text pattern inadequate */
1873 fprintf (tagf, "%d", np->lno);
1875 putc ('\n', tagf);
1879 /* Output subentries that follow this one */
1880 put_entries (np->right);
/* Length of a number's decimal representation.
   NUM is expected to be non-negative; the result is 1 for any
   number smaller than 10.  */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
1895 * Return total number of characters that put_entries will output for
1896 * the nodes in the subtree of the specified node. Works only if
1897 * we are not ctags, but called only in that case. This count
1898 * is irrelevant with the new tags.el, but is still supplied for
1899 * backward compatibility.
1901 static int
1902 total_size_of_entries (np)
1903 register node *np;
1905 register int total;
1907 if (np == NULL)
1908 return 0;
1910 for (total = 0; np != NULL; np = np->right)
1912 /* Count left subentries. */
1913 total += total_size_of_entries (np->left);
1915 /* Count this entry */
1916 total += strlen (np->pat) + 1;
1917 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1918 if (np->name != NULL)
1919 total += 1 + strlen (np->name); /* \001name */
1922 return total;
/* C extensions.
   These flags say which C-like language is being parsed.  C_symtype
   ignores a keyword whose c_ext field is nonzero and has no bit in
   common with the current flags; since C_STAR and C_JAVA include the
   C_PLPL bit, keywords marked C_PLPL are recognised for them too.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
/*
 * The C symbol tables.
 * sym_type classifies the keywords listed in the gperf table below;
 * st_none is what C_symtype returns for a word that is not a
 * keyword of the language being parsed.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};

/* hash and in_word_set are the gperf-generated lookup functions;
   C_symtype is the wrapper that takes the language flags into account.  */
static unsigned int hash P_((const char *, unsigned int));
static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
static enum sym_type C_symtype P_((char *, int, int));
1954 /* Feed stuff between (but not including) %[ and %] lines to:
1955 gperf -c -k 1,3 -o -p -r -t
1956 then put a `static' keyword in front of the in_word_set function.
1958 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1960 if, 0, st_C_ignore
1961 for, 0, st_C_ignore
1962 while, 0, st_C_ignore
1963 switch, 0, st_C_ignore
1964 return, 0, st_C_ignore
1965 @interface, 0, st_C_objprot
1966 @protocol, 0, st_C_objprot
1967 @implementation,0, st_C_objimpl
1968 @end, 0, st_C_objend
1969 import, C_JAVA, st_C_ignore
1970 package, C_JAVA, st_C_ignore
1971 friend, C_PLPL, st_C_ignore
1972 extends, C_JAVA, st_C_javastruct
1973 implements, C_JAVA, st_C_javastruct
1974 interface, C_JAVA, st_C_struct
1975 class, 0, st_C_class
1976 namespace, C_PLPL, st_C_struct
1977 domain, C_STAR, st_C_struct
1978 union, 0, st_C_struct
1979 struct, 0, st_C_struct
1980 extern, 0, st_C_extern
1981 enum, 0, st_C_enum
1982 typedef, 0, st_C_typedef
1983 define, 0, st_C_define
1984 operator, C_PLPL, st_C_operator
1985 bool, C_PLPL, st_C_typespec
1986 long, 0, st_C_typespec
1987 short, 0, st_C_typespec
1988 int, 0, st_C_typespec
1989 char, 0, st_C_typespec
1990 float, 0, st_C_typespec
1991 double, 0, st_C_typespec
1992 signed, 0, st_C_typespec
1993 unsigned, 0, st_C_typespec
1994 auto, 0, st_C_typespec
1995 void, 0, st_C_typespec
1996 static, 0, st_C_typespec
1997 const, 0, st_C_typespec
1998 volatile, 0, st_C_typespec
1999 explicit, C_PLPL, st_C_typespec
2000 mutable, C_PLPL, st_C_typespec
2001 typename, C_PLPL, st_C_typespec
2002 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2003 DEFUN, 0, st_C_gnumacro
2004 SYSCALL, 0, st_C_gnumacro
2005 ENTRY, 0, st_C_gnumacro
2006 PSEUDO, 0, st_C_gnumacro
2007 # These are defined inside C functions, so currently they are not met.
2008 # EXFUN used in glibc, DEFVAR_* in emacs.
2009 #EXFUN, 0, st_C_gnumacro
2010 #DEFVAR_, 0, st_C_gnumacro
2012 and replace lines between %< and %> with its output,
2013 then make in_word_set static. */
2014 /*%<*/
2015 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2016 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
/* One keyword: its spelling, the languages it belongs to (0 means
   all of them, see C_symtype) and its class.  */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };

/* Limits computed by gperf for the keyword set; do not edit by hand.  */
#define TOTAL_KEYWORDS 46
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 13
#define MAX_HASH_VALUE 121
/* maximum key range = 109, duplicates = 0 */
/* Hash function generated by gperf for the C keyword table.
   The value is LEN plus the table entry for the first char of STR
   plus, unless LEN is 1 or 2, the table entry for its third char.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122,  57, 122, 122, 122,  55,   6,
       60, 122, 122, 122, 122, 122, 122, 122, 122, 122,
       51, 122, 122,  10,   2, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122,   2,  52,  59,
       49,  38,  56,  41, 122,  22, 122, 122,   9,  32,
       33,  60,  26, 122,   1,  28,  46,  59,  44,  51,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122
    };
  register int hval = len;

  /* The third char takes part for every length but 1 and 2.  */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2078 #ifdef __GNUC__
2079 __inline
2080 #endif
2081 static struct C_stab_entry *
2082 in_word_set (str, len)
2083 register const char *str;
2084 register unsigned int len;
2086 static struct C_stab_entry wordlist[] =
2088 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2089 {""}, {""}, {""}, {""},
2090 {"ENTRY", 0, st_C_gnumacro},
2091 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2092 {""},
2093 {"if", 0, st_C_ignore},
2094 {""}, {""},
2095 {"SYSCALL", 0, st_C_gnumacro},
2096 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2097 {"struct", 0, st_C_struct},
2098 {"static", 0, st_C_typespec},
2099 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2100 {"long", 0, st_C_typespec},
2101 {""}, {""}, {""}, {""}, {""},
2102 {"auto", 0, st_C_typespec},
2103 {"return", 0, st_C_ignore},
2104 {"import", C_JAVA, st_C_ignore},
2105 {""},
2106 {"switch", 0, st_C_ignore},
2107 {""},
2108 {"implements", C_JAVA, st_C_javastruct},
2109 {""},
2110 {"for", 0, st_C_ignore},
2111 {"volatile", 0, st_C_typespec},
2112 {""},
2113 {"PSEUDO", 0, st_C_gnumacro},
2114 {""},
2115 {"char", 0, st_C_typespec},
2116 {"class", 0, st_C_class},
2117 {"@protocol", 0, st_C_objprot},
2118 {""}, {""},
2119 {"void", 0, st_C_typespec},
2120 {"int", 0, st_C_typespec},
2121 {"explicit", C_PLPL, st_C_typespec},
2122 {""},
2123 {"namespace", C_PLPL, st_C_struct},
2124 {"signed", 0, st_C_typespec},
2125 {""},
2126 {"interface", C_JAVA, st_C_struct},
2127 {"while", 0, st_C_ignore},
2128 {"typedef", 0, st_C_typedef},
2129 {"typename", C_PLPL, st_C_typespec},
2130 {""}, {""}, {""},
2131 {"friend", C_PLPL, st_C_ignore},
2132 {"mutable", C_PLPL, st_C_typespec},
2133 {"union", 0, st_C_struct},
2134 {"domain", C_STAR, st_C_struct},
2135 {""}, {""},
2136 {"extern", 0, st_C_extern},
2137 {"extends", C_JAVA, st_C_javastruct},
2138 {"package", C_JAVA, st_C_ignore},
2139 {"short", 0, st_C_typespec},
2140 {"@end", 0, st_C_objend},
2141 {"unsigned", 0, st_C_typespec},
2142 {""},
2143 {"const", 0, st_C_typespec},
2144 {""}, {""},
2145 {"@interface", 0, st_C_objprot},
2146 {"enum", 0, st_C_enum},
2147 {""}, {""},
2148 {"@implementation",0, st_C_objimpl},
2149 {""},
2150 {"operator", C_PLPL, st_C_operator},
2151 {""}, {""}, {""}, {""},
2152 {"define", 0, st_C_define},
2153 {""}, {""},
2154 {"double", 0, st_C_typespec},
2155 {""},
2156 {"bool", C_PLPL, st_C_typespec},
2157 {""}, {""}, {""},
2158 {"DEFUN", 0, st_C_gnumacro},
2159 {"float", 0, st_C_typespec}
2162 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2164 register int key = hash (str, len);
2166 if (key <= MAX_HASH_VALUE && key >= 0)
2168 register const char *s = wordlist[key].name;
2170 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2171 return &wordlist[key];
2174 return 0;
2176 /*%>*/
2178 static enum sym_type
2179 C_symtype (str, len, c_ext)
2180 char *str;
2181 int len;
2182 int c_ext;
2184 register struct C_stab_entry *se = in_word_set (str, len);
2186 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2187 return st_none;
2188 return se->type;
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable; the enumerators
 * below are its states.
 */
enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

bool fvextern;			/* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable; the enumerators below are its states.
 */
enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable; the enumerators below are its states.
 */
enum
{
  snone,			/* nothing seen yet,
				   or in struct body if cblev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  sintemplate,			/* inside template (ignore) */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 * Until then it holds a placeholder string.
 */
char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 * definedef is its state variable; the enumerators below are its states.
 */
enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * objdef is its state variable; the enumerators below are its states.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2279 * Use this structure to keep info about the token read, and how it
2280 * should be tagged. Used by the make_C_tag function to build a tag.
2282 struct tok
2284 bool valid;
2285 bool named;
2286 int offset;
2287 int length;
2288 int lineno;
2289 long linepos;
2290 char *line;
2291 } token; /* latest token read */
2292 linebuffer token_name; /* its name */
2295 * Variables and functions for dealing with nested structures.
2296 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2298 static void pushclass_above P_((int, char *, int));
2299 static void popclass_above P_((int));
2300 static void write_classname P_((linebuffer *, char *qualifier));
2302 struct {
2303 char **cname; /* nested class names */
2304 int *cblev; /* nested class curly brace level */
2305 int nl; /* class nesting level (elements used) */
2306 int size; /* length of the array */
2307 } cstack; /* stack for nested declaration tags */
2308 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2309 #define nestlev (cstack.nl)
2310 /* After struct keyword or in struct body, not inside an nested function. */
2311 #define instruct (structdef == snone && nestlev > 0 \
2312 && cblev == cstack.cblev[nestlev-1] + 1)
2314 static void
2315 pushclass_above (cblev, str, len)
2316 int cblev;
2317 char *str;
2318 int len;
2320 int nl;
2322 popclass_above (cblev);
2323 nl = cstack.nl;
2324 if (nl >= cstack.size)
2326 int size = cstack.size *= 2;
2327 xrnew (cstack.cname, size, char *);
2328 xrnew (cstack.cblev, size, int);
2330 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2331 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2332 cstack.cblev[nl] = cblev;
2333 cstack.nl = nl + 1;
2336 static void
2337 popclass_above (cblev)
2338 int cblev;
2340 int nl;
2342 for (nl = cstack.nl - 1;
2343 nl >= 0 && cstack.cblev[nl] >= cblev;
2344 nl--)
2346 if (cstack.cname[nl] != NULL)
2347 free (cstack.cname[nl]);
2348 cstack.nl = nl;
2352 static void
2353 write_classname (cn, qualifier)
2354 linebuffer *cn;
2355 char *qualifier;
2357 int i, len;
2358 int qlen = strlen (qualifier);
2360 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2362 len = 0;
2363 cn->len = 0;
2364 cn->buffer[0] = '\0';
2366 else
2368 len = strlen (cstack.cname[0]);
2369 linebuffer_setlen (cn, len);
2370 strcpy (cn->buffer, cstack.cname[0]);
2372 for (i = 1; i < cstack.nl; i++)
2374 char *s;
2375 int slen;
2377 s = cstack.cname[i];
2378 if (s == NULL)
2379 continue;
2380 slen = strlen (s);
2381 len += slen + qlen;
2382 linebuffer_setlen (cn, len);
2383 strncat (cn->buffer, qualifier, qlen);
2384 strncat (cn->buffer, s, slen);
2389 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2390 static void make_C_tag P_((bool));
2393 * consider_token ()
2394 * checks to see if the current token is at the start of a
2395 * function or variable, or corresponds to a typedef, or
2396 * is a struct/union/enum tag, or #define, or an enum constant.
2398 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2399 * with args. C_EXTP points to which language we are looking at.
2401 * Globals
2402 * fvdef IN OUT
2403 * structdef IN OUT
2404 * definedef IN OUT
2405 * typdef IN OUT
2406 * objdef IN OUT
2409 static bool
2410 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2411 register char *str; /* IN: token pointer */
2412 register int len; /* IN: token length */
2413 register int c; /* IN: first char after the token */
2414 int *c_extp; /* IN, OUT: C extensions mask */
2415 int cblev; /* IN: curly brace level */
2416 int parlev; /* IN: parenthesis level */
2417 bool *is_func_or_var; /* OUT: function or variable found */
2419 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2420 structtype is the type of the preceding struct-like keyword, and
2421 structcblev is the curly brace level where it has been seen. */
2422 static enum sym_type structtype;
2423 static int structcblev;
2424 static enum sym_type toktype;
2427 toktype = C_symtype (str, len, *c_extp);
2430 * Advance the definedef state machine.
2432 switch (definedef)
2434 case dnone:
2435 /* We're not on a preprocessor line. */
2436 if (toktype == st_C_gnumacro)
2438 fvdef = fdefunkey;
2439 return FALSE;
2441 break;
2442 case dsharpseen:
2443 if (toktype == st_C_define)
2445 definedef = ddefineseen;
2447 else
2449 definedef = dignorerest;
2451 return FALSE;
2452 case ddefineseen:
2454 * Make a tag for any macro, unless it is a constant
2455 * and constantypedefs is FALSE.
2457 definedef = dignorerest;
2458 *is_func_or_var = (c == '(');
2459 if (!*is_func_or_var && !constantypedefs)
2460 return FALSE;
2461 else
2462 return TRUE;
2463 case dignorerest:
2464 return FALSE;
2465 default:
2466 error ("internal error: definedef value.", (char *)NULL);
2470 * Now typedefs
2472 switch (typdef)
2474 case tnone:
2475 if (toktype == st_C_typedef)
2477 if (typedefs)
2478 typdef = tkeyseen;
2479 fvextern = FALSE;
2480 fvdef = fvnone;
2481 return FALSE;
2483 break;
2484 case tkeyseen:
2485 switch (toktype)
2487 case st_none:
2488 case st_C_typespec:
2489 case st_C_class:
2490 case st_C_struct:
2491 case st_C_enum:
2492 typdef = ttypeseen;
2493 break;
2495 break;
2496 case ttypeseen:
2497 if (structdef == snone && fvdef == fvnone)
2499 fvdef = fvnameseen;
2500 return TRUE;
2502 break;
2503 case tend:
2504 switch (toktype)
2506 case st_C_typespec:
2507 case st_C_class:
2508 case st_C_struct:
2509 case st_C_enum:
2510 return FALSE;
2512 return TRUE;
2516 * This structdef business is NOT invoked when we are ctags and the
2517 * file is plain C. This is because a struct tag may have the same
2518 * name as another tag, and this loses with ctags.
2520 switch (toktype)
2522 case st_C_javastruct:
2523 if (structdef == stagseen)
2524 structdef = scolonseen;
2525 return FALSE;
2526 case st_C_class:
2527 if (cblev == 0
2528 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2529 && definedef == dnone && structdef == snone
2530 && typdef == tnone && fvdef == fvnone)
2531 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2532 /* FALLTHRU */
2533 case st_C_struct:
2534 case st_C_enum:
2535 if (parlev == 0
2536 && fvdef != vignore
2537 && (typdef == tkeyseen
2538 || (typedefs_or_cplusplus && structdef == snone)))
2540 structdef = skeyseen;
2541 structtype = toktype;
2542 structcblev = cblev;
2544 return FALSE;
2547 if (structdef == skeyseen)
2549 structdef = stagseen;
2550 return TRUE;
2553 if (typdef != tnone)
2554 definedef = dnone;
2556 /* Detect Objective C constructs. */
2557 switch (objdef)
2559 case onone:
2560 switch (toktype)
2562 case st_C_objprot:
2563 objdef = oprotocol;
2564 return FALSE;
2565 case st_C_objimpl:
2566 objdef = oimplementation;
2567 return FALSE;
2569 break;
2570 case oimplementation:
2571 /* Save the class tag for functions or variables defined inside. */
2572 objtag = savenstr (str, len);
2573 objdef = oinbody;
2574 return FALSE;
2575 case oprotocol:
2576 /* Save the class tag for categories. */
2577 objtag = savenstr (str, len);
2578 objdef = otagseen;
2579 *is_func_or_var = TRUE;
2580 return TRUE;
2581 case oparenseen:
2582 objdef = ocatseen;
2583 *is_func_or_var = TRUE;
2584 return TRUE;
2585 case oinbody:
2586 break;
2587 case omethodsign:
2588 if (parlev == 0)
2590 objdef = omethodtag;
2591 linebuffer_setlen (&token_name, len);
2592 strncpy (token_name.buffer, str, len);
2593 token_name.buffer[len] = '\0';
2594 return TRUE;
2596 return FALSE;
2597 case omethodcolon:
2598 if (parlev == 0)
2599 objdef = omethodparm;
2600 return FALSE;
2601 case omethodparm:
2602 if (parlev == 0)
2604 objdef = omethodtag;
2605 linebuffer_setlen (&token_name, token_name.len + len);
2606 strncat (token_name.buffer, str, len);
2607 return TRUE;
2609 return FALSE;
2610 case oignore:
2611 if (toktype == st_C_objend)
2613 /* Memory leakage here: the string pointed by objtag is
2614 never released, because many tests would be needed to
2615 avoid breaking on incorrect input code. The amount of
2616 memory leaked here is the sum of the lengths of the
2617 class tags.
2618 free (objtag); */
2619 objdef = onone;
2621 return FALSE;
2624 /* A function, variable or enum constant? */
2625 switch (toktype)
2627 case st_C_extern:
2628 fvextern = TRUE;
2629 /* FALLTHRU */
2630 case st_C_typespec:
2631 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2632 fvdef = fvnone; /* should be useless */
2633 return FALSE;
2634 case st_C_ignore:
2635 fvextern = FALSE;
2636 fvdef = vignore;
2637 return FALSE;
2638 case st_C_operator:
2639 fvdef = foperator;
2640 *is_func_or_var = TRUE;
2641 return TRUE;
2642 case st_none:
2643 if (constantypedefs
2644 && structdef == snone
2645 && structtype == st_C_enum && cblev > structcblev)
2646 return TRUE; /* enum constant */
2647 switch (fvdef)
2649 case fdefunkey:
2650 if (cblev > 0)
2651 break;
2652 fvdef = fdefunname; /* GNU macro */
2653 *is_func_or_var = TRUE;
2654 return TRUE;
2655 case fvnone:
2656 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2657 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2659 fvdef = vignore;
2660 return FALSE;
2662 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2664 fvdef = foperator;
2665 *is_func_or_var = TRUE;
2666 return TRUE;
2668 if (cblev > 0 && !instruct)
2669 break;
2670 fvdef = fvnameseen; /* function or variable */
2671 *is_func_or_var = TRUE;
2672 return TRUE;
2674 break;
2677 return FALSE;
2682 * C_entries often keeps pointers to tokens or lines which are older than
2683 * the line currently read. By keeping two line buffers, and switching
2684 * them at end of line, it is possible to use those pointers.
2686 struct
2688 long linepos;
2689 linebuffer lb;
2690 } lbs[2];
2692 #define current_lb_is_new (newndx == curndx)
2693 #define switch_line_buffers() (curndx = 1 - curndx)
2695 #define curlb (lbs[curndx].lb)
2696 #define newlb (lbs[newndx].lb)
2697 #define curlinepos (lbs[curndx].linepos)
2698 #define newlinepos (lbs[newndx].linepos)
2700 #define CNL_SAVE_DEFINEDEF() \
2701 do { \
2702 curlinepos = charno; \
2703 lineno++; \
2704 linecharno = charno; \
2705 charno += readline (&curlb, inf); \
2706 lp = curlb.buffer; \
2707 quotednl = FALSE; \
2708 newndx = curndx; \
2709 } while (0)
2711 #define CNL() \
2712 do { \
2713 CNL_SAVE_DEFINEDEF(); \
2714 if (savetoken.valid) \
2716 token = savetoken; \
2717 savetoken.valid = FALSE; \
2719 definedef = dnone; \
2720 } while (0)
2723 static void
2724 make_C_tag (isfun)
2725 bool isfun;
2727 /* This function should never be called when token.valid is FALSE, but
2728 we must protect against invalid input or internal errors. */
2729 if (DEBUG || token.valid)
2731 if (traditional_tag_style)
2733 /* This was the original code. Now we call new_pfnote instead,
2734 which uses the new method for naming tags (see new_pfnote). */
2735 char *name = NULL;
2737 if (CTAGS || token.named)
2738 name = savestr (token_name.buffer);
2739 if (DEBUG && !token.valid)
2741 if (token.named)
2742 name = concat (name, "##invalid##", "");
2743 else
2744 name = savestr ("##invalid##");
2746 pfnote (name, isfun, token.line,
2747 token.offset+token.length+1, token.lineno, token.linepos);
2749 else
2750 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2751 token.offset+token.length+1, token.lineno, token.linepos);
2752 token.valid = FALSE;
2758 * C_entries ()
2759 * This routine finds functions, variables, typedefs,
2760 * #define's, enum constants and struct/union/enum definitions in
2761 * C syntax and adds them to the list.
2763 static void
2764 C_entries (c_ext, inf)
2765 int c_ext; /* extension of C */
2766 FILE *inf; /* input file */
2768 register char c; /* latest char read; '\0' for end of line */
2769 register char *lp; /* pointer one beyond the character `c' */
2770 int curndx, newndx; /* indices for current and new lb */
2771 register int tokoff; /* offset in line of start of current token */
2772 register int toklen; /* length of current token */
2773 char *qualifier; /* string used to qualify names */
2774 int qlen; /* length of qualifier */
2775 int cblev; /* current curly brace level */
2776 int parlev; /* current parenthesis level */
2777 int typdefcblev; /* cblev where a typedef struct body begun */
2778 bool incomm, inquote, inchar, quotednl, midtoken;
2779 bool cplpl, cjava;
2780 bool yacc_rules; /* in the rules part of a yacc file */
2781 struct tok savetoken; /* token saved during preprocessor handling */
2784 initbuffer (&token_name);
2785 initbuffer (&lbs[0].lb);
2786 initbuffer (&lbs[1].lb);
2787 if (cstack.size == 0)
2789 cstack.size = (DEBUG) ? 1 : 4;
2790 cstack.nl = 0;
2791 cstack.cname = xnew (cstack.size, char *);
2792 cstack.cblev = xnew (cstack.size, int);
2795 tokoff = toklen = 0; /* keep compiler quiet */
2796 curndx = newndx = 0;
2797 lineno = 0;
2798 charno = 0;
2799 lp = curlb.buffer;
2800 *lp = 0;
2802 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2803 structdef = snone; definedef = dnone; objdef = onone;
2804 yacc_rules = FALSE;
2805 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2806 token.valid = savetoken.valid = FALSE;
2807 cblev = 0;
2808 parlev = 0;
2809 cplpl = (c_ext & C_PLPL) == C_PLPL;
2810 cjava = (c_ext & C_JAVA) == C_JAVA;
2811 if (cjava)
2812 { qualifier = "."; qlen = 1; }
2813 else
2814 { qualifier = "::"; qlen = 2; }
2817 while (!feof (inf))
2819 c = *lp++;
2820 if (c == '\\')
2822 /* If we're at the end of the line, the next character is a
2823 '\0'; don't skip it, because it's the thing that tells us
2824 to read the next line. */
2825 if (*lp == '\0')
2827 quotednl = TRUE;
2828 continue;
2830 lp++;
2831 c = ' ';
2833 else if (incomm)
2835 switch (c)
2837 case '*':
2838 if (*lp == '/')
2840 c = *lp++;
2841 incomm = FALSE;
2843 break;
2844 case '\0':
2845 /* Newlines inside comments do not end macro definitions in
2846 traditional cpp. */
2847 CNL_SAVE_DEFINEDEF ();
2848 break;
2850 continue;
2852 else if (inquote)
2854 switch (c)
2856 case '"':
2857 inquote = FALSE;
2858 break;
2859 case '\0':
2860 /* Newlines inside strings do not end macro definitions
2861 in traditional cpp, even though compilers don't
2862 usually accept them. */
2863 CNL_SAVE_DEFINEDEF ();
2864 break;
2866 continue;
2868 else if (inchar)
2870 switch (c)
2872 case '\0':
2873 /* Hmmm, something went wrong. */
2874 CNL ();
2875 /* FALLTHRU */
2876 case '\'':
2877 inchar = FALSE;
2878 break;
2880 continue;
2882 else
2883 switch (c)
2885 case '"':
2886 inquote = TRUE;
2887 switch (fvdef)
2889 case fdefunkey:
2890 case fstartlist:
2891 case finlist:
2892 case fignore:
2893 case vignore:
2894 break;
2895 default:
2896 fvextern = FALSE;
2897 fvdef = fvnone;
2899 continue;
2900 case '\'':
2901 inchar = TRUE;
2902 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2904 fvextern = FALSE;
2905 fvdef = fvnone;
2907 continue;
2908 case '/':
2909 if (*lp == '*')
2911 lp++;
2912 incomm = TRUE;
2913 continue;
2915 else if (/* cplpl && */ *lp == '/')
2917 c = '\0';
2918 break;
2920 else
2921 break;
2922 case '%':
2923 if ((c_ext & YACC) && *lp == '%')
2925 /* Entering or exiting rules section in yacc file. */
2926 lp++;
2927 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2928 typdef = tnone; structdef = snone;
2929 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2930 cblev = 0;
2931 yacc_rules = !yacc_rules;
2932 continue;
2934 else
2935 break;
2936 case '#':
2937 if (definedef == dnone)
2939 char *cp;
2940 bool cpptoken = TRUE;
2942 /* Look back on this line. If all blanks, or nonblanks
2943 followed by an end of comment, this is a preprocessor
2944 token. */
2945 for (cp = newlb.buffer; cp < lp-1; cp++)
2946 if (!iswhite (*cp))
2948 if (*cp == '*' && *(cp+1) == '/')
2950 cp++;
2951 cpptoken = TRUE;
2953 else
2954 cpptoken = FALSE;
2956 if (cpptoken)
2957 definedef = dsharpseen;
2958 } /* if (definedef == dnone) */
2960 continue;
2961 } /* switch (c) */
2964 /* Consider token only if some involved conditions are satisfied. */
2965 if (typdef != tignore
2966 && definedef != dignorerest
2967 && fvdef != finlist
2968 && structdef != sintemplate
2969 && (definedef != dnone
2970 || structdef != scolonseen))
2972 if (midtoken)
2974 if (endtoken (c))
2976 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2979 * This handles :: in the middle, but not at the
2980 * beginning of an identifier. Also, space-separated
2981 * :: is not recognised.
2983 lp += 2;
2984 toklen += 2;
2985 c = lp[-1];
2986 goto still_in_token;
2988 else
2990 bool funorvar = FALSE;
2992 if (yacc_rules
2993 || consider_token (newlb.buffer + tokoff, toklen, c,
2994 &c_ext, cblev, parlev, &funorvar))
2996 if (fvdef == foperator)
2998 char *oldlp = lp;
2999 lp = skip_spaces (lp-1);
3000 if (*lp != '\0')
3001 lp += 1;
3002 while (*lp != '\0'
3003 && !iswhite (*lp) && *lp != '(')
3004 lp += 1;
3005 c = *lp++;
3006 toklen += lp - oldlp;
3008 token.named = FALSE;
3009 if ((c_ext & C_EXT) /* not pure C */
3010 && nestlev > 0 && definedef == dnone)
3011 /* in struct body */
3013 write_classname (&token_name, qualifier);
3014 linebuffer_setlen (&token_name,
3015 token_name.len+qlen+toklen);
3016 strcat (token_name.buffer, qualifier);
3017 strncat (token_name.buffer,
3018 newlb.buffer + tokoff, toklen);
3019 token.named = TRUE;
3021 else if (objdef == ocatseen)
3022 /* Objective C category */
3024 int len = strlen (objtag) + 2 + toklen;
3025 linebuffer_setlen (&token_name, len);
3026 strcpy (token_name.buffer, objtag);
3027 strcat (token_name.buffer, "(");
3028 strncat (token_name.buffer,
3029 newlb.buffer + tokoff, toklen);
3030 strcat (token_name.buffer, ")");
3031 token.named = TRUE;
3033 else if (objdef == omethodtag
3034 || objdef == omethodparm)
3035 /* Objective C method */
3037 token.named = TRUE;
3039 else if (fvdef == fdefunname)
3040 /* GNU DEFUN and similar macros */
3042 bool defun = (newlb.buffer[tokoff] == 'F');
3043 int off = tokoff;
3044 int len = toklen;
3046 /* Rewrite the tag so that emacs lisp DEFUNs
3047 can be found by their elisp name */
3048 if (defun)
3050 off += 1;
3051 len -= 1;
3053 len = toklen;
3054 linebuffer_setlen (&token_name, len);
3055 strncpy (token_name.buffer,
3056 newlb.buffer + off, len);
3057 token_name.buffer[len] = '\0';
3058 if (defun)
3059 while (--len >= 0)
3060 if (token_name.buffer[len] == '_')
3061 token_name.buffer[len] = '-';
3062 token.named = defun;
3064 else
3066 linebuffer_setlen (&token_name, toklen);
3067 strncpy (token_name.buffer,
3068 newlb.buffer + tokoff, toklen);
3069 token_name.buffer[toklen] = '\0';
3070 /* Name macros and members. */
3071 token.named = (structdef == stagseen
3072 || typdef == ttypeseen
3073 || typdef == tend
3074 || (funorvar
3075 && definedef == dignorerest)
3076 || (funorvar
3077 && definedef == dnone
3078 && structdef == snone
3079 && cblev > 0));
3081 token.lineno = lineno;
3082 token.offset = tokoff;
3083 token.length = toklen;
3084 token.line = newlb.buffer;
3085 token.linepos = newlinepos;
3086 token.valid = TRUE;
3088 if (definedef == dnone
3089 && (fvdef == fvnameseen
3090 || fvdef == foperator
3091 || structdef == stagseen
3092 || typdef == tend
3093 || typdef == ttypeseen
3094 || objdef != onone))
3096 if (current_lb_is_new)
3097 switch_line_buffers ();
3099 else if (definedef != dnone
3100 || fvdef == fdefunname
3101 || instruct)
3102 make_C_tag (funorvar);
3104 midtoken = FALSE;
3106 } /* if (endtoken (c)) */
3107 else if (intoken (c))
3108 still_in_token:
3110 toklen++;
3111 continue;
3113 } /* if (midtoken) */
3114 else if (begtoken (c))
3116 switch (definedef)
3118 case dnone:
3119 switch (fvdef)
3121 case fstartlist:
3122 fvdef = finlist;
3123 continue;
3124 case flistseen:
3125 make_C_tag (TRUE); /* a function */
3126 fvdef = fignore;
3127 break;
3128 case fvnameseen:
3129 fvdef = fvnone;
3130 break;
3132 if (structdef == stagseen && !cjava)
3134 popclass_above (cblev);
3135 structdef = snone;
3137 break;
3138 case dsharpseen:
3139 savetoken = token;
3141 if (!yacc_rules || lp == newlb.buffer + 1)
3143 tokoff = lp - 1 - newlb.buffer;
3144 toklen = 1;
3145 midtoken = TRUE;
3147 continue;
3148 } /* if (begtoken) */
3149 } /* if must look at token */
3152 /* Detect end of line, colon, comma, semicolon and various braces
3153 after having handled a token.*/
3154 switch (c)
3156 case ':':
3157 if (yacc_rules && token.offset == 0 && token.valid)
3159 make_C_tag (FALSE); /* a yacc function */
3160 break;
3162 if (definedef != dnone)
3163 break;
3164 switch (objdef)
3166 case otagseen:
3167 objdef = oignore;
3168 make_C_tag (TRUE); /* an Objective C class */
3169 break;
3170 case omethodtag:
3171 case omethodparm:
3172 objdef = omethodcolon;
3173 linebuffer_setlen (&token_name, token_name.len + 1);
3174 strcat (token_name.buffer, ":");
3175 break;
3177 if (structdef == stagseen)
3178 structdef = scolonseen;
3179 break;
3180 case ';':
3181 if (definedef != dnone)
3182 break;
3183 switch (typdef)
3185 case tend:
3186 case ttypeseen:
3187 make_C_tag (FALSE); /* a typedef */
3188 typdef = tnone;
3189 fvdef = fvnone;
3190 break;
3191 case tnone:
3192 case tinbody:
3193 case tignore:
3194 switch (fvdef)
3196 case fignore:
3197 if (typdef == tignore)
3198 fvdef = fvnone;
3199 break;
3200 case fvnameseen:
3201 if ((globals && cblev == 0 && (!fvextern || declarations))
3202 || (members && instruct))
3203 make_C_tag (FALSE); /* a variable */
3204 fvextern = FALSE;
3205 fvdef = fvnone;
3206 token.valid = FALSE;
3207 break;
3208 case flistseen:
3209 if ((declarations && typdef == tnone && !instruct)
3210 || (members && typdef != tignore && instruct))
3211 make_C_tag (TRUE); /* a function declaration */
3212 /* FALLTHRU */
3213 default:
3214 fvextern = FALSE;
3215 fvdef = fvnone;
3216 if (declarations
3217 && structdef == stagseen && (c_ext & C_PLPL))
3218 make_C_tag (FALSE); /* forward declaration */
3219 else
3220 /* The following instruction invalidates the token.
3221 Probably the token should be invalidated in all other
3222 cases where some state machine is reset prematurely. */
3223 token.valid = FALSE;
3224 } /* switch (fvdef) */
3225 /* FALLTHRU */
3226 default:
3227 if (!instruct)
3228 typdef = tnone;
3230 if (structdef == stagseen)
3231 structdef = snone;
3232 break;
3233 case ',':
3234 if (definedef != dnone)
3235 break;
3236 switch (objdef)
3238 case omethodtag:
3239 case omethodparm:
3240 make_C_tag (TRUE); /* an Objective C method */
3241 objdef = oinbody;
3242 break;
3244 switch (fvdef)
3246 case fdefunkey:
3247 case foperator:
3248 case fstartlist:
3249 case finlist:
3250 case fignore:
3251 case vignore:
3252 break;
3253 case fdefunname:
3254 fvdef = fignore;
3255 break;
3256 case fvnameseen: /* a variable */
3257 if ((globals && cblev == 0 && (!fvextern || declarations))
3258 || (members && instruct))
3259 make_C_tag (FALSE);
3260 break;
3261 case flistseen: /* a function */
3262 if ((declarations && typdef == tnone && !instruct)
3263 || (members && typdef != tignore && instruct))
3265 make_C_tag (TRUE); /* a function declaration */
3266 fvdef = fvnameseen;
3268 else if (!declarations)
3269 fvdef = fvnone;
3270 token.valid = FALSE;
3271 break;
3272 default:
3273 fvdef = fvnone;
3275 if (structdef == stagseen)
3276 structdef = snone;
3277 break;
3278 case '[':
3279 if (definedef != dnone)
3280 break;
3281 if (structdef == stagseen)
3282 structdef = snone;
3283 switch (typdef)
3285 case ttypeseen:
3286 case tend:
3287 typdef = tignore;
3288 make_C_tag (FALSE); /* a typedef */
3289 break;
3290 case tnone:
3291 case tinbody:
3292 switch (fvdef)
3294 case foperator:
3295 case finlist:
3296 case fignore:
3297 case vignore:
3298 break;
3299 case fvnameseen:
3300 if ((members && cblev == 1)
3301 || (globals && cblev == 0
3302 && (!fvextern || declarations)))
3303 make_C_tag (FALSE); /* a variable */
3304 /* FALLTHRU */
3305 default:
3306 fvdef = fvnone;
3308 break;
3310 break;
3311 case '(':
3312 if (definedef != dnone)
3313 break;
3314 if (objdef == otagseen && parlev == 0)
3315 objdef = oparenseen;
3316 switch (fvdef)
3318 case fvnameseen:
3319 if (typdef == ttypeseen
3320 && *lp != '*'
3321 && !instruct)
3323 /* This handles constructs like:
3324 typedef void OperatorFun (int fun); */
3325 make_C_tag (FALSE);
3326 typdef = tignore;
3327 fvdef = fignore;
3328 break;
3330 /* FALLTHRU */
3331 case foperator:
3332 fvdef = fstartlist;
3333 break;
3334 case flistseen:
3335 fvdef = finlist;
3336 break;
3338 parlev++;
3339 break;
3340 case ')':
3341 if (definedef != dnone)
3342 break;
3343 if (objdef == ocatseen && parlev == 1)
3345 make_C_tag (TRUE); /* an Objective C category */
3346 objdef = oignore;
3348 if (--parlev == 0)
3350 switch (fvdef)
3352 case fstartlist:
3353 case finlist:
3354 fvdef = flistseen;
3355 break;
3357 if (!instruct
3358 && (typdef == tend
3359 || typdef == ttypeseen))
3361 typdef = tignore;
3362 make_C_tag (FALSE); /* a typedef */
3365 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3366 parlev = 0;
3367 break;
3368 case '{':
3369 if (definedef != dnone)
3370 break;
3371 if (typdef == ttypeseen)
3373 typdefcblev = cblev;
3374 typdef = tinbody;
3376 switch (fvdef)
3378 case flistseen:
3379 make_C_tag (TRUE); /* a function */
3380 /* FALLTHRU */
3381 case fignore:
3382 fvdef = fvnone;
3383 break;
3384 case fvnone:
3385 switch (objdef)
3387 case otagseen:
3388 make_C_tag (TRUE); /* an Objective C class */
3389 objdef = oignore;
3390 break;
3391 case omethodtag:
3392 case omethodparm:
3393 make_C_tag (TRUE); /* an Objective C method */
3394 objdef = oinbody;
3395 break;
3396 default:
3397 /* Neutralize `extern "C" {' grot. */
3398 if (cblev == 0 && structdef == snone && nestlev == 0
3399 && typdef == tnone)
3400 cblev = -1;
3403 switch (structdef)
3405 case skeyseen: /* unnamed struct */
3406 pushclass_above (cblev, NULL, 0);
3407 structdef = snone;
3408 break;
3409 case stagseen: /* named struct or enum */
3410 case scolonseen: /* a class */
3411 pushclass_above (cblev, token.line+token.offset, token.length);
3412 structdef = snone;
3413 make_C_tag (FALSE); /* a struct or enum */
3414 break;
3416 cblev++;
3417 break;
3418 case '*':
3419 if (definedef != dnone)
3420 break;
3421 if (fvdef == fstartlist)
3422 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3423 break;
3424 case '}':
3425 if (definedef != dnone)
3426 break;
3427 if (!noindentypedefs && lp == newlb.buffer + 1)
3429 cblev = 0; /* reset curly brace level if first column */
3430 parlev = 0; /* also reset paren level, just in case... */
3432 else if (cblev > 0)
3433 cblev--;
3434 popclass_above (cblev);
3435 structdef = snone;
3436 if (typdef == tinbody && cblev <= typdefcblev)
3438 assert (cblev == typdefcblev);
3439 typdef = tend;
3441 break;
3442 case '=':
3443 if (definedef != dnone)
3444 break;
3445 switch (fvdef)
3447 case foperator:
3448 case finlist:
3449 case fignore:
3450 case vignore:
3451 break;
3452 case fvnameseen:
3453 if ((members && cblev == 1)
3454 || (globals && cblev == 0 && (!fvextern || declarations)))
3455 make_C_tag (FALSE); /* a variable */
3456 /* FALLTHRU */
3457 default:
3458 fvdef = vignore;
3460 break;
3461 case '<':
3462 if (cplpl && structdef == stagseen)
3464 structdef = sintemplate;
3465 break;
3467 goto resetfvdef;
3468 case '>':
3469 if (structdef == sintemplate)
3471 structdef = stagseen;
3472 break;
3474 goto resetfvdef;
3475 case '+':
3476 case '-':
3477 if (objdef == oinbody && cblev == 0)
3479 objdef = omethodsign;
3480 break;
3482 /* FALLTHRU */
3483 resetfvdef:
3484 case '#': case '~': case '&': case '%': case '/': case '|':
3485 case '^': case '!': case '.': case '?': case ']':
3486 if (definedef != dnone)
3487 break;
3488 /* These surely cannot follow a function tag in C. */
3489 switch (fvdef)
3491 case foperator:
3492 case finlist:
3493 case fignore:
3494 case vignore:
3495 break;
3496 default:
3497 fvdef = fvnone;
3499 break;
3500 case '\0':
3501 if (objdef == otagseen)
3503 make_C_tag (TRUE); /* an Objective C class */
3504 objdef = oignore;
3506 /* If a macro spans multiple lines don't reset its state. */
3507 if (quotednl)
3508 CNL_SAVE_DEFINEDEF ();
3509 else
3510 CNL ();
3511 break;
3512 } /* switch (c) */
3514 } /* while not eof */
3516 free (token_name.buffer);
3517 free (lbs[0].lb.buffer);
3518 free (lbs[1].lb.buffer);
3522 * Process either a C++ file or a C file depending on the setting
3523 * of a global flag.
3525 static void
3526 default_C_entries (inf)
3527 FILE *inf;
3529 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Parse INF as plain C, with no language extensions enabled. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3540 /* Always do C++. */
3541 static void
3542 Cplusplus_entries (inf)
3543 FILE *inf;
3545 C_entries (C_PLPL, inf);
3548 /* Always do Java. */
3549 static void
3550 Cjava_entries (inf)
3551 FILE *inf;
3553 C_entries (C_JAVA, inf);
3556 /* Always do C*. */
3557 static void
3558 Cstar_entries (inf)
3559 FILE *inf;
3561 C_entries (C_STAR, inf);
3564 /* Always do Yacc. */
3565 static void
3566 Yacc_entries (inf)
3567 FILE *inf;
3569 C_entries (YACC, inf);
/* A useful macro: loop header that reads FILE_POINTER one line at a
   time into LINE_BUFFER until end of file.  Before each iteration it
   bumps lineno, saves the old charno in linecharno, advances charno by
   what readline returns, and points CHAR_POINTER at the start of
   LINE_BUFFER's text (not at the global `lb', so any buffer may be
   passed).  The statement following the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3586 * Read a file, but do no processing. This is used to do regexp
3587 * matching on files that have no language defined.
3589 static void
3590 just_read_file (inf)
3591 FILE *inf;
3593 register char *dummy;
3595 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3596 continue;
3600 /* Fortran parsing */
3602 static bool tail P_((char *));
3603 static void takeprec P_((void));
3604 static void getit P_((FILE *));
3606 static bool
3607 tail (cp)
3608 char *cp;
3610 register int len = 0;
3612 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3613 cp++, len++;
3614 if (*cp == '\0' && !intoken (dbp[len]))
3616 dbp += len;
3617 return TRUE;
3619 return FALSE;
3622 static void
3623 takeprec ()
3625 dbp = skip_spaces (dbp);
3626 if (*dbp != '*')
3627 return;
3628 dbp++;
3629 dbp = skip_spaces (dbp);
3630 if (strneq (dbp, "(*)", 3))
3632 dbp += 3;
3633 return;
3635 if (!ISDIGIT (*dbp))
3637 --dbp; /* force failure */
3638 return;
3641 dbp++;
3642 while (ISDIGIT (*dbp));
3645 static void
3646 getit (inf)
3647 FILE *inf;
3649 register char *cp;
3651 dbp = skip_spaces (dbp);
3652 if (*dbp == '\0')
3654 lineno++;
3655 linecharno = charno;
3656 charno += readline (&lb, inf);
3657 dbp = lb.buffer;
3658 if (dbp[5] != '&')
3659 return;
3660 dbp += 6;
3661 dbp = skip_spaces (dbp);
3663 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3664 return;
3665 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3666 continue;
3667 pfnote (savenstr (dbp, cp-dbp), TRUE,
3668 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3672 static void
3673 Fortran_functions (inf)
3674 FILE *inf;
3676 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3678 if (*dbp == '%')
3679 dbp++; /* Ratfor escape to fortran */
3680 dbp = skip_spaces (dbp);
3681 if (*dbp == '\0')
3682 continue;
3683 switch (lowcase (*dbp))
3685 case 'i':
3686 if (tail ("integer"))
3687 takeprec ();
3688 break;
3689 case 'r':
3690 if (tail ("real"))
3691 takeprec ();
3692 break;
3693 case 'l':
3694 if (tail ("logical"))
3695 takeprec ();
3696 break;
3697 case 'c':
3698 if (tail ("complex") || tail ("character"))
3699 takeprec ();
3700 break;
3701 case 'd':
3702 if (tail ("double"))
3704 dbp = skip_spaces (dbp);
3705 if (*dbp == '\0')
3706 continue;
3707 if (tail ("precision"))
3708 break;
3709 continue;
3711 break;
3713 dbp = skip_spaces (dbp);
3714 if (*dbp == '\0')
3715 continue;
3716 switch (lowcase (*dbp))
3718 case 'f':
3719 if (tail ("function"))
3720 getit (inf);
3721 continue;
3722 case 's':
3723 if (tail ("subroutine"))
3724 getit (inf);
3725 continue;
3726 case 'e':
3727 if (tail ("entry"))
3728 getit (inf);
3729 continue;
3730 case 'b':
3731 if (tail ("blockdata") || tail ("block data"))
3733 dbp = skip_spaces (dbp);
3734 if (*dbp == '\0') /* assume un-named */
3735 pfnote (savestr ("blockdata"), TRUE,
3736 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3737 else
3738 getit (inf); /* look for name */
3740 continue;
3747 * Ada parsing
3748 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3751 static void adagetit P_((FILE *, char *));
3753 /* Once we are positioned after an "interesting" keyword, let's get
3754 the real tag value necessary. */
3755 static void
3756 adagetit (inf, name_qualifier)
3757 FILE *inf;
3758 char *name_qualifier;
3760 register char *cp;
3761 char *name;
3762 char c;
3764 while (!feof (inf))
3766 dbp = skip_spaces (dbp);
3767 if (*dbp == '\0'
3768 || (dbp[0] == '-' && dbp[1] == '-'))
3770 lineno++;
3771 linecharno = charno;
3772 charno += readline (&lb, inf);
3773 dbp = lb.buffer;
3775 switch (*dbp)
3777 case 'b':
3778 case 'B':
3779 if (tail ("body"))
3781 /* Skipping body of procedure body or package body or ....
3782 resetting qualifier to body instead of spec. */
3783 name_qualifier = "/b";
3784 continue;
3786 break;
3787 case 't':
3788 case 'T':
3789 /* Skipping type of task type or protected type ... */
3790 if (tail ("type"))
3791 continue;
3792 break;
3794 if (*dbp == '"')
3796 dbp += 1;
3797 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3798 continue;
3800 else
3802 dbp = skip_spaces (dbp);
3803 for (cp = dbp;
3804 (*cp != '\0'
3805 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3806 cp++)
3807 continue;
3808 if (cp == dbp)
3809 return;
3811 c = *cp;
3812 *cp = '\0';
3813 name = concat (dbp, name_qualifier, "");
3814 *cp = c;
3815 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3816 if (c == '"')
3817 dbp = cp + 1;
3818 return;
3822 static void
3823 Ada_funcs (inf)
3824 FILE *inf;
3826 bool inquote = FALSE;
3828 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3830 while (*dbp != '\0')
3832 /* Skip a string i.e. "abcd". */
3833 if (inquote || (*dbp == '"'))
3835 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3836 if (dbp != NULL)
3838 inquote = FALSE;
3839 dbp += 1;
3840 continue; /* advance char */
3842 else
3844 inquote = TRUE;
3845 break; /* advance line */
3849 /* Skip comments. */
3850 if (dbp[0] == '-' && dbp[1] == '-')
3851 break; /* advance line */
3853 /* Skip character enclosed in single quote i.e. 'a'
3854 and skip single quote starting an attribute i.e. 'Image. */
3855 if (*dbp == '\'')
3857 dbp++ ;
3858 if (*dbp != '\0')
3859 dbp++;
3860 continue;
3863 /* Search for beginning of a token. */
3864 if (!begtoken (*dbp))
3866 dbp++;
3867 continue; /* advance char */
3870 /* We are at the beginning of a token. */
3871 switch (*dbp)
3873 case 'f':
3874 case 'F':
3875 if (!packages_only && tail ("function"))
3876 adagetit (inf, "/f");
3877 else
3878 break; /* from switch */
3879 continue; /* advance char */
3880 case 'p':
3881 case 'P':
3882 if (!packages_only && tail ("procedure"))
3883 adagetit (inf, "/p");
3884 else if (tail ("package"))
3885 adagetit (inf, "/s");
3886 else if (tail ("protected")) /* protected type */
3887 adagetit (inf, "/t");
3888 else
3889 break; /* from switch */
3890 continue; /* advance char */
3891 case 't':
3892 case 'T':
3893 if (!packages_only && tail ("task"))
3894 adagetit (inf, "/k");
3895 else if (typedefs && !packages_only && tail ("type"))
3897 adagetit (inf, "/t");
3898 while (*dbp != '\0')
3899 dbp += 1;
3901 else
3902 break; /* from switch */
3903 continue; /* advance char */
3906 /* Look for the end of the token. */
3907 while (!endtoken (*dbp))
3908 dbp++;
3910 } /* advance char */
3911 } /* advance line */
3916 * Bob Weiner, Motorola Inc., 4/3/94
3917 * Unix and microcontroller assembly tag handling
3918 * look for '^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]'
3920 static void
3921 Asm_labels (inf)
3922 FILE *inf;
3924 register char *cp;
3926 LOOP_ON_INPUT_LINES (inf, lb, cp)
3928 /* If first char is alphabetic or one of [_.$], test for colon
3929 following identifier. */
3930 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3932 /* Read past label. */
3933 cp++;
3934 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3935 cp++;
3936 if (*cp == ':' || iswhite (*cp))
3938 /* Found end of label, so copy it and add it to the table. */
3939 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3940 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3948 * Perl support
3949 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3950 * Perl variable names: /^(my|local).../
3951 * Bart Robinson <lomew@cs.utah.edu> (1995)
3952 * Michael Ernst <mernst@alum.mit.edu> (1997)
3954 static void
3955 Perl_functions (inf)
3956 FILE *inf;
3958 register char *cp;
3960 LOOP_ON_INPUT_LINES (inf, lb, cp)
3962 if (*cp++ == 's'
3963 && *cp++ == 'u'
3964 && *cp++ == 'b' && iswhite (*cp++))
3966 cp = skip_spaces (cp);
3967 if (*cp != '\0')
3969 char *sp = cp;
3970 while (*cp != '\0'
3971 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3972 cp++;
3973 pfnote (savenstr (sp, cp-sp), TRUE,
3974 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3977 else if (globals /* only if tagging global vars is enabled */
3978 && ((cp = lb.buffer,
3979 *cp++ == 'm'
3980 && *cp++ == 'y')
3981 || (cp = lb.buffer,
3982 *cp++ == 'l'
3983 && *cp++ == 'o'
3984 && *cp++ == 'c'
3985 && *cp++ == 'a'
3986 && *cp++ == 'l'))
3987 && (*cp == '(' || iswhite (*cp)))
3989 /* After "my" or "local", but before any following paren or space. */
3990 char *varname = NULL;
3992 cp = skip_spaces (cp);
3993 if (*cp == '$' || *cp == '@' || *cp == '%')
3995 char* varstart = ++cp;
3996 while (ISALNUM (*cp) || *cp == '_')
3997 cp++;
3998 varname = savenstr (varstart, cp-varstart);
4000 else
4002 /* Should be examining a variable list at this point;
4003 could insist on seeing an open parenthesis. */
4004 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4005 cp++;
4008 /* Perhaps I should back cp up one character, so the TAGS table
4009 doesn't mention (and so depend upon) the following char. */
4010 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4011 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4018 * Python support
4019 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4020 * Eric S. Raymond <esr@thyrsus.com> (1997)
4022 static void
4023 Python_functions (inf)
4024 FILE *inf;
4026 register char *cp;
4028 LOOP_ON_INPUT_LINES (inf, lb, cp)
4030 if (*cp++ == 'd'
4031 && *cp++ == 'e'
4032 && *cp++ == 'f' && iswhite (*cp++))
4034 cp = skip_spaces (cp);
4035 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4036 cp++;
4037 pfnote (NULL, TRUE,
4038 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4041 cp = lb.buffer;
4042 if (*cp++ == 'c'
4043 && *cp++ == 'l'
4044 && *cp++ == 'a'
4045 && *cp++ == 's'
4046 && *cp++ == 's' && iswhite (*cp++))
4048 cp = skip_spaces (cp);
4049 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4050 cp++;
4051 pfnote (NULL, TRUE,
4052 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4058 /* Idea by Corny de Souza
4059 * Cobol tag functions
4060 * We could look for anything that could be a paragraph name.
4061 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4063 static void
4064 Cobol_paragraphs (inf)
4065 FILE *inf;
4067 register char *bp, *ep;
4069 LOOP_ON_INPUT_LINES (inf, lb, bp)
4071 if (lb.len < 9)
4072 continue;
4073 bp += 8;
4075 /* If eoln, compiler option or comment ignore whole line. */
4076 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4077 continue;
4079 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4080 continue;
4081 if (*ep++ == '.')
4082 pfnote (savenstr (bp, ep-bp), TRUE,
4083 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4089 * Makefile support
4090 * Idea by Assar Westerlund <assar@sics.se> (2001)
4092 static void
4093 Makefile_targets (inf)
4094 FILE *inf;
4096 register char *bp;
4098 LOOP_ON_INPUT_LINES (inf, lb, bp)
4100 if (*bp == '\t' || *bp == '#')
4101 continue;
4102 while (*bp != '\0' && *bp != '=' && *bp != ':')
4103 bp++;
4104 if (*bp == ':')
4105 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4106 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4111 /* Added by Mosur Mohan, 4/22/88 */
4112 /* Pascal parsing */
4115 * Locates tags for procedures & functions. Doesn't do any type- or
4116 * var-definitions. It does look for the keyword "extern" or
4117 * "forward" immediately following the procedure statement; if found,
4118 * the tag is skipped.
4120 static void
4121 Pascal_functions (inf)
4122 FILE *inf;
4124 linebuffer tline; /* mostly copied from C_entries */
4125 long save_lcno;
4126 int save_lineno, save_len;
4127 char c, *cp, *namebuf;
4129 bool /* each of these flags is TRUE iff: */
4130 incomment, /* point is inside a comment */
4131 inquote, /* point is inside '..' string */
4132 get_tagname, /* point is after PROCEDURE/FUNCTION
4133 keyword, so next item = potential tag */
4134 found_tag, /* point is after a potential tag */
4135 inparms, /* point is within parameter-list */
4136 verify_tag; /* point has passed the parm-list, so the
4137 next token will determine whether this
4138 is a FORWARD/EXTERN to be ignored, or
4139 whether it is a real tag */
4141 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4142 namebuf = NULL; /* keep compiler quiet */
4143 lineno = 0;
4144 charno = 0;
4145 dbp = lb.buffer;
4146 *dbp = '\0';
4147 initbuffer (&tline);
4149 incomment = inquote = FALSE;
4150 found_tag = FALSE; /* have a proc name; check if extern */
4151 get_tagname = FALSE; /* have found "procedure" keyword */
4152 inparms = FALSE; /* found '(' after "proc" */
4153 verify_tag = FALSE; /* check if "extern" is ahead */
4156 while (!feof (inf)) /* long main loop to get next char */
4158 c = *dbp++;
4159 if (c == '\0') /* if end of line */
4161 lineno++;
4162 linecharno = charno;
4163 charno += readline (&lb, inf);
4164 dbp = lb.buffer;
4165 if (*dbp == '\0')
4166 continue;
4167 if (!((found_tag && verify_tag)
4168 || get_tagname))
4169 c = *dbp++; /* only if don't need *dbp pointing
4170 to the beginning of the name of
4171 the procedure or function */
4173 if (incomment)
4175 if (c == '}') /* within { } comments */
4176 incomment = FALSE;
4177 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4179 dbp++;
4180 incomment = FALSE;
4182 continue;
4184 else if (inquote)
4186 if (c == '\'')
4187 inquote = FALSE;
4188 continue;
4190 else
4191 switch (c)
4193 case '\'':
4194 inquote = TRUE; /* found first quote */
4195 continue;
4196 case '{': /* found open { comment */
4197 incomment = TRUE;
4198 continue;
4199 case '(':
4200 if (*dbp == '*') /* found open (* comment */
4202 incomment = TRUE;
4203 dbp++;
4205 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4206 inparms = TRUE;
4207 continue;
4208 case ')': /* end of parms list */
4209 if (inparms)
4210 inparms = FALSE;
4211 continue;
4212 case ';':
4213 if (found_tag && !inparms) /* end of proc or fn stmt */
4215 verify_tag = TRUE;
4216 break;
4218 continue;
4220 if (found_tag && verify_tag && (*dbp != ' '))
4222 /* check if this is an "extern" declaration */
4223 if (*dbp == '\0')
4224 continue;
4225 if (lowcase (*dbp == 'e'))
4227 if (tail ("extern")) /* superfluous, really! */
4229 found_tag = FALSE;
4230 verify_tag = FALSE;
4233 else if (lowcase (*dbp) == 'f')
4235 if (tail ("forward")) /* check for forward reference */
4237 found_tag = FALSE;
4238 verify_tag = FALSE;
4241 if (found_tag && verify_tag) /* not external proc, so make tag */
4243 found_tag = FALSE;
4244 verify_tag = FALSE;
4245 pfnote (namebuf, TRUE,
4246 tline.buffer, save_len, save_lineno, save_lcno);
4247 continue;
4250 if (get_tagname) /* grab name of proc or fn */
4252 if (*dbp == '\0')
4253 continue;
4255 /* save all values for later tagging */
4256 linebuffer_setlen (&tline, lb.len);
4257 strcpy (tline.buffer, lb.buffer);
4258 save_lineno = lineno;
4259 save_lcno = linecharno;
4261 /* grab block name */
4262 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4263 continue;
4264 namebuf = savenstr (dbp, cp-dbp);
4265 dbp = cp; /* set dbp to e-o-token */
4266 save_len = dbp - lb.buffer + 1;
4267 get_tagname = FALSE;
4268 found_tag = TRUE;
4269 continue;
4271 /* and proceed to check for "extern" */
4273 else if (!incomment && !inquote && !found_tag)
4275 /* check for proc/fn keywords */
4276 switch (lowcase (c))
4278 case 'p':
4279 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4280 get_tagname = TRUE;
4281 continue;
4282 case 'f':
4283 if (tail ("unction"))
4284 get_tagname = TRUE;
4285 continue;
4288 } /* while not eof */
4290 free (tline.buffer);
4295 * Lisp tag functions
4296 * look for (def or (DEF, quote or QUOTE
4299 static int L_isdef P_((char *));
4300 static int L_isquote P_((char *));
4301 static void L_getit P_((void));
/* Return 1 if the three characters after STRP[0] spell "def" in any
   mixture of upper and lower case, else 0.  STRP[0] itself (the open
   parenthesis at the call sites) is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  static const char lower[] = "def";
  static const char upper[] = "DEF";
  register int i;

  /* Stop at the first mismatch so that nothing beyond a terminating
     null character is ever read.  */
  for (i = 0; i < 3; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return 1;
}
/* Return 1 if the characters after STRP[0] spell "quote" in any mixture
   of upper and lower case and are followed by a white space character,
   else 0.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  static const char lower[] = "quote";
  static const char upper[] = "QUOTE";
  register int i;

  for (i = 0; i < 5; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return iswhite (strp[6]) ? 1 : 0;
}
4324 static void
4325 L_getit ()
4327 register char *cp;
4329 if (*dbp == '\'') /* Skip prefix quote */
4330 dbp++;
4331 else if (*dbp == '(')
4333 if (L_isquote (dbp))
4334 dbp += 7; /* Skip "(quote " */
4335 else
4336 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4337 dbp = skip_spaces (dbp);
4340 for (cp = dbp /*+1*/;
4341 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4342 cp++)
4343 continue;
4344 if (cp == dbp)
4345 return;
4347 pfnote (savenstr (dbp, cp-dbp), TRUE,
4348 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4351 static void
4352 Lisp_functions (inf)
4353 FILE *inf;
4355 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4357 if (dbp[0] == '(')
4359 if (L_isdef (dbp))
4361 dbp = skip_non_spaces (dbp);
4362 dbp = skip_spaces (dbp);
4363 L_getit ();
4365 else
4367 /* Check for (foo::defmumble name-defined ... */
4369 dbp++;
4370 while (*dbp != '\0' && !iswhite (*dbp)
4371 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4372 if (*dbp == ':')
4375 dbp++;
4376 while (*dbp == ':');
4378 if (L_isdef (dbp - 1))
4380 dbp = skip_non_spaces (dbp);
4381 dbp = skip_spaces (dbp);
4382 L_getit ();
4392 * Postscript tag functions
4393 * Just look for lines where the first character is '/'
4394 * Also look at "defineps" for PSWrap
4395 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4396 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4398 static void
4399 Postscript_functions (inf)
4400 FILE *inf;
4402 register char *bp, *ep;
4404 LOOP_ON_INPUT_LINES (inf, lb, bp)
4406 if (bp[0] == '/')
4408 for (ep = bp+1;
4409 *ep != '\0' && *ep != ' ' && *ep != '{';
4410 ep++)
4411 continue;
4412 pfnote (savenstr (bp, ep-bp), TRUE,
4413 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4415 else if (strneq (bp, "defineps", 8))
4417 bp = skip_non_spaces (bp);
4418 bp = skip_spaces (bp);
4419 get_tag (bp);
4426 * Scheme tag functions
4427 * look for (def... xyzzy
4428 * look for (def... (xyzzy
4429 * look for (def ... ((...(xyzzy ....
4430 * look for (set! xyzzy
4433 static void
4434 Scheme_functions (inf)
4435 FILE *inf;
4437 register char *bp;
4439 LOOP_ON_INPUT_LINES (inf, lb, bp)
4441 if (bp[0] == '('
4442 && (bp[1] == 'D' || bp[1] == 'd')
4443 && (bp[2] == 'E' || bp[2] == 'e')
4444 && (bp[3] == 'F' || bp[3] == 'f'))
4446 bp = skip_non_spaces (bp);
4447 /* Skip over open parens and white space */
4448 while (iswhite (*bp) || *bp == '(')
4449 bp++;
4450 get_tag (bp);
4452 if (bp[0] == '('
4453 && (bp[1] == 'S' || bp[1] == 's')
4454 && (bp[2] == 'E' || bp[2] == 'e')
4455 && (bp[3] == 'T' || bp[3] == 't')
4456 && (bp[4] == '!' || bp[4] == '!')
4457 && (iswhite (bp[5])))
4459 bp = skip_non_spaces (bp);
4460 bp = skip_spaces (bp);
4461 get_tag (bp);
4467 /* Find tags in TeX and LaTeX input files. */
4469 /* TEX_toktab is a table of TeX control sequences that define tags.
4470 Each TEX_tabent records one such control sequence.
4471 CONVERT THIS TO USE THE Stab TYPE!! */
/* One row of a token table.  TEX_decode_env stores a saved copy of a
   control sequence name in NAME and the strlen of that copy in LEN, and
   marks the end of the table with a NULL name and a zero length.  */
4472 struct TEX_tabent
4474 char *name;
4475 int len;
4478 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4480 /* Default set of control sequences to put into TEX_toktab.
4481 The value of environment var TEXTAGS is prepended to this. */
4483 char *TEX_defenv = "\
4484 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4485 :part:appendix:entry:index";
4487 static void TEX_mode P_((FILE *));
4488 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4489 static int TEX_Token P_((char *));
4491 char TEX_esc = '\\';
4492 char TEX_opgrp = '{';
4493 char TEX_clgrp = '}';
4496 * TeX/LaTeX scanning loop.
4498 static void
4499 TeX_commands (inf)
4500 FILE *inf;
4502 char *cp, *lasthit;
4503 register int i;
4505 /* Select either \ or ! as escape character. */
4506 TEX_mode (inf);
4508 /* Initialize token table once from environment. */
4509 if (!TEX_toktab)
4510 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4512 LOOP_ON_INPUT_LINES (inf, lb, cp)
4514 lasthit = cp;
4515 /* Look at each esc in line. */
4516 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4518 if (*++cp == '\0')
4519 break;
4520 linecharno += cp - lasthit;
4521 lasthit = cp;
4522 i = TEX_Token (lasthit);
4523 if (i >= 0)
4525 /* We seem to include the TeX command in the tag name.
4526 register char *p;
4527 for (p = lasthit + TEX_toktab[i].len;
4528 *p != '\0' && *p != TEX_clgrp;
4529 p++)
4530 continue; */
4531 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4532 lb.buffer, lb.len, lineno, linecharno);
4533 break; /* We only tag a line once */
4539 #define TEX_LESC '\\'
4540 #define TEX_SESC '!'
4541 #define TEX_cmt '%'
4543 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4544 chars accordingly. */
4545 static void
4546 TEX_mode (inf)
4547 FILE *inf;
4549 int c;
4551 while ((c = getc (inf)) != EOF)
4553 /* Skip to next line if we hit the TeX comment char. */
4554 if (c == TEX_cmt)
4555 while (c != '\n')
4556 c = getc (inf);
4557 else if (c == TEX_LESC || c == TEX_SESC )
4558 break;
4561 if (c == TEX_LESC)
4563 TEX_esc = TEX_LESC;
4564 TEX_opgrp = '{';
4565 TEX_clgrp = '}';
4567 else
4569 TEX_esc = TEX_SESC;
4570 TEX_opgrp = '<';
4571 TEX_clgrp = '>';
4573 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4574 No attempt is made to correct the situation. */
4575 rewind (inf);
4578 /* Read environment and prepend it to the default string.
4579 Build token table. */
4580 static struct TEX_tabent *
4581 TEX_decode_env (evarname, defenv)
4582 char *evarname;
4583 char *defenv;
4585 register char *env, *p;
4587 struct TEX_tabent *tab;
4588 int size, i;
4590 /* Append default string to environment. */
4591 env = getenv (evarname);
4592 if (!env)
4593 env = defenv;
4594 else
4596 char *oldenv = env;
4597 env = concat (oldenv, defenv, "");
4600 /* Allocate a token table */
4601 for (size = 1, p = env; p;)
4602 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4603 size++;
4604 /* Add 1 to leave room for null terminator. */
4605 tab = xnew (size + 1, struct TEX_tabent);
4607 /* Unpack environment string into token table. Be careful about */
4608 /* zero-length strings (leading ':', "::" and trailing ':') */
4609 for (i = 0; *env;)
4611 p = etags_strchr (env, ':');
4612 if (!p) /* End of environment string. */
4613 p = env + strlen (env);
4614 if (p - env > 0)
4615 { /* Only non-zero strings. */
4616 tab[i].name = savenstr (env, p - env);
4617 tab[i].len = strlen (tab[i].name);
4618 i++;
4620 if (*p)
4621 env = p + 1;
4622 else
4624 tab[i].name = NULL; /* Mark end of table. */
4625 tab[i].len = 0;
4626 break;
4629 return tab;
4632 /* If the text at CP matches one of the tag-defining TeX command names,
4633 return the index of the first matching command in TEX_toktab.
4634 Otherwise return -1.
4635 Keep the capital `T' in `token' for dumb truncating compilers
4636 (this distinguishes it from `TEX_toktab'). */
4637 static int
4638 TEX_Token (cp)
4639 char *cp;
4641 int i;
4643 for (i = 0; TEX_toktab[i].len > 0; i++)
4644 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4645 return i;
4646 return -1;
4650 /* Texinfo support. Dave Love, Mar. 2000. */
4651 static void
4652 Texinfo_nodes (inf)
4653 FILE * inf;
4655 char *cp, *start;
4656 LOOP_ON_INPUT_LINES (inf, lb, cp)
4658 if ((*cp++ == '@'
4659 && *cp++ == 'n'
4660 && *cp++ == 'o'
4661 && *cp++ == 'd'
4662 && *cp++ == 'e' && iswhite (*cp++)))
4664 start = cp = skip_spaces(cp);
4665 while (*cp != '\0' && *cp != ',')
4666 cp++;
4667 pfnote (savenstr (start, cp - start), TRUE,
4668 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4675 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4677 * Assumes that the predicate starts at column 0.
4678 * Only the first clause of a predicate is added.
4680 static int prolog_pred P_((char *, char *));
4681 static void prolog_skip_comment P_((linebuffer *, FILE *));
4682 static int prolog_atom P_((char *, int));
4684 static void
4685 Prolog_functions (inf)
4686 FILE *inf;
4688 char *cp, *last;
4689 int len;
4690 int allocated;
4692 allocated = 0;
4693 len = 0;
4694 last = NULL;
4696 LOOP_ON_INPUT_LINES (inf, lb, cp)
4698 if (cp[0] == '\0') /* Empty line */
4699 continue;
4700 else if (iswhite (cp[0])) /* Not a predicate */
4701 continue;
4702 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4703 prolog_skip_comment (&lb, inf);
4704 else if ((len = prolog_pred (cp, last)) > 0)
4706 /* Predicate. Store the function name so that we only
4707 generate a tag for the first clause. */
4708 if (last == NULL)
4709 last = xnew(len + 1, char);
4710 else if (len + 1 > allocated)
4711 xrnew (last, len + 1, char);
4712 allocated = len + 1;
4713 strncpy (last, cp, len);
4714 last[len] = '\0';
4720 static void
4721 prolog_skip_comment (plb, inf)
4722 linebuffer *plb;
4723 FILE *inf;
4725 char *cp;
4729 for (cp = plb->buffer; *cp != '\0'; cp++)
4730 if (cp[0] == '*' && cp[1] == '/')
4731 return;
4732 lineno++;
4733 linecharno += readline (plb, inf);
4735 while (!feof(inf));
4739 * A predicate definition is added if it matches:
4740 * <beginning of line><Prolog Atom><whitespace>(
4742 * It is added to the tags database if it doesn't match the
4743 * name of the previous clause header.
4745 * Return the size of the name of the predicate, or 0 if no header
4746 * was found.
4748 static int
4749 prolog_pred (s, last)
4750 char *s;
4751 char *last; /* Name of last clause. */
4753 int pos;
4754 int len;
4756 pos = prolog_atom (s, 0);
4757 if (pos < 1)
4758 return 0;
4760 len = pos;
4761 pos = skip_spaces (s + pos) - s;
4763 if ((s[pos] == '(') || (s[pos] == '.'))
4765 if (s[pos] == '(')
4766 pos++;
4768 /* Save only the first clause. */
4769 if (last == NULL
4770 || len != (int)strlen (last)
4771 || !strneq (s, last, len))
4773 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4774 return len;
4777 return 0;
4781 * Consume a Prolog atom.
4782 * Return the number of bytes consumed, or -1 if there was an error.
4784 * A prolog atom, in this context, could be one of:
4785 * - An alphanumeric sequence, starting with a lower case letter.
4786 * - A quoted arbitrary string. Single quotes can escape themselves.
4787 * Backslash quotes everything.
/* Measure the Prolog atom that starts at S[POS] and return its length,
   or -1 if there is none.  An unquoted atom starts with a lower case
   letter or '_' and continues with alphanumerics and '_'.  A quoted
   atom is enclosed in single quotes; two quotes in a row stand for one
   quote and a backslash protects the next character.  A quoted atom
   that is not closed on this line yields -1.  */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted.  */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  pos++;
  for (;;)
    {
      switch (s[pos])
        {
        case '\'':
          pos++;
          if (s[pos] != '\'')
            return pos - start; /* That was the closing quote.  */
          pos++;                /* A double quote */
          break;
        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return -1;
        case '\\':
          if (s[pos+1] == '\0')
            return -1;
          pos += 2;
          break;
        default:
          pos++;
          break;
        }
    }
}
4841 * Support for Erlang -- Anders Lindgren, Feb 1996.
4843 * Generates tags for functions, defines, and records.
4845 * Assumes that Erlang functions start at column 0.
4847 static int erlang_func P_((char *, char *));
4848 static void erlang_attribute P_((char *));
4849 static int erlang_atom P_((char *, int));
4851 static void
4852 Erlang_functions (inf)
4853 FILE *inf;
4855 char *cp, *last;
4856 int len;
4857 int allocated;
4859 allocated = 0;
4860 len = 0;
4861 last = NULL;
4863 LOOP_ON_INPUT_LINES (inf, lb, cp)
4865 if (cp[0] == '\0') /* Empty line */
4866 continue;
4867 else if (iswhite (cp[0])) /* Not function nor attribute */
4868 continue;
4869 else if (cp[0] == '%') /* comment */
4870 continue;
4871 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4872 continue;
4873 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4875 erlang_attribute (cp);
4876 last = NULL;
4878 else if ((len = erlang_func (cp, last)) > 0)
4881 * Function. Store the function name so that we only
4882 * generates a tag for the first clause.
4884 if (last == NULL)
4885 last = xnew (len + 1, char);
4886 else if (len + 1 > allocated)
4887 xrnew (last, len + 1, char);
4888 allocated = len + 1;
4889 strncpy (last, cp, len);
4890 last[len] = '\0';
4897 * A function definition is added if it matches:
4898 * <beginning of line><Erlang Atom><whitespace>(
4900 * It is added to the tags database if it doesn't match the
4901 * name of the previous clause header.
4903 * Return the size of the name of the function, or 0 if no function
4904 * was found.
4906 static int
4907 erlang_func (s, last)
4908 char *s;
4909 char *last; /* Name of last clause. */
4911 int pos;
4912 int len;
4914 pos = erlang_atom (s, 0);
4915 if (pos < 1)
4916 return 0;
4918 len = pos;
4919 pos = skip_spaces (s + pos) - s;
4921 /* Save only the first clause. */
4922 if (s[pos++] == '('
4923 && (last == NULL
4924 || len != (int)strlen (last)
4925 || !strneq (s, last, len)))
4927 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4928 return len;
4931 return 0;
4936 * Handle attributes. Currently, tags are generated for defines
4937 * and records.
4939 * They are of the form:
4940 * -define(foo, bar).
4941 * -define(Foo(M, N), M+N).
4942 * -record(graph, {vtab = notable, cyclic = true}).
4944 static void
4945 erlang_attribute (s)
4946 char *s;
4948 int pos;
4949 int len;
4951 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4953 pos = skip_spaces (s + 7) - s;
4954 if (s[pos++] == '(')
4956 pos = skip_spaces (s + pos) - s;
4957 len = erlang_atom (s, pos);
4958 if (len != 0)
4959 pfnote (savenstr (& s[pos], len), TRUE,
4960 s, pos + len, lineno, linecharno);
4963 return;
4968 * Consume an Erlang atom (or variable).
4969 * Return the number of bytes consumed, or -1 if there was an error.
/* Measure the Erlang atom or variable that starts at S[POS] and return
   its length, or -1 if there is none.  An unquoted name starts with a
   letter or '_' and continues with alphanumerics and '_'.  A quoted
   atom runs from a single quote to the next single quote that is not
   protected by a backslash; one that is not closed on this line yields
   -1.  */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted.  */
      for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  pos++;
  for (;;)
    {
      switch (s[pos])
        {
        case '\'':
          /* Closing quote: it belongs to the atom.  */
          return pos + 1 - start;
        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return -1;
        case '\\':
          if (s[pos+1] == '\0')
            return -1;
          pos += 2;
          break;
        default:
          pos++;
          break;
        }
    }
}
5018 #ifdef ETAGS_REGEXPS
5020 static char *scan_separators P_((char *));
5021 static void analyse_regex P_((char *, bool));
5022 static void add_regex P_((char *, bool, language *));
5023 static char *substitute P_((char *, char *, struct re_registers *));
5025 /* Take a string like "/blah/" and turn it into "blah", making sure
5026 that the first and last characters are the same, and handling
5027 quoted separator characters. Actually, stops on the occurrence of
5028 an unquoted separator. Also turns "\t" into a Tab character.
5029 Returns pointer to terminating separator. Works in place. Null
5030 terminates name string. */
/* NAME starts with a separator character.  Copy the text that follows
   it down to the start of NAME, stopping at the first separator that is
   not preceded by a backslash.  Backslash-t becomes a Tab and
   backslash-separator becomes the separator; any other backslash pair
   is copied unchanged.  A lone backslash at the very end is dropped.
   The copy is null terminated.  Return a pointer to the separator that
   stopped the scan, or to the end of the string if there was none.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *src = name + 1;
  char *dst = name;

  while (*src != '\0')
    {
      if (*src == '\\')
        {
          /* Look at the character that the backslash protects.  */
          src++;
          if (*src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote.  */
              *dst++ = '\\';
              *dst++ = *src;
            }
          src++;
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src++;
    }

  /* Terminate copied string.  */
  *dst = '\0';
  return src;
}
5068 /* Look at the argument of --regex or --no-regex and do the right
5069 thing. Same for each line of a regexp file. */
5070 static void
5071 analyse_regex (regex_arg, ignore_case)
5072 char *regex_arg;
5073 bool ignore_case;
5075 if (regex_arg == NULL)
5076 free_patterns (); /* --no-regex: remove existing regexps */
5078 /* A real --regexp option or a line in a regexp file. */
5079 switch (regex_arg[0])
5081 /* Comments in regexp file or null arg to --regex. */
5082 case '\0':
5083 case ' ':
5084 case '\t':
5085 break;
5087 /* Read a regex file. This is recursive and may result in a
5088 loop, which will stop when the file descriptors are exhausted. */
5089 case '@':
5091 FILE *regexfp;
5092 linebuffer regexbuf;
5093 char *regexfile = regex_arg + 1;
5095 /* regexfile is a file containing regexps, one per line. */
5096 regexfp = fopen (regexfile, "r");
5097 if (regexfp == NULL)
5099 pfatal (regexfile);
5100 return;
5102 initbuffer (&regexbuf);
5103 while (readline_internal (&regexbuf, regexfp) > 0)
5104 analyse_regex (regexbuf.buffer, ignore_case);
5105 free (regexbuf.buffer);
5106 fclose (regexfp);
5108 break;
5110 /* Regexp to be used for a specific language only. */
5111 case '{':
5113 language *lang;
5114 char *lang_name = regex_arg + 1;
5115 char *cp;
5117 for (cp = lang_name; *cp != '}'; cp++)
5118 if (*cp == '\0')
5120 error ("unterminated language name in regex: %s", regex_arg);
5121 return;
5123 *cp = '\0';
5124 lang = get_language_from_langname (lang_name);
5125 if (lang == NULL)
5126 return;
5127 add_regex (cp + 1, ignore_case, lang);
5129 break;
5131 /* Regexp to be used for any language. */
5132 default:
5133 add_regex (regex_arg, ignore_case, NULL);
5134 break;
5138 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5139 expression, into a real regular expression by compiling it. */
5140 static void
5141 add_regex (regexp_pattern, ignore_case, lang)
5142 char *regexp_pattern;
5143 bool ignore_case;
5144 language *lang;
5146 char *name;
5147 const char *err;
5148 struct re_pattern_buffer *patbuf;
5149 pattern *pp;
5152 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5154 error ("%s: unterminated regexp", regexp_pattern);
5155 return;
5157 name = scan_separators (regexp_pattern);
5158 if (regexp_pattern[0] == '\0')
5160 error ("null regexp", (char *)NULL);
5161 return;
5163 (void) scan_separators (name);
5165 patbuf = xnew (1, struct re_pattern_buffer);
5166 /* Translation table to fold case if appropriate. */
5167 patbuf->translate = (ignore_case) ? lc_trans : NULL;
5168 patbuf->fastmap = NULL;
5169 patbuf->buffer = NULL;
5170 patbuf->allocated = 0;
5172 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5173 if (err != NULL)
5175 error ("%s while compiling pattern", err);
5176 return;
5179 pp = p_head;
5180 p_head = xnew (1, pattern);
5181 p_head->regex = savestr (regexp_pattern);
5182 p_head->p_next = pp;
5183 p_head->language = lang;
5184 p_head->pattern = patbuf;
5185 p_head->name_pattern = savestr (name);
5186 p_head->error_signaled = FALSE;
5190 * Do the substitutions indicated by the regular expression and
5191 * arguments.
5193 static char *
5194 substitute (in, out, regs)
5195 char *in, *out;
5196 struct re_registers *regs;
5198 char *result, *t;
5199 int size, dig, diglen;
5201 result = NULL;
5202 size = strlen (out);
5204 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5205 if (out[size - 1] == '\\')
5206 fatal ("pattern error in \"%s\"", out);
5207 for (t = etags_strchr (out, '\\');
5208 t != NULL;
5209 t = etags_strchr (t + 2, '\\'))
5210 if (ISDIGIT (t[1]))
5212 dig = t[1] - '0';
5213 diglen = regs->end[dig] - regs->start[dig];
5214 size += diglen - 2;
5216 else
5217 size -= 1;
5219 /* Allocate space and do the substitutions. */
5220 result = xnew (size + 1, char);
5222 for (t = result; *out != '\0'; out++)
5223 if (*out == '\\' && ISDIGIT (*++out))
5225 dig = *out - '0';
5226 diglen = regs->end[dig] - regs->start[dig];
5227 strncpy (t, in + regs->start[dig], diglen);
5228 t += diglen;
5230 else
5231 *t++ = *out;
5232 *t = '\0';
5234 assert (t <= result + size && t - result == (int)strlen (result));
5236 return result;
5239 /* Deallocate all patterns. */
5240 static void
5241 free_patterns ()
5243 pattern *pp;
5244 while (p_head != NULL)
5246 pp = p_head->p_next;
5247 free (p_head->regex);
5248 free (p_head->name_pattern);
5249 free (p_head);
5250 p_head = pp;
5252 return;
5254 #endif /* ETAGS_REGEXPS */
5257 static void
5258 get_tag (bp)
5259 register char *bp;
5261 register char *cp;
5263 if (*bp == '\0')
5264 return;
5265 /* Go till you get to white space or a syntactic break */
5266 for (cp = bp + 1;
5267 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5268 cp++)
5269 continue;
5270 pfnote (savenstr (bp, cp-bp), TRUE,
5271 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5274 /* Initialize a linebuffer for use */
5275 static void
5276 initbuffer (lbp)
5277 linebuffer *lbp;
5279 lbp->size = (DEBUG) ? 3 : 200;
5280 lbp->buffer = xnew (lbp->size, char);
5281 lbp->buffer[0] = '\0';
5282 lbp->len = 0;
5286 * Read a line of text from `stream' into `lbp', excluding the
5287 * newline or CR-NL, if any. Return the number of characters read from
5288 * `stream', which is the length of the line including the newline.
5290 * On DOS or Windows we do not count the CR character, if any, before the
5291 * NL, in the returned length; this mirrors the behavior of emacs on those
5292 * platforms (for text files, it translates CR-NL to NL as it reads in the
5293 * file).
5295 static long
5296 readline_internal (lbp, stream)
5297 linebuffer *lbp;
5298 register FILE *stream;
5300 char *buffer = lbp->buffer;
5301 register char *p = lbp->buffer;
5302 register char *pend;
5303 int chars_deleted;
5305 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5307 while (1)
5309 register int c = getc (stream);
5310 if (p == pend)
5312 /* We're at the end of linebuffer: expand it. */
5313 lbp->size *= 2;
5314 xrnew (buffer, lbp->size, char);
5315 p += buffer - lbp->buffer;
5316 pend = buffer + lbp->size;
5317 lbp->buffer = buffer;
5319 if (c == EOF)
5321 *p = '\0';
5322 chars_deleted = 0;
5323 break;
5325 if (c == '\n')
5327 if (p > buffer && p[-1] == '\r')
5329 p -= 1;
5330 #ifdef DOS_NT
5331 /* Assume CRLF->LF translation will be performed by Emacs
5332 when loading this file, so CRs won't appear in the buffer.
5333 It would be cleaner to compensate within Emacs;
5334 however, Emacs does not know how many CRs were deleted
5335 before any given point in the file. */
5336 chars_deleted = 1;
5337 #else
5338 chars_deleted = 2;
5339 #endif
5341 else
5343 chars_deleted = 1;
5345 *p = '\0';
5346 break;
5348 *p++ = c;
5350 lbp->len = p - buffer;
5352 return lbp->len + chars_deleted;
5356 * Like readline_internal, above, but in addition try to match the
5357 * input line against relevant regular expressions.
5359 static long
5360 readline (lbp, stream)
5361 linebuffer *lbp;
5362 FILE *stream;
5364 /* Read new line. */
5365 long result = readline_internal (lbp, stream);
5366 #ifdef ETAGS_REGEXPS
5367 int match;
5368 pattern *pp;
5370 /* Match against relevant patterns. */
5371 if (lbp->len > 0)
5372 for (pp = p_head; pp != NULL; pp = pp->p_next)
5374 /* Only use generic regexps or those for the current language. */
5375 if (pp->language != NULL && pp->language != curlang)
5376 continue;
5378 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5379 switch (match)
5381 case -2:
5382 /* Some error. */
5383 if (!pp->error_signaled)
5385 error ("error while matching \"%s\"", pp->regex);
5386 pp->error_signaled = TRUE;
5388 break;
5389 case -1:
5390 /* No match. */
5391 break;
5392 default:
5393 /* Match occurred. Construct a tag. */
5394 if (pp->name_pattern[0] != '\0')
5396 /* Make a named tag. */
5397 char *name = substitute (lbp->buffer,
5398 pp->name_pattern, &pp->regs);
5399 if (name != NULL)
5400 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5402 else
5404 /* Make an unnamed tag. */
5405 pfnote ((char *)NULL, TRUE,
5406 lbp->buffer, match, lineno, linecharno);
5408 break;
5411 #endif /* ETAGS_REGEXPS */
5413 return result;
/*
 * Return a newly allocated copy of the whole string CP; the copy
 * occupies strlen(cp)+1 characters.  The work is done by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
5429 * Return a pointer to a space of size LEN+1 allocated with xnew where
5430 * the string CP has been copied for at most the first LEN characters.
5432 static char *
5433 savenstr (cp, len)
5434 char *cp;
5435 int len;
5437 register char *dp;
5439 dp = xnew (len + 1, char);
5440 strncpy (dp, cp, len);
5441 dp[len] = '\0';
5442 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* As strrchr does, look for C converted to char.  Otherwise a value
     above 127 can never match when plain char is signed.  */
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
  } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* As strchr does, look for C converted to char.  Otherwise a value
     above 127 can never match when plain char is signed.  */
  register char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
  } while (*sp++);
  return NULL;
}
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either white space (according to iswhite) or the terminating NUL.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5507 /* Print error message and exit. */
5508 void
5509 fatal (s1, s2)
5510 char *s1, *s2;
5512 error (s1, s2);
5513 exit (BAD);
5516 static void
5517 pfatal (s1)
5518 char *s1;
5520 perror (s1);
5521 exit (BAD);
5524 static void
5525 suggest_asking_for_help ()
5527 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5528 progname,
5529 #ifdef LONG_OPTIONS
5530 "--help"
5531 #else
5532 "-h"
5533 #endif
5535 exit (BAD);
5538 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5539 static void
5540 error (s1, s2)
5541 const char *s1, *s2;
5543 fprintf (stderr, "%s: ", progname);
5544 fprintf (stderr, s1, s2);
5545 fprintf (stderr, "\n");
5548 /* Return a newly-allocated string whose contents
5549 concatenate those of s1, s2, s3. */
5550 static char *
5551 concat (s1, s2, s3)
5552 char *s1, *s2, *s3;
5554 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5555 char *result = xnew (len1 + len2 + len3 + 1, char);
5557 strcpy (result, s1);
5558 strcpy (result + len1, s2);
5559 strcpy (result + len1 + len2, s3);
5560 result[len1 + len2 + len3] = '\0';
5562 return result;
5566 /* Does the same work as the system V getcwd, but does not need to
5567 guess the buffer size in advance. */
5568 static char *
5569 etags_getcwd ()
5571 #ifdef HAVE_GETCWD
5572 int bufsize = 200;
5573 char *path = xnew (bufsize, char);
5575 while (getcwd (path, bufsize) == NULL)
5577 if (errno != ERANGE)
5578 pfatal ("getcwd");
5579 bufsize *= 2;
5580 free (path);
5581 path = xnew (bufsize, char);
5584 canonicalize_filename (path);
5585 return path;
5587 #else /* not HAVE_GETCWD */
5588 #if MSDOS
5590 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5592 getwd (path);
5594 for (p = path; *p != '\0'; p++)
5595 if (*p == '\\')
5596 *p = '/';
5597 else
5598 *p = lowcase (*p);
5600 return strdup (path);
5601 #else /* not MSDOS */
5602 linebuffer path;
5603 FILE *pipe;
5605 initbuffer (&path);
5606 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5607 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5608 pfatal ("pwd");
5609 pclose (pipe);
5611 return path.buffer;
5612 #endif /* not MSDOS */
5613 #endif /* not HAVE_GETCWD */
5616 /* Return a newly allocated string containing the file name of FILE
5617 relative to the absolute directory DIR (which should end with a slash). */
5618 static char *
5619 relative_filename (file, dir)
5620 char *file, *dir;
5622 char *fp, *dp, *afn, *res;
5623 int i;
5625 /* Find the common root of file and dir (with a trailing slash). */
5626 afn = absolute_filename (file, cwd);
5627 fp = afn;
5628 dp = dir;
5629 while (*fp++ == *dp++)
5630 continue;
5631 fp--, dp--; /* back to the first differing char */
5632 #ifdef DOS_NT
5633 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5634 return afn;
5635 #endif
5636 do /* look at the equal chars until '/' */
5637 fp--, dp--;
5638 while (*fp != '/');
5640 /* Build a sequence of "../" strings for the resulting relative file name. */
5641 i = 0;
5642 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5643 i += 1;
5644 res = xnew (3*i + strlen (fp + 1) + 1, char);
5645 res[0] = '\0';
5646 while (i-- > 0)
5647 strcat (res, "../");
5649 /* Add the file name relative to the common root of file and dir. */
5650 strcat (res, fp + 1);
5651 free (afn);
5653 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   A FILE that is already absolute is copied; otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then deleted
   from the result, and a result that becomes empty is returned as
   "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *src, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     name is moved down with an explicit forward copy: source and
     destination overlap, which strcpy does not allow, and a forward
     copy is safe because the destination is always the lower one.  */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              src = slashp + 3;
              slashp = cp;      /* rescan from the joint */
              while ((*cp++ = *src++) != '\0')
                continue;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              src = slashp + 2;
              cp = slashp;
              while ((*cp++ = *src++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root.  The emptied
         buffer is not handed to the caller, so release it.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is canonicalized in place and is cut after its last slash only
   for the duration of the call to absolute_filename.  A FILE without
   any slash yields a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Keep only the directory part, trailing slash included.  */
      saved = last_slash[1];
      last_slash[1] = '\0';
      res = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    res = savestr (dir);

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, on DOS_NT, with
   a letter, a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Translate backslashes into slashes.  Works in place.
   This only happens on DOS_NT, where a lower case drive letter is
   converted to upper case as well; elsewhere FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5771 /* Set the minimum size of a string contained in a linebuffer. */
5772 static void
5773 linebuffer_setlen (lbp, toksize)
5774 linebuffer *lbp;
5775 int toksize;
5777 while (lbp->size <= toksize)
5779 lbp->size *= 2;
5780 xrnew (lbp->buffer, lbp->size, char);
5782 lbp->len = toksize;
/* Like malloc but get fatal error if memory is exhausted.
   A NULL return from malloc is reported through fatal ().  */
long *
xmalloc (size)
     unsigned int size;
{
  long *result;

  result = (long *) malloc (size);
  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
/* Like realloc but get fatal error if memory is exhausted.
   A NULL return from realloc is reported through fatal ().  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result;

  result = (long *) realloc (ptr, size);
  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}