#
[emacs.git] / lib-src / etags.c
blobf0c613f232bfb0617a2243f4b25ef8ad02bfca0d
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/*
 * Authors:
 *	Ctags originally by Ken Arnold.
 *	Fortran added by Jim Kleckner.
 *	Ed Pelegri-Llopart added C typedefs.
 *	Gnu Emacs TAGS format and modifications by RMS?
 *	1989	Sam Kendall added C++.
 *	1993	Francesco Potortì reorganised C and C++ based on work by Joe Wells.
 *	1994	Regexp tags by Tom Tromey.
 *	2001	Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
 *
 *	Francesco Potortì <pot@gnu.org> has maintained it since 1993.
 */
35 char pot_etags_version[] = "@(#) pot revision number is 14.21";
37 #define TRUE 1
38 #define FALSE 0
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #else
62 # ifndef __STDC__
63 # define static /* remove static for old compilers' sake */
64 # endif
65 #endif /* !HAVE_CONFIG_H */
67 #ifndef _GNU_SOURCE
68 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
69 #endif
71 /* WIN32_NATIVE is for Xemacs.
72 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
73 #ifdef WIN32_NATIVE
74 # undef MSDOS
75 # undef WINDOWSNT
76 # define WINDOWSNT
77 #endif /* WIN32_NATIVE */
79 #ifdef MSDOS
80 # undef MSDOS
81 # define MSDOS TRUE
82 # include <fcntl.h>
83 # include <sys/param.h>
84 # include <io.h>
85 # ifndef HAVE_CONFIG_H
86 # define DOS_NT
87 # include <sys/config.h>
88 # endif
89 #else
90 # define MSDOS FALSE
91 #endif /* MSDOS */
93 #ifdef WINDOWSNT
94 # include <stdlib.h>
95 # include <fcntl.h>
96 # include <string.h>
97 # include <direct.h>
98 # include <io.h>
99 # define MAXPATHLEN _MAX_PATH
100 # undef HAVE_NTGUI
101 # undef DOS_NT
102 # define DOS_NT
103 # ifndef HAVE_GETCWD
104 # define HAVE_GETCWD
105 # endif /* undef HAVE_GETCWD */
106 #else /* !WINDOWSNT */
107 # ifdef STDC_HEADERS
108 # include <stdlib.h>
109 # include <string.h>
110 # else
111 extern char *getenv ();
112 # endif
113 #endif /* !WINDOWSNT */
115 #ifdef HAVE_UNISTD_H
116 # include <unistd.h>
117 #else
118 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
119 extern char *getcwd (char *buf, size_t size);
120 # endif
121 #endif /* HAVE_UNISTD_H */
123 #include <stdio.h>
124 #include <ctype.h>
125 #include <errno.h>
126 #ifndef errno
127 extern int errno;
128 #endif
129 #include <sys/types.h>
130 #include <sys/stat.h>
132 #include <assert.h>
133 #ifdef NDEBUG
134 # undef assert /* some systems have a buggy assert.h */
135 # define assert(x) ((void) 0)
136 #endif
138 #if !defined (S_ISREG) && defined (S_IFREG)
139 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
140 #endif
142 #ifdef LONG_OPTIONS
143 # include <getopt.h>
144 #else
145 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
146 extern char *optarg;
147 extern int optind, opterr;
148 #endif /* LONG_OPTIONS */
150 #ifdef ETAGS_REGEXPS
151 # include <regex.h>
152 #endif /* ETAGS_REGEXPS */
154 /* Define CTAGS to make the program "ctags" compatible with the usual one.
155 Leave it undefined to make the program "etags", which makes emacs-style
156 tag tables and tags typedefs, #defines and struct/union/enum by default. */
157 #ifdef CTAGS
158 # undef CTAGS
159 # define CTAGS TRUE
160 #else
161 # define CTAGS FALSE
162 #endif
164 /* Exit codes for success and failure. */
165 #ifdef VMS
166 # define GOOD 1
167 # define BAD 0
168 #else
169 # define GOOD 0
170 # define BAD 1
171 #endif
/* String equality helpers; each yields 1 on a match and 0 otherwise.
   strcmp and strncmp dereference both operands, so the assertion must
   require that BOTH pointers are non-NULL: with `||' a single NULL
   operand slipped past the check and crashed in the comparison.
   The arguments may be evaluated more than once; avoid side effects.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
/* Character classification.

   CHARS is the number of entries in each lookup table below.  CHAR
   masks its argument down to the range 0 .. CHARS-1, so a negative
   (signed) char value still yields a valid table index.  */
#define CHARS		256
#define CHAR(x)		((CHARS - 1) & (unsigned int) (x))

/* Table-driven predicates (the tables are set up by init).  */
#define iswhite(c)	(_wht[CHAR (c)])	/* c is white */
#define notinname(c)	(_nin[CHAR (c)])	/* c is not in a name */
#define begtoken(c)	(_btk[CHAR (c)])	/* c can start token */
#define intoken(c)	(_itk[CHAR (c)])	/* c can be in token */
#define endtoken(c)	(_etk[CHAR (c)])	/* c ends tokens */

/* <ctype.h> wrappers that first reduce their argument with CHAR.  */
#define ISALNUM(c)	isalnum (CHAR (c))
#define ISALPHA(c)	isalpha (CHAR (c))
#define ISDIGIT(c)	isdigit (CHAR (c))
#define ISLOWER(c)	islower (CHAR (c))

/* Case conversion, likewise applied to the masked value.  */
#define lowcase(c)	tolower (CHAR (c))
#define upcase(c)	toupper (CHAR (c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type TYPE.  xrnew resizes the
 * storage OP points to so that it holds N objects of type TYPE and
 * assigns the resulting address back to OP, so OP must be an lvalue.
 * When DEBUG is set, both go through the tracing allocator declared in
 * chkmalloc.h and record the file and line of the call.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#endif
211 typedef int bool;
213 typedef void Lang_function P_((FILE *));
/* One supported compression format: the file name suffix that
   identifies it and the command used to read such a file.  */
typedef struct
{
  char *suffix;			/* suffix naming the format, e.g. "gz" */
  char *command;		/* Takes one arg and decompresses to stdout */
} compressor;
221 typedef struct
223 char *name;
224 Lang_function *function;
225 char **filenames;
226 char **suffixes;
227 char **interpreters;
228 } language;
230 typedef struct node_st
231 { /* sorting structure */
232 char *name; /* function or type name */
233 char *file; /* file name */
234 bool is_func; /* use pattern or line no */
235 bool been_warned; /* set if noticed dup */
236 int lno; /* line number tag is on */
237 long cno; /* character number line starts on */
238 char *pat; /* search pattern */
239 struct node_st *left, *right; /* left and right sons */
240 } node;
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* allocated size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
256 /* Many compilers barf on this:
257 Lang_function Ada_funcs;
258 so let's write it this way */
259 static void Ada_funcs P_((FILE *));
260 static void Asm_labels P_((FILE *));
261 static void C_entries P_((int c_ext, FILE *));
262 static void default_C_entries P_((FILE *));
263 static void plain_C_entries P_((FILE *));
264 static void Cjava_entries P_((FILE *));
265 static void Cobol_paragraphs P_((FILE *));
266 static void Cplusplus_entries P_((FILE *));
267 static void Cstar_entries P_((FILE *));
268 static void Erlang_functions P_((FILE *));
269 static void Fortran_functions P_((FILE *));
270 static void Yacc_entries P_((FILE *));
271 static void Lisp_functions P_((FILE *));
272 static void Makefile_targets P_((FILE *));
273 static void Pascal_functions P_((FILE *));
274 static void Perl_functions P_((FILE *));
275 static void Postscript_functions P_((FILE *));
276 static void Prolog_functions P_((FILE *));
277 static void Python_functions P_((FILE *));
278 static void Scheme_functions P_((FILE *));
279 static void TeX_commands P_((FILE *));
280 static void Texinfo_nodes P_((FILE *));
281 static void just_read_file P_((FILE *));
283 static void print_language_names P_((void));
284 static void print_version P_((void));
285 static void print_help P_((void));
286 int main P_((int, char **));
287 static int number_len P_((long));
289 static compressor *get_compressor_from_suffix P_((char *, char **));
290 static language *get_language_from_langname P_((char *));
291 static language *get_language_from_interpreter P_((char *));
292 static language *get_language_from_filename P_((char *));
293 static int total_size_of_entries P_((node *));
294 static long readline P_((linebuffer *, FILE *));
295 static long readline_internal P_((linebuffer *, FILE *));
296 static void get_tag P_((char *));
298 #ifdef ETAGS_REGEXPS
299 static void analyse_regex P_((char *, bool));
300 static void add_regex P_((char *, bool, language *));
301 static void free_patterns P_((void));
302 #endif /* ETAGS_REGEXPS */
303 static void error P_((const char *, const char *));
304 static void suggest_asking_for_help P_((void));
305 void fatal P_((char *, char *));
306 static void pfatal P_((char *));
307 static void add_node P_((node *, node **));
309 static void init P_((void));
310 static void initbuffer P_((linebuffer *));
311 static void find_entries P_((char *, FILE *));
312 static void free_tree P_((node *));
313 static void pfnote P_((char *, bool, char *, int, int, long));
314 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
315 static void process_file P_((char *));
316 static void put_entries P_((node *));
317 static void takeprec P_((void));
319 static char *concat P_((char *, char *, char *));
320 static char *skip_spaces P_((char *));
321 static char *skip_non_spaces P_((char *));
322 static char *savenstr P_((char *, int));
323 static char *savestr P_((char *));
324 static char *etags_strchr P_((const char *, int));
325 static char *etags_strrchr P_((const char *, int));
326 static char *etags_getcwd P_((void));
327 static char *relative_filename P_((char *, char *));
328 static char *absolute_filename P_((char *, char *));
329 static char *absolute_dirname P_((char *, char *));
330 static bool filename_is_absolute P_((char *f));
331 static void canonicalize_filename P_((char *));
332 static void linebuffer_setlen P_((linebuffer *, int));
333 long *xmalloc P_((unsigned int));
334 long *xrealloc P_((char *, unsigned int));
337 char searchar = '/'; /* use /.../ searches */
339 char *tagfile; /* output file */
340 char *progname; /* name this program was invoked with */
341 char *cwd; /* current working directory */
342 char *tagfiledir; /* directory of tagfile */
343 FILE *tagf; /* ioptr for tags file */
345 char *curfile; /* current input file name */
346 language *curlang; /* current language */
348 int lineno; /* line number of current line */
349 long charno; /* current character number */
350 long linecharno; /* charno of start of current line */
351 char *dbp; /* pointer to start of current tag */
353 node *head; /* the head of the binary tree of tags */
355 linebuffer lb; /* the current line */
357 /* boolean "functions" (see init) */
358 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
359 char
360 /* white chars */
361 *white = " \f\t\n\r\v",
362 /* not in a name */
363 *nonam = " \f\t\n\r(=,[;",
364 /* token ending chars */
365 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
366 /* token starting chars */
367 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
368 /* valid in-token chars */
369 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
371 bool append_to_tagfile; /* -a: append to tags */
372 /* The following four default to TRUE for etags, but to FALSE for ctags. */
373 bool typedefs; /* -t: create tags for C and Ada typedefs */
374 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
375 /* 0 struct/enum/union decls, and C++ */
376 /* member functions. */
377 bool constantypedefs; /* -d: create tags for C #define, enum */
378 /* constants and variables. */
379 /* -D: opposite of -d. Default under ctags. */
380 bool declarations; /* --declarations: tag them and extern in C&Co*/
381 bool globals; /* create tags for global variables */
382 bool members; /* create tags for C member variables */
383 bool update; /* -u: update tags */
384 bool vgrind_style; /* -v: create vgrind style index output */
385 bool no_warnings; /* -w: suppress warnings */
386 bool cxref_style; /* -x: create cxref style output */
387 bool cplusplus; /* .[hc] means C++, not C */
388 bool noindentypedefs; /* -I: ignore indentation in C */
389 bool packages_only; /* --packages-only: in Ada, only tag packages*/
#ifdef LONG_OPTIONS
/* Long option table passed to getopt_long.  An entry with a non-NULL
   flag pointer makes getopt_long store the last field into that
   variable and return 0; the other entries make it return the listed
   short option character.  The table ends with an all-zero entry.  */
struct option longopts[] =
{
  { "packages-only",      no_argument,	     &packages_only,	 TRUE  },
  { "append",		  no_argument,	     NULL,		 'a'   },
  { "backward-search",	  no_argument,	     NULL,		 'B'   },
  { "c++",		  no_argument,	     NULL,		 'C'   },
  { "cxref",		  no_argument,	     NULL,		 'x'   },
  { "defines",		  no_argument,	     NULL,		 'd'   },
  { "declarations",	  no_argument,	     &declarations,	 TRUE  },
  { "no-defines",	  no_argument,	     NULL,		 'D'   },
  { "globals",		  no_argument,	     &globals,		 TRUE  },
  { "no-globals",	  no_argument,	     &globals,		 FALSE },
  { "help",		  no_argument,	     NULL,		 'h'   },
  { "help",		  no_argument,	     NULL,		 'H'   },
  { "ignore-indentation", no_argument,	     NULL,		 'I'   },
  { "include",		  required_argument, NULL,		 'i'   },
  { "language",           required_argument, NULL,		 'l'   },
  { "members",		  no_argument,	     &members,		 TRUE  },
  { "no-members",	  no_argument,	     &members,		 FALSE },
  { "no-warn",		  no_argument,	     NULL,		 'w'   },
  { "output",		  required_argument, NULL,		 'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",		  required_argument, NULL,		 'r'   },
  { "no-regex",		  no_argument,	     NULL,		 'R'   },
  { "ignore-case-regex",  required_argument, NULL,		 'c'   },
#endif /* ETAGS_REGEXPS */
  { "typedefs",		  no_argument,	     NULL,		 't'   },
  { "typedefs-and-c++",	  no_argument,	     NULL,		 'T'   },
  { "update",		  no_argument,	     NULL,		 'u'   },
  { "version",		  no_argument,	     NULL,		 'V'   },
  { "vgrind",		  no_argument,	     NULL,		 'v'   },
  { NULL }
};
#endif /* LONG_OPTIONS */
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression.  Elements are
   the compiled pattern, and the name string.  */
typedef struct pattern
{
  struct pattern *p_next;	/* next element of the list */
  language *language;		/* language the regexp is tied to */
  char *regex;			/* the regexp text */
  struct re_pattern_buffer *pattern;	/* the compiled pattern */
  struct re_registers regs;	/* match registers */
  char *name_pattern;		/* the name string */
  bool error_signaled;
} pattern;

/* List of all regexps.  */
pattern *p_head = NULL;

/* How many characters in the character set.  (From regex.c.)  */
#define CHAR_SET_SIZE 256
/* Translation table for case-insensitive matching.  */
char lc_trans[CHAR_SET_SIZE];
#endif /* ETAGS_REGEXPS */
450 compressor compressors[] =
452 { "z", "gzip -d -c"},
453 { "Z", "gzip -d -c"},
454 { "gz", "gzip -d -c"},
455 { "GZ", "gzip -d -c"},
456 { "bz2", "bzip2 -d -c" },
457 { NULL }
461 * Language stuff.
464 /* Non-NULL if language fixed. */
465 language *forced_lang = NULL;
/* File name suffixes (written without the leading dot), whole file
   names and interpreter names used to recognise each language.  Every
   list is terminated by a NULL pointer.  */

/* Ada code */
char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };

/* Assembly code */
char *Asm_suffixes [] = { "a",	/* Unix assembler */
			  "asm", /* Microcontroller assembly */
			  "def", /* BSO/Tasking definition includes  */
			  "inc", /* Microcontroller include files */
			  "ins", /* Microcontroller include files */
			  "s", "sa", /* Unix assembler */
			  "S",   /* cpp-processed Unix assembler */
			  "src", /* BSO/Tasking C compiler output */
			  NULL
			};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' keyword is met inside the file.
   That is why default_C_entries is called for these. */
char *default_C_suffixes [] =
  { "c", "h", NULL };

char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };

char *Cjava_suffixes [] =
  { "java", NULL };

char *Cobol_suffixes [] =
  { "COB", "cob", NULL };

char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };

char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };

char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };

char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};

char *Pascal_suffixes [] =
  { "p", "pas", NULL };

char *Perl_suffixes [] =
  { "pl", "pm", NULL };
char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };

char *plain_C_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
    "pc",			/* Pro*C file */
     NULL };

char *Postscript_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */

char *Prolog_suffixes [] =
  { "prolog", NULL };

char *Python_suffixes [] =
  { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number. */
char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };

char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };

char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };

char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
553 * Table of languages.
555 * It is ok for a given function to be listed under more than one
556 * name. I just didn't.
559 language lang_names [] =
561 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
562 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
563 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
564 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
565 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
566 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
567 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
568 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
569 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
570 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
571 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
572 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
573 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
574 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
575 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
576 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
577 { "python", Python_functions, NULL, Python_suffixes, NULL },
578 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
579 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
580 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
581 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
582 { "auto", NULL }, /* default guessing scheme */
583 { "none", just_read_file }, /* regexp matching only */
584 { NULL, NULL } /* end of list */
588 static void
589 print_language_names ()
591 language *lang;
592 char **name, **ext;
594 puts ("\nThese are the currently supported languages, along with the\n\
595 default file names and dot suffixes:");
596 for (lang = lang_names; lang->name != NULL; lang++)
598 printf (" %-*s", 10, lang->name);
599 if (lang->filenames != NULL)
600 for (name = lang->filenames; *name != NULL; name++)
601 printf (" %s", *name);
602 if (lang->suffixes != NULL)
603 for (ext = lang->suffixes; *ext != NULL; ext++)
604 printf (" .%s", *ext);
605 puts ("");
607 puts ("Where `auto' means use default language for files based on file\n\
608 name suffix, and `none' means only do regexp processing on files.\n\
609 If no language is specified and no matching suffix is found,\n\
610 the first line of the file is read for a sharp-bang (#!) sequence\n\
611 followed by the name of an interpreter. If no such sequence is found,\n\
612 Fortran is tried first; if no tags are found, C is tried next.\n\
613 When parsing any C file, a \"class\" keyword switches to C++.\n\
614 Compressed files are supported using gzip and bzip2.");
617 #ifndef EMACS_NAME
618 # define EMACS_NAME "GNU Emacs"
619 #endif
620 #ifndef VERSION
621 # define VERSION "21"
622 #endif
623 static void
624 print_version ()
626 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
627 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
628 puts ("This program is distributed under the same terms as Emacs");
630 exit (GOOD);
633 static void
634 print_help ()
636 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
638 These are the options accepted by %s.\n", progname, progname);
639 #ifdef LONG_OPTIONS
640 puts ("You may use unambiguous abbreviations for the long option names.");
641 #else
642 puts ("Long option names do not work with this executable, as it is not\n\
643 linked with GNU getopt.");
644 #endif /* LONG_OPTIONS */
645 puts ("A - as file name means read names from stdin (one per line).");
646 if (!CTAGS)
647 printf (" Absolute names are stored in the output file as they are.\n\
648 Relative ones are stored relative to the output file's directory.");
649 puts ("\n");
651 puts ("-a, --append\n\
652 Append tag entries to existing tags file.");
654 puts ("--packages-only\n\
655 For Ada files, only generate tags for packages .");
657 if (CTAGS)
658 puts ("-B, --backward-search\n\
659 Write the search commands for the tag entries using '?', the\n\
660 backward-search command instead of '/', the forward-search command.");
662 /* This option is mostly obsolete, because etags can now automatically
663 detect C++. Retained for backward compatibility and for debugging and
664 experimentation. In principle, we could want to tag as C++ even
665 before any "class" keyword.
666 puts ("-C, --c++\n\
667 Treat files whose name suffix defaults to C language as C++ files.");
670 puts ("--declarations\n\
671 In C and derived languages, create tags for function declarations,");
672 if (CTAGS)
673 puts ("\tand create tags for extern variables if --globals is used.");
674 else
675 puts
676 ("\tand create tags for extern variables unless --no-globals is used.");
678 if (CTAGS)
679 puts ("-d, --defines\n\
680 Create tag entries for C #define constants and enum constants, too.");
681 else
682 puts ("-D, --no-defines\n\
683 Don't create tag entries for C #define constants and enum constants.\n\
684 This makes the tags file smaller.");
686 if (!CTAGS)
688 puts ("-i FILE, --include=FILE\n\
689 Include a note in tag file indicating that, when searching for\n\
690 a tag, one should also consult the tags file FILE after\n\
691 checking the current file.");
692 puts ("-l LANG, --language=LANG\n\
693 Force the following files to be considered as written in the\n\
694 named language up to the next --language=LANG option.");
697 if (CTAGS)
698 puts ("--globals\n\
699 Create tag entries for global variables in some languages.");
700 else
701 puts ("--no-globals\n\
702 Do not create tag entries for global variables in some\n\
703 languages. This makes the tags file smaller.");
704 puts ("--members\n\
705 Create tag entries for member variables in C and derived languages.");
707 #ifdef ETAGS_REGEXPS
708 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
709 Make a tag for each line matching pattern REGEXP in the following\n\
710 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
711 regexfile is a file containing one REGEXP per line.\n\
712 REGEXP is anchored (as if preceded by ^).\n\
713 The form /REGEXP/NAME/ creates a named tag.\n\
714 For example Tcl named tags can be created with:\n\
715 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
716 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
717 Like -r, --regex but ignore case when matching expressions.");
718 puts ("-R, --no-regex\n\
719 Don't create tags from regexps for the following files.");
720 #endif /* ETAGS_REGEXPS */
721 puts ("-o FILE, --output=FILE\n\
722 Write the tags to FILE.");
723 puts ("-I, --ignore-indentation\n\
724 Don't rely on indentation quite as much as normal. Currently,\n\
725 this means not to assume that a closing brace in the first\n\
726 column is the final brace of a function or structure\n\
727 definition in C and C++.");
729 if (CTAGS)
731 puts ("-t, --typedefs\n\
732 Generate tag entries for C and Ada typedefs.");
733 puts ("-T, --typedefs-and-c++\n\
734 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
735 and C++ member functions.");
736 puts ("-u, --update\n\
737 Update the tag entries for the given files, leaving tag\n\
738 entries for other files in place. Currently, this is\n\
739 implemented by deleting the existing entries for the given\n\
740 files and then rewriting the new entries at the end of the\n\
741 tags file. It is often faster to simply rebuild the entire\n\
742 tag file than to use this.");
743 puts ("-v, --vgrind\n\
744 Generates an index of items intended for human consumption,\n\
745 similar to the output of vgrind. The index is sorted, and\n\
746 gives the page number of each item.");
747 puts ("-w, --no-warn\n\
748 Suppress warning messages about entries defined in multiple\n\
749 files.");
750 puts ("-x, --cxref\n\
751 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
752 The output uses line numbers instead of page numbers, but\n\
753 beyond that the differences are cosmetic; try both to see\n\
754 which you like.");
757 puts ("-V, --version\n\
758 Print the version of the program.\n\
759 -h, --help\n\
760 Print this help message.");
762 print_language_names ();
764 puts ("");
765 puts ("Report bugs to bug-gnu-emacs@gnu.org");
767 exit (GOOD);
/* Kind of a command line item recorded for in-order processing.  */
enum argument_type
{
  at_language,			/* a language specification */
  at_regexp,			/* a regexp option */
  at_filename,			/* a file name */
  at_icregexp			/* an ignore-case regexp option */
};
779 /* This structure helps us allow mixing of --lang and file names. */
780 typedef struct
782 enum argument_type arg_type;
783 char *what;
784 language *lang; /* language of the regexp */
785 } argument;
787 #ifdef VMS /* VMS specific functions */
/* String terminator.  */
#define	EOS	'\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define	MAX_FILE_SPEC_LEN	255

/* A file specification: a length word followed by the text, with room
   for MAX_FILE_SPEC_LEN characters plus a terminator.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
800 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
801 returning in each successive call the next file name matching the input
802 spec. The function expects that each in_spec passed
803 to it will be processed to completion; in particular, up to and
804 including the call following that in which the last matching name
805 is returned, the function ignores the value of in_spec, and will
806 only start processing a new spec with the following call.
807 If an error occurs, on return out_spec contains the value
808 of in_spec when the error occurred.
810 With each successive file name returned in out_spec, the
811 function's return value is one. When there are no more matching
812 names the function returns zero. If on the first call no file
813 matches in_spec, or there is any other error, -1 is returned.
816 #include <rmsdef.h>
817 #include <descrip.h>
818 #define OUTSIZE MAX_FILE_SPEC_LEN
819 static short
820 fn_exp (out, in)
821 vspec *out;
822 char *in;
824 static long context = 0;
825 static struct dsc$descriptor_s o;
826 static struct dsc$descriptor_s i;
827 static bool pass1 = TRUE;
828 long status;
829 short retval;
831 if (pass1)
833 pass1 = FALSE;
834 o.dsc$a_pointer = (char *) out;
835 o.dsc$w_length = (short)OUTSIZE;
836 i.dsc$a_pointer = in;
837 i.dsc$w_length = (short)strlen(in);
838 i.dsc$b_dtype = DSC$K_DTYPE_T;
839 i.dsc$b_class = DSC$K_CLASS_S;
840 o.dsc$b_dtype = DSC$K_DTYPE_VT;
841 o.dsc$b_class = DSC$K_CLASS_VS;
843 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
845 out->body[out->curlen] = EOS;
846 return 1;
848 else if (status == RMS$_NMF)
849 retval = 0;
850 else
852 strcpy(out->body, in);
853 retval = -1;
855 lib$find_file_end(&context);
856 pass1 = TRUE;
857 return retval;
861 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
862 name of each file specified by the provided arg expanding wildcards.
864 static char *
865 gfnames (arg, p_error)
866 char *arg;
867 bool *p_error;
869 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
871 switch (fn_exp (&filename, arg))
873 case 1:
874 *p_error = FALSE;
875 return filename.body;
876 case 0:
877 *p_error = FALSE;
878 return NULL;
879 default:
880 *p_error = TRUE;
881 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/* Stand-in for system: it runs nothing, reports that the facility is
   unavailable and returns -1 so that callers see a failure.  The
   return type is now spelled out and a value is always returned;
   previously the function fell off its end.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
893 #define VERSION_DELIM ';'
894 char *massage_name (s)
895 char *s;
897 char *start = s;
899 for ( ; *s; s++)
900 if (*s == VERSION_DELIM)
902 *s = EOS;
903 break;
905 else
906 *s = lowcase (*s);
907 return start;
909 #endif /* VMS */
913 main (argc, argv)
914 int argc;
915 char *argv[];
917 int i;
918 unsigned int nincluded_files;
919 char **included_files;
920 char *this_file;
921 argument *argbuffer;
922 int current_arg, file_count;
923 linebuffer filename_lb;
924 #ifdef VMS
925 bool got_err;
926 #endif
928 #ifdef DOS_NT
929 _fmode = O_BINARY; /* all of files are treated as binary files */
930 #endif /* DOS_NT */
932 progname = argv[0];
933 nincluded_files = 0;
934 included_files = xnew (argc, char *);
935 current_arg = 0;
936 file_count = 0;
938 /* Allocate enough no matter what happens. Overkill, but each one
939 is small. */
940 argbuffer = xnew (argc, argument);
942 #ifdef ETAGS_REGEXPS
943 /* Set syntax for regular expression routines. */
944 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
945 /* Translation table for case-insensitive search. */
946 for (i = 0; i < CHAR_SET_SIZE; i++)
947 lc_trans[i] = lowcase (i);
948 #endif /* ETAGS_REGEXPS */
951 * If etags, always find typedefs and structure tags. Why not?
952 * Also default to find macro constants, enum constants and
953 * global variables.
955 if (!CTAGS)
957 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
958 globals = TRUE;
959 declarations = FALSE;
960 members = FALSE;
963 while (1)
965 int opt;
966 char *optstring;
968 #ifdef ETAGS_REGEXPS
969 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
970 #else
971 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
972 #endif /* ETAGS_REGEXPS */
974 #ifndef LONG_OPTIONS
975 optstring = optstring + 1;
976 #endif /* LONG_OPTIONS */
978 opt = getopt_long (argc, argv, optstring, longopts, 0);
979 if (opt == EOF)
980 break;
982 switch (opt)
984 case 0:
985 /* If getopt returns 0, then it has already processed a
986 long-named option. We should do nothing. */
987 break;
989 case 1:
990 /* This means that a file name has been seen. Record it. */
991 argbuffer[current_arg].arg_type = at_filename;
992 argbuffer[current_arg].what = optarg;
993 ++current_arg;
994 ++file_count;
995 break;
997 /* Common options. */
998 case 'a': append_to_tagfile = TRUE; break;
999 case 'C': cplusplus = TRUE; break;
1000 case 'd': constantypedefs = TRUE; break;
1001 case 'D': constantypedefs = FALSE; break;
1002 case 'f': /* for compatibility with old makefiles */
1003 case 'o':
1004 if (tagfile)
1006 error ("-o option may only be given once.", (char *)NULL);
1007 suggest_asking_for_help ();
1009 tagfile = optarg;
1010 break;
1011 case 'I':
1012 case 'S': /* for backward compatibility */
1013 noindentypedefs = TRUE;
1014 break;
1015 case 'l':
1017 language *lang = get_language_from_langname (optarg);
1018 if (lang != NULL)
1020 argbuffer[current_arg].lang = lang;
1021 argbuffer[current_arg].arg_type = at_language;
1022 ++current_arg;
1025 break;
1026 #ifdef ETAGS_REGEXPS
1027 case 'r':
1028 argbuffer[current_arg].arg_type = at_regexp;
1029 argbuffer[current_arg].what = optarg;
1030 ++current_arg;
1031 break;
1032 case 'R':
1033 argbuffer[current_arg].arg_type = at_regexp;
1034 argbuffer[current_arg].what = NULL;
1035 ++current_arg;
1036 break;
1037 case 'c':
1038 argbuffer[current_arg].arg_type = at_icregexp;
1039 argbuffer[current_arg].what = optarg;
1040 ++current_arg;
1041 break;
1042 #endif /* ETAGS_REGEXPS */
1043 case 'V':
1044 print_version ();
1045 break;
1046 case 'h':
1047 case 'H':
1048 print_help ();
1049 break;
1050 case 't':
1051 typedefs = TRUE;
1052 break;
1053 case 'T':
1054 typedefs = typedefs_or_cplusplus = TRUE;
1055 break;
1056 #if (!CTAGS)
1057 /* Etags options */
1058 case 'i':
1059 included_files[nincluded_files++] = optarg;
1060 break;
1061 #else /* CTAGS */
1062 /* Ctags options. */
1063 case 'B': searchar = '?'; break;
1064 case 'u': update = TRUE; break;
1065 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1066 case 'x': cxref_style = TRUE; break;
1067 case 'w': no_warnings = TRUE; break;
1068 #endif /* CTAGS */
1069 default:
1070 suggest_asking_for_help ();
1074 for (; optind < argc; ++optind)
1076 argbuffer[current_arg].arg_type = at_filename;
1077 argbuffer[current_arg].what = argv[optind];
1078 ++current_arg;
1079 ++file_count;
1082 if (nincluded_files == 0 && file_count == 0)
1084 error ("no input files specified.", (char *)NULL);
1085 suggest_asking_for_help ();
1088 if (tagfile == NULL)
1089 tagfile = CTAGS ? "tags" : "TAGS";
1090 cwd = etags_getcwd (); /* the current working directory */
1091 if (cwd[strlen (cwd) - 1] != '/')
1093 char *oldcwd = cwd;
1094 cwd = concat (oldcwd, "/", "");
1095 free (oldcwd);
1097 if (streq (tagfile, "-"))
1098 tagfiledir = cwd;
1099 else
1100 tagfiledir = absolute_dirname (tagfile, cwd);
1102 init (); /* set up boolean "functions" */
1104 initbuffer (&lb);
1105 initbuffer (&filename_lb);
1107 if (!CTAGS)
1109 if (streq (tagfile, "-"))
1111 tagf = stdout;
1112 #ifdef DOS_NT
1113 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1114 doesn't take effect until after `stdout' is already open). */
1115 if (!isatty (fileno (stdout)))
1116 setmode (fileno (stdout), O_BINARY);
1117 #endif /* DOS_NT */
1119 else
1120 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1121 if (tagf == NULL)
1122 pfatal (tagfile);
1126 * Loop through files finding functions.
1128 for (i = 0; i < current_arg; ++i)
1130 switch (argbuffer[i].arg_type)
1132 case at_language:
1133 forced_lang = argbuffer[i].lang;
1134 break;
1135 #ifdef ETAGS_REGEXPS
1136 case at_regexp:
1137 analyse_regex (argbuffer[i].what, FALSE);
1138 break;
1139 case at_icregexp:
1140 analyse_regex (argbuffer[i].what, TRUE);
1141 break;
1142 #endif
1143 case at_filename:
1144 #ifdef VMS
1145 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1147 if (got_err)
1149 error ("can't find file %s\n", this_file);
1150 argc--, argv++;
1152 else
1154 this_file = massage_name (this_file);
1156 #else
1157 this_file = argbuffer[i].what;
1158 #endif
1159 /* Input file named "-" means read file names from stdin
1160 (one per line) and use them. */
1161 if (streq (this_file, "-"))
1162 while (readline_internal (&filename_lb, stdin) > 0)
1163 process_file (filename_lb.buffer);
1164 else
1165 process_file (this_file);
1166 #ifdef VMS
1168 #endif
1169 break;
1173 #ifdef ETAGS_REGEXPS
1174 free_patterns ();
1175 #endif /* ETAGS_REGEXPS */
1177 if (!CTAGS)
1179 while (nincluded_files-- > 0)
1180 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1182 fclose (tagf);
1183 exit (GOOD);
1186 /* If CTAGS, we are here. process_file did not write the tags yet,
1187 because we want them ordered. Let's do it now. */
1188 if (cxref_style)
1190 put_entries (head);
1191 free_tree (head);
1192 head = NULL;
1193 exit (GOOD);
1196 if (update)
1198 char cmd[BUFSIZ];
1199 for (i = 0; i < current_arg; ++i)
1201 if (argbuffer[i].arg_type != at_filename)
1202 continue;
1203 sprintf (cmd,
1204 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1205 tagfile, argbuffer[i].what, tagfile);
1206 if (system (cmd) != GOOD)
1207 fatal ("failed to execute shell command", (char *)NULL);
1209 append_to_tagfile = TRUE;
1212 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1213 if (tagf == NULL)
1214 pfatal (tagfile);
1215 put_entries (head);
1216 free_tree (head);
1217 head = NULL;
1218 fclose (tagf);
1220 if (update)
1222 char cmd[BUFSIZ];
1223 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1224 exit (system (cmd));
1226 return GOOD;
1232 * Return a compressor given the file name. If EXTPTR is non-zero,
1233 * return a pointer into FILE where the compressor-specific
1234 * extension begins. If no compressor is found, NULL is returned
1235 * and EXTPTR is not significant.
1236 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1238 static compressor *
1239 get_compressor_from_suffix (file, extptr)
1240 char *file;
1241 char **extptr;
1243 compressor *compr;
1244 char *slash, *suffix;
1246 /* This relies on FN to be after canonicalize_filename,
1247 so we don't need to consider backslashes on DOS_NT. */
1248 slash = etags_strrchr (file, '/');
1249 suffix = etags_strrchr (file, '.');
1250 if (suffix == NULL || suffix < slash)
1251 return NULL;
1252 if (extptr != NULL)
1253 *extptr = suffix;
1254 suffix += 1;
1255 /* Let those poor souls who live with DOS 8+3 file name limits get
1256 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1257 Only the first do loop is run if not MSDOS */
1260 for (compr = compressors; compr->suffix != NULL; compr++)
1261 if (streq (compr->suffix, suffix))
1262 return compr;
1263 if (!MSDOS)
1264 break; /* do it only once: not really a loop */
1265 if (extptr != NULL)
1266 *extptr = ++suffix;
1267 } while (*suffix != '\0');
1268 return NULL;
1274 * Return a language given the name.
1276 static language *
1277 get_language_from_langname (name)
1278 char *name;
1280 language *lang;
1282 if (name == NULL)
1283 error ("empty language name", (char *)NULL);
1284 else
1286 for (lang = lang_names; lang->name != NULL; lang++)
1287 if (streq (name, lang->name))
1288 return lang;
1289 error ("unknown language \"%s\"", name);
1292 return NULL;
1297 * Return a language given the interpreter name.
1299 static language *
1300 get_language_from_interpreter (interpreter)
1301 char *interpreter;
1303 language *lang;
1304 char **iname;
1306 if (interpreter == NULL)
1307 return NULL;
1308 for (lang = lang_names; lang->name != NULL; lang++)
1309 if (lang->interpreters != NULL)
1310 for (iname = lang->interpreters; *iname != NULL; iname++)
1311 if (streq (*iname, interpreter))
1312 return lang;
1314 return NULL;
1320 * Return a language given the file name.
1322 static language *
1323 get_language_from_filename (file)
1324 char *file;
1326 language *lang;
1327 char **name, **ext, *suffix;
1329 /* Try whole file name first. */
1330 for (lang = lang_names; lang->name != NULL; lang++)
1331 if (lang->filenames != NULL)
1332 for (name = lang->filenames; *name != NULL; name++)
1333 if (streq (*name, file))
1334 return lang;
1336 /* If not found, try suffix after last dot. */
1337 suffix = etags_strrchr (file, '.');
1338 if (suffix == NULL)
1339 return NULL;
1340 suffix += 1;
1341 for (lang = lang_names; lang->name != NULL; lang++)
1342 if (lang->suffixes != NULL)
1343 for (ext = lang->suffixes; *ext != NULL; ext++)
1344 if (streq (*ext, suffix))
1345 return lang;
1346 return NULL;
1352 * This routine is called on each file argument.
1354 static void
1355 process_file (file)
1356 char *file;
1358 struct stat stat_buf;
1359 FILE *inf;
1360 compressor *compr;
1361 char *compressed_name, *uncompressed_name;
1362 char *ext, *real_name;
1365 canonicalize_filename (file);
1366 if (streq (file, tagfile) && !streq (tagfile, "-"))
1368 error ("skipping inclusion of %s in self.", file);
1369 return;
1371 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1373 compressed_name = NULL;
1374 real_name = uncompressed_name = savestr (file);
1376 else
1378 real_name = compressed_name = savestr (file);
1379 uncompressed_name = savenstr (file, ext - file);
1382 /* If the canonicalised uncompressed name has already be dealt with,
1383 skip it silently, else add it to the list. */
1385 typedef struct processed_file
1387 char *filename;
1388 struct processed_file *next;
1389 } processed_file;
1390 static processed_file *pf_head = NULL;
1391 register processed_file *fnp;
1393 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1394 if (streq (uncompressed_name, fnp->filename))
1395 goto exit;
1396 fnp = pf_head;
1397 pf_head = xnew (1, struct processed_file);
1398 pf_head->filename = savestr (uncompressed_name);
1399 pf_head->next = fnp;
1402 if (stat (real_name, &stat_buf) != 0)
1404 /* Reset real_name and try with a different name. */
1405 real_name = NULL;
1406 if (compressed_name != NULL) /* try with the given suffix */
1408 if (stat (uncompressed_name, &stat_buf) == 0)
1409 real_name = uncompressed_name;
1411 else /* try all possible suffixes */
1413 for (compr = compressors; compr->suffix != NULL; compr++)
1415 compressed_name = concat (file, ".", compr->suffix);
1416 if (stat (compressed_name, &stat_buf) != 0)
1418 if (MSDOS)
1420 char *suf = compressed_name + strlen (file);
1421 size_t suflen = strlen (compr->suffix) + 1;
1422 for ( ; suf[1]; suf++, suflen--)
1424 memmove (suf, suf + 1, suflen);
1425 if (stat (compressed_name, &stat_buf) == 0)
1427 real_name = compressed_name;
1428 break;
1431 if (real_name != NULL)
1432 break;
1433 } /* MSDOS */
1434 free (compressed_name);
1435 compressed_name = NULL;
1437 else
1439 real_name = compressed_name;
1440 break;
1444 if (real_name == NULL)
1446 perror (file);
1447 goto exit;
1449 } /* try with a different name */
1451 if (!S_ISREG (stat_buf.st_mode))
1453 error ("skipping %s: it is not a regular file.", real_name);
1454 goto exit;
1456 if (real_name == compressed_name)
1458 char *cmd = concat (compr->command, " ", real_name);
1459 inf = (FILE *) popen (cmd, "r");
1460 free (cmd);
1462 else
1463 inf = fopen (real_name, "r");
1464 if (inf == NULL)
1466 perror (real_name);
1467 goto exit;
1470 find_entries (uncompressed_name, inf);
1472 if (real_name == compressed_name)
1473 pclose (inf);
1474 else
1475 fclose (inf);
1477 if (!CTAGS)
1479 char *filename;
1481 if (filename_is_absolute (uncompressed_name))
1483 /* file is an absolute file name. Canonicalise it. */
1484 filename = absolute_filename (uncompressed_name, cwd);
1486 else
1488 /* file is a file name relative to cwd. Make it relative
1489 to the directory of the tags file. */
1490 filename = relative_filename (uncompressed_name, tagfiledir);
1492 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1493 free (filename);
1494 put_entries (head);
1495 free_tree (head);
1496 head = NULL;
1499 exit:
1500 if (compressed_name) free(compressed_name);
1501 if (uncompressed_name) free(uncompressed_name);
1502 return;
1506 * This routine sets up the boolean pseudo-functions which work
1507 * by setting boolean flags dependent upon the corresponding character.
1508 * Every char which is NOT in that string is not a white char. Therefore,
1509 * all of the array "_wht" is set to FALSE, and then the elements
1510 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1511 * of a char is TRUE if it is the string "white", else FALSE.
1513 static void
1514 init ()
1516 register char *sp;
1517 register int i;
1519 for (i = 0; i < CHARS; i++)
1520 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1521 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1522 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1523 notinname('\0') = notinname('\n');
1524 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1525 begtoken('\0') = begtoken('\n');
1526 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1527 intoken('\0') = intoken('\n');
1528 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1529 endtoken('\0') = endtoken('\n');
1533 * This routine opens the specified file and calls the function
1534 * which finds the function and type definitions.
1536 node *last_node = NULL;
1538 static void
1539 find_entries (file, inf)
1540 char *file;
1541 FILE *inf;
1543 char *cp;
1544 language *lang;
1545 node *old_last_node;
1547 /* Memory leakage here: the string pointed by curfile is
1548 never released, because curfile is copied into np->file
1549 for each node, to be used in CTAGS mode. The amount of
1550 memory leaked here is the sum of the lengths of the
1551 file names. */
1552 curfile = savestr (file);
1554 /* If user specified a language, use it. */
1555 lang = forced_lang;
1556 if (lang != NULL && lang->function != NULL)
1558 curlang = lang;
1559 lang->function (inf);
1560 return;
1563 /* Try to guess the language given the file name. */
1564 lang = get_language_from_filename (file);
1565 if (lang != NULL && lang->function != NULL)
1567 curlang = lang;
1568 lang->function (inf);
1569 return;
1572 /* Look for sharp-bang as the first two characters. */
1573 if (readline_internal (&lb, inf) > 0
1574 && lb.len >= 2
1575 && lb.buffer[0] == '#'
1576 && lb.buffer[1] == '!')
1578 char *lp;
1580 /* Set lp to point at the first char after the last slash in the
1581 line or, if no slashes, at the first nonblank. Then set cp to
1582 the first successive blank and terminate the string. */
1583 lp = etags_strrchr (lb.buffer+2, '/');
1584 if (lp != NULL)
1585 lp += 1;
1586 else
1587 lp = skip_spaces (lb.buffer + 2);
1588 cp = skip_non_spaces (lp);
1589 *cp = '\0';
1591 if (strlen (lp) > 0)
1593 lang = get_language_from_interpreter (lp);
1594 if (lang != NULL && lang->function != NULL)
1596 curlang = lang;
1597 lang->function (inf);
1598 return;
1602 /* We rewind here, even if inf may be a pipe. We fail if the
1603 length of the first line is longer than the pipe block size,
1604 which is unlikely. */
1605 rewind (inf);
1607 /* Try Fortran. */
1608 old_last_node = last_node;
1609 curlang = get_language_from_langname ("fortran");
1610 Fortran_functions (inf);
1612 /* No Fortran entries found. Try C. */
1613 if (old_last_node == last_node)
1615 /* We do not tag if rewind fails.
1616 Only the file name will be recorded in the tags file. */
1617 rewind (inf);
1618 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1619 default_C_entries (inf);
1621 return;
1625 /* Record a tag. */
1626 static void
1627 pfnote (name, is_func, linestart, linelen, lno, cno)
1628 char *name; /* tag name, or NULL if unnamed */
1629 bool is_func; /* tag is a function */
1630 char *linestart; /* start of the line where tag is */
1631 int linelen; /* length of the line where tag is */
1632 int lno; /* line number */
1633 long cno; /* character number */
1635 register node *np;
1637 if (CTAGS && name == NULL)
1638 return;
1640 np = xnew (1, node);
1642 /* If ctags mode, change name "main" to M<thisfilename>. */
1643 if (CTAGS && !cxref_style && streq (name, "main"))
1645 register char *fp = etags_strrchr (curfile, '/');
1646 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1647 fp = etags_strrchr (np->name, '.');
1648 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1649 fp[0] = '\0';
1651 else
1652 np->name = name;
1653 np->been_warned = FALSE;
1654 np->file = curfile;
1655 np->is_func = is_func;
1656 np->lno = lno;
1657 /* Our char numbers are 0-base, because of C language tradition?
1658 ctags compatibility? old versions compatibility? I don't know.
1659 Anyway, since emacs's are 1-base we expect etags.el to take care
1660 of the difference. If we wanted to have 1-based numbers, we would
1661 uncomment the +1 below. */
1662 np->cno = cno /* + 1 */ ;
1663 np->left = np->right = NULL;
1664 if (CTAGS && !cxref_style)
1666 if (strlen (linestart) < 50)
1667 np->pat = concat (linestart, "$", "");
1668 else
1669 np->pat = savenstr (linestart, 50);
1671 else
1672 np->pat = savenstr (linestart, linelen);
1674 add_node (np, &head);
1678 * TAGS format specification
1679 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1681 * pfnote should emit the optimized form [unnamed tag] only if:
1682 * 1. name does not contain any of the characters " \t\r\n(),;";
1683 * 2. linestart contains name as either a rightmost, or rightmost but
1684 * one character, substring;
1685 * 3. the character, if any, immediately before name in linestart must
1686 * be one of the characters " \t(),;";
1687 * 4. the character, if any, immediately after name in linestart must
1688 * also be one of the characters " \t(),;".
1690 * The real implementation uses the notinname() macro, which recognises
1691 * characters slightly different form " \t\r\n(),;". See the variable
1692 * `nonam'.
1694 #define traditional_tag_style TRUE
1695 static void
1696 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1697 char *name; /* tag name, or NULL if unnamed */
1698 int namelen; /* tag length */
1699 bool is_func; /* tag is a function */
1700 char *linestart; /* start of the line where tag is */
1701 int linelen; /* length of the line where tag is */
1702 int lno; /* line number */
1703 long cno; /* character number */
1705 register char *cp;
1706 bool named;
1708 named = TRUE;
1709 if (!CTAGS)
1711 for (cp = name; !notinname (*cp); cp++)
1712 continue;
1713 if (*cp == '\0') /* rule #1 */
1715 cp = linestart + linelen - namelen;
1716 if (notinname (linestart[linelen-1]))
1717 cp -= 1; /* rule #4 */
1718 if (cp >= linestart /* rule #2 */
1719 && (cp == linestart
1720 || notinname (cp[-1])) /* rule #3 */
1721 && strneq (name, cp, namelen)) /* rule #2 */
1722 named = FALSE; /* use unnamed tag */
1726 if (named)
1727 name = savenstr (name, namelen);
1728 else
1729 name = NULL;
1730 pfnote (name, is_func, linestart, linelen, lno, cno);
1734 * free_tree ()
1735 * recurse on left children, iterate on right children.
1737 static void
1738 free_tree (np)
1739 register node *np;
1741 while (np)
1743 register node *node_right = np->right;
1744 free_tree (np->left);
1745 if (np->name != NULL)
1746 free (np->name);
1747 free (np->pat);
1748 free (np);
1749 np = node_right;
1754 * add_node ()
1755 * Adds a node to the tree of nodes. In etags mode, we don't keep
1756 * it sorted; we just keep a linear list. In ctags mode, maintain
1757 * an ordered tree, with no attempt at balancing.
1759 * add_node is the only function allowed to add nodes, so it can
1760 * maintain state.
1762 static void
1763 add_node (np, cur_node_p)
1764 node *np, **cur_node_p;
1766 register int dif;
1767 register node *cur_node = *cur_node_p;
1769 if (cur_node == NULL)
1771 *cur_node_p = np;
1772 last_node = np;
1773 return;
1776 if (!CTAGS)
1778 /* Etags Mode */
1779 if (last_node == NULL)
1780 fatal ("internal error in add_node", (char *)NULL);
1781 last_node->right = np;
1782 last_node = np;
1784 else
1786 /* Ctags Mode */
1787 dif = strcmp (np->name, cur_node->name);
1790 * If this tag name matches an existing one, then
1791 * do not add the node, but maybe print a warning.
1793 if (!dif)
1795 if (streq (np->file, cur_node->file))
1797 if (!no_warnings)
1799 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1800 np->file, lineno, np->name);
1801 fprintf (stderr, "Second entry ignored\n");
1804 else if (!cur_node->been_warned && !no_warnings)
1806 fprintf
1807 (stderr,
1808 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1809 np->file, cur_node->file, np->name);
1810 cur_node->been_warned = TRUE;
1812 return;
1815 /* Actually add the node */
1816 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1821 static void
1822 put_entries (np)
1823 register node *np;
1825 register char *sp;
1827 if (np == NULL)
1828 return;
1830 /* Output subentries that precede this one */
1831 put_entries (np->left);
1833 /* Output this entry */
1835 if (!CTAGS)
1837 if (np->name != NULL)
1838 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1839 np->pat, np->name, np->lno, np->cno);
1840 else
1841 fprintf (tagf, "%s\177%d,%ld\n",
1842 np->pat, np->lno, np->cno);
1844 else
1846 if (np->name == NULL)
1847 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1849 if (cxref_style)
1851 if (vgrind_style)
1852 fprintf (stdout, "%s %s %d\n",
1853 np->name, np->file, (np->lno + 63) / 64);
1854 else
1855 fprintf (stdout, "%-16s %3d %-16s %s\n",
1856 np->name, np->lno, np->file, np->pat);
1858 else
1860 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1862 if (np->is_func)
1863 { /* a function */
1864 putc (searchar, tagf);
1865 putc ('^', tagf);
1867 for (sp = np->pat; *sp; sp++)
1869 if (*sp == '\\' || *sp == searchar)
1870 putc ('\\', tagf);
1871 putc (*sp, tagf);
1873 putc (searchar, tagf);
1875 else
1876 { /* a typedef; text pattern inadequate */
1877 fprintf (tagf, "%d", np->lno);
1879 putc ('\n', tagf);
1883 /* Output subentries that follow this one */
1884 put_entries (np->right);
/* Length of a number's decimal representation, that is the number
   of characters printed for NUM by a "%ld" conversion: its digits,
   plus a leading minus sign if NUM is negative.  */
static int
number_len (num)
     long num;
{
  int len = 1;

  if (num < 0)
    len += 1;                   /* room for the minus sign */
  /* Division truncates toward zero, so this counts the remaining
     digits for negative numbers as well.  */
  while ((num /= 10) != 0)
    len += 1;
  return len;
}
1899 * Return total number of characters that put_entries will output for
1900 * the nodes in the subtree of the specified node. Works only if
1901 * we are not ctags, but called only in that case. This count
1902 * is irrelevant with the new tags.el, but is still supplied for
1903 * backward compatibility.
1905 static int
1906 total_size_of_entries (np)
1907 register node *np;
1909 register int total;
1911 if (np == NULL)
1912 return 0;
1914 for (total = 0; np != NULL; np = np->right)
1916 /* Count left subentries. */
1917 total += total_size_of_entries (np->left);
1919 /* Count this entry */
1920 total += strlen (np->pat) + 1;
1921 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1922 if (np->name != NULL)
1923 total += 1 + strlen (np->name); /* \001name */
1926 return total;
/* C extensions.  Bit flags naming the C dialect being parsed. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 * Kinds of reserved words, as stored in the keyword table below.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef,
  st_C_typespec
};
1954 static unsigned int hash P_((const char *, unsigned int));
1955 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1956 static enum sym_type C_symtype P_((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
      gperf -c -k 1,3 -o -p -r -t
%[
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
1963 if, 0, st_C_ignore
1964 for, 0, st_C_ignore
1965 while, 0, st_C_ignore
1966 switch, 0, st_C_ignore
1967 return, 0, st_C_ignore
1968 @interface, 0, st_C_objprot
1969 @protocol, 0, st_C_objprot
1970 @implementation,0, st_C_objimpl
1971 @end, 0, st_C_objend
1972 import, C_JAVA, st_C_ignore
1973 package, C_JAVA, st_C_ignore
1974 friend, C_PLPL, st_C_ignore
1975 extends, C_JAVA, st_C_javastruct
1976 implements, C_JAVA, st_C_javastruct
1977 interface, C_JAVA, st_C_struct
1978 class, 0, st_C_class
1979 namespace, C_PLPL, st_C_struct
1980 domain, C_STAR, st_C_struct
1981 union, 0, st_C_struct
1982 struct, 0, st_C_struct
1983 extern, 0, st_C_extern
1984 enum, 0, st_C_enum
1985 typedef, 0, st_C_typedef
1986 define, 0, st_C_define
1987 operator, C_PLPL, st_C_operator
1988 template, 0, st_C_template
1989 bool, C_PLPL, st_C_typespec
1990 long, 0, st_C_typespec
1991 short, 0, st_C_typespec
1992 int, 0, st_C_typespec
1993 char, 0, st_C_typespec
1994 float, 0, st_C_typespec
1995 double, 0, st_C_typespec
1996 signed, 0, st_C_typespec
1997 unsigned, 0, st_C_typespec
1998 auto, 0, st_C_typespec
1999 void, 0, st_C_typespec
2000 static, 0, st_C_typespec
2001 const, 0, st_C_typespec
2002 volatile, 0, st_C_typespec
2003 explicit, C_PLPL, st_C_typespec
2004 mutable, C_PLPL, st_C_typespec
2005 typename, C_PLPL, st_C_typespec
2006 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2007 DEFUN, 0, st_C_gnumacro
2008 SYSCALL, 0, st_C_gnumacro
2009 ENTRY, 0, st_C_gnumacro
2010 PSEUDO, 0, st_C_gnumacro
2011 # These are defined inside C functions, so currently they are not met.
2012 # EXFUN used in glibc, DEFVAR_* in emacs.
2013 #EXFUN, 0, st_C_gnumacro
#DEFVAR_,	0,	st_C_gnumacro
%]
and replace lines between %< and %> with its output,
then make in_word_set static. */
2018 /*%<*/
2019 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2020 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2021 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2023 #define TOTAL_KEYWORDS 47
2024 #define MIN_WORD_LENGTH 2
2025 #define MAX_WORD_LENGTH 15
2026 #define MIN_HASH_VALUE 18
2027 #define MAX_HASH_VALUE 138
2028 /* maximum key range = 121, duplicates = 0 */
/* Hash function generated by gperf for the C keyword table.
   The value is LEN, plus the weight of the first character of STR,
   plus the weight of its third character unless LEN is 1 or 2.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* The third character takes part for every length but 1 and 2.  */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  /* The first character always takes part.  */
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2082 #ifdef __GNUC__
2083 __inline
2084 #endif
2085 static struct C_stab_entry *
2086 in_word_set (str, len)
2087 register const char *str;
2088 register unsigned int len;
2090 static struct C_stab_entry wordlist[] =
2092 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2093 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2094 {"if", 0, st_C_ignore},
2095 {""}, {""}, {""}, {""},
2096 {"int", 0, st_C_typespec},
2097 {""}, {""},
2098 {"void", 0, st_C_typespec},
2099 {""}, {""},
2100 {"interface", C_JAVA, st_C_struct},
2101 {""},
2102 {"SYSCALL", 0, st_C_gnumacro},
2103 {""},
2104 {"return", 0, st_C_ignore},
2105 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2106 {"while", 0, st_C_ignore},
2107 {"auto", 0, st_C_typespec},
2108 {""}, {""}, {""}, {""}, {""}, {""},
2109 {"float", 0, st_C_typespec},
2110 {"typedef", 0, st_C_typedef},
2111 {"typename", C_PLPL, st_C_typespec},
2112 {""}, {""}, {""},
2113 {"friend", C_PLPL, st_C_ignore},
2114 {"volatile", 0, st_C_typespec},
2115 {""}, {""},
2116 {"for", 0, st_C_ignore},
2117 {"const", 0, st_C_typespec},
2118 {"import", C_JAVA, st_C_ignore},
2119 {""},
2120 {"define", 0, st_C_define},
2121 {"long", 0, st_C_typespec},
2122 {"implements", C_JAVA, st_C_javastruct},
2123 {"signed", 0, st_C_typespec},
2124 {""},
2125 {"extern", 0, st_C_extern},
2126 {"extends", C_JAVA, st_C_javastruct},
2127 {""},
2128 {"mutable", C_PLPL, st_C_typespec},
2129 {"template", 0, st_C_template},
2130 {"short", 0, st_C_typespec},
2131 {"bool", C_PLPL, st_C_typespec},
2132 {"char", 0, st_C_typespec},
2133 {"class", 0, st_C_class},
2134 {"operator", C_PLPL, st_C_operator},
2135 {""},
2136 {"switch", 0, st_C_ignore},
2137 {""},
2138 {"ENTRY", 0, st_C_gnumacro},
2139 {""},
2140 {"package", C_JAVA, st_C_ignore},
2141 {"union", 0, st_C_struct},
2142 {"@end", 0, st_C_objend},
2143 {"struct", 0, st_C_struct},
2144 {"namespace", C_PLPL, st_C_struct},
2145 {""}, {""},
2146 {"domain", C_STAR, st_C_struct},
2147 {"@interface", 0, st_C_objprot},
2148 {"PSEUDO", 0, st_C_gnumacro},
2149 {"double", 0, st_C_typespec},
2150 {""},
2151 {"@protocol", 0, st_C_objprot},
2152 {""},
2153 {"static", 0, st_C_typespec},
2154 {""}, {""},
2155 {"DEFUN", 0, st_C_gnumacro},
2156 {""}, {""}, {""}, {""},
2157 {"explicit", C_PLPL, st_C_typespec},
2158 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2159 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2160 {""},
2161 {"enum", 0, st_C_enum},
2162 {""}, {""},
2163 {"unsigned", 0, st_C_typespec},
2164 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2165 {"@implementation",0, st_C_objimpl}
2168 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2170 register int key = hash (str, len);
2172 if (key <= MAX_HASH_VALUE && key >= 0)
2174 register const char *s = wordlist[key].name;
2176 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2177 return &wordlist[key];
2180 return 0;
2182 /*%>*/
2184 static enum sym_type
2185 C_symtype (str, len, c_ext)
2186 char *str;
2187 int len;
2188 int c_ext;
2190 register struct C_stab_entry *se = in_word_set (str, len);
2192 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2193 return st_none;
2194 return se->type;
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Set when the extern keyword was seen for the current function
   or variable.  */
bool fvextern;
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if cblev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  sintemplate,                  /* inside template (ignore) */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 * Until then it holds a placeholder string.
 */
char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 * definedef is its state variable.
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations; its
 * state is kept in objdef.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2285 * Use this structure to keep info about the token read, and how it
2286 * should be tagged. Used by the make_C_tag function to build a tag.
2288 struct tok
2290 bool valid;
2291 bool named;
2292 int offset;
2293 int length;
2294 int lineno;
2295 long linepos;
2296 char *line;
2297 } token; /* latest token read */
2298 linebuffer token_name; /* its name */
2301 * Variables and functions for dealing with nested structures.
2302 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2304 static void pushclass_above P_((int, char *, int));
2305 static void popclass_above P_((int));
2306 static void write_classname P_((linebuffer *, char *qualifier));
2308 struct {
2309 char **cname; /* nested class names */
2310 int *cblev; /* nested class curly brace level */
2311 int nl; /* class nesting level (elements used) */
2312 int size; /* length of the array */
2313 } cstack; /* stack for nested declaration tags */
2314 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2315 #define nestlev (cstack.nl)
2316 /* After struct keyword or in struct body, not inside an nested function. */
2317 #define instruct (structdef == snone && nestlev > 0 \
2318 && cblev == cstack.cblev[nestlev-1] + 1)
2320 static void
2321 pushclass_above (cblev, str, len)
2322 int cblev;
2323 char *str;
2324 int len;
2326 int nl;
2328 popclass_above (cblev);
2329 nl = cstack.nl;
2330 if (nl >= cstack.size)
2332 int size = cstack.size *= 2;
2333 xrnew (cstack.cname, size, char *);
2334 xrnew (cstack.cblev, size, int);
2336 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2337 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2338 cstack.cblev[nl] = cblev;
2339 cstack.nl = nl + 1;
2342 static void
2343 popclass_above (cblev)
2344 int cblev;
2346 int nl;
2348 for (nl = cstack.nl - 1;
2349 nl >= 0 && cstack.cblev[nl] >= cblev;
2350 nl--)
2352 if (cstack.cname[nl] != NULL)
2353 free (cstack.cname[nl]);
2354 cstack.nl = nl;
2358 static void
2359 write_classname (cn, qualifier)
2360 linebuffer *cn;
2361 char *qualifier;
2363 int i, len;
2364 int qlen = strlen (qualifier);
2366 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2368 len = 0;
2369 cn->len = 0;
2370 cn->buffer[0] = '\0';
2372 else
2374 len = strlen (cstack.cname[0]);
2375 linebuffer_setlen (cn, len);
2376 strcpy (cn->buffer, cstack.cname[0]);
2378 for (i = 1; i < cstack.nl; i++)
2380 char *s;
2381 int slen;
2383 s = cstack.cname[i];
2384 if (s == NULL)
2385 continue;
2386 slen = strlen (s);
2387 len += slen + qlen;
2388 linebuffer_setlen (cn, len);
2389 strncat (cn->buffer, qualifier, qlen);
2390 strncat (cn->buffer, s, slen);
2395 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2396 static void make_C_tag P_((bool));
2399 * consider_token ()
2400 * checks to see if the current token is at the start of a
2401 * function or variable, or corresponds to a typedef, or
2402 * is a struct/union/enum tag, or #define, or an enum constant.
2404 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2405 * with args. C_EXTP points to which language we are looking at.
2407 * Globals
2408 * fvdef IN OUT
2409 * structdef IN OUT
2410 * definedef IN OUT
2411 * typdef IN OUT
2412 * objdef IN OUT
2415 static bool
2416 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2417 register char *str; /* IN: token pointer */
2418 register int len; /* IN: token length */
2419 register int c; /* IN: first char after the token */
2420 int *c_extp; /* IN, OUT: C extensions mask */
2421 int cblev; /* IN: curly brace level */
2422 int parlev; /* IN: parenthesis level */
2423 bool *is_func_or_var; /* OUT: function or variable found */
2425 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2426 structtype is the type of the preceding struct-like keyword, and
2427 structcblev is the curly brace level where it has been seen. */
2428 static enum sym_type structtype;
2429 static int structcblev;
2430 static enum sym_type toktype;
2433 toktype = C_symtype (str, len, *c_extp);
2436 * Advance the definedef state machine.
2438 switch (definedef)
2440 case dnone:
2441 /* We're not on a preprocessor line. */
2442 if (toktype == st_C_gnumacro)
2444 fvdef = fdefunkey;
2445 return FALSE;
2447 break;
2448 case dsharpseen:
2449 if (toktype == st_C_define)
2451 definedef = ddefineseen;
2453 else
2455 definedef = dignorerest;
2457 return FALSE;
2458 case ddefineseen:
2460 * Make a tag for any macro, unless it is a constant
2461 * and constantypedefs is FALSE.
2463 definedef = dignorerest;
2464 *is_func_or_var = (c == '(');
2465 if (!*is_func_or_var && !constantypedefs)
2466 return FALSE;
2467 else
2468 return TRUE;
2469 case dignorerest:
2470 return FALSE;
2471 default:
2472 error ("internal error: definedef value.", (char *)NULL);
2476 * Now typedefs
2478 switch (typdef)
2480 case tnone:
2481 if (toktype == st_C_typedef)
2483 if (typedefs)
2484 typdef = tkeyseen;
2485 fvextern = FALSE;
2486 fvdef = fvnone;
2487 return FALSE;
2489 break;
2490 case tkeyseen:
2491 switch (toktype)
2493 case st_none:
2494 case st_C_typespec:
2495 case st_C_class:
2496 case st_C_struct:
2497 case st_C_enum:
2498 typdef = ttypeseen;
2499 break;
2501 break;
2502 case ttypeseen:
2503 if (structdef == snone && fvdef == fvnone)
2505 fvdef = fvnameseen;
2506 return TRUE;
2508 break;
2509 case tend:
2510 switch (toktype)
2512 case st_C_typespec:
2513 case st_C_class:
2514 case st_C_struct:
2515 case st_C_enum:
2516 return FALSE;
2518 return TRUE;
2522 * This structdef business is NOT invoked when we are ctags and the
2523 * file is plain C. This is because a struct tag may have the same
2524 * name as another tag, and this loses with ctags.
2526 switch (toktype)
2528 case st_C_javastruct:
2529 if (structdef == stagseen)
2530 structdef = scolonseen;
2531 return FALSE;
2532 case st_C_template:
2533 case st_C_class:
2534 if (cblev == 0
2535 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2536 && definedef == dnone && structdef == snone
2537 && typdef == tnone && fvdef == fvnone)
2538 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2539 if (toktype == st_C_template)
2540 break;
2541 /* FALLTHRU */
2542 case st_C_struct:
2543 case st_C_enum:
2544 if (parlev == 0
2545 && fvdef != vignore
2546 && (typdef == tkeyseen
2547 || (typedefs_or_cplusplus && structdef == snone)))
2549 structdef = skeyseen;
2550 structtype = toktype;
2551 structcblev = cblev;
2553 return FALSE;
2556 if (structdef == skeyseen)
2558 structdef = stagseen;
2559 return TRUE;
2562 if (typdef != tnone)
2563 definedef = dnone;
2565 /* Detect Objective C constructs. */
2566 switch (objdef)
2568 case onone:
2569 switch (toktype)
2571 case st_C_objprot:
2572 objdef = oprotocol;
2573 return FALSE;
2574 case st_C_objimpl:
2575 objdef = oimplementation;
2576 return FALSE;
2578 break;
2579 case oimplementation:
2580 /* Save the class tag for functions or variables defined inside. */
2581 objtag = savenstr (str, len);
2582 objdef = oinbody;
2583 return FALSE;
2584 case oprotocol:
2585 /* Save the class tag for categories. */
2586 objtag = savenstr (str, len);
2587 objdef = otagseen;
2588 *is_func_or_var = TRUE;
2589 return TRUE;
2590 case oparenseen:
2591 objdef = ocatseen;
2592 *is_func_or_var = TRUE;
2593 return TRUE;
2594 case oinbody:
2595 break;
2596 case omethodsign:
2597 if (parlev == 0)
2599 objdef = omethodtag;
2600 linebuffer_setlen (&token_name, len);
2601 strncpy (token_name.buffer, str, len);
2602 token_name.buffer[len] = '\0';
2603 return TRUE;
2605 return FALSE;
2606 case omethodcolon:
2607 if (parlev == 0)
2608 objdef = omethodparm;
2609 return FALSE;
2610 case omethodparm:
2611 if (parlev == 0)
2613 objdef = omethodtag;
2614 linebuffer_setlen (&token_name, token_name.len + len);
2615 strncat (token_name.buffer, str, len);
2616 return TRUE;
2618 return FALSE;
2619 case oignore:
2620 if (toktype == st_C_objend)
2622 /* Memory leakage here: the string pointed by objtag is
2623 never released, because many tests would be needed to
2624 avoid breaking on incorrect input code. The amount of
2625 memory leaked here is the sum of the lengths of the
2626 class tags.
2627 free (objtag); */
2628 objdef = onone;
2630 return FALSE;
2633 /* A function, variable or enum constant? */
2634 switch (toktype)
2636 case st_C_extern:
2637 fvextern = TRUE;
2638 /* FALLTHRU */
2639 case st_C_typespec:
2640 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2641 fvdef = fvnone; /* should be useless */
2642 return FALSE;
2643 case st_C_ignore:
2644 fvextern = FALSE;
2645 fvdef = vignore;
2646 return FALSE;
2647 case st_C_operator:
2648 fvdef = foperator;
2649 *is_func_or_var = TRUE;
2650 return TRUE;
2651 case st_none:
2652 if (constantypedefs
2653 && structdef == snone
2654 && structtype == st_C_enum && cblev > structcblev)
2655 return TRUE; /* enum constant */
2656 switch (fvdef)
2658 case fdefunkey:
2659 if (cblev > 0)
2660 break;
2661 fvdef = fdefunname; /* GNU macro */
2662 *is_func_or_var = TRUE;
2663 return TRUE;
2664 case fvnone:
2665 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2666 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2668 fvdef = vignore;
2669 return FALSE;
2671 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2673 fvdef = foperator;
2674 *is_func_or_var = TRUE;
2675 return TRUE;
2677 if (cblev > 0 && !instruct)
2678 break;
2679 fvdef = fvnameseen; /* function or variable */
2680 *is_func_or_var = TRUE;
2681 return TRUE;
2683 break;
2686 return FALSE;
2691 * C_entries often keeps pointers to tokens or lines which are older than
2692 * the line currently read. By keeping two line buffers, and switching
2693 * them at end of line, it is possible to use those pointers.
2695 struct
2697 long linepos;
2698 linebuffer lb;
2699 } lbs[2];
2701 #define current_lb_is_new (newndx == curndx)
2702 #define switch_line_buffers() (curndx = 1 - curndx)
2704 #define curlb (lbs[curndx].lb)
2705 #define newlb (lbs[newndx].lb)
2706 #define curlinepos (lbs[curndx].linepos)
2707 #define newlinepos (lbs[newndx].linepos)
2709 #define CNL_SAVE_DEFINEDEF() \
2710 do { \
2711 curlinepos = charno; \
2712 lineno++; \
2713 linecharno = charno; \
2714 charno += readline (&curlb, inf); \
2715 lp = curlb.buffer; \
2716 quotednl = FALSE; \
2717 newndx = curndx; \
2718 } while (0)
2720 #define CNL() \
2721 do { \
2722 CNL_SAVE_DEFINEDEF(); \
2723 if (savetoken.valid) \
2725 token = savetoken; \
2726 savetoken.valid = FALSE; \
2728 definedef = dnone; \
2729 } while (0)
2732 static void
2733 make_C_tag (isfun)
2734 bool isfun;
2736 /* This function should never be called when token.valid is FALSE, but
2737 we must protect against invalid input or internal errors. */
2738 if (DEBUG || token.valid)
2740 if (traditional_tag_style)
2742 /* This was the original code. Now we call new_pfnote instead,
2743 which uses the new method for naming tags (see new_pfnote). */
2744 char *name = NULL;
2746 if (CTAGS || token.named)
2747 name = savestr (token_name.buffer);
2748 if (DEBUG && !token.valid)
2750 if (token.named)
2751 name = concat (name, "##invalid##", "");
2752 else
2753 name = savestr ("##invalid##");
2755 pfnote (name, isfun, token.line,
2756 token.offset+token.length+1, token.lineno, token.linepos);
2758 else
2759 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2760 token.offset+token.length+1, token.lineno, token.linepos);
2761 token.valid = FALSE;
2767 * C_entries ()
2768 * This routine finds functions, variables, typedefs,
2769 * #define's, enum constants and struct/union/enum definitions in
2770 * C syntax and adds them to the list.
2772 static void
2773 C_entries (c_ext, inf)
2774 int c_ext; /* extension of C */
2775 FILE *inf; /* input file */
2777 register char c; /* latest char read; '\0' for end of line */
2778 register char *lp; /* pointer one beyond the character `c' */
2779 int curndx, newndx; /* indices for current and new lb */
2780 register int tokoff; /* offset in line of start of current token */
2781 register int toklen; /* length of current token */
2782 char *qualifier; /* string used to qualify names */
2783 int qlen; /* length of qualifier */
2784 int cblev; /* current curly brace level */
2785 int parlev; /* current parenthesis level */
2786 int typdefcblev; /* cblev where a typedef struct body begun */
2787 bool incomm, inquote, inchar, quotednl, midtoken;
2788 bool cplpl, cjava;
2789 bool yacc_rules; /* in the rules part of a yacc file */
2790 struct tok savetoken; /* token saved during preprocessor handling */
2793 initbuffer (&token_name);
2794 initbuffer (&lbs[0].lb);
2795 initbuffer (&lbs[1].lb);
2796 if (cstack.size == 0)
2798 cstack.size = (DEBUG) ? 1 : 4;
2799 cstack.nl = 0;
2800 cstack.cname = xnew (cstack.size, char *);
2801 cstack.cblev = xnew (cstack.size, int);
2804 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
2805 curndx = newndx = 0;
2806 lineno = 0;
2807 charno = 0;
2808 lp = curlb.buffer;
2809 *lp = 0;
2811 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2812 structdef = snone; definedef = dnone; objdef = onone;
2813 yacc_rules = FALSE;
2814 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2815 token.valid = savetoken.valid = FALSE;
2816 cblev = 0;
2817 parlev = 0;
2818 cplpl = (c_ext & C_PLPL) == C_PLPL;
2819 cjava = (c_ext & C_JAVA) == C_JAVA;
2820 if (cjava)
2821 { qualifier = "."; qlen = 1; }
2822 else
2823 { qualifier = "::"; qlen = 2; }
2826 while (!feof (inf))
2828 c = *lp++;
2829 if (c == '\\')
2831 /* If we're at the end of the line, the next character is a
2832 '\0'; don't skip it, because it's the thing that tells us
2833 to read the next line. */
2834 if (*lp == '\0')
2836 quotednl = TRUE;
2837 continue;
2839 lp++;
2840 c = ' ';
2842 else if (incomm)
2844 switch (c)
2846 case '*':
2847 if (*lp == '/')
2849 c = *lp++;
2850 incomm = FALSE;
2852 break;
2853 case '\0':
2854 /* Newlines inside comments do not end macro definitions in
2855 traditional cpp. */
2856 CNL_SAVE_DEFINEDEF ();
2857 break;
2859 continue;
2861 else if (inquote)
2863 switch (c)
2865 case '"':
2866 inquote = FALSE;
2867 break;
2868 case '\0':
2869 /* Newlines inside strings do not end macro definitions
2870 in traditional cpp, even though compilers don't
2871 usually accept them. */
2872 CNL_SAVE_DEFINEDEF ();
2873 break;
2875 continue;
2877 else if (inchar)
2879 switch (c)
2881 case '\0':
2882 /* Hmmm, something went wrong. */
2883 CNL ();
2884 /* FALLTHRU */
2885 case '\'':
2886 inchar = FALSE;
2887 break;
2889 continue;
2891 else
2892 switch (c)
2894 case '"':
2895 inquote = TRUE;
2896 switch (fvdef)
2898 case fdefunkey:
2899 case fstartlist:
2900 case finlist:
2901 case fignore:
2902 case vignore:
2903 break;
2904 default:
2905 fvextern = FALSE;
2906 fvdef = fvnone;
2908 continue;
2909 case '\'':
2910 inchar = TRUE;
2911 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2913 fvextern = FALSE;
2914 fvdef = fvnone;
2916 continue;
2917 case '/':
2918 if (*lp == '*')
2920 lp++;
2921 incomm = TRUE;
2922 continue;
2924 else if (/* cplpl && */ *lp == '/')
2926 c = '\0';
2927 break;
2929 else
2930 break;
2931 case '%':
2932 if ((c_ext & YACC) && *lp == '%')
2934 /* Entering or exiting rules section in yacc file. */
2935 lp++;
2936 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2937 typdef = tnone; structdef = snone;
2938 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2939 cblev = 0;
2940 yacc_rules = !yacc_rules;
2941 continue;
2943 else
2944 break;
2945 case '#':
2946 if (definedef == dnone)
2948 char *cp;
2949 bool cpptoken = TRUE;
2951 /* Look back on this line. If all blanks, or nonblanks
2952 followed by an end of comment, this is a preprocessor
2953 token. */
2954 for (cp = newlb.buffer; cp < lp-1; cp++)
2955 if (!iswhite (*cp))
2957 if (*cp == '*' && *(cp+1) == '/')
2959 cp++;
2960 cpptoken = TRUE;
2962 else
2963 cpptoken = FALSE;
2965 if (cpptoken)
2966 definedef = dsharpseen;
2967 } /* if (definedef == dnone) */
2969 continue;
2970 } /* switch (c) */
2973 /* Consider token only if some involved conditions are satisfied. */
2974 if (typdef != tignore
2975 && definedef != dignorerest
2976 && fvdef != finlist
2977 && structdef != sintemplate
2978 && (definedef != dnone
2979 || structdef != scolonseen))
2981 if (midtoken)
2983 if (endtoken (c))
2985 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2988 * This handles :: in the middle, but not at the
2989 * beginning of an identifier. Also, space-separated
2990 * :: is not recognised.
2992 lp += 2;
2993 toklen += 2;
2994 c = lp[-1];
2995 goto still_in_token;
2997 else
2999 bool funorvar = FALSE;
3001 if (yacc_rules
3002 || consider_token (newlb.buffer + tokoff, toklen, c,
3003 &c_ext, cblev, parlev, &funorvar))
3005 if (fvdef == foperator)
3007 char *oldlp = lp;
3008 lp = skip_spaces (lp-1);
3009 if (*lp != '\0')
3010 lp += 1;
3011 while (*lp != '\0'
3012 && !iswhite (*lp) && *lp != '(')
3013 lp += 1;
3014 c = *lp++;
3015 toklen += lp - oldlp;
3017 token.named = FALSE;
3018 if ((c_ext & C_EXT) /* not pure C */
3019 && nestlev > 0 && definedef == dnone)
3020 /* in struct body */
3022 write_classname (&token_name, qualifier);
3023 linebuffer_setlen (&token_name,
3024 token_name.len+qlen+toklen);
3025 strcat (token_name.buffer, qualifier);
3026 strncat (token_name.buffer,
3027 newlb.buffer + tokoff, toklen);
3028 token.named = TRUE;
3030 else if (objdef == ocatseen)
3031 /* Objective C category */
3033 int len = strlen (objtag) + 2 + toklen;
3034 linebuffer_setlen (&token_name, len);
3035 strcpy (token_name.buffer, objtag);
3036 strcat (token_name.buffer, "(");
3037 strncat (token_name.buffer,
3038 newlb.buffer + tokoff, toklen);
3039 strcat (token_name.buffer, ")");
3040 token.named = TRUE;
3042 else if (objdef == omethodtag
3043 || objdef == omethodparm)
3044 /* Objective C method */
3046 token.named = TRUE;
3048 else if (fvdef == fdefunname)
3049 /* GNU DEFUN and similar macros */
3051 bool defun = (newlb.buffer[tokoff] == 'F');
3052 int off = tokoff;
3053 int len = toklen;
3055 /* Rewrite the tag so that emacs lisp DEFUNs
3056 can be found by their elisp name */
3057 if (defun)
3059 off += 1;
3060 len -= 1;
3062 len = toklen;
3063 linebuffer_setlen (&token_name, len);
3064 strncpy (token_name.buffer,
3065 newlb.buffer + off, len);
3066 token_name.buffer[len] = '\0';
3067 if (defun)
3068 while (--len >= 0)
3069 if (token_name.buffer[len] == '_')
3070 token_name.buffer[len] = '-';
3071 token.named = defun;
3073 else
3075 linebuffer_setlen (&token_name, toklen);
3076 strncpy (token_name.buffer,
3077 newlb.buffer + tokoff, toklen);
3078 token_name.buffer[toklen] = '\0';
3079 /* Name macros and members. */
3080 token.named = (structdef == stagseen
3081 || typdef == ttypeseen
3082 || typdef == tend
3083 || (funorvar
3084 && definedef == dignorerest)
3085 || (funorvar
3086 && definedef == dnone
3087 && structdef == snone
3088 && cblev > 0));
3090 token.lineno = lineno;
3091 token.offset = tokoff;
3092 token.length = toklen;
3093 token.line = newlb.buffer;
3094 token.linepos = newlinepos;
3095 token.valid = TRUE;
3097 if (definedef == dnone
3098 && (fvdef == fvnameseen
3099 || fvdef == foperator
3100 || structdef == stagseen
3101 || typdef == tend
3102 || typdef == ttypeseen
3103 || objdef != onone))
3105 if (current_lb_is_new)
3106 switch_line_buffers ();
3108 else if (definedef != dnone
3109 || fvdef == fdefunname
3110 || instruct)
3111 make_C_tag (funorvar);
3113 midtoken = FALSE;
3115 } /* if (endtoken (c)) */
3116 else if (intoken (c))
3117 still_in_token:
3119 toklen++;
3120 continue;
3122 } /* if (midtoken) */
3123 else if (begtoken (c))
3125 switch (definedef)
3127 case dnone:
3128 switch (fvdef)
3130 case fstartlist:
3131 fvdef = finlist;
3132 continue;
3133 case flistseen:
3134 make_C_tag (TRUE); /* a function */
3135 fvdef = fignore;
3136 break;
3137 case fvnameseen:
3138 fvdef = fvnone;
3139 break;
3141 if (structdef == stagseen && !cjava)
3143 popclass_above (cblev);
3144 structdef = snone;
3146 break;
3147 case dsharpseen:
3148 savetoken = token;
3150 if (!yacc_rules || lp == newlb.buffer + 1)
3152 tokoff = lp - 1 - newlb.buffer;
3153 toklen = 1;
3154 midtoken = TRUE;
3156 continue;
3157 } /* if (begtoken) */
3158 } /* if must look at token */
3161 /* Detect end of line, colon, comma, semicolon and various braces
3162 after having handled a token.*/
3163 switch (c)
3165 case ':':
3166 if (yacc_rules && token.offset == 0 && token.valid)
3168 make_C_tag (FALSE); /* a yacc function */
3169 break;
3171 if (definedef != dnone)
3172 break;
3173 switch (objdef)
3175 case otagseen:
3176 objdef = oignore;
3177 make_C_tag (TRUE); /* an Objective C class */
3178 break;
3179 case omethodtag:
3180 case omethodparm:
3181 objdef = omethodcolon;
3182 linebuffer_setlen (&token_name, token_name.len + 1);
3183 strcat (token_name.buffer, ":");
3184 break;
3186 if (structdef == stagseen)
3187 structdef = scolonseen;
3188 break;
3189 case ';':
3190 if (definedef != dnone)
3191 break;
3192 switch (typdef)
3194 case tend:
3195 case ttypeseen:
3196 make_C_tag (FALSE); /* a typedef */
3197 typdef = tnone;
3198 fvdef = fvnone;
3199 break;
3200 case tnone:
3201 case tinbody:
3202 case tignore:
3203 switch (fvdef)
3205 case fignore:
3206 if (typdef == tignore)
3207 fvdef = fvnone;
3208 break;
3209 case fvnameseen:
3210 if ((globals && cblev == 0 && (!fvextern || declarations))
3211 || (members && instruct))
3212 make_C_tag (FALSE); /* a variable */
3213 fvextern = FALSE;
3214 fvdef = fvnone;
3215 token.valid = FALSE;
3216 break;
3217 case flistseen:
3218 if ((declarations && typdef == tnone && !instruct)
3219 || (members && typdef != tignore && instruct))
3220 make_C_tag (TRUE); /* a function declaration */
3221 /* FALLTHRU */
3222 default:
3223 fvextern = FALSE;
3224 fvdef = fvnone;
3225 if (declarations
3226 && structdef == stagseen && (c_ext & C_PLPL))
3227 make_C_tag (FALSE); /* forward declaration */
3228 else
3229 /* The following instruction invalidates the token.
3230 Probably the token should be invalidated in all other
3231 cases where some state machine is reset prematurely. */
3232 token.valid = FALSE;
3233 } /* switch (fvdef) */
3234 /* FALLTHRU */
3235 default:
3236 if (!instruct)
3237 typdef = tnone;
3239 if (structdef == stagseen)
3240 structdef = snone;
3241 break;
3242 case ',':
3243 if (definedef != dnone)
3244 break;
3245 switch (objdef)
3247 case omethodtag:
3248 case omethodparm:
3249 make_C_tag (TRUE); /* an Objective C method */
3250 objdef = oinbody;
3251 break;
3253 switch (fvdef)
3255 case fdefunkey:
3256 case foperator:
3257 case fstartlist:
3258 case finlist:
3259 case fignore:
3260 case vignore:
3261 break;
3262 case fdefunname:
3263 fvdef = fignore;
3264 break;
3265 case fvnameseen: /* a variable */
3266 if ((globals && cblev == 0 && (!fvextern || declarations))
3267 || (members && instruct))
3268 make_C_tag (FALSE);
3269 break;
3270 case flistseen: /* a function */
3271 if ((declarations && typdef == tnone && !instruct)
3272 || (members && typdef != tignore && instruct))
3274 make_C_tag (TRUE); /* a function declaration */
3275 fvdef = fvnameseen;
3277 else if (!declarations)
3278 fvdef = fvnone;
3279 token.valid = FALSE;
3280 break;
3281 default:
3282 fvdef = fvnone;
3284 if (structdef == stagseen)
3285 structdef = snone;
3286 break;
3287 case '[':
3288 if (definedef != dnone)
3289 break;
3290 if (structdef == stagseen)
3291 structdef = snone;
3292 switch (typdef)
3294 case ttypeseen:
3295 case tend:
3296 typdef = tignore;
3297 make_C_tag (FALSE); /* a typedef */
3298 break;
3299 case tnone:
3300 case tinbody:
3301 switch (fvdef)
3303 case foperator:
3304 case finlist:
3305 case fignore:
3306 case vignore:
3307 break;
3308 case fvnameseen:
3309 if ((members && cblev == 1)
3310 || (globals && cblev == 0
3311 && (!fvextern || declarations)))
3312 make_C_tag (FALSE); /* a variable */
3313 /* FALLTHRU */
3314 default:
3315 fvdef = fvnone;
3317 break;
3319 break;
3320 case '(':
3321 if (definedef != dnone)
3322 break;
3323 if (objdef == otagseen && parlev == 0)
3324 objdef = oparenseen;
3325 switch (fvdef)
3327 case fvnameseen:
3328 if (typdef == ttypeseen
3329 && *lp != '*'
3330 && !instruct)
3332 /* This handles constructs like:
3333 typedef void OperatorFun (int fun); */
3334 make_C_tag (FALSE);
3335 typdef = tignore;
3336 fvdef = fignore;
3337 break;
3339 /* FALLTHRU */
3340 case foperator:
3341 fvdef = fstartlist;
3342 break;
3343 case flistseen:
3344 fvdef = finlist;
3345 break;
3347 parlev++;
3348 break;
3349 case ')':
3350 if (definedef != dnone)
3351 break;
3352 if (objdef == ocatseen && parlev == 1)
3354 make_C_tag (TRUE); /* an Objective C category */
3355 objdef = oignore;
3357 if (--parlev == 0)
3359 switch (fvdef)
3361 case fstartlist:
3362 case finlist:
3363 fvdef = flistseen;
3364 break;
3366 if (!instruct
3367 && (typdef == tend
3368 || typdef == ttypeseen))
3370 typdef = tignore;
3371 make_C_tag (FALSE); /* a typedef */
3374 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3375 parlev = 0;
3376 break;
3377 case '{':
3378 if (definedef != dnone)
3379 break;
3380 if (typdef == ttypeseen)
3382 /* Whenever typdef is set to tinbody (currently only
3383 here), typdefcblev should be set to cblev. */
3384 typdef = tinbody;
3385 typdefcblev = cblev;
3387 switch (fvdef)
3389 case flistseen:
3390 make_C_tag (TRUE); /* a function */
3391 /* FALLTHRU */
3392 case fignore:
3393 fvdef = fvnone;
3394 break;
3395 case fvnone:
3396 switch (objdef)
3398 case otagseen:
3399 make_C_tag (TRUE); /* an Objective C class */
3400 objdef = oignore;
3401 break;
3402 case omethodtag:
3403 case omethodparm:
3404 make_C_tag (TRUE); /* an Objective C method */
3405 objdef = oinbody;
3406 break;
3407 default:
3408 /* Neutralize `extern "C" {' grot. */
3409 if (cblev == 0 && structdef == snone && nestlev == 0
3410 && typdef == tnone)
3411 cblev = -1;
3414 switch (structdef)
3416 case skeyseen: /* unnamed struct */
3417 pushclass_above (cblev, NULL, 0);
3418 structdef = snone;
3419 break;
3420 case stagseen: /* named struct or enum */
3421 case scolonseen: /* a class */
3422 pushclass_above (cblev, token.line+token.offset, token.length);
3423 structdef = snone;
3424 make_C_tag (FALSE); /* a struct or enum */
3425 break;
3427 cblev++;
3428 break;
3429 case '*':
3430 if (definedef != dnone)
3431 break;
3432 if (fvdef == fstartlist)
3433 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3434 break;
3435 case '}':
3436 if (definedef != dnone)
3437 break;
3438 if (!noindentypedefs && lp == newlb.buffer + 1)
3440 cblev = 0; /* reset curly brace level if first column */
3441 parlev = 0; /* also reset paren level, just in case... */
3443 else if (cblev > 0)
3444 cblev--;
3445 popclass_above (cblev);
3446 structdef = snone;
3447 /* Only if typdef == tinbody is typdefcblev significant. */
3448 if (typdef == tinbody && cblev <= typdefcblev)
3450 assert (cblev == typdefcblev);
3451 typdef = tend;
3453 break;
3454 case '=':
3455 if (definedef != dnone)
3456 break;
3457 switch (fvdef)
3459 case foperator:
3460 case finlist:
3461 case fignore:
3462 case vignore:
3463 break;
3464 case fvnameseen:
3465 if ((members && cblev == 1)
3466 || (globals && cblev == 0 && (!fvextern || declarations)))
3467 make_C_tag (FALSE); /* a variable */
3468 /* FALLTHRU */
3469 default:
3470 fvdef = vignore;
3472 break;
3473 case '<':
3474 if (cplpl && structdef == stagseen)
3476 structdef = sintemplate;
3477 break;
3479 goto resetfvdef;
3480 case '>':
3481 if (structdef == sintemplate)
3483 structdef = stagseen;
3484 break;
3486 goto resetfvdef;
3487 case '+':
3488 case '-':
3489 if (objdef == oinbody && cblev == 0)
3491 objdef = omethodsign;
3492 break;
3494 /* FALLTHRU */
3495 resetfvdef:
3496 case '#': case '~': case '&': case '%': case '/': case '|':
3497 case '^': case '!': case '.': case '?': case ']':
3498 if (definedef != dnone)
3499 break;
3500 /* These surely cannot follow a function tag in C. */
3501 switch (fvdef)
3503 case foperator:
3504 case finlist:
3505 case fignore:
3506 case vignore:
3507 break;
3508 default:
3509 fvdef = fvnone;
3511 break;
3512 case '\0':
3513 if (objdef == otagseen)
3515 make_C_tag (TRUE); /* an Objective C class */
3516 objdef = oignore;
3518 /* If a macro spans multiple lines don't reset its state. */
3519 if (quotednl)
3520 CNL_SAVE_DEFINEDEF ();
3521 else
3522 CNL ();
3523 break;
3524 } /* switch (c) */
3526 } /* while not eof */
3528 free (token_name.buffer);
3529 free (lbs[0].lb.buffer);
3530 free (lbs[1].lb.buffer);
3534 * Process either a C++ file or a C file depending on the setting
3535 * of a global flag.
3537 static void
3538 default_C_entries (inf)
3539 FILE *inf;
3541 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: C_entries is called with no language flag set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3552 /* Always do C++. */
3553 static void
3554 Cplusplus_entries (inf)
3555 FILE *inf;
3557 C_entries (C_PLPL, inf);
3560 /* Always do Java. */
3561 static void
3562 Cjava_entries (inf)
3563 FILE *inf;
3565 C_entries (C_JAVA, inf);
3568 /* Always do C*. */
3569 static void
3570 Cstar_entries (inf)
3571 FILE *inf;
3573 C_entries (C_STAR, inf);
3576 /* Always do Yacc. */
3577 static void
3578 Yacc_entries (inf)
3579 FILE *inf;
3581 C_entries (YACC, inf);
/* A useful macro: the head of a `for' loop that runs its body once per
   line read from FILE_POINTER.

   Before each iteration it increments lineno, copies charno to
   linecharno, reads a line into LINE_BUFFER with readline, adds the
   value readline returns to charno, and points CHAR_POINTER at the
   start of LINE_BUFFER's text.  lineno and charno are reset to 0 when
   the loop starts.

   CHAR_POINTER is taken from LINE_BUFFER itself, not from the global
   `lb', so the macro works for any buffer passed to it.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3598 * Read a file, but do no processing. This is used to do regexp
3599 * matching on files that have no language defined.
3601 static void
3602 just_read_file (inf)
3603 FILE *inf;
3605 register char *dummy;
3607 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3608 continue;
3612 /* Fortran parsing */
3614 static bool tail P_((char *));
3615 static void takeprec P_((void));
3616 static void getit P_((FILE *));
3618 static bool
3619 tail (cp)
3620 char *cp;
3622 register int len = 0;
3624 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3625 cp++, len++;
3626 if (*cp == '\0' && !intoken (dbp[len]))
3628 dbp += len;
3629 return TRUE;
3631 return FALSE;
3634 static void
3635 takeprec ()
3637 dbp = skip_spaces (dbp);
3638 if (*dbp != '*')
3639 return;
3640 dbp++;
3641 dbp = skip_spaces (dbp);
3642 if (strneq (dbp, "(*)", 3))
3644 dbp += 3;
3645 return;
3647 if (!ISDIGIT (*dbp))
3649 --dbp; /* force failure */
3650 return;
3653 dbp++;
3654 while (ISDIGIT (*dbp));
3657 static void
3658 getit (inf)
3659 FILE *inf;
3661 register char *cp;
3663 dbp = skip_spaces (dbp);
3664 if (*dbp == '\0')
3666 lineno++;
3667 linecharno = charno;
3668 charno += readline (&lb, inf);
3669 dbp = lb.buffer;
3670 if (dbp[5] != '&')
3671 return;
3672 dbp += 6;
3673 dbp = skip_spaces (dbp);
3675 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3676 return;
3677 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3678 continue;
3679 pfnote (savenstr (dbp, cp-dbp), TRUE,
3680 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3684 static void
3685 Fortran_functions (inf)
3686 FILE *inf;
3688 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3690 if (*dbp == '%')
3691 dbp++; /* Ratfor escape to fortran */
3692 dbp = skip_spaces (dbp);
3693 if (*dbp == '\0')
3694 continue;
3695 switch (lowcase (*dbp))
3697 case 'i':
3698 if (tail ("integer"))
3699 takeprec ();
3700 break;
3701 case 'r':
3702 if (tail ("real"))
3703 takeprec ();
3704 break;
3705 case 'l':
3706 if (tail ("logical"))
3707 takeprec ();
3708 break;
3709 case 'c':
3710 if (tail ("complex") || tail ("character"))
3711 takeprec ();
3712 break;
3713 case 'd':
3714 if (tail ("double"))
3716 dbp = skip_spaces (dbp);
3717 if (*dbp == '\0')
3718 continue;
3719 if (tail ("precision"))
3720 break;
3721 continue;
3723 break;
3725 dbp = skip_spaces (dbp);
3726 if (*dbp == '\0')
3727 continue;
3728 switch (lowcase (*dbp))
3730 case 'f':
3731 if (tail ("function"))
3732 getit (inf);
3733 continue;
3734 case 's':
3735 if (tail ("subroutine"))
3736 getit (inf);
3737 continue;
3738 case 'e':
3739 if (tail ("entry"))
3740 getit (inf);
3741 continue;
3742 case 'b':
3743 if (tail ("blockdata") || tail ("block data"))
3745 dbp = skip_spaces (dbp);
3746 if (*dbp == '\0') /* assume un-named */
3747 pfnote (savestr ("blockdata"), TRUE,
3748 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3749 else
3750 getit (inf); /* look for name */
3752 continue;
3759 * Ada parsing
3760 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3763 static void adagetit P_((FILE *, char *));
3765 /* Once we are positioned after an "interesting" keyword, let's get
3766 the real tag value necessary. */
3767 static void
3768 adagetit (inf, name_qualifier)
3769 FILE *inf;
3770 char *name_qualifier;
3772 register char *cp;
3773 char *name;
3774 char c;
3776 while (!feof (inf))
3778 dbp = skip_spaces (dbp);
3779 if (*dbp == '\0'
3780 || (dbp[0] == '-' && dbp[1] == '-'))
3782 lineno++;
3783 linecharno = charno;
3784 charno += readline (&lb, inf);
3785 dbp = lb.buffer;
3787 switch (*dbp)
3789 case 'b':
3790 case 'B':
3791 if (tail ("body"))
3793 /* Skipping body of procedure body or package body or ....
3794 resetting qualifier to body instead of spec. */
3795 name_qualifier = "/b";
3796 continue;
3798 break;
3799 case 't':
3800 case 'T':
3801 /* Skipping type of task type or protected type ... */
3802 if (tail ("type"))
3803 continue;
3804 break;
3806 if (*dbp == '"')
3808 dbp += 1;
3809 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3810 continue;
3812 else
3814 dbp = skip_spaces (dbp);
3815 for (cp = dbp;
3816 (*cp != '\0'
3817 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3818 cp++)
3819 continue;
3820 if (cp == dbp)
3821 return;
3823 c = *cp;
3824 *cp = '\0';
3825 name = concat (dbp, name_qualifier, "");
3826 *cp = c;
3827 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3828 if (c == '"')
3829 dbp = cp + 1;
3830 return;
3834 static void
3835 Ada_funcs (inf)
3836 FILE *inf;
3838 bool inquote = FALSE;
3840 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3842 while (*dbp != '\0')
3844 /* Skip a string i.e. "abcd". */
3845 if (inquote || (*dbp == '"'))
3847 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3848 if (dbp != NULL)
3850 inquote = FALSE;
3851 dbp += 1;
3852 continue; /* advance char */
3854 else
3856 inquote = TRUE;
3857 break; /* advance line */
3861 /* Skip comments. */
3862 if (dbp[0] == '-' && dbp[1] == '-')
3863 break; /* advance line */
3865 /* Skip character enclosed in single quote i.e. 'a'
3866 and skip single quote starting an attribute i.e. 'Image. */
3867 if (*dbp == '\'')
3869 dbp++ ;
3870 if (*dbp != '\0')
3871 dbp++;
3872 continue;
3875 /* Search for beginning of a token. */
3876 if (!begtoken (*dbp))
3878 dbp++;
3879 continue; /* advance char */
3882 /* We are at the beginning of a token. */
3883 switch (*dbp)
3885 case 'f':
3886 case 'F':
3887 if (!packages_only && tail ("function"))
3888 adagetit (inf, "/f");
3889 else
3890 break; /* from switch */
3891 continue; /* advance char */
3892 case 'p':
3893 case 'P':
3894 if (!packages_only && tail ("procedure"))
3895 adagetit (inf, "/p");
3896 else if (tail ("package"))
3897 adagetit (inf, "/s");
3898 else if (tail ("protected")) /* protected type */
3899 adagetit (inf, "/t");
3900 else
3901 break; /* from switch */
3902 continue; /* advance char */
3903 case 't':
3904 case 'T':
3905 if (!packages_only && tail ("task"))
3906 adagetit (inf, "/k");
3907 else if (typedefs && !packages_only && tail ("type"))
3909 adagetit (inf, "/t");
3910 while (*dbp != '\0')
3911 dbp += 1;
3913 else
3914 break; /* from switch */
3915 continue; /* advance char */
3918 /* Look for the end of the token. */
3919 while (!endtoken (*dbp))
3920 dbp++;
3922 } /* advance char */
3923 } /* advance line */
3928 * Bob Weiner, Motorola Inc., 4/3/94
3929 * Unix and microcontroller assembly tag handling
3930 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3932 static void
3933 Asm_labels (inf)
3934 FILE *inf;
3936 register char *cp;
3938 LOOP_ON_INPUT_LINES (inf, lb, cp)
3940 /* If first char is alphabetic or one of [_.$], test for colon
3941 following identifier. */
3942 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3944 /* Read past label. */
3945 cp++;
3946 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3947 cp++;
3948 if (*cp == ':' || iswhite (*cp))
3950 /* Found end of label, so copy it and add it to the table. */
3951 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3952 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3960 * Perl support
3961 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3962 * Perl variable names: /^(my|local).../
3963 * Bart Robinson <lomew@cs.utah.edu> (1995)
3964 * Michael Ernst <mernst@alum.mit.edu> (1997)
3966 static void
3967 Perl_functions (inf)
3968 FILE *inf;
3970 register char *cp;
3972 LOOP_ON_INPUT_LINES (inf, lb, cp)
3974 if (*cp++ == 's'
3975 && *cp++ == 'u'
3976 && *cp++ == 'b' && iswhite (*cp++))
3978 cp = skip_spaces (cp);
3979 if (*cp != '\0')
3981 char *sp = cp;
3982 while (*cp != '\0'
3983 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3984 cp++;
3985 pfnote (savenstr (sp, cp-sp), TRUE,
3986 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3989 else if (globals /* only if tagging global vars is enabled */
3990 && ((cp = lb.buffer,
3991 *cp++ == 'm'
3992 && *cp++ == 'y')
3993 || (cp = lb.buffer,
3994 *cp++ == 'l'
3995 && *cp++ == 'o'
3996 && *cp++ == 'c'
3997 && *cp++ == 'a'
3998 && *cp++ == 'l'))
3999 && (*cp == '(' || iswhite (*cp)))
4001 /* After "my" or "local", but before any following paren or space. */
4002 char *varname = NULL;
4004 cp = skip_spaces (cp);
4005 if (*cp == '$' || *cp == '@' || *cp == '%')
4007 char* varstart = ++cp;
4008 while (ISALNUM (*cp) || *cp == '_')
4009 cp++;
4010 varname = savenstr (varstart, cp-varstart);
4012 else
4014 /* Should be examining a variable list at this point;
4015 could insist on seeing an open parenthesis. */
4016 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4017 cp++;
4020 /* Perhaps I should back cp up one character, so the TAGS table
4021 doesn't mention (and so depend upon) the following char. */
4022 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4023 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4030 * Python support
4031 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4032 * Eric S. Raymond <esr@thyrsus.com> (1997)
4034 static void
4035 Python_functions (inf)
4036 FILE *inf;
4038 register char *cp;
4040 LOOP_ON_INPUT_LINES (inf, lb, cp)
4042 if (*cp++ == 'd'
4043 && *cp++ == 'e'
4044 && *cp++ == 'f' && iswhite (*cp++))
4046 cp = skip_spaces (cp);
4047 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4048 cp++;
4049 pfnote (NULL, TRUE,
4050 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4053 cp = lb.buffer;
4054 if (*cp++ == 'c'
4055 && *cp++ == 'l'
4056 && *cp++ == 'a'
4057 && *cp++ == 's'
4058 && *cp++ == 's' && iswhite (*cp++))
4060 cp = skip_spaces (cp);
4061 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4062 cp++;
4063 pfnote (NULL, TRUE,
4064 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4070 /* Idea by Corny de Souza
4071 * Cobol tag functions
4072 * We could look for anything that could be a paragraph name.
4073 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4075 static void
4076 Cobol_paragraphs (inf)
4077 FILE *inf;
4079 register char *bp, *ep;
4081 LOOP_ON_INPUT_LINES (inf, lb, bp)
4083 if (lb.len < 9)
4084 continue;
4085 bp += 8;
4087 /* If eoln, compiler option or comment ignore whole line. */
4088 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4089 continue;
4091 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4092 continue;
4093 if (*ep++ == '.')
4094 pfnote (savenstr (bp, ep-bp), TRUE,
4095 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4101 * Makefile support
4102 * Idea by Assar Westerlund <assar@sics.se> (2001)
4104 static void
4105 Makefile_targets (inf)
4106 FILE *inf;
4108 register char *bp;
4110 LOOP_ON_INPUT_LINES (inf, lb, bp)
4112 if (*bp == '\t' || *bp == '#')
4113 continue;
4114 while (*bp != '\0' && *bp != '=' && *bp != ':')
4115 bp++;
4116 if (*bp == ':')
4117 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4118 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4123 /* Added by Mosur Mohan, 4/22/88 */
4124 /* Pascal parsing */
4127 * Locates tags for procedures & functions. Doesn't do any type- or
4128 * var-definitions. It does look for the keyword "extern" or
4129 * "forward" immediately following the procedure statement; if found,
4130 * the tag is skipped.
4132 static void
4133 Pascal_functions (inf)
4134 FILE *inf;
4136 linebuffer tline; /* mostly copied from C_entries */
4137 long save_lcno;
4138 int save_lineno, save_len;
4139 char c, *cp, *namebuf;
4141 bool /* each of these flags is TRUE iff: */
4142 incomment, /* point is inside a comment */
4143 inquote, /* point is inside '..' string */
4144 get_tagname, /* point is after PROCEDURE/FUNCTION
4145 keyword, so next item = potential tag */
4146 found_tag, /* point is after a potential tag */
4147 inparms, /* point is within parameter-list */
4148 verify_tag; /* point has passed the parm-list, so the
4149 next token will determine whether this
4150 is a FORWARD/EXTERN to be ignored, or
4151 whether it is a real tag */
4153 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4154 namebuf = NULL; /* keep compiler quiet */
4155 lineno = 0;
4156 charno = 0;
4157 dbp = lb.buffer;
4158 *dbp = '\0';
4159 initbuffer (&tline);
4161 incomment = inquote = FALSE;
4162 found_tag = FALSE; /* have a proc name; check if extern */
4163 get_tagname = FALSE; /* have found "procedure" keyword */
4164 inparms = FALSE; /* found '(' after "proc" */
4165 verify_tag = FALSE; /* check if "extern" is ahead */
4168 while (!feof (inf)) /* long main loop to get next char */
4170 c = *dbp++;
4171 if (c == '\0') /* if end of line */
4173 lineno++;
4174 linecharno = charno;
4175 charno += readline (&lb, inf);
4176 dbp = lb.buffer;
4177 if (*dbp == '\0')
4178 continue;
4179 if (!((found_tag && verify_tag)
4180 || get_tagname))
4181 c = *dbp++; /* only if don't need *dbp pointing
4182 to the beginning of the name of
4183 the procedure or function */
4185 if (incomment)
4187 if (c == '}') /* within { } comments */
4188 incomment = FALSE;
4189 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4191 dbp++;
4192 incomment = FALSE;
4194 continue;
4196 else if (inquote)
4198 if (c == '\'')
4199 inquote = FALSE;
4200 continue;
4202 else
4203 switch (c)
4205 case '\'':
4206 inquote = TRUE; /* found first quote */
4207 continue;
4208 case '{': /* found open { comment */
4209 incomment = TRUE;
4210 continue;
4211 case '(':
4212 if (*dbp == '*') /* found open (* comment */
4214 incomment = TRUE;
4215 dbp++;
4217 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4218 inparms = TRUE;
4219 continue;
4220 case ')': /* end of parms list */
4221 if (inparms)
4222 inparms = FALSE;
4223 continue;
4224 case ';':
4225 if (found_tag && !inparms) /* end of proc or fn stmt */
4227 verify_tag = TRUE;
4228 break;
4230 continue;
4232 if (found_tag && verify_tag && (*dbp != ' '))
4234 /* check if this is an "extern" declaration */
4235 if (*dbp == '\0')
4236 continue;
4237 if (lowcase (*dbp == 'e'))
4239 if (tail ("extern")) /* superfluous, really! */
4241 found_tag = FALSE;
4242 verify_tag = FALSE;
4245 else if (lowcase (*dbp) == 'f')
4247 if (tail ("forward")) /* check for forward reference */
4249 found_tag = FALSE;
4250 verify_tag = FALSE;
4253 if (found_tag && verify_tag) /* not external proc, so make tag */
4255 found_tag = FALSE;
4256 verify_tag = FALSE;
4257 pfnote (namebuf, TRUE,
4258 tline.buffer, save_len, save_lineno, save_lcno);
4259 continue;
4262 if (get_tagname) /* grab name of proc or fn */
4264 if (*dbp == '\0')
4265 continue;
4267 /* save all values for later tagging */
4268 linebuffer_setlen (&tline, lb.len);
4269 strcpy (tline.buffer, lb.buffer);
4270 save_lineno = lineno;
4271 save_lcno = linecharno;
4273 /* grab block name */
4274 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4275 continue;
4276 namebuf = savenstr (dbp, cp-dbp);
4277 dbp = cp; /* set dbp to e-o-token */
4278 save_len = dbp - lb.buffer + 1;
4279 get_tagname = FALSE;
4280 found_tag = TRUE;
4281 continue;
4283 /* and proceed to check for "extern" */
4285 else if (!incomment && !inquote && !found_tag)
4287 /* check for proc/fn keywords */
4288 switch (lowcase (c))
4290 case 'p':
4291 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4292 get_tagname = TRUE;
4293 continue;
4294 case 'f':
4295 if (tail ("unction"))
4296 get_tagname = TRUE;
4297 continue;
4300 } /* while not eof */
4302 free (tline.buffer);
4307 * Lisp tag functions
4308 * look for (def or (DEF, quote or QUOTE
4311 static int L_isdef P_((char *));
4312 static int L_isquote P_((char *));
4313 static void L_getit P_((void));
/* Return non-zero if the three characters following STRP[0] spell
   "def", in any mixture of upper and lower case.  STRP[0] itself is
   not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
/* Return non-zero if the characters following STRP[0] spell "quote",
   in any mixture of upper and lower case, followed by a white-space
   character.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4336 static void
4337 L_getit ()
4339 register char *cp;
4341 if (*dbp == '\'') /* Skip prefix quote */
4342 dbp++;
4343 else if (*dbp == '(')
4345 if (L_isquote (dbp))
4346 dbp += 7; /* Skip "(quote " */
4347 else
4348 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4349 dbp = skip_spaces (dbp);
4352 for (cp = dbp /*+1*/;
4353 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4354 cp++)
4355 continue;
4356 if (cp == dbp)
4357 return;
4359 pfnote (savenstr (dbp, cp-dbp), TRUE,
4360 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4363 static void
4364 Lisp_functions (inf)
4365 FILE *inf;
4367 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4369 if (dbp[0] == '(')
4371 if (L_isdef (dbp))
4373 dbp = skip_non_spaces (dbp);
4374 dbp = skip_spaces (dbp);
4375 L_getit ();
4377 else
4379 /* Check for (foo::defmumble name-defined ... */
4381 dbp++;
4382 while (*dbp != '\0' && !iswhite (*dbp)
4383 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4384 if (*dbp == ':')
4387 dbp++;
4388 while (*dbp == ':');
4390 if (L_isdef (dbp - 1))
4392 dbp = skip_non_spaces (dbp);
4393 dbp = skip_spaces (dbp);
4394 L_getit ();
4404 * Postscript tag functions
4405 * Just look for lines where the first character is '/'
4406 * Also look at "defineps" for PSWrap
4407 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4408 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4410 static void
4411 Postscript_functions (inf)
4412 FILE *inf;
4414 register char *bp, *ep;
4416 LOOP_ON_INPUT_LINES (inf, lb, bp)
4418 if (bp[0] == '/')
4420 for (ep = bp+1;
4421 *ep != '\0' && *ep != ' ' && *ep != '{';
4422 ep++)
4423 continue;
4424 pfnote (savenstr (bp, ep-bp), TRUE,
4425 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4427 else if (strneq (bp, "defineps", 8))
4429 bp = skip_non_spaces (bp);
4430 bp = skip_spaces (bp);
4431 get_tag (bp);
4438 * Scheme tag functions
4439 * look for (def... xyzzy
4440 * look for (def... (xyzzy
4441 * look for (def ... ((...(xyzzy ....
4442 * look for (set! xyzzy
4445 static void
4446 Scheme_functions (inf)
4447 FILE *inf;
4449 register char *bp;
4451 LOOP_ON_INPUT_LINES (inf, lb, bp)
4453 if (bp[0] == '('
4454 && (bp[1] == 'D' || bp[1] == 'd')
4455 && (bp[2] == 'E' || bp[2] == 'e')
4456 && (bp[3] == 'F' || bp[3] == 'f'))
4458 bp = skip_non_spaces (bp);
4459 /* Skip over open parens and white space */
4460 while (iswhite (*bp) || *bp == '(')
4461 bp++;
4462 get_tag (bp);
4464 if (bp[0] == '('
4465 && (bp[1] == 'S' || bp[1] == 's')
4466 && (bp[2] == 'E' || bp[2] == 'e')
4467 && (bp[3] == 'T' || bp[3] == 't')
4468 && (bp[4] == '!' || bp[4] == '!')
4469 && (iswhite (bp[5])))
4471 bp = skip_non_spaces (bp);
4472 bp = skip_spaces (bp);
4473 get_tag (bp);
4479 /* Find tags in TeX and LaTeX input files. */
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* text of the control sequence */
  int len;                      /* length of NAME; 0 marks the table end */
};

/* Table with tag tokens */
struct TEX_tabent *TEX_toktab = NULL;
4492 /* Default set of control sequences to put into TEX_toktab.
4493 The value of environment var TEXTAGS is prepended to this. */
4495 char *TEX_defenv = "\
4496 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4497 :part:appendix:entry:index";
4499 static void TEX_mode P_((FILE *));
4500 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4501 static int TEX_Token P_((char *));
4503 char TEX_esc = '\\';
4504 char TEX_opgrp = '{';
4505 char TEX_clgrp = '}';
4508 * TeX/LaTeX scanning loop.
4510 static void
4511 TeX_commands (inf)
4512 FILE *inf;
4514 char *cp, *lasthit;
4515 register int i;
4517 /* Select either \ or ! as escape character. */
4518 TEX_mode (inf);
4520 /* Initialize token table once from environment. */
4521 if (!TEX_toktab)
4522 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4524 LOOP_ON_INPUT_LINES (inf, lb, cp)
4526 lasthit = cp;
4527 /* Look at each esc in line. */
4528 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4530 if (*++cp == '\0')
4531 break;
4532 linecharno += cp - lasthit;
4533 lasthit = cp;
4534 i = TEX_Token (lasthit);
4535 if (i >= 0)
4537 /* We seem to include the TeX command in the tag name.
4538 register char *p;
4539 for (p = lasthit + TEX_toktab[i].len;
4540 *p != '\0' && *p != TEX_clgrp;
4541 p++)
4542 continue; */
4543 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4544 lb.buffer, lb.len, lineno, linecharno);
4545 break; /* We only tag a line once */
4551 #define TEX_LESC '\\'
4552 #define TEX_SESC '!'
4553 #define TEX_cmt '%'
4555 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4556 chars accordingly. */
4557 static void
4558 TEX_mode (inf)
4559 FILE *inf;
4561 int c;
4563 while ((c = getc (inf)) != EOF)
4565 /* Skip to next line if we hit the TeX comment char. */
4566 if (c == TEX_cmt)
4567 while (c != '\n')
4568 c = getc (inf);
4569 else if (c == TEX_LESC || c == TEX_SESC )
4570 break;
4573 if (c == TEX_LESC)
4575 TEX_esc = TEX_LESC;
4576 TEX_opgrp = '{';
4577 TEX_clgrp = '}';
4579 else
4581 TEX_esc = TEX_SESC;
4582 TEX_opgrp = '<';
4583 TEX_clgrp = '>';
4585 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4586 No attempt is made to correct the situation. */
4587 rewind (inf);
4590 /* Read environment and prepend it to the default string.
4591 Build token table. */
4592 static struct TEX_tabent *
4593 TEX_decode_env (evarname, defenv)
4594 char *evarname;
4595 char *defenv;
4597 register char *env, *p;
4599 struct TEX_tabent *tab;
4600 int size, i;
4602 /* Append default string to environment. */
4603 env = getenv (evarname);
4604 if (!env)
4605 env = defenv;
4606 else
4608 char *oldenv = env;
4609 env = concat (oldenv, defenv, "");
4612 /* Allocate a token table */
4613 for (size = 1, p = env; p;)
4614 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4615 size++;
4616 /* Add 1 to leave room for null terminator. */
4617 tab = xnew (size + 1, struct TEX_tabent);
4619 /* Unpack environment string into token table. Be careful about */
4620 /* zero-length strings (leading ':', "::" and trailing ':') */
4621 for (i = 0; *env;)
4623 p = etags_strchr (env, ':');
4624 if (!p) /* End of environment string. */
4625 p = env + strlen (env);
4626 if (p - env > 0)
4627 { /* Only non-zero strings. */
4628 tab[i].name = savenstr (env, p - env);
4629 tab[i].len = strlen (tab[i].name);
4630 i++;
4632 if (*p)
4633 env = p + 1;
4634 else
4636 tab[i].name = NULL; /* Mark end of table. */
4637 tab[i].len = 0;
4638 break;
4641 return tab;
4644 /* If the text at CP matches one of the tag-defining TeX command names,
4645 return the pointer to the first occurrence of that command in TEX_toktab.
4646 Otherwise return -1.
4647 Keep the capital `T' in `token' for dumb truncating compilers
4648 (this distinguishes it from `TEX_toktab' */
4649 static int
4650 TEX_Token (cp)
4651 char *cp;
4653 int i;
4655 for (i = 0; TEX_toktab[i].len > 0; i++)
4656 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4657 return i;
4658 return -1;
4662 /* Texinfo support. Dave Love, Mar. 2000. */
4663 static void
4664 Texinfo_nodes (inf)
4665 FILE * inf;
4667 char *cp, *start;
4668 LOOP_ON_INPUT_LINES (inf, lb, cp)
4670 if ((*cp++ == '@'
4671 && *cp++ == 'n'
4672 && *cp++ == 'o'
4673 && *cp++ == 'd'
4674 && *cp++ == 'e' && iswhite (*cp++)))
4676 start = cp = skip_spaces(cp);
4677 while (*cp != '\0' && *cp != ',')
4678 cp++;
4679 pfnote (savenstr (start, cp - start), TRUE,
4680 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4687 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4689 * Assumes that the predicate starts at column 0.
4690 * Only the first clause of a predicate is added.
4692 static int prolog_pred P_((char *, char *));
4693 static void prolog_skip_comment P_((linebuffer *, FILE *));
4694 static int prolog_atom P_((char *, int));
4696 static void
4697 Prolog_functions (inf)
4698 FILE *inf;
4700 char *cp, *last;
4701 int len;
4702 int allocated;
4704 allocated = 0;
4705 len = 0;
4706 last = NULL;
4708 LOOP_ON_INPUT_LINES (inf, lb, cp)
4710 if (cp[0] == '\0') /* Empty line */
4711 continue;
4712 else if (iswhite (cp[0])) /* Not a predicate */
4713 continue;
4714 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4715 prolog_skip_comment (&lb, inf);
4716 else if ((len = prolog_pred (cp, last)) > 0)
4718 /* Predicate. Store the function name so that we only
4719 generate a tag for the first clause. */
4720 if (last == NULL)
4721 last = xnew(len + 1, char);
4722 else if (len + 1 > allocated)
4723 xrnew (last, len + 1, char);
4724 allocated = len + 1;
4725 strncpy (last, cp, len);
4726 last[len] = '\0';
4732 static void
4733 prolog_skip_comment (plb, inf)
4734 linebuffer *plb;
4735 FILE *inf;
4737 char *cp;
4741 for (cp = plb->buffer; *cp != '\0'; cp++)
4742 if (cp[0] == '*' && cp[1] == '/')
4743 return;
4744 lineno++;
4745 linecharno += readline (plb, inf);
4747 while (!feof(inf));
4751 * A predicate definition is added if it matches:
4752 * <beginning of line><Prolog Atom><whitespace>(
4754 * It is added to the tags database if it doesn't match the
4755 * name of the previous clause header.
4757 * Return the size of the name of the predicate, or 0 if no header
4758 * was found.
4760 static int
4761 prolog_pred (s, last)
4762 char *s;
4763 char *last; /* Name of last clause. */
4765 int pos;
4766 int len;
4768 pos = prolog_atom (s, 0);
4769 if (pos < 1)
4770 return 0;
4772 len = pos;
4773 pos = skip_spaces (s + pos) - s;
4775 if ((s[pos] == '(') || (s[pos] == '.'))
4777 if (s[pos] == '(')
4778 pos++;
4780 /* Save only the first clause. */
4781 if (last == NULL
4782 || len != (int)strlen (last)
4783 || !strneq (s, last, len))
4785 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4786 return len;
4789 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (or an underscore).
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  register char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  for (p++;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;
      default:
        p++;
        break;
      }
}
4853 * Support for Erlang -- Anders Lindgren, Feb 1996.
4855 * Generates tags for functions, defines, and records.
4857 * Assumes that Erlang functions start at column 0.
4859 static int erlang_func P_((char *, char *));
4860 static void erlang_attribute P_((char *));
4861 static int erlang_atom P_((char *, int));
4863 static void
4864 Erlang_functions (inf)
4865 FILE *inf;
4867 char *cp, *last;
4868 int len;
4869 int allocated;
4871 allocated = 0;
4872 len = 0;
4873 last = NULL;
4875 LOOP_ON_INPUT_LINES (inf, lb, cp)
4877 if (cp[0] == '\0') /* Empty line */
4878 continue;
4879 else if (iswhite (cp[0])) /* Not function nor attribute */
4880 continue;
4881 else if (cp[0] == '%') /* comment */
4882 continue;
4883 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4884 continue;
4885 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4887 erlang_attribute (cp);
4888 last = NULL;
4890 else if ((len = erlang_func (cp, last)) > 0)
4893 * Function. Store the function name so that we only
4894 * generates a tag for the first clause.
4896 if (last == NULL)
4897 last = xnew (len + 1, char);
4898 else if (len + 1 > allocated)
4899 xrnew (last, len + 1, char);
4900 allocated = len + 1;
4901 strncpy (last, cp, len);
4902 last[len] = '\0';
4909 * A function definition is added if it matches:
4910 * <beginning of line><Erlang Atom><whitespace>(
4912 * It is added to the tags database if it doesn't match the
4913 * name of the previous clause header.
4915 * Return the size of the name of the function, or 0 if no function
4916 * was found.
4918 static int
4919 erlang_func (s, last)
4920 char *s;
4921 char *last; /* Name of last clause. */
4923 int pos;
4924 int len;
4926 pos = erlang_atom (s, 0);
4927 if (pos < 1)
4928 return 0;
4930 len = pos;
4931 pos = skip_spaces (s + pos) - s;
4933 /* Save only the first clause. */
4934 if (s[pos++] == '('
4935 && (last == NULL
4936 || len != (int)strlen (last)
4937 || !strneq (s, last, len)))
4939 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4940 return len;
4943 return 0;
4948 * Handle attributes. Currently, tags are generated for defines
4949 * and records.
4951 * They are on the form:
4952 * -define(foo, bar).
4953 * -define(Foo(M, N), M+N).
4954 * -record(graph, {vtab = notable, cyclic = true}).
4956 static void
4957 erlang_attribute (s)
4958 char *s;
4960 int pos;
4961 int len;
4963 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4965 pos = skip_spaces (s + 7) - s;
4966 if (s[pos++] == '(')
4968 pos = skip_spaces (s + pos) - s;
4969 len = erlang_atom (s, pos);
4970 if (len != 0)
4971 pfnote (savenstr (& s[pos], len), TRUE,
4972 s, pos + len, lineno, linecharno);
4975 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * The atom is either a letter or underscore followed by letters,
 * digits and underscores, or a single-quoted string in which a
 * backslash quotes the next character.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  register char *p = start;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  for (p++; *p != '\''; )
    {
      if (*p == '\0')
        /* Multiline quoted atoms are ignored. */
        return -1;
      if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
  p++;                          /* step over the closing quote */
  return p - start;
}
5030 #ifdef ETAGS_REGEXPS
5032 static char *scan_separators P_((char *));
5033 static void analyse_regex P_((char *, bool));
5034 static void add_regex P_((char *, bool, language *));
5035 static char *substitute P_((char *, char *, struct re_registers *));
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Any other backslash sequence is kept as it is, backslash included;
   a backslash at the very end of the string is dropped.
   Returns pointer to terminating separator.  Works in place.  Null
   terminates name string. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* next input char to examine */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character.  */
          src++;
          if (*src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
          continue;
        }
      if (*src == sep)
        break;
      *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5080 /* Look at the argument of --regex or --no-regex and do the right
5081 thing. Same for each line of a regexp file. */
5082 static void
5083 analyse_regex (regex_arg, ignore_case)
5084 char *regex_arg;
5085 bool ignore_case;
5087 if (regex_arg == NULL)
5089 free_patterns (); /* --no-regex: remove existing regexps */
5090 return;
5093 /* A real --regexp option or a line in a regexp file. */
5094 switch (regex_arg[0])
5096 /* Comments in regexp file or null arg to --regex. */
5097 case '\0':
5098 case ' ':
5099 case '\t':
5100 break;
5102 /* Read a regex file. This is recursive and may result in a
5103 loop, which will stop when the file descriptors are exhausted. */
5104 case '@':
5106 FILE *regexfp;
5107 linebuffer regexbuf;
5108 char *regexfile = regex_arg + 1;
5110 /* regexfile is a file containing regexps, one per line. */
5111 regexfp = fopen (regexfile, "r");
5112 if (regexfp == NULL)
5114 pfatal (regexfile);
5115 return;
5117 initbuffer (&regexbuf);
5118 while (readline_internal (&regexbuf, regexfp) > 0)
5119 analyse_regex (regexbuf.buffer, ignore_case);
5120 free (regexbuf.buffer);
5121 fclose (regexfp);
5123 break;
5125 /* Regexp to be used for a specific language only. */
5126 case '{':
5128 language *lang;
5129 char *lang_name = regex_arg + 1;
5130 char *cp;
5132 for (cp = lang_name; *cp != '}'; cp++)
5133 if (*cp == '\0')
5135 error ("unterminated language name in regex: %s", regex_arg);
5136 return;
5138 *cp = '\0';
5139 lang = get_language_from_langname (lang_name);
5140 if (lang == NULL)
5141 return;
5142 add_regex (cp + 1, ignore_case, lang);
5144 break;
5146 /* Regexp to be used for any language. */
5147 default:
5148 add_regex (regex_arg, ignore_case, NULL);
5149 break;
5153 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5154 expression, into a real regular expression by compiling it. */
5155 static void
5156 add_regex (regexp_pattern, ignore_case, lang)
5157 char *regexp_pattern;
5158 bool ignore_case;
5159 language *lang;
5161 static struct re_pattern_buffer zeropattern;
5162 char *name;
5163 const char *err;
5164 struct re_pattern_buffer *patbuf;
5165 pattern *pp;
5168 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5170 error ("%s: unterminated regexp", regexp_pattern);
5171 return;
5173 name = scan_separators (regexp_pattern);
5174 if (regexp_pattern[0] == '\0')
5176 error ("null regexp", (char *)NULL);
5177 return;
5179 (void) scan_separators (name);
5181 patbuf = xnew (1, struct re_pattern_buffer);
5182 *patbuf = zeropattern;
5183 if (ignore_case)
5184 patbuf->translate = lc_trans; /* translation table to fold case */
5186 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5187 if (err != NULL)
5189 error ("%s while compiling pattern", err);
5190 return;
5193 pp = p_head;
5194 p_head = xnew (1, pattern);
5195 p_head->regex = savestr (regexp_pattern);
5196 p_head->p_next = pp;
5197 p_head->language = lang;
5198 p_head->pattern = patbuf;
5199 p_head->name_pattern = savestr (name);
5200 p_head->error_signaled = FALSE;
5204 * Do the substitutions indicated by the regular expression and
5205 * arguments.
5207 static char *
5208 substitute (in, out, regs)
5209 char *in, *out;
5210 struct re_registers *regs;
5212 char *result, *t;
5213 int size, dig, diglen;
5215 result = NULL;
5216 size = strlen (out);
5218 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5219 if (out[size - 1] == '\\')
5220 fatal ("pattern error in \"%s\"", out);
5221 for (t = etags_strchr (out, '\\');
5222 t != NULL;
5223 t = etags_strchr (t + 2, '\\'))
5224 if (ISDIGIT (t[1]))
5226 dig = t[1] - '0';
5227 diglen = regs->end[dig] - regs->start[dig];
5228 size += diglen - 2;
5230 else
5231 size -= 1;
5233 /* Allocate space and do the substitutions. */
5234 result = xnew (size + 1, char);
5236 for (t = result; *out != '\0'; out++)
5237 if (*out == '\\' && ISDIGIT (*++out))
5239 dig = *out - '0';
5240 diglen = regs->end[dig] - regs->start[dig];
5241 strncpy (t, in + regs->start[dig], diglen);
5242 t += diglen;
5244 else
5245 *t++ = *out;
5246 *t = '\0';
5248 assert (t <= result + size && t - result == (int)strlen (result));
5250 return result;
5253 /* Deallocate all patterns. */
5254 static void
5255 free_patterns ()
5257 pattern *pp;
5258 while (p_head != NULL)
5260 pp = p_head->p_next;
5261 free (p_head->regex);
5262 free (p_head->name_pattern);
5263 free (p_head);
5264 p_head = pp;
5266 return;
5268 #endif /* ETAGS_REGEXPS */
5271 static void
5272 get_tag (bp)
5273 register char *bp;
5275 register char *cp;
5277 if (*bp == '\0')
5278 return;
5279 /* Go till you get to white space or a syntactic break */
5280 for (cp = bp + 1;
5281 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5282 cp++)
5283 continue;
5284 pfnote (savenstr (bp, cp-bp), TRUE,
5285 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5288 /* Initialize a linebuffer for use */
5289 static void
5290 initbuffer (lbp)
5291 linebuffer *lbp;
5293 lbp->size = (DEBUG) ? 3 : 200;
5294 lbp->buffer = xnew (lbp->size, char);
5295 lbp->buffer[0] = '\0';
5296 lbp->len = 0;
5300 * Read a line of text from `stream' into `lbp', excluding the
5301 * newline or CR-NL, if any. Return the number of characters read from
5302 * `stream', which is the length of the line including the newline.
5304 * On DOS or Windows we do not count the CR character, if any, before the
5305 * NL, in the returned length; this mirrors the behavior of emacs on those
5306 * platforms (for text files, it translates CR-NL to NL as it reads in the
5307 * file).
5309 static long
5310 readline_internal (lbp, stream)
5311 linebuffer *lbp;
5312 register FILE *stream;
5314 char *buffer = lbp->buffer;
5315 register char *p = lbp->buffer;
5316 register char *pend;
5317 int chars_deleted;
5319 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5321 while (1)
5323 register int c = getc (stream);
5324 if (p == pend)
5326 /* We're at the end of linebuffer: expand it. */
5327 lbp->size *= 2;
5328 xrnew (buffer, lbp->size, char);
5329 p += buffer - lbp->buffer;
5330 pend = buffer + lbp->size;
5331 lbp->buffer = buffer;
5333 if (c == EOF)
5335 *p = '\0';
5336 chars_deleted = 0;
5337 break;
5339 if (c == '\n')
5341 if (p > buffer && p[-1] == '\r')
5343 p -= 1;
5344 #ifdef DOS_NT
5345 /* Assume CRLF->LF translation will be performed by Emacs
5346 when loading this file, so CRs won't appear in the buffer.
5347 It would be cleaner to compensate within Emacs;
5348 however, Emacs does not know how many CRs were deleted
5349 before any given point in the file. */
5350 chars_deleted = 1;
5351 #else
5352 chars_deleted = 2;
5353 #endif
5355 else
5357 chars_deleted = 1;
5359 *p = '\0';
5360 break;
5362 *p++ = c;
5364 lbp->len = p - buffer;
5366 return lbp->len + chars_deleted;
5370 * Like readline_internal, above, but in addition try to match the
5371 * input line against relevant regular expressions.
5373 static long
5374 readline (lbp, stream)
5375 linebuffer *lbp;
5376 FILE *stream;
5378 /* Read new line. */
5379 long result = readline_internal (lbp, stream);
5380 #ifdef ETAGS_REGEXPS
5381 int match;
5382 pattern *pp;
5384 /* Match against relevant patterns. */
5385 if (lbp->len > 0)
5386 for (pp = p_head; pp != NULL; pp = pp->p_next)
5388 /* Only use generic regexps or those for the current language. */
5389 if (pp->language != NULL && pp->language != curlang)
5390 continue;
5392 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5393 switch (match)
5395 case -2:
5396 /* Some error. */
5397 if (!pp->error_signaled)
5399 error ("error while matching \"%s\"", pp->regex);
5400 pp->error_signaled = TRUE;
5402 break;
5403 case -1:
5404 /* No match. */
5405 break;
5406 default:
5407 /* Match occurred. Construct a tag. */
5408 if (pp->name_pattern[0] != '\0')
5410 /* Make a named tag. */
5411 char *name = substitute (lbp->buffer,
5412 pp->name_pattern, &pp->regs);
5413 if (name != NULL)
5414 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5416 else
5418 /* Make an unnamed tag. */
5419 pfnote ((char *)NULL, TRUE,
5420 lbp->buffer, match, lineno, linecharno);
5422 break;
5425 #endif /* ETAGS_REGEXPS */
5427 return result;
/*
 * Duplicate the string CP: return a pointer to freshly allocated space
 * of size strlen (CP) + 1 into which CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5443 * Return a pointer to a space of size LEN+1 allocated with xnew where
5444 * the string CP has been copied for at most the first LEN characters.
5446 static char *
5447 savenstr (cp, len)
5448 char *cp;
5449 int len;
5451 register char *dp;
5453 dp = xnew (len + 1, char);
5454 strncpy (dp, cp, len);
5455 dp[len] = '\0';
5456 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to char before comparing, so byte
 * values of 0x80 and above are found even where plain char is signed,
 * and the terminating NUL counts as part of the string.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  register char target = (char) c;

  r = NULL;
  do
    {
      if (*sp == target)
	r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to char before comparing, so byte
 * values of 0x80 and above are found even where plain char is signed,
 * and the terminating NUL counts as part of the string.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  register char target = (char) c;

  do
    {
      if (*sp == target)
	return (char *)sp;
    } while (*sp++);
  return NULL;
}
/* Advance CP past every leading character for which iswhite is true
   and return the resulting pointer.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP to the first character for which iswhite is true, or to
   the terminating NUL if there is none, and return the resulting
   pointer.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
5521 /* Print error message and exit. */
5522 void
5523 fatal (s1, s2)
5524 char *s1, *s2;
5526 error (s1, s2);
5527 exit (BAD);
5530 static void
5531 pfatal (s1)
5532 char *s1;
5534 perror (s1);
5535 exit (BAD);
5538 static void
5539 suggest_asking_for_help ()
5541 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5542 progname,
5543 #ifdef LONG_OPTIONS
5544 "--help"
5545 #else
5546 "-h"
5547 #endif
5549 exit (BAD);
5552 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5553 static void
5554 error (s1, s2)
5555 const char *s1, *s2;
5557 fprintf (stderr, "%s: ", progname);
5558 fprintf (stderr, s1, s2);
5559 fprintf (stderr, "\n");
5562 /* Return a newly-allocated string whose contents
5563 concatenate those of s1, s2, s3. */
5564 static char *
5565 concat (s1, s2, s3)
5566 char *s1, *s2, *s3;
5568 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5569 char *result = xnew (len1 + len2 + len3 + 1, char);
5571 strcpy (result, s1);
5572 strcpy (result + len1, s2);
5573 strcpy (result + len1 + len2, s3);
5574 result[len1 + len2 + len3] = '\0';
5576 return result;
5580 /* Does the same work as the system V getcwd, but does not need to
5581 guess the buffer size in advance. */
5582 static char *
5583 etags_getcwd ()
5585 #ifdef HAVE_GETCWD
5586 int bufsize = 200;
5587 char *path = xnew (bufsize, char);
5589 while (getcwd (path, bufsize) == NULL)
5591 if (errno != ERANGE)
5592 pfatal ("getcwd");
5593 bufsize *= 2;
5594 free (path);
5595 path = xnew (bufsize, char);
5598 canonicalize_filename (path);
5599 return path;
5601 #else /* not HAVE_GETCWD */
5602 #if MSDOS
5604 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5606 getwd (path);
5608 for (p = path; *p != '\0'; p++)
5609 if (*p == '\\')
5610 *p = '/';
5611 else
5612 *p = lowcase (*p);
5614 return strdup (path);
5615 #else /* not MSDOS */
5616 linebuffer path;
5617 FILE *pipe;
5619 initbuffer (&path);
5620 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5621 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5622 pfatal ("pwd");
5623 pclose (pipe);
5625 return path.buffer;
5626 #endif /* not MSDOS */
5627 #endif /* not HAVE_GETCWD */
5630 /* Return a newly allocated string containing the file name of FILE
5631 relative to the absolute directory DIR (which should end with a slash). */
5632 static char *
5633 relative_filename (file, dir)
5634 char *file, *dir;
5636 char *fp, *dp, *afn, *res;
5637 int i;
5639 /* Find the common root of file and dir (with a trailing slash). */
5640 afn = absolute_filename (file, cwd);
5641 fp = afn;
5642 dp = dir;
5643 while (*fp++ == *dp++)
5644 continue;
5645 fp--, dp--; /* back to the first differing char */
5646 #ifdef DOS_NT
5647 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5648 return afn;
5649 #endif
5650 do /* look at the equal chars until '/' */
5651 fp--, dp--;
5652 while (*fp != '/');
5654 /* Build a sequence of "../" strings for the resulting relative file name. */
5655 i = 0;
5656 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5657 i += 1;
5658 res = xnew (3*i + strlen (fp + 1) + 1, char);
5659 res[0] = '\0';
5660 while (i-- > 0)
5661 strcat (res, "../");
5663 /* Add the file name relative to the common root of file and dir. */
5664 strcat (res, fp + 1);
5665 free (afn);
5667 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise it is appended to DIR.
   "/dirname/.." and "/." components are then removed from the copy.
   If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Back up to the slash that starts the previous component.  */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Close the gap.  Source and destination overlap, which
		 strcpy does not allow, so move the tail by hand; the
		 destination is never after the source, so a forward
		 copy is safe.  */
	      {
		char *to = cp;
		char *from = slashp + 3;

		while ((*to++ = *from++) != '\0')
		  continue;
	      }
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Drop "/." the same way.  */
	      char *to = slashp;
	      char *from = slashp + 2;

	      while ((*to++ = *from++) != '\0')
		continue;
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory.  The
	 emptied copy is no longer needed.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute file name of
   the directory in which FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place; when it contains no
   slash the answer is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE just after its last slash for the duration of the call,
     then put the character back.  */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  dirname = absolute_filename (file, dir);
  *after = saved;

  return dirname;
}
/* Whether the argument string is an absolute file name, i.e. starts
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   The argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Translate backslashes into slashes.  Works in place.  Under DOS_NT a
   lower case drive letter is also converted to upper case; on other
   systems the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;			/* shut up the compiler */
#endif
}
5785 /* Set the minimum size of a string contained in a linebuffer. */
5786 static void
5787 linebuffer_setlen (lbp, toksize)
5788 linebuffer *lbp;
5789 int toksize;
5791 while (lbp->size <= toksize)
5793 lbp->size *= 2;
5794 xrnew (lbp->buffer, lbp->size, char);
5796 lbp->len = toksize;
/* Like malloc, but stop with a fatal error instead of returning NULL
   when memory is exhausted.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *result;

  result = (long *) malloc (size);
  if (!result)
    fatal ("virtual memory exhausted", (char *)NULL);

  return result;
}
/* Like realloc, but stop with a fatal error instead of returning NULL
   when memory is exhausted.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result;

  result = (long *) realloc (ptr, size);
  if (!result)
    fatal ("virtual memory exhausted", (char *)NULL);

  return result;
}