(Vnext_word_boundary_function_table): New variable.
[emacs.git] / lib-src / etags.c
blob950389545acdc73536927c25a4a8b8c4d7c4c8e0
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (ideas by Mykola Dzyuba).
33 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
36 char pot_etags_version[] = "@(#) pot revision number is 14.35";
38 #define TRUE 1
39 #define FALSE 0
41 #ifdef DEBUG
42 # undef DEBUG
43 # define DEBUG TRUE
44 #else
45 # define DEBUG FALSE
46 # define NDEBUG /* disable assert */
47 #endif
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 # ifndef PTR /* for Xemacs */
57 # define PTR void *
58 # endif
59 # ifndef __P /* for Xemacs */
60 # define __P(args) args
61 # endif
62 #else
63 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
64 # define __P(args) args /* use prototypes */
65 # define PTR void * /* for generic pointers */
66 # else
67 # define __P(args) () /* no prototypes */
68 # define const /* remove const for old compilers' sake */
69 # define PTR long * /* don't use void* */
70 # endif
71 #endif /* !HAVE_CONFIG_H */
73 #ifndef _GNU_SOURCE
74 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
75 #endif
77 /* WIN32_NATIVE is for Xemacs.
78 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
79 #ifdef WIN32_NATIVE
80 # undef MSDOS
81 # undef WINDOWSNT
82 # define WINDOWSNT
83 #endif /* WIN32_NATIVE */
85 #ifdef MSDOS
86 # undef MSDOS
87 # define MSDOS TRUE
88 # include <fcntl.h>
89 # include <sys/param.h>
90 # include <io.h>
91 # ifndef HAVE_CONFIG_H
92 # define DOS_NT
93 # include <sys/config.h>
94 # endif
95 #else
96 # define MSDOS FALSE
97 #endif /* MSDOS */
99 #ifdef WINDOWSNT
100 # include <stdlib.h>
101 # include <fcntl.h>
102 # include <string.h>
103 # include <direct.h>
104 # include <io.h>
105 # define MAXPATHLEN _MAX_PATH
106 # undef HAVE_NTGUI
107 # undef DOS_NT
108 # define DOS_NT
109 # ifndef HAVE_GETCWD
110 # define HAVE_GETCWD
111 # endif /* undef HAVE_GETCWD */
112 #else /* !WINDOWSNT */
113 # ifdef STDC_HEADERS
114 # include <stdlib.h>
115 # include <string.h>
116 # else
117 extern char *getenv ();
118 # endif
119 #endif /* !WINDOWSNT */
121 #ifdef HAVE_UNISTD_H
122 # include <unistd.h>
123 #else
124 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
125 extern char *getcwd (char *buf, size_t size);
126 # endif
127 #endif /* HAVE_UNISTD_H */
129 #include <stdio.h>
130 #include <ctype.h>
131 #include <errno.h>
132 #ifndef errno
133 extern int errno;
134 #endif
135 #include <sys/types.h>
136 #include <sys/stat.h>
138 #include <assert.h>
139 #ifdef NDEBUG
140 # undef assert /* some systems have a buggy assert.h */
141 # define assert(x) ((void) 0)
142 #endif
144 #if !defined (S_ISREG) && defined (S_IFREG)
145 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
146 #endif
148 #ifdef LONG_OPTIONS
149 # include <getopt.h>
150 #else
151 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
152 extern char *optarg;
153 extern int optind, opterr;
154 #endif /* LONG_OPTIONS */
156 #ifdef ETAGS_REGEXPS
157 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
158 # ifdef __CYGWIN__ /* compiling on Cygwin */
159 !!! NOTICE !!!
160 the regex.h distributed with Cygwin is not compatible with etags, alas!
161 If you want regular expression support, you should delete this notice and
162 arrange to use the GNU regex.h and regex.c.
163 # endif
164 # endif
165 # include <regex.h>
166 #endif /* ETAGS_REGEXPS */
168 /* Define CTAGS to make the program "ctags" compatible with the usual one.
169 Leave it undefined to make the program "etags", which makes emacs-style
170 tag tables and tags typedefs, #defines and struct/union/enum by default. */
171 #ifdef CTAGS
172 # undef CTAGS
173 # define CTAGS TRUE
174 #else
175 # define CTAGS FALSE
176 #endif
178 /* Exit codes for success and failure. */
179 #ifdef VMS
180 # define GOOD 1
181 # define BAD 0
182 #else
183 # define GOOD 0
184 # define BAD 1
185 #endif
/* String equality helpers.  Both operands are handed straight to
   strcmp/strncmp, so BOTH must be non-NULL; the assertion checks exactly
   that (it expands to nothing when NDEBUG is defined).  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))

#define CHARS 256		/* number of distinct 8-bit character codes */
/* Reduce X to a table index in 0 .. CHARS-1, so that a negative `char'
   value never indexes the classification tables out of bounds.  */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens */

/* <ctype.h> wrappers that first mask the argument with CHAR.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields room for COUNT objects of type T; xrnew resizes the block
 * PTR points at to COUNT objects of type T and stores the result back
 * into PTR.  DEBUG builds route both through the tracing allocator
 * declared in chkmalloc.h, recording the call site.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(count,T) \
    ((T *) trace_malloc (__FILE__, __LINE__, (count) * sizeof (T)))
# define xrnew(ptr,count,T) \
    ((ptr) = (T *) trace_realloc (__FILE__, __LINE__, \
				  (char *) (ptr), (count) * sizeof (T)))
#else
# define xnew(count,T) \
    ((T *) xmalloc ((count) * sizeof (T)))
# define xrnew(ptr,count,T) \
    ((ptr) = (T *) xrealloc ((char *) (ptr), (count) * sizeof (T)))
#endif
#define bool int

/* Type of the per-language tagging routines; each receives the open
   input stream.  */
typedef void Lang_function __P((FILE *));

/* Associates a file-name suffix with a decompression command.  */
typedef struct
{
  char *suffix;
  char *command;		/* Takes one arg and decompresses to stdout */
} compressor;

/* One entry of the language table: the language name, its tagging
   routine, and the NULL-terminated lists used to recognise its files.  */
typedef struct
{
  char *name;
  Lang_function *function;
  char **filenames;
  char **suffixes;
  char **interpreters;
} language;

/* Sorting structure: one tag, stored as a node of a binary tree.  */
typedef struct node_st
{
  char *name;			/* function or type name	*/
  char *file;			/* file name			*/
  bool is_func;			/* use pattern or line no	*/
  bool been_warned;		/* set if noticed dup		*/
  int lno;			/* line number tag is on	*/
  long cno;			/* character number line starts on */
  char *pat;			/* search pattern		*/
  struct node_st *left;		/* left son			*/
  struct node_st *right;	/* right son			*/
} node;

/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
270 /* Many compilers barf on this:
271 Lang_function Ada_funcs;
272 so let's write it this way */
273 static void Ada_funcs __P((FILE *));
274 static void Asm_labels __P((FILE *));
275 static void C_entries __P((int c_ext, FILE *));
276 static void default_C_entries __P((FILE *));
277 static void plain_C_entries __P((FILE *));
278 static void Cjava_entries __P((FILE *));
279 static void Cobol_paragraphs __P((FILE *));
280 static void Cplusplus_entries __P((FILE *));
281 static void Cstar_entries __P((FILE *));
282 static void Erlang_functions __P((FILE *));
283 static void Fortran_functions __P((FILE *));
284 static void Yacc_entries __P((FILE *));
285 static void Lisp_functions __P((FILE *));
286 static void Makefile_targets __P((FILE *));
287 static void Pascal_functions __P((FILE *));
288 static void Perl_functions __P((FILE *));
289 static void PHP_functions __P((FILE *));
290 static void Postscript_functions __P((FILE *));
291 static void Prolog_functions __P((FILE *));
292 static void Python_functions __P((FILE *));
293 static void Scheme_functions __P((FILE *));
294 static void TeX_commands __P((FILE *));
295 static void Texinfo_nodes __P((FILE *));
296 static void just_read_file __P((FILE *));
298 static void print_language_names __P((void));
299 static void print_version __P((void));
300 static void print_help __P((void));
301 int main __P((int, char **));
302 static int number_len __P((long));
304 static compressor *get_compressor_from_suffix __P((char *, char **));
305 static language *get_language_from_langname __P((const char *));
306 static language *get_language_from_interpreter __P((char *));
307 static language *get_language_from_filename __P((char *));
308 static int total_size_of_entries __P((node *));
309 static long readline __P((linebuffer *, FILE *));
310 static long readline_internal __P((linebuffer *, FILE *));
311 static bool nocase_tail __P((char *));
312 static char *get_tag __P((char *));
314 #ifdef ETAGS_REGEXPS
315 static void analyse_regex __P((char *, bool));
316 static void add_regex __P((char *, bool, language *));
317 static void free_patterns __P((void));
318 #endif /* ETAGS_REGEXPS */
319 static void error __P((const char *, const char *));
320 static void suggest_asking_for_help __P((void));
321 void fatal __P((char *, char *));
322 static void pfatal __P((char *));
323 static void add_node __P((node *, node **));
325 static void init __P((void));
326 static void initbuffer __P((linebuffer *));
327 static void find_entries __P((char *, FILE *));
328 static void free_tree __P((node *));
329 static void pfnote __P((char *, bool, char *, int, int, long));
330 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
331 static void process_file __P((char *));
332 static void put_entries __P((node *));
334 static char *concat __P((char *, char *, char *));
335 static char *skip_spaces __P((char *));
336 static char *skip_non_spaces __P((char *));
337 static char *savenstr __P((char *, int));
338 static char *savestr __P((char *));
339 static char *etags_strchr __P((const char *, int));
340 static char *etags_strrchr __P((const char *, int));
341 static char *etags_getcwd __P((void));
342 static char *relative_filename __P((char *, char *));
343 static char *absolute_filename __P((char *, char *));
344 static char *absolute_dirname __P((char *, char *));
345 static bool filename_is_absolute __P((char *f));
346 static void canonicalize_filename __P((char *));
347 static void linebuffer_setlen __P((linebuffer *, int));
348 PTR xmalloc __P((unsigned int));
349 PTR xrealloc __P((char *, unsigned int));
352 char searchar = '/'; /* use /.../ searches */
354 char *tagfile; /* output file */
355 char *progname; /* name this program was invoked with */
356 char *cwd; /* current working directory */
357 char *tagfiledir; /* directory of tagfile */
358 FILE *tagf; /* ioptr for tags file */
360 char *curfile; /* current input file name */
361 language *curlang; /* current language */
363 int lineno; /* line number of current line */
364 long charno; /* current character number */
365 long linecharno; /* charno of start of current line */
366 char *dbp; /* pointer to start of current tag */
368 node *head; /* the head of the binary tree of tags */
370 linebuffer lb; /* the current line */
372 /* boolean "functions" (see init) */
373 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
374 char
375 /* white chars */
376 *white = " \f\t\n\r\v",
377 /* not in a name */
378 *nonam = " \f\t\n\r()=,;",
379 /* token ending chars */
380 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
381 /* token starting chars */
382 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
383 /* valid in-token chars */
384 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
386 bool append_to_tagfile; /* -a: append to tags */
387 /* The following four default to TRUE for etags, but to FALSE for ctags. */
388 bool typedefs; /* -t: create tags for C and Ada typedefs */
389 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
390 /* 0 struct/enum/union decls, and C++ */
391 /* member functions. */
392 bool constantypedefs; /* -d: create tags for C #define, enum */
393 /* constants and variables. */
394 /* -D: opposite of -d. Default under ctags. */
395 bool declarations; /* --declarations: tag them and extern in C&Co*/
396 bool globals; /* create tags for global variables */
397 bool members; /* create tags for C member variables */
398 bool update; /* -u: update tags */
399 bool vgrind_style; /* -v: create vgrind style index output */
400 bool no_warnings; /* -w: suppress warnings */
401 bool cxref_style; /* -x: create cxref style output */
402 bool cplusplus; /* .[hc] means C++, not C */
403 bool noindentypedefs; /* -I: ignore indentation in C */
404 bool packages_only; /* --packages-only: in Ada, only tag packages*/
#ifdef LONG_OPTIONS
/* Long-option table handed to getopt_long.  Entries with a non-NULL flag
   pointer store their value directly into that variable; the others
   yield the listed option character.  */
struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only, TRUE  },
  { "append",             no_argument,       NULL,           'a'   },
  { "backward-search",    no_argument,       NULL,           'B'   },
  { "c++",                no_argument,       NULL,           'C'   },
  { "cxref",              no_argument,       NULL,           'x'   },
  { "defines",            no_argument,       NULL,           'd'   },
  { "declarations",       no_argument,       &declarations,  TRUE  },
  { "no-defines",         no_argument,       NULL,           'D'   },
  { "globals",            no_argument,       &globals,       TRUE  },
  { "no-globals",         no_argument,       &globals,       FALSE },
  { "help",               no_argument,       NULL,           'h'   },
  { "help",               no_argument,       NULL,           'H'   },
  { "ignore-indentation", no_argument,       NULL,           'I'   },
  { "include",            required_argument, NULL,           'i'   },
  { "language",           required_argument, NULL,           'l'   },
  { "members",            no_argument,       &members,       TRUE  },
  { "no-members",         no_argument,       &members,       FALSE },
  { "no-warn",            no_argument,       NULL,           'w'   },
  { "output",             required_argument, NULL,           'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,           'r'   },
  { "no-regex",           no_argument,       NULL,           'R'   },
  { "ignore-case-regex",  required_argument, NULL,           'c'   },
#endif /* ETAGS_REGEXPS */
  { "typedefs",           no_argument,       NULL,           't'   },
  { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
  { "update",             no_argument,       NULL,           'u'   },
  { "version",            no_argument,       NULL,           'V'   },
  { "vgrind",             no_argument,       NULL,           'v'   },
  { NULL,                 0,                 NULL,           0     }
};
#endif /* LONG_OPTIONS */
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression.  Elements are
   the compiled pattern, and the name string.  Patterns are chained
   through P_NEXT.  */
typedef struct pattern
{
  struct pattern *p_next;
  language *lang;
  char *regex;
  struct re_pattern_buffer *pat;
  struct re_registers regs;
  char *name_pattern;
  bool error_signaled;
} pattern;

/* List of all regexps.  */
pattern *p_head = NULL;

/* How many characters in the character set.  (From regex.c.)  */
#define CHAR_SET_SIZE 256

/* Translation table for case-insensitive matching.  */
char lc_trans[CHAR_SET_SIZE];
#endif /* ETAGS_REGEXPS */
465 compressor compressors[] =
467 { "z", "gzip -d -c"},
468 { "Z", "gzip -d -c"},
469 { "gz", "gzip -d -c"},
470 { "GZ", "gzip -d -c"},
471 { "bz2", "bzip2 -d -c" },
472 { NULL }
476 * Language stuff.
479 /* Non-NULL if language fixed. */
480 language *forced_lang = NULL;
/* Each table below is a NULL-terminated list of strings.  */

/* Ada code */
char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };

/* Assembly code */
char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes  */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' keyword is met inside the file.
   That is why default_C_entries is called for these.  */
char *default_C_suffixes [] = { "c", "h", NULL };

char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",				/* Objective C++ */
  "pdb",			/* Postscript with C syntax */
  NULL
};

char *Cjava_suffixes [] = { "java", NULL };

char *Cobol_suffixes [] = { "COB", "cob", NULL };

char *Cstar_suffixes [] = { "cs", "hs", NULL };

char *Erlang_suffixes [] = { "erl", "hrl", NULL };

char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };

char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};

char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};

char *Pascal_suffixes [] = { "p", "pas", NULL };

char *Perl_suffixes [] = { "pl", "pm", NULL };
char *Perl_interpreters [] = { "perl", "@PERL@", NULL };

char *PHP_suffixes [] = { "php", "php3", "php4", NULL };

char *plain_C_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  "pc",				/* Pro*C file  */
  NULL
};

/* .psw is for PSWrap */
char *Postscript_suffixes [] = { "ps", "psw", NULL };

char *Prolog_suffixes [] = { "prolog", NULL };

char *Python_suffixes [] = { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number.  */
char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};

char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};

char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL };

/* .ym is Objective yacc file */
char *Yacc_suffixes [] = { "y", "y++", "ym", "yxx", "yy", NULL };
571 * Table of languages.
573 * It is ok for a given function to be listed under more than one
574 * name. I just didn't.
577 language lang_names [] =
579 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
580 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
581 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
582 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
583 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
584 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
585 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
586 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
587 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
588 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
589 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
590 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
591 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
592 { "php", PHP_functions, NULL, PHP_suffixes, NULL },
593 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
594 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
595 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
596 { "python", Python_functions, NULL, Python_suffixes, NULL },
597 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
598 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
599 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
600 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
601 { "auto", NULL }, /* default guessing scheme */
602 { "none", just_read_file }, /* regexp matching only */
603 { NULL, NULL } /* end of list */
607 static void
608 print_language_names ()
610 language *lang;
611 char **name, **ext;
613 puts ("\nThese are the currently supported languages, along with the\n\
614 default file names and dot suffixes:");
615 for (lang = lang_names; lang->name != NULL; lang++)
617 printf (" %-*s", 10, lang->name);
618 if (lang->filenames != NULL)
619 for (name = lang->filenames; *name != NULL; name++)
620 printf (" %s", *name);
621 if (lang->suffixes != NULL)
622 for (ext = lang->suffixes; *ext != NULL; ext++)
623 printf (" .%s", *ext);
624 puts ("");
626 puts ("Where `auto' means use default language for files based on file\n\
627 name suffix, and `none' means only do regexp processing on files.\n\
628 If no language is specified and no matching suffix is found,\n\
629 the first line of the file is read for a sharp-bang (#!) sequence\n\
630 followed by the name of an interpreter. If no such sequence is found,\n\
631 Fortran is tried first; if no tags are found, C is tried next.\n\
632 When parsing any C file, a \"class\" keyword switches to C++.\n\
633 Compressed files are supported using gzip and bzip2.");
636 #ifndef EMACS_NAME
637 # define EMACS_NAME "GNU Emacs"
638 #endif
639 #ifndef VERSION
640 # define VERSION "21"
641 #endif
642 static void
643 print_version ()
645 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
646 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
647 puts ("This program is distributed under the same terms as Emacs");
649 exit (GOOD);
652 static void
653 print_help ()
655 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
657 These are the options accepted by %s.\n", progname, progname);
658 #ifdef LONG_OPTIONS
659 puts ("You may use unambiguous abbreviations for the long option names.");
660 #else
661 puts ("Long option names do not work with this executable, as it is not\n\
662 linked with GNU getopt.");
663 #endif /* LONG_OPTIONS */
664 puts ("A - as file name means read names from stdin (one per line).");
665 if (!CTAGS)
666 printf (" Absolute names are stored in the output file as they are.\n\
667 Relative ones are stored relative to the output file's directory.");
668 puts ("\n");
670 puts ("-a, --append\n\
671 Append tag entries to existing tags file.");
673 puts ("--packages-only\n\
674 For Ada files, only generate tags for packages .");
676 if (CTAGS)
677 puts ("-B, --backward-search\n\
678 Write the search commands for the tag entries using '?', the\n\
679 backward-search command instead of '/', the forward-search command.");
681 /* This option is mostly obsolete, because etags can now automatically
682 detect C++. Retained for backward compatibility and for debugging and
683 experimentation. In principle, we could want to tag as C++ even
684 before any "class" keyword.
685 puts ("-C, --c++\n\
686 Treat files whose name suffix defaults to C language as C++ files.");
689 puts ("--declarations\n\
690 In C and derived languages, create tags for function declarations,");
691 if (CTAGS)
692 puts ("\tand create tags for extern variables if --globals is used.");
693 else
694 puts
695 ("\tand create tags for extern variables unless --no-globals is used.");
697 if (CTAGS)
698 puts ("-d, --defines\n\
699 Create tag entries for C #define constants and enum constants, too.");
700 else
701 puts ("-D, --no-defines\n\
702 Don't create tag entries for C #define constants and enum constants.\n\
703 This makes the tags file smaller.");
705 if (!CTAGS)
707 puts ("-i FILE, --include=FILE\n\
708 Include a note in tag file indicating that, when searching for\n\
709 a tag, one should also consult the tags file FILE after\n\
710 checking the current file.");
711 puts ("-l LANG, --language=LANG\n\
712 Force the following files to be considered as written in the\n\
713 named language up to the next --language=LANG option.");
716 if (CTAGS)
717 puts ("--globals\n\
718 Create tag entries for global variables in some languages.");
719 else
720 puts ("--no-globals\n\
721 Do not create tag entries for global variables in some\n\
722 languages. This makes the tags file smaller.");
723 puts ("--members\n\
724 Create tag entries for member variables in C and derived languages.");
726 #ifdef ETAGS_REGEXPS
727 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
728 Make a tag for each line matching pattern REGEXP in the following\n\
729 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
730 regexfile is a file containing one REGEXP per line.\n\
731 REGEXP is anchored (as if preceded by ^).\n\
732 The form /REGEXP/NAME/ creates a named tag.\n\
733 For example Tcl named tags can be created with:\n\
734 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
735 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
736 Like -r, --regex but ignore case when matching expressions.");
737 puts ("-R, --no-regex\n\
738 Don't create tags from regexps for the following files.");
739 #endif /* ETAGS_REGEXPS */
740 puts ("-o FILE, --output=FILE\n\
741 Write the tags to FILE.");
742 puts ("-I, --ignore-indentation\n\
743 Don't rely on indentation quite as much as normal. Currently,\n\
744 this means not to assume that a closing brace in the first\n\
745 column is the final brace of a function or structure\n\
746 definition in C and C++.");
748 if (CTAGS)
750 puts ("-t, --typedefs\n\
751 Generate tag entries for C and Ada typedefs.");
752 puts ("-T, --typedefs-and-c++\n\
753 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
754 and C++ member functions.");
755 puts ("-u, --update\n\
756 Update the tag entries for the given files, leaving tag\n\
757 entries for other files in place. Currently, this is\n\
758 implemented by deleting the existing entries for the given\n\
759 files and then rewriting the new entries at the end of the\n\
760 tags file. It is often faster to simply rebuild the entire\n\
761 tag file than to use this.");
762 puts ("-v, --vgrind\n\
763 Generates an index of items intended for human consumption,\n\
764 similar to the output of vgrind. The index is sorted, and\n\
765 gives the page number of each item.");
766 puts ("-w, --no-warn\n\
767 Suppress warning messages about entries defined in multiple\n\
768 files.");
769 puts ("-x, --cxref\n\
770 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
771 The output uses line numbers instead of page numbers, but\n\
772 beyond that the differences are cosmetic; try both to see\n\
773 which you like.");
776 puts ("-V, --version\n\
777 Print the version of the program.\n\
778 -h, --help\n\
779 Print this help message.");
781 print_language_names ();
783 puts ("");
784 puts ("Report bugs to bug-gnu-emacs@gnu.org");
786 exit (GOOD);
790 enum argument_type
792 at_language,
793 at_regexp,
794 at_filename,
795 at_icregexp
798 /* This structure helps us allow mixing of --lang and file names. */
799 typedef struct
801 enum argument_type arg_type;
802 char *what;
803 language *lang; /* language of the regexp */
804 } argument;
806 #ifdef VMS /* VMS specific functions */
808 #define EOS '\0'
810 /* This is a BUG! ANY arbitrary limit is a BUG!
811 Won't someone please fix this? */
812 #define MAX_FILE_SPEC_LEN 255
813 typedef struct {
814 short curlen;
815 char body[MAX_FILE_SPEC_LEN + 1];
816 } vspec;
819 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
820 returning in each successive call the next file name matching the input
821 spec. The function expects that each in_spec passed
822 to it will be processed to completion; in particular, up to and
823 including the call following that in which the last matching name
824 is returned, the function ignores the value of in_spec, and will
825 only start processing a new spec with the following call.
826 If an error occurs, on return out_spec contains the value
827 of in_spec when the error occurred.
829 With each successive file name returned in out_spec, the
830 function's return value is one. When there are no more matching
831 names the function returns zero. If on the first call no file
832 matches in_spec, or there is any other error, -1 is returned.
835 #include <rmsdef.h>
836 #include <descrip.h>
837 #define OUTSIZE MAX_FILE_SPEC_LEN
838 static short
839 fn_exp (out, in)
840 vspec *out;
841 char *in;
843 static long context = 0;
844 static struct dsc$descriptor_s o;
845 static struct dsc$descriptor_s i;
846 static bool pass1 = TRUE;
847 long status;
848 short retval;
850 if (pass1)
852 pass1 = FALSE;
853 o.dsc$a_pointer = (char *) out;
854 o.dsc$w_length = (short)OUTSIZE;
855 i.dsc$a_pointer = in;
856 i.dsc$w_length = (short)strlen(in);
857 i.dsc$b_dtype = DSC$K_DTYPE_T;
858 i.dsc$b_class = DSC$K_CLASS_S;
859 o.dsc$b_dtype = DSC$K_DTYPE_VT;
860 o.dsc$b_class = DSC$K_CLASS_VS;
862 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
864 out->body[out->curlen] = EOS;
865 return 1;
867 else if (status == RMS$_NMF)
868 retval = 0;
869 else
871 strcpy(out->body, in);
872 retval = -1;
874 lib$find_file_end(&context);
875 pass1 = TRUE;
876 return retval;
880 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
881 name of each file specified by the provided arg expanding wildcards.
883 static char *
884 gfnames (arg, p_error)
885 char *arg;
886 bool *p_error;
888 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
890 switch (fn_exp (&filename, arg))
892 case 1:
893 *p_error = FALSE;
894 return filename.body;
895 case 0:
896 *p_error = FALSE;
897 return NULL;
898 default:
899 *p_error = TRUE;
900 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/* Stand-in for system(): report that running CMD is not supported and
   return -1 so that a caller checking the result sees a failure.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
912 #define VERSION_DELIM ';'
913 char *massage_name (s)
914 char *s;
916 char *start = s;
918 for ( ; *s; s++)
919 if (*s == VERSION_DELIM)
921 *s = EOS;
922 break;
924 else
925 *s = lowcase (*s);
926 return start;
928 #endif /* VMS */
932 main (argc, argv)
933 int argc;
934 char *argv[];
936 int i;
937 unsigned int nincluded_files;
938 char **included_files;
939 char *this_file;
940 argument *argbuffer;
941 int current_arg, file_count;
942 linebuffer filename_lb;
943 #ifdef VMS
944 bool got_err;
945 #endif
947 #ifdef DOS_NT
948 _fmode = O_BINARY; /* all of files are treated as binary files */
949 #endif /* DOS_NT */
951 progname = argv[0];
952 nincluded_files = 0;
953 included_files = xnew (argc, char *);
954 current_arg = 0;
955 file_count = 0;
957 /* Allocate enough no matter what happens. Overkill, but each one
958 is small. */
959 argbuffer = xnew (argc, argument);
961 #ifdef ETAGS_REGEXPS
962 /* Set syntax for regular expression routines. */
963 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
964 /* Translation table for case-insensitive search. */
965 for (i = 0; i < CHAR_SET_SIZE; i++)
966 lc_trans[i] = lowcase (i);
967 #endif /* ETAGS_REGEXPS */
970 * If etags, always find typedefs and structure tags. Why not?
971 * Also default to find macro constants, enum constants and
972 * global variables.
974 if (!CTAGS)
976 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
977 globals = TRUE;
978 declarations = FALSE;
979 members = FALSE;
982 while (1)
984 int opt;
985 char *optstring;
987 #ifdef ETAGS_REGEXPS
988 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
989 #else
990 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
991 #endif /* ETAGS_REGEXPS */
993 #ifndef LONG_OPTIONS
994 optstring = optstring + 1;
995 #endif /* LONG_OPTIONS */
997 opt = getopt_long (argc, argv, optstring, longopts, 0);
998 if (opt == EOF)
999 break;
1001 switch (opt)
1003 case 0:
1004 /* If getopt returns 0, then it has already processed a
1005 long-named option. We should do nothing. */
1006 break;
1008 case 1:
1009 /* This means that a file name has been seen. Record it. */
1010 argbuffer[current_arg].arg_type = at_filename;
1011 argbuffer[current_arg].what = optarg;
1012 ++current_arg;
1013 ++file_count;
1014 break;
1016 /* Common options. */
1017 case 'a': append_to_tagfile = TRUE; break;
1018 case 'C': cplusplus = TRUE; break;
1019 case 'd': constantypedefs = TRUE; break;
1020 case 'D': constantypedefs = FALSE; break;
1021 case 'f': /* for compatibility with old makefiles */
1022 case 'o':
1023 if (tagfile)
1025 error ("-o option may only be given once.", (char *)NULL);
1026 suggest_asking_for_help ();
1028 tagfile = optarg;
1029 break;
1030 case 'I':
1031 case 'S': /* for backward compatibility */
1032 noindentypedefs = TRUE;
1033 break;
1034 case 'l':
1036 language *lang = get_language_from_langname (optarg);
1037 if (lang != NULL)
1039 argbuffer[current_arg].lang = lang;
1040 argbuffer[current_arg].arg_type = at_language;
1041 ++current_arg;
1044 break;
1045 #ifdef ETAGS_REGEXPS
1046 case 'r':
1047 argbuffer[current_arg].arg_type = at_regexp;
1048 argbuffer[current_arg].what = optarg;
1049 ++current_arg;
1050 break;
1051 case 'R':
1052 argbuffer[current_arg].arg_type = at_regexp;
1053 argbuffer[current_arg].what = NULL;
1054 ++current_arg;
1055 break;
1056 case 'c':
1057 argbuffer[current_arg].arg_type = at_icregexp;
1058 argbuffer[current_arg].what = optarg;
1059 ++current_arg;
1060 break;
1061 #endif /* ETAGS_REGEXPS */
1062 case 'V':
1063 print_version ();
1064 break;
1065 case 'h':
1066 case 'H':
1067 print_help ();
1068 break;
1069 case 't':
1070 typedefs = TRUE;
1071 break;
1072 case 'T':
1073 typedefs = typedefs_or_cplusplus = TRUE;
1074 break;
1075 #if (!CTAGS)
1076 /* Etags options */
1077 case 'i':
1078 included_files[nincluded_files++] = optarg;
1079 break;
1080 #else /* CTAGS */
1081 /* Ctags options. */
1082 case 'B': searchar = '?'; break;
1083 case 'u': update = TRUE; break;
1084 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1085 case 'x': cxref_style = TRUE; break;
1086 case 'w': no_warnings = TRUE; break;
1087 #endif /* CTAGS */
1088 default:
1089 suggest_asking_for_help ();
1093 for (; optind < argc; ++optind)
1095 argbuffer[current_arg].arg_type = at_filename;
1096 argbuffer[current_arg].what = argv[optind];
1097 ++current_arg;
1098 ++file_count;
1101 if (nincluded_files == 0 && file_count == 0)
1103 error ("no input files specified.", (char *)NULL);
1104 suggest_asking_for_help ();
1107 if (tagfile == NULL)
1108 tagfile = CTAGS ? "tags" : "TAGS";
1109 cwd = etags_getcwd (); /* the current working directory */
1110 if (cwd[strlen (cwd) - 1] != '/')
1112 char *oldcwd = cwd;
1113 cwd = concat (oldcwd, "/", "");
1114 free (oldcwd);
1116 if (streq (tagfile, "-"))
1117 tagfiledir = cwd;
1118 else
1119 tagfiledir = absolute_dirname (tagfile, cwd);
1121 init (); /* set up boolean "functions" */
1123 initbuffer (&lb);
1124 initbuffer (&filename_lb);
1126 if (!CTAGS)
1128 if (streq (tagfile, "-"))
1130 tagf = stdout;
1131 #ifdef DOS_NT
1132 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1133 doesn't take effect until after `stdout' is already open). */
1134 if (!isatty (fileno (stdout)))
1135 setmode (fileno (stdout), O_BINARY);
1136 #endif /* DOS_NT */
1138 else
1139 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1140 if (tagf == NULL)
1141 pfatal (tagfile);
1145 * Loop through files finding functions.
1147 for (i = 0; i < current_arg; ++i)
1149 switch (argbuffer[i].arg_type)
1151 case at_language:
1152 forced_lang = argbuffer[i].lang;
1153 break;
1154 #ifdef ETAGS_REGEXPS
1155 case at_regexp:
1156 analyse_regex (argbuffer[i].what, FALSE);
1157 break;
1158 case at_icregexp:
1159 analyse_regex (argbuffer[i].what, TRUE);
1160 break;
1161 #endif
1162 case at_filename:
1163 #ifdef VMS
1164 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1166 if (got_err)
1168 error ("can't find file %s\n", this_file);
1169 argc--, argv++;
1171 else
1173 this_file = massage_name (this_file);
1175 #else
1176 this_file = argbuffer[i].what;
1177 #endif
1178 /* Input file named "-" means read file names from stdin
1179 (one per line) and use them. */
1180 if (streq (this_file, "-"))
1181 while (readline_internal (&filename_lb, stdin) > 0)
1182 process_file (filename_lb.buffer);
1183 else
1184 process_file (this_file);
1185 #ifdef VMS
1187 #endif
1188 break;
1192 #ifdef ETAGS_REGEXPS
1193 free_patterns ();
1194 #endif /* ETAGS_REGEXPS */
1196 if (!CTAGS)
1198 while (nincluded_files-- > 0)
1199 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1201 fclose (tagf);
1202 exit (GOOD);
1205 /* If CTAGS, we are here. process_file did not write the tags yet,
1206 because we want them ordered. Let's do it now. */
1207 if (cxref_style)
1209 put_entries (head);
1210 free_tree (head);
1211 head = NULL;
1212 exit (GOOD);
1215 if (update)
1217 char cmd[BUFSIZ];
1218 for (i = 0; i < current_arg; ++i)
1220 if (argbuffer[i].arg_type != at_filename)
1221 continue;
1222 sprintf (cmd,
1223 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1224 tagfile, argbuffer[i].what, tagfile);
1225 if (system (cmd) != GOOD)
1226 fatal ("failed to execute shell command", (char *)NULL);
1228 append_to_tagfile = TRUE;
1231 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1232 if (tagf == NULL)
1233 pfatal (tagfile);
1234 put_entries (head);
1235 free_tree (head);
1236 head = NULL;
1237 fclose (tagf);
1239 if (update)
1241 char cmd[BUFSIZ];
1242 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1243 exit (system (cmd));
1245 return GOOD;
1251 * Return a compressor given the file name. If EXTPTR is non-zero,
1252 * return a pointer into FILE where the compressor-specific
1253 * extension begins. If no compressor is found, NULL is returned
1254 * and EXTPTR is not significant.
1255 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1257 static compressor *
1258 get_compressor_from_suffix (file, extptr)
1259 char *file;
1260 char **extptr;
1262 compressor *compr;
1263 char *slash, *suffix;
1265 /* This relies on FN to be after canonicalize_filename,
1266 so we don't need to consider backslashes on DOS_NT. */
1267 slash = etags_strrchr (file, '/');
1268 suffix = etags_strrchr (file, '.');
1269 if (suffix == NULL || suffix < slash)
1270 return NULL;
1271 if (extptr != NULL)
1272 *extptr = suffix;
1273 suffix += 1;
1274 /* Let those poor souls who live with DOS 8+3 file name limits get
1275 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1276 Only the first do loop is run if not MSDOS */
1279 for (compr = compressors; compr->suffix != NULL; compr++)
1280 if (streq (compr->suffix, suffix))
1281 return compr;
1282 if (!MSDOS)
1283 break; /* do it only once: not really a loop */
1284 if (extptr != NULL)
1285 *extptr = ++suffix;
1286 } while (*suffix != '\0');
1287 return NULL;
1293 * Return a language given the name.
1295 static language *
1296 get_language_from_langname (name)
1297 const char *name;
1299 language *lang;
1301 if (name == NULL)
1302 error ("empty language name", (char *)NULL);
1303 else
1305 for (lang = lang_names; lang->name != NULL; lang++)
1306 if (streq (name, lang->name))
1307 return lang;
1308 error ("unknown language \"%s\"", name);
1311 return NULL;
1316 * Return a language given the interpreter name.
1318 static language *
1319 get_language_from_interpreter (interpreter)
1320 char *interpreter;
1322 language *lang;
1323 char **iname;
1325 if (interpreter == NULL)
1326 return NULL;
1327 for (lang = lang_names; lang->name != NULL; lang++)
1328 if (lang->interpreters != NULL)
1329 for (iname = lang->interpreters; *iname != NULL; iname++)
1330 if (streq (*iname, interpreter))
1331 return lang;
1333 return NULL;
1339 * Return a language given the file name.
1341 static language *
1342 get_language_from_filename (file)
1343 char *file;
1345 language *lang;
1346 char **name, **ext, *suffix;
1348 /* Try whole file name first. */
1349 for (lang = lang_names; lang->name != NULL; lang++)
1350 if (lang->filenames != NULL)
1351 for (name = lang->filenames; *name != NULL; name++)
1352 if (streq (*name, file))
1353 return lang;
1355 /* If not found, try suffix after last dot. */
1356 suffix = etags_strrchr (file, '.');
1357 if (suffix == NULL)
1358 return NULL;
1359 suffix += 1;
1360 for (lang = lang_names; lang->name != NULL; lang++)
1361 if (lang->suffixes != NULL)
1362 for (ext = lang->suffixes; *ext != NULL; ext++)
1363 if (streq (*ext, suffix))
1364 return lang;
1365 return NULL;
1371 * This routine is called on each file argument.
1373 static void
1374 process_file (file)
1375 char *file;
1377 struct stat stat_buf;
1378 FILE *inf;
1379 compressor *compr;
1380 char *compressed_name, *uncompressed_name;
1381 char *ext, *real_name;
1384 canonicalize_filename (file);
1385 if (streq (file, tagfile) && !streq (tagfile, "-"))
1387 error ("skipping inclusion of %s in self.", file);
1388 return;
1390 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1392 compressed_name = NULL;
1393 real_name = uncompressed_name = savestr (file);
1395 else
1397 real_name = compressed_name = savestr (file);
1398 uncompressed_name = savenstr (file, ext - file);
1401 /* If the canonicalised uncompressed name has already be dealt with,
1402 skip it silently, else add it to the list. */
1404 typedef struct processed_file
1406 char *filename;
1407 struct processed_file *next;
1408 } processed_file;
1409 static processed_file *pf_head = NULL;
1410 register processed_file *fnp;
1412 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1413 if (streq (uncompressed_name, fnp->filename))
1414 goto exit;
1415 fnp = pf_head;
1416 pf_head = xnew (1, struct processed_file);
1417 pf_head->filename = savestr (uncompressed_name);
1418 pf_head->next = fnp;
1421 if (stat (real_name, &stat_buf) != 0)
1423 /* Reset real_name and try with a different name. */
1424 real_name = NULL;
1425 if (compressed_name != NULL) /* try with the given suffix */
1427 if (stat (uncompressed_name, &stat_buf) == 0)
1428 real_name = uncompressed_name;
1430 else /* try all possible suffixes */
1432 for (compr = compressors; compr->suffix != NULL; compr++)
1434 compressed_name = concat (file, ".", compr->suffix);
1435 if (stat (compressed_name, &stat_buf) != 0)
1437 if (MSDOS)
1439 char *suf = compressed_name + strlen (file);
1440 size_t suflen = strlen (compr->suffix) + 1;
1441 for ( ; suf[1]; suf++, suflen--)
1443 memmove (suf, suf + 1, suflen);
1444 if (stat (compressed_name, &stat_buf) == 0)
1446 real_name = compressed_name;
1447 break;
1450 if (real_name != NULL)
1451 break;
1452 } /* MSDOS */
1453 free (compressed_name);
1454 compressed_name = NULL;
1456 else
1458 real_name = compressed_name;
1459 break;
1463 if (real_name == NULL)
1465 perror (file);
1466 goto exit;
1468 } /* try with a different name */
1470 if (!S_ISREG (stat_buf.st_mode))
1472 error ("skipping %s: it is not a regular file.", real_name);
1473 goto exit;
1475 if (real_name == compressed_name)
1477 char *cmd = concat (compr->command, " ", real_name);
1478 inf = (FILE *) popen (cmd, "r");
1479 free (cmd);
1481 else
1482 inf = fopen (real_name, "r");
1483 if (inf == NULL)
1485 perror (real_name);
1486 goto exit;
1489 find_entries (uncompressed_name, inf);
1491 if (real_name == compressed_name)
1492 pclose (inf);
1493 else
1494 fclose (inf);
1496 if (!CTAGS)
1498 char *filename;
1500 if (filename_is_absolute (uncompressed_name))
1502 /* file is an absolute file name. Canonicalise it. */
1503 filename = absolute_filename (uncompressed_name, cwd);
1505 else
1507 /* file is a file name relative to cwd. Make it relative
1508 to the directory of the tags file. */
1509 filename = relative_filename (uncompressed_name, tagfiledir);
1511 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1512 free (filename);
1513 put_entries (head);
1514 free_tree (head);
1515 head = NULL;
1518 exit:
1519 if (compressed_name) free(compressed_name);
1520 if (uncompressed_name) free(uncompressed_name);
1521 return;
1525 * This routine sets up the boolean pseudo-functions which work
1526 * by setting boolean flags dependent upon the corresponding character.
1527 * Every char which is NOT in that string is not a white char. Therefore,
1528 * all of the array "_wht" is set to FALSE, and then the elements
1529 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1530 * of a char is TRUE if it is the string "white", else FALSE.
1532 static void
1533 init ()
1535 register char *sp;
1536 register int i;
1538 for (i = 0; i < CHARS; i++)
1539 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1540 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1541 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1542 notinname('\0') = notinname('\n');
1543 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1544 begtoken('\0') = begtoken('\n');
1545 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1546 intoken('\0') = intoken('\n');
1547 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1548 endtoken('\0') = endtoken('\n');
1552 * This routine opens the specified file and calls the function
1553 * which finds the function and type definitions.
1555 node *last_node = NULL;
1557 static void
1558 find_entries (file, inf)
1559 char *file;
1560 FILE *inf;
1562 char *cp;
1563 language *lang;
1564 node *old_last_node;
1566 /* Memory leakage here: the string pointed by curfile is
1567 never released, because curfile is copied into np->file
1568 for each node, to be used in CTAGS mode. The amount of
1569 memory leaked here is the sum of the lengths of the
1570 file names. */
1571 curfile = savestr (file);
1573 /* If user specified a language, use it. */
1574 lang = forced_lang;
1575 if (lang != NULL && lang->function != NULL)
1577 curlang = lang;
1578 lang->function (inf);
1579 return;
1582 /* Try to guess the language given the file name. */
1583 lang = get_language_from_filename (file);
1584 if (lang != NULL && lang->function != NULL)
1586 curlang = lang;
1587 lang->function (inf);
1588 return;
1591 /* Look for sharp-bang as the first two characters. */
1592 if (readline_internal (&lb, inf) > 0
1593 && lb.len >= 2
1594 && lb.buffer[0] == '#'
1595 && lb.buffer[1] == '!')
1597 char *lp;
1599 /* Set lp to point at the first char after the last slash in the
1600 line or, if no slashes, at the first nonblank. Then set cp to
1601 the first successive blank and terminate the string. */
1602 lp = etags_strrchr (lb.buffer+2, '/');
1603 if (lp != NULL)
1604 lp += 1;
1605 else
1606 lp = skip_spaces (lb.buffer + 2);
1607 cp = skip_non_spaces (lp);
1608 *cp = '\0';
1610 if (strlen (lp) > 0)
1612 lang = get_language_from_interpreter (lp);
1613 if (lang != NULL && lang->function != NULL)
1615 curlang = lang;
1616 lang->function (inf);
1617 return;
1621 /* We rewind here, even if inf may be a pipe. We fail if the
1622 length of the first line is longer than the pipe block size,
1623 which is unlikely. */
1624 rewind (inf);
1626 /* Try Fortran. */
1627 old_last_node = last_node;
1628 curlang = get_language_from_langname ("fortran");
1629 Fortran_functions (inf);
1631 /* No Fortran entries found. Try C. */
1632 if (old_last_node == last_node)
1634 /* We do not tag if rewind fails.
1635 Only the file name will be recorded in the tags file. */
1636 rewind (inf);
1637 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1638 default_C_entries (inf);
1640 return;
1644 /* Record a tag. */
1645 static void
1646 pfnote (name, is_func, linestart, linelen, lno, cno)
1647 char *name; /* tag name, or NULL if unnamed */
1648 bool is_func; /* tag is a function */
1649 char *linestart; /* start of the line where tag is */
1650 int linelen; /* length of the line where tag is */
1651 int lno; /* line number */
1652 long cno; /* character number */
1654 register node *np;
1656 if (CTAGS && name == NULL)
1657 return;
1659 np = xnew (1, node);
1661 /* If ctags mode, change name "main" to M<thisfilename>. */
1662 if (CTAGS && !cxref_style && streq (name, "main"))
1664 register char *fp = etags_strrchr (curfile, '/');
1665 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1666 fp = etags_strrchr (np->name, '.');
1667 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1668 fp[0] = '\0';
1670 else
1671 np->name = name;
1672 np->been_warned = FALSE;
1673 np->file = curfile;
1674 np->is_func = is_func;
1675 np->lno = lno;
1676 /* Our char numbers are 0-base, because of C language tradition?
1677 ctags compatibility? old versions compatibility? I don't know.
1678 Anyway, since emacs's are 1-base we expect etags.el to take care
1679 of the difference. If we wanted to have 1-based numbers, we would
1680 uncomment the +1 below. */
1681 np->cno = cno /* + 1 */ ;
1682 np->left = np->right = NULL;
1683 if (CTAGS && !cxref_style)
1685 if (strlen (linestart) < 50)
1686 np->pat = concat (linestart, "$", "");
1687 else
1688 np->pat = savenstr (linestart, 50);
1690 else
1691 np->pat = savenstr (linestart, linelen);
1693 add_node (np, &head);
1697 * TAGS format specification
1698 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1700 * pfnote should emit the optimized form [unnamed tag] only if:
1701 * 1. name does not contain any of the characters " \t\r\n(),;";
1702 * 2. linestart contains name as either a rightmost, or rightmost but
1703 * one character, substring;
1704 * 3. the character, if any, immediately before name in linestart must
1705 * be one of the characters " \t(),;";
1706 * 4. the character, if any, immediately after name in linestart must
1707 * also be one of the characters " \t(),;".
1709 * The real implementation uses the notinname() macro, which recognises
1710 * characters slightly different from " \t\r\n(),;". See the variable
1711 * `nonam'.
1713 #define traditional_tag_style TRUE
1714 static void
1715 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1716 char *name; /* tag name, or NULL if unnamed */
1717 int namelen; /* tag length */
1718 bool is_func; /* tag is a function */
1719 char *linestart; /* start of the line where tag is */
1720 int linelen; /* length of the line where tag is */
1721 int lno; /* line number */
1722 long cno; /* character number */
1724 register char *cp;
1725 bool named;
1727 named = TRUE;
1728 if (!CTAGS)
1730 for (cp = name; !notinname (*cp); cp++)
1731 continue;
1732 if (*cp == '\0') /* rule #1 */
1734 cp = linestart + linelen - namelen;
1735 if (notinname (linestart[linelen-1]))
1736 cp -= 1; /* rule #4 */
1737 if (cp >= linestart /* rule #2 */
1738 && (cp == linestart
1739 || notinname (cp[-1])) /* rule #3 */
1740 && strneq (name, cp, namelen)) /* rule #2 */
1741 named = FALSE; /* use unnamed tag */
1745 if (named)
1746 name = savenstr (name, namelen);
1747 else
1748 name = NULL;
1749 pfnote (name, is_func, linestart, linelen, lno, cno);
1753 * free_tree ()
1754 * recurse on left children, iterate on right children.
1756 static void
1757 free_tree (np)
1758 register node *np;
1760 while (np)
1762 register node *node_right = np->right;
1763 free_tree (np->left);
1764 if (np->name != NULL)
1765 free (np->name);
1766 free (np->pat);
1767 free (np);
1768 np = node_right;
1773 * add_node ()
1774 * Adds a node to the tree of nodes. In etags mode, we don't keep
1775 * it sorted; we just keep a linear list. In ctags mode, maintain
1776 * an ordered tree, with no attempt at balancing.
1778 * add_node is the only function allowed to add nodes, so it can
1779 * maintain state.
1781 static void
1782 add_node (np, cur_node_p)
1783 node *np, **cur_node_p;
1785 register int dif;
1786 register node *cur_node = *cur_node_p;
1788 if (cur_node == NULL)
1790 *cur_node_p = np;
1791 last_node = np;
1792 return;
1795 if (!CTAGS)
1797 /* Etags Mode */
1798 if (last_node == NULL)
1799 fatal ("internal error in add_node", (char *)NULL);
1800 last_node->right = np;
1801 last_node = np;
1803 else
1805 /* Ctags Mode */
1806 dif = strcmp (np->name, cur_node->name);
1809 * If this tag name matches an existing one, then
1810 * do not add the node, but maybe print a warning.
1812 if (!dif)
1814 if (streq (np->file, cur_node->file))
1816 if (!no_warnings)
1818 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1819 np->file, lineno, np->name);
1820 fprintf (stderr, "Second entry ignored\n");
1823 else if (!cur_node->been_warned && !no_warnings)
1825 fprintf
1826 (stderr,
1827 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1828 np->file, cur_node->file, np->name);
1829 cur_node->been_warned = TRUE;
1831 return;
1834 /* Actually add the node */
1835 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1840 static void
1841 put_entries (np)
1842 register node *np;
1844 register char *sp;
1846 if (np == NULL)
1847 return;
1849 /* Output subentries that precede this one */
1850 put_entries (np->left);
1852 /* Output this entry */
1854 if (!CTAGS)
1856 if (np->name != NULL)
1857 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1858 np->pat, np->name, np->lno, np->cno);
1859 else
1860 fprintf (tagf, "%s\177%d,%ld\n",
1861 np->pat, np->lno, np->cno);
1863 else
1865 if (np->name == NULL)
1866 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1868 if (cxref_style)
1870 if (vgrind_style)
1871 fprintf (stdout, "%s %s %d\n",
1872 np->name, np->file, (np->lno + 63) / 64);
1873 else
1874 fprintf (stdout, "%-16s %3d %-16s %s\n",
1875 np->name, np->lno, np->file, np->pat);
1877 else
1879 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1881 if (np->is_func)
1882 { /* a function */
1883 putc (searchar, tagf);
1884 putc ('^', tagf);
1886 for (sp = np->pat; *sp; sp++)
1888 if (*sp == '\\' || *sp == searchar)
1889 putc ('\\', tagf);
1890 putc (*sp, tagf);
1892 putc (searchar, tagf);
1894 else
1895 { /* a typedef; text pattern inadequate */
1896 fprintf (tagf, "%d", np->lno);
1898 putc ('\n', tagf);
1902 /* Output subentries that follow this one */
1903 put_entries (np->right);
/* Number of decimal digits needed to print NUM.  The result is 1 for
   any NUM below 10, negative values included: no sign is counted.  */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
1918 * Return total number of characters that put_entries will output for
1919 * the nodes in the subtree of the specified node. Works only if
1920 * we are not ctags, but called only in that case. This count
1921 * is irrelevant with the new tags.el, but is still supplied for
1922 * backward compatibility.
1924 static int
1925 total_size_of_entries (np)
1926 register node *np;
1928 register int total;
1930 if (np == NULL)
1931 return 0;
1933 for (total = 0; np != NULL; np = np->right)
1935 /* Count left subentries. */
1936 total += total_size_of_entries (np->left);
1938 /* Count this entry */
1939 total += strlen (np->pat) + 1;
1940 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1941 if (np->name != NULL)
1942 total += 1 + strlen (np->name); /* \001name */
1945 return total;
/* C extensions: dialect bits handed to the C parser.  C_symtype
   accepts a keyword whose c_ext field is non-zero only when that
   field shares a bit with the dialect in use.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO	0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
1959 * The C symbol tables.
1961 enum sym_type
1963 st_none,
1964 st_C_objprot, st_C_objimpl, st_C_objend,
1965 st_C_gnumacro,
1966 st_C_ignore,
1967 st_C_javastruct,
1968 st_C_operator,
1969 st_C_class, st_C_template,
1970 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
1973 static unsigned int hash __P((const char *, unsigned int));
1974 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
1975 static enum sym_type C_symtype __P((char *, int, int));
1977 /* Feed stuff between (but not including) %[ and %] lines to:
1978 gperf -c -k 1,3 -o -p -r -t
1980 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1982 if, 0, st_C_ignore
1983 for, 0, st_C_ignore
1984 while, 0, st_C_ignore
1985 switch, 0, st_C_ignore
1986 return, 0, st_C_ignore
1987 @interface, 0, st_C_objprot
1988 @protocol, 0, st_C_objprot
1989 @implementation,0, st_C_objimpl
1990 @end, 0, st_C_objend
1991 import, C_JAVA, st_C_ignore
1992 package, C_JAVA, st_C_ignore
1993 friend, C_PLPL, st_C_ignore
1994 extends, C_JAVA, st_C_javastruct
1995 implements, C_JAVA, st_C_javastruct
1996 interface, C_JAVA, st_C_struct
1997 class, 0, st_C_class
1998 namespace, C_PLPL, st_C_struct
1999 domain, C_STAR, st_C_struct
2000 union, 0, st_C_struct
2001 struct, 0, st_C_struct
2002 extern, 0, st_C_extern
2003 enum, 0, st_C_enum
2004 typedef, 0, st_C_typedef
2005 define, 0, st_C_define
2006 operator, C_PLPL, st_C_operator
2007 template, 0, st_C_template
2008 bool, C_PLPL, st_C_typespec
2009 long, 0, st_C_typespec
2010 short, 0, st_C_typespec
2011 int, 0, st_C_typespec
2012 char, 0, st_C_typespec
2013 float, 0, st_C_typespec
2014 double, 0, st_C_typespec
2015 signed, 0, st_C_typespec
2016 unsigned, 0, st_C_typespec
2017 auto, 0, st_C_typespec
2018 void, 0, st_C_typespec
2019 static, 0, st_C_typespec
2020 const, 0, st_C_typespec
2021 volatile, 0, st_C_typespec
2022 explicit, C_PLPL, st_C_typespec
2023 mutable, C_PLPL, st_C_typespec
2024 typename, C_PLPL, st_C_typespec
2025 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2026 DEFUN, 0, st_C_gnumacro
2027 SYSCALL, 0, st_C_gnumacro
2028 ENTRY, 0, st_C_gnumacro
2029 PSEUDO, 0, st_C_gnumacro
2030 # These are defined inside C functions, so currently they are not met.
2031 # EXFUN used in glibc, DEFVAR_* in emacs.
2032 #EXFUN, 0, st_C_gnumacro
2033 #DEFVAR_, 0, st_C_gnumacro
2035 and replace lines between %< and %> with its output,
2036 then make in_word_set static. */
2037 /*%<*/
2038 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2039 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2040 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2042 #define TOTAL_KEYWORDS 47
2043 #define MIN_WORD_LENGTH 2
2044 #define MAX_WORD_LENGTH 15
2045 #define MIN_HASH_VALUE 18
2046 #define MAX_HASH_VALUE 138
2047 /* maximum key range = 121, duplicates = 0 */
/* Hash function for the keyword table (gperf output, restyled).
   The value is LEN plus the association value of the first char of
   STR plus, unless LEN is 1 or 2, that of its third char.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char assoc[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;
  register int short_word = (hval == 1 || hval == 2);

  /* Words of one or two chars have no third char to look at.  */
  if (!short_word)
    hval += assoc[(unsigned char)str[2]];
  hval += assoc[(unsigned char)str[0]];
  return hval;
}
2101 #ifdef __GNUC__
2102 __inline
2103 #endif
2104 static struct C_stab_entry *
2105 in_word_set (str, len)
2106 register const char *str;
2107 register unsigned int len;
2109 static struct C_stab_entry wordlist[] =
2111 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2112 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2113 {"if", 0, st_C_ignore},
2114 {""}, {""}, {""}, {""},
2115 {"int", 0, st_C_typespec},
2116 {""}, {""},
2117 {"void", 0, st_C_typespec},
2118 {""}, {""},
2119 {"interface", C_JAVA, st_C_struct},
2120 {""},
2121 {"SYSCALL", 0, st_C_gnumacro},
2122 {""},
2123 {"return", 0, st_C_ignore},
2124 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2125 {"while", 0, st_C_ignore},
2126 {"auto", 0, st_C_typespec},
2127 {""}, {""}, {""}, {""}, {""}, {""},
2128 {"float", 0, st_C_typespec},
2129 {"typedef", 0, st_C_typedef},
2130 {"typename", C_PLPL, st_C_typespec},
2131 {""}, {""}, {""},
2132 {"friend", C_PLPL, st_C_ignore},
2133 {"volatile", 0, st_C_typespec},
2134 {""}, {""},
2135 {"for", 0, st_C_ignore},
2136 {"const", 0, st_C_typespec},
2137 {"import", C_JAVA, st_C_ignore},
2138 {""},
2139 {"define", 0, st_C_define},
2140 {"long", 0, st_C_typespec},
2141 {"implements", C_JAVA, st_C_javastruct},
2142 {"signed", 0, st_C_typespec},
2143 {""},
2144 {"extern", 0, st_C_extern},
2145 {"extends", C_JAVA, st_C_javastruct},
2146 {""},
2147 {"mutable", C_PLPL, st_C_typespec},
2148 {"template", 0, st_C_template},
2149 {"short", 0, st_C_typespec},
2150 {"bool", C_PLPL, st_C_typespec},
2151 {"char", 0, st_C_typespec},
2152 {"class", 0, st_C_class},
2153 {"operator", C_PLPL, st_C_operator},
2154 {""},
2155 {"switch", 0, st_C_ignore},
2156 {""},
2157 {"ENTRY", 0, st_C_gnumacro},
2158 {""},
2159 {"package", C_JAVA, st_C_ignore},
2160 {"union", 0, st_C_struct},
2161 {"@end", 0, st_C_objend},
2162 {"struct", 0, st_C_struct},
2163 {"namespace", C_PLPL, st_C_struct},
2164 {""}, {""},
2165 {"domain", C_STAR, st_C_struct},
2166 {"@interface", 0, st_C_objprot},
2167 {"PSEUDO", 0, st_C_gnumacro},
2168 {"double", 0, st_C_typespec},
2169 {""},
2170 {"@protocol", 0, st_C_objprot},
2171 {""},
2172 {"static", 0, st_C_typespec},
2173 {""}, {""},
2174 {"DEFUN", 0, st_C_gnumacro},
2175 {""}, {""}, {""}, {""},
2176 {"explicit", C_PLPL, st_C_typespec},
2177 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2178 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2179 {""},
2180 {"enum", 0, st_C_enum},
2181 {""}, {""},
2182 {"unsigned", 0, st_C_typespec},
2183 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2184 {"@implementation",0, st_C_objimpl}
2187 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2189 register int key = hash (str, len);
2191 if (key <= MAX_HASH_VALUE && key >= 0)
2193 register const char *s = wordlist[key].name;
2195 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2196 return &wordlist[key];
2199 return 0;
2201 /*%>*/
2203 static enum sym_type
2204 C_symtype (str, len, c_ext)
2205 char *str;
2206 int len;
2207 int c_ext;
2209 register struct C_stab_entry *se = in_word_set (str, len);
2211 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2212 return st_none;
2213 return se->type;
2218 * C functions and variables are recognized using a simple
2219 * finite automaton. fvdef is its state variable.
2221 enum
2223 fvnone, /* nothing seen */
2224 fdefunkey, /* Emacs DEFUN keyword seen */
2225 fdefunname, /* Emacs DEFUN name seen */
2226 foperator, /* func: operator keyword seen (cplpl) */
2227 fvnameseen, /* function or variable name seen */
2228 fstartlist, /* func: just after open parenthesis */
2229 finlist, /* func: in parameter list */
2230 flistseen, /* func: after parameter list */
2231 fignore, /* func: before open brace */
2232 vignore /* var-like: ignore until ';' */
2233 } fvdef;
2235 bool fvextern; /* func or var: extern keyword seen; */
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if cblev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  sintemplate,                  /* inside template (ignore) */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
char *objtag = "<uninited>";
/*
 * Yet another little state machine to deal with preprocessor lines.
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2304 * Use this structure to keep info about the token read, and how it
2305 * should be tagged. Used by the make_C_tag function to build a tag.
2307 struct tok
2309 bool valid;
2310 bool named;
2311 int offset;
2312 int length;
2313 int lineno;
2314 long linepos;
2315 char *line;
2316 } token; /* latest token read */
2317 linebuffer token_name; /* its name */
2320 * Variables and functions for dealing with nested structures.
2321 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2323 static void pushclass_above __P((int, char *, int));
2324 static void popclass_above __P((int));
2325 static void write_classname __P((linebuffer *, char *qualifier));
2327 struct {
2328 char **cname; /* nested class names */
2329 int *cblev; /* nested class curly brace level */
2330 int nl; /* class nesting level (elements used) */
2331 int size; /* length of the array */
2332 } cstack; /* stack for nested declaration tags */
2333 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2334 #define nestlev (cstack.nl)
2335 /* After struct keyword or in struct body, not inside an nested function. */
2336 #define instruct (structdef == snone && nestlev > 0 \
2337 && cblev == cstack.cblev[nestlev-1] + 1)
2339 static void
2340 pushclass_above (cblev, str, len)
2341 int cblev;
2342 char *str;
2343 int len;
2345 int nl;
2347 popclass_above (cblev);
2348 nl = cstack.nl;
2349 if (nl >= cstack.size)
2351 int size = cstack.size *= 2;
2352 xrnew (cstack.cname, size, char *);
2353 xrnew (cstack.cblev, size, int);
2355 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2356 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2357 cstack.cblev[nl] = cblev;
2358 cstack.nl = nl + 1;
2361 static void
2362 popclass_above (cblev)
2363 int cblev;
2365 int nl;
2367 for (nl = cstack.nl - 1;
2368 nl >= 0 && cstack.cblev[nl] >= cblev;
2369 nl--)
2371 if (cstack.cname[nl] != NULL)
2372 free (cstack.cname[nl]);
2373 cstack.nl = nl;
2377 static void
2378 write_classname (cn, qualifier)
2379 linebuffer *cn;
2380 char *qualifier;
2382 int i, len;
2383 int qlen = strlen (qualifier);
2385 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2387 len = 0;
2388 cn->len = 0;
2389 cn->buffer[0] = '\0';
2391 else
2393 len = strlen (cstack.cname[0]);
2394 linebuffer_setlen (cn, len);
2395 strcpy (cn->buffer, cstack.cname[0]);
2397 for (i = 1; i < cstack.nl; i++)
2399 char *s;
2400 int slen;
2402 s = cstack.cname[i];
2403 if (s == NULL)
2404 continue;
2405 slen = strlen (s);
2406 len += slen + qlen;
2407 linebuffer_setlen (cn, len);
2408 strncat (cn->buffer, qualifier, qlen);
2409 strncat (cn->buffer, s, slen);
2414 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2415 static void make_C_tag __P((bool));
2418 * consider_token ()
2419 * checks to see if the current token is at the start of a
2420 * function or variable, or corresponds to a typedef, or
2421 * is a struct/union/enum tag, or #define, or an enum constant.
2423 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2424 * with args. C_EXTP points to which language we are looking at.
2426 * Globals
2427 * fvdef IN OUT
2428 * structdef IN OUT
2429 * definedef IN OUT
2430 * typdef IN OUT
2431 * objdef IN OUT
2434 static bool
2435 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2436 register char *str; /* IN: token pointer */
2437 register int len; /* IN: token length */
2438 register int c; /* IN: first char after the token */
2439 int *c_extp; /* IN, OUT: C extensions mask */
2440 int cblev; /* IN: curly brace level */
2441 int parlev; /* IN: parenthesis level */
2442 bool *is_func_or_var; /* OUT: function or variable found */
2444 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2445 structtype is the type of the preceding struct-like keyword, and
2446 structcblev is the curly brace level where it has been seen. */
2447 static enum sym_type structtype;
2448 static int structcblev;
2449 static enum sym_type toktype;
2452 toktype = C_symtype (str, len, *c_extp);
2455 * Advance the definedef state machine.
2457 switch (definedef)
2459 case dnone:
2460 /* We're not on a preprocessor line. */
2461 if (toktype == st_C_gnumacro)
2463 fvdef = fdefunkey;
2464 return FALSE;
2466 break;
2467 case dsharpseen:
2468 if (toktype == st_C_define)
2470 definedef = ddefineseen;
2472 else
2474 definedef = dignorerest;
2476 return FALSE;
2477 case ddefineseen:
2479 * Make a tag for any macro, unless it is a constant
2480 * and constantypedefs is FALSE.
2482 definedef = dignorerest;
2483 *is_func_or_var = (c == '(');
2484 if (!*is_func_or_var && !constantypedefs)
2485 return FALSE;
2486 else
2487 return TRUE;
2488 case dignorerest:
2489 return FALSE;
2490 default:
2491 error ("internal error: definedef value.", (char *)NULL);
2495 * Now typedefs
2497 switch (typdef)
2499 case tnone:
2500 if (toktype == st_C_typedef)
2502 if (typedefs)
2503 typdef = tkeyseen;
2504 fvextern = FALSE;
2505 fvdef = fvnone;
2506 return FALSE;
2508 break;
2509 case tkeyseen:
2510 switch (toktype)
2512 case st_none:
2513 case st_C_typespec:
2514 case st_C_class:
2515 case st_C_struct:
2516 case st_C_enum:
2517 typdef = ttypeseen;
2518 break;
2520 break;
2521 case ttypeseen:
2522 if (structdef == snone && fvdef == fvnone)
2524 fvdef = fvnameseen;
2525 return TRUE;
2527 break;
2528 case tend:
2529 switch (toktype)
2531 case st_C_typespec:
2532 case st_C_class:
2533 case st_C_struct:
2534 case st_C_enum:
2535 return FALSE;
2537 return TRUE;
2541 * This structdef business is NOT invoked when we are ctags and the
2542 * file is plain C. This is because a struct tag may have the same
2543 * name as another tag, and this loses with ctags.
2545 switch (toktype)
2547 case st_C_javastruct:
2548 if (structdef == stagseen)
2549 structdef = scolonseen;
2550 return FALSE;
2551 case st_C_template:
2552 case st_C_class:
2553 if (cblev == 0
2554 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2555 && definedef == dnone && structdef == snone
2556 && typdef == tnone && fvdef == fvnone)
2557 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2558 if (toktype == st_C_template)
2559 break;
2560 /* FALLTHRU */
2561 case st_C_struct:
2562 case st_C_enum:
2563 if (parlev == 0
2564 && fvdef != vignore
2565 && (typdef == tkeyseen
2566 || (typedefs_or_cplusplus && structdef == snone)))
2568 structdef = skeyseen;
2569 structtype = toktype;
2570 structcblev = cblev;
2572 return FALSE;
2575 if (structdef == skeyseen)
2577 structdef = stagseen;
2578 return TRUE;
2581 if (typdef != tnone)
2582 definedef = dnone;
2584 /* Detect Objective C constructs. */
2585 switch (objdef)
2587 case onone:
2588 switch (toktype)
2590 case st_C_objprot:
2591 objdef = oprotocol;
2592 return FALSE;
2593 case st_C_objimpl:
2594 objdef = oimplementation;
2595 return FALSE;
2597 break;
2598 case oimplementation:
2599 /* Save the class tag for functions or variables defined inside. */
2600 objtag = savenstr (str, len);
2601 objdef = oinbody;
2602 return FALSE;
2603 case oprotocol:
2604 /* Save the class tag for categories. */
2605 objtag = savenstr (str, len);
2606 objdef = otagseen;
2607 *is_func_or_var = TRUE;
2608 return TRUE;
2609 case oparenseen:
2610 objdef = ocatseen;
2611 *is_func_or_var = TRUE;
2612 return TRUE;
2613 case oinbody:
2614 break;
2615 case omethodsign:
2616 if (parlev == 0)
2618 objdef = omethodtag;
2619 linebuffer_setlen (&token_name, len);
2620 strncpy (token_name.buffer, str, len);
2621 token_name.buffer[len] = '\0';
2622 return TRUE;
2624 return FALSE;
2625 case omethodcolon:
2626 if (parlev == 0)
2627 objdef = omethodparm;
2628 return FALSE;
2629 case omethodparm:
2630 if (parlev == 0)
2632 objdef = omethodtag;
2633 linebuffer_setlen (&token_name, token_name.len + len);
2634 strncat (token_name.buffer, str, len);
2635 return TRUE;
2637 return FALSE;
2638 case oignore:
2639 if (toktype == st_C_objend)
2641 /* Memory leakage here: the string pointed by objtag is
2642 never released, because many tests would be needed to
2643 avoid breaking on incorrect input code. The amount of
2644 memory leaked here is the sum of the lengths of the
2645 class tags.
2646 free (objtag); */
2647 objdef = onone;
2649 return FALSE;
2652 /* A function, variable or enum constant? */
2653 switch (toktype)
2655 case st_C_extern:
2656 fvextern = TRUE;
2657 /* FALLTHRU */
2658 case st_C_typespec:
2659 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2660 fvdef = fvnone; /* should be useless */
2661 return FALSE;
2662 case st_C_ignore:
2663 fvextern = FALSE;
2664 fvdef = vignore;
2665 return FALSE;
2666 case st_C_operator:
2667 fvdef = foperator;
2668 *is_func_or_var = TRUE;
2669 return TRUE;
2670 case st_none:
2671 if (constantypedefs
2672 && structdef == snone
2673 && structtype == st_C_enum && cblev > structcblev)
2674 return TRUE; /* enum constant */
2675 switch (fvdef)
2677 case fdefunkey:
2678 if (cblev > 0)
2679 break;
2680 fvdef = fdefunname; /* GNU macro */
2681 *is_func_or_var = TRUE;
2682 return TRUE;
2683 case fvnone:
2684 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2685 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2687 fvdef = vignore;
2688 return FALSE;
2690 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2692 fvdef = foperator;
2693 *is_func_or_var = TRUE;
2694 return TRUE;
2696 if (cblev > 0 && !instruct)
2697 break;
2698 fvdef = fvnameseen; /* function or variable */
2699 *is_func_or_var = TRUE;
2700 return TRUE;
2702 break;
2705 return FALSE;
2710 * C_entries often keeps pointers to tokens or lines which are older than
2711 * the line currently read. By keeping two line buffers, and switching
2712 * them at end of line, it is possible to use those pointers.
2714 struct
2716 long linepos;
2717 linebuffer lb;
2718 } lbs[2];
2720 #define current_lb_is_new (newndx == curndx)
2721 #define switch_line_buffers() (curndx = 1 - curndx)
2723 #define curlb (lbs[curndx].lb)
2724 #define newlb (lbs[newndx].lb)
2725 #define curlinepos (lbs[curndx].linepos)
2726 #define newlinepos (lbs[newndx].linepos)
2728 #define CNL_SAVE_DEFINEDEF() \
2729 do { \
2730 curlinepos = charno; \
2731 lineno++; \
2732 linecharno = charno; \
2733 charno += readline (&curlb, inf); \
2734 lp = curlb.buffer; \
2735 quotednl = FALSE; \
2736 newndx = curndx; \
2737 } while (0)
2739 #define CNL() \
2740 do { \
2741 CNL_SAVE_DEFINEDEF(); \
2742 if (savetoken.valid) \
2744 token = savetoken; \
2745 savetoken.valid = FALSE; \
2747 definedef = dnone; \
2748 } while (0)
2751 static void
2752 make_C_tag (isfun)
2753 bool isfun;
2755 /* This function should never be called when token.valid is FALSE, but
2756 we must protect against invalid input or internal errors. */
2757 if (DEBUG || token.valid)
2759 if (traditional_tag_style)
2761 /* This was the original code. Now we call new_pfnote instead,
2762 which uses the new method for naming tags (see new_pfnote). */
2763 char *name = NULL;
2765 if (CTAGS || token.named)
2766 name = savestr (token_name.buffer);
2767 if (DEBUG && !token.valid)
2769 if (token.named)
2770 name = concat (name, "##invalid##", "");
2771 else
2772 name = savestr ("##invalid##");
2774 pfnote (name, isfun, token.line,
2775 token.offset+token.length+1, token.lineno, token.linepos);
2777 else
2778 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2779 token.offset+token.length+1, token.lineno, token.linepos);
2780 token.valid = FALSE;
2786 * C_entries ()
2787 * This routine finds functions, variables, typedefs,
2788 * #define's, enum constants and struct/union/enum definitions in
2789 * C syntax and adds them to the list.
2791 static void
2792 C_entries (c_ext, inf)
2793 int c_ext; /* extension of C */
2794 FILE *inf; /* input file */
2796 register char c; /* latest char read; '\0' for end of line */
2797 register char *lp; /* pointer one beyond the character `c' */
2798 int curndx, newndx; /* indices for current and new lb */
2799 register int tokoff; /* offset in line of start of current token */
2800 register int toklen; /* length of current token */
2801 char *qualifier; /* string used to qualify names */
2802 int qlen; /* length of qualifier */
2803 int cblev; /* current curly brace level */
2804 int parlev; /* current parenthesis level */
2805 int typdefcblev; /* cblev where a typedef struct body begun */
2806 bool incomm, inquote, inchar, quotednl, midtoken;
2807 bool cplpl, cjava;
2808 bool yacc_rules; /* in the rules part of a yacc file */
2809 struct tok savetoken; /* token saved during preprocessor handling */
2812 initbuffer (&token_name);
2813 initbuffer (&lbs[0].lb);
2814 initbuffer (&lbs[1].lb);
2815 if (cstack.size == 0)
2817 cstack.size = (DEBUG) ? 1 : 4;
2818 cstack.nl = 0;
2819 cstack.cname = xnew (cstack.size, char *);
2820 cstack.cblev = xnew (cstack.size, int);
2823 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
2824 curndx = newndx = 0;
2825 lineno = 0;
2826 charno = 0;
2827 lp = curlb.buffer;
2828 *lp = 0;
2830 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2831 structdef = snone; definedef = dnone; objdef = onone;
2832 yacc_rules = FALSE;
2833 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2834 token.valid = savetoken.valid = FALSE;
2835 cblev = 0;
2836 parlev = 0;
2837 cplpl = (c_ext & C_PLPL) == C_PLPL;
2838 cjava = (c_ext & C_JAVA) == C_JAVA;
2839 if (cjava)
2840 { qualifier = "."; qlen = 1; }
2841 else
2842 { qualifier = "::"; qlen = 2; }
2845 while (!feof (inf))
2847 c = *lp++;
2848 if (c == '\\')
2850 /* If we're at the end of the line, the next character is a
2851 '\0'; don't skip it, because it's the thing that tells us
2852 to read the next line. */
2853 if (*lp == '\0')
2855 quotednl = TRUE;
2856 continue;
2858 lp++;
2859 c = ' ';
2861 else if (incomm)
2863 switch (c)
2865 case '*':
2866 if (*lp == '/')
2868 c = *lp++;
2869 incomm = FALSE;
2871 break;
2872 case '\0':
2873 /* Newlines inside comments do not end macro definitions in
2874 traditional cpp. */
2875 CNL_SAVE_DEFINEDEF ();
2876 break;
2878 continue;
2880 else if (inquote)
2882 switch (c)
2884 case '"':
2885 inquote = FALSE;
2886 break;
2887 case '\0':
2888 /* Newlines inside strings do not end macro definitions
2889 in traditional cpp, even though compilers don't
2890 usually accept them. */
2891 CNL_SAVE_DEFINEDEF ();
2892 break;
2894 continue;
2896 else if (inchar)
2898 switch (c)
2900 case '\0':
2901 /* Hmmm, something went wrong. */
2902 CNL ();
2903 /* FALLTHRU */
2904 case '\'':
2905 inchar = FALSE;
2906 break;
2908 continue;
2910 else
2911 switch (c)
2913 case '"':
2914 inquote = TRUE;
2915 switch (fvdef)
2917 case fdefunkey:
2918 case fstartlist:
2919 case finlist:
2920 case fignore:
2921 case vignore:
2922 break;
2923 default:
2924 fvextern = FALSE;
2925 fvdef = fvnone;
2927 continue;
2928 case '\'':
2929 inchar = TRUE;
2930 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2932 fvextern = FALSE;
2933 fvdef = fvnone;
2935 continue;
2936 case '/':
2937 if (*lp == '*')
2939 lp++;
2940 incomm = TRUE;
2941 continue;
2943 else if (/* cplpl && */ *lp == '/')
2945 c = '\0';
2946 break;
2948 else
2949 break;
2950 case '%':
2951 if ((c_ext & YACC) && *lp == '%')
2953 /* Entering or exiting rules section in yacc file. */
2954 lp++;
2955 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2956 typdef = tnone; structdef = snone;
2957 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2958 cblev = 0;
2959 yacc_rules = !yacc_rules;
2960 continue;
2962 else
2963 break;
2964 case '#':
2965 if (definedef == dnone)
2967 char *cp;
2968 bool cpptoken = TRUE;
2970 /* Look back on this line. If all blanks, or nonblanks
2971 followed by an end of comment, this is a preprocessor
2972 token. */
2973 for (cp = newlb.buffer; cp < lp-1; cp++)
2974 if (!iswhite (*cp))
2976 if (*cp == '*' && *(cp+1) == '/')
2978 cp++;
2979 cpptoken = TRUE;
2981 else
2982 cpptoken = FALSE;
2984 if (cpptoken)
2985 definedef = dsharpseen;
2986 } /* if (definedef == dnone) */
2988 continue;
2989 } /* switch (c) */
2992 /* Consider token only if some involved conditions are satisfied. */
2993 if (typdef != tignore
2994 && definedef != dignorerest
2995 && fvdef != finlist
2996 && structdef != sintemplate
2997 && (definedef != dnone
2998 || structdef != scolonseen))
3000 if (midtoken)
3002 if (endtoken (c))
3004 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3007 * This handles :: in the middle, but not at the
3008 * beginning of an identifier. Also, space-separated
3009 * :: is not recognised.
3011 lp += 2;
3012 toklen += 2;
3013 c = lp[-1];
3014 goto still_in_token;
3016 else
3018 bool funorvar = FALSE;
3020 if (yacc_rules
3021 || consider_token (newlb.buffer + tokoff, toklen, c,
3022 &c_ext, cblev, parlev, &funorvar))
3024 if (fvdef == foperator)
3026 char *oldlp = lp;
3027 lp = skip_spaces (lp-1);
3028 if (*lp != '\0')
3029 lp += 1;
3030 while (*lp != '\0'
3031 && !iswhite (*lp) && *lp != '(')
3032 lp += 1;
3033 c = *lp++;
3034 toklen += lp - oldlp;
3036 token.named = FALSE;
3037 if ((c_ext & C_EXT) /* not pure C */
3038 && nestlev > 0 && definedef == dnone)
3039 /* in struct body */
3041 write_classname (&token_name, qualifier);
3042 linebuffer_setlen (&token_name,
3043 token_name.len+qlen+toklen);
3044 strcat (token_name.buffer, qualifier);
3045 strncat (token_name.buffer,
3046 newlb.buffer + tokoff, toklen);
3047 token.named = TRUE;
3049 else if (objdef == ocatseen)
3050 /* Objective C category */
3052 int len = strlen (objtag) + 2 + toklen;
3053 linebuffer_setlen (&token_name, len);
3054 strcpy (token_name.buffer, objtag);
3055 strcat (token_name.buffer, "(");
3056 strncat (token_name.buffer,
3057 newlb.buffer + tokoff, toklen);
3058 strcat (token_name.buffer, ")");
3059 token.named = TRUE;
3061 else if (objdef == omethodtag
3062 || objdef == omethodparm)
3063 /* Objective C method */
3065 token.named = TRUE;
3067 else if (fvdef == fdefunname)
3068 /* GNU DEFUN and similar macros */
3070 bool defun = (newlb.buffer[tokoff] == 'F');
3071 int off = tokoff;
3072 int len = toklen;
3074 /* Rewrite the tag so that emacs lisp DEFUNs
3075 can be found by their elisp name */
3076 if (defun)
3078 off += 1;
3079 len -= 1;
3081 len = toklen;
3082 linebuffer_setlen (&token_name, len);
3083 strncpy (token_name.buffer,
3084 newlb.buffer + off, len);
3085 token_name.buffer[len] = '\0';
3086 if (defun)
3087 while (--len >= 0)
3088 if (token_name.buffer[len] == '_')
3089 token_name.buffer[len] = '-';
3090 token.named = defun;
3092 else
3094 linebuffer_setlen (&token_name, toklen);
3095 strncpy (token_name.buffer,
3096 newlb.buffer + tokoff, toklen);
3097 token_name.buffer[toklen] = '\0';
3098 /* Name macros and members. */
3099 token.named = (structdef == stagseen
3100 || typdef == ttypeseen
3101 || typdef == tend
3102 || (funorvar
3103 && definedef == dignorerest)
3104 || (funorvar
3105 && definedef == dnone
3106 && structdef == snone
3107 && cblev > 0));
3109 token.lineno = lineno;
3110 token.offset = tokoff;
3111 token.length = toklen;
3112 token.line = newlb.buffer;
3113 token.linepos = newlinepos;
3114 token.valid = TRUE;
3116 if (definedef == dnone
3117 && (fvdef == fvnameseen
3118 || fvdef == foperator
3119 || structdef == stagseen
3120 || typdef == tend
3121 || typdef == ttypeseen
3122 || objdef != onone))
3124 if (current_lb_is_new)
3125 switch_line_buffers ();
3127 else if (definedef != dnone
3128 || fvdef == fdefunname
3129 || instruct)
3130 make_C_tag (funorvar);
3132 midtoken = FALSE;
3134 } /* if (endtoken (c)) */
3135 else if (intoken (c))
3136 still_in_token:
3138 toklen++;
3139 continue;
3141 } /* if (midtoken) */
3142 else if (begtoken (c))
3144 switch (definedef)
3146 case dnone:
3147 switch (fvdef)
3149 case fstartlist:
3150 fvdef = finlist;
3151 continue;
3152 case flistseen:
3153 make_C_tag (TRUE); /* a function */
3154 fvdef = fignore;
3155 break;
3156 case fvnameseen:
3157 fvdef = fvnone;
3158 break;
3160 if (structdef == stagseen && !cjava)
3162 popclass_above (cblev);
3163 structdef = snone;
3165 break;
3166 case dsharpseen:
3167 savetoken = token;
3169 if (!yacc_rules || lp == newlb.buffer + 1)
3171 tokoff = lp - 1 - newlb.buffer;
3172 toklen = 1;
3173 midtoken = TRUE;
3175 continue;
3176 } /* if (begtoken) */
3177 } /* if must look at token */
3180 /* Detect end of line, colon, comma, semicolon and various braces
3181 after having handled a token.*/
3182 switch (c)
3184 case ':':
3185 if (yacc_rules && token.offset == 0 && token.valid)
3187 make_C_tag (FALSE); /* a yacc function */
3188 break;
3190 if (definedef != dnone)
3191 break;
3192 switch (objdef)
3194 case otagseen:
3195 objdef = oignore;
3196 make_C_tag (TRUE); /* an Objective C class */
3197 break;
3198 case omethodtag:
3199 case omethodparm:
3200 objdef = omethodcolon;
3201 linebuffer_setlen (&token_name, token_name.len + 1);
3202 strcat (token_name.buffer, ":");
3203 break;
3205 if (structdef == stagseen)
3206 structdef = scolonseen;
3207 break;
3208 case ';':
3209 if (definedef != dnone)
3210 break;
3211 switch (typdef)
3213 case tend:
3214 case ttypeseen:
3215 make_C_tag (FALSE); /* a typedef */
3216 typdef = tnone;
3217 fvdef = fvnone;
3218 break;
3219 case tnone:
3220 case tinbody:
3221 case tignore:
3222 switch (fvdef)
3224 case fignore:
3225 if (typdef == tignore)
3226 fvdef = fvnone;
3227 break;
3228 case fvnameseen:
3229 if ((globals && cblev == 0 && (!fvextern || declarations))
3230 || (members && instruct))
3231 make_C_tag (FALSE); /* a variable */
3232 fvextern = FALSE;
3233 fvdef = fvnone;
3234 token.valid = FALSE;
3235 break;
3236 case flistseen:
3237 if ((declarations && typdef == tnone && !instruct)
3238 || (members && typdef != tignore && instruct))
3239 make_C_tag (TRUE); /* a function declaration */
3240 /* FALLTHRU */
3241 default:
3242 fvextern = FALSE;
3243 fvdef = fvnone;
3244 if (declarations
3245 && structdef == stagseen && (c_ext & C_PLPL))
3246 make_C_tag (FALSE); /* forward declaration */
3247 else
3248 /* The following instruction invalidates the token.
3249 Probably the token should be invalidated in all other
3250 cases where some state machine is reset prematurely. */
3251 token.valid = FALSE;
3252 } /* switch (fvdef) */
3253 /* FALLTHRU */
3254 default:
3255 if (!instruct)
3256 typdef = tnone;
3258 if (structdef == stagseen)
3259 structdef = snone;
3260 break;
3261 case ',':
3262 if (definedef != dnone)
3263 break;
3264 switch (objdef)
3266 case omethodtag:
3267 case omethodparm:
3268 make_C_tag (TRUE); /* an Objective C method */
3269 objdef = oinbody;
3270 break;
3272 switch (fvdef)
3274 case fdefunkey:
3275 case foperator:
3276 case fstartlist:
3277 case finlist:
3278 case fignore:
3279 case vignore:
3280 break;
3281 case fdefunname:
3282 fvdef = fignore;
3283 break;
3284 case fvnameseen: /* a variable */
3285 if ((globals && cblev == 0 && (!fvextern || declarations))
3286 || (members && instruct))
3287 make_C_tag (FALSE);
3288 break;
3289 case flistseen: /* a function */
3290 if ((declarations && typdef == tnone && !instruct)
3291 || (members && typdef != tignore && instruct))
3293 make_C_tag (TRUE); /* a function declaration */
3294 fvdef = fvnameseen;
3296 else if (!declarations)
3297 fvdef = fvnone;
3298 token.valid = FALSE;
3299 break;
3300 default:
3301 fvdef = fvnone;
3303 if (structdef == stagseen)
3304 structdef = snone;
3305 break;
3306 case '[':
3307 if (definedef != dnone)
3308 break;
3309 if (structdef == stagseen)
3310 structdef = snone;
3311 switch (typdef)
3313 case ttypeseen:
3314 case tend:
3315 typdef = tignore;
3316 make_C_tag (FALSE); /* a typedef */
3317 break;
3318 case tnone:
3319 case tinbody:
3320 switch (fvdef)
3322 case foperator:
3323 case finlist:
3324 case fignore:
3325 case vignore:
3326 break;
3327 case fvnameseen:
3328 if ((members && cblev == 1)
3329 || (globals && cblev == 0
3330 && (!fvextern || declarations)))
3331 make_C_tag (FALSE); /* a variable */
3332 /* FALLTHRU */
3333 default:
3334 fvdef = fvnone;
3336 break;
3338 break;
3339 case '(':
3340 if (definedef != dnone)
3341 break;
3342 if (objdef == otagseen && parlev == 0)
3343 objdef = oparenseen;
3344 switch (fvdef)
3346 case fvnameseen:
3347 if (typdef == ttypeseen
3348 && *lp != '*'
3349 && !instruct)
3351 /* This handles constructs like:
3352 typedef void OperatorFun (int fun); */
3353 make_C_tag (FALSE);
3354 typdef = tignore;
3355 fvdef = fignore;
3356 break;
3358 /* FALLTHRU */
3359 case foperator:
3360 fvdef = fstartlist;
3361 break;
3362 case flistseen:
3363 fvdef = finlist;
3364 break;
3366 parlev++;
3367 break;
3368 case ')':
3369 if (definedef != dnone)
3370 break;
3371 if (objdef == ocatseen && parlev == 1)
3373 make_C_tag (TRUE); /* an Objective C category */
3374 objdef = oignore;
3376 if (--parlev == 0)
3378 switch (fvdef)
3380 case fstartlist:
3381 case finlist:
3382 fvdef = flistseen;
3383 break;
3385 if (!instruct
3386 && (typdef == tend
3387 || typdef == ttypeseen))
3389 typdef = tignore;
3390 make_C_tag (FALSE); /* a typedef */
3393 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3394 parlev = 0;
3395 break;
3396 case '{':
3397 if (definedef != dnone)
3398 break;
3399 if (typdef == ttypeseen)
3401 /* Whenever typdef is set to tinbody (currently only
3402 here), typdefcblev should be set to cblev. */
3403 typdef = tinbody;
3404 typdefcblev = cblev;
3406 switch (fvdef)
3408 case flistseen:
3409 make_C_tag (TRUE); /* a function */
3410 /* FALLTHRU */
3411 case fignore:
3412 fvdef = fvnone;
3413 break;
3414 case fvnone:
3415 switch (objdef)
3417 case otagseen:
3418 make_C_tag (TRUE); /* an Objective C class */
3419 objdef = oignore;
3420 break;
3421 case omethodtag:
3422 case omethodparm:
3423 make_C_tag (TRUE); /* an Objective C method */
3424 objdef = oinbody;
3425 break;
3426 default:
3427 /* Neutralize `extern "C" {' grot. */
3428 if (cblev == 0 && structdef == snone && nestlev == 0
3429 && typdef == tnone)
3430 cblev = -1;
3433 switch (structdef)
3435 case skeyseen: /* unnamed struct */
3436 pushclass_above (cblev, NULL, 0);
3437 structdef = snone;
3438 break;
3439 case stagseen: /* named struct or enum */
3440 case scolonseen: /* a class */
3441 pushclass_above (cblev, token.line+token.offset, token.length);
3442 structdef = snone;
3443 make_C_tag (FALSE); /* a struct or enum */
3444 break;
3446 cblev++;
3447 break;
3448 case '*':
3449 if (definedef != dnone)
3450 break;
3451 if (fvdef == fstartlist)
3452 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3453 break;
3454 case '}':
3455 if (definedef != dnone)
3456 break;
3457 if (!noindentypedefs && lp == newlb.buffer + 1)
3459 cblev = 0; /* reset curly brace level if first column */
3460 parlev = 0; /* also reset paren level, just in case... */
3462 else if (cblev > 0)
3463 cblev--;
3464 popclass_above (cblev);
3465 structdef = snone;
3466 /* Only if typdef == tinbody is typdefcblev significant. */
3467 if (typdef == tinbody && cblev <= typdefcblev)
3469 assert (cblev == typdefcblev);
3470 typdef = tend;
3472 break;
3473 case '=':
3474 if (definedef != dnone)
3475 break;
3476 switch (fvdef)
3478 case foperator:
3479 case finlist:
3480 case fignore:
3481 case vignore:
3482 break;
3483 case fvnameseen:
3484 if ((members && cblev == 1)
3485 || (globals && cblev == 0 && (!fvextern || declarations)))
3486 make_C_tag (FALSE); /* a variable */
3487 /* FALLTHRU */
3488 default:
3489 fvdef = vignore;
3491 break;
3492 case '<':
3493 if (cplpl && structdef == stagseen)
3495 structdef = sintemplate;
3496 break;
3498 goto resetfvdef;
3499 case '>':
3500 if (structdef == sintemplate)
3502 structdef = stagseen;
3503 break;
3505 goto resetfvdef;
3506 case '+':
3507 case '-':
3508 if (objdef == oinbody && cblev == 0)
3510 objdef = omethodsign;
3511 break;
3513 /* FALLTHRU */
3514 resetfvdef:
3515 case '#': case '~': case '&': case '%': case '/': case '|':
3516 case '^': case '!': case '.': case '?': case ']':
3517 if (definedef != dnone)
3518 break;
3519 /* These surely cannot follow a function tag in C. */
3520 switch (fvdef)
3522 case foperator:
3523 case finlist:
3524 case fignore:
3525 case vignore:
3526 break;
3527 default:
3528 fvdef = fvnone;
3530 break;
3531 case '\0':
3532 if (objdef == otagseen)
3534 make_C_tag (TRUE); /* an Objective C class */
3535 objdef = oignore;
3537 /* If a macro spans multiple lines don't reset its state. */
3538 if (quotednl)
3539 CNL_SAVE_DEFINEDEF ();
3540 else
3541 CNL ();
3542 break;
3543 } /* switch (c) */
3545 } /* while not eof */
3547 free (token_name.buffer);
3548 free (lbs[0].lb.buffer);
3549 free (lbs[1].lb.buffer);
3553 * Process either a C++ file or a C file depending on the setting
3554 * of a global flag.
3556 static void
3557 default_C_entries (inf)
3558 FILE *inf;
3560 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3571 /* Always do C++. */
3572 static void
3573 Cplusplus_entries (inf)
3574 FILE *inf;
3576 C_entries (C_PLPL, inf);
3579 /* Always do Java. */
3580 static void
3581 Cjava_entries (inf)
3582 FILE *inf;
3584 C_entries (C_JAVA, inf);
3587 /* Always do C*. */
3588 static void
3589 Cstar_entries (inf)
3590 FILE *inf;
3592 C_entries (C_STAR, inf);
3595 /* Always do Yacc. */
3596 static void
3597 Yacc_entries (inf)
3598 FILE *inf;
3600 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that reads FILE_POINTER one line at a time into
   LINE_BUFFER, pointing CHAR_POINTER at the start of each line and
   keeping lineno, linecharno and charno up to date.  It must be
   followed by the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&line_buffer, file_pointer),             \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )

/* True if CP points at KEYWORD followed by a character that cannot be
   part of a name; in that case CP is advanced past the keyword and any
   following spaces.  CP is evaluated more than once.  */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1)))     /* skip spaces */
3620 * Read a file, but do no processing. This is used to do regexp
3621 * matching on files that have no language defined.
3623 static void
3624 just_read_file (inf)
3625 FILE *inf;
3627 register char *dummy;
3629 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3630 continue;
3634 /* Fortran parsing */
3636 static void F_takeprec __P((void));
3637 static void F_getit __P((FILE *));
3639 static void
3640 F_takeprec ()
3642 dbp = skip_spaces (dbp);
3643 if (*dbp != '*')
3644 return;
3645 dbp++;
3646 dbp = skip_spaces (dbp);
3647 if (strneq (dbp, "(*)", 3))
3649 dbp += 3;
3650 return;
3652 if (!ISDIGIT (*dbp))
3654 --dbp; /* force failure */
3655 return;
3658 dbp++;
3659 while (ISDIGIT (*dbp));
3662 static void
3663 F_getit (inf)
3664 FILE *inf;
3666 register char *cp;
3668 dbp = skip_spaces (dbp);
3669 if (*dbp == '\0')
3671 lineno++;
3672 linecharno = charno;
3673 charno += readline (&lb, inf);
3674 dbp = lb.buffer;
3675 if (dbp[5] != '&')
3676 return;
3677 dbp += 6;
3678 dbp = skip_spaces (dbp);
3680 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3681 return;
3682 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3683 continue;
3684 pfnote (savenstr (dbp, cp-dbp), TRUE,
3685 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/* Scan the Fortran (or Ratfor) file INF line by line and tag the names
   introduced by FUNCTION, SUBROUTINE, ENTRY and BLOCK DATA statements.
   Each line is handled in two steps: an optional leading type keyword
   is skipped first, then the statement keyword is looked for.
   NOTE(review): nocase_tail presumably matches its argument
   case-insensitively at dbp and advances dbp past it on success --
   confirm against its definition.  */
3689 static void
3690 Fortran_functions (inf)
3691 FILE *inf;
3693 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3695 if (*dbp == '%')
3696 dbp++; /* Ratfor escape to fortran */
3697 dbp = skip_spaces (dbp);
3698 if (*dbp == '\0')
3699 continue;
/* Step 1: skip an optional type prefix such as "integer*4" or
   "double precision".  F_takeprec handles the optional "*size"
   suffix.  */
3700 switch (lowcase (*dbp))
3702 case 'i':
3703 if (nocase_tail ("integer"))
3704 F_takeprec ();
3705 break;
3706 case 'r':
3707 if (nocase_tail ("real"))
3708 F_takeprec ();
3709 break;
3710 case 'l':
3711 if (nocase_tail ("logical"))
3712 F_takeprec ();
3713 break;
3714 case 'c':
3715 if (nocase_tail ("complex") || nocase_tail ("character"))
3716 F_takeprec ();
3717 break;
3718 case 'd':
/* "double" only counts when "precision" follows on the same line;
   otherwise the whole line is abandoned.  */
3719 if (nocase_tail ("double"))
3721 dbp = skip_spaces (dbp);
3722 if (*dbp == '\0')
3723 continue;
3724 if (nocase_tail ("precision"))
3725 break;
3726 continue;
3728 break;
3730 dbp = skip_spaces (dbp);
3731 if (*dbp == '\0')
3732 continue;
/* Step 2: look for the statement keyword.  Every case ends with
   `continue', i.e. moves on to the next input line.  */
3733 switch (lowcase (*dbp))
3735 case 'f':
3736 if (nocase_tail ("function"))
3737 F_getit (inf);
3738 continue;
3739 case 's':
3740 if (nocase_tail ("subroutine"))
3741 F_getit (inf);
3742 continue;
3743 case 'e':
3744 if (nocase_tail ("entry"))
3745 F_getit (inf);
3746 continue;
3747 case 'b':
3748 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3750 dbp = skip_spaces (dbp);
3751 if (*dbp == '\0') /* assume un-named */
3752 pfnote (savestr ("blockdata"), TRUE,
3753 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3754 else
3755 F_getit (inf); /* look for name */
3757 continue;
3764 * Ada parsing
3765 * Original code by
3766 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3769 static void Ada_getit __P((FILE *, char *));
3771 /* Once we are positioned after an "interesting" keyword, let's get
3772 the real tag value necessary. */
/* Make a tag for the Ada entity whose name starts at or after dbp.
   INF is the file being read; further lines are fetched from it when
   the name is not on the current line.
   NAME_QUALIFIER is the suffix appended to the name to form the tag
   (the callers pass strings such as "/f" or "/p"); it is replaced by
   "/b" when the keyword "body" is met before the name.
   Returns without tagging when no name is found.  */
3773 static void
3774 Ada_getit (inf, name_qualifier)
3775 FILE *inf;
3776 char *name_qualifier;
3778 register char *cp;
3779 char *name;
3780 char c;
3782 while (!feof (inf))
3784 dbp = skip_spaces (dbp);
/* Nothing left on this line, or only a `--' comment: read the next
   line and keep looking there.  */
3785 if (*dbp == '\0'
3786 || (dbp[0] == '-' && dbp[1] == '-'))
3788 lineno++;
3789 linecharno = charno;
3790 charno += readline (&lb, inf);
3791 dbp = lb.buffer;
3793 switch (lowcase(*dbp))
3795 case 'b':
3796 if (nocase_tail ("body"))
3798 /* Skipping body of procedure body or package body or ....
3799 resetting qualifier to body instead of spec. */
3800 name_qualifier = "/b";
3801 continue;
3803 break;
3804 case 't':
3805 /* Skipping type of task type or protected type ... */
3806 if (nocase_tail ("type"))
3807 continue;
3808 break;
/* A name in double quotes: take everything up to the closing quote
   (or the end of the line if there is none).  */
3810 if (*dbp == '"')
3812 dbp += 1;
3813 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3814 continue;
3816 else
/* Otherwise the name is a run of letters, digits, `_' and `.'; an
   empty run means there is nothing to tag.  */
3818 dbp = skip_spaces (dbp);
3819 for (cp = dbp;
3820 (*cp != '\0'
3821 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3822 cp++)
3823 continue;
3824 if (cp == dbp)
3825 return;
/* Temporarily NUL-terminate the name inside the line buffer so it
   can be concatenated with the qualifier, then restore the byte.  */
3827 c = *cp;
3828 *cp = '\0';
3829 name = concat (dbp, name_qualifier, "");
3830 *cp = c;
3831 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/* Step over the closing quote of a quoted name.  */
3832 if (c == '"')
3833 dbp = cp + 1;
3834 return;
/* Scan the Ada file INF and hand the text after each "function",
   "procedure", "package", "protected", "task" and "type" keyword to
   Ada_getit, with the qualifiers "/f", "/p", "/s", "/t", "/k" and
   "/t" respectively.  Strings, `--' comments and single-quoted
   characters are skipped.  The global flags packages_only and
   typedefs restrict which keywords are honoured.  */
3838 static void
3839 Ada_funcs (inf)
3840 FILE *inf;
/* inquote stays TRUE across lines while a string opened on an earlier
   line has not been closed yet.  */
3842 bool inquote = FALSE;
3844 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3846 while (*dbp != '\0')
3848 /* Skip a string i.e. "abcd". */
3849 if (inquote || (*dbp == '"'))
3851 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3852 if (dbp != NULL)
3854 inquote = FALSE;
3855 dbp += 1;
3856 continue; /* advance char */
3858 else
/* No closing quote on this line: dbp is NULL here, so the line must
   be abandoned at once; the next line resets dbp.  */
3860 inquote = TRUE;
3861 break; /* advance line */
3865 /* Skip comments. */
3866 if (dbp[0] == '-' && dbp[1] == '-')
3867 break; /* advance line */
3869 /* Skip character enclosed in single quote i.e. 'a'
3870 and skip single quote starting an attribute i.e. 'Image. */
3871 if (*dbp == '\'')
3873 dbp++ ;
3874 if (*dbp != '\0')
3875 dbp++;
3876 continue;
3879 /* Search for beginning of a token. */
3880 if (!begtoken (*dbp))
3882 dbp++;
3883 continue; /* advance char */
3886 /* We are at the beginning of a token. */
/* In each case below, `break' leaves the switch so that the token is
   skipped by the loop after it, while `continue' resumes scanning at
   the position where Ada_getit left dbp.  */
3887 switch (lowcase(*dbp))
3889 case 'f':
3890 if (!packages_only && nocase_tail ("function"))
3891 Ada_getit (inf, "/f");
3892 else
3893 break; /* from switch */
3894 continue; /* advance char */
3895 case 'p':
3896 if (!packages_only && nocase_tail ("procedure"))
3897 Ada_getit (inf, "/p");
3898 else if (nocase_tail ("package"))
3899 Ada_getit (inf, "/s");
3900 else if (nocase_tail ("protected")) /* protected type */
3901 Ada_getit (inf, "/t");
3902 else
3903 break; /* from switch */
3904 continue; /* advance char */
3905 case 't':
3906 if (!packages_only && nocase_tail ("task"))
3907 Ada_getit (inf, "/k");
3908 else if (typedefs && !packages_only && nocase_tail ("type"))
/* After a type name the rest of the line is ignored.  */
3910 Ada_getit (inf, "/t");
3911 while (*dbp != '\0')
3912 dbp += 1;
3914 else
3915 break; /* from switch */
3916 continue; /* advance char */
3919 /* Look for the end of the token. */
3920 while (!endtoken (*dbp))
3921 dbp++;
3923 } /* advance char */
3924 } /* advance line */
3929 * Unix and microcontroller assembly tag handling
3930 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
3931 * Idea by Bob Weiner, Motorola Inc. (1994)
3933 static void
3934 Asm_labels (inf)
3935 FILE *inf;
3937 register char *cp;
3939 LOOP_ON_INPUT_LINES (inf, lb, cp)
3941 /* If first char is alphabetic or one of [_.$], test for colon
3942 following identifier. */
3943 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3945 /* Read past label. */
3946 cp++;
3947 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3948 cp++;
3949 if (*cp == ':' || iswhite (*cp))
3951 /* Found end of label, so copy it and add it to the table. */
3952 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3953 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3961 * Perl support
3962 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
3963 * Perl variable names: /^(my|local).../
3964 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
3965 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
3966 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
3968 static void
3969 Perl_functions (inf)
3970 FILE *inf;
3972 char *package = savestr ("main"); /* current package name */
3973 register char *cp;
3975 LOOP_ON_INPUT_LINES (inf, lb, cp)
3977 skip_spaces(cp);
3979 if (LOOKING_AT (cp, "package"))
3981 free (package);
3982 package = get_tag (cp);
3983 if (package == NULL) /* can't parse package name */
3984 package = savestr ("");
3985 else
3986 package = savestr(package); /* make a copy */
3988 else if (LOOKING_AT (cp, "sub"))
3990 char *name, *fullname, *pos;
3991 char *sp = cp;
3993 while (!notinname (*cp))
3994 cp++;
3995 if (cp == sp)
3996 continue;
3997 name = savenstr (sp, cp-sp);
3998 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
3999 fullname = name;
4000 else
4001 fullname = concat (package, "::", name);
4002 pfnote (fullname, TRUE,
4003 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4004 if (name != fullname)
4005 free (name);
4007 else if (globals /* only if tagging global vars is enabled */
4008 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4010 /* After "my" or "local", but before any following paren or space. */
4011 char *varname = NULL;
4013 if (*cp == '$' || *cp == '@' || *cp == '%')
4015 char* varstart = ++cp;
4016 while (ISALNUM (*cp) || *cp == '_')
4017 cp++;
4018 varname = savenstr (varstart, cp-varstart);
4020 else
4022 /* Should be examining a variable list at this point;
4023 could insist on seeing an open parenthesis. */
4024 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4025 cp++;
4028 /* Perhaps I should back cp up one character, so the TAGS table
4029 doesn't mention (and so depend upon) the following char. */
4030 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4031 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4038 * Python support
4039 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4040 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4042 static void
4043 Python_functions (inf)
4044 FILE *inf;
4046 register char *cp;
4048 LOOP_ON_INPUT_LINES (inf, lb, cp)
4049 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4051 while (!notinname (*cp) && *cp != ':')
4052 cp++;
4053 pfnote (NULL, TRUE,
4054 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4060 * PHP support
4061 * Look for:
4062 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4063 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4064 * - /^[ \t]*define\(\"[^\"]+/
4065 * Only with --members:
4066 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4067 * Idea by Diez B. Roggisch (2001)
/* Scan the PHP file INF and make unnamed tags for "function" and
   "class" definitions and for define() calls; with the global flag
   `members' set, "var $name" declarations are tagged as well.
   When a "function" or "class" keyword ends its line, the name is
   taken from the start of the next non-empty line.  */
4069 static void
4070 PHP_functions (inf)
4071 FILE *inf;
4073 register char *cp;
/* TRUE when the previous line ended right after "function" or
   "class", so the identifier is expected on a following line.  */
4074 bool search_identifier = FALSE;
4076 LOOP_ON_INPUT_LINES (inf, lb, cp)
4078 cp = skip_spaces (cp);
4079 if (search_identifier
4080 && *cp != '\0')
4082 while (!notinname (*cp))
4083 cp++;
4084 pfnote (NULL, TRUE,
4085 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4086 search_identifier = FALSE;
4088 else if (LOOKING_AT (cp, "function"))
/* A `&' before the name is stepped over.  */
4090 if(*cp == '&')
4091 cp = skip_spaces (cp+1);
4092 if(*cp != '\0')
4094 while (!notinname (*cp))
4095 cp++;
4096 pfnote (NULL, TRUE,
4097 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4099 else
4100 search_identifier = TRUE;
4102 else if (LOOKING_AT (cp, "class"))
4104 if (*cp != '\0')
/* Unlike function names, a class name extends to the next white
   space.  */
4106 while (*cp != '\0' && !iswhite (*cp))
4107 cp++;
4108 pfnote (NULL, FALSE,
4109 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4111 else
4112 search_identifier = TRUE;
/* define ("NAME" or define ('NAME': the tag text ends at the closing
   quote.  Note that this condition advances cp as a side effect even
   when a later clause fails.  */
4114 else if (strneq (cp, "define", 6)
4115 && (cp = skip_spaces (cp+6))
4116 && *cp++ == '('
4117 && (*cp == '"' || *cp == '\''))
4119 char quote = *cp++;
4120 while (*cp != quote && *cp != '\0')
4121 cp++;
4122 pfnote (NULL, FALSE,
4123 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4125 else if (members
4126 && LOOKING_AT (cp, "var")
4127 && *cp == '$')
4129 while (!notinname(*cp))
4130 cp++;
4131 pfnote (NULL, FALSE,
4132 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4139 * Cobol tag functions
4140 * We could look for anything that could be a paragraph name.
4141 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4142 * Idea by Corny de Souza (1993)
4144 static void
4145 Cobol_paragraphs (inf)
4146 FILE *inf;
4148 register char *bp, *ep;
4150 LOOP_ON_INPUT_LINES (inf, lb, bp)
4152 if (lb.len < 9)
4153 continue;
4154 bp += 8;
4156 /* If eoln, compiler option or comment ignore whole line. */
4157 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4158 continue;
4160 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4161 continue;
4162 if (*ep++ == '.')
4163 pfnote (savenstr (bp, ep-bp), TRUE,
4164 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4170 * Makefile support
4171 * Idea by Assar Westerlund <assar@sics.se> (2001)
4173 static void
4174 Makefile_targets (inf)
4175 FILE *inf;
4177 register char *bp;
4179 LOOP_ON_INPUT_LINES (inf, lb, bp)
4181 if (*bp == '\t' || *bp == '#')
4182 continue;
4183 while (*bp != '\0' && *bp != '=' && *bp != ':')
4184 bp++;
4185 if (*bp == ':')
4186 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4187 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4193 * Pascal parsing
4194 * Original code by Mosur K. Mohan (1989)
4196 * Locates tags for procedures & functions. Doesn't do any type- or
4197 * var-definitions. It does look for the keyword "extern" or
4198 * "forward" immediately following the procedure statement; if found,
4199 * the tag is skipped.
4201 static void
4202 Pascal_functions (inf)
4203 FILE *inf;
4205 linebuffer tline; /* mostly copied from C_entries */
4206 long save_lcno;
4207 int save_lineno, save_len;
4208 char c, *cp, *namebuf;
4210 bool /* each of these flags is TRUE iff: */
4211 incomment, /* point is inside a comment */
4212 inquote, /* point is inside '..' string */
4213 get_tagname, /* point is after PROCEDURE/FUNCTION
4214 keyword, so next item = potential tag */
4215 found_tag, /* point is after a potential tag */
4216 inparms, /* point is within parameter-list */
4217 verify_tag; /* point has passed the parm-list, so the
4218 next token will determine whether this
4219 is a FORWARD/EXTERN to be ignored, or
4220 whether it is a real tag */
4222 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4223 namebuf = NULL; /* keep compiler quiet */
4224 lineno = 0;
4225 charno = 0;
4226 dbp = lb.buffer;
4227 *dbp = '\0';
4228 initbuffer (&tline);
4230 incomment = inquote = FALSE;
4231 found_tag = FALSE; /* have a proc name; check if extern */
4232 get_tagname = FALSE; /* have found "procedure" keyword */
4233 inparms = FALSE; /* found '(' after "proc" */
4234 verify_tag = FALSE; /* check if "extern" is ahead */
4237 while (!feof (inf)) /* long main loop to get next char */
4239 c = *dbp++;
4240 if (c == '\0') /* if end of line */
4242 lineno++;
4243 linecharno = charno;
4244 charno += readline (&lb, inf);
4245 dbp = lb.buffer;
4246 if (*dbp == '\0')
4247 continue;
4248 if (!((found_tag && verify_tag)
4249 || get_tagname))
4250 c = *dbp++; /* only if don't need *dbp pointing
4251 to the beginning of the name of
4252 the procedure or function */
4254 if (incomment)
4256 if (c == '}') /* within { } comments */
4257 incomment = FALSE;
4258 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4260 dbp++;
4261 incomment = FALSE;
4263 continue;
4265 else if (inquote)
4267 if (c == '\'')
4268 inquote = FALSE;
4269 continue;
4271 else
4272 switch (c)
4274 case '\'':
4275 inquote = TRUE; /* found first quote */
4276 continue;
4277 case '{': /* found open { comment */
4278 incomment = TRUE;
4279 continue;
4280 case '(':
4281 if (*dbp == '*') /* found open (* comment */
4283 incomment = TRUE;
4284 dbp++;
4286 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4287 inparms = TRUE;
4288 continue;
4289 case ')': /* end of parms list */
4290 if (inparms)
4291 inparms = FALSE;
4292 continue;
4293 case ';':
4294 if (found_tag && !inparms) /* end of proc or fn stmt */
4296 verify_tag = TRUE;
4297 break;
4299 continue;
4301 if (found_tag && verify_tag && (*dbp != ' '))
4303 /* check if this is an "extern" declaration */
4304 if (*dbp == '\0')
4305 continue;
4306 if (lowcase (*dbp == 'e'))
4308 if (nocase_tail ("extern")) /* superfluous, really! */
4310 found_tag = FALSE;
4311 verify_tag = FALSE;
4314 else if (lowcase (*dbp) == 'f')
4316 if (nocase_tail ("forward")) /* check for forward reference */
4318 found_tag = FALSE;
4319 verify_tag = FALSE;
4322 if (found_tag && verify_tag) /* not external proc, so make tag */
4324 found_tag = FALSE;
4325 verify_tag = FALSE;
4326 pfnote (namebuf, TRUE,
4327 tline.buffer, save_len, save_lineno, save_lcno);
4328 continue;
4331 if (get_tagname) /* grab name of proc or fn */
4333 if (*dbp == '\0')
4334 continue;
4336 /* save all values for later tagging */
4337 linebuffer_setlen (&tline, lb.len);
4338 strcpy (tline.buffer, lb.buffer);
4339 save_lineno = lineno;
4340 save_lcno = linecharno;
4342 /* grab block name */
4343 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4344 continue;
4345 namebuf = savenstr (dbp, cp-dbp);
4346 dbp = cp; /* set dbp to e-o-token */
4347 save_len = dbp - lb.buffer + 1;
4348 get_tagname = FALSE;
4349 found_tag = TRUE;
4350 continue;
4352 /* and proceed to check for "extern" */
4354 else if (!incomment && !inquote && !found_tag)
4356 /* check for proc/fn keywords */
4357 switch (lowcase (c))
4359 case 'p':
4360 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4361 get_tagname = TRUE;
4362 continue;
4363 case 'f':
4364 if (nocase_tail ("unction"))
4365 get_tagname = TRUE;
4366 continue;
4369 } /* while not eof */
4371 free (tline.buffer);
4376 * Lisp tag functions
4377 * look for (def or (DEF, quote or QUOTE
4380 static void L_getit __P((void));
4382 static void
4383 L_getit ()
4385 if (*dbp == '\'') /* Skip prefix quote */
4386 dbp++;
4387 else if (*dbp == '(')
4389 dbp++;
4390 /* Try to skip "(quote " */
4391 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4392 /* Ok, then skip "(" before name in (defstruct (foo)) */
4393 dbp = skip_spaces (dbp);
4395 get_tag (dbp);
4398 static void
4399 Lisp_functions (inf)
4400 FILE *inf;
4402 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4404 if (dbp[0] != '(')
4405 continue;
4407 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4409 dbp = skip_non_spaces (dbp);
4410 dbp = skip_spaces (dbp);
4411 L_getit ();
4413 else
4415 /* Check for (foo::defmumble name-defined ... */
4417 dbp++;
4418 while (!notinname (*dbp) && *dbp != ':');
4419 if (*dbp == ':')
4422 dbp++;
4423 while (*dbp == ':');
4425 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4427 dbp = skip_non_spaces (dbp);
4428 dbp = skip_spaces (dbp);
4429 L_getit ();
4438 * Postscript tag functions
4439 * Just look for lines where the first character is '/'
4440 * Also look at "defineps" for PSWrap
4441 * Ideas by:
4442 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4443 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4445 static void
4446 Postscript_functions (inf)
4447 FILE *inf;
4449 register char *bp, *ep;
4451 LOOP_ON_INPUT_LINES (inf, lb, bp)
4453 if (bp[0] == '/')
4455 for (ep = bp+1;
4456 *ep != '\0' && *ep != ' ' && *ep != '{';
4457 ep++)
4458 continue;
4459 pfnote (savenstr (bp, ep-bp), TRUE,
4460 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4462 else if (LOOKING_AT (bp, "defineps"))
4463 get_tag (bp);
4469 * Scheme tag functions
4470 * look for (def... xyzzy
4471 * (def... (xyzzy
4472 * (def ... ((...(xyzzy ....
4473 * (set! xyzzy
4474 * Original code by Ken Haase (1985?)
4477 static void
4478 Scheme_functions (inf)
4479 FILE *inf;
4481 register char *bp;
4483 LOOP_ON_INPUT_LINES (inf, lb, bp)
4485 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4487 bp = skip_non_spaces (bp+4);
4488 /* Skip over open parens and white space */
4489 while (notinname (*bp))
4490 bp++;
4491 get_tag (bp);
4493 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4494 get_tag (bp);
/* Find tags in TeX and LaTeX input files.  */

/* One TeX control sequence that defines a tag, with its length.
   TEX_toktab is a table of these.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;			/* control sequence, without the escape */
  int len;			/* length of name */
};

/* Table with tag tokens; built on first use by TeX_commands.  */
struct TEX_tabent *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.  */
char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

static void TEX_mode __P((FILE *));
static struct TEX_tabent *TEX_decode_env __P((char *, char *));
static int TEX_Token __P((char *));

/* Escape and grouping characters in use; reset for each file by
   TEX_mode.  */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
4528 * TeX/LaTeX scanning loop.
4530 static void
4531 TeX_commands (inf)
4532 FILE *inf;
4534 char *cp, *lasthit;
4535 register int i;
4537 /* Select either \ or ! as escape character. */
4538 TEX_mode (inf);
4540 /* Initialize token table once from environment. */
4541 if (!TEX_toktab)
4542 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4544 LOOP_ON_INPUT_LINES (inf, lb, cp)
4546 lasthit = cp;
4547 /* Look at each esc in line. */
4548 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4550 if (*++cp == '\0')
4551 break;
4552 linecharno += cp - lasthit;
4553 lasthit = cp;
4554 i = TEX_Token (lasthit);
4555 if (i >= 0)
4557 /* We seem to include the TeX command in the tag name.
4558 register char *p;
4559 for (p = lasthit + TEX_toktab[i].len;
4560 *p != '\0' && *p != TEX_clgrp;
4561 p++)
4562 continue; */
4563 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4564 lb.buffer, lb.len, lineno, linecharno);
4565 break; /* We only tag a line once */
4571 #define TEX_LESC '\\'
4572 #define TEX_SESC '!'
4573 #define TEX_cmt '%'
4575 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4576 chars accordingly. */
4577 static void
4578 TEX_mode (inf)
4579 FILE *inf;
4581 int c;
4583 while ((c = getc (inf)) != EOF)
4585 /* Skip to next line if we hit the TeX comment char. */
4586 if (c == TEX_cmt)
4587 while (c != '\n')
4588 c = getc (inf);
4589 else if (c == TEX_LESC || c == TEX_SESC )
4590 break;
4593 if (c == TEX_LESC)
4595 TEX_esc = TEX_LESC;
4596 TEX_opgrp = '{';
4597 TEX_clgrp = '}';
4599 else
4601 TEX_esc = TEX_SESC;
4602 TEX_opgrp = '<';
4603 TEX_clgrp = '>';
4605 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4606 No attempt is made to correct the situation. */
4607 rewind (inf);
4610 /* Read environment and prepend it to the default string.
4611 Build token table. */
4612 static struct TEX_tabent *
4613 TEX_decode_env (evarname, defenv)
4614 char *evarname;
4615 char *defenv;
4617 register char *env, *p;
4619 struct TEX_tabent *tab;
4620 int size, i;
4622 /* Append default string to environment. */
4623 env = getenv (evarname);
4624 if (!env)
4625 env = defenv;
4626 else
4628 char *oldenv = env;
4629 env = concat (oldenv, defenv, "");
4632 /* Allocate a token table */
4633 for (size = 1, p = env; p;)
4634 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4635 size++;
4636 /* Add 1 to leave room for null terminator. */
4637 tab = xnew (size + 1, struct TEX_tabent);
4639 /* Unpack environment string into token table. Be careful about */
4640 /* zero-length strings (leading ':', "::" and trailing ':') */
4641 for (i = 0; *env;)
4643 p = etags_strchr (env, ':');
4644 if (!p) /* End of environment string. */
4645 p = env + strlen (env);
4646 if (p - env > 0)
4647 { /* Only non-zero strings. */
4648 tab[i].name = savenstr (env, p - env);
4649 tab[i].len = strlen (tab[i].name);
4650 i++;
4652 if (*p)
4653 env = p + 1;
4654 else
4656 tab[i].name = NULL; /* Mark end of table. */
4657 tab[i].len = 0;
4658 break;
4661 return tab;
4664 /* If the text at CP matches one of the tag-defining TeX command names,
4665 return the pointer to the first occurrence of that command in TEX_toktab.
4666 Otherwise return -1.
4667 Keep the capital `T' in `token' for dumb truncating compilers
4668 (this distinguishes it from `TEX_toktab' */
4669 static int
4670 TEX_Token (cp)
4671 char *cp;
4673 int i;
4675 for (i = 0; TEX_toktab[i].len > 0; i++)
4676 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4677 return i;
4678 return -1;
4682 /* Texinfo support. Dave Love, Mar. 2000. */
4683 static void
4684 Texinfo_nodes (inf)
4685 FILE * inf;
4687 char *cp, *start;
4688 LOOP_ON_INPUT_LINES (inf, lb, cp)
4689 if (LOOKING_AT (cp, "@node"))
4691 start = cp;
4692 while (*cp != '\0' && *cp != ',')
4693 cp++;
4694 pfnote (savenstr (start, cp - start), TRUE,
4695 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4701 * Prolog support
4703 * Assumes that the predicate or rule starts at column 0.
4704 * Only the first clause of a predicate or rule is added.
4705 * Original code by Sunichirou Sugou (1989)
4706 * Rewritten by Anders Lindgren (1996)
4708 static int prolog_pr __P((char *, char *));
4709 static void prolog_skip_comment __P((linebuffer *, FILE *));
4710 static int prolog_atom __P((char *, int));
4712 static void
4713 Prolog_functions (inf)
4714 FILE *inf;
4716 char *cp, *last;
4717 int len;
4718 int allocated;
4720 allocated = 0;
4721 len = 0;
4722 last = NULL;
4724 LOOP_ON_INPUT_LINES (inf, lb, cp)
4726 if (cp[0] == '\0') /* Empty line */
4727 continue;
4728 else if (iswhite (cp[0])) /* Not a predicate */
4729 continue;
4730 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4731 prolog_skip_comment (&lb, inf);
4732 else if ((len = prolog_pr (cp, last)) > 0)
4734 /* Predicate or rule. Store the function name so that we
4735 only generate a tag for the first clause. */
4736 if (last == NULL)
4737 last = xnew(len + 1, char);
4738 else if (len + 1 > allocated)
4739 xrnew (last, len + 1, char);
4740 allocated = len + 1;
4741 strncpy (last, cp, len);
4742 last[len] = '\0';
4748 static void
4749 prolog_skip_comment (plb, inf)
4750 linebuffer *plb;
4751 FILE *inf;
4753 char *cp;
4757 for (cp = plb->buffer; *cp != '\0'; cp++)
4758 if (cp[0] == '*' && cp[1] == '/')
4759 return;
4760 lineno++;
4761 linecharno += readline (plb, inf);
4763 while (!feof(inf));
4767 * A predicate or rule definition is added if it matches:
4768 * <beginning of line><Prolog Atom><whitespace>(
4769 * or <beginning of line><Prolog Atom><whitespace>:-
4771 * It is added to the tags database if it doesn't match the
4772 * name of the previous clause header.
4774 * Return the size of the name of the predicate or rule, or 0 if no
4775 * header was found.
4777 static int
4778 prolog_pr (s, last)
4779 char *s;
4780 char *last; /* Name of last clause. */
4782 int pos;
4783 int len;
4785 pos = prolog_atom (s, 0);
4786 if (pos < 1)
4787 return 0;
4789 len = pos;
4790 pos = skip_spaces (s + pos) - s;
4792 if ((s[pos] == '.'
4793 || (s[pos] == '(' && (pos += 1))
4794 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
4795 && (last == NULL /* save only the first clause */
4796 || len != strlen (last)
4797 || !strneq (s, last, len)))
4799 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4800 return len;
4802 else
4803 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
	continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote.  */
  for (pos++;;)
    {
      switch (s[pos])
	{
	case '\'':
	  pos++;
	  if (s[pos] != '\'')
	    return pos - start;	/* that was the closing quote */
	  pos++;		/* A double quote */
	  break;

	case '\0':
	  /* Multiline quoted atoms are ignored. */
	  return -1;

	case '\\':
	  if (s[pos+1] == '\0')
	    return -1;
	  pos += 2;
	  break;

	default:
	  pos++;
	  break;
	}
    }
}
4867 * Support for Erlang
4869 * Generates tags for functions, defines, and records.
4870 * Assumes that Erlang functions start at column 0.
4871 * Original code by Anders Lindgren (1996)
4873 static int erlang_func __P((char *, char *));
4874 static void erlang_attribute __P((char *));
4875 static int erlang_atom __P((char *, int));
4877 static void
4878 Erlang_functions (inf)
4879 FILE *inf;
4881 char *cp, *last;
4882 int len;
4883 int allocated;
4885 allocated = 0;
4886 len = 0;
4887 last = NULL;
4889 LOOP_ON_INPUT_LINES (inf, lb, cp)
4891 if (cp[0] == '\0') /* Empty line */
4892 continue;
4893 else if (iswhite (cp[0])) /* Not function nor attribute */
4894 continue;
4895 else if (cp[0] == '%') /* comment */
4896 continue;
4897 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4898 continue;
4899 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4901 erlang_attribute (cp);
4902 last = NULL;
4904 else if ((len = erlang_func (cp, last)) > 0)
4907 * Function. Store the function name so that we only
4908 * generates a tag for the first clause.
4910 if (last == NULL)
4911 last = xnew (len + 1, char);
4912 else if (len + 1 > allocated)
4913 xrnew (last, len + 1, char);
4914 allocated = len + 1;
4915 strncpy (last, cp, len);
4916 last[len] = '\0';
4923 * A function definition is added if it matches:
4924 * <beginning of line><Erlang Atom><whitespace>(
4926 * It is added to the tags database if it doesn't match the
4927 * name of the previous clause header.
4929 * Return the size of the name of the function, or 0 if no function
4930 * was found.
4932 static int
4933 erlang_func (s, last)
4934 char *s;
4935 char *last; /* Name of last clause. */
4937 int pos;
4938 int len;
4940 pos = erlang_atom (s, 0);
4941 if (pos < 1)
4942 return 0;
4944 len = pos;
4945 pos = skip_spaces (s + pos) - s;
4947 /* Save only the first clause. */
4948 if (s[pos++] == '('
4949 && (last == NULL
4950 || len != (int)strlen (last)
4951 || !strneq (s, last, len)))
4953 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4954 return len;
4957 return 0;
4962 * Handle attributes. Currently, tags are generated for defines
4963 * and records.
4965 * They are on the form:
4966 * -define(foo, bar).
4967 * -define(Foo(M, N), M+N).
4968 * -record(graph, {vtab = notable, cyclic = true}).
4970 static void
4971 erlang_attribute (s)
4972 char *s;
4974 int pos;
4975 int len;
4977 if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
4979 if (s[pos++] == '(')
4981 pos = skip_spaces (s + pos) - s;
4982 len = erlang_atom (s, pos);
4983 if (len != 0)
4984 pfnote (savenstr (& s[pos], len), TRUE,
4985 s, pos + len, lineno, linecharno);
4988 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * The atom is either a letter or underscore followed by alphanumerics
 * and underscores, or a string in single quotes in which a backslash
 * quotes the next character.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
	continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote.  */
  for (pos++;;)
    {
      switch (s[pos])
	{
	case '\'':
	  pos++;
	  return pos - start;

	case '\0':
	  /* Multiline quoted atoms are ignored. */
	  return -1;

	case '\\':
	  if (s[pos+1] == '\0')
	    return -1;
	  pos += 2;
	  break;

	default:
	  pos++;
	  break;
	}
    }
}
5043 #ifdef ETAGS_REGEXPS
5045 static char *scan_separators __P((char *));
5046 static void analyse_regex __P((char *, bool));
5047 static void add_regex __P((char *, bool, language *));
5048 static char *substitute __P((char *, char *, struct re_registers *));
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Any other backslash sequence is copied unchanged, and a backslash
   at the very end of the string is dropped.
   Returns pointer to terminating separator, or to the terminating
   NUL if there is none.  Works in place.  Null terminates name
   string. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *out = name;		/* where the next output char goes */
  char *in = name + 1;		/* next input char; always beyond OUT */

  while (*in != '\0')
    {
      if (*in == '\\')
	{
	  /* A quoted character follows, unless the string ends.  */
	  in++;
	  if (*in == '\0')
	    break;
	  if (*in == 't')
	    *out++ = '\t';
	  else if (*in == sep)
	    *out++ = sep;
	  else
	    {
	      /* Something else is quoted, so preserve the quote. */
	      *out++ = '\\';
	      *out++ = *in;
	    }
	}
      else if (*in == sep)
	break;
      else
	*out++ = *in;
      in++;
    }

  /* Terminate copied string. */
  *out = '\0';
  return in;
}
5093 /* Look at the argument of --regex or --no-regex and do the right
5094 thing. Same for each line of a regexp file. */
5095 static void
5096 analyse_regex (regex_arg, ignore_case)
5097 char *regex_arg;
5098 bool ignore_case;
5100 if (regex_arg == NULL)
5102 free_patterns (); /* --no-regex: remove existing regexps */
5103 return;
5106 /* A real --regexp option or a line in a regexp file. */
5107 switch (regex_arg[0])
5109 /* Comments in regexp file or null arg to --regex. */
5110 case '\0':
5111 case ' ':
5112 case '\t':
5113 break;
5115 /* Read a regex file. This is recursive and may result in a
5116 loop, which will stop when the file descriptors are exhausted. */
5117 case '@':
5119 FILE *regexfp;
5120 linebuffer regexbuf;
5121 char *regexfile = regex_arg + 1;
5123 /* regexfile is a file containing regexps, one per line. */
5124 regexfp = fopen (regexfile, "r");
5125 if (regexfp == NULL)
5127 pfatal (regexfile);
5128 return;
5130 initbuffer (&regexbuf);
5131 while (readline_internal (&regexbuf, regexfp) > 0)
5132 analyse_regex (regexbuf.buffer, ignore_case);
5133 free (regexbuf.buffer);
5134 fclose (regexfp);
5136 break;
5138 /* Regexp to be used for a specific language only. */
5139 case '{':
5141 language *lang;
5142 char *lang_name = regex_arg + 1;
5143 char *cp;
5145 for (cp = lang_name; *cp != '}'; cp++)
5146 if (*cp == '\0')
5148 error ("unterminated language name in regex: %s", regex_arg);
5149 return;
5151 *cp = '\0';
5152 lang = get_language_from_langname (lang_name);
5153 if (lang == NULL)
5154 return;
5155 add_regex (cp + 1, ignore_case, lang);
5157 break;
5159 /* Regexp to be used for any language. */
5160 default:
5161 add_regex (regex_arg, ignore_case, NULL);
5162 break;
5166 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5167 expression, into a real regular expression by compiling it. */
5168 static void
5169 add_regex (regexp_pattern, ignore_case, lang)
5170 char *regexp_pattern;
5171 bool ignore_case;
5172 language *lang;
5174 static struct re_pattern_buffer zeropattern;
5175 char *name;
5176 const char *err;
5177 struct re_pattern_buffer *patbuf;
5178 pattern *pp;
5181 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5183 error ("%s: unterminated regexp", regexp_pattern);
5184 return;
5186 name = scan_separators (regexp_pattern);
5187 if (regexp_pattern[0] == '\0')
5189 error ("null regexp", (char *)NULL);
5190 return;
5192 (void) scan_separators (name);
5194 patbuf = xnew (1, struct re_pattern_buffer);
5195 *patbuf = zeropattern;
5196 if (ignore_case)
5197 patbuf->translate = lc_trans; /* translation table to fold case */
5199 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5200 if (err != NULL)
5202 error ("%s while compiling pattern", err);
5203 return;
5206 pp = p_head;
5207 p_head = xnew (1, pattern);
5208 p_head->regex = savestr (regexp_pattern);
5209 p_head->p_next = pp;
5210 p_head->lang = lang;
5211 p_head->pat = patbuf;
5212 p_head->name_pattern = savestr (name);
5213 p_head->error_signaled = FALSE;
5217 * Do the substitutions indicated by the regular expression and
5218 * arguments.
5220 static char *
5221 substitute (in, out, regs)
5222 char *in, *out;
5223 struct re_registers *regs;
5225 char *result, *t;
5226 int size, dig, diglen;
5228 result = NULL;
5229 size = strlen (out);
5231 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5232 if (out[size - 1] == '\\')
5233 fatal ("pattern error in \"%s\"", out);
5234 for (t = etags_strchr (out, '\\');
5235 t != NULL;
5236 t = etags_strchr (t + 2, '\\'))
5237 if (ISDIGIT (t[1]))
5239 dig = t[1] - '0';
5240 diglen = regs->end[dig] - regs->start[dig];
5241 size += diglen - 2;
5243 else
5244 size -= 1;
5246 /* Allocate space and do the substitutions. */
5247 result = xnew (size + 1, char);
5249 for (t = result; *out != '\0'; out++)
5250 if (*out == '\\' && ISDIGIT (*++out))
5252 dig = *out - '0';
5253 diglen = regs->end[dig] - regs->start[dig];
5254 strncpy (t, in + regs->start[dig], diglen);
5255 t += diglen;
5257 else
5258 *t++ = *out;
5259 *t = '\0';
5261 assert (t <= result + size && t - result == (int)strlen (result));
5263 return result;
5266 /* Deallocate all patterns. */
5267 static void
5268 free_patterns ()
5270 pattern *pp;
5271 while (p_head != NULL)
5273 pp = p_head->p_next;
5274 free (p_head->regex);
5275 free (p_head->name_pattern);
5276 free (p_head);
5277 p_head = pp;
5279 return;
5281 #endif /* ETAGS_REGEXPS */
5284 static bool
5285 nocase_tail (cp)
5286 char *cp;
5288 register int len = 0;
5290 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5291 cp++, len++;
5292 if (*cp == '\0' && !intoken (dbp[len]))
5294 dbp += len;
5295 return TRUE;
5297 return FALSE;
5300 static char *
5301 get_tag (bp)
5302 register char *bp;
5304 register char *cp, *name;
5306 if (*bp == '\0')
5307 return NULL;
5308 /* Go till you get to white space or a syntactic break */
5309 for (cp = bp + 1; !notinname (*cp); cp++)
5310 continue;
5311 name = savenstr (bp, cp-bp);
5312 pfnote (name, TRUE,
5313 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5314 return name;
5317 /* Initialize a linebuffer for use */
5318 static void
5319 initbuffer (lbp)
5320 linebuffer *lbp;
5322 lbp->size = (DEBUG) ? 3 : 200;
5323 lbp->buffer = xnew (lbp->size, char);
5324 lbp->buffer[0] = '\0';
5325 lbp->len = 0;
5329 * Read a line of text from `stream' into `lbp', excluding the
5330 * newline or CR-NL, if any. Return the number of characters read from
5331 * `stream', which is the length of the line including the newline.
5333 * On DOS or Windows we do not count the CR character, if any, before the
5334 * NL, in the returned length; this mirrors the behavior of emacs on those
5335 * platforms (for text files, it translates CR-NL to NL as it reads in the
5336 * file).
5338 static long
5339 readline_internal (lbp, stream)
5340 linebuffer *lbp;
5341 register FILE *stream;
5343 char *buffer = lbp->buffer;
5344 register char *p = lbp->buffer;
5345 register char *pend;
5346 int chars_deleted;
5348 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5350 while (1)
5352 register int c = getc (stream);
5353 if (p == pend)
5355 /* We're at the end of linebuffer: expand it. */
5356 lbp->size *= 2;
5357 xrnew (buffer, lbp->size, char);
5358 p += buffer - lbp->buffer;
5359 pend = buffer + lbp->size;
5360 lbp->buffer = buffer;
5362 if (c == EOF)
5364 *p = '\0';
5365 chars_deleted = 0;
5366 break;
5368 if (c == '\n')
5370 if (p > buffer && p[-1] == '\r')
5372 p -= 1;
5373 #ifdef DOS_NT
5374 /* Assume CRLF->LF translation will be performed by Emacs
5375 when loading this file, so CRs won't appear in the buffer.
5376 It would be cleaner to compensate within Emacs;
5377 however, Emacs does not know how many CRs were deleted
5378 before any given point in the file. */
5379 chars_deleted = 1;
5380 #else
5381 chars_deleted = 2;
5382 #endif
5384 else
5386 chars_deleted = 1;
5388 *p = '\0';
5389 break;
5391 *p++ = c;
5393 lbp->len = p - buffer;
5395 return lbp->len + chars_deleted;
5399 * Like readline_internal, above, but in addition try to match the
5400 * input line against relevant regular expressions.
5402 static long
5403 readline (lbp, stream)
5404 linebuffer *lbp;
5405 FILE *stream;
5407 /* Read new line. */
5408 long result = readline_internal (lbp, stream);
5409 #ifdef ETAGS_REGEXPS
5410 int match;
5411 pattern *pp;
5413 /* Match against relevant patterns. */
5414 if (lbp->len > 0)
5415 for (pp = p_head; pp != NULL; pp = pp->p_next)
5417 /* Only use generic regexps or those for the current language. */
5418 if (pp->lang != NULL && pp->lang != curlang)
5419 continue;
5421 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5422 switch (match)
5424 case -2:
5425 /* Some error. */
5426 if (!pp->error_signaled)
5428 error ("error while matching \"%s\"", pp->regex);
5429 pp->error_signaled = TRUE;
5431 break;
5432 case -1:
5433 /* No match. */
5434 break;
5435 default:
5436 /* Match occurred. Construct a tag. */
5437 if (pp->name_pattern[0] != '\0')
5439 /* Make a named tag. */
5440 char *name = substitute (lbp->buffer,
5441 pp->name_pattern, &pp->regs);
5442 if (name != NULL)
5443 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5445 else
5447 /* Make an unnamed tag. */
5448 pfnote ((char *)NULL, TRUE,
5449 lbp->buffer, match, lineno, linecharno);
5451 break;
5454 #endif /* ETAGS_REGEXPS */
5456 return result;
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5472 * Return a pointer to a space of size LEN+1 allocated with xnew where
5473 * the string CP has been copied for at most the first LEN characters.
5475 static char *
5476 savenstr (cp, len)
5477 char *cp;
5478 int len;
5480 register char *dp;
5482 dp = xnew (len + 1, char);
5483 strncpy (dp, cp, len);
5484 dp[len] = '\0';
5485 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to char before the comparison, and
 * the terminating null is part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Without the conversion a value such as 0xE9 could never match
     where plain char is signed.  */
  char wanted = (char) c;

  r = NULL;
  do
    {
      if (*sp == wanted)
	r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to char before the comparison, and
 * the terminating null is part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Without the conversion a value such as 0xE9 could never match
     where plain char is signed.  */
  char wanted = (char) c;

  do
    {
      if (*sp == wanted)
	return (char *)sp;
    } while (*sp++);
  return NULL;
}
/* Skip spaces, return new pointer: the address of the first character
   at or after CP for which iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Skip non spaces, return new pointer: the address of the first
   character at or after CP that is either the terminating null or one
   for which iswhite is true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0')
	break;
      if (iswhite (*cp))
	break;
      cp++;
    }
  return cp;
}
5550 /* Print error message and exit. */
5551 void
5552 fatal (s1, s2)
5553 char *s1, *s2;
5555 error (s1, s2);
5556 exit (BAD);
5559 static void
5560 pfatal (s1)
5561 char *s1;
5563 perror (s1);
5564 exit (BAD);
5567 static void
5568 suggest_asking_for_help ()
5570 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5571 progname,
5572 #ifdef LONG_OPTIONS
5573 "--help"
5574 #else
5575 "-h"
5576 #endif
5578 exit (BAD);
5581 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5582 static void
5583 error (s1, s2)
5584 const char *s1, *s2;
5586 fprintf (stderr, "%s: ", progname);
5587 fprintf (stderr, s1, s2);
5588 fprintf (stderr, "\n");
5591 /* Return a newly-allocated string whose contents
5592 concatenate those of s1, s2, s3. */
5593 static char *
5594 concat (s1, s2, s3)
5595 char *s1, *s2, *s3;
5597 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5598 char *result = xnew (len1 + len2 + len3 + 1, char);
5600 strcpy (result, s1);
5601 strcpy (result + len1, s2);
5602 strcpy (result + len1 + len2, s3);
5603 result[len1 + len2 + len3] = '\0';
5605 return result;
5609 /* Does the same work as the system V getcwd, but does not need to
5610 guess the buffer size in advance. */
5611 static char *
5612 etags_getcwd ()
5614 #ifdef HAVE_GETCWD
5615 int bufsize = 200;
5616 char *path = xnew (bufsize, char);
5618 while (getcwd (path, bufsize) == NULL)
5620 if (errno != ERANGE)
5621 pfatal ("getcwd");
5622 bufsize *= 2;
5623 free (path);
5624 path = xnew (bufsize, char);
5627 canonicalize_filename (path);
5628 return path;
5630 #else /* not HAVE_GETCWD */
5631 #if MSDOS
5633 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5635 getwd (path);
5637 for (p = path; *p != '\0'; p++)
5638 if (*p == '\\')
5639 *p = '/';
5640 else
5641 *p = lowcase (*p);
5643 return strdup (path);
5644 #else /* not MSDOS */
5645 linebuffer path;
5646 FILE *pipe;
5648 initbuffer (&path);
5649 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5650 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5651 pfatal ("pwd");
5652 pclose (pipe);
5654 return path.buffer;
5655 #endif /* not MSDOS */
5656 #endif /* not HAVE_GETCWD */
5659 /* Return a newly allocated string containing the file name of FILE
5660 relative to the absolute directory DIR (which should end with a slash). */
5661 static char *
5662 relative_filename (file, dir)
5663 char *file, *dir;
5665 char *fp, *dp, *afn, *res;
5666 int i;
5668 /* Find the common root of file and dir (with a trailing slash). */
5669 afn = absolute_filename (file, cwd);
5670 fp = afn;
5671 dp = dir;
5672 while (*fp++ == *dp++)
5673 continue;
5674 fp--, dp--; /* back to the first differing char */
5675 #ifdef DOS_NT
5676 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5677 return afn;
5678 #endif
5679 do /* look at the equal chars until '/' */
5680 fp--, dp--;
5681 while (*fp != '/');
5683 /* Build a sequence of "../" strings for the resulting relative file name. */
5684 i = 0;
5685 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5686 i += 1;
5687 res = xnew (3*i + strlen (fp + 1) + 1, char);
5688 res[0] = '\0';
5689 while (i-- > 0)
5690 strcat (res, "../");
5692 /* Add the file name relative to the common root of file and dir. */
5693 strcat (res, fp + 1);
5694 free (afn);
5696 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).
   If FILE is not already absolute it is appended to DIR.  The
   "/dirname/.." and "/." components are then deleted from the result;
   if nothing is left, "/" is returned.  On DOS_NT a relative name
   with a drive letter is a fatal error.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Back up to the start of the preceding component. */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Source and destination overlap, which strcpy does not
		 allow; memmove (terminator included) does.  */
	      memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Overlapping regions again: see above.  */
	      memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: release the empty string and return
	 the root instead.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).
   FILE is canonicalized in place.  If it contains no slash, a copy of
   DIR is returned.  Otherwise FILE is cut just after its last slash
   while absolute_filename is called, and then restored.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *result;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE right after its directory part. */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  result = absolute_filename (file, dir);
  *after = saved;

  return result;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, on DOS_NT, with
   a letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  int absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Translate backslashes into slashes.  Works in place.
   This is done on DOS_NT only, where a lowercase drive letter
   is also converted to upper case.  Elsewhere FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes. */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
5814 /* Set the minimum size of a string contained in a linebuffer. */
5815 static void
5816 linebuffer_setlen (lbp, toksize)
5817 linebuffer *lbp;
5818 int toksize;
5820 while (lbp->size <= toksize)
5822 lbp->size *= 2;
5823 xrnew (lbp->buffer, lbp->size, char);
5825 lbp->len = toksize;
5828 /* Like malloc but get fatal error if memory is exhausted. */
5830 xmalloc (size)
5831 unsigned int size;
5833 PTR result = (PTR) malloc (size);
5834 if (result == NULL)
5835 fatal ("virtual memory exhausted", (char *)NULL);
5836 return result;
5840 xrealloc (ptr, size)
5841 char *ptr;
5842 unsigned int size;
5844 PTR result = (PTR) realloc (ptr, size);
5845 if (result == NULL)
5846 fatal ("virtual memory exhausted", (char *)NULL);
5847 return result;
5851 * Local Variables:
5852 * c-indentation-style: gnu
5853 * indent-tabs-mode: t
5854 * tab-width: 8
5855 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer")
5856 * End: