(Managing Overlays): overlay-buffer returns nil for deleted overlays.
[emacs.git] / lib-src / etags.c
bloba06d29bfd1123a59b06f0969de31e21c143895e9
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002, 2004
3 Free Software Foundation, Inc. and Ken Arnold
5 This file is not considered part of GNU Emacs.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Line-by-line regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
38 * If you want to add support for a new language, start by looking at the LUA
39 * language, which is the simplest. Alternatively, consider shipping a
40 * configuration file containing regexp definitions for etags.
43 char pot_etags_version[] = "@(#) pot revision number is 17.5";
45 #define TRUE 1
46 #define FALSE 0
48 #ifdef DEBUG
49 # undef DEBUG
50 # define DEBUG TRUE
51 #else
52 # define DEBUG FALSE
53 # define NDEBUG /* disable assert */
54 #endif
56 #ifdef HAVE_CONFIG_H
57 # include <config.h>
58 /* On some systems, Emacs defines static as nothing for the sake
59 of unexec. We don't want that here since we don't use unexec. */
60 # undef static
61 # define ETAGS_REGEXPS /* use the regexp features */
62 # define LONG_OPTIONS /* accept long options */
63 # ifndef PTR /* for Xemacs */
64 # define PTR void *
65 # endif
66 # ifndef __P /* for Xemacs */
67 # define __P(args) args
68 # endif
69 #else /* no config.h */
70 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
71 # define __P(args) args /* use prototypes */
72 # define PTR void * /* for generic pointers */
73 # else /* not standard C */
74 # define __P(args) () /* no prototypes */
75 # define const /* remove const for old compilers' sake */
76 # define PTR long * /* don't use void* */
77 # endif
78 #endif /* !HAVE_CONFIG_H */
80 #ifndef _GNU_SOURCE
81 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
82 #endif
84 #ifdef LONG_OPTIONS
85 # undef LONG_OPTIONS
86 # define LONG_OPTIONS TRUE
87 #else
88 # define LONG_OPTIONS FALSE
89 #endif
91 /* WIN32_NATIVE is for Xemacs.
92 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
93 #ifdef WIN32_NATIVE
94 # undef MSDOS
95 # undef WINDOWSNT
96 # define WINDOWSNT
97 #endif /* WIN32_NATIVE */
99 #ifdef MSDOS
100 # undef MSDOS
101 # define MSDOS TRUE
102 # include <fcntl.h>
103 # include <sys/param.h>
104 # include <io.h>
105 # ifndef HAVE_CONFIG_H
106 # define DOS_NT
107 # include <sys/config.h>
108 # endif
109 #else
110 # define MSDOS FALSE
111 #endif /* MSDOS */
113 #ifdef WINDOWSNT
114 # include <stdlib.h>
115 # include <fcntl.h>
116 # include <string.h>
117 # include <direct.h>
118 # include <io.h>
119 # define MAXPATHLEN _MAX_PATH
120 # undef HAVE_NTGUI
121 # undef DOS_NT
122 # define DOS_NT
123 # ifndef HAVE_GETCWD
124 # define HAVE_GETCWD
125 # endif /* undef HAVE_GETCWD */
126 #else /* not WINDOWSNT */
127 # ifdef STDC_HEADERS
128 # include <stdlib.h>
129 # include <string.h>
130 # else /* no standard C headers */
131 extern char *getenv ();
132 # ifdef VMS
133 # define EXIT_SUCCESS 1
134 # define EXIT_FAILURE 0
135 # else /* no VMS */
136 # define EXIT_SUCCESS 0
137 # define EXIT_FAILURE 1
138 # endif
139 # endif
140 #endif /* !WINDOWSNT */
142 #ifdef HAVE_UNISTD_H
143 # include <unistd.h>
144 #else
145 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
146 extern char *getcwd (char *buf, size_t size);
147 # endif
148 #endif /* HAVE_UNISTD_H */
150 #include <stdio.h>
151 #include <ctype.h>
152 #include <errno.h>
153 #ifndef errno
154 extern int errno;
155 #endif
156 #include <sys/types.h>
157 #include <sys/stat.h>
159 #include <assert.h>
160 #ifdef NDEBUG
161 # undef assert /* some systems have a buggy assert.h */
162 # define assert(x) ((void) 0)
163 #endif
165 #if !defined (S_ISREG) && defined (S_IFREG)
166 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
167 #endif
169 #if LONG_OPTIONS
170 # include <getopt.h>
171 #else
172 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
173 extern char *optarg;
174 extern int optind, opterr;
175 #endif /* LONG_OPTIONS */
177 #ifdef ETAGS_REGEXPS
178 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
179 # ifdef __CYGWIN__ /* compiling on Cygwin */
180 !!! NOTICE !!!
181 the regex.h distributed with Cygwin is not compatible with etags, alas!
182 If you want regular expression support, you should delete this notice and
183 arrange to use the GNU regex.h and regex.c.
184 # endif
185 # endif
186 # include <regex.h>
187 #endif /* ETAGS_REGEXPS */
189 /* Define CTAGS to make the program "ctags" compatible with the usual one.
190 Leave it undefined to make the program "etags", which makes emacs-style
191 tag tables and tags typedefs, #defines and struct/union/enum by default. */
192 #ifdef CTAGS
193 # undef CTAGS
194 # define CTAGS TRUE
195 #else
196 # define CTAGS FALSE
197 #endif
/* String-equality helpers.  Each one evaluates to non-zero when the
   comparison function it wraps (strcmp, strncmp, etags_strcasecmp or
   etags_strncasecmp) reports the two strings as equal.
   S and T must both be non-NULL, because every one of those functions
   reads through both pointers; the assertion therefore requires the
   two pointers to be valid together, and does so identically in all
   four macros.  The arguments are evaluated more than once when
   assertions are enabled, so do not pass expressions with side
   effects.  */
#define streq(s,t) (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
204 #define CHARS 256 /* 2^8: one table slot per value of an 8-bit char */
205 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* reduce x to a table index in 0..CHARS-1 */
206 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
207 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
208 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
209 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
210 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
212 #define ISALNUM(c) isalnum (CHAR(c))
213 #define ISALPHA(c) isalpha (CHAR(c))
214 #define ISDIGIT(c) isdigit (CHAR(c))
215 #define ISLOWER(c) islower (CHAR(c))
217 #define lowcase(c) tolower (CHAR(c))
218 #define upcase(c) toupper (CHAR(c))
222 * xnew, xrnew -- allocate, reallocate storage
224 * SYNOPSIS: Type *xnew (int n, Type);
225 * void xrnew (OldPointer, int n, Type);
227 #if DEBUG
228 # include "chkmalloc.h"
229 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
230 (n) * sizeof (Type)))
231 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
232 (char *) (op), (n) * sizeof (Type)))
233 #else
234 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
235 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
236 (char *) (op), (n) * sizeof (Type)))
237 #endif
239 #define bool int
241 typedef void Lang_function __P((FILE *));
243 typedef struct
245 char *suffix; /* file name suffix for this compressor */
246 char *command; /* takes one arg and decompresses to stdout */
247 } compressor;
249 typedef struct
251 char *name; /* language name */
252 char *help; /* detailed help for the language */
253 Lang_function *function; /* parse function */
254 char **suffixes; /* name suffixes of this language's files */
255 char **filenames; /* names of this language's files */
256 char **interpreters; /* interpreters for this language */
257 bool metasource; /* source used to generate other sources */
258 } language;
260 typedef struct fdesc
262 struct fdesc *next; /* for the linked list */
263 char *infname; /* uncompressed input file name */
264 char *infabsname; /* absolute uncompressed input file name */
265 char *infabsdir; /* absolute dir of input file */
266 char *taggedfname; /* file name to write in tagfile */
267 language *lang; /* language of file */
268 char *prop; /* file properties to write in tagfile */
269 bool usecharno; /* etags tags shall contain char number */
270 bool written; /* entry written in the tags file */
271 } fdesc;
273 typedef struct node_st
274 { /* sorting structure */
275 struct node_st *left, *right; /* left and right sons */
276 fdesc *fdp; /* description of file to whom tag belongs */
277 char *name; /* tag name */
278 char *regex; /* search regexp */
279 bool valid; /* write this tag on the tag file */
280 bool is_func; /* function tag: use regexp in CTAGS mode */
281 bool been_warned; /* warning already given for duplicated tag */
282 int lno; /* line number tag is on */
283 long cno; /* character number line starts on */
284 } node;
287 * A `linebuffer' is a structure which holds a line of text.
288 * `readline_internal' reads a line from a stream into a linebuffer
289 * and works regardless of the length of the line.
290 * SIZE is the size of BUFFER, LEN is the length of the string in
291 * BUFFER after readline reads it.
293 typedef struct
295 long size; /* size of BUFFER */
296 int len; /* length of the string in BUFFER after a line is read */
297 char *buffer; /* storage holding the text of the line */
298 } linebuffer;
300 /* Used to support mixing of --lang and file names. */
301 typedef struct
303 enum {
304 at_language, /* a language specification */
305 at_regexp, /* a regular expression */
306 at_filename, /* a file name */
307 at_stdin, /* read from stdin here */
308 at_end /* stop parsing the list */
309 } arg_type; /* argument type */
310 language *lang; /* language associated with the argument */
311 char *what; /* the argument itself */
312 } argument;
314 #ifdef ETAGS_REGEXPS
315 /* Structure defining a regular expression. */
316 typedef struct regexp
318 struct regexp *p_next; /* pointer to next in list */
319 language *lang; /* if set, use only for this language */
320 char *pattern; /* the regexp pattern */
321 char *name; /* tag name */
322 struct re_pattern_buffer *pat; /* the compiled pattern */
323 struct re_registers regs; /* re registers */
324 bool error_signaled; /* already signaled for this regexp */
325 bool force_explicit_name; /* do not allow implicit tag name */
326 bool ignore_case; /* ignore case when matching */
327 bool multi_line; /* do a multi-line match on the whole file */
328 } regexp;
329 #endif /* ETAGS_REGEXPS */
332 /* Many compilers barf on this:
333 Lang_function Ada_funcs;
334 so let's write it this way */
335 static void Ada_funcs __P((FILE *));
336 static void Asm_labels __P((FILE *));
337 static void C_entries __P((int c_ext, FILE *));
338 static void default_C_entries __P((FILE *));
339 static void plain_C_entries __P((FILE *));
340 static void Cjava_entries __P((FILE *));
341 static void Cobol_paragraphs __P((FILE *));
342 static void Cplusplus_entries __P((FILE *));
343 static void Cstar_entries __P((FILE *));
344 static void Erlang_functions __P((FILE *));
345 static void Fortran_functions __P((FILE *));
346 static void HTML_labels __P((FILE *));
347 static void Lisp_functions __P((FILE *));
348 static void Lua_functions __P((FILE *));
349 static void Makefile_targets __P((FILE *));
350 static void Pascal_functions __P((FILE *));
351 static void Perl_functions __P((FILE *));
352 static void PHP_functions __P((FILE *));
353 static void PS_functions __P((FILE *));
354 static void Prolog_functions __P((FILE *));
355 static void Python_functions __P((FILE *));
356 static void Scheme_functions __P((FILE *));
357 static void TeX_commands __P((FILE *));
358 static void Texinfo_nodes __P((FILE *));
359 static void Yacc_entries __P((FILE *));
360 static void just_read_file __P((FILE *));
362 static void print_language_names __P((void));
363 static void print_version __P((void));
364 static void print_help __P((argument *));
365 int main __P((int, char **));
367 static compressor *get_compressor_from_suffix __P((char *, char **));
368 static language *get_language_from_langname __P((const char *));
369 static language *get_language_from_interpreter __P((char *));
370 static language *get_language_from_filename __P((char *, bool));
371 static void readline __P((linebuffer *, FILE *));
372 static long readline_internal __P((linebuffer *, FILE *));
373 static bool nocase_tail __P((char *));
374 static void get_tag __P((char *, char **));
376 #ifdef ETAGS_REGEXPS
377 static void analyse_regex __P((char *));
378 static void free_regexps __P((void));
379 static void regex_tag_multiline __P((void));
380 #endif /* ETAGS_REGEXPS */
381 static void error __P((const char *, const char *));
382 static void suggest_asking_for_help __P((void));
383 void fatal __P((char *, char *));
384 static void pfatal __P((char *));
385 static void add_node __P((node *, node **));
387 static void init __P((void));
388 static void process_file_name __P((char *, language *));
389 static void process_file __P((FILE *, char *, language *));
390 static void find_entries __P((FILE *));
391 static void free_tree __P((node *));
392 static void free_fdesc __P((fdesc *));
393 static void pfnote __P((char *, bool, char *, int, int, long));
394 static void make_tag __P((char *, int, bool, char *, int, int, long));
395 static void invalidate_nodes __P((fdesc *, node **));
396 static void put_entries __P((node *));
398 static char *concat __P((char *, char *, char *));
399 static char *skip_spaces __P((char *));
400 static char *skip_non_spaces __P((char *));
401 static char *savenstr __P((char *, int));
402 static char *savestr __P((char *));
403 static char *etags_strchr __P((const char *, int));
404 static char *etags_strrchr __P((const char *, int));
405 static int etags_strcasecmp __P((const char *, const char *));
406 static int etags_strncasecmp __P((const char *, const char *, int));
407 static char *etags_getcwd __P((void));
408 static char *relative_filename __P((char *, char *));
409 static char *absolute_filename __P((char *, char *));
410 static char *absolute_dirname __P((char *, char *));
411 static bool filename_is_absolute __P((char *f));
412 static void canonicalize_filename __P((char *));
413 static void linebuffer_init __P((linebuffer *));
414 static void linebuffer_setlen __P((linebuffer *, int));
415 static PTR xmalloc __P((unsigned int));
416 static PTR xrealloc __P((char *, unsigned int));
419 static char searchar = '/'; /* use /.../ searches */
421 static char *tagfile; /* output file */
422 static char *progname; /* name this program was invoked with */
423 static char *cwd; /* current working directory */
424 static char *tagfiledir; /* directory of tagfile */
425 static FILE *tagf; /* ioptr for tags file */
427 static fdesc *fdhead; /* head of file description list */
428 static fdesc *curfdp; /* current file description */
429 static int lineno; /* line number of current line */
430 static long charno; /* current character number */
431 static long linecharno; /* charno of start of current line */
432 static char *dbp; /* pointer to start of current tag */
434 static const int invalidcharno = -1;
436 static node *nodehead; /* the head of the binary tree of tags */
437 static node *last_node; /* the last node created */
439 static linebuffer lb; /* the current line */
440 static linebuffer filebuf; /* a buffer containing the whole file */
441 static linebuffer token_name; /* a buffer containing a tag name */
443 /* boolean "functions" (see init) */
444 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
445 static char
446 /* white chars */
447 *white = " \f\t\n\r\v",
448 /* not in a name */
449 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
450 /* token ending chars */
451 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
452 /* token starting chars */
453 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
454 /* valid in-token chars */
455 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
457 static bool append_to_tagfile; /* -a: append to tags */
458 /* The next four default to TRUE for etags, but to FALSE for ctags. */
459 static bool typedefs; /* -t: create tags for C and Ada typedefs */
460 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
461 /* 0 struct/enum/union decls, and C++ */
462 /* member functions. */
463 static bool constantypedefs; /* -d: create tags for C #define, enum */
464 /* constants and variables. */
465 /* -D: opposite of -d. Default under ctags. */
466 static bool globals; /* create tags for global variables */
467 static bool declarations; /* --declarations: tag them and extern in C&Co*/
468 static bool members; /* create tags for C member variables */
469 static bool no_line_directive; /* ignore #line directives (undocumented) */
470 static bool update; /* -u: update tags */
471 static bool vgrind_style; /* -v: create vgrind style index output */
472 static bool no_warnings; /* -w: suppress warnings */
473 static bool cxref_style; /* -x: create cxref style output */
474 static bool cplusplus; /* .[hc] means C++, not C */
475 static bool ignoreindent; /* -I: ignore indentation in C */
476 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
478 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
479 static bool parsing_stdin; /* --parse-stdin used */
481 #ifdef ETAGS_REGEXPS
482 static regexp *p_head; /* list of all regexps */
483 static bool need_filebuf; /* some regexes are multi-line */
484 #else
485 # define need_filebuf FALSE
486 #endif /* ETAGS_REGEXPS */
488 #if LONG_OPTIONS
489 static struct option longopts[] =
491 { "packages-only", no_argument, &packages_only, TRUE },
492 { "c++", no_argument, NULL, 'C' },
493 { "declarations", no_argument, &declarations, TRUE },
494 { "no-line-directive", no_argument, &no_line_directive, TRUE },
495 { "help", no_argument, NULL, 'h' },
496 { "help", no_argument, NULL, 'H' }, /* NOTE(review): same long name as the previous entry -- confirm this one can ever be selected */
497 { "ignore-indentation", no_argument, NULL, 'I' },
498 { "language", required_argument, NULL, 'l' },
499 { "members", no_argument, &members, TRUE },
500 { "no-members", no_argument, &members, FALSE },
501 { "output", required_argument, NULL, 'o' },
502 #ifdef ETAGS_REGEXPS
503 { "regex", required_argument, NULL, 'r' },
504 { "no-regex", no_argument, NULL, 'R' },
505 { "ignore-case-regex", required_argument, NULL, 'c' },
506 #endif /* ETAGS_REGEXPS */
507 { "parse-stdin", required_argument, NULL, STDIN },
508 { "version", no_argument, NULL, 'V' },
510 #if CTAGS /* Ctags options */
511 { "backward-search", no_argument, NULL, 'B' },
512 { "cxref", no_argument, NULL, 'x' },
513 { "defines", no_argument, NULL, 'd' },
514 { "globals", no_argument, &globals, TRUE },
515 { "typedefs", no_argument, NULL, 't' },
516 { "typedefs-and-c++", no_argument, NULL, 'T' },
517 { "update", no_argument, NULL, 'u' },
518 { "vgrind", no_argument, NULL, 'v' },
519 { "no-warn", no_argument, NULL, 'w' },
521 #else /* Etags options */
522 { "append", no_argument, NULL, 'a' },
523 { "no-defines", no_argument, NULL, 'D' },
524 { "no-globals", no_argument, &globals, FALSE },
525 { "include", required_argument, NULL, 'i' },
526 #endif
527 { NULL }
529 #endif /* LONG_OPTIONS */
531 static compressor compressors[] =
533 { "z", "gzip -d -c"},
534 { "Z", "gzip -d -c"},
535 { "gz", "gzip -d -c"},
536 { "GZ", "gzip -d -c"},
537 { "bz2", "bzip2 -d -c" },
538 { NULL }
542 * Language stuff.
545 /* Ada code */
546 static char *Ada_suffixes [] =
547 { "ads", "adb", "ada", NULL };
548 static char Ada_help [] =
549 "In Ada code, functions, procedures, packages, tasks and types are\n\
550 tags. Use the `--packages-only' option to create tags for\n\
551 packages only.\n\
552 Ada tag names have suffixes indicating the type of entity:\n\
553 Entity type: Qualifier:\n\
554 ------------ ----------\n\
555 function /f\n\
556 procedure /p\n\
557 package spec /s\n\
558 package body /b\n\
559 type /t\n\
560 task /k\n\
561 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
562 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
563 will just search for any tag `bidule'.";
565 /* Assembly code */
566 static char *Asm_suffixes [] =
567 { "a", /* Unix assembler */
568 "asm", /* Microcontroller assembly */
569 "def", /* BSO/Tasking definition includes */
570 "inc", /* Microcontroller include files */
571 "ins", /* Microcontroller include files */
572 "s", "sa", /* Unix assembler */
573 "S", /* cpp-processed Unix assembler */
574 "src", /* BSO/Tasking C compiler output */
575 NULL
577 static char Asm_help [] =
578 "In assembler code, labels appearing at the beginning of a line,\n\
579 followed by a colon, are tags.";
582 /* Note that .c and .h can be considered C++, if the --c++ flag was
583 given, or if the `class' or `template' keywords are met inside the file.
584 That is why default_C_entries is called for these. */
585 static char *default_C_suffixes [] =
586 { "c", "h", NULL };
587 static char default_C_help [] =
588 "In C code, any C function or typedef is a tag, and so are\n\
589 definitions of `struct', `union' and `enum'. `#define' macro\n\
590 definitions and `enum' constants are tags unless you specify\n\
591 `--no-defines'. Global variables are tags unless you specify\n\
592 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
593 can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using `--declarations', and struct members by using `--members'.";
597 static char *Cplusplus_suffixes [] =
598 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
599 "M", /* Objective C++ */
600 "pdb", /* Postscript with C syntax */
601 NULL };
602 static char Cplusplus_help [] =
603 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
604 --help --lang=c --lang=c++ for full help.)\n\
605 In addition to C tags, member functions are also recognized, and\n\
606 optionally member variables if you use the `--members' option.\n\
607 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
608 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
609 `operator+'.";
611 static char *Cjava_suffixes [] =
612 { "java", NULL };
613 static char Cjava_help [] =
614 "In Java code, all the tags constructs of C and C++ code are\n\
615 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
618 static char *Cobol_suffixes [] =
619 { "COB", "cob", NULL };
620 static char Cobol_help [] =
621 "In Cobol code, tags are paragraph names; that is, any word\n\
622 starting in column 8 and followed by a period.";
624 static char *Cstar_suffixes [] =
625 { "cs", "hs", NULL };
627 static char *Erlang_suffixes [] =
628 { "erl", "hrl", NULL };
629 static char Erlang_help [] =
630 "In Erlang code, the tags are the functions, records and macros\n\
631 defined in the file.";
633 static char *Fortran_suffixes [] =
634 { "F", "f", "f90", "for", NULL };
635 static char Fortran_help [] =
636 "In Fortran code, functions, subroutines and block data are tags.";
638 static char *HTML_suffixes [] =
639 { "htm", "html", "shtml", NULL };
640 static char HTML_help [] =
641 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
642 `h3' headers. Also, tags are `name=' in anchors and all\n\
643 occurrences of `id='.";
645 static char *Lisp_suffixes [] =
646 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
647 static char Lisp_help [] =
648 "In Lisp code, any function defined with `defun', any variable\n\
649 defined with `defvar' or `defconst', and in general the first\n\
650 argument of any expression that starts with `(def' in column zero\n\
651 is a tag.";
653 static char *Lua_suffixes [] =
654 { "lua", "LUA", NULL };
655 static char Lua_help [] =
656 "In Lua scripts, all functions are tags.";
658 static char *Makefile_filenames [] =
659 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
660 static char Makefile_help [] =
661 "In makefiles, targets are tags; additionally, variables are tags\n\
662 unless you specify `--no-globals'.";
664 static char *Objc_suffixes [] =
665 { "lm", /* Objective lex file */
666 "m", /* Objective C file */
667 NULL };
668 static char Objc_help [] =
669 "In Objective C code, tags include Objective C definitions for classes,\n\
670 class categories, methods and protocols. Tags for variables and\n\
671 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
673 static char *Pascal_suffixes [] =
674 { "p", "pas", NULL };
675 static char Pascal_help [] =
676 "In Pascal code, the tags are the functions and procedures defined\n\
677 in the file.";
679 static char *Perl_suffixes [] =
680 { "pl", "pm", NULL };
681 static char *Perl_interpreters [] =
682 { "perl", "@PERL@", NULL };
683 static char Perl_help [] =
684 "In Perl code, the tags are the packages, subroutines and variables\n\
685 defined by the `package', `sub', `my' and `local' keywords. Use\n\
686 `--globals' if you want to tag global variables. Tags for\n\
687 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
688 defined in the default package is `main::SUB'.";
690 static char *PHP_suffixes [] =
691 { "php", "php3", "php4", NULL };
692 static char PHP_help [] =
693 "In PHP code, tags are functions, classes and defines. When using\n\
694 the `--members' option, vars are tags too.";
696 static char *plain_C_suffixes [] =
697 { "pc", /* Pro*C file */
698 NULL };
700 static char *PS_suffixes [] =
701 { "ps", "psw", NULL }; /* .psw is for PSWrap */
702 static char PS_help [] =
703 "In PostScript code, the tags are the functions.";
705 static char *Prolog_suffixes [] =
706 { "prolog", NULL };
707 static char Prolog_help [] =
708 "In Prolog code, tags are predicates and rules at the beginning of\n\
709 line.";
711 static char *Python_suffixes [] =
712 { "py", NULL };
713 static char Python_help [] =
714 "In Python code, `def' or `class' at the beginning of a line\n\
715 generate a tag.";
717 /* Can't do the `SCM' or `scm' prefix with a version number. */
718 static char *Scheme_suffixes [] =
719 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
720 static char Scheme_help [] =
721 "In Scheme code, tags include anything defined with `def' or with a\n\
722 construct whose name starts with `def'. They also include\n\
723 variables set with `set!' at top level in the file.";
725 static char *TeX_suffixes [] =
726 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
727 static char TeX_help [] =
728 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
729 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
730 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
731 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
732 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
734 Other commands can be specified by setting the environment variable\n\
735 `TEXTAGS' to a colon-separated list like, for example,\n\
736 TEXTAGS=\"mycommand:myothercommand\".";
739 static char *Texinfo_suffixes [] =
740 { "texi", "texinfo", "txi", NULL };
741 static char Texinfo_help [] =
742 "for texinfo files, lines starting with @node are tagged.";
744 static char *Yacc_suffixes [] =
745 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
746 static char Yacc_help [] =
747 "In Bison or Yacc input files, each rule defines as a tag the\n\
748 nonterminal it constructs. The portions of the file that contain\n\
749 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
750 for full help).";
752 static char auto_help [] =
753 "`auto' is not a real language, it indicates to use\n\
754 a default language for files base on file name suffix and file contents.";
756 static char none_help [] =
757 "`none' is not a real language, it indicates to only do\n\
758 regexp processing on files.";
760 static char no_lang_help [] =
761 "No detailed help available for this language.";
765 * Table of languages.
767 * It is ok for a given function to be listed under more than one
768 * name. I just didn't.
771 static language lang_names [] =
773 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
774 { "asm", Asm_help, Asm_labels, Asm_suffixes },
775 { "c", default_C_help, default_C_entries, default_C_suffixes },
776 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
777 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
778 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
779 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
780 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
781 { "html", HTML_help, HTML_labels, HTML_suffixes },
782 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
783 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
784 { "lua", Lua_help, Lua_functions, Lua_suffixes },
785 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
786 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
787 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
788 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
789 { "php", PHP_help, PHP_functions, PHP_suffixes },
790 { "postscript",PS_help, PS_functions, PS_suffixes },
791 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
792 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
793 { "python", Python_help, Python_functions, Python_suffixes },
794 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
795 { "tex", TeX_help, TeX_commands, TeX_suffixes },
796 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
797 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
798 { "auto", auto_help }, /* default guessing scheme */
799 { "none", none_help, just_read_file }, /* regexp matching only */
800 { NULL } /* end of list */
804 static void
805 print_language_names ()
807 language *lang;
808 char **name, **ext;
810 puts ("\nThese are the currently supported languages, along with the\n\
811 default file names and dot suffixes:");
812 for (lang = lang_names; lang->name != NULL; lang++)
814 printf (" %-*s", 10, lang->name);
815 if (lang->filenames != NULL)
816 for (name = lang->filenames; *name != NULL; name++)
817 printf (" %s", *name);
818 if (lang->suffixes != NULL)
819 for (ext = lang->suffixes; *ext != NULL; ext++)
820 printf (" .%s", *ext);
821 puts ("");
823 puts ("where `auto' means use default language for files based on file\n\
824 name suffix, and `none' means only do regexp processing on files.\n\
825 If no language is specified and no matching suffix is found,\n\
826 the first line of the file is read for a sharp-bang (#!) sequence\n\
827 followed by the name of an interpreter. If no such sequence is found,\n\
828 Fortran is tried first; if no tags are found, C is tried next.\n\
829 When parsing any C file, a \"class\" or \"template\" keyword\n\
830 switches to C++.");
831 puts ("Compressed files are supported using gzip and bzip2.\n\
833 For detailed help on a given language use, for example,\n\
834 etags --help --lang=ada.");
837 #ifndef EMACS_NAME
838 # define EMACS_NAME "standalone"
839 #endif
840 #ifndef VERSION
841 # define VERSION "version"
842 #endif
843 static void
844 print_version ()
846 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
847 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
848 puts ("This program is distributed under the same terms as Emacs");
850 exit (EXIT_SUCCESS);
/* Print help on standard output and exit with EXIT_SUCCESS.
   ARGBUFFER is scanned up to its at_end entry: for every at_language
   entry the help text of that language is printed (a blank line
   separates successive texts) and, if at least one was printed, the
   program exits right away.  Otherwise the usage line and the option
   descriptions are printed; which descriptions appear depends on CTAGS,
   LONG_OPTIONS and ETAGS_REGEXPS.  The language list produced by
   print_language_names and the bug-report address come last.  */
853 static void
854 print_help (argbuffer)
855 argument *argbuffer;
857 bool help_for_lang = FALSE;
859 for (; argbuffer->arg_type != at_end; argbuffer++)
860 if (argbuffer->arg_type == at_language)
862 if (help_for_lang)
863 puts ("");
864 puts (argbuffer->lang->help);
865 help_for_lang = TRUE;
868 if (help_for_lang)
869 exit (EXIT_SUCCESS);
871 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
873 These are the options accepted by %s.\n", progname, progname);
874 if (LONG_OPTIONS)
875 puts ("You may use unambiguous abbreviations for the long option names.");
876 else
877 puts ("Long option names do not work with this executable, as it is not\n\
878 linked with GNU getopt.");
879 puts (" A - as file name means read names from stdin (one per line).\n\
880 Absolute names are stored in the output file as they are.\n\
881 Relative ones are stored relative to the output file's directory.\n");
883 if (!CTAGS)
884 puts ("-a, --append\n\
885 Append tag entries to existing tags file.");
887 puts ("--packages-only\n\
888 For Ada files, only generate tags for packages.");
890 if (CTAGS)
891 puts ("-B, --backward-search\n\
892 Write the search commands for the tag entries using '?', the\n\
893 backward-search command instead of '/', the forward-search command.");
895 /* This option is mostly obsolete, because etags can now automatically
896 detect C++. Retained for backward compatibility and for debugging and
897 experimentation. In principle, we could want to tag as C++ even
898 before any "class" or "template" keyword.
899 puts ("-C, --c++\n\
900 Treat files whose name suffix defaults to C language as C++ files.");
903 puts ("--declarations\n\
904 In C and derived languages, create tags for function declarations,");
905 if (CTAGS)
906 puts ("\tand create tags for extern variables if --globals is used.");
907 else
908 puts
909 ("\tand create tags for extern variables unless --no-globals is used.");
911 if (CTAGS)
912 puts ("-d, --defines\n\
913 Create tag entries for C #define constants and enum constants, too.");
914 else
915 puts ("-D, --no-defines\n\
916 Don't create tag entries for C #define constants and enum constants.\n\
917 This makes the tags file smaller.");
919 if (!CTAGS)
920 puts ("-i FILE, --include=FILE\n\
921 Include a note in tag file indicating that, when searching for\n\
922 a tag, one should also consult the tags file FILE after\n\
923 checking the current file.");
925 puts ("-l LANG, --language=LANG\n\
926 Force the following files to be considered as written in the\n\
927 named language up to the next --language=LANG option.");
929 if (CTAGS)
930 puts ("--globals\n\
931 Create tag entries for global variables in some languages.");
932 else
933 puts ("--no-globals\n\
934 Do not create tag entries for global variables in some\n\
935 languages. This makes the tags file smaller.");
936 puts ("--members\n\
937 Create tag entries for members of structures in some languages.");
939 #ifdef ETAGS_REGEXPS
940 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
941 Make a tag for each line matching a regular expression pattern\n\
942 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
943 files only. REGEXFILE is a file containing one REGEXP per line.\n\
944 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
945 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
946 puts (" If TAGNAME/ is present, the tags created are named.\n\
947 For example Tcl named tags can be created with:\n\
948 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
949 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
950 `m' means to allow multi-line matches, `s' implies `m' and\n\
951 causes dot to match any character, including newline.");
952 puts ("-R, --no-regex\n\
953 Don't create tags from regexps for the following files.");
954 #endif /* ETAGS_REGEXPS */
955 puts ("-I, --ignore-indentation\n\
956 In C and C++ do not assume that a closing brace in the first\n\
957 column is the final brace of a function or structure definition.");
958 puts ("-o FILE, --output=FILE\n\
959 Write the tags to FILE.");
960 puts ("--parse-stdin=NAME\n\
961 Read from standard input and record tags as belonging to file NAME.");
963 if (CTAGS)
965 puts ("-t, --typedefs\n\
966 Generate tag entries for C and Ada typedefs.");
967 puts ("-T, --typedefs-and-c++\n\
968 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
969 and C++ member functions.");
972 if (CTAGS)
973 puts ("-u, --update\n\
974 Update the tag entries for the given files, leaving tag\n\
975 entries for other files in place. Currently, this is\n\
976 implemented by deleting the existing entries for the given\n\
977 files and then rewriting the new entries at the end of the\n\
978 tags file. It is often faster to simply rebuild the entire\n\
979 tag file than to use this.");
981 if (CTAGS)
983 puts ("-v, --vgrind\n\
984 Generates an index of items intended for human consumption,\n\
985 similar to the output of vgrind. The index is sorted, and\n\
986 gives the page number of each item.");
987 puts ("-w, --no-warn\n\
988 Suppress warning messages about entries defined in multiple\n\
989 files.");
990 puts ("-x, --cxref\n\
991 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
992 The output uses line numbers instead of page numbers, but\n\
993 beyond that the differences are cosmetic; try both to see\n\
994 which you like.");
997 puts ("-V, --version\n\
998 Print the version of the program.\n\
999 -h, --help\n\
1000 Print this help message.\n\
1001 Followed by one or more `--language' options prints detailed\n\
1002 help about tag generation for the specified languages.");
1004 print_language_names ();
1006 puts ("");
1007 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1009 exit (EXIT_SUCCESS);
1013 #ifdef VMS /* VMS specific functions */
/* String terminator stored by the VMS-specific functions.  */
1015 #define EOS '\0'
1017 /* This is a BUG! ANY arbitrary limit is a BUG!
1018 Won't someone please fix this? */
1019 #define MAX_FILE_SPEC_LEN 255
/* File specification buffer: BODY holds up to MAX_FILE_SPEC_LEN
   characters plus a terminator; fn_exp stores EOS at BODY[CURLEN].
   NOTE(review): presumably CURLEN is filled in by the system as the
   current length of the varying string -- confirm against the VMS
   descriptor documentation.  */
1020 typedef struct {
1021 short curlen;
1022 char body[MAX_FILE_SPEC_LEN + 1];
1023 } vspec;
1026 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1027 returning in each successive call the next file name matching the input
1028 spec. The function expects that each in_spec passed
1029 to it will be processed to completion; in particular, up to and
1030 including the call following that in which the last matching name
1031 is returned, the function ignores the value of in_spec, and will
1032 only start processing a new spec with the following call.
1033 If an error occurs, on return out_spec contains the value
1034 of in_spec when the error occurred.
1036 With each successive file name returned in out_spec, the
1037 function's return value is one. When there are no more matching
1038 names the function returns zero. If on the first call no file
1039 matches in_spec, or there is any other error, -1 is returned.
1042 #include <rmsdef.h>
1043 #include <descrip.h>
1044 #define OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047 vspec *out;
1048 char *in;
1050 static long context = 0;
1051 static struct dsc$descriptor_s o;
1052 static struct dsc$descriptor_s i;
1053 static bool pass1 = TRUE;
1054 long status;
1055 short retval;
1057 if (pass1)
1059 pass1 = FALSE;
1060 o.dsc$a_pointer = (char *) out;
1061 o.dsc$w_length = (short)OUTSIZE;
1062 i.dsc$a_pointer = in;
1063 i.dsc$w_length = (short)strlen(in);
1064 i.dsc$b_dtype = DSC$K_DTYPE_T;
1065 i.dsc$b_class = DSC$K_CLASS_S;
1066 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067 o.dsc$b_class = DSC$K_CLASS_VS;
1069 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1071 out->body[out->curlen] = EOS;
1072 return 1;
1074 else if (status == RMS$_NMF)
1075 retval = 0;
1076 else
1078 strcpy(out->body, in);
1079 retval = -1;
1081 lib$find_file_end(&context);
1082 pass1 = TRUE;
1083 return retval;
1087 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088 name of each file specified by the provided arg expanding wildcards.
1090 static char *
1091 gfnames (arg, p_error)
1092 char *arg;
1093 bool *p_error;
1095 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1097 switch (fn_exp (&filename, arg))
1099 case 1:
1100 *p_error = FALSE;
1101 return filename.body;
1102 case 0:
1103 *p_error = FALSE;
1104 return NULL;
1105 default:
1106 *p_error = TRUE;
1107 return filename.body;
#ifndef OLD			/* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library `system'.  CMD is ignored: an error
   message is issued and -1 is returned, so that callers which test or
   propagate the result see a failure instead of an indeterminate value
   (the function used to fall off its end without returning one).  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1119 #define VERSION_DELIM ';'
1120 char *massage_name (s)
1121 char *s;
1123 char *start = s;
1125 for ( ; *s; s++)
1126 if (*s == VERSION_DELIM)
1128 *s = EOS;
1129 break;
1131 else
1132 *s = lowcase (*s);
1133 return start;
1135 #endif /* VMS */
1139 main (argc, argv)
1140 int argc;
1141 char *argv[];
1143 int i;
1144 unsigned int nincluded_files;
1145 char **included_files;
1146 argument *argbuffer;
1147 int current_arg, file_count;
1148 linebuffer filename_lb;
1149 bool help_asked = FALSE;
1150 #ifdef VMS
1151 bool got_err;
1152 #endif
1153 char *optstring;
1154 int opt;
1157 #ifdef DOS_NT
1158 _fmode = O_BINARY; /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1161 progname = argv[0];
1162 nincluded_files = 0;
1163 included_files = xnew (argc, char *);
1164 current_arg = 0;
1165 file_count = 0;
1167 /* Allocate enough no matter what happens. Overkill, but each one
1168 is small. */
1169 argbuffer = xnew (argc, argument);
1172 * If etags, always find typedefs and structure tags. Why not?
1173 * Also default to find macro constants, enum constants and
1174 * global variables.
1176 if (!CTAGS)
1178 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179 globals = TRUE;
1182 optstring = "-";
1183 #ifdef ETAGS_REGEXPS
1184 optstring = "-r:Rc:";
1185 #endif /* ETAGS_REGEXPS */
1186 if (!LONG_OPTIONS)
1187 optstring += 1;
1188 optstring = concat (optstring,
1189 "Cf:Il:o:SVhH",
1190 (CTAGS) ? "BxdtTuvw" : "aDi:");
1192 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1193 switch (opt)
1195 case 0:
1196 /* If getopt returns 0, then it has already processed a
1197 long-named option. We should do nothing. */
1198 break;
1200 case 1:
1201 /* This means that a file name has been seen. Record it. */
1202 argbuffer[current_arg].arg_type = at_filename;
1203 argbuffer[current_arg].what = optarg;
1204 ++current_arg;
1205 ++file_count;
1206 break;
1208 case STDIN:
1209 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1210 argbuffer[current_arg].arg_type = at_stdin;
1211 argbuffer[current_arg].what = optarg;
1212 ++current_arg;
1213 ++file_count;
1214 if (parsing_stdin)
1215 fatal ("cannot parse standard input more than once", (char *)NULL);
1216 parsing_stdin = TRUE;
1217 break;
1219 /* Common options. */
1220 case 'C': cplusplus = TRUE; break;
1221 case 'f': /* for compatibility with old makefiles */
1222 case 'o':
1223 if (tagfile)
1225 error ("-o option may only be given once.", (char *)NULL);
1226 suggest_asking_for_help ();
1227 /* NOTREACHED */
1229 tagfile = optarg;
1230 break;
1231 case 'I':
1232 case 'S': /* for backward compatibility */
1233 ignoreindent = TRUE;
1234 break;
1235 case 'l':
1237 language *lang = get_language_from_langname (optarg);
1238 if (lang != NULL)
1240 argbuffer[current_arg].lang = lang;
1241 argbuffer[current_arg].arg_type = at_language;
1242 ++current_arg;
1245 break;
1246 case 'c':
1247 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1248 optarg = concat (optarg, "i", ""); /* memory leak here */
1249 /* FALLTHRU */
1250 case 'r':
1251 argbuffer[current_arg].arg_type = at_regexp;
1252 argbuffer[current_arg].what = optarg;
1253 ++current_arg;
1254 break;
1255 case 'R':
1256 argbuffer[current_arg].arg_type = at_regexp;
1257 argbuffer[current_arg].what = NULL;
1258 ++current_arg;
1259 break;
1260 case 'V':
1261 print_version ();
1262 break;
1263 case 'h':
1264 case 'H':
1265 help_asked = TRUE;
1266 break;
1268 /* Etags options */
1269 case 'a': append_to_tagfile = TRUE; break;
1270 case 'D': constantypedefs = FALSE; break;
1271 case 'i': included_files[nincluded_files++] = optarg; break;
1273 /* Ctags options. */
1274 case 'B': searchar = '?'; break;
1275 case 'd': constantypedefs = TRUE; break;
1276 case 't': typedefs = TRUE; break;
1277 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1278 case 'u': update = TRUE; break;
1279 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1280 case 'x': cxref_style = TRUE; break;
1281 case 'w': no_warnings = TRUE; break;
1282 default:
1283 suggest_asking_for_help ();
1284 /* NOTREACHED */
1287 for (; optind < argc; optind++)
1289 argbuffer[current_arg].arg_type = at_filename;
1290 argbuffer[current_arg].what = argv[optind];
1291 ++current_arg;
1292 ++file_count;
1295 argbuffer[current_arg].arg_type = at_end;
1297 if (help_asked)
1298 print_help (argbuffer);
1299 /* NOTREACHED */
1301 if (nincluded_files == 0 && file_count == 0)
1303 error ("no input files specified.", (char *)NULL);
1304 suggest_asking_for_help ();
1305 /* NOTREACHED */
1308 if (tagfile == NULL)
1309 tagfile = CTAGS ? "tags" : "TAGS";
1310 cwd = etags_getcwd (); /* the current working directory */
1311 if (cwd[strlen (cwd) - 1] != '/')
1313 char *oldcwd = cwd;
1314 cwd = concat (oldcwd, "/", "");
1315 free (oldcwd);
1317 /* Relative file names are made relative to the current directory. */
1318 if (streq (tagfile, "-")
1319 || strneq (tagfile, "/dev/", 5))
1320 tagfiledir = cwd;
1321 else
1322 tagfiledir = absolute_dirname (tagfile, cwd);
1324 init (); /* set up boolean "functions" */
1326 linebuffer_init (&lb);
1327 linebuffer_init (&filename_lb);
1328 linebuffer_init (&filebuf);
1329 linebuffer_init (&token_name);
1331 if (!CTAGS)
1333 if (streq (tagfile, "-"))
1335 tagf = stdout;
1336 #ifdef DOS_NT
1337 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1338 doesn't take effect until after `stdout' is already open). */
1339 if (!isatty (fileno (stdout)))
1340 setmode (fileno (stdout), O_BINARY);
1341 #endif /* DOS_NT */
1343 else
1344 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1345 if (tagf == NULL)
1346 pfatal (tagfile);
1350 * Loop through files finding functions.
1352 for (i = 0; i < current_arg; i++)
1354 static language *lang; /* non-NULL if language is forced */
1355 char *this_file;
1357 switch (argbuffer[i].arg_type)
1359 case at_language:
1360 lang = argbuffer[i].lang;
1361 break;
1362 #ifdef ETAGS_REGEXPS
1363 case at_regexp:
1364 analyse_regex (argbuffer[i].what);
1365 break;
1366 #endif
1367 case at_filename:
1368 #ifdef VMS
1369 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1371 if (got_err)
1373 error ("can't find file %s\n", this_file);
1374 argc--, argv++;
1376 else
1378 this_file = massage_name (this_file);
1380 #else
1381 this_file = argbuffer[i].what;
1382 #endif
1383 /* Input file named "-" means read file names from stdin
1384 (one per line) and use them. */
1385 if (streq (this_file, "-"))
1387 if (parsing_stdin)
1388 fatal ("cannot parse standard input AND read file names from it",
1389 (char *)NULL);
1390 while (readline_internal (&filename_lb, stdin) > 0)
1391 process_file_name (filename_lb.buffer, lang);
1393 else
1394 process_file_name (this_file, lang);
1395 #ifdef VMS
1397 #endif
1398 break;
1399 case at_stdin:
1400 this_file = argbuffer[i].what;
1401 process_file (stdin, this_file, lang);
1402 break;
1406 #ifdef ETAGS_REGEXPS
1407 free_regexps ();
1408 #endif /* ETAGS_REGEXPS */
1409 free (lb.buffer);
1410 free (filebuf.buffer);
1411 free (token_name.buffer);
1413 if (!CTAGS || cxref_style)
1415 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1416 free_tree (nodehead);
1417 nodehead = NULL;
1418 if (!CTAGS)
1420 fdesc *fdp;
1422 /* Output file entries that have no tags. */
1423 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1424 if (!fdp->written)
1425 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1427 while (nincluded_files-- > 0)
1428 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1431 if (fclose (tagf) == EOF)
1432 pfatal (tagfile);
1433 exit (EXIT_SUCCESS);
1436 if (update)
1438 char cmd[BUFSIZ];
1439 for (i = 0; i < current_arg; ++i)
1441 switch (argbuffer[i].arg_type)
1443 case at_filename:
1444 case at_stdin:
1445 break;
1446 default:
1447 continue; /* the for loop */
1449 sprintf (cmd,
1450 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1451 tagfile, argbuffer[i].what, tagfile);
1452 if (system (cmd) != EXIT_SUCCESS)
1453 fatal ("failed to execute shell command", (char *)NULL);
1455 append_to_tagfile = TRUE;
1458 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1459 if (tagf == NULL)
1460 pfatal (tagfile);
1461 put_entries (nodehead); /* write all the tags (CTAGS) */
1462 free_tree (nodehead);
1463 nodehead = NULL;
1464 if (fclose (tagf) == EOF)
1465 pfatal (tagfile);
1467 if (update)
1469 char cmd[2*BUFSIZ+10];
1470 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1471 exit (system (cmd));
1473 return EXIT_SUCCESS;
1478 * Return a compressor given the file name. If EXTPTR is non-zero,
1479 * return a pointer into FILE where the compressor-specific
1480 * extension begins. If no compressor is found, NULL is returned
1481 * and EXTPTR is not significant.
1482 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1484 static compressor *
1485 get_compressor_from_suffix (file, extptr)
1486 char *file;
1487 char **extptr;
1489 compressor *compr;
1490 char *slash, *suffix;
1492 /* This relies on FN to be after canonicalize_filename,
1493 so we don't need to consider backslashes on DOS_NT. */
1494 slash = etags_strrchr (file, '/');
1495 suffix = etags_strrchr (file, '.');
1496 if (suffix == NULL || suffix < slash)
1497 return NULL;
1498 if (extptr != NULL)
1499 *extptr = suffix;
1500 suffix += 1;
1501 /* Let those poor souls who live with DOS 8+3 file name limits get
1502 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1503 Only the first do loop is run if not MSDOS */
1506 for (compr = compressors; compr->suffix != NULL; compr++)
1507 if (streq (compr->suffix, suffix))
1508 return compr;
1509 if (!MSDOS)
1510 break; /* do it only once: not really a loop */
1511 if (extptr != NULL)
1512 *extptr = ++suffix;
1513 } while (*suffix != '\0');
1514 return NULL;
1520 * Return a language given the name.
1522 static language *
1523 get_language_from_langname (name)
1524 const char *name;
1526 language *lang;
1528 if (name == NULL)
1529 error ("empty language name", (char *)NULL);
1530 else
1532 for (lang = lang_names; lang->name != NULL; lang++)
1533 if (streq (name, lang->name))
1534 return lang;
1535 error ("unknown language \"%s\"", name);
1538 return NULL;
1543 * Return a language given the interpreter name.
1545 static language *
1546 get_language_from_interpreter (interpreter)
1547 char *interpreter;
1549 language *lang;
1550 char **iname;
1552 if (interpreter == NULL)
1553 return NULL;
1554 for (lang = lang_names; lang->name != NULL; lang++)
1555 if (lang->interpreters != NULL)
1556 for (iname = lang->interpreters; *iname != NULL; iname++)
1557 if (streq (*iname, interpreter))
1558 return lang;
1560 return NULL;
1566 * Return a language given the file name.
1568 static language *
1569 get_language_from_filename (file, case_sensitive)
1570 char *file;
1571 bool case_sensitive;
1573 language *lang;
1574 char **name, **ext, *suffix;
1576 /* Try whole file name first. */
1577 for (lang = lang_names; lang->name != NULL; lang++)
1578 if (lang->filenames != NULL)
1579 for (name = lang->filenames; *name != NULL; name++)
1580 if ((case_sensitive)
1581 ? streq (*name, file)
1582 : strcaseeq (*name, file))
1583 return lang;
1585 /* If not found, try suffix after last dot. */
1586 suffix = etags_strrchr (file, '.');
1587 if (suffix == NULL)
1588 return NULL;
1589 suffix += 1;
1590 for (lang = lang_names; lang->name != NULL; lang++)
1591 if (lang->suffixes != NULL)
1592 for (ext = lang->suffixes; *ext != NULL; ext++)
1593 if ((case_sensitive)
1594 ? streq (*ext, suffix)
1595 : strcaseeq (*ext, suffix))
1596 return lang;
1597 return NULL;
1602 * This routine is called on each file argument.
1604 static void
1605 process_file_name (file, lang)
1606 char *file;
1607 language *lang;
1609 struct stat stat_buf;
1610 FILE *inf;
1611 fdesc *fdp;
1612 compressor *compr;
1613 char *compressed_name, *uncompressed_name;
1614 char *ext, *real_name;
1615 int retval;
1617 canonicalize_filename (file);
1618 if (streq (file, tagfile) && !streq (tagfile, "-"))
1620 error ("skipping inclusion of %s in self.", file);
1621 return;
1623 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1625 compressed_name = NULL;
1626 real_name = uncompressed_name = savestr (file);
1628 else
1630 real_name = compressed_name = savestr (file);
1631 uncompressed_name = savenstr (file, ext - file);
1634 /* If the canonicalized uncompressed name
1635 has already been dealt with, skip it silently. */
1636 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1638 assert (fdp->infname != NULL);
1639 if (streq (uncompressed_name, fdp->infname))
1640 goto cleanup;
1643 if (stat (real_name, &stat_buf) != 0)
1645 /* Reset real_name and try with a different name. */
1646 real_name = NULL;
1647 if (compressed_name != NULL) /* try with the given suffix */
1649 if (stat (uncompressed_name, &stat_buf) == 0)
1650 real_name = uncompressed_name;
1652 else /* try all possible suffixes */
1654 for (compr = compressors; compr->suffix != NULL; compr++)
1656 compressed_name = concat (file, ".", compr->suffix);
1657 if (stat (compressed_name, &stat_buf) != 0)
1659 if (MSDOS)
1661 char *suf = compressed_name + strlen (file);
1662 size_t suflen = strlen (compr->suffix) + 1;
1663 for ( ; suf[1]; suf++, suflen--)
1665 memmove (suf, suf + 1, suflen);
1666 if (stat (compressed_name, &stat_buf) == 0)
1668 real_name = compressed_name;
1669 break;
1672 if (real_name != NULL)
1673 break;
1674 } /* MSDOS */
1675 free (compressed_name);
1676 compressed_name = NULL;
1678 else
1680 real_name = compressed_name;
1681 break;
1685 if (real_name == NULL)
1687 perror (file);
1688 goto cleanup;
1690 } /* try with a different name */
1692 if (!S_ISREG (stat_buf.st_mode))
1694 error ("skipping %s: it is not a regular file.", real_name);
1695 goto cleanup;
1697 if (real_name == compressed_name)
1699 char *cmd = concat (compr->command, " ", real_name);
1700 inf = (FILE *) popen (cmd, "r");
1701 free (cmd);
1703 else
1704 inf = fopen (real_name, "r");
1705 if (inf == NULL)
1707 perror (real_name);
1708 goto cleanup;
1711 process_file (inf, uncompressed_name, lang);
1713 if (real_name == compressed_name)
1714 retval = pclose (inf);
1715 else
1716 retval = fclose (inf);
1717 if (retval < 0)
1718 pfatal (file);
1720 cleanup:
1721 if (compressed_name) free (compressed_name);
1722 if (uncompressed_name) free (uncompressed_name);
1723 last_node = NULL;
1724 curfdp = NULL;
1725 return;
1728 static void
1729 process_file (fh, fn, lang)
1730 FILE *fh;
1731 char *fn;
1732 language *lang;
1734 static const fdesc emptyfdesc;
1735 fdesc *fdp;
1737 /* Create a new input file description entry. */
1738 fdp = xnew (1, fdesc);
1739 *fdp = emptyfdesc;
1740 fdp->next = fdhead;
1741 fdp->infname = savestr (fn);
1742 fdp->lang = lang;
1743 fdp->infabsname = absolute_filename (fn, cwd);
1744 fdp->infabsdir = absolute_dirname (fn, cwd);
1745 if (filename_is_absolute (fn))
1747 /* An absolute file name. Canonicalize it. */
1748 fdp->taggedfname = absolute_filename (fn, NULL);
1750 else
1752 /* A file name relative to cwd. Make it relative
1753 to the directory of the tags file. */
1754 fdp->taggedfname = relative_filename (fn, tagfiledir);
1756 fdp->usecharno = TRUE; /* use char position when making tags */
1757 fdp->prop = NULL;
1758 fdp->written = FALSE; /* not written on tags file yet */
1760 fdhead = fdp;
1761 curfdp = fdhead; /* the current file description */
1763 find_entries (fh);
1765 /* If not Ctags, and if this is not metasource and if it contained no #line
1766 directives, we can write the tags and free all nodes pointing to
1767 curfdp. */
1768 if (!CTAGS
1769 && curfdp->usecharno /* no #line directives in this file */
1770 && !curfdp->lang->metasource)
1772 node *np, *prev;
1774 /* Look for the head of the sublist relative to this file. See add_node
1775 for the structure of the node tree. */
1776 prev = NULL;
1777 for (np = nodehead; np != NULL; prev = np, np = np->left)
1778 if (np->fdp == curfdp)
1779 break;
1781 /* If we generated tags for this file, write and delete them. */
1782 if (np != NULL)
1784 /* This is the head of the last sublist, if any. The following
1785 instructions depend on this being true. */
1786 assert (np->left == NULL);
1788 assert (fdhead == curfdp);
1789 assert (last_node->fdp == curfdp);
1790 put_entries (np); /* write tags for file curfdp->taggedfname */
1791 free_tree (np); /* remove the written nodes */
1792 if (prev == NULL)
1793 nodehead = NULL; /* no nodes left */
1794 else
1795 prev->left = NULL; /* delete the pointer to the sublist */
1801 * This routine sets up the boolean pseudo-functions which work
1802 * by setting boolean flags dependent upon the corresponding character.
1803 * Every char which is NOT in that string is not a white char. Therefore,
1804 * all of the array "_wht" is set to FALSE, and then the elements
1805 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1806 * of a char is TRUE if it is the string "white", else FALSE.
1808 static void
1809 init ()
1811 register char *sp;
1812 register int i;
1814 for (i = 0; i < CHARS; i++)
1815 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1816 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1817 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1818 notinname('\0') = notinname('\n');
1819 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1820 begtoken('\0') = begtoken('\n');
1821 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1822 intoken('\0') = intoken('\n');
1823 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1824 endtoken('\0') = endtoken('\n');
1828 * This routine opens the specified file and calls the function
1829 * which finds the function and type definitions.
1831 static void
1832 find_entries (inf)
1833 FILE *inf;
1835 char *cp;
1836 language *lang = curfdp->lang;
1837 Lang_function *parser = NULL;
1839 /* If user specified a language, use it. */
1840 if (lang != NULL && lang->function != NULL)
1842 parser = lang->function;
1845 /* Else try to guess the language given the file name. */
1846 if (parser == NULL)
1848 lang = get_language_from_filename (curfdp->infname, TRUE);
1849 if (lang != NULL && lang->function != NULL)
1851 curfdp->lang = lang;
1852 parser = lang->function;
1856 /* Else look for sharp-bang as the first two characters. */
1857 if (parser == NULL
1858 && readline_internal (&lb, inf) > 0
1859 && lb.len >= 2
1860 && lb.buffer[0] == '#'
1861 && lb.buffer[1] == '!')
1863 char *lp;
1865 /* Set lp to point at the first char after the last slash in the
1866 line or, if no slashes, at the first nonblank. Then set cp to
1867 the first successive blank and terminate the string. */
1868 lp = etags_strrchr (lb.buffer+2, '/');
1869 if (lp != NULL)
1870 lp += 1;
1871 else
1872 lp = skip_spaces (lb.buffer + 2);
1873 cp = skip_non_spaces (lp);
1874 *cp = '\0';
1876 if (strlen (lp) > 0)
1878 lang = get_language_from_interpreter (lp);
1879 if (lang != NULL && lang->function != NULL)
1881 curfdp->lang = lang;
1882 parser = lang->function;
1887 /* We rewind here, even if inf may be a pipe. We fail if the
1888 length of the first line is longer than the pipe block size,
1889 which is unlikely. */
1890 rewind (inf);
1892 /* Else try to guess the language given the case insensitive file name. */
1893 if (parser == NULL)
1895 lang = get_language_from_filename (curfdp->infname, FALSE);
1896 if (lang != NULL && lang->function != NULL)
1898 curfdp->lang = lang;
1899 parser = lang->function;
1903 /* Else try Fortran or C. */
1904 if (parser == NULL)
1906 node *old_last_node = last_node;
1908 curfdp->lang = get_language_from_langname ("fortran");
1909 find_entries (inf);
1911 if (old_last_node == last_node)
1912 /* No Fortran entries found. Try C. */
1914 /* We do not tag if rewind fails.
1915 Only the file name will be recorded in the tags file. */
1916 rewind (inf);
1917 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1918 find_entries (inf);
1920 return;
1923 if (!no_line_directive
1924 && curfdp->lang != NULL && curfdp->lang->metasource)
1925 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1926 file, or anyway we parsed a file that is automatically generated from
1927 this one. If this is the case, the bingo.c file contained #line
1928 directives that generated tags pointing to this file. Let's delete
1929 them all before parsing this file, which is the real source. */
1931 fdesc **fdpp = &fdhead;
1932 while (*fdpp != NULL)
1933 if (*fdpp != curfdp
1934 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1935 /* We found one of those! We must delete both the file description
1936 and all tags referring to it. */
1938 fdesc *badfdp = *fdpp;
1940 /* Delete the tags referring to badfdp->taggedfname
1941 that were obtained from badfdp->infname. */
1942 invalidate_nodes (badfdp, &nodehead);
1944 *fdpp = badfdp->next; /* remove the bad description from the list */
1945 free_fdesc (badfdp);
1947 else
1948 fdpp = &(*fdpp)->next; /* advance the list pointer */
1951 assert (parser != NULL);
1953 /* Generic initialisations before reading from file. */
1954 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1956 /* Generic initialisations before parsing file with readline. */
1957 lineno = 0; /* reset global line number */
1958 charno = 0; /* reset global char number */
1959 linecharno = 0; /* reset global char number of line start */
1961 parser (inf);
1963 #ifdef ETAGS_REGEXPS
1964 regex_tag_multiline ();
1965 #endif /* ETAGS_REGEXPS */
1970 * Check whether an implicitly named tag should be created,
1971 * then call `pfnote'.
1972 * NAME is a string that is internally copied by this function.
1974 * TAGS format specification
1975 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1976 * The following is explained in some more detail in etc/ETAGS.EBNF.
1978 * make_tag creates tags with "implicit tag names" (unnamed tags)
1979 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1980 * 1. NAME does not contain any of the characters in NONAM;
1981 * 2. LINESTART contains name as either a rightmost, or rightmost but
1982 * one character, substring;
1983 * 3. the character, if any, immediately before NAME in LINESTART must
1984 * be a character in NONAM;
1985 * 4. the character, if any, immediately after NAME in LINESTART must
1986 * also be a character in NONAM.
1988 * The implementation uses the notinname() macro, which recognises the
1989 * characters stored in the string `nonam'.
1990 * etags.el needs to use the same characters that are in NONAM.
1992 static void
1993 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1994 char *name; /* tag name, or NULL if unnamed */
1995 int namelen; /* tag length */
1996 bool is_func; /* tag is a function */
1997 char *linestart; /* start of the line where tag is */
1998 int linelen; /* length of the line where tag is */
1999 int lno; /* line number */
2000 long cno; /* character number */
2002 bool named = (name != NULL && namelen > 0);
2004 if (!CTAGS && named) /* maybe set named to false */
2005 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2006 such that etags.el can guess a name from it. */
2008 int i;
2009 register char *cp = name;
2011 for (i = 0; i < namelen; i++)
2012 if (notinname (*cp++))
2013 break;
2014 if (i == namelen) /* rule #1 */
2016 cp = linestart + linelen - namelen;
2017 if (notinname (linestart[linelen-1]))
2018 cp -= 1; /* rule #4 */
2019 if (cp >= linestart /* rule #2 */
2020 && (cp == linestart
2021 || notinname (cp[-1])) /* rule #3 */
2022 && strneq (name, cp, namelen)) /* rule #2 */
2023 named = FALSE; /* use implicit tag name */
2027 if (named)
2028 name = savenstr (name, namelen);
2029 else
2030 name = NULL;
2031 pfnote (name, is_func, linestart, linelen, lno, cno);
2034 /* Record a tag. */
2035 static void
2036 pfnote (name, is_func, linestart, linelen, lno, cno)
2037 char *name; /* tag name, or NULL if unnamed */
2038 bool is_func; /* tag is a function */
2039 char *linestart; /* start of the line where tag is */
2040 int linelen; /* length of the line where tag is */
2041 int lno; /* line number */
2042 long cno; /* character number */
2044 register node *np;
2046 assert (name == NULL || name[0] != '\0');
2047 if (CTAGS && name == NULL)
2048 return;
2050 np = xnew (1, node);
2052 /* If ctags mode, change name "main" to M<thisfilename>. */
2053 if (CTAGS && !cxref_style && streq (name, "main"))
2055 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2056 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2057 fp = etags_strrchr (np->name, '.');
2058 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2059 fp[0] = '\0';
2061 else
2062 np->name = name;
2063 np->valid = TRUE;
2064 np->been_warned = FALSE;
2065 np->fdp = curfdp;
2066 np->is_func = is_func;
2067 np->lno = lno;
2068 if (np->fdp->usecharno)
2069 /* Our char numbers are 0-base, because of C language tradition?
2070 ctags compatibility? old versions compatibility? I don't know.
2071 Anyway, since emacs's are 1-base we expect etags.el to take care
2072 of the difference. If we wanted to have 1-based numbers, we would
2073 uncomment the +1 below. */
2074 np->cno = cno /* + 1 */ ;
2075 else
2076 np->cno = invalidcharno;
2077 np->left = np->right = NULL;
2078 if (CTAGS && !cxref_style)
2080 if (strlen (linestart) < 50)
2081 np->regex = concat (linestart, "$", "");
2082 else
2083 np->regex = savenstr (linestart, 50);
2085 else
2086 np->regex = savenstr (linestart, linelen);
2088 add_node (np, &nodehead);
2092 * free_tree ()
2093 * recurse on left children, iterate on right children.
2095 static void
2096 free_tree (np)
2097 register node *np;
2099 while (np)
2101 register node *node_right = np->right;
2102 free_tree (np->left);
2103 if (np->name != NULL)
2104 free (np->name);
2105 free (np->regex);
2106 free (np);
2107 np = node_right;
2112 * free_fdesc ()
2113 * delete a file description
2115 static void
2116 free_fdesc (fdp)
2117 register fdesc *fdp;
2119 if (fdp->infname != NULL) free (fdp->infname);
2120 if (fdp->infabsname != NULL) free (fdp->infabsname);
2121 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2122 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2123 if (fdp->prop != NULL) free (fdp->prop);
2124 free (fdp);
2128 * add_node ()
2129 * Adds a node to the tree of nodes. In etags mode, sort by file
2130 * name. In ctags mode, sort by tag name. Make no attempt at
2131 * balancing.
2133 * add_node is the only function allowed to add nodes, so it can
2134 * maintain state.
2136 static void
2137 add_node (np, cur_node_p)
2138 node *np, **cur_node_p;
2140 register int dif;
2141 register node *cur_node = *cur_node_p;
2143 if (cur_node == NULL)
2145 *cur_node_p = np;
2146 last_node = np;
2147 return;
2150 if (!CTAGS)
2151 /* Etags Mode */
2153 /* For each file name, tags are in a linked sublist on the right
2154 pointer. The first tags of different files are a linked list
2155 on the left pointer. last_node points to the end of the last
2156 used sublist. */
2157 if (last_node != NULL && last_node->fdp == np->fdp)
2159 /* Let's use the same sublist as the last added node. */
2160 assert (last_node->right == NULL);
2161 last_node->right = np;
2162 last_node = np;
2164 else if (cur_node->fdp == np->fdp)
2166 /* Scanning the list we found the head of a sublist which is
2167 good for us. Let's scan this sublist. */
2168 add_node (np, &cur_node->right);
2170 else
2171 /* The head of this sublist is not good for us. Let's try the
2172 next one. */
2173 add_node (np, &cur_node->left);
2174 } /* if ETAGS mode */
2176 else
2178 /* Ctags Mode */
2179 dif = strcmp (np->name, cur_node->name);
2182 * If this tag name matches an existing one, then
2183 * do not add the node, but maybe print a warning.
2185 if (!dif)
2187 if (np->fdp == cur_node->fdp)
2189 if (!no_warnings)
2191 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2192 np->fdp->infname, lineno, np->name);
2193 fprintf (stderr, "Second entry ignored\n");
2196 else if (!cur_node->been_warned && !no_warnings)
2198 fprintf
2199 (stderr,
2200 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2201 np->fdp->infname, cur_node->fdp->infname, np->name);
2202 cur_node->been_warned = TRUE;
2204 return;
2207 /* Actually add the node */
2208 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2209 } /* if CTAGS mode */
2213 * invalidate_nodes ()
2214 * Scan the node tree and invalidate all nodes pointing to the
2215 * given file description (CTAGS case) or free them (ETAGS case).
2217 static void
2218 invalidate_nodes (badfdp, npp)
2219 fdesc *badfdp;
2220 node **npp;
2222 node *np = *npp;
2224 if (np == NULL)
2225 return;
2227 if (CTAGS)
2229 if (np->left != NULL)
2230 invalidate_nodes (badfdp, &np->left);
2231 if (np->fdp == badfdp)
2232 np->valid = FALSE;
2233 if (np->right != NULL)
2234 invalidate_nodes (badfdp, &np->right);
2236 else
2238 assert (np->fdp != NULL);
2239 if (np->fdp == badfdp)
2241 *npp = np->left; /* detach the sublist from the list */
2242 np->left = NULL; /* isolate it */
2243 free_tree (np); /* free it */
2244 invalidate_nodes (badfdp, npp);
2246 else
2247 invalidate_nodes (badfdp, &np->left);
2252 static int total_size_of_entries __P((node *));
2253 static int number_len __P((long));
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for each division by ten that
     still leaves something to print. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2267 * Return total number of characters that put_entries will output for
2268 * the nodes in the linked list at the right of the specified node.
2269 * This count is irrelevant with etags.el since emacs 19.34 at least,
2270 * but is still supplied for backward compatibility.
2272 static int
2273 total_size_of_entries (np)
2274 register node *np;
2276 register int total = 0;
2278 for (; np != NULL; np = np->right)
2279 if (np->valid)
2281 total += strlen (np->regex) + 1; /* pat\177 */
2282 if (np->name != NULL)
2283 total += strlen (np->name) + 1; /* name\001 */
2284 total += number_len ((long) np->lno) + 1; /* lno, */
2285 if (np->cno != invalidcharno) /* cno */
2286 total += number_len (np->cno);
2287 total += 1; /* newline */
2290 return total;
2293 static void
2294 put_entries (np)
2295 register node *np;
2297 register char *sp;
2298 static fdesc *fdp = NULL;
2300 if (np == NULL)
2301 return;
2303 /* Output subentries that precede this one */
2304 if (CTAGS)
2305 put_entries (np->left);
2307 /* Output this entry */
2308 if (np->valid)
2310 if (!CTAGS)
2312 /* Etags mode */
2313 if (fdp != np->fdp)
2315 fdp = np->fdp;
2316 fprintf (tagf, "\f\n%s,%d\n",
2317 fdp->taggedfname, total_size_of_entries (np));
2318 fdp->written = TRUE;
2320 fputs (np->regex, tagf);
2321 fputc ('\177', tagf);
2322 if (np->name != NULL)
2324 fputs (np->name, tagf);
2325 fputc ('\001', tagf);
2327 fprintf (tagf, "%d,", np->lno);
2328 if (np->cno != invalidcharno)
2329 fprintf (tagf, "%ld", np->cno);
2330 fputs ("\n", tagf);
2332 else
2334 /* Ctags mode */
2335 if (np->name == NULL)
2336 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2338 if (cxref_style)
2340 if (vgrind_style)
2341 fprintf (stdout, "%s %s %d\n",
2342 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2343 else
2344 fprintf (stdout, "%-16s %3d %-16s %s\n",
2345 np->name, np->lno, np->fdp->taggedfname, np->regex);
2347 else
2349 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2351 if (np->is_func)
2352 { /* function or #define macro with args */
2353 putc (searchar, tagf);
2354 putc ('^', tagf);
2356 for (sp = np->regex; *sp; sp++)
2358 if (*sp == '\\' || *sp == searchar)
2359 putc ('\\', tagf);
2360 putc (*sp, tagf);
2362 putc (searchar, tagf);
2364 else
2365 { /* anything else; text pattern inadequate */
2366 fprintf (tagf, "%d", np->lno);
2368 putc ('\n', tagf);
2371 } /* if this node contains a valid tag */
2373 /* Output subentries that follow this one */
2374 put_entries (np->right);
2375 if (!CTAGS)
2376 put_entries (np->left);
2380 /* C extensions. */
2381 #define C_EXT 0x00fff /* C extensions */
2382 #define C_PLAIN 0x00000 /* C */
2383 #define C_PLPL 0x00001 /* C++ */
2384 #define C_STAR 0x00003 /* C* */
2385 #define C_JAVA 0x00005 /* JAVA */
2386 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2387 #define YACC 0x10000 /* yacc file */
2390 * The C symbol tables.
2392 enum sym_type
2394 st_none,
2395 st_C_objprot, st_C_objimpl, st_C_objend,
2396 st_C_gnumacro,
2397 st_C_ignore, st_C_attribute,
2398 st_C_javastruct,
2399 st_C_operator,
2400 st_C_class, st_C_template,
2401 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2404 static unsigned int hash __P((const char *, unsigned int));
2405 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2406 static enum sym_type C_symtype __P((char *, int, int));
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,		0,			st_C_ignore
for,		0,			st_C_ignore
while,		0,			st_C_ignore
switch,		0,			st_C_ignore
return,		0,			st_C_ignore
__attribute__,	0,			st_C_attribute
@interface,	0,			st_C_objprot
@protocol,	0,			st_C_objprot
@implementation,0,			st_C_objimpl
@end,		0,			st_C_objend
import,		(C_JAVA & ~C_PLPL),	st_C_ignore
package,	(C_JAVA & ~C_PLPL),	st_C_ignore
friend,		C_PLPL,			st_C_ignore
extends,	(C_JAVA & ~C_PLPL),	st_C_javastruct
implements,	(C_JAVA & ~C_PLPL),	st_C_javastruct
interface,	(C_JAVA & ~C_PLPL),	st_C_struct
class,		0,			st_C_class
namespace,	C_PLPL,			st_C_struct
domain,		C_STAR,			st_C_struct
union,		0,			st_C_struct
struct,		0,			st_C_struct
extern,		0,			st_C_extern
enum,		0,			st_C_enum
typedef,	0,			st_C_typedef
define,		0,			st_C_define
operator,	C_PLPL,			st_C_operator
template,	0,			st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,		0,			st_C_gnumacro
SYSCALL,	0,			st_C_gnumacro
ENTRY,		0,			st_C_gnumacro
PSEUDO,		0,			st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,		0,			st_C_gnumacro
#DEFVAR_,	0,			st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2456 /*%<*/
2457 /* C code produced by gperf version 3.0.1 */
2458 /* Command-line: gperf -m 5 */
2459 /* Computed positions: -k'1-2' */
2461 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2462 /* maximum key range = 31, duplicates = 0 */
/* Hash function generated by gperf: the sum of LEN and of the
   association values of the first two characters of STR.  STR must
   therefore hold at least two characters.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  unsigned int first = asso_values[(unsigned char)str[0]];
  unsigned int second = asso_values[(unsigned char)str[1]];

  return len + second + first;
}
2508 static struct C_stab_entry *
2509 in_word_set (str, len)
2510 register const char *str;
2511 register unsigned int len;
2513 enum
2515 TOTAL_KEYWORDS = 31,
2516 MIN_WORD_LENGTH = 2,
2517 MAX_WORD_LENGTH = 15,
2518 MIN_HASH_VALUE = 3,
2519 MAX_HASH_VALUE = 33
2522 static struct C_stab_entry wordlist[] =
2524 {""}, {""}, {""},
2525 {"if", 0, st_C_ignore},
2526 {"enum", 0, st_C_enum},
2527 {"@end", 0, st_C_objend},
2528 {"extern", 0, st_C_extern},
2529 {"extends", (C_JAVA & !C_PLPL), st_C_javastruct},
2530 {"for", 0, st_C_ignore},
2531 {"interface", (C_JAVA & !C_PLPL), st_C_struct},
2532 {"@protocol", 0, st_C_objprot},
2533 {"@interface", 0, st_C_objprot},
2534 {"operator", C_PLPL, st_C_operator},
2535 {"return", 0, st_C_ignore},
2536 {"friend", C_PLPL, st_C_ignore},
2537 {"import", (C_JAVA & !C_PLPL), st_C_ignore},
2538 {"@implementation",0, st_C_objimpl},
2539 {"define", 0, st_C_define},
2540 {"package", (C_JAVA & !C_PLPL), st_C_ignore},
2541 {"implements", (C_JAVA & !C_PLPL), st_C_javastruct},
2542 {"namespace", C_PLPL, st_C_struct},
2543 {"domain", C_STAR, st_C_struct},
2544 {"template", 0, st_C_template},
2545 {"typedef", 0, st_C_typedef},
2546 {"struct", 0, st_C_struct},
2547 {"switch", 0, st_C_ignore},
2548 {"union", 0, st_C_struct},
2549 {"while", 0, st_C_ignore},
2550 {"class", 0, st_C_class},
2551 {"__attribute__", 0, st_C_attribute},
2552 {"SYSCALL", 0, st_C_gnumacro},
2553 {"PSEUDO", 0, st_C_gnumacro},
2554 {"ENTRY", 0, st_C_gnumacro},
2555 {"DEFUN", 0, st_C_gnumacro}
2558 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2560 register int key = hash (str, len);
2562 if (key <= MAX_HASH_VALUE && key >= 0)
2564 register const char *s = wordlist[key].name;
2566 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2567 return &wordlist[key];
2570 return 0;
2572 /*%>*/
2574 static enum sym_type
2575 C_symtype (str, len, c_ext)
2576 char *str;
2577 int len;
2578 int c_ext;
2580 register struct C_stab_entry *se = in_word_set (str, len);
2582 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2583 return st_none;
2584 return se->type;
2589 * Ignoring __attribute__ ((list))
2591 static bool inattribute; /* looking at an __attribute__ construct */
2594 * C functions and variables are recognized using a simple
2595 * finite automaton. fvdef is its state variable.
2597 static enum
2599 fvnone, /* nothing seen */
2600 fdefunkey, /* Emacs DEFUN keyword seen */
2601 fdefunname, /* Emacs DEFUN name seen */
2602 foperator, /* func: operator keyword seen (cplpl) */
2603 fvnameseen, /* function or variable name seen */
2604 fstartlist, /* func: just after open parenthesis */
2605 finlist, /* func: in parameter list */
2606 flistseen, /* func: after parameter list */
2607 fignore, /* func: before open brace */
2608 vignore /* var-like: ignore until ';' */
2609 } fvdef;
2611 static bool fvextern; /* func or var: extern keyword seen; */
2614 * typedefs are recognized using a simple finite automaton.
2615 * typdef is its state variable.
2617 static enum
2619 tnone, /* nothing seen */
2620 tkeyseen, /* typedef keyword seen */
2621 ttypeseen, /* defined type seen */
2622 tinbody, /* inside typedef body */
2623 tend, /* just before typedef tag */
2624 tignore /* junk after typedef tag */
2625 } typdef;
2628 * struct-like structures (enum, struct and union) are recognized
2629 * using another simple finite automaton. `structdef' is its state
2630 * variable.
2632 static enum
2634 snone, /* nothing seen yet,
2635 or in struct body if bracelev > 0 */
2636 skeyseen, /* struct-like keyword seen */
2637 stagseen, /* struct-like tag seen */
2638 scolonseen /* colon seen after struct-like tag */
2639 } structdef;
2642 * When objdef is different from onone, objtag is the name of the class.
2644 static char *objtag = "<uninited>";
2647 * Yet another little state machine to deal with preprocessor lines.
2649 static enum
2651 dnone, /* nothing seen */
2652 dsharpseen, /* '#' seen as first char on line */
2653 ddefineseen, /* '#' and 'define' seen */
2654 dignorerest /* ignore rest of line */
2655 } definedef;
2658 * State machine for Objective C protocols and implementations.
2659 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2661 static enum
2663 onone, /* nothing seen */
2664 oprotocol, /* @interface or @protocol seen */
2665 oimplementation, /* @implementations seen */
2666 otagseen, /* class name seen */
2667 oparenseen, /* parenthesis before category seen */
2668 ocatseen, /* category name seen */
2669 oinbody, /* in @implementation body */
2670 omethodsign, /* in @implementation body, after +/- */
2671 omethodtag, /* after method name */
2672 omethodcolon, /* after method colon */
2673 omethodparm, /* after method parameter */
2674 oignore /* wait for @end */
2675 } objdef;
2679 * Use this structure to keep info about the token read, and how it
2680 * should be tagged. Used by the make_C_tag function to build a tag.
2682 static struct tok
2684 char *line; /* string containing the token */
2685 int offset; /* where the token starts in LINE */
2686 int length; /* token length */
2688 The previous members can be used to pass strings around for generic
2689 purposes. The following ones specifically refer to creating tags. In this
2690 case the token contained here is the pattern that will be used to create a
2691 tag.
2693 bool valid; /* do not create a tag; the token should be
2694 invalidated whenever a state machine is
2695 reset prematurely */
2696 bool named; /* create a named tag */
2697 int lineno; /* source line number of tag */
2698 long linepos; /* source char number of tag */
2699 } token; /* latest token read */
2702 * Variables and functions for dealing with nested structures.
2703 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2705 static void pushclass_above __P((int, char *, int));
2706 static void popclass_above __P((int));
2707 static void write_classname __P((linebuffer *, char *qualifier));
2709 static struct {
2710 char **cname; /* nested class names */
2711 int *bracelev; /* nested class brace level */
2712 int nl; /* class nesting level (elements used) */
2713 int size; /* length of the array */
2714 } cstack; /* stack for nested declaration tags */
2715 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2716 #define nestlev (cstack.nl)
2717 /* After struct keyword or in struct body, not inside a nested function. */
2718 #define instruct (structdef == snone && nestlev > 0 \
2719 && bracelev == cstack.bracelev[nestlev-1] + 1)
2721 static void
2722 pushclass_above (bracelev, str, len)
2723 int bracelev;
2724 char *str;
2725 int len;
2727 int nl;
2729 popclass_above (bracelev);
2730 nl = cstack.nl;
2731 if (nl >= cstack.size)
2733 int size = cstack.size *= 2;
2734 xrnew (cstack.cname, size, char *);
2735 xrnew (cstack.bracelev, size, int);
2737 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2738 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2739 cstack.bracelev[nl] = bracelev;
2740 cstack.nl = nl + 1;
2743 static void
2744 popclass_above (bracelev)
2745 int bracelev;
2747 int nl;
2749 for (nl = cstack.nl - 1;
2750 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2751 nl--)
2753 if (cstack.cname[nl] != NULL)
2754 free (cstack.cname[nl]);
2755 cstack.nl = nl;
2759 static void
2760 write_classname (cn, qualifier)
2761 linebuffer *cn;
2762 char *qualifier;
2764 int i, len;
2765 int qlen = strlen (qualifier);
2767 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2769 len = 0;
2770 cn->len = 0;
2771 cn->buffer[0] = '\0';
2773 else
2775 len = strlen (cstack.cname[0]);
2776 linebuffer_setlen (cn, len);
2777 strcpy (cn->buffer, cstack.cname[0]);
2779 for (i = 1; i < cstack.nl; i++)
2781 char *s;
2782 int slen;
2784 s = cstack.cname[i];
2785 if (s == NULL)
2786 continue;
2787 slen = strlen (s);
2788 len += slen + qlen;
2789 linebuffer_setlen (cn, len);
2790 strncat (cn->buffer, qualifier, qlen);
2791 strncat (cn->buffer, s, slen);
2796 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2797 static void make_C_tag __P((bool));
2800 * consider_token ()
2801 * checks to see if the current token is at the start of a
2802 * function or variable, or corresponds to a typedef, or
2803 * is a struct/union/enum tag, or #define, or an enum constant.
2805 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2806 * with args. C_EXTP points to which language we are looking at.
2808 * Globals
2809 * fvdef IN OUT
2810 * structdef IN OUT
2811 * definedef IN OUT
2812 * typdef IN OUT
2813 * objdef IN OUT
2816 static bool
2817 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2818 register char *str; /* IN: token pointer */
2819 register int len; /* IN: token length */
2820 register int c; /* IN: first char after the token */
2821 int *c_extp; /* IN, OUT: C extensions mask */
2822 int bracelev; /* IN: brace level */
2823 int parlev; /* IN: parenthesis level */
2824 bool *is_func_or_var; /* OUT: function or variable found */
2826 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2827 structtype is the type of the preceding struct-like keyword, and
2828 structbracelev is the brace level where it has been seen. */
2829 static enum sym_type structtype;
2830 static int structbracelev;
2831 static enum sym_type toktype;
2834 toktype = C_symtype (str, len, *c_extp);
2837 * Skip __attribute__
2839 if (toktype == st_C_attribute)
2841 inattribute = TRUE;
2842 return FALSE;
2846 * Advance the definedef state machine.
2848 switch (definedef)
2850 case dnone:
2851 /* We're not on a preprocessor line. */
2852 if (toktype == st_C_gnumacro)
2854 fvdef = fdefunkey;
2855 return FALSE;
2857 break;
2858 case dsharpseen:
2859 if (toktype == st_C_define)
2861 definedef = ddefineseen;
2863 else
2865 definedef = dignorerest;
2867 return FALSE;
2868 case ddefineseen:
2870 * Make a tag for any macro, unless it is a constant
2871 * and constantypedefs is FALSE.
2873 definedef = dignorerest;
2874 *is_func_or_var = (c == '(');
2875 if (!*is_func_or_var && !constantypedefs)
2876 return FALSE;
2877 else
2878 return TRUE;
2879 case dignorerest:
2880 return FALSE;
2881 default:
2882 error ("internal error: definedef value.", (char *)NULL);
2886 * Now typedefs
2888 switch (typdef)
2890 case tnone:
2891 if (toktype == st_C_typedef)
2893 if (typedefs)
2894 typdef = tkeyseen;
2895 fvextern = FALSE;
2896 fvdef = fvnone;
2897 return FALSE;
2899 break;
2900 case tkeyseen:
2901 switch (toktype)
2903 case st_none:
2904 case st_C_class:
2905 case st_C_struct:
2906 case st_C_enum:
2907 typdef = ttypeseen;
2909 break;
2910 case ttypeseen:
2911 if (structdef == snone && fvdef == fvnone)
2913 fvdef = fvnameseen;
2914 return TRUE;
2916 break;
2917 case tend:
2918 switch (toktype)
2920 case st_C_class:
2921 case st_C_struct:
2922 case st_C_enum:
2923 return FALSE;
2925 return TRUE;
2929 * This structdef business is NOT invoked when we are ctags and the
2930 * file is plain C. This is because a struct tag may have the same
2931 * name as another tag, and this loses with ctags.
2933 switch (toktype)
2935 case st_C_javastruct:
2936 if (structdef == stagseen)
2937 structdef = scolonseen;
2938 return FALSE;
2939 case st_C_template:
2940 case st_C_class:
2941 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2942 && bracelev == 0
2943 && definedef == dnone && structdef == snone
2944 && typdef == tnone && fvdef == fvnone)
2945 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2946 if (toktype == st_C_template)
2947 break;
2948 /* FALLTHRU */
2949 case st_C_struct:
2950 case st_C_enum:
2951 if (parlev == 0
2952 && fvdef != vignore
2953 && (typdef == tkeyseen
2954 || (typedefs_or_cplusplus && structdef == snone)))
2956 structdef = skeyseen;
2957 structtype = toktype;
2958 structbracelev = bracelev;
2959 if (fvdef == fvnameseen)
2960 fvdef = fvnone;
2962 return FALSE;
2965 if (structdef == skeyseen)
2967 structdef = stagseen;
2968 return TRUE;
2971 if (typdef != tnone)
2972 definedef = dnone;
2974 /* Detect Objective C constructs. */
2975 switch (objdef)
2977 case onone:
2978 switch (toktype)
2980 case st_C_objprot:
2981 objdef = oprotocol;
2982 return FALSE;
2983 case st_C_objimpl:
2984 objdef = oimplementation;
2985 return FALSE;
2987 break;
2988 case oimplementation:
2989 /* Save the class tag for functions or variables defined inside. */
2990 objtag = savenstr (str, len);
2991 objdef = oinbody;
2992 return FALSE;
2993 case oprotocol:
2994 /* Save the class tag for categories. */
2995 objtag = savenstr (str, len);
2996 objdef = otagseen;
2997 *is_func_or_var = TRUE;
2998 return TRUE;
2999 case oparenseen:
3000 objdef = ocatseen;
3001 *is_func_or_var = TRUE;
3002 return TRUE;
3003 case oinbody:
3004 break;
3005 case omethodsign:
3006 if (parlev == 0)
3008 fvdef = fvnone;
3009 objdef = omethodtag;
3010 linebuffer_setlen (&token_name, len);
3011 strncpy (token_name.buffer, str, len);
3012 token_name.buffer[len] = '\0';
3013 return TRUE;
3015 return FALSE;
3016 case omethodcolon:
3017 if (parlev == 0)
3018 objdef = omethodparm;
3019 return FALSE;
3020 case omethodparm:
3021 if (parlev == 0)
3023 fvdef = fvnone;
3024 objdef = omethodtag;
3025 linebuffer_setlen (&token_name, token_name.len + len);
3026 strncat (token_name.buffer, str, len);
3027 return TRUE;
3029 return FALSE;
3030 case oignore:
3031 if (toktype == st_C_objend)
3033 /* Memory leakage here: the string pointed by objtag is
3034 never released, because many tests would be needed to
3035 avoid breaking on incorrect input code. The amount of
3036 memory leaked here is the sum of the lengths of the
3037 class tags.
3038 free (objtag); */
3039 objdef = onone;
3041 return FALSE;
3044 /* A function, variable or enum constant? */
3045 switch (toktype)
3047 case st_C_extern:
3048 fvextern = TRUE;
3049 switch (fvdef)
3051 case finlist:
3052 case flistseen:
3053 case fignore:
3054 case vignore:
3055 break;
3056 default:
3057 fvdef = fvnone;
3059 return FALSE;
3060 case st_C_ignore:
3061 fvextern = FALSE;
3062 fvdef = vignore;
3063 return FALSE;
3064 case st_C_operator:
3065 fvdef = foperator;
3066 *is_func_or_var = TRUE;
3067 return TRUE;
3068 case st_none:
3069 if (constantypedefs
3070 && structdef == snone
3071 && structtype == st_C_enum && bracelev > structbracelev)
3072 return TRUE; /* enum constant */
3073 switch (fvdef)
3075 case fdefunkey:
3076 if (bracelev > 0)
3077 break;
3078 fvdef = fdefunname; /* GNU macro */
3079 *is_func_or_var = TRUE;
3080 return TRUE;
3081 case fvnone:
3082 switch (typdef)
3084 case ttypeseen:
3085 return FALSE;
3086 case tnone:
3087 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3088 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3090 fvdef = vignore;
3091 return FALSE;
3093 break;
3095 /* FALLTHRU */
3096 case fvnameseen:
3097 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3099 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3100 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3101 fvdef = foperator;
3102 *is_func_or_var = TRUE;
3103 return TRUE;
3105 if (bracelev > 0 && !instruct)
3106 break;
3107 fvdef = fvnameseen; /* function or variable */
3108 *is_func_or_var = TRUE;
3109 return TRUE;
3111 break;
3114 return FALSE;
3119 * C_entries often keeps pointers to tokens or lines which are older than
3120 * the line currently read. By keeping two line buffers, and switching
3121 * them at end of line, it is possible to use those pointers.
3123 static struct
3125 long linepos;
3126 linebuffer lb;
3127 } lbs[2];
3129 #define current_lb_is_new (newndx == curndx)
3130 #define switch_line_buffers() (curndx = 1 - curndx)
3132 #define curlb (lbs[curndx].lb)
3133 #define newlb (lbs[newndx].lb)
3134 #define curlinepos (lbs[curndx].linepos)
3135 #define newlinepos (lbs[newndx].linepos)
3137 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3138 #define cplpl (c_ext & C_PLPL)
3139 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3141 #define CNL_SAVE_DEFINEDEF() \
3142 do { \
3143 curlinepos = charno; \
3144 readline (&curlb, inf); \
3145 lp = curlb.buffer; \
3146 quotednl = FALSE; \
3147 newndx = curndx; \
3148 } while (0)
3150 #define CNL() \
3151 do { \
3152 CNL_SAVE_DEFINEDEF(); \
3153 if (savetoken.valid) \
3155 token = savetoken; \
3156 savetoken.valid = FALSE; \
3158 definedef = dnone; \
3159 } while (0)
3162 static void
3163 make_C_tag (isfun)
3164 bool isfun;
3166 /* This function should never be called when token.valid is FALSE, but
3167 we must protect against invalid input or internal errors. */
3168 if (!DEBUG && !token.valid)
3169 return;
3171 if (token.valid)
3172 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3173 token.offset+token.length+1, token.lineno, token.linepos);
3174 else /* this case is optimised away if !DEBUG */
3175 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3176 token_name.len + 17, isfun, token.line,
3177 token.offset+token.length+1, token.lineno, token.linepos);
3179 token.valid = FALSE;
3184 * C_entries ()
3185 * This routine finds functions, variables, typedefs,
3186 * #define's, enum constants and struct/union/enum definitions in
3187 * C syntax and adds them to the list.
3189 static void
3190 C_entries (c_ext, inf)
3191 int c_ext; /* extension of C */
3192 FILE *inf; /* input file */
3194 register char c; /* latest char read; '\0' for end of line */
3195 register char *lp; /* pointer one beyond the character `c' */
3196 int curndx, newndx; /* indices for current and new lb */
3197 register int tokoff; /* offset in line of start of current token */
3198 register int toklen; /* length of current token */
3199 char *qualifier; /* string used to qualify names */
3200 int qlen; /* length of qualifier */
3201 int bracelev; /* current brace level */
3202 int bracketlev; /* current bracket level */
3203 int parlev; /* current parenthesis level */
3204 int attrparlev; /* __attribute__ parenthesis level */
3205 int templatelev; /* current template level */
3206 int typdefbracelev; /* bracelev where a typedef struct body begun */
3207 bool incomm, inquote, inchar, quotednl, midtoken;
3208 bool yacc_rules; /* in the rules part of a yacc file */
3209 struct tok savetoken; /* token saved during preprocessor handling */
3212 linebuffer_init (&lbs[0].lb);
3213 linebuffer_init (&lbs[1].lb);
3214 if (cstack.size == 0)
3216 cstack.size = (DEBUG) ? 1 : 4;
3217 cstack.nl = 0;
3218 cstack.cname = xnew (cstack.size, char *);
3219 cstack.bracelev = xnew (cstack.size, int);
3222 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3223 curndx = newndx = 0;
3224 lp = curlb.buffer;
3225 *lp = 0;
3227 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3228 structdef = snone; definedef = dnone; objdef = onone;
3229 yacc_rules = FALSE;
3230 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3231 token.valid = savetoken.valid = FALSE;
3232 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3233 if (cjava)
3234 { qualifier = "."; qlen = 1; }
3235 else
3236 { qualifier = "::"; qlen = 2; }
3239 while (!feof (inf))
3241 c = *lp++;
3242 if (c == '\\')
3244 /* If we are at the end of the line, the next character is a
3245 '\0'; do not skip it, because it is what tells us
3246 to read the next line. */
3247 if (*lp == '\0')
3249 quotednl = TRUE;
3250 continue;
3252 lp++;
3253 c = ' ';
3255 else if (incomm)
3257 switch (c)
3259 case '*':
3260 if (*lp == '/')
3262 c = *lp++;
3263 incomm = FALSE;
3265 break;
3266 case '\0':
3267 /* Newlines inside comments do not end macro definitions in
3268 traditional cpp. */
3269 CNL_SAVE_DEFINEDEF ();
3270 break;
3272 continue;
3274 else if (inquote)
3276 switch (c)
3278 case '"':
3279 inquote = FALSE;
3280 break;
3281 case '\0':
3282 /* Newlines inside strings do not end macro definitions
3283 in traditional cpp, even though compilers don't
3284 usually accept them. */
3285 CNL_SAVE_DEFINEDEF ();
3286 break;
3288 continue;
3290 else if (inchar)
3292 switch (c)
3294 case '\0':
3295 /* Hmmm, something went wrong. */
3296 CNL ();
3297 /* FALLTHRU */
3298 case '\'':
3299 inchar = FALSE;
3300 break;
3302 continue;
3304 else if (bracketlev > 0)
3306 switch (c)
3308 case ']':
3309 if (--bracketlev > 0)
3310 continue;
3311 break;
3312 case '\0':
3313 CNL_SAVE_DEFINEDEF ();
3314 break;
3316 continue;
3318 else switch (c)
3320 case '"':
3321 inquote = TRUE;
3322 if (inattribute)
3323 break;
3324 switch (fvdef)
3326 case fdefunkey:
3327 case fstartlist:
3328 case finlist:
3329 case fignore:
3330 case vignore:
3331 break;
3332 default:
3333 fvextern = FALSE;
3334 fvdef = fvnone;
3336 continue;
3337 case '\'':
3338 inchar = TRUE;
3339 if (inattribute)
3340 break;
3341 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3343 fvextern = FALSE;
3344 fvdef = fvnone;
3346 continue;
3347 case '/':
3348 if (*lp == '*')
3350 lp++;
3351 incomm = TRUE;
3352 continue;
3354 else if (/* cplpl && */ *lp == '/')
3356 c = '\0';
3357 break;
3359 else
3360 break;
3361 case '%':
3362 if ((c_ext & YACC) && *lp == '%')
3364 /* Entering or exiting rules section in yacc file. */
3365 lp++;
3366 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3367 typdef = tnone; structdef = snone;
3368 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3369 bracelev = 0;
3370 yacc_rules = !yacc_rules;
3371 continue;
3373 else
3374 break;
3375 case '#':
3376 if (definedef == dnone)
3378 char *cp;
3379 bool cpptoken = TRUE;
3381 /* Look back on this line. If all blanks, or nonblanks
3382 followed by an end of comment, this is a preprocessor
3383 token. */
3384 for (cp = newlb.buffer; cp < lp-1; cp++)
3385 if (!iswhite (*cp))
3387 if (*cp == '*' && *(cp+1) == '/')
3389 cp++;
3390 cpptoken = TRUE;
3392 else
3393 cpptoken = FALSE;
3395 if (cpptoken)
3396 definedef = dsharpseen;
3397 } /* if (definedef == dnone) */
3398 continue;
3399 case '[':
3400 bracketlev++;
3401 continue;
3402 } /* switch (c) */
3405 /* Consider token only if some involved conditions are satisfied. */
3406 if (typdef != tignore
3407 && definedef != dignorerest
3408 && fvdef != finlist
3409 && templatelev == 0
3410 && (definedef != dnone
3411 || structdef != scolonseen)
3412 && !inattribute)
3414 if (midtoken)
3416 if (endtoken (c))
3418 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3419 /* This handles :: in the middle,
3420 but not at the beginning of an identifier.
3421 Also, space-separated :: is not recognised. */
3423 if (c_ext & C_AUTO) /* automatic detection of C++ */
3424 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3425 lp += 2;
3426 toklen += 2;
3427 c = lp[-1];
3428 goto still_in_token;
3430 else
3432 bool funorvar = FALSE;
3434 if (yacc_rules
3435 || consider_token (newlb.buffer + tokoff, toklen, c,
3436 &c_ext, bracelev, parlev,
3437 &funorvar))
3439 if (fvdef == foperator)
3441 char *oldlp = lp;
3442 lp = skip_spaces (lp-1);
3443 if (*lp != '\0')
3444 lp += 1;
3445 while (*lp != '\0'
3446 && !iswhite (*lp) && *lp != '(')
3447 lp += 1;
3448 c = *lp++;
3449 toklen += lp - oldlp;
3451 token.named = FALSE;
3452 if (!plainc
3453 && nestlev > 0 && definedef == dnone)
3454 /* in struct body */
3456 write_classname (&token_name, qualifier);
3457 linebuffer_setlen (&token_name,
3458 token_name.len+qlen+toklen);
3459 strcat (token_name.buffer, qualifier);
3460 strncat (token_name.buffer,
3461 newlb.buffer + tokoff, toklen);
3462 token.named = TRUE;
3464 else if (objdef == ocatseen)
3465 /* Objective C category */
3467 int len = strlen (objtag) + 2 + toklen;
3468 linebuffer_setlen (&token_name, len);
3469 strcpy (token_name.buffer, objtag);
3470 strcat (token_name.buffer, "(");
3471 strncat (token_name.buffer,
3472 newlb.buffer + tokoff, toklen);
3473 strcat (token_name.buffer, ")");
3474 token.named = TRUE;
3476 else if (objdef == omethodtag
3477 || objdef == omethodparm)
3478 /* Objective C method */
3480 token.named = TRUE;
3482 else if (fvdef == fdefunname)
3483 /* GNU DEFUN and similar macros */
3485 bool defun = (newlb.buffer[tokoff] == 'F');
3486 int off = tokoff;
3487 int len = toklen;
3489 /* Rewrite the tag so that emacs lisp DEFUNs
3490 can be found by their elisp name */
3491 if (defun)
3493 off += 1;
3494 len -= 1;
3496 len = toklen;
3497 linebuffer_setlen (&token_name, len);
3498 strncpy (token_name.buffer,
3499 newlb.buffer + off, len);
3500 token_name.buffer[len] = '\0';
3501 if (defun)
3502 while (--len >= 0)
3503 if (token_name.buffer[len] == '_')
3504 token_name.buffer[len] = '-';
3505 token.named = defun;
3507 else
3509 linebuffer_setlen (&token_name, toklen);
3510 strncpy (token_name.buffer,
3511 newlb.buffer + tokoff, toklen);
3512 token_name.buffer[toklen] = '\0';
3513 /* Name macros and members. */
3514 token.named = (structdef == stagseen
3515 || typdef == ttypeseen
3516 || typdef == tend
3517 || (funorvar
3518 && definedef == dignorerest)
3519 || (funorvar
3520 && definedef == dnone
3521 && structdef == snone
3522 && bracelev > 0));
3524 token.lineno = lineno;
3525 token.offset = tokoff;
3526 token.length = toklen;
3527 token.line = newlb.buffer;
3528 token.linepos = newlinepos;
3529 token.valid = TRUE;
3531 if (definedef == dnone
3532 && (fvdef == fvnameseen
3533 || fvdef == foperator
3534 || structdef == stagseen
3535 || typdef == tend
3536 || typdef == ttypeseen
3537 || objdef != onone))
3539 if (current_lb_is_new)
3540 switch_line_buffers ();
3542 else if (definedef != dnone
3543 || fvdef == fdefunname
3544 || instruct)
3545 make_C_tag (funorvar);
3547 else /* not yacc and consider_token failed */
3549 if (inattribute && fvdef == fignore)
3551 /* We have just met __attribute__ after a
3552 function parameter list: do not tag the
3553 function again. */
3554 fvdef = fvnone;
3557 midtoken = FALSE;
3559 } /* if (endtoken (c)) */
3560 else if (intoken (c))
3561 still_in_token:
3563 toklen++;
3564 continue;
3566 } /* if (midtoken) */
3567 else if (begtoken (c))
3569 switch (definedef)
3571 case dnone:
3572 switch (fvdef)
3574 case fstartlist:
3575 /* This prevents tagging fb in
3576 void (__attribute__((noreturn)) *fb) (void);
3577 Fixing this is not easy and not very important. */
3578 fvdef = finlist;
3579 continue;
3580 case flistseen:
3581 if (plainc || declarations)
3583 make_C_tag (TRUE); /* a function */
3584 fvdef = fignore;
3586 break;
3588 if (structdef == stagseen && !cjava)
3590 popclass_above (bracelev);
3591 structdef = snone;
3593 break;
3594 case dsharpseen:
3595 savetoken = token;
3596 break;
3598 if (!yacc_rules || lp == newlb.buffer + 1)
3600 tokoff = lp - 1 - newlb.buffer;
3601 toklen = 1;
3602 midtoken = TRUE;
3604 continue;
3605 } /* if (begtoken) */
3606 } /* if must look at token */
3609 /* Detect end of line, colon, comma, semicolon and various braces
3610 after having handled a token.*/
3611 switch (c)
3613 case ':':
3614 if (inattribute)
3615 break;
3616 if (yacc_rules && token.offset == 0 && token.valid)
3618 make_C_tag (FALSE); /* a yacc function */
3619 break;
3621 if (definedef != dnone)
3622 break;
3623 switch (objdef)
3625 case otagseen:
3626 objdef = oignore;
3627 make_C_tag (TRUE); /* an Objective C class */
3628 break;
3629 case omethodtag:
3630 case omethodparm:
3631 objdef = omethodcolon;
3632 linebuffer_setlen (&token_name, token_name.len + 1);
3633 strcat (token_name.buffer, ":");
3634 break;
3636 if (structdef == stagseen)
3638 structdef = scolonseen;
3639 break;
3641 /* Should be useless, but may be work as a safety net. */
3642 if (cplpl && fvdef == flistseen)
3644 make_C_tag (TRUE); /* a function */
3645 fvdef = fignore;
3646 break;
3648 break;
3649 case ';':
3650 if (definedef != dnone || inattribute)
3651 break;
3652 switch (typdef)
3654 case tend:
3655 case ttypeseen:
3656 make_C_tag (FALSE); /* a typedef */
3657 typdef = tnone;
3658 fvdef = fvnone;
3659 break;
3660 case tnone:
3661 case tinbody:
3662 case tignore:
3663 switch (fvdef)
3665 case fignore:
3666 if (typdef == tignore || cplpl)
3667 fvdef = fvnone;
3668 break;
3669 case fvnameseen:
3670 if ((globals && bracelev == 0 && (!fvextern || declarations))
3671 || (members && instruct))
3672 make_C_tag (FALSE); /* a variable */
3673 fvextern = FALSE;
3674 fvdef = fvnone;
3675 token.valid = FALSE;
3676 break;
3677 case flistseen:
3678 if ((declarations
3679 && (cplpl || !instruct)
3680 && (typdef == tnone || (typdef != tignore && instruct)))
3681 || (members
3682 && plainc && instruct))
3683 make_C_tag (TRUE); /* a function */
3684 /* FALLTHRU */
3685 default:
3686 fvextern = FALSE;
3687 fvdef = fvnone;
3688 if (declarations
3689 && cplpl && structdef == stagseen)
3690 make_C_tag (FALSE); /* forward declaration */
3691 else
3692 token.valid = FALSE;
3693 } /* switch (fvdef) */
3694 /* FALLTHRU */
3695 default:
3696 if (!instruct)
3697 typdef = tnone;
3699 if (structdef == stagseen)
3700 structdef = snone;
3701 break;
3702 case ',':
3703 if (definedef != dnone || inattribute)
3704 break;
3705 switch (objdef)
3707 case omethodtag:
3708 case omethodparm:
3709 make_C_tag (TRUE); /* an Objective C method */
3710 objdef = oinbody;
3711 break;
3713 switch (fvdef)
3715 case fdefunkey:
3716 case foperator:
3717 case fstartlist:
3718 case finlist:
3719 case fignore:
3720 case vignore:
3721 break;
3722 case fdefunname:
3723 fvdef = fignore;
3724 break;
3725 case fvnameseen:
3726 if (parlev == 0
3727 && ((globals
3728 && bracelev == 0
3729 && templatelev == 0
3730 && (!fvextern || declarations))
3731 || (members && instruct)))
3732 make_C_tag (FALSE); /* a variable */
3733 break;
3734 case flistseen:
3735 if ((declarations && typdef == tnone && !instruct)
3736 || (members && typdef != tignore && instruct))
3738 make_C_tag (TRUE); /* a function */
3739 fvdef = fvnameseen;
3741 else if (!declarations)
3742 fvdef = fvnone;
3743 token.valid = FALSE;
3744 break;
3745 default:
3746 fvdef = fvnone;
3748 if (structdef == stagseen)
3749 structdef = snone;
3750 break;
3751 case ']':
3752 if (definedef != dnone || inattribute)
3753 break;
3754 if (structdef == stagseen)
3755 structdef = snone;
3756 switch (typdef)
3758 case ttypeseen:
3759 case tend:
3760 typdef = tignore;
3761 make_C_tag (FALSE); /* a typedef */
3762 break;
3763 case tnone:
3764 case tinbody:
3765 switch (fvdef)
3767 case foperator:
3768 case finlist:
3769 case fignore:
3770 case vignore:
3771 break;
3772 case fvnameseen:
3773 if ((members && bracelev == 1)
3774 || (globals && bracelev == 0
3775 && (!fvextern || declarations)))
3776 make_C_tag (FALSE); /* a variable */
3777 /* FALLTHRU */
3778 default:
3779 fvdef = fvnone;
3781 break;
3783 break;
3784 case '(':
3785 if (inattribute)
3787 attrparlev++;
3788 break;
3790 if (definedef != dnone)
3791 break;
3792 if (objdef == otagseen && parlev == 0)
3793 objdef = oparenseen;
3794 switch (fvdef)
3796 case fvnameseen:
3797 if (typdef == ttypeseen
3798 && *lp != '*'
3799 && !instruct)
3801 /* This handles constructs like:
3802 typedef void OperatorFun (int fun); */
3803 make_C_tag (FALSE);
3804 typdef = tignore;
3805 fvdef = fignore;
3806 break;
3808 /* FALLTHRU */
3809 case foperator:
3810 fvdef = fstartlist;
3811 break;
3812 case flistseen:
3813 fvdef = finlist;
3814 break;
3816 parlev++;
3817 break;
3818 case ')':
3819 if (inattribute)
3821 if (--attrparlev == 0)
3822 inattribute = FALSE;
3823 break;
3825 if (definedef != dnone)
3826 break;
3827 if (objdef == ocatseen && parlev == 1)
3829 make_C_tag (TRUE); /* an Objective C category */
3830 objdef = oignore;
3832 if (--parlev == 0)
3834 switch (fvdef)
3836 case fstartlist:
3837 case finlist:
3838 fvdef = flistseen;
3839 break;
3841 if (!instruct
3842 && (typdef == tend
3843 || typdef == ttypeseen))
3845 typdef = tignore;
3846 make_C_tag (FALSE); /* a typedef */
3849 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3850 parlev = 0;
3851 break;
3852 case '{':
3853 if (definedef != dnone)
3854 break;
3855 if (typdef == ttypeseen)
3857 /* Whenever typdef is set to tinbody (currently only
3858 here), typdefbracelev should be set to bracelev. */
3859 typdef = tinbody;
3860 typdefbracelev = bracelev;
3862 switch (fvdef)
3864 case flistseen:
3865 make_C_tag (TRUE); /* a function */
3866 /* FALLTHRU */
3867 case fignore:
3868 fvdef = fvnone;
3869 break;
3870 case fvnone:
3871 switch (objdef)
3873 case otagseen:
3874 make_C_tag (TRUE); /* an Objective C class */
3875 objdef = oignore;
3876 break;
3877 case omethodtag:
3878 case omethodparm:
3879 make_C_tag (TRUE); /* an Objective C method */
3880 objdef = oinbody;
3881 break;
3882 default:
3883 /* Neutralize `extern "C" {' grot. */
3884 if (bracelev == 0 && structdef == snone && nestlev == 0
3885 && typdef == tnone)
3886 bracelev = -1;
3888 break;
3890 switch (structdef)
3892 case skeyseen: /* unnamed struct */
3893 pushclass_above (bracelev, NULL, 0);
3894 structdef = snone;
3895 break;
3896 case stagseen: /* named struct or enum */
3897 case scolonseen: /* a class */
3898 pushclass_above (bracelev,token.line+token.offset, token.length);
3899 structdef = snone;
3900 make_C_tag (FALSE); /* a struct or enum */
3901 break;
3903 bracelev++;
3904 break;
3905 case '*':
3906 if (definedef != dnone)
3907 break;
3908 if (fvdef == fstartlist)
3910 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3911 token.valid = FALSE;
3913 break;
3914 case '}':
3915 if (definedef != dnone)
3916 break;
3917 if (!ignoreindent && lp == newlb.buffer + 1)
3919 if (bracelev != 0)
3920 token.valid = FALSE;
3921 bracelev = 0; /* reset brace level if first column */
3922 parlev = 0; /* also reset paren level, just in case... */
3924 else if (bracelev > 0)
3925 bracelev--;
3926 else
3927 token.valid = FALSE; /* something gone amiss, token unreliable */
3928 popclass_above (bracelev);
3929 structdef = snone;
3930 /* Only if typdef == tinbody is typdefbracelev significant. */
3931 if (typdef == tinbody && bracelev <= typdefbracelev)
3933 assert (bracelev == typdefbracelev);
3934 typdef = tend;
3936 break;
3937 case '=':
3938 if (definedef != dnone)
3939 break;
3940 switch (fvdef)
3942 case foperator:
3943 case finlist:
3944 case fignore:
3945 case vignore:
3946 break;
3947 case fvnameseen:
3948 if ((members && bracelev == 1)
3949 || (globals && bracelev == 0 && (!fvextern || declarations)))
3950 make_C_tag (FALSE); /* a variable */
3951 /* FALLTHRU */
3952 default:
3953 fvdef = vignore;
3955 break;
3956 case '<':
3957 if (cplpl
3958 && (structdef == stagseen || fvdef == fvnameseen))
3960 templatelev++;
3961 break;
3963 goto resetfvdef;
3964 case '>':
3965 if (templatelev > 0)
3967 templatelev--;
3968 break;
3970 goto resetfvdef;
3971 case '+':
3972 case '-':
3973 if (objdef == oinbody && bracelev == 0)
3975 objdef = omethodsign;
3976 break;
3978 /* FALLTHRU */
3979 resetfvdef:
3980 case '#': case '~': case '&': case '%': case '/':
3981 case '|': case '^': case '!': case '.': case '?':
3982 if (definedef != dnone)
3983 break;
3984 /* These surely cannot follow a function tag in C. */
3985 switch (fvdef)
3987 case foperator:
3988 case finlist:
3989 case fignore:
3990 case vignore:
3991 break;
3992 default:
3993 fvdef = fvnone;
3995 break;
3996 case '\0':
3997 if (objdef == otagseen)
3999 make_C_tag (TRUE); /* an Objective C class */
4000 objdef = oignore;
4002 /* If a macro spans multiple lines don't reset its state. */
4003 if (quotednl)
4004 CNL_SAVE_DEFINEDEF ();
4005 else
4006 CNL ();
4007 break;
4008 } /* switch (c) */
4010 } /* while not eof */
4012 free (lbs[0].lb.buffer);
4013 free (lbs[1].lb.buffer);
4017 * Process either a C++ file or a C file depending on the setting
4018 * of a global flag.
4020 static void
4021 default_C_entries (inf)
4022 FILE *inf;
4024 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Tag INF as plain C: no extension bit is handed to C_entries.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no language extension */

  C_entries (c_ext, inf);
}
4035 /* Always do C++. */
4036 static void
4037 Cplusplus_entries (inf)
4038 FILE *inf;
4040 C_entries (C_PLPL, inf);
4043 /* Always do Java. */
4044 static void
4045 Cjava_entries (inf)
4046 FILE *inf;
4048 C_entries (C_JAVA, inf);
4051 /* Always do C*. */
4052 static void
4053 Cstar_entries (inf)
4054 FILE *inf;
4056 C_entries (C_STAR, inf);
4059 /* Always do Yacc. */
4060 static void
4061 Yacc_entries (inf)
4062 FILE *inf;
4064 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that runs the statement following it once per input
   line.  The loop test is `not at end of FILE_POINTER'; when it
   holds, a line is read into LINE_BUFFER with readline and
   CHAR_POINTER is set to the start of that buffer before the body
   runs.  There is no initialization and no increment part.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                                                                \
       !feof (file_pointer)                                             \
       && (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )
/* Non-zero when CP points at KEYWORD (which must be a constant
   string, because sizeof is applied to it) and the character right
   after the keyword satisfies notinname.  In that case CP is moved
   past the keyword and past the spaces that follow it; otherwise CP
   is left alone.  */
#define LOOKING_AT(cp, keyword)                                         \
  (strneq ((cp), keyword, sizeof(keyword)-1)                            \
   && notinname ((cp)[sizeof(keyword)-1])                               \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1)))
4083 * Read a file, but do no processing. This is used to do regexp
4084 * matching on files that have no language defined.
4086 static void
4087 just_read_file (inf)
4088 FILE *inf;
4090 register char *dummy;
4092 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4093 continue;
4097 /* Fortran parsing */
4099 static void F_takeprec __P((void));
4100 static void F_getit __P((FILE *));
4102 static void
4103 F_takeprec ()
4105 dbp = skip_spaces (dbp);
4106 if (*dbp != '*')
4107 return;
4108 dbp++;
4109 dbp = skip_spaces (dbp);
4110 if (strneq (dbp, "(*)", 3))
4112 dbp += 3;
4113 return;
4115 if (!ISDIGIT (*dbp))
4117 --dbp; /* force failure */
4118 return;
4121 dbp++;
4122 while (ISDIGIT (*dbp));
4125 static void
4126 F_getit (inf)
4127 FILE *inf;
4129 register char *cp;
4131 dbp = skip_spaces (dbp);
4132 if (*dbp == '\0')
4134 readline (&lb, inf);
4135 dbp = lb.buffer;
4136 if (dbp[5] != '&')
4137 return;
4138 dbp += 6;
4139 dbp = skip_spaces (dbp);
4141 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4142 return;
4143 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4144 continue;
4145 make_tag (dbp, cp-dbp, TRUE,
4146 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4150 static void
4151 Fortran_functions (inf)
4152 FILE *inf;
4154 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4156 if (*dbp == '%')
4157 dbp++; /* Ratfor escape to fortran */
4158 dbp = skip_spaces (dbp);
4159 if (*dbp == '\0')
4160 continue;
4161 switch (lowcase (*dbp))
4163 case 'i':
4164 if (nocase_tail ("integer"))
4165 F_takeprec ();
4166 break;
4167 case 'r':
4168 if (nocase_tail ("real"))
4169 F_takeprec ();
4170 break;
4171 case 'l':
4172 if (nocase_tail ("logical"))
4173 F_takeprec ();
4174 break;
4175 case 'c':
4176 if (nocase_tail ("complex") || nocase_tail ("character"))
4177 F_takeprec ();
4178 break;
4179 case 'd':
4180 if (nocase_tail ("double"))
4182 dbp = skip_spaces (dbp);
4183 if (*dbp == '\0')
4184 continue;
4185 if (nocase_tail ("precision"))
4186 break;
4187 continue;
4189 break;
4191 dbp = skip_spaces (dbp);
4192 if (*dbp == '\0')
4193 continue;
4194 switch (lowcase (*dbp))
4196 case 'f':
4197 if (nocase_tail ("function"))
4198 F_getit (inf);
4199 continue;
4200 case 's':
4201 if (nocase_tail ("subroutine"))
4202 F_getit (inf);
4203 continue;
4204 case 'e':
4205 if (nocase_tail ("entry"))
4206 F_getit (inf);
4207 continue;
4208 case 'b':
4209 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4211 dbp = skip_spaces (dbp);
4212 if (*dbp == '\0') /* assume un-named */
4213 make_tag ("blockdata", 9, TRUE,
4214 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4215 else
4216 F_getit (inf); /* look for name */
4218 continue;
4225 * Ada parsing
4226 * Original code by
4227 * Philippe Waroquiers (1998)
4230 static void Ada_getit __P((FILE *, char *));
4232 /* Once we are positioned after an "interesting" keyword, let's get
4233 the real tag value necessary. */
4234 static void
4235 Ada_getit (inf, name_qualifier)
4236 FILE *inf;
4237 char *name_qualifier;
4239 register char *cp;
4240 char *name;
4241 char c;
4243 while (!feof (inf))
4245 dbp = skip_spaces (dbp);
4246 if (*dbp == '\0'
4247 || (dbp[0] == '-' && dbp[1] == '-'))
4249 readline (&lb, inf);
4250 dbp = lb.buffer;
4252 switch (lowcase(*dbp))
4254 case 'b':
4255 if (nocase_tail ("body"))
4257 /* Skipping body of procedure body or package body or ....
4258 resetting qualifier to body instead of spec. */
4259 name_qualifier = "/b";
4260 continue;
4262 break;
4263 case 't':
4264 /* Skipping type of task type or protected type ... */
4265 if (nocase_tail ("type"))
4266 continue;
4267 break;
4269 if (*dbp == '"')
4271 dbp += 1;
4272 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4273 continue;
4275 else
4277 dbp = skip_spaces (dbp);
4278 for (cp = dbp;
4279 (*cp != '\0'
4280 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4281 cp++)
4282 continue;
4283 if (cp == dbp)
4284 return;
4286 c = *cp;
4287 *cp = '\0';
4288 name = concat (dbp, name_qualifier, "");
4289 *cp = c;
4290 make_tag (name, strlen (name), TRUE,
4291 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4292 free (name);
4293 if (c == '"')
4294 dbp = cp + 1;
4295 return;
4299 static void
4300 Ada_funcs (inf)
4301 FILE *inf;
4303 bool inquote = FALSE;
4304 bool skip_till_semicolumn = FALSE;
4306 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4308 while (*dbp != '\0')
4310 /* Skip a string i.e. "abcd". */
4311 if (inquote || (*dbp == '"'))
4313 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4314 if (dbp != NULL)
4316 inquote = FALSE;
4317 dbp += 1;
4318 continue; /* advance char */
4320 else
4322 inquote = TRUE;
4323 break; /* advance line */
4327 /* Skip comments. */
4328 if (dbp[0] == '-' && dbp[1] == '-')
4329 break; /* advance line */
4331 /* Skip character enclosed in single quote i.e. 'a'
4332 and skip single quote starting an attribute i.e. 'Image. */
4333 if (*dbp == '\'')
4335 dbp++ ;
4336 if (*dbp != '\0')
4337 dbp++;
4338 continue;
4341 if (skip_till_semicolumn)
4343 if (*dbp == ';')
4344 skip_till_semicolumn = FALSE;
4345 dbp++;
4346 continue; /* advance char */
4349 /* Search for beginning of a token. */
4350 if (!begtoken (*dbp))
4352 dbp++;
4353 continue; /* advance char */
4356 /* We are at the beginning of a token. */
4357 switch (lowcase(*dbp))
4359 case 'f':
4360 if (!packages_only && nocase_tail ("function"))
4361 Ada_getit (inf, "/f");
4362 else
4363 break; /* from switch */
4364 continue; /* advance char */
4365 case 'p':
4366 if (!packages_only && nocase_tail ("procedure"))
4367 Ada_getit (inf, "/p");
4368 else if (nocase_tail ("package"))
4369 Ada_getit (inf, "/s");
4370 else if (nocase_tail ("protected")) /* protected type */
4371 Ada_getit (inf, "/t");
4372 else
4373 break; /* from switch */
4374 continue; /* advance char */
4376 case 'u':
4377 if (typedefs && !packages_only && nocase_tail ("use"))
4379 /* when tagging types, avoid tagging use type Pack.Typename;
4380 for this, we will skip everything till a ; */
4381 skip_till_semicolumn = TRUE;
4382 continue; /* advance char */
4385 case 't':
4386 if (!packages_only && nocase_tail ("task"))
4387 Ada_getit (inf, "/k");
4388 else if (typedefs && !packages_only && nocase_tail ("type"))
4390 Ada_getit (inf, "/t");
4391 while (*dbp != '\0')
4392 dbp += 1;
4394 else
4395 break; /* from switch */
4396 continue; /* advance char */
4399 /* Look for the end of the token. */
4400 while (!endtoken (*dbp))
4401 dbp++;
4403 } /* advance char */
4404 } /* advance line */
4409 * Unix and microcontroller assembly tag handling
4410 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4411 * Idea by Bob Weiner, Motorola Inc. (1994)
4413 static void
4414 Asm_labels (inf)
4415 FILE *inf;
4417 register char *cp;
4419 LOOP_ON_INPUT_LINES (inf, lb, cp)
4421 /* If first char is alphabetic or one of [_.$], test for colon
4422 following identifier. */
4423 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4425 /* Read past label. */
4426 cp++;
4427 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4428 cp++;
4429 if (*cp == ':' || iswhite (*cp))
4430 /* Found end of label, so copy it and add it to the table. */
4431 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4432 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4439 * Perl support
4440 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4441 * Perl variable names: /^(my|local).../
4442 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4443 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4444 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4446 static void
4447 Perl_functions (inf)
4448 FILE *inf;
4450 char *package = savestr ("main"); /* current package name */
4451 register char *cp;
4453 LOOP_ON_INPUT_LINES (inf, lb, cp)
4455 skip_spaces(cp);
4457 if (LOOKING_AT (cp, "package"))
4459 free (package);
4460 get_tag (cp, &package);
4462 else if (LOOKING_AT (cp, "sub"))
4464 char *pos;
4465 char *sp = cp;
4467 while (!notinname (*cp))
4468 cp++;
4469 if (cp == sp)
4470 continue; /* nothing found */
4471 if ((pos = etags_strchr (sp, ':')) != NULL
4472 && pos < cp && pos[1] == ':')
4473 /* The name is already qualified. */
4474 make_tag (sp, cp - sp, TRUE,
4475 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4476 else
4477 /* Qualify it. */
4479 char savechar, *name;
4481 savechar = *cp;
4482 *cp = '\0';
4483 name = concat (package, "::", sp);
4484 *cp = savechar;
4485 make_tag (name, strlen(name), TRUE,
4486 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4487 free (name);
4490 else if (globals) /* only if we are tagging global vars */
4492 /* Skip a qualifier, if any. */
4493 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4494 /* After "my" or "local", but before any following paren or space. */
4495 char *varstart = cp;
4497 if (qual /* should this be removed? If yes, how? */
4498 && (*cp == '$' || *cp == '@' || *cp == '%'))
4500 varstart += 1;
4502 cp++;
4503 while (ISALNUM (*cp) || *cp == '_');
4505 else if (qual)
4507 /* Should be examining a variable list at this point;
4508 could insist on seeing an open parenthesis. */
4509 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4510 cp++;
4512 else
4513 continue;
4515 make_tag (varstart, cp - varstart, FALSE,
4516 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4523 * Python support
4524 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4525 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4526 * More ideas by seb bacon <seb@jamkit.com> (2002)
4528 static void
4529 Python_functions (inf)
4530 FILE *inf;
4532 register char *cp;
4534 LOOP_ON_INPUT_LINES (inf, lb, cp)
4536 cp = skip_spaces (cp);
4537 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4539 char *name = cp;
4540 while (!notinname (*cp) && *cp != ':')
4541 cp++;
4542 make_tag (name, cp - name, TRUE,
4543 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4550 * PHP support
4551 * Look for:
4552 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4553 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4554 * - /^[ \t]*define\(\"[^\"]+/
4555 * Only with --members:
4556 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4557 * Idea by Diez B. Roggisch (2001)
4559 static void
4560 PHP_functions (inf)
4561 FILE *inf;
4563 register char *cp, *name;
4564 bool search_identifier = FALSE;
4566 LOOP_ON_INPUT_LINES (inf, lb, cp)
4568 cp = skip_spaces (cp);
4569 name = cp;
4570 if (search_identifier
4571 && *cp != '\0')
4573 while (!notinname (*cp))
4574 cp++;
4575 make_tag (name, cp - name, TRUE,
4576 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4577 search_identifier = FALSE;
4579 else if (LOOKING_AT (cp, "function"))
4581 if(*cp == '&')
4582 cp = skip_spaces (cp+1);
4583 if(*cp != '\0')
4585 name = cp;
4586 while (!notinname (*cp))
4587 cp++;
4588 make_tag (name, cp - name, TRUE,
4589 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4591 else
4592 search_identifier = TRUE;
4594 else if (LOOKING_AT (cp, "class"))
4596 if (*cp != '\0')
4598 name = cp;
4599 while (*cp != '\0' && !iswhite (*cp))
4600 cp++;
4601 make_tag (name, cp - name, FALSE,
4602 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4604 else
4605 search_identifier = TRUE;
4607 else if (strneq (cp, "define", 6)
4608 && (cp = skip_spaces (cp+6))
4609 && *cp++ == '('
4610 && (*cp == '"' || *cp == '\''))
4612 char quote = *cp++;
4613 name = cp;
4614 while (*cp != quote && *cp != '\0')
4615 cp++;
4616 make_tag (name, cp - name, FALSE,
4617 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4619 else if (members
4620 && LOOKING_AT (cp, "var")
4621 && *cp == '$')
4623 name = cp;
4624 while (!notinname(*cp))
4625 cp++;
4626 make_tag (name, cp - name, FALSE,
4627 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4634 * Cobol tag functions
4635 * We could look for anything that could be a paragraph name.
4636 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4637 * Idea by Corny de Souza (1993)
4639 static void
4640 Cobol_paragraphs (inf)
4641 FILE *inf;
4643 register char *bp, *ep;
4645 LOOP_ON_INPUT_LINES (inf, lb, bp)
4647 if (lb.len < 9)
4648 continue;
4649 bp += 8;
4651 /* If eoln, compiler option or comment ignore whole line. */
4652 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4653 continue;
4655 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4656 continue;
4657 if (*ep++ == '.')
4658 make_tag (bp, ep - bp, TRUE,
4659 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4665 * Makefile support
4666 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4668 static void
4669 Makefile_targets (inf)
4670 FILE *inf;
4672 register char *bp;
4674 LOOP_ON_INPUT_LINES (inf, lb, bp)
4676 if (*bp == '\t' || *bp == '#')
4677 continue;
4678 while (*bp != '\0' && *bp != '=' && *bp != ':')
4679 bp++;
4680 if (*bp == ':' || (globals && *bp == '='))
4681 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4682 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4688 * Pascal parsing
4689 * Original code by Mosur K. Mohan (1989)
4691 * Locates tags for procedures & functions. Doesn't do any type- or
4692 * var-definitions. It does look for the keyword "extern" or
4693 * "forward" immediately following the procedure statement; if found,
4694 * the tag is skipped.
4696 static void
4697 Pascal_functions (inf)
4698 FILE *inf;
4700 linebuffer tline; /* mostly copied from C_entries */
4701 long save_lcno;
4702 int save_lineno, namelen, taglen;
4703 char c, *name;
4705 bool /* each of these flags is TRUE iff: */
4706 incomment, /* point is inside a comment */
4707 inquote, /* point is inside '..' string */
4708 get_tagname, /* point is after PROCEDURE/FUNCTION
4709 keyword, so next item = potential tag */
4710 found_tag, /* point is after a potential tag */
4711 inparms, /* point is within parameter-list */
4712 verify_tag; /* point has passed the parm-list, so the
4713 next token will determine whether this
4714 is a FORWARD/EXTERN to be ignored, or
4715 whether it is a real tag */
4717 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4718 name = NULL; /* keep compiler quiet */
4719 dbp = lb.buffer;
4720 *dbp = '\0';
4721 linebuffer_init (&tline);
4723 incomment = inquote = FALSE;
4724 found_tag = FALSE; /* have a proc name; check if extern */
4725 get_tagname = FALSE; /* found "procedure" keyword */
4726 inparms = FALSE; /* found '(' after "proc" */
4727 verify_tag = FALSE; /* check if "extern" is ahead */
4730 while (!feof (inf)) /* long main loop to get next char */
4732 c = *dbp++;
4733 if (c == '\0') /* if end of line */
4735 readline (&lb, inf);
4736 dbp = lb.buffer;
4737 if (*dbp == '\0')
4738 continue;
4739 if (!((found_tag && verify_tag)
4740 || get_tagname))
4741 c = *dbp++; /* only if don't need *dbp pointing
4742 to the beginning of the name of
4743 the procedure or function */
4745 if (incomment)
4747 if (c == '}') /* within { } comments */
4748 incomment = FALSE;
4749 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4751 dbp++;
4752 incomment = FALSE;
4754 continue;
4756 else if (inquote)
4758 if (c == '\'')
4759 inquote = FALSE;
4760 continue;
4762 else
4763 switch (c)
4765 case '\'':
4766 inquote = TRUE; /* found first quote */
4767 continue;
4768 case '{': /* found open { comment */
4769 incomment = TRUE;
4770 continue;
4771 case '(':
4772 if (*dbp == '*') /* found open (* comment */
4774 incomment = TRUE;
4775 dbp++;
4777 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4778 inparms = TRUE;
4779 continue;
4780 case ')': /* end of parms list */
4781 if (inparms)
4782 inparms = FALSE;
4783 continue;
4784 case ';':
4785 if (found_tag && !inparms) /* end of proc or fn stmt */
4787 verify_tag = TRUE;
4788 break;
4790 continue;
4792 if (found_tag && verify_tag && (*dbp != ' '))
4794 /* Check if this is an "extern" declaration. */
4795 if (*dbp == '\0')
4796 continue;
4797 if (lowcase (*dbp == 'e'))
4799 if (nocase_tail ("extern")) /* superfluous, really! */
4801 found_tag = FALSE;
4802 verify_tag = FALSE;
4805 else if (lowcase (*dbp) == 'f')
4807 if (nocase_tail ("forward")) /* check for forward reference */
4809 found_tag = FALSE;
4810 verify_tag = FALSE;
4813 if (found_tag && verify_tag) /* not external proc, so make tag */
4815 found_tag = FALSE;
4816 verify_tag = FALSE;
4817 make_tag (name, namelen, TRUE,
4818 tline.buffer, taglen, save_lineno, save_lcno);
4819 continue;
4822 if (get_tagname) /* grab name of proc or fn */
4824 char *cp;
4826 if (*dbp == '\0')
4827 continue;
4829 /* Find block name. */
4830 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4831 continue;
4833 /* Save all values for later tagging. */
4834 linebuffer_setlen (&tline, lb.len);
4835 strcpy (tline.buffer, lb.buffer);
4836 save_lineno = lineno;
4837 save_lcno = linecharno;
4838 name = tline.buffer + (dbp - lb.buffer);
4839 namelen = cp - dbp;
4840 taglen = cp - lb.buffer + 1;
4842 dbp = cp; /* set dbp to e-o-token */
4843 get_tagname = FALSE;
4844 found_tag = TRUE;
4845 continue;
4847 /* And proceed to check for "extern". */
4849 else if (!incomment && !inquote && !found_tag)
4851 /* Check for proc/fn keywords. */
4852 switch (lowcase (c))
4854 case 'p':
4855 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4856 get_tagname = TRUE;
4857 continue;
4858 case 'f':
4859 if (nocase_tail ("unction"))
4860 get_tagname = TRUE;
4861 continue;
4864 } /* while not eof */
4866 free (tline.buffer);
4871 * Lisp tag functions
4872 * look for (def or (DEF, quote or QUOTE
4875 static void L_getit __P((void));
4877 static void
4878 L_getit ()
4880 if (*dbp == '\'') /* Skip prefix quote */
4881 dbp++;
4882 else if (*dbp == '(')
4884 dbp++;
4885 /* Try to skip "(quote " */
4886 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4887 /* Ok, then skip "(" before name in (defstruct (foo)) */
4888 dbp = skip_spaces (dbp);
4890 get_tag (dbp, NULL);
4893 static void
4894 Lisp_functions (inf)
4895 FILE *inf;
4897 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4899 if (dbp[0] != '(')
4900 continue;
4902 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4904 dbp = skip_non_spaces (dbp);
4905 dbp = skip_spaces (dbp);
4906 L_getit ();
4908 else
4910 /* Check for (foo::defmumble name-defined ... */
4912 dbp++;
4913 while (!notinname (*dbp) && *dbp != ':');
4914 if (*dbp == ':')
4917 dbp++;
4918 while (*dbp == ':');
4920 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4922 dbp = skip_non_spaces (dbp);
4923 dbp = skip_spaces (dbp);
4924 L_getit ();
4933 * Lua script language parsing
4934 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4936 * "function" and "local function" are tags if they start at column 1.
4938 static void
4939 Lua_functions (inf)
4940 FILE *inf;
4942 register char *bp;
4944 LOOP_ON_INPUT_LINES (inf, lb, bp)
4946 if (bp[0] != 'f' && bp[0] != 'l')
4947 continue;
4949 LOOKING_AT (bp, "local"); /* skip possible "local" */
4951 if (LOOKING_AT (bp, "function"))
4952 get_tag (bp, NULL);
4958 * Postscript tag functions
4959 * Just look for lines where the first character is '/'
4960 * Also look at "defineps" for PSWrap
4961 * Ideas by:
4962 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4963 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4965 static void
4966 PS_functions (inf)
4967 FILE *inf;
4969 register char *bp, *ep;
4971 LOOP_ON_INPUT_LINES (inf, lb, bp)
4973 if (bp[0] == '/')
4975 for (ep = bp+1;
4976 *ep != '\0' && *ep != ' ' && *ep != '{';
4977 ep++)
4978 continue;
4979 make_tag (bp, ep - bp, TRUE,
4980 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4982 else if (LOOKING_AT (bp, "defineps"))
4983 get_tag (bp, NULL);
4989 * Scheme tag functions
4990 * look for (def... xyzzy
4991 * (def... (xyzzy
4992 * (def ... ((...(xyzzy ....
4993 * (set! xyzzy
4994 * Original code by Ken Haase (1985?)
4997 static void
4998 Scheme_functions (inf)
4999 FILE *inf;
5001 register char *bp;
5003 LOOP_ON_INPUT_LINES (inf, lb, bp)
5005 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5007 bp = skip_non_spaces (bp+4);
5008 /* Skip over open parens and white space */
5009 while (notinname (*bp))
5010 bp++;
5011 get_tag (bp, NULL);
5013 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5014 get_tag (bp, NULL);
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.  The table is built by
 * TEX_decode_env and ends with an entry whose buffer is NULL.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape and grouping characters currently in effect; TEX_mode resets
   all three for each input file.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5046 * TeX/LaTeX scanning loop.
5048 static void
5049 TeX_commands (inf)
5050 FILE *inf;
5052 char *cp;
5053 linebuffer *key;
5055 /* Select either \ or ! as escape character. */
5056 TEX_mode (inf);
5058 /* Initialize token table once from environment. */
5059 if (TEX_toktab == NULL)
5060 TEX_decode_env ("TEXTAGS", TEX_defenv);
5062 LOOP_ON_INPUT_LINES (inf, lb, cp)
5064 /* Look at each TEX keyword in line. */
5065 for (;;)
5067 /* Look for a TEX escape. */
5068 while (*cp++ != TEX_esc)
5069 if (cp[-1] == '\0' || cp[-1] == '%')
5070 goto tex_next_line;
5072 for (key = TEX_toktab; key->buffer != NULL; key++)
5073 if (strneq (cp, key->buffer, key->len))
5075 register char *p;
5076 int namelen, linelen;
5077 bool opgrp = FALSE;
5079 cp = skip_spaces (cp + key->len);
5080 if (*cp == TEX_opgrp)
5082 opgrp = TRUE;
5083 cp++;
5085 for (p = cp;
5086 (!iswhite (*p) && *p != '#' &&
5087 *p != TEX_opgrp && *p != TEX_clgrp);
5088 p++)
5089 continue;
5090 namelen = p - cp;
5091 linelen = lb.len;
5092 if (!opgrp || *p == TEX_clgrp)
5094 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5095 *p++;
5096 linelen = p - lb.buffer + 1;
5098 make_tag (cp, namelen, TRUE,
5099 lb.buffer, linelen, lineno, linecharno);
5100 goto tex_next_line; /* We only tag a line once */
5103 tex_next_line:
5108 #define TEX_LESC '\\'
5109 #define TEX_SESC '!'
5111 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5112 chars accordingly. */
5113 static void
5114 TEX_mode (inf)
5115 FILE *inf;
5117 int c;
5119 while ((c = getc (inf)) != EOF)
5121 /* Skip to next line if we hit the TeX comment char. */
5122 if (c == '%')
5123 while (c != '\n')
5124 c = getc (inf);
5125 else if (c == TEX_LESC || c == TEX_SESC )
5126 break;
5129 if (c == TEX_LESC)
5131 TEX_esc = TEX_LESC;
5132 TEX_opgrp = '{';
5133 TEX_clgrp = '}';
5135 else
5137 TEX_esc = TEX_SESC;
5138 TEX_opgrp = '<';
5139 TEX_clgrp = '>';
5141 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5142 No attempt is made to correct the situation. */
5143 rewind (inf);
5146 /* Read environment and prepend it to the default string.
5147 Build token table. */
5148 static void
5149 TEX_decode_env (evarname, defenv)
5150 char *evarname;
5151 char *defenv;
5153 register char *env, *p;
5154 int i, len;
5156 /* Append default string to environment. */
5157 env = getenv (evarname);
5158 if (!env)
5159 env = defenv;
5160 else
5162 char *oldenv = env;
5163 env = concat (oldenv, defenv, "");
5166 /* Allocate a token table */
5167 for (len = 1, p = env; p;)
5168 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5169 len++;
5170 TEX_toktab = xnew (len, linebuffer);
5172 /* Unpack environment string into token table. Be careful about */
5173 /* zero-length strings (leading ':', "::" and trailing ':') */
5174 for (i = 0; *env != '\0';)
5176 p = etags_strchr (env, ':');
5177 if (!p) /* End of environment string. */
5178 p = env + strlen (env);
5179 if (p - env > 0)
5180 { /* Only non-zero strings. */
5181 TEX_toktab[i].buffer = savenstr (env, p - env);
5182 TEX_toktab[i].len = p - env;
5183 i++;
5185 if (*p)
5186 env = p + 1;
5187 else
5189 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5190 TEX_toktab[i].len = 0;
5191 break;
5197 /* Texinfo support. Dave Love, Mar. 2000. */
5198 static void
5199 Texinfo_nodes (inf)
5200 FILE * inf;
5202 char *cp, *start;
5203 LOOP_ON_INPUT_LINES (inf, lb, cp)
5204 if (LOOKING_AT (cp, "@node"))
5206 start = cp;
5207 while (*cp != '\0' && *cp != ',')
5208 cp++;
5209 make_tag (start, cp - start, TRUE,
5210 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
/* Similar to LOOKING_AT but does not use notinname, does not skip
   spaces: when CP starts with the constant string KW (compared with
   strncaseeq), CP is advanced just past KW and the result is true.  */
#define LOOKING_AT_NOCASE(cp, kw) /* kw is a constant string */ \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))           /* step over kw */
5221 * HTML support.
5222 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5223 * Contents of <a name=xxx> are tags with name xxx.
5225 * Francesco Potortì, 2002.
5227 static void
5228 HTML_labels (inf)
5229 FILE * inf;
5231 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5232 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5233 bool intag = FALSE; /* inside an html tag, looking for ID= */
5234 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5235 char *end;
5238 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5240 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5241 for (;;) /* loop on the same line */
5243 if (skiptag) /* skip HTML tag */
5245 while (*dbp != '\0' && *dbp != '>')
5246 dbp++;
5247 if (*dbp == '>')
5249 dbp += 1;
5250 skiptag = FALSE;
5251 continue; /* look on the same line */
5253 break; /* go to next line */
5256 else if (intag) /* look for "name=" or "id=" */
5258 while (*dbp != '\0' && *dbp != '>'
5259 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5260 dbp++;
5261 if (*dbp == '\0')
5262 break; /* go to next line */
5263 if (*dbp == '>')
5265 dbp += 1;
5266 intag = FALSE;
5267 continue; /* look on the same line */
5269 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5270 || LOOKING_AT_NOCASE (dbp, "id="))
5272 bool quoted = (dbp[0] == '"');
5274 if (quoted)
5275 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5276 continue;
5277 else
5278 for (end = dbp; *end != '\0' && intoken (*end); end++)
5279 continue;
5280 linebuffer_setlen (&token_name, end - dbp);
5281 strncpy (token_name.buffer, dbp, end - dbp);
5282 token_name.buffer[end - dbp] = '\0';
5284 dbp = end;
5285 intag = FALSE; /* we found what we looked for */
5286 skiptag = TRUE; /* skip to the end of the tag */
5287 getnext = TRUE; /* then grab the text */
5288 continue; /* look on the same line */
5290 dbp += 1;
5293 else if (getnext) /* grab next tokens and tag them */
5295 dbp = skip_spaces (dbp);
5296 if (*dbp == '\0')
5297 break; /* go to next line */
5298 if (*dbp == '<')
5300 intag = TRUE;
5301 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5302 continue; /* look on the same line */
5305 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5306 continue;
5307 make_tag (token_name.buffer, token_name.len, TRUE,
5308 dbp, end - dbp, lineno, linecharno);
5309 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5310 getnext = FALSE;
5311 break; /* go to next line */
5314 else /* look for an interesting HTML tag */
5316 while (*dbp != '\0' && *dbp != '<')
5317 dbp++;
5318 if (*dbp == '\0')
5319 break; /* go to next line */
5320 intag = TRUE;
5321 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5323 inanchor = TRUE;
5324 continue; /* look on the same line */
5326 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5327 || LOOKING_AT_NOCASE (dbp, "<h1>")
5328 || LOOKING_AT_NOCASE (dbp, "<h2>")
5329 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5331 intag = FALSE;
5332 getnext = TRUE;
5333 continue; /* look on the same line */
5335 dbp += 1;
5342 * Prolog support
5344 * Assumes that the predicate or rule starts at column 0.
5345 * Only the first clause of a predicate or rule is added.
5346 * Original code by Sunichirou Sugou (1989)
5347 * Rewritten by Anders Lindgren (1996)
5349 static int prolog_pr __P((char *, char *));
5350 static void prolog_skip_comment __P((linebuffer *, FILE *));
5351 static int prolog_atom __P((char *, int));
5353 static void
5354 Prolog_functions (inf)
5355 FILE *inf;
5357 char *cp, *last;
5358 int len;
5359 int allocated;
5361 allocated = 0;
5362 len = 0;
5363 last = NULL;
5365 LOOP_ON_INPUT_LINES (inf, lb, cp)
5367 if (cp[0] == '\0') /* Empty line */
5368 continue;
5369 else if (iswhite (cp[0])) /* Not a predicate */
5370 continue;
5371 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5372 prolog_skip_comment (&lb, inf);
5373 else if ((len = prolog_pr (cp, last)) > 0)
5375 /* Predicate or rule. Store the function name so that we
5376 only generate a tag for the first clause. */
5377 if (last == NULL)
5378 last = xnew(len + 1, char);
5379 else if (len + 1 > allocated)
5380 xrnew (last, len + 1, char);
5381 allocated = len + 1;
5382 strncpy (last, cp, len);
5383 last[len] = '\0';
5389 static void
5390 prolog_skip_comment (plb, inf)
5391 linebuffer *plb;
5392 FILE *inf;
5394 char *cp;
5398 for (cp = plb->buffer; *cp != '\0'; cp++)
5399 if (cp[0] == '*' && cp[1] == '/')
5400 return;
5401 readline (plb, inf);
5403 while (!feof(inf));
5407 * A predicate or rule definition is added if it matches:
5408 * <beginning of line><Prolog Atom><whitespace>(
5409 * or <beginning of line><Prolog Atom><whitespace>:-
5411 * It is added to the tags database if it doesn't match the
5412 * name of the previous clause header.
5414 * Return the size of the name of the predicate or rule, or 0 if no
5415 * header was found.
5417 static int
5418 prolog_pr (s, last)
5419 char *s;
5420 char *last; /* Name of last clause. */
5422 int pos;
5423 int len;
5425 pos = prolog_atom (s, 0);
5426 if (pos < 1)
5427 return 0;
5429 len = pos;
5430 pos = skip_spaces (s + pos) - s;
5432 if ((s[pos] == '.'
5433 || (s[pos] == '(' && (pos += 1))
5434 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5435 && (last == NULL /* save only the first clause */
5436 || len != strlen (last)
5437 || !strneq (s, last, len)))
5439 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5440 return len;
5442 else
5443 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: scan for the closing quote. */
  for (p++;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* backslash and the char it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start); /* closing quote included */
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5507 * Support for Erlang
5509 * Generates tags for functions, defines, and records.
5510 * Assumes that Erlang functions start at column 0.
5511 * Original code by Anders Lindgren (1996)
5513 static int erlang_func __P((char *, char *));
5514 static void erlang_attribute __P((char *));
5515 static int erlang_atom __P((char *));
5517 static void
5518 Erlang_functions (inf)
5519 FILE *inf;
5521 char *cp, *last;
5522 int len;
5523 int allocated;
5525 allocated = 0;
5526 len = 0;
5527 last = NULL;
5529 LOOP_ON_INPUT_LINES (inf, lb, cp)
5531 if (cp[0] == '\0') /* Empty line */
5532 continue;
5533 else if (iswhite (cp[0])) /* Not function nor attribute */
5534 continue;
5535 else if (cp[0] == '%') /* comment */
5536 continue;
5537 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5538 continue;
5539 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5541 erlang_attribute (cp);
5542 last = NULL;
5544 else if ((len = erlang_func (cp, last)) > 0)
5547 * Function. Store the function name so that we only
5548 * generates a tag for the first clause.
5550 if (last == NULL)
5551 last = xnew (len + 1, char);
5552 else if (len + 1 > allocated)
5553 xrnew (last, len + 1, char);
5554 allocated = len + 1;
5555 strncpy (last, cp, len);
5556 last[len] = '\0';
5563 * A function definition is added if it matches:
5564 * <beginning of line><Erlang Atom><whitespace>(
5566 * It is added to the tags database if it doesn't match the
5567 * name of the previous clause header.
5569 * Return the size of the name of the function, or 0 if no function
5570 * was found.
5572 static int
5573 erlang_func (s, last)
5574 char *s;
5575 char *last; /* Name of last clause. */
5577 int pos;
5578 int len;
5580 pos = erlang_atom (s);
5581 if (pos < 1)
5582 return 0;
5584 len = pos;
5585 pos = skip_spaces (s + pos) - s;
5587 /* Save only the first clause. */
5588 if (s[pos++] == '('
5589 && (last == NULL
5590 || len != (int)strlen (last)
5591 || !strneq (s, last, len)))
5593 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5594 return len;
5597 return 0;
5602 * Handle attributes. Currently, tags are generated for defines
5603 * and records.
5605 * They are on the form:
5606 * -define(foo, bar).
5607 * -define(Foo(M, N), M+N).
5608 * -record(graph, {vtab = notable, cyclic = true}).
5610 static void
5611 erlang_attribute (s)
5612 char *s;
5614 char *cp = s;
5616 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5617 && *cp++ == '(')
5619 int len = erlang_atom (skip_spaces (cp));
5620 if (len > 0)
5621 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5623 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with an
 * atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: find the closing quote.  A backslash quotes the
         character after it. */
      for (pos = 1; s[pos] != '\''; pos++)
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\' && s[++pos] == '\0')
            return 0;
        }
      return pos + 1;           /* closing quote included */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5657 #ifdef ETAGS_REGEXPS
5659 static char *scan_separators __P((char *));
5660 static void add_regex __P((char *, language *));
5661 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];           /* the separator is the first character */
  char *copyto = name;          /* output overwrites the input in place */

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character; one at the very end
             of the string has nothing to quote and leaves the regexp
             unterminated. */
          if (*++name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator stands for itself; for anything
                 else the quote is preserved. */
              if (*name != sep)
                *copyto++ = '\\';
              *copyto++ = *name;
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: end of the field */
      else
        *copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5723 /* Look at the argument of --regex or --no-regex and do the right
5724 thing. Same for each line of a regexp file. */
5725 static void
5726 analyse_regex (regex_arg)
5727 char *regex_arg;
5729 if (regex_arg == NULL)
5731 free_regexps (); /* --no-regex: remove existing regexps */
5732 return;
5735 /* A real --regexp option or a line in a regexp file. */
5736 switch (regex_arg[0])
5738 /* Comments in regexp file or null arg to --regex. */
5739 case '\0':
5740 case ' ':
5741 case '\t':
5742 break;
5744 /* Read a regex file. This is recursive and may result in a
5745 loop, which will stop when the file descriptors are exhausted. */
5746 case '@':
5748 FILE *regexfp;
5749 linebuffer regexbuf;
5750 char *regexfile = regex_arg + 1;
5752 /* regexfile is a file containing regexps, one per line. */
5753 regexfp = fopen (regexfile, "r");
5754 if (regexfp == NULL)
5756 pfatal (regexfile);
5757 return;
5759 linebuffer_init (&regexbuf);
5760 while (readline_internal (&regexbuf, regexfp) > 0)
5761 analyse_regex (regexbuf.buffer);
5762 free (regexbuf.buffer);
5763 fclose (regexfp);
5765 break;
5767 /* Regexp to be used for a specific language only. */
5768 case '{':
5770 language *lang;
5771 char *lang_name = regex_arg + 1;
5772 char *cp;
5774 for (cp = lang_name; *cp != '}'; cp++)
5775 if (*cp == '\0')
5777 error ("unterminated language name in regex: %s", regex_arg);
5778 return;
5780 *cp++ = '\0';
5781 lang = get_language_from_langname (lang_name);
5782 if (lang == NULL)
5783 return;
5784 add_regex (cp, lang);
5786 break;
5788 /* Regexp to be used for any language. */
5789 default:
5790 add_regex (regex_arg, NULL);
5791 break;
5795 /* Separate the regexp pattern, compile it,
5796 and care for optional name and modifiers. */
5797 static void
5798 add_regex (regexp_pattern, lang)
5799 char *regexp_pattern;
5800 language *lang;
5802 static struct re_pattern_buffer zeropattern;
5803 char sep, *pat, *name, *modifiers;
5804 const char *err;
5805 struct re_pattern_buffer *patbuf;
5806 regexp *rp;
5807 bool
5808 force_explicit_name = TRUE, /* do not use implicit tag names */
5809 ignore_case = FALSE, /* case is significant */
5810 multi_line = FALSE, /* matches are done one line at a time */
5811 single_line = FALSE; /* dot does not match newline */
5814 if (strlen(regexp_pattern) < 3)
5816 error ("null regexp", (char *)NULL);
5817 return;
5819 sep = regexp_pattern[0];
5820 name = scan_separators (regexp_pattern);
5821 if (name == NULL)
5823 error ("%s: unterminated regexp", regexp_pattern);
5824 return;
5826 if (name[1] == sep)
5828 error ("null name for regexp \"%s\"", regexp_pattern);
5829 return;
5831 modifiers = scan_separators (name);
5832 if (modifiers == NULL) /* no terminating separator --> no name */
5834 modifiers = name;
5835 name = "";
5837 else
5838 modifiers += 1; /* skip separator */
5840 /* Parse regex modifiers. */
5841 for (; modifiers[0] != '\0'; modifiers++)
5842 switch (modifiers[0])
5844 case 'N':
5845 if (modifiers == name)
5846 error ("forcing explicit tag name but no name, ignoring", NULL);
5847 force_explicit_name = TRUE;
5848 break;
5849 case 'i':
5850 ignore_case = TRUE;
5851 break;
5852 case 's':
5853 single_line = TRUE;
5854 /* FALLTHRU */
5855 case 'm':
5856 multi_line = TRUE;
5857 need_filebuf = TRUE;
5858 break;
5859 default:
5861 char wrongmod [2];
5862 wrongmod[0] = modifiers[0];
5863 wrongmod[1] = '\0';
5864 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5866 break;
5869 patbuf = xnew (1, struct re_pattern_buffer);
5870 *patbuf = zeropattern;
5871 if (ignore_case)
5873 static char lc_trans[CHARS];
5874 int i;
5875 for (i = 0; i < CHARS; i++)
5876 lc_trans[i] = lowcase (i);
5877 patbuf->translate = lc_trans; /* translation table to fold case */
5880 if (multi_line)
5881 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5882 else
5883 pat = regexp_pattern;
5885 if (single_line)
5886 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5887 else
5888 re_set_syntax (RE_SYNTAX_EMACS);
5890 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5891 if (multi_line)
5892 free (pat);
5893 if (err != NULL)
5895 error ("%s while compiling pattern", err);
5896 return;
5899 rp = p_head;
5900 p_head = xnew (1, regexp);
5901 p_head->pattern = savestr (regexp_pattern);
5902 p_head->p_next = rp;
5903 p_head->lang = lang;
5904 p_head->pat = patbuf;
5905 p_head->name = savestr (name);
5906 p_head->error_signaled = FALSE;
5907 p_head->force_explicit_name = force_explicit_name;
5908 p_head->ignore_case = ignore_case;
5909 p_head->multi_line = multi_line;
5913 * Do the substitutions indicated by the regular expression and
5914 * arguments.
5916 static char *
5917 substitute (in, out, regs)
5918 char *in, *out;
5919 struct re_registers *regs;
5921 char *result, *t;
5922 int size, dig, diglen;
5924 result = NULL;
5925 size = strlen (out);
5927 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5928 if (out[size - 1] == '\\')
5929 fatal ("pattern error in \"%s\"", out);
5930 for (t = etags_strchr (out, '\\');
5931 t != NULL;
5932 t = etags_strchr (t + 2, '\\'))
5933 if (ISDIGIT (t[1]))
5935 dig = t[1] - '0';
5936 diglen = regs->end[dig] - regs->start[dig];
5937 size += diglen - 2;
5939 else
5940 size -= 1;
5942 /* Allocate space and do the substitutions. */
5943 assert (size >= 0);
5944 result = xnew (size + 1, char);
5946 for (t = result; *out != '\0'; out++)
5947 if (*out == '\\' && ISDIGIT (*++out))
5949 dig = *out - '0';
5950 diglen = regs->end[dig] - regs->start[dig];
5951 strncpy (t, in + regs->start[dig], diglen);
5952 t += diglen;
5954 else
5955 *t++ = *out;
5956 *t = '\0';
5958 assert (t <= result + size);
5959 assert (t - result == (int)strlen (result));
5961 return result;
5964 /* Deallocate all regexps. */
5965 static void
5966 free_regexps ()
5968 regexp *rp;
5969 while (p_head != NULL)
5971 rp = p_head->p_next;
5972 free (p_head->pattern);
5973 free (p_head->name);
5974 free (p_head);
5975 p_head = rp;
5977 return;
5981 * Reads the whole file as a single string from `filebuf' and looks for
5982 * multi-line regular expressions, creating tags on matches.
5983 * readline already dealt with normal regexps.
5985 * Idea by Ben Wing <ben@666.com> (2002).
5987 static void
5988 regex_tag_multiline ()
5990 char *buffer = filebuf.buffer;
5991 regexp *rp;
5992 char *name;
5994 for (rp = p_head; rp != NULL; rp = rp->p_next)
5996 int match = 0;
5998 if (!rp->multi_line)
5999 continue; /* skip normal regexps */
6001 /* Generic initialisations before parsing file from memory. */
6002 lineno = 1; /* reset global line number */
6003 charno = 0; /* reset global char number */
6004 linecharno = 0; /* reset global char number of line start */
6006 /* Only use generic regexps or those for the current language. */
6007 if (rp->lang != NULL && rp->lang != curfdp->lang)
6008 continue;
6010 while (match >= 0 && match < filebuf.len)
6012 match = re_search (rp->pat, buffer, filebuf.len, charno,
6013 filebuf.len - match, &rp->regs);
6014 switch (match)
6016 case -2:
6017 /* Some error. */
6018 if (!rp->error_signaled)
6020 error ("regexp stack overflow while matching \"%s\"",
6021 rp->pattern);
6022 rp->error_signaled = TRUE;
6024 break;
6025 case -1:
6026 /* No match. */
6027 break;
6028 default:
6029 if (match == rp->regs.end[0])
6031 if (!rp->error_signaled)
6033 error ("regexp matches the empty string: \"%s\"",
6034 rp->pattern);
6035 rp->error_signaled = TRUE;
6037 match = -3; /* exit from while loop */
6038 break;
6041 /* Match occurred. Construct a tag. */
6042 while (charno < rp->regs.end[0])
6043 if (buffer[charno++] == '\n')
6044 lineno++, linecharno = charno;
6045 name = rp->name;
6046 if (name[0] == '\0')
6047 name = NULL;
6048 else /* make a named tag */
6049 name = substitute (buffer, rp->name, &rp->regs);
6050 if (rp->force_explicit_name)
6051 /* Force explicit tag name, if a name is there. */
6052 pfnote (name, TRUE, buffer + linecharno,
6053 charno - linecharno + 1, lineno, linecharno);
6054 else
6055 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6056 charno - linecharno + 1, lineno, linecharno);
6057 break;
6063 #endif /* ETAGS_REGEXPS */
6066 static bool
6067 nocase_tail (cp)
6068 char *cp;
6070 register int len = 0;
6072 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6073 cp++, len++;
6074 if (*cp == '\0' && !intoken (dbp[len]))
6076 dbp += len;
6077 return TRUE;
6079 return FALSE;
6082 static void
6083 get_tag (bp, namepp)
6084 register char *bp;
6085 char **namepp;
6087 register char *cp = bp;
6089 if (*bp != '\0')
6091 /* Go till you get to white space or a syntactic break */
6092 for (cp = bp + 1; !notinname (*cp); cp++)
6093 continue;
6094 make_tag (bp, cp - bp, TRUE,
6095 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6098 if (namepp != NULL)
6099 *namepp = savenstr (bp, cp - bp);
6103 * Read a line of text from `stream' into `lbp', excluding the
6104 * newline or CR-NL, if any. Return the number of characters read from
6105 * `stream', which is the length of the line including the newline.
6107 * On DOS or Windows we do not count the CR character, if any before the
6108 * NL, in the returned length; this mirrors the behavior of Emacs on those
6109 * platforms (for text files, it translates CR-NL to NL as it reads in the
6110 * file).
6112 * If multi-line regular expressions are requested, each line read is
6113 * appended to `filebuf'.
6115 static long
6116 readline_internal (lbp, stream)
6117 linebuffer *lbp;
6118 register FILE *stream;
6120 char *buffer = lbp->buffer;
6121 register char *p = lbp->buffer;
6122 register char *pend;
6123 int chars_deleted;
6125 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6127 for (;;)
6129 register int c = getc (stream);
6130 if (p == pend)
6132 /* We're at the end of linebuffer: expand it. */
6133 lbp->size *= 2;
6134 xrnew (buffer, lbp->size, char);
6135 p += buffer - lbp->buffer;
6136 pend = buffer + lbp->size;
6137 lbp->buffer = buffer;
6139 if (c == EOF)
6141 *p = '\0';
6142 chars_deleted = 0;
6143 break;
6145 if (c == '\n')
6147 if (p > buffer && p[-1] == '\r')
6149 p -= 1;
6150 #ifdef DOS_NT
6151 /* Assume CRLF->LF translation will be performed by Emacs
6152 when loading this file, so CRs won't appear in the buffer.
6153 It would be cleaner to compensate within Emacs;
6154 however, Emacs does not know how many CRs were deleted
6155 before any given point in the file. */
6156 chars_deleted = 1;
6157 #else
6158 chars_deleted = 2;
6159 #endif
6161 else
6163 chars_deleted = 1;
6165 *p = '\0';
6166 break;
6168 *p++ = c;
6170 lbp->len = p - buffer;
6172 if (need_filebuf /* we need filebuf for multi-line regexps */
6173 && chars_deleted > 0) /* not at EOF */
6175 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6177 /* Expand filebuf. */
6178 filebuf.size *= 2;
6179 xrnew (filebuf.buffer, filebuf.size, char);
6181 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6182 filebuf.len += lbp->len;
6183 filebuf.buffer[filebuf.len++] = '\n';
6184 filebuf.buffer[filebuf.len] = '\0';
6187 return lbp->len + chars_deleted;
6191 * Like readline_internal, above, but in addition try to match the
6192 * input line against relevant regular expressions and manage #line
6193 * directives.
6195 static void
6196 readline (lbp, stream)
6197 linebuffer *lbp;
6198 FILE *stream;
6200 long result;
6202 linecharno = charno; /* update global char number of line start */
6203 result = readline_internal (lbp, stream); /* read line */
6204 lineno += 1; /* increment global line number */
6205 charno += result; /* increment global char number */
6207 /* Honour #line directives. */
6208 if (!no_line_directive)
6210 static bool discard_until_line_directive;
6212 /* Check whether this is a #line directive. */
6213 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6215 int start, lno;
6217 if (DEBUG) start = 0; /* shut up the compiler */
6218 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6220 char *endp = lbp->buffer + start;
6222 assert (start > 0);
6223 while ((endp = etags_strchr (endp, '"')) != NULL
6224 && endp[-1] == '\\')
6225 endp++;
6226 if (endp != NULL)
6227 /* Ok, this is a real #line directive. Let's deal with it. */
6229 char *taggedabsname; /* absolute name of original file */
6230 char *taggedfname; /* name of original file as given */
6231 char *name; /* temp var */
6233 discard_until_line_directive = FALSE; /* found it */
6234 name = lbp->buffer + start;
6235 *endp = '\0';
6236 canonicalize_filename (name); /* for DOS */
6237 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6238 if (filename_is_absolute (name)
6239 || filename_is_absolute (curfdp->infname))
6240 taggedfname = savestr (taggedabsname);
6241 else
6242 taggedfname = relative_filename (taggedabsname,tagfiledir);
6244 if (streq (curfdp->taggedfname, taggedfname))
6245 /* The #line directive is only a line number change. We
6246 deal with this afterwards. */
6247 free (taggedfname);
6248 else
6249 /* The tags following this #line directive should be
6250 attributed to taggedfname. In order to do this, set
6251 curfdp accordingly. */
6253 fdesc *fdp; /* file description pointer */
6255 /* Go look for a file description already set up for the
6256 file indicated in the #line directive. If there is
6257 one, use it from now until the next #line
6258 directive. */
6259 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6260 if (streq (fdp->infname, curfdp->infname)
6261 && streq (fdp->taggedfname, taggedfname))
6262 /* If we remove the second test above (after the &&)
6263 then all entries pertaining to the same file are
6264 coalesced in the tags file. If we use it, then
6265 entries pertaining to the same file but generated
6266 from different files (via #line directives) will
6267 go into separate sections in the tags file. These
6268 alternatives look equivalent. The first one
6269 destroys some apparently useless information. */
6271 curfdp = fdp;
6272 free (taggedfname);
6273 break;
6275 /* Else, if we already tagged the real file, skip all
6276 input lines until the next #line directive. */
6277 if (fdp == NULL) /* not found */
6278 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6279 if (streq (fdp->infabsname, taggedabsname))
6281 discard_until_line_directive = TRUE;
6282 free (taggedfname);
6283 break;
6285 /* Else create a new file description and use that from
6286 now on, until the next #line directive. */
6287 if (fdp == NULL) /* not found */
6289 fdp = fdhead;
6290 fdhead = xnew (1, fdesc);
6291 *fdhead = *curfdp; /* copy curr. file description */
6292 fdhead->next = fdp;
6293 fdhead->infname = savestr (curfdp->infname);
6294 fdhead->infabsname = savestr (curfdp->infabsname);
6295 fdhead->infabsdir = savestr (curfdp->infabsdir);
6296 fdhead->taggedfname = taggedfname;
6297 fdhead->usecharno = FALSE;
6298 fdhead->prop = NULL;
6299 fdhead->written = FALSE;
6300 curfdp = fdhead;
6303 free (taggedabsname);
6304 lineno = lno - 1;
6305 readline (lbp, stream);
6306 return;
6307 } /* if a real #line directive */
6308 } /* if #line is followed by a a number */
6309 } /* if line begins with "#line " */
6311 /* If we are here, no #line directive was found. */
6312 if (discard_until_line_directive)
6314 if (result > 0)
6316 /* Do a tail recursion on ourselves, thus discarding the contents
6317 of the line buffer. */
6318 readline (lbp, stream);
6319 return;
6321 /* End of file. */
6322 discard_until_line_directive = FALSE;
6323 return;
6325 } /* if #line directives should be considered */
6327 #ifdef ETAGS_REGEXPS
6329 int match;
6330 regexp *rp;
6331 char *name;
6333 /* Match against relevant regexps. */
6334 if (lbp->len > 0)
6335 for (rp = p_head; rp != NULL; rp = rp->p_next)
6337 /* Only use generic regexps or those for the current language.
6338 Also do not use multiline regexps, which is the job of
6339 regex_tag_multiline. */
6340 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6341 || rp->multi_line)
6342 continue;
6344 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6345 switch (match)
6347 case -2:
6348 /* Some error. */
6349 if (!rp->error_signaled)
6351 error ("regexp stack overflow while matching \"%s\"",
6352 rp->pattern);
6353 rp->error_signaled = TRUE;
6355 break;
6356 case -1:
6357 /* No match. */
6358 break;
6359 case 0:
6360 /* Empty string matched. */
6361 if (!rp->error_signaled)
6363 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6364 rp->error_signaled = TRUE;
6366 break;
6367 default:
6368 /* Match occurred. Construct a tag. */
6369 name = rp->name;
6370 if (name[0] == '\0')
6371 name = NULL;
6372 else /* make a named tag */
6373 name = substitute (lbp->buffer, rp->name, &rp->regs);
6374 if (rp->force_explicit_name)
6375 /* Force explicit tag name, if a name is there. */
6376 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6377 else
6378 make_tag (name, strlen (name), TRUE,
6379 lbp->buffer, match, lineno, linecharno);
6380 break;
6384 #endif /* ETAGS_REGEXPS */
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole = strlen (cp);      /* copy everything up to the terminator */

  return savenstr (cp, whole);
}
6400 * Return a pointer to a space of size LEN+1 allocated with xnew where
6401 * the string CP has been copied for at most the first LEN characters.
6403 static char *
6404 savenstr (cp, len)
6405 char *cp;
6406 int len;
6408 register char *dp;
6410 dp = xnew (len + 1, char);
6411 strncpy (dp, cp, len);
6412 dp[len] = '\0';
6413 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to `char' before comparing and the
 * terminating null character is considered part of the string.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Compare as `char': comparing against the raw int would never match
     values above CHAR_MAX on platforms where `char' is signed.  */
  const char ch = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == ch)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to `char' before comparing and the
 * terminating null character is considered part of the string.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Compare as `char': comparing against the raw int would never match
     values above CHAR_MAX on platforms where `char' is signed.  */
  const char ch = (char) c;

  for (;; sp++)
    {
      if (*sp == ch)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 *
 * Same as BSD's strcasecmp, included for portability.
 * The result is the difference between the first pair of characters
 * that do not match (lower-cased when both are alphabetic).
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings in step until S1 ends or a pair differs.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Same as BSD's strncasecmp, included for portability.
 * Returns 0 when the first N characters match (or N is not positive),
 * otherwise the difference between the first pair of characters that
 * do not match (lower-cased when both are alphabetic).
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is decremented only for characters that compared equal, so on
     leaving the loop `n <= 0' means exactly "the first N characters
     matched".  Testing N before looking at *S1 matters: when S1 ends
     right after N matching characters, the character that follows in
     S2 must not take part in the comparison.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Return a pointer to the first character at or after CP for which
   `iswhite' is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that either
   satisfies `iswhite' or terminates the string.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
/* Report a fatal problem and terminate.  S1 and S2 are handed to
   `error' unchanged (control string and its argument); the program then
   exits with status EXIT_FAILURE, so this function does not return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* print the message first ...  */
  exit (EXIT_FAILURE);          /* ... then give up */
}
/* Like fatal, but describe the failure with perror: S1 is printed
   followed by the system's message for the current `errno'.  Exits
   with status EXIT_FAILURE and does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* "S1: <system error text>" on stderr */
  exit (EXIT_FAILURE);
}
6541 static void
6542 suggest_asking_for_help ()
6544 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6545 progname, LONG_OPTIONS ? "--help" : "-h");
6546 exit (EXIT_FAILURE);
6549 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6550 static void
6551 error (s1, s2)
6552 const char *s1, *s2;
6554 fprintf (stderr, "%s: ", progname);
6555 fprintf (stderr, s1, s2);
6556 fprintf (stderr, "\n");
6559 /* Return a newly-allocated string whose contents
6560 concatenate those of s1, s2, s3. */
6561 static char *
6562 concat (s1, s2, s3)
6563 char *s1, *s2, *s3;
6565 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6566 char *result = xnew (len1 + len2 + len3 + 1, char);
6568 strcpy (result, s1);
6569 strcpy (result + len1, s2);
6570 strcpy (result + len1 + len2, s3);
6571 result[len1 + len2 + len3] = '\0';
6573 return result;
6577 /* Does the same work as the system V getcwd, but does not need to
6578 guess the buffer size in advance. */
6579 static char *
6580 etags_getcwd ()
6582 #ifdef HAVE_GETCWD
6583 int bufsize = 200;
6584 char *path = xnew (bufsize, char);
6586 while (getcwd (path, bufsize) == NULL)
6588 if (errno != ERANGE)
6589 pfatal ("getcwd");
6590 bufsize *= 2;
6591 free (path);
6592 path = xnew (bufsize, char);
6595 canonicalize_filename (path);
6596 return path;
6598 #else /* not HAVE_GETCWD */
6599 #if MSDOS
6601 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6603 getwd (path);
6605 for (p = path; *p != '\0'; p++)
6606 if (*p == '\\')
6607 *p = '/';
6608 else
6609 *p = lowcase (*p);
6611 return strdup (path);
6612 #else /* not MSDOS */
6613 linebuffer path;
6614 FILE *pipe;
6616 linebuffer_init (&path);
6617 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6618 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6619 pfatal ("pwd");
6620 pclose (pipe);
6622 return path.buffer;
6623 #endif /* not MSDOS */
6624 #endif /* not HAVE_GETCWD */
6627 /* Return a newly allocated string containing the file name of FILE
6628 relative to the absolute directory DIR (which should end with a slash). */
6629 static char *
6630 relative_filename (file, dir)
6631 char *file, *dir;
6633 char *fp, *dp, *afn, *res;
6634 int i;
6636 /* Find the common root of file and dir (with a trailing slash). */
6637 afn = absolute_filename (file, cwd);
6638 fp = afn;
6639 dp = dir;
6640 while (*fp++ == *dp++)
6641 continue;
6642 fp--, dp--; /* back to the first differing char */
6643 #ifdef DOS_NT
6644 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6645 return afn;
6646 #endif
6647 do /* look at the equal chars until '/' */
6648 fp--, dp--;
6649 while (*fp != '/');
6651 /* Build a sequence of "../" strings for the resulting relative file name. */
6652 i = 0;
6653 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6654 i += 1;
6655 res = xnew (3*i + strlen (fp + 1) + 1, char);
6656 res[0] = '\0';
6657 while (i-- > 0)
6658 strcat (res, "../");
6660 /* Add the file name relative to the common root of file and dir. */
6661 strcat (res, fp + 1);
6662 free (afn);
6664 return res;
/* Copy the null-terminated string at SRC down to DST, terminator
   included.  DST must not be after SRC; both may point into the same
   buffer.  strcpy cannot be used for this job because its behavior is
   undefined when source and destination overlap.  */
static void
close_gap (dst, src)
     char *dst;
     const char *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result; if nothing is left,
   the result is "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              close_gap (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              close_gap (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was cancelled out: the answer is the root.  Release
         the now empty string instead of leaking it.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; apart from that
   it is left as it was on return.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);       /* no directory part: FILE lives in DIR */

  /* Temporarily cut FILE right after its last slash so that only the
     directory part is made absolute, then put the character back.  */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lower-case drive letter is also converted to upper
   case.  On other systems the file name is left as it is.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6783 /* Initialize a linebuffer for use */
6784 static void
6785 linebuffer_init (lbp)
6786 linebuffer *lbp;
6788 lbp->size = (DEBUG) ? 3 : 200;
6789 lbp->buffer = xnew (lbp->size, char);
6790 lbp->buffer[0] = '\0';
6791 lbp->len = 0;
6794 /* Set the minimum size of a string contained in a linebuffer. */
6795 static void
6796 linebuffer_setlen (lbp, toksize)
6797 linebuffer *lbp;
6798 int toksize;
6800 while (lbp->size <= toksize)
6802 lbp->size *= 2;
6803 xrnew (lbp->buffer, lbp->size, char);
6805 lbp->len = toksize;
6808 /* Like malloc but get fatal error if memory is exhausted. */
6809 static PTR
6810 xmalloc (size)
6811 unsigned int size;
6813 PTR result = (PTR) malloc (size);
6814 if (result == NULL)
6815 fatal ("virtual memory exhausted", (char *)NULL);
6816 return result;
6819 static PTR
6820 xrealloc (ptr, size)
6821 char *ptr;
6822 unsigned int size;
6824 PTR result = (PTR) realloc (ptr, size);
6825 if (result == NULL)
6826 fatal ("virtual memory exhausted", (char *)NULL);
6827 return result;
6831 * Local Variables:
6832 * c-indentation-style: gnu
6833 * indent-tabs-mode: t
6834 * tab-width: 8
6835 * fill-column: 79
6836 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6837 * End:
6840 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6841 (do not change this comment) */
6843 /* etags.c ends here */