Add arch tagline
[emacs.git] / lib-src / etags.c
blob69b92084fd60d6c2b3ea3eabb6fc6ed0275d9476
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007 Free Software Foundation, Inc. and Ken Arnold
6 This file is not considered part of GNU Emacs.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
23 * Authors:
24 * Ctags originally by Ken Arnold.
25 * Fortran added by Jim Kleckner.
26 * Ed Pelegri-Llopart added C typedefs.
27 * Gnu Emacs TAGS format and modifications by RMS?
28 * 1989 Sam Kendall added C++.
29 * 1992 Joseph B. Wells improved C and C++ parsing.
30 * 1993 Francesco Potortì reorganised C and C++.
31 * 1994 Line-by-line regexp tags by Tom Tromey.
32 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
33 * 2002 #line directives by Francesco Potortì.
35 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
39 * If you want to add support for a new language, start by looking at the LUA
40 * language, which is the simplest. Alternatively, consider shipping a
41 * configuration file containing regexp definitions for etags.
44 char pot_etags_version[] = "@(#) pot revision number is 17.26";
46 #define TRUE 1
47 #define FALSE 0
49 #ifdef DEBUG
50 # undef DEBUG
51 # define DEBUG TRUE
52 #else
53 # define DEBUG FALSE
54 # define NDEBUG /* disable assert */
55 #endif
57 #ifdef HAVE_CONFIG_H
58 # include <config.h>
59 /* On some systems, Emacs defines static as nothing for the sake
60 of unexec. We don't want that here since we don't use unexec. */
61 # undef static
62 # ifndef PTR /* for XEmacs */
63 # define PTR void *
64 # endif
65 # ifndef __P /* for XEmacs */
66 # define __P(args) args
67 # endif
68 #else /* no config.h */
69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
70 # define __P(args) args /* use prototypes */
71 # define PTR void * /* for generic pointers */
72 # else /* not standard C */
73 # define __P(args) () /* no prototypes */
74 # define const /* remove const for old compilers' sake */
75 # define PTR long * /* don't use void* */
76 # endif
77 #endif /* !HAVE_CONFIG_H */
79 #ifndef _GNU_SOURCE
80 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
81 #endif
83 /* WIN32_NATIVE is for XEmacs.
84 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
85 #ifdef WIN32_NATIVE
86 # undef MSDOS
87 # undef WINDOWSNT
88 # define WINDOWSNT
89 #endif /* WIN32_NATIVE */
91 #ifdef MSDOS
92 # undef MSDOS
93 # define MSDOS TRUE
94 # include <fcntl.h>
95 # include <sys/param.h>
96 # include <io.h>
97 # ifndef HAVE_CONFIG_H
98 # define DOS_NT
99 # include <sys/config.h>
100 # endif
101 #else
102 # define MSDOS FALSE
103 #endif /* MSDOS */
105 #ifdef WINDOWSNT
106 # include <stdlib.h>
107 # include <fcntl.h>
108 # include <string.h>
109 # include <direct.h>
110 # include <io.h>
111 # define MAXPATHLEN _MAX_PATH
112 # undef HAVE_NTGUI
113 # undef DOS_NT
114 # define DOS_NT
115 # ifndef HAVE_GETCWD
116 # define HAVE_GETCWD
117 # endif /* undef HAVE_GETCWD */
118 #else /* not WINDOWSNT */
119 # ifdef STDC_HEADERS
120 # include <stdlib.h>
121 # include <string.h>
122 # else /* no standard C headers */
123 extern char *getenv ();
124 # ifdef VMS
125 # define EXIT_SUCCESS 1
126 # define EXIT_FAILURE 0
127 # else /* no VMS */
128 # define EXIT_SUCCESS 0
129 # define EXIT_FAILURE 1
130 # endif
131 # endif
132 #endif /* !WINDOWSNT */
134 #ifdef HAVE_UNISTD_H
135 # include <unistd.h>
136 #else
137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
138 extern char *getcwd (char *buf, size_t size);
139 # endif
140 #endif /* HAVE_UNISTD_H */
142 #include <stdio.h>
143 #include <ctype.h>
144 #include <errno.h>
145 #ifndef errno
146 extern int errno;
147 #endif
148 #include <sys/types.h>
149 #include <sys/stat.h>
151 #include <assert.h>
152 #ifdef NDEBUG
153 # undef assert /* some systems have a buggy assert.h */
154 # define assert(x) ((void) 0)
155 #endif
157 #if !defined (S_ISREG) && defined (S_IFREG)
158 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
159 #endif
/*
 * Long option support.
 *
 * Define NO_LONG_OPTIONS when building if GNU getopt is not available.
 * In that case the macro is normalised to TRUE and getopt_long is mapped
 * onto plain getopt, discarding the long-option arguments.  Otherwise it
 * is set to FALSE and <getopt.h> is used.
 */
#ifdef NO_LONG_OPTIONS
  /* Undefine first, as done for DEBUG, MSDOS and CTAGS: redefining a macro
     given as -DNO_LONG_OPTIONS (value 1) to TRUE without an #undef is an
     incompatible redefinition.  */
# undef NO_LONG_OPTIONS
# define NO_LONG_OPTIONS TRUE
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
171 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
172 # ifdef __CYGWIN__ /* compiling on Cygwin */
173 !!! NOTICE !!!
174 the regex.h distributed with Cygwin is not compatible with etags, alas!
175 If you want regular expression support, you should delete this notice and
176 arrange to use the GNU regex.h and regex.c.
177 # endif
178 #endif
179 #include <regex.h>
181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
182 Leave it undefined to make the program "etags", which makes emacs-style
183 tag tables and tags typedefs, #defines and struct/union/enum by default. */
184 #ifdef CTAGS
185 # undef CTAGS
186 # define CTAGS TRUE
187 #else
188 # define CTAGS FALSE
189 #endif
/*
 * String equality helpers.  Each one evaluates to nonzero when the two
 * strings compare equal:
 *   streq, strneq           case-sensitive (strcmp, strncmp)
 *   strcaseeq, strncaseeq   case-insensitive (etags_strcasecmp,
 *                           etags_strncasecmp)
 * The strn* forms compare at most N characters.
 *
 * BOTH arguments must be non-NULL; this is asserted when assertions are
 * enabled.  S and T may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define streq(s,t)        (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)    (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)     (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
196 #define CHARS 256 /* 2^CHAR_BIT, i.e. 2^(8*sizeof(char)) with 8-bit chars */
197 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
198 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
199 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
200 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
201 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
202 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
204 #define ISALNUM(c) isalnum (CHAR(c))
205 #define ISALPHA(c) isalpha (CHAR(c))
206 #define ISDIGIT(c) isdigit (CHAR(c))
207 #define ISLOWER(c) islower (CHAR(c))
209 #define lowcase(c) tolower (CHAR(c))
210 #define upcase(c) toupper (CHAR(c))
214 * xnew, xrnew -- allocate, reallocate storage
216 * SYNOPSIS: Type *xnew (int n, Type);
217 * void xrnew (OldPointer, int n, Type);
219 #if DEBUG
220 # include "chkmalloc.h"
221 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
222 (n) * sizeof (Type)))
223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
224 (char *) (op), (n) * sizeof (Type)))
225 #else
226 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
228 (char *) (op), (n) * sizeof (Type)))
229 #endif
231 #define bool int
233 typedef void Lang_function __P((FILE *));
235 typedef struct
237 char *suffix; /* file name suffix for this compressor */
238 char *command; /* takes one arg and decompresses to stdout */
239 } compressor;
241 typedef struct
243 char *name; /* language name */
244 char *help; /* detailed help for the language */
245 Lang_function *function; /* parse function */
246 char **suffixes; /* name suffixes of this language's files */
247 char **filenames; /* names of this language's files */
248 char **interpreters; /* interpreters for this language */
249 bool metasource; /* source used to generate other sources */
250 } language;
252 typedef struct fdesc
254 struct fdesc *next; /* for the linked list */
255 char *infname; /* uncompressed input file name */
256 char *infabsname; /* absolute uncompressed input file name */
257 char *infabsdir; /* absolute dir of input file */
258 char *taggedfname; /* file name to write in tagfile */
259 language *lang; /* language of file */
260 char *prop; /* file properties to write in tagfile */
261 bool usecharno; /* etags tags shall contain char number */
262 bool written; /* entry written in the tags file */
263 } fdesc;
265 typedef struct node_st
266 { /* sorting structure */
267 struct node_st *left, *right; /* left and right sons */
268 fdesc *fdp; /* description of file to whom tag belongs */
269 char *name; /* tag name */
270 char *regex; /* search regexp */
271 bool valid; /* write this tag on the tag file */
272 bool is_func; /* function tag: use regexp in CTAGS mode */
273 bool been_warned; /* warning already given for duplicated tag */
274 int lno; /* line number tag is on */
275 long cno; /* character number line starts on */
276 } node;
279 * A `linebuffer' is a structure which holds a line of text.
280 * `readline_internal' reads a line from a stream into a linebuffer
281 * and works regardless of the length of the line.
282 * SIZE is the size of BUFFER, LEN is the length of the string in
283 * BUFFER after readline reads it.
285 typedef struct
287 long size;
288 int len;
289 char *buffer;
290 } linebuffer;
292 /* Used to support mixing of --lang and file names. */
293 typedef struct
295 enum {
296 at_language, /* a language specification */
297 at_regexp, /* a regular expression */
298 at_filename, /* a file name */
299 at_stdin, /* read from stdin here */
300 at_end /* stop parsing the list */
301 } arg_type; /* argument type */
302 language *lang; /* language associated with the argument */
303 char *what; /* the argument itself */
304 } argument;
306 /* Structure defining a regular expression. */
307 typedef struct regexp
309 struct regexp *p_next; /* pointer to next in list */
310 language *lang; /* if set, use only for this language */
311 char *pattern; /* the regexp pattern */
312 char *name; /* tag name */
313 struct re_pattern_buffer *pat; /* the compiled pattern */
314 struct re_registers regs; /* re registers */
315 bool error_signaled; /* already signaled for this regexp */
316 bool force_explicit_name; /* do not allow implicit tag name */
317 bool ignore_case; /* ignore case when matching */
318 bool multi_line; /* do a multi-line match on the whole file */
319 } regexp;
322 /* Many compilers barf on this:
323 Lang_function Ada_funcs;
324 so let's write it this way */
325 static void Ada_funcs __P((FILE *));
326 static void Asm_labels __P((FILE *));
327 static void C_entries __P((int c_ext, FILE *));
328 static void default_C_entries __P((FILE *));
329 static void plain_C_entries __P((FILE *));
330 static void Cjava_entries __P((FILE *));
331 static void Cobol_paragraphs __P((FILE *));
332 static void Cplusplus_entries __P((FILE *));
333 static void Cstar_entries __P((FILE *));
334 static void Erlang_functions __P((FILE *));
335 static void Forth_words __P((FILE *));
336 static void Fortran_functions __P((FILE *));
337 static void HTML_labels __P((FILE *));
338 static void Lisp_functions __P((FILE *));
339 static void Lua_functions __P((FILE *));
340 static void Makefile_targets __P((FILE *));
341 static void Pascal_functions __P((FILE *));
342 static void Perl_functions __P((FILE *));
343 static void PHP_functions __P((FILE *));
344 static void PS_functions __P((FILE *));
345 static void Prolog_functions __P((FILE *));
346 static void Python_functions __P((FILE *));
347 static void Scheme_functions __P((FILE *));
348 static void TeX_commands __P((FILE *));
349 static void Texinfo_nodes __P((FILE *));
350 static void Yacc_entries __P((FILE *));
351 static void just_read_file __P((FILE *));
353 static void print_language_names __P((void));
354 static void print_version __P((void));
355 static void print_help __P((argument *));
356 int main __P((int, char **));
358 static compressor *get_compressor_from_suffix __P((char *, char **));
359 static language *get_language_from_langname __P((const char *));
360 static language *get_language_from_interpreter __P((char *));
361 static language *get_language_from_filename __P((char *, bool));
362 static void readline __P((linebuffer *, FILE *));
363 static long readline_internal __P((linebuffer *, FILE *));
364 static bool nocase_tail __P((char *));
365 static void get_tag __P((char *, char **));
367 static void analyse_regex __P((char *));
368 static void free_regexps __P((void));
369 static void regex_tag_multiline __P((void));
370 static void error __P((const char *, const char *));
371 static void suggest_asking_for_help __P((void));
372 void fatal __P((char *, char *));
373 static void pfatal __P((char *));
374 static void add_node __P((node *, node **));
376 static void init __P((void));
377 static void process_file_name __P((char *, language *));
378 static void process_file __P((FILE *, char *, language *));
379 static void find_entries __P((FILE *));
380 static void free_tree __P((node *));
381 static void free_fdesc __P((fdesc *));
382 static void pfnote __P((char *, bool, char *, int, int, long));
383 static void make_tag __P((char *, int, bool, char *, int, int, long));
384 static void invalidate_nodes __P((fdesc *, node **));
385 static void put_entries __P((node *));
387 static char *concat __P((char *, char *, char *));
388 static char *skip_spaces __P((char *));
389 static char *skip_non_spaces __P((char *));
390 static char *savenstr __P((char *, int));
391 static char *savestr __P((char *));
392 static char *etags_strchr __P((const char *, int));
393 static char *etags_strrchr __P((const char *, int));
394 static int etags_strcasecmp __P((const char *, const char *));
395 static int etags_strncasecmp __P((const char *, const char *, int));
396 static char *etags_getcwd __P((void));
397 static char *relative_filename __P((char *, char *));
398 static char *absolute_filename __P((char *, char *));
399 static char *absolute_dirname __P((char *, char *));
400 static bool filename_is_absolute __P((char *f));
401 static void canonicalize_filename __P((char *));
402 static void linebuffer_init __P((linebuffer *));
403 static void linebuffer_setlen __P((linebuffer *, int));
404 static PTR xmalloc __P((unsigned int));
405 static PTR xrealloc __P((char *, unsigned int));
408 static char searchar = '/'; /* use /.../ searches */
410 static char *tagfile; /* output file */
411 static char *progname; /* name this program was invoked with */
412 static char *cwd; /* current working directory */
413 static char *tagfiledir; /* directory of tagfile */
414 static FILE *tagf; /* ioptr for tags file */
416 static fdesc *fdhead; /* head of file description list */
417 static fdesc *curfdp; /* current file description */
418 static int lineno; /* line number of current line */
419 static long charno; /* current character number */
420 static long linecharno; /* charno of start of current line */
421 static char *dbp; /* pointer to start of current tag */
423 static const int invalidcharno = -1;
425 static node *nodehead; /* the head of the binary tree of tags */
426 static node *last_node; /* the last node created */
428 static linebuffer lb; /* the current line */
429 static linebuffer filebuf; /* a buffer containing the whole file */
430 static linebuffer token_name; /* a buffer containing a tag name */
432 /* boolean "functions" (see init) */
433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
434 static char
435 /* white chars */
436 *white = " \f\t\n\r\v",
437 /* not in a name */
438 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
439 /* token ending chars */
440 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
441 /* token starting chars */
442 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
443 /* valid in-token chars */
444 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
446 static bool append_to_tagfile; /* -a: append to tags */
447 /* The next four default to TRUE for etags, but to FALSE for ctags. */
448 static bool typedefs; /* -t: create tags for C and Ada typedefs */
449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
450 /* 0 struct/enum/union decls, and C++ */
451 /* member functions. */
452 static bool constantypedefs; /* -d: create tags for C #define, enum */
453 /* constants and variables. */
454 /* -D: opposite of -d. Default under ctags. */
455 static bool globals; /* create tags for global variables */
456 static bool members; /* create tags for C member variables */
457 static bool declarations; /* --declarations: tag them and extern in C&Co*/
458 static bool no_line_directive; /* ignore #line directives (undocumented) */
459 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
460 static bool update; /* -u: update tags */
461 static bool vgrind_style; /* -v: create vgrind style index output */
462 static bool no_warnings; /* -w: suppress warnings (undocumented) */
463 static bool cxref_style; /* -x: create cxref style output */
464 static bool cplusplus; /* .[hc] means C++, not C */
465 static bool ignoreindent; /* -I: ignore indentation in C */
466 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
468 /* STDIN is defined in LynxOS system headers */
469 #ifdef STDIN
470 # undef STDIN
471 #endif
473 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
474 static bool parsing_stdin; /* --parse-stdin used */
476 static regexp *p_head; /* list of all regexps */
477 static bool need_filebuf; /* some regexes are multi-line */
479 static struct option longopts[] =
481 { "append", no_argument, NULL, 'a' },
482 { "packages-only", no_argument, &packages_only, TRUE },
483 { "c++", no_argument, NULL, 'C' },
484 { "declarations", no_argument, &declarations, TRUE },
485 { "no-line-directive", no_argument, &no_line_directive, TRUE },
486 { "no-duplicates", no_argument, &no_duplicates, TRUE },
487 { "help", no_argument, NULL, 'h' },
488 { "help", no_argument, NULL, 'H' },
489 { "ignore-indentation", no_argument, NULL, 'I' },
490 { "language", required_argument, NULL, 'l' },
491 { "members", no_argument, &members, TRUE },
492 { "no-members", no_argument, &members, FALSE },
493 { "output", required_argument, NULL, 'o' },
494 { "regex", required_argument, NULL, 'r' },
495 { "no-regex", no_argument, NULL, 'R' },
496 { "ignore-case-regex", required_argument, NULL, 'c' },
497 { "parse-stdin", required_argument, NULL, STDIN },
498 { "version", no_argument, NULL, 'V' },
500 #if CTAGS /* Ctags options */
501 { "backward-search", no_argument, NULL, 'B' },
502 { "cxref", no_argument, NULL, 'x' },
503 { "defines", no_argument, NULL, 'd' },
504 { "globals", no_argument, &globals, TRUE },
505 { "typedefs", no_argument, NULL, 't' },
506 { "typedefs-and-c++", no_argument, NULL, 'T' },
507 { "update", no_argument, NULL, 'u' },
508 { "vgrind", no_argument, NULL, 'v' },
509 { "no-warn", no_argument, NULL, 'w' },
511 #else /* Etags options */
512 { "no-defines", no_argument, NULL, 'D' },
513 { "no-globals", no_argument, &globals, FALSE },
514 { "include", required_argument, NULL, 'i' },
515 #endif
516 { NULL }
519 static compressor compressors[] =
521 { "z", "gzip -d -c"},
522 { "Z", "gzip -d -c"},
523 { "gz", "gzip -d -c"},
524 { "GZ", "gzip -d -c"},
525 { "bz2", "bzip2 -d -c" },
526 { NULL }
530 * Language stuff.
533 /* Ada code */
534 static char *Ada_suffixes [] =
535 { "ads", "adb", "ada", NULL };
536 static char Ada_help [] =
537 "In Ada code, functions, procedures, packages, tasks and types are\n\
538 tags. Use the `--packages-only' option to create tags for\n\
539 packages only.\n\
540 Ada tag names have suffixes indicating the type of entity:\n\
541 Entity type: Qualifier:\n\
542 ------------ ----------\n\
543 function /f\n\
544 procedure /p\n\
545 package spec /s\n\
546 package body /b\n\
547 type /t\n\
548 task /k\n\
549 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
550 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
551 will just search for any tag `bidule'.";
553 /* Assembly code */
554 static char *Asm_suffixes [] =
555 { "a", /* Unix assembler */
556 "asm", /* Microcontroller assembly */
557 "def", /* BSO/Tasking definition includes */
558 "inc", /* Microcontroller include files */
559 "ins", /* Microcontroller include files */
560 "s", "sa", /* Unix assembler */
561 "S", /* cpp-processed Unix assembler */
562 "src", /* BSO/Tasking C compiler output */
563 NULL
565 static char Asm_help [] =
566 "In assembler code, labels appearing at the beginning of a line,\n\
567 followed by a colon, are tags.";
570 /* Note that .c and .h can be considered C++, if the --c++ flag was
571 given, or if the `class' or `template' keywords are met inside the file.
572 That is why default_C_entries is called for these. */
573 static char *default_C_suffixes [] =
574 { "c", "h", NULL };
575 static char default_C_help [] =
576 "In C code, any C function or typedef is a tag, and so are\n\
577 definitions of `struct', `union' and `enum'. `#define' macro\n\
578 definitions and `enum' constants are tags unless you specify\n\
579 `--no-defines'. Global variables are tags unless you specify\n\
580 `--no-globals' and so are struct members unless you specify\n\
581 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
582 `--no-members' can make the tags table file much smaller.\n\
583 You can tag function declarations and external variables by\n\
584 using `--declarations'.";
586 static char *Cplusplus_suffixes [] =
587 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
588 "M", /* Objective C++ */
589 "pdb", /* Postscript with C syntax */
590 NULL };
591 static char Cplusplus_help [] =
592 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
593 --help --lang=c --lang=c++ for full help.)\n\
594 In addition to C tags, member functions are also recognized. Member\n\
595 variables are recognized unless you use the `--no-members' option.\n\
596 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
597 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
598 `operator+'.";
600 static char *Cjava_suffixes [] =
601 { "java", NULL };
602 static char Cjava_help [] =
603 "In Java code, all the tags constructs of C and C++ code are\n\
604 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
607 static char *Cobol_suffixes [] =
608 { "COB", "cob", NULL };
609 static char Cobol_help [] =
610 "In Cobol code, tags are paragraph names; that is, any word\n\
611 starting in column 8 and followed by a period.";
613 static char *Cstar_suffixes [] =
614 { "cs", "hs", NULL };
616 static char *Erlang_suffixes [] =
617 { "erl", "hrl", NULL };
618 static char Erlang_help [] =
619 "In Erlang code, the tags are the functions, records and macros\n\
620 defined in the file.";
/* File name suffixes of Forth source files (NULL-terminated list).
   Declared static like every other suffix table in this file, so the
   symbol is not exported.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
624 static char Forth_help [] =
625 "In Forth code, tags are words defined by `:',\n\
626 constant, code, create, defer, value, variable, buffer:, field.";
628 static char *Fortran_suffixes [] =
629 { "F", "f", "f90", "for", NULL };
630 static char Fortran_help [] =
631 "In Fortran code, functions, subroutines and block data are tags.";
633 static char *HTML_suffixes [] =
634 { "htm", "html", "shtml", NULL };
635 static char HTML_help [] =
636 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
637 `h3' headers. Also, tags are `name=' in anchors and all\n\
638 occurrences of `id='.";
640 static char *Lisp_suffixes [] =
641 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
642 static char Lisp_help [] =
643 "In Lisp code, any function defined with `defun', any variable\n\
644 defined with `defvar' or `defconst', and in general the first\n\
645 argument of any expression that starts with `(def' in column zero\n\
646 is a tag.";
648 static char *Lua_suffixes [] =
649 { "lua", "LUA", NULL };
650 static char Lua_help [] =
651 "In Lua scripts, all functions are tags.";
653 static char *Makefile_filenames [] =
654 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
655 static char Makefile_help [] =
656 "In makefiles, targets are tags; additionally, variables are tags\n\
657 unless you specify `--no-globals'.";
659 static char *Objc_suffixes [] =
660 { "lm", /* Objective lex file */
661 "m", /* Objective C file */
662 NULL };
663 static char Objc_help [] =
664 "In Objective C code, tags include Objective C definitions for classes,\n\
665 class categories, methods and protocols. Tags for variables and\n\
666 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
667 (Use --help --lang=c --lang=objc --lang=java for full help.)";
669 static char *Pascal_suffixes [] =
670 { "p", "pas", NULL };
671 static char Pascal_help [] =
672 "In Pascal code, the tags are the functions and procedures defined\n\
673 in the file.";
674 /* " // this is for working around an Emacs highlighting bug... */
676 static char *Perl_suffixes [] =
677 { "pl", "pm", NULL };
678 static char *Perl_interpreters [] =
679 { "perl", "@PERL@", NULL };
680 static char Perl_help [] =
681 "In Perl code, the tags are the packages, subroutines and variables\n\
682 defined by the `package', `sub', `my' and `local' keywords. Use\n\
683 `--globals' if you want to tag global variables. Tags for\n\
684 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
685 defined in the default package is `main::SUB'.";
687 static char *PHP_suffixes [] =
688 { "php", "php3", "php4", NULL };
689 static char PHP_help [] =
690 "In PHP code, tags are functions, classes and defines. Unless you use\n\
691 the `--no-members' option, vars are tags too.";
693 static char *plain_C_suffixes [] =
694 { "pc", /* Pro*C file */
695 NULL };
697 static char *PS_suffixes [] =
698 { "ps", "psw", NULL }; /* .psw is for PSWrap */
699 static char PS_help [] =
700 "In PostScript code, the tags are the functions.";
702 static char *Prolog_suffixes [] =
703 { "prolog", NULL };
704 static char Prolog_help [] =
705 "In Prolog code, tags are predicates and rules at the beginning of\n\
706 line.";
708 static char *Python_suffixes [] =
709 { "py", NULL };
710 static char Python_help [] =
711 "In Python code, `def' or `class' at the beginning of a line\n\
712 generate a tag.";
714 /* Can't do the `SCM' or `scm' prefix with a version number. */
715 static char *Scheme_suffixes [] =
716 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
717 static char Scheme_help [] =
718 "In Scheme code, tags include anything defined with `def' or with a\n\
719 construct whose name starts with `def'. They also include\n\
720 variables set with `set!' at top level in the file.";
722 static char *TeX_suffixes [] =
723 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
724 static char TeX_help [] =
725 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
726 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
727 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
728 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
729 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
731 Other commands can be specified by setting the environment variable\n\
732 `TEXTAGS' to a colon-separated list like, for example,\n\
733 TEXTAGS=\"mycommand:myothercommand\".";
736 static char *Texinfo_suffixes [] =
737 { "texi", "texinfo", "txi", NULL };
738 static char Texinfo_help [] =
739 "for texinfo files, lines starting with @node are tagged.";
741 static char *Yacc_suffixes [] =
742 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
743 static char Yacc_help [] =
744 "In Bison or Yacc input files, each rule defines as a tag the\n\
745 nonterminal it constructs. The portions of the file that contain\n\
746 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
747 for full help).";
/* Detailed help text for the `auto' pseudo-language entry.  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
753 static char none_help [] =
754 "`none' is not a real language, it indicates to only do\n\
755 regexp processing on files.";
757 static char no_lang_help [] =
758 "No detailed help available for this language.";
762 * Table of languages.
764 * It is ok for a given function to be listed under more than one
765 * name. I just didn't.
768 static language lang_names [] =
770 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
771 { "asm", Asm_help, Asm_labels, Asm_suffixes },
772 { "c", default_C_help, default_C_entries, default_C_suffixes },
773 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
774 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
775 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
776 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
777 { "forth", Forth_help, Forth_words, Forth_suffixes },
778 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
779 { "html", HTML_help, HTML_labels, HTML_suffixes },
780 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
781 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
782 { "lua", Lua_help, Lua_functions, Lua_suffixes },
783 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
784 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
785 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
786 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
787 { "php", PHP_help, PHP_functions, PHP_suffixes },
788 { "postscript",PS_help, PS_functions, PS_suffixes },
789 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
790 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
791 { "python", Python_help, Python_functions, Python_suffixes },
792 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
793 { "tex", TeX_help, TeX_commands, TeX_suffixes },
794 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
795 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
796 { "auto", auto_help }, /* default guessing scheme */
797 { "none", none_help, just_read_file }, /* regexp matching only */
798 { NULL } /* end of list */
802 static void
803 print_language_names ()
805 language *lang;
806 char **name, **ext;
808 puts ("\nThese are the currently supported languages, along with the\n\
809 default file names and dot suffixes:");
810 for (lang = lang_names; lang->name != NULL; lang++)
812 printf (" %-*s", 10, lang->name);
813 if (lang->filenames != NULL)
814 for (name = lang->filenames; *name != NULL; name++)
815 printf (" %s", *name);
816 if (lang->suffixes != NULL)
817 for (ext = lang->suffixes; *ext != NULL; ext++)
818 printf (" .%s", *ext);
819 puts ("");
821 puts ("where `auto' means use default language for files based on file\n\
822 name suffix, and `none' means only do regexp processing on files.\n\
823 If no language is specified and no matching suffix is found,\n\
824 the first line of the file is read for a sharp-bang (#!) sequence\n\
825 followed by the name of an interpreter. If no such sequence is found,\n\
826 Fortran is tried first; if no tags are found, C is tried next.\n\
827 When parsing any C file, a \"class\" or \"template\" keyword\n\
828 switches to C++.");
829 puts ("Compressed files are supported using gzip and bzip2.\n\
831 For detailed help on a given language use, for example,\n\
832 etags --help --lang=ada.");
835 #ifndef EMACS_NAME
836 # define EMACS_NAME "standalone"
837 #endif
838 #ifndef VERSION
839 # define VERSION "version"
840 #endif
841 static void
842 print_version ()
844 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
845 puts ("Copyright (C) 2007 Free Software Foundation, Inc. and Ken Arnold");
846 puts ("This program is distributed under the same terms as Emacs");
848 exit (EXIT_SUCCESS);
851 static void
852 print_help (argbuffer)
853 argument *argbuffer;
855 bool help_for_lang = FALSE;
857 for (; argbuffer->arg_type != at_end; argbuffer++)
858 if (argbuffer->arg_type == at_language)
860 if (help_for_lang)
861 puts ("");
862 puts (argbuffer->lang->help);
863 help_for_lang = TRUE;
866 if (help_for_lang)
867 exit (EXIT_SUCCESS);
869 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
871 These are the options accepted by %s.\n", progname, progname);
872 if (NO_LONG_OPTIONS)
873 puts ("WARNING: long option names do not work with this executable,\n\
874 as it is not linked with GNU getopt.");
875 else
876 puts ("You may use unambiguous abbreviations for the long option names.");
877 puts (" A - as file name means read names from stdin (one per line).\n\
878 Absolute names are stored in the output file as they are.\n\
879 Relative ones are stored relative to the output file's directory.\n");
881 puts ("-a, --append\n\
882 Append tag entries to existing tags file.");
884 puts ("--packages-only\n\
885 For Ada files, only generate tags for packages.");
887 if (CTAGS)
888 puts ("-B, --backward-search\n\
889 Write the search commands for the tag entries using '?', the\n\
890 backward-search command instead of '/', the forward-search command.");
892 /* This option is mostly obsolete, because etags can now automatically
893 detect C++. Retained for backward compatibility and for debugging and
894 experimentation. In principle, we could want to tag as C++ even
895 before any "class" or "template" keyword.
896 puts ("-C, --c++\n\
897 Treat files whose name suffix defaults to C language as C++ files.");
900 puts ("--declarations\n\
901 In C and derived languages, create tags for function declarations,");
902 if (CTAGS)
903 puts ("\tand create tags for extern variables if --globals is used.");
904 else
905 puts
906 ("\tand create tags for extern variables unless --no-globals is used.");
908 if (CTAGS)
909 puts ("-d, --defines\n\
910 Create tag entries for C #define constants and enum constants, too.");
911 else
912 puts ("-D, --no-defines\n\
913 Don't create tag entries for C #define constants and enum constants.\n\
914 This makes the tags file smaller.");
916 if (!CTAGS)
917 puts ("-i FILE, --include=FILE\n\
918 Include a note in tag file indicating that, when searching for\n\
919 a tag, one should also consult the tags file FILE after\n\
920 checking the current file.");
922 puts ("-l LANG, --language=LANG\n\
923 Force the following files to be considered as written in the\n\
924 named language up to the next --language=LANG option.");
926 if (CTAGS)
927 puts ("--globals\n\
928 Create tag entries for global variables in some languages.");
929 else
930 puts ("--no-globals\n\
931 Do not create tag entries for global variables in some\n\
932 languages. This makes the tags file smaller.");
933 puts ("--no-members\n\
934 Do not create tag entries for members of structures\n\
935 in some languages.");
937 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
938 Make a tag for each line matching a regular expression pattern\n\
939 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
940 files only. REGEXFILE is a file containing one REGEXP per line.\n\
941 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
942 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
943 puts (" If TAGNAME/ is present, the tags created are named.\n\
944 For example Tcl named tags can be created with:\n\
945 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
946 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
947 `m' means to allow multi-line matches, `s' implies `m' and\n\
948 causes dot to match any character, including newline.");
949 puts ("-R, --no-regex\n\
950 Don't create tags from regexps for the following files.");
951 puts ("-I, --ignore-indentation\n\
952 In C and C++ do not assume that a closing brace in the first\n\
953 column is the final brace of a function or structure definition.");
954 puts ("-o FILE, --output=FILE\n\
955 Write the tags to FILE.");
956 puts ("--parse-stdin=NAME\n\
957 Read from standard input and record tags as belonging to file NAME.");
959 if (CTAGS)
961 puts ("-t, --typedefs\n\
962 Generate tag entries for C and Ada typedefs.");
963 puts ("-T, --typedefs-and-c++\n\
964 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
965 and C++ member functions.");
968 if (CTAGS)
969 puts ("-u, --update\n\
970 Update the tag entries for the given files, leaving tag\n\
971 entries for other files in place. Currently, this is\n\
972 implemented by deleting the existing entries for the given\n\
973 files and then rewriting the new entries at the end of the\n\
974 tags file. It is often faster to simply rebuild the entire\n\
975 tag file than to use this.");
977 if (CTAGS)
979 puts ("-v, --vgrind\n\
980 Print on the standard output an index of items intended for\n\
981 human consumption, similar to the output of vgrind. The index\n\
982 is sorted, and gives the page number of each item.");
983 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
984 puts ("-w, --no-duplicates\n\
985 Do not create duplicate tag entries, for compatibility with\n\
986 traditional ctags.");
987 puts ("-w, --no-warn\n\
988 Suppress warning messages about duplicate tag entries.");
989 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
990 puts ("-x, --cxref\n\
991 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
992 The output uses line numbers instead of page numbers, but\n\
993 beyond that the differences are cosmetic; try both to see\n\
994 which you like.");
997 puts ("-V, --version\n\
998 Print the version of the program.\n\
999 -h, --help\n\
1000 Print this help message.\n\
1001 Followed by one or more `--language' options prints detailed\n\
1002 help about tag generation for the specified languages.");
1004 print_language_names ();
1006 puts ("");
1007 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1009 exit (EXIT_SUCCESS);
1013 #ifdef VMS /* VMS specific functions */
/* End-of-string terminator. */
#define EOS '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* A file specification buffer: a length word followed by the characters.
   fn_exp passes a pointer to one of these as the output descriptor of
   lib$find_file and terminates BODY at index CURLEN afterwards.  */
typedef struct
{
  short curlen;                         /* current length of BODY */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the name, plus room for EOS */
} vspec;
/*
 v1.05 nmm 26-Jun-86 fn_exp - expand the specification of a list of file
 names, returning in each successive call the next file name matching the
 input spec.  The function expects that each in_spec passed
 to it will be processed to completion; in particular, up to and
 including the call following that in which the last matching name
 is returned, the function ignores the value of in_spec, and will
 only start processing a new spec with the following call.
 If an error occurs, on return out_spec contains the value
 of in_spec when the error occurred.

 With each successive file name returned in out_spec, the
 function's return value is one.  When there are no more matching
 names the function returns zero.  If on the first call no file
 matches in_spec, or there is any other error, -1 is returned.
*/
1042 #include <rmsdef.h>
1043 #include <descrip.h>
1044 #define OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047 vspec *out;
1048 char *in;
1050 static long context = 0;
1051 static struct dsc$descriptor_s o;
1052 static struct dsc$descriptor_s i;
1053 static bool pass1 = TRUE;
1054 long status;
1055 short retval;
1057 if (pass1)
1059 pass1 = FALSE;
1060 o.dsc$a_pointer = (char *) out;
1061 o.dsc$w_length = (short)OUTSIZE;
1062 i.dsc$a_pointer = in;
1063 i.dsc$w_length = (short)strlen(in);
1064 i.dsc$b_dtype = DSC$K_DTYPE_T;
1065 i.dsc$b_class = DSC$K_CLASS_S;
1066 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067 o.dsc$b_class = DSC$K_CLASS_VS;
1069 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1071 out->body[out->curlen] = EOS;
1072 return 1;
1074 else if (status == RMS$_NMF)
1075 retval = 0;
1076 else
1078 strcpy(out->body, in);
1079 retval = -1;
1081 lib$find_file_end(&context);
1082 pass1 = TRUE;
1083 return retval;
1087 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088 name of each file specified by the provided arg expanding wildcards.
1090 static char *
1091 gfnames (arg, p_error)
1092 char *arg;
1093 bool *p_error;
1095 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1097 switch (fn_exp (&filename, arg))
1099 case 1:
1100 *p_error = FALSE;
1101 return filename.body;
1102 case 0:
1103 *p_error = FALSE;
1104 return NULL;
1105 default:
1106 *p_error = TRUE;
1107 return filename.body;
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* NOTE(review): the guard reads as if it were inverted with respect to
   the comment above (the stub is compiled when OLD is NOT defined);
   left unchanged, to be confirmed on a VMS build.  */

/* Stand-in for system(): CMD is not executed.  Reports an error and
   returns -1, so that callers comparing the result with EXIT_SUCCESS
   see a failure rather than an indeterminate value.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1119 #define VERSION_DELIM ';'
1120 char *massage_name (s)
1121 char *s;
1123 char *start = s;
1125 for ( ; *s; s++)
1126 if (*s == VERSION_DELIM)
1128 *s = EOS;
1129 break;
1131 else
1132 *s = lowcase (*s);
1133 return start;
1135 #endif /* VMS */
1139 main (argc, argv)
1140 int argc;
1141 char *argv[];
1143 int i;
1144 unsigned int nincluded_files;
1145 char **included_files;
1146 argument *argbuffer;
1147 int current_arg, file_count;
1148 linebuffer filename_lb;
1149 bool help_asked = FALSE;
1150 #ifdef VMS
1151 bool got_err;
1152 #endif
1153 char *optstring;
1154 int opt;
1157 #ifdef DOS_NT
1158 _fmode = O_BINARY; /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1161 progname = argv[0];
1162 nincluded_files = 0;
1163 included_files = xnew (argc, char *);
1164 current_arg = 0;
1165 file_count = 0;
1167 /* Allocate enough no matter what happens. Overkill, but each one
1168 is small. */
1169 argbuffer = xnew (argc, argument);
1172 * If etags, always find typedefs and structure tags. Why not?
1173 * Also default to find macro constants, enum constants, struct
1174 * members and global variables.
1176 if (!CTAGS)
1178 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179 globals = TRUE;
1182 /* When the optstring begins with a '-' getopt_long does not rearrange the
1183 non-options arguments to be at the end, but leaves them alone. */
1184 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1185 "ac:Cf:Il:o:r:RSVhH",
1186 (CTAGS) ? "BxdtTuvw" : "Di:");
1188 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1189 switch (opt)
1191 case 0:
1192 /* If getopt returns 0, then it has already processed a
1193 long-named option. We should do nothing. */
1194 break;
1196 case 1:
1197 /* This means that a file name has been seen. Record it. */
1198 argbuffer[current_arg].arg_type = at_filename;
1199 argbuffer[current_arg].what = optarg;
1200 ++current_arg;
1201 ++file_count;
1202 break;
1204 case STDIN:
1205 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1206 argbuffer[current_arg].arg_type = at_stdin;
1207 argbuffer[current_arg].what = optarg;
1208 ++current_arg;
1209 ++file_count;
1210 if (parsing_stdin)
1211 fatal ("cannot parse standard input more than once", (char *)NULL);
1212 parsing_stdin = TRUE;
1213 break;
1215 /* Common options. */
1216 case 'a': append_to_tagfile = TRUE; break;
1217 case 'C': cplusplus = TRUE; break;
1218 case 'f': /* for compatibility with old makefiles */
1219 case 'o':
1220 if (tagfile)
1222 error ("-o option may only be given once.", (char *)NULL);
1223 suggest_asking_for_help ();
1224 /* NOTREACHED */
1226 tagfile = optarg;
1227 break;
1228 case 'I':
1229 case 'S': /* for backward compatibility */
1230 ignoreindent = TRUE;
1231 break;
1232 case 'l':
1234 language *lang = get_language_from_langname (optarg);
1235 if (lang != NULL)
1237 argbuffer[current_arg].lang = lang;
1238 argbuffer[current_arg].arg_type = at_language;
1239 ++current_arg;
1242 break;
1243 case 'c':
1244 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1245 optarg = concat (optarg, "i", ""); /* memory leak here */
1246 /* FALLTHRU */
1247 case 'r':
1248 argbuffer[current_arg].arg_type = at_regexp;
1249 argbuffer[current_arg].what = optarg;
1250 ++current_arg;
1251 break;
1252 case 'R':
1253 argbuffer[current_arg].arg_type = at_regexp;
1254 argbuffer[current_arg].what = NULL;
1255 ++current_arg;
1256 break;
1257 case 'V':
1258 print_version ();
1259 break;
1260 case 'h':
1261 case 'H':
1262 help_asked = TRUE;
1263 break;
1265 /* Etags options */
1266 case 'D': constantypedefs = FALSE; break;
1267 case 'i': included_files[nincluded_files++] = optarg; break;
1269 /* Ctags options. */
1270 case 'B': searchar = '?'; break;
1271 case 'd': constantypedefs = TRUE; break;
1272 case 't': typedefs = TRUE; break;
1273 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1274 case 'u': update = TRUE; break;
1275 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1276 case 'x': cxref_style = TRUE; break;
1277 case 'w': no_warnings = TRUE; break;
1278 default:
1279 suggest_asking_for_help ();
1280 /* NOTREACHED */
1283 /* No more options. Store the rest of arguments. */
1284 for (; optind < argc; optind++)
1286 argbuffer[current_arg].arg_type = at_filename;
1287 argbuffer[current_arg].what = argv[optind];
1288 ++current_arg;
1289 ++file_count;
1292 argbuffer[current_arg].arg_type = at_end;
1294 if (help_asked)
1295 print_help (argbuffer);
1296 /* NOTREACHED */
1298 if (nincluded_files == 0 && file_count == 0)
1300 error ("no input files specified.", (char *)NULL);
1301 suggest_asking_for_help ();
1302 /* NOTREACHED */
1305 if (tagfile == NULL)
1306 tagfile = CTAGS ? "tags" : "TAGS";
1307 cwd = etags_getcwd (); /* the current working directory */
1308 if (cwd[strlen (cwd) - 1] != '/')
1310 char *oldcwd = cwd;
1311 cwd = concat (oldcwd, "/", "");
1312 free (oldcwd);
1314 /* Relative file names are made relative to the current directory. */
1315 if (streq (tagfile, "-")
1316 || strneq (tagfile, "/dev/", 5))
1317 tagfiledir = cwd;
1318 else
1319 tagfiledir = absolute_dirname (tagfile, cwd);
1321 init (); /* set up boolean "functions" */
1323 linebuffer_init (&lb);
1324 linebuffer_init (&filename_lb);
1325 linebuffer_init (&filebuf);
1326 linebuffer_init (&token_name);
1328 if (!CTAGS)
1330 if (streq (tagfile, "-"))
1332 tagf = stdout;
1333 #ifdef DOS_NT
1334 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1335 doesn't take effect until after `stdout' is already open). */
1336 if (!isatty (fileno (stdout)))
1337 setmode (fileno (stdout), O_BINARY);
1338 #endif /* DOS_NT */
1340 else
1341 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1342 if (tagf == NULL)
1343 pfatal (tagfile);
1347 * Loop through files finding functions.
1349 for (i = 0; i < current_arg; i++)
1351 static language *lang; /* non-NULL if language is forced */
1352 char *this_file;
1354 switch (argbuffer[i].arg_type)
1356 case at_language:
1357 lang = argbuffer[i].lang;
1358 break;
1359 case at_regexp:
1360 analyse_regex (argbuffer[i].what);
1361 break;
1362 case at_filename:
1363 #ifdef VMS
1364 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1366 if (got_err)
1368 error ("can't find file %s\n", this_file);
1369 argc--, argv++;
1371 else
1373 this_file = massage_name (this_file);
1375 #else
1376 this_file = argbuffer[i].what;
1377 #endif
1378 /* Input file named "-" means read file names from stdin
1379 (one per line) and use them. */
1380 if (streq (this_file, "-"))
1382 if (parsing_stdin)
1383 fatal ("cannot parse standard input AND read file names from it",
1384 (char *)NULL);
1385 while (readline_internal (&filename_lb, stdin) > 0)
1386 process_file_name (filename_lb.buffer, lang);
1388 else
1389 process_file_name (this_file, lang);
1390 #ifdef VMS
1392 #endif
1393 break;
1394 case at_stdin:
1395 this_file = argbuffer[i].what;
1396 process_file (stdin, this_file, lang);
1397 break;
1401 free_regexps ();
1402 free (lb.buffer);
1403 free (filebuf.buffer);
1404 free (token_name.buffer);
1406 if (!CTAGS || cxref_style)
1408 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1409 put_entries (nodehead);
1410 free_tree (nodehead);
1411 nodehead = NULL;
1412 if (!CTAGS)
1414 fdesc *fdp;
1416 /* Output file entries that have no tags. */
1417 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1418 if (!fdp->written)
1419 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1421 while (nincluded_files-- > 0)
1422 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1424 if (fclose (tagf) == EOF)
1425 pfatal (tagfile);
1428 exit (EXIT_SUCCESS);
1431 if (update)
1433 char cmd[BUFSIZ];
1434 for (i = 0; i < current_arg; ++i)
1436 switch (argbuffer[i].arg_type)
1438 case at_filename:
1439 case at_stdin:
1440 break;
1441 default:
1442 continue; /* the for loop */
1444 sprintf (cmd,
1445 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1446 tagfile, argbuffer[i].what, tagfile);
1447 if (system (cmd) != EXIT_SUCCESS)
1448 fatal ("failed to execute shell command", (char *)NULL);
1450 append_to_tagfile = TRUE;
1453 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1454 if (tagf == NULL)
1455 pfatal (tagfile);
1456 put_entries (nodehead); /* write all the tags (CTAGS) */
1457 free_tree (nodehead);
1458 nodehead = NULL;
1459 if (fclose (tagf) == EOF)
1460 pfatal (tagfile);
1462 if (CTAGS)
1463 if (append_to_tagfile || update)
1465 char cmd[2*BUFSIZ+20];
1466 /* Maybe these should be used:
1467 setenv ("LC_COLLATE", "C", 1);
1468 setenv ("LC_ALL", "C", 1); */
1469 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1470 exit (system (cmd));
1472 return EXIT_SUCCESS;
1477 * Return a compressor given the file name. If EXTPTR is non-zero,
1478 * return a pointer into FILE where the compressor-specific
1479 * extension begins. If no compressor is found, NULL is returned
1480 * and EXTPTR is not significant.
1481 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1483 static compressor *
1484 get_compressor_from_suffix (file, extptr)
1485 char *file;
1486 char **extptr;
1488 compressor *compr;
1489 char *slash, *suffix;
1491 /* This relies on FN to be after canonicalize_filename,
1492 so we don't need to consider backslashes on DOS_NT. */
1493 slash = etags_strrchr (file, '/');
1494 suffix = etags_strrchr (file, '.');
1495 if (suffix == NULL || suffix < slash)
1496 return NULL;
1497 if (extptr != NULL)
1498 *extptr = suffix;
1499 suffix += 1;
1500 /* Let those poor souls who live with DOS 8+3 file name limits get
1501 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1502 Only the first do loop is run if not MSDOS */
1505 for (compr = compressors; compr->suffix != NULL; compr++)
1506 if (streq (compr->suffix, suffix))
1507 return compr;
1508 if (!MSDOS)
1509 break; /* do it only once: not really a loop */
1510 if (extptr != NULL)
1511 *extptr = ++suffix;
1512 } while (*suffix != '\0');
1513 return NULL;
1519 * Return a language given the name.
1521 static language *
1522 get_language_from_langname (name)
1523 const char *name;
1525 language *lang;
1527 if (name == NULL)
1528 error ("empty language name", (char *)NULL);
1529 else
1531 for (lang = lang_names; lang->name != NULL; lang++)
1532 if (streq (name, lang->name))
1533 return lang;
1534 error ("unknown language \"%s\"", name);
1537 return NULL;
1542 * Return a language given the interpreter name.
1544 static language *
1545 get_language_from_interpreter (interpreter)
1546 char *interpreter;
1548 language *lang;
1549 char **iname;
1551 if (interpreter == NULL)
1552 return NULL;
1553 for (lang = lang_names; lang->name != NULL; lang++)
1554 if (lang->interpreters != NULL)
1555 for (iname = lang->interpreters; *iname != NULL; iname++)
1556 if (streq (*iname, interpreter))
1557 return lang;
1559 return NULL;
1565 * Return a language given the file name.
1567 static language *
1568 get_language_from_filename (file, case_sensitive)
1569 char *file;
1570 bool case_sensitive;
1572 language *lang;
1573 char **name, **ext, *suffix;
1575 /* Try whole file name first. */
1576 for (lang = lang_names; lang->name != NULL; lang++)
1577 if (lang->filenames != NULL)
1578 for (name = lang->filenames; *name != NULL; name++)
1579 if ((case_sensitive)
1580 ? streq (*name, file)
1581 : strcaseeq (*name, file))
1582 return lang;
1584 /* If not found, try suffix after last dot. */
1585 suffix = etags_strrchr (file, '.');
1586 if (suffix == NULL)
1587 return NULL;
1588 suffix += 1;
1589 for (lang = lang_names; lang->name != NULL; lang++)
1590 if (lang->suffixes != NULL)
1591 for (ext = lang->suffixes; *ext != NULL; ext++)
1592 if ((case_sensitive)
1593 ? streq (*ext, suffix)
1594 : strcaseeq (*ext, suffix))
1595 return lang;
1596 return NULL;
1601 * This routine is called on each file argument.
1603 static void
1604 process_file_name (file, lang)
1605 char *file;
1606 language *lang;
1608 struct stat stat_buf;
1609 FILE *inf;
1610 fdesc *fdp;
1611 compressor *compr;
1612 char *compressed_name, *uncompressed_name;
1613 char *ext, *real_name;
1614 int retval;
1616 canonicalize_filename (file);
1617 if (streq (file, tagfile) && !streq (tagfile, "-"))
1619 error ("skipping inclusion of %s in self.", file);
1620 return;
1622 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1624 compressed_name = NULL;
1625 real_name = uncompressed_name = savestr (file);
1627 else
1629 real_name = compressed_name = savestr (file);
1630 uncompressed_name = savenstr (file, ext - file);
1633 /* If the canonicalized uncompressed name
1634 has already been dealt with, skip it silently. */
1635 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1637 assert (fdp->infname != NULL);
1638 if (streq (uncompressed_name, fdp->infname))
1639 goto cleanup;
1642 if (stat (real_name, &stat_buf) != 0)
1644 /* Reset real_name and try with a different name. */
1645 real_name = NULL;
1646 if (compressed_name != NULL) /* try with the given suffix */
1648 if (stat (uncompressed_name, &stat_buf) == 0)
1649 real_name = uncompressed_name;
1651 else /* try all possible suffixes */
1653 for (compr = compressors; compr->suffix != NULL; compr++)
1655 compressed_name = concat (file, ".", compr->suffix);
1656 if (stat (compressed_name, &stat_buf) != 0)
1658 if (MSDOS)
1660 char *suf = compressed_name + strlen (file);
1661 size_t suflen = strlen (compr->suffix) + 1;
1662 for ( ; suf[1]; suf++, suflen--)
1664 memmove (suf, suf + 1, suflen);
1665 if (stat (compressed_name, &stat_buf) == 0)
1667 real_name = compressed_name;
1668 break;
1671 if (real_name != NULL)
1672 break;
1673 } /* MSDOS */
1674 free (compressed_name);
1675 compressed_name = NULL;
1677 else
1679 real_name = compressed_name;
1680 break;
1684 if (real_name == NULL)
1686 perror (file);
1687 goto cleanup;
1689 } /* try with a different name */
1691 if (!S_ISREG (stat_buf.st_mode))
1693 error ("skipping %s: it is not a regular file.", real_name);
1694 goto cleanup;
1696 if (real_name == compressed_name)
1698 char *cmd = concat (compr->command, " ", real_name);
1699 inf = (FILE *) popen (cmd, "r");
1700 free (cmd);
1702 else
1703 inf = fopen (real_name, "r");
1704 if (inf == NULL)
1706 perror (real_name);
1707 goto cleanup;
1710 process_file (inf, uncompressed_name, lang);
1712 if (real_name == compressed_name)
1713 retval = pclose (inf);
1714 else
1715 retval = fclose (inf);
1716 if (retval < 0)
1717 pfatal (file);
1719 cleanup:
1720 if (compressed_name) free (compressed_name);
1721 if (uncompressed_name) free (uncompressed_name);
1722 last_node = NULL;
1723 curfdp = NULL;
1724 return;
1727 static void
1728 process_file (fh, fn, lang)
1729 FILE *fh;
1730 char *fn;
1731 language *lang;
1733 static const fdesc emptyfdesc;
1734 fdesc *fdp;
1736 /* Create a new input file description entry. */
1737 fdp = xnew (1, fdesc);
1738 *fdp = emptyfdesc;
1739 fdp->next = fdhead;
1740 fdp->infname = savestr (fn);
1741 fdp->lang = lang;
1742 fdp->infabsname = absolute_filename (fn, cwd);
1743 fdp->infabsdir = absolute_dirname (fn, cwd);
1744 if (filename_is_absolute (fn))
1746 /* An absolute file name. Canonicalize it. */
1747 fdp->taggedfname = absolute_filename (fn, NULL);
1749 else
1751 /* A file name relative to cwd. Make it relative
1752 to the directory of the tags file. */
1753 fdp->taggedfname = relative_filename (fn, tagfiledir);
1755 fdp->usecharno = TRUE; /* use char position when making tags */
1756 fdp->prop = NULL;
1757 fdp->written = FALSE; /* not written on tags file yet */
1759 fdhead = fdp;
1760 curfdp = fdhead; /* the current file description */
1762 find_entries (fh);
1764 /* If not Ctags, and if this is not metasource and if it contained no #line
1765 directives, we can write the tags and free all nodes pointing to
1766 curfdp. */
1767 if (!CTAGS
1768 && curfdp->usecharno /* no #line directives in this file */
1769 && !curfdp->lang->metasource)
1771 node *np, *prev;
1773 /* Look for the head of the sublist relative to this file. See add_node
1774 for the structure of the node tree. */
1775 prev = NULL;
1776 for (np = nodehead; np != NULL; prev = np, np = np->left)
1777 if (np->fdp == curfdp)
1778 break;
1780 /* If we generated tags for this file, write and delete them. */
1781 if (np != NULL)
1783 /* This is the head of the last sublist, if any. The following
1784 instructions depend on this being true. */
1785 assert (np->left == NULL);
1787 assert (fdhead == curfdp);
1788 assert (last_node->fdp == curfdp);
1789 put_entries (np); /* write tags for file curfdp->taggedfname */
1790 free_tree (np); /* remove the written nodes */
1791 if (prev == NULL)
1792 nodehead = NULL; /* no nodes left */
1793 else
1794 prev->left = NULL; /* delete the pointer to the sublist */
1800 * This routine sets up the boolean pseudo-functions which work
1801 * by setting boolean flags dependent upon the corresponding character.
1802 * Every char which is NOT in that string is not a white char. Therefore,
1803 * all of the array "_wht" is set to FALSE, and then the elements
1804 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1805 * of a char is TRUE if it is the string "white", else FALSE.
1807 static void
1808 init ()
1810 register char *sp;
1811 register int i;
1813 for (i = 0; i < CHARS; i++)
1814 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1815 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1816 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1817 notinname('\0') = notinname('\n');
1818 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1819 begtoken('\0') = begtoken('\n');
1820 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1821 intoken('\0') = intoken('\n');
1822 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1823 endtoken('\0') = endtoken('\n');
1827 * This routine opens the specified file and calls the function
1828 * which finds the function and type definitions.
1830 static void
1831 find_entries (inf)
1832 FILE *inf;
1834 char *cp;
1835 language *lang = curfdp->lang;
1836 Lang_function *parser = NULL;
1838 /* If user specified a language, use it. */
1839 if (lang != NULL && lang->function != NULL)
1841 parser = lang->function;
1844 /* Else try to guess the language given the file name. */
1845 if (parser == NULL)
1847 lang = get_language_from_filename (curfdp->infname, TRUE);
1848 if (lang != NULL && lang->function != NULL)
1850 curfdp->lang = lang;
1851 parser = lang->function;
1855 /* Else look for sharp-bang as the first two characters. */
1856 if (parser == NULL
1857 && readline_internal (&lb, inf) > 0
1858 && lb.len >= 2
1859 && lb.buffer[0] == '#'
1860 && lb.buffer[1] == '!')
1862 char *lp;
1864 /* Set lp to point at the first char after the last slash in the
1865 line or, if no slashes, at the first nonblank. Then set cp to
1866 the first successive blank and terminate the string. */
1867 lp = etags_strrchr (lb.buffer+2, '/');
1868 if (lp != NULL)
1869 lp += 1;
1870 else
1871 lp = skip_spaces (lb.buffer + 2);
1872 cp = skip_non_spaces (lp);
1873 *cp = '\0';
1875 if (strlen (lp) > 0)
1877 lang = get_language_from_interpreter (lp);
1878 if (lang != NULL && lang->function != NULL)
1880 curfdp->lang = lang;
1881 parser = lang->function;
1886 /* We rewind here, even if inf may be a pipe. We fail if the
1887 length of the first line is longer than the pipe block size,
1888 which is unlikely. */
1889 rewind (inf);
1891 /* Else try to guess the language given the case insensitive file name. */
1892 if (parser == NULL)
1894 lang = get_language_from_filename (curfdp->infname, FALSE);
1895 if (lang != NULL && lang->function != NULL)
1897 curfdp->lang = lang;
1898 parser = lang->function;
1902 /* Else try Fortran or C. */
1903 if (parser == NULL)
1905 node *old_last_node = last_node;
1907 curfdp->lang = get_language_from_langname ("fortran");
1908 find_entries (inf);
1910 if (old_last_node == last_node)
1911 /* No Fortran entries found. Try C. */
1913 /* We do not tag if rewind fails.
1914 Only the file name will be recorded in the tags file. */
1915 rewind (inf);
1916 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1917 find_entries (inf);
1919 return;
1922 if (!no_line_directive
1923 && curfdp->lang != NULL && curfdp->lang->metasource)
1924 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1925 file, or anyway we parsed a file that is automatically generated from
1926 this one. If this is the case, the bingo.c file contained #line
1927 directives that generated tags pointing to this file. Let's delete
1928 them all before parsing this file, which is the real source. */
1930 fdesc **fdpp = &fdhead;
1931 while (*fdpp != NULL)
1932 if (*fdpp != curfdp
1933 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1934 /* We found one of those! We must delete both the file description
1935 and all tags referring to it. */
1937 fdesc *badfdp = *fdpp;
1939 /* Delete the tags referring to badfdp->taggedfname
1940 that were obtained from badfdp->infname. */
1941 invalidate_nodes (badfdp, &nodehead);
1943 *fdpp = badfdp->next; /* remove the bad description from the list */
1944 free_fdesc (badfdp);
1946 else
1947 fdpp = &(*fdpp)->next; /* advance the list pointer */
1950 assert (parser != NULL);
1952 /* Generic initialisations before reading from file. */
1953 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1955 /* Generic initialisations before parsing file with readline. */
1956 lineno = 0; /* reset global line number */
1957 charno = 0; /* reset global char number */
1958 linecharno = 0; /* reset global char number of line start */
1960 parser (inf);
1962 regex_tag_multiline ();
1967 * Check whether an implicitly named tag should be created,
1968 * then call `pfnote'.
1969 * NAME is a string that is internally copied by this function.
1971 * TAGS format specification
1972 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1973 * The following is explained in some more detail in etc/ETAGS.EBNF.
1975 * make_tag creates tags with "implicit tag names" (unnamed tags)
1976 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1977 * 1. NAME does not contain any of the characters in NONAM;
1978 * 2. LINESTART contains name as either a rightmost, or rightmost but
1979 * one character, substring;
1980 * 3. the character, if any, immediately before NAME in LINESTART must
1981 * be a character in NONAM;
1982 * 4. the character, if any, immediately after NAME in LINESTART must
1983 * also be a character in NONAM.
1985 * The implementation uses the notinname() macro, which recognises the
1986 * characters stored in the string `nonam'.
1987 * etags.el needs to use the same characters that are in NONAM.
1989 static void
1990 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1991 char *name; /* tag name, or NULL if unnamed */
1992 int namelen; /* tag length */
1993 bool is_func; /* tag is a function */
1994 char *linestart; /* start of the line where tag is */
1995 int linelen; /* length of the line where tag is */
1996 int lno; /* line number */
1997 long cno; /* character number */
1999 bool named = (name != NULL && namelen > 0);
2001 if (!CTAGS && named) /* maybe set named to false */
2002 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2003 such that etags.el can guess a name from it. */
2005 int i;
2006 register char *cp = name;
2008 for (i = 0; i < namelen; i++)
2009 if (notinname (*cp++))
2010 break;
2011 if (i == namelen) /* rule #1 */
2013 cp = linestart + linelen - namelen;
2014 if (notinname (linestart[linelen-1]))
2015 cp -= 1; /* rule #4 */
2016 if (cp >= linestart /* rule #2 */
2017 && (cp == linestart
2018 || notinname (cp[-1])) /* rule #3 */
2019 && strneq (name, cp, namelen)) /* rule #2 */
2020 named = FALSE; /* use implicit tag name */
2024 if (named)
2025 name = savenstr (name, namelen);
2026 else
2027 name = NULL;
2028 pfnote (name, is_func, linestart, linelen, lno, cno);
2031 /* Record a tag. */
2032 static void
2033 pfnote (name, is_func, linestart, linelen, lno, cno)
2034 char *name; /* tag name, or NULL if unnamed */
2035 bool is_func; /* tag is a function */
2036 char *linestart; /* start of the line where tag is */
2037 int linelen; /* length of the line where tag is */
2038 int lno; /* line number */
2039 long cno; /* character number */
2041 register node *np;
2043 assert (name == NULL || name[0] != '\0');
2044 if (CTAGS && name == NULL)
2045 return;
2047 np = xnew (1, node);
2049 /* If ctags mode, change name "main" to M<thisfilename>. */
2050 if (CTAGS && !cxref_style && streq (name, "main"))
2052 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2053 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2054 fp = etags_strrchr (np->name, '.');
2055 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2056 fp[0] = '\0';
2058 else
2059 np->name = name;
2060 np->valid = TRUE;
2061 np->been_warned = FALSE;
2062 np->fdp = curfdp;
2063 np->is_func = is_func;
2064 np->lno = lno;
2065 if (np->fdp->usecharno)
2066 /* Our char numbers are 0-base, because of C language tradition?
2067 ctags compatibility? old versions compatibility? I don't know.
2068 Anyway, since emacs's are 1-base we expect etags.el to take care
2069 of the difference. If we wanted to have 1-based numbers, we would
2070 uncomment the +1 below. */
2071 np->cno = cno /* + 1 */ ;
2072 else
2073 np->cno = invalidcharno;
2074 np->left = np->right = NULL;
2075 if (CTAGS && !cxref_style)
2077 if (strlen (linestart) < 50)
2078 np->regex = concat (linestart, "$", "");
2079 else
2080 np->regex = savenstr (linestart, 50);
2082 else
2083 np->regex = savenstr (linestart, linelen);
2085 add_node (np, &nodehead);
2089 * free_tree ()
2090 * recurse on left children, iterate on right children.
2092 static void
2093 free_tree (np)
2094 register node *np;
2096 while (np)
2098 register node *node_right = np->right;
2099 free_tree (np->left);
2100 if (np->name != NULL)
2101 free (np->name);
2102 free (np->regex);
2103 free (np);
2104 np = node_right;
2109 * free_fdesc ()
2110 * delete a file description
2112 static void
2113 free_fdesc (fdp)
2114 register fdesc *fdp;
2116 if (fdp->infname != NULL) free (fdp->infname);
2117 if (fdp->infabsname != NULL) free (fdp->infabsname);
2118 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2119 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2120 if (fdp->prop != NULL) free (fdp->prop);
2121 free (fdp);
2125 * add_node ()
2126 * Adds a node to the tree of nodes. In etags mode, sort by file
2127 * name. In ctags mode, sort by tag name. Make no attempt at
2128 * balancing.
2130 * add_node is the only function allowed to add nodes, so it can
2131 * maintain state.
2133 static void
2134 add_node (np, cur_node_p)
2135 node *np, **cur_node_p;
2137 register int dif;
2138 register node *cur_node = *cur_node_p;
2140 if (cur_node == NULL)
2142 *cur_node_p = np;
2143 last_node = np;
2144 return;
2147 if (!CTAGS)
2148 /* Etags Mode */
2150 /* For each file name, tags are in a linked sublist on the right
2151 pointer. The first tags of different files are a linked list
2152 on the left pointer. last_node points to the end of the last
2153 used sublist. */
2154 if (last_node != NULL && last_node->fdp == np->fdp)
2156 /* Let's use the same sublist as the last added node. */
2157 assert (last_node->right == NULL);
2158 last_node->right = np;
2159 last_node = np;
2161 else if (cur_node->fdp == np->fdp)
2163 /* Scanning the list we found the head of a sublist which is
2164 good for us. Let's scan this sublist. */
2165 add_node (np, &cur_node->right);
2167 else
2168 /* The head of this sublist is not good for us. Let's try the
2169 next one. */
2170 add_node (np, &cur_node->left);
2171 } /* if ETAGS mode */
2173 else
2175 /* Ctags Mode */
2176 dif = strcmp (np->name, cur_node->name);
2179 * If this tag name matches an existing one, then
2180 * do not add the node, but maybe print a warning.
2182 if (no_duplicates && !dif)
2184 if (np->fdp == cur_node->fdp)
2186 if (!no_warnings)
2188 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2189 np->fdp->infname, lineno, np->name);
2190 fprintf (stderr, "Second entry ignored\n");
2193 else if (!cur_node->been_warned && !no_warnings)
2195 fprintf
2196 (stderr,
2197 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2198 np->fdp->infname, cur_node->fdp->infname, np->name);
2199 cur_node->been_warned = TRUE;
2201 return;
2204 /* Actually add the node */
2205 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2206 } /* if CTAGS mode */
2210 * invalidate_nodes ()
2211 * Scan the node tree and invalidate all nodes pointing to the
2212 * given file description (CTAGS case) or free them (ETAGS case).
2214 static void
2215 invalidate_nodes (badfdp, npp)
2216 fdesc *badfdp;
2217 node **npp;
2219 node *np = *npp;
2221 if (np == NULL)
2222 return;
2224 if (CTAGS)
2226 if (np->left != NULL)
2227 invalidate_nodes (badfdp, &np->left);
2228 if (np->fdp == badfdp)
2229 np->valid = FALSE;
2230 if (np->right != NULL)
2231 invalidate_nodes (badfdp, &np->right);
2233 else
2235 assert (np->fdp != NULL);
2236 if (np->fdp == badfdp)
2238 *npp = np->left; /* detach the sublist from the list */
2239 np->left = NULL; /* isolate it */
2240 free_tree (np); /* free it */
2241 invalidate_nodes (badfdp, npp);
2243 else
2244 invalidate_nodes (badfdp, &np->left);
2249 static int total_size_of_entries __P((node *));
2250 static int number_len __P((long));
/* Length of a non-negative number's decimal representation.
   Counts one digit, then one more for every further division by ten
   that leaves a positive quotient, so 0 yields 1.  */
static int
number_len (num)
     long num;
{
  int digits = 0;

  do
    digits++;
  while ((num /= 10) > 0);

  return digits;
}
2264 * Return total number of characters that put_entries will output for
2265 * the nodes in the linked list at the right of the specified node.
2266 * This count is irrelevant with etags.el since emacs 19.34 at least,
2267 * but is still supplied for backward compatibility.
2269 static int
2270 total_size_of_entries (np)
2271 register node *np;
2273 register int total = 0;
2275 for (; np != NULL; np = np->right)
2276 if (np->valid)
2278 total += strlen (np->regex) + 1; /* pat\177 */
2279 if (np->name != NULL)
2280 total += strlen (np->name) + 1; /* name\001 */
2281 total += number_len ((long) np->lno) + 1; /* lno, */
2282 if (np->cno != invalidcharno) /* cno */
2283 total += number_len (np->cno);
2284 total += 1; /* newline */
2287 return total;
2290 static void
2291 put_entries (np)
2292 register node *np;
2294 register char *sp;
2295 static fdesc *fdp = NULL;
2297 if (np == NULL)
2298 return;
2300 /* Output subentries that precede this one */
2301 if (CTAGS)
2302 put_entries (np->left);
2304 /* Output this entry */
2305 if (np->valid)
2307 if (!CTAGS)
2309 /* Etags mode */
2310 if (fdp != np->fdp)
2312 fdp = np->fdp;
2313 fprintf (tagf, "\f\n%s,%d\n",
2314 fdp->taggedfname, total_size_of_entries (np));
2315 fdp->written = TRUE;
2317 fputs (np->regex, tagf);
2318 fputc ('\177', tagf);
2319 if (np->name != NULL)
2321 fputs (np->name, tagf);
2322 fputc ('\001', tagf);
2324 fprintf (tagf, "%d,", np->lno);
2325 if (np->cno != invalidcharno)
2326 fprintf (tagf, "%ld", np->cno);
2327 fputs ("\n", tagf);
2329 else
2331 /* Ctags mode */
2332 if (np->name == NULL)
2333 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2335 if (cxref_style)
2337 if (vgrind_style)
2338 fprintf (stdout, "%s %s %d\n",
2339 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2340 else
2341 fprintf (stdout, "%-16s %3d %-16s %s\n",
2342 np->name, np->lno, np->fdp->taggedfname, np->regex);
2344 else
2346 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2348 if (np->is_func)
2349 { /* function or #define macro with args */
2350 putc (searchar, tagf);
2351 putc ('^', tagf);
2353 for (sp = np->regex; *sp; sp++)
2355 if (*sp == '\\' || *sp == searchar)
2356 putc ('\\', tagf);
2357 putc (*sp, tagf);
2359 putc (searchar, tagf);
2361 else
2362 { /* anything else; text pattern inadequate */
2363 fprintf (tagf, "%d", np->lno);
2365 putc ('\n', tagf);
2368 } /* if this node contains a valid tag */
2370 /* Output subentries that follow this one */
2371 put_entries (np->right);
2372 if (!CTAGS)
2373 put_entries (np->left);
/* C extensions.
   Bit masks describing which C-like language is being parsed.  The
   C* and Java values contain the C++ bit, which is why the keyword
   table masks it out with (C_JAVA & ~C_PLPL) for Java-only words.  */
#define C_EXT   0x00fff         /* mask covering all the C extensions */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Token classes returned by C_symtype.  st_none comes first, so it is
 * also the value of the zero-filled placeholder slots of the keyword
 * table.
 */
enum sym_type
{
  st_none,                      /* not a keyword known to the table */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__ */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2401 static unsigned int hash __P((const char *, unsigned int));
2402 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2403 static enum sym_type C_symtype __P((char *, int, int));
2405 /* Feed stuff between (but not including) %[ and %] lines to:
2406 gperf -m 5
2408 %compare-strncmp
2409 %enum
2410 %struct-type
2411 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2413 if, 0, st_C_ignore
2414 for, 0, st_C_ignore
2415 while, 0, st_C_ignore
2416 switch, 0, st_C_ignore
2417 return, 0, st_C_ignore
2418 __attribute__, 0, st_C_attribute
2419 @interface, 0, st_C_objprot
2420 @protocol, 0, st_C_objprot
2421 @implementation,0, st_C_objimpl
2422 @end, 0, st_C_objend
2423 import, (C_JAVA & ~C_PLPL), st_C_ignore
2424 package, (C_JAVA & ~C_PLPL), st_C_ignore
2425 friend, C_PLPL, st_C_ignore
2426 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2427 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2428 interface, (C_JAVA & ~C_PLPL), st_C_struct
2429 class, 0, st_C_class
2430 namespace, C_PLPL, st_C_struct
2431 domain, C_STAR, st_C_struct
2432 union, 0, st_C_struct
2433 struct, 0, st_C_struct
2434 extern, 0, st_C_extern
2435 enum, 0, st_C_enum
2436 typedef, 0, st_C_typedef
2437 define, 0, st_C_define
2438 undef, 0, st_C_define
2439 operator, C_PLPL, st_C_operator
2440 template, 0, st_C_template
2441 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2442 DEFUN, 0, st_C_gnumacro
2443 SYSCALL, 0, st_C_gnumacro
2444 ENTRY, 0, st_C_gnumacro
2445 PSEUDO, 0, st_C_gnumacro
2446 # These are defined inside C functions, so currently they are not met.
2447 # EXFUN used in glibc, DEFVAR_* in emacs.
2448 #EXFUN, 0, st_C_gnumacro
2449 #DEFVAR_, 0, st_C_gnumacro
2451 and replace lines between %< and %> with its output, then:
2452 - remove the #if characterset check
2453 - make in_word_set static and not inline. */
2454 /*%<*/
2455 /* C code produced by gperf version 3.0.1 */
2456 /* Command-line: gperf -m 5 */
2457 /* Computed positions: -k'2-3' */
2459 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2460 /* maximum key range = 33, duplicates = 0 */
/* Hash function generated by gperf for the C keyword table: the word
   length plus the association values of its second and, for words
   whose length is not 2, third character.  Callers must guarantee
   that those characters can be read; in_word_set only passes words of
   at least two characters.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Every length except 2 also takes the third character into account. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2517 static struct C_stab_entry *
2518 in_word_set (str, len)
2519 register const char *str;
2520 register unsigned int len;
2522 enum
2524 TOTAL_KEYWORDS = 32,
2525 MIN_WORD_LENGTH = 2,
2526 MAX_WORD_LENGTH = 15,
2527 MIN_HASH_VALUE = 2,
2528 MAX_HASH_VALUE = 34
2531 static struct C_stab_entry wordlist[] =
2533 {""}, {""},
2534 {"if", 0, st_C_ignore},
2535 {""},
2536 {"@end", 0, st_C_objend},
2537 {"union", 0, st_C_struct},
2538 {"define", 0, st_C_define},
2539 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2540 {"template", 0, st_C_template},
2541 {"operator", C_PLPL, st_C_operator},
2542 {"@interface", 0, st_C_objprot},
2543 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2544 {"friend", C_PLPL, st_C_ignore},
2545 {"typedef", 0, st_C_typedef},
2546 {"return", 0, st_C_ignore},
2547 {"@implementation",0, st_C_objimpl},
2548 {"@protocol", 0, st_C_objprot},
2549 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2550 {"extern", 0, st_C_extern},
2551 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2552 {"struct", 0, st_C_struct},
2553 {"domain", C_STAR, st_C_struct},
2554 {"switch", 0, st_C_ignore},
2555 {"enum", 0, st_C_enum},
2556 {"for", 0, st_C_ignore},
2557 {"namespace", C_PLPL, st_C_struct},
2558 {"class", 0, st_C_class},
2559 {"while", 0, st_C_ignore},
2560 {"undef", 0, st_C_define},
2561 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2562 {"__attribute__", 0, st_C_attribute},
2563 {"SYSCALL", 0, st_C_gnumacro},
2564 {"ENTRY", 0, st_C_gnumacro},
2565 {"PSEUDO", 0, st_C_gnumacro},
2566 {"DEFUN", 0, st_C_gnumacro}
2569 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2571 register int key = hash (str, len);
2573 if (key <= MAX_HASH_VALUE && key >= 0)
2575 register const char *s = wordlist[key].name;
2577 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2578 return &wordlist[key];
2581 return 0;
2583 /*%>*/
2585 static enum sym_type
2586 C_symtype (str, len, c_ext)
2587 char *str;
2588 int len;
2589 int c_ext;
2591 register struct C_stab_entry *se = in_word_set (str, len);
2593 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2594 return st_none;
2595 return se->type;
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* a tag may be created from this token;
                                   make_C_tag refuses to make a tag when
                                   this is false, so the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2713 * Variables and functions for dealing with nested structures.
2714 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2716 static void pushclass_above __P((int, char *, int));
2717 static void popclass_above __P((int));
2718 static void write_classname __P((linebuffer *, char *qualifier));
/* Stack for nested declaration tags.  The two arrays are parallel:
   element I describes the I-th enclosing class-like scope.  */
static struct
{
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function. */
#define instruct        (structdef == snone && nestlev > 0                   \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2732 static void
2733 pushclass_above (bracelev, str, len)
2734 int bracelev;
2735 char *str;
2736 int len;
2738 int nl;
2740 popclass_above (bracelev);
2741 nl = cstack.nl;
2742 if (nl >= cstack.size)
2744 int size = cstack.size *= 2;
2745 xrnew (cstack.cname, size, char *);
2746 xrnew (cstack.bracelev, size, int);
2748 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2749 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2750 cstack.bracelev[nl] = bracelev;
2751 cstack.nl = nl + 1;
2754 static void
2755 popclass_above (bracelev)
2756 int bracelev;
2758 int nl;
2760 for (nl = cstack.nl - 1;
2761 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2762 nl--)
2764 if (cstack.cname[nl] != NULL)
2765 free (cstack.cname[nl]);
2766 cstack.nl = nl;
2770 static void
2771 write_classname (cn, qualifier)
2772 linebuffer *cn;
2773 char *qualifier;
2775 int i, len;
2776 int qlen = strlen (qualifier);
2778 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2780 len = 0;
2781 cn->len = 0;
2782 cn->buffer[0] = '\0';
2784 else
2786 len = strlen (cstack.cname[0]);
2787 linebuffer_setlen (cn, len);
2788 strcpy (cn->buffer, cstack.cname[0]);
2790 for (i = 1; i < cstack.nl; i++)
2792 char *s;
2793 int slen;
2795 s = cstack.cname[i];
2796 if (s == NULL)
2797 continue;
2798 slen = strlen (s);
2799 len += slen + qlen;
2800 linebuffer_setlen (cn, len);
2801 strncat (cn->buffer, qualifier, qlen);
2802 strncat (cn->buffer, s, slen);
2807 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2808 static void make_C_tag __P((bool));
2811 * consider_token ()
2812 * checks to see if the current token is at the start of a
2813 * function or variable, or corresponds to a typedef, or
2814 * is a struct/union/enum tag, or #define, or an enum constant.
2816 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2817 * with args. C_EXTP points to which language we are looking at.
2819 * Globals
2820 * fvdef IN OUT
2821 * structdef IN OUT
2822 * definedef IN OUT
2823 * typdef IN OUT
2824 * objdef IN OUT
/* Classify the token of LEN characters at STR with C_symtype and feed
   it, in this order, to the preprocessor (definedef), typedef
   (typdef), struct-like (structdef), Objective C (objdef) and
   function/variable (fvdef) state machines.  Each machine may consume
   the token by returning at once, so the order of the sections below
   matters.  NOTE(review): a TRUE return presumably tells the caller
   that the token is a candidate for a tag -- confirm in C_entries. */
2827 static bool
2828 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2829 register char *str; /* IN: token pointer */
2830 register int len; /* IN: token length */
2831 register int c; /* IN: first char after the token */
2832 int *c_extp; /* IN, OUT: C extensions mask */
2833 int bracelev; /* IN: brace level */
2834 int parlev; /* IN: parenthesis level */
2835 bool *is_func_or_var; /* OUT: function or variable found */
2837 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2838 structtype is the type of the preceding struct-like keyword, and
2839 structbracelev is the brace level where it has been seen. */
/* These three are static: their values persist from one call to the next. */
2840 static enum sym_type structtype;
2841 static int structbracelev;
2842 static enum sym_type toktype;
2845 toktype = C_symtype (str, len, *c_extp);
2848 * Skip __attribute__
2850 if (toktype == st_C_attribute)
2852 inattribute = TRUE;
2853 return FALSE;
2857 * Advance the definedef state machine.
2859 switch (definedef)
2861 case dnone:
2862 /* We're not on a preprocessor line. */
2863 if (toktype == st_C_gnumacro)
2865 fvdef = fdefunkey;
2866 return FALSE;
2868 break;
2869 case dsharpseen:
/* Only `define' and `undef' (st_C_define) are followed up; after any
   other directive the rest of the line is ignored. */
2870 if (toktype == st_C_define)
2872 definedef = ddefineseen;
2874 else
2876 definedef = dignorerest;
2878 return FALSE;
2879 case ddefineseen:
2881 * Make a tag for any macro, unless it is a constant
2882 * and constantypedefs is FALSE.
2884 definedef = dignorerest;
/* A '(' right after the name marks a macro with arguments. */
2885 *is_func_or_var = (c == '(');
2886 if (!*is_func_or_var && !constantypedefs)
2887 return FALSE;
2888 else
2889 return TRUE;
2890 case dignorerest:
2891 return FALSE;
2892 default:
2893 error ("internal error: definedef value.", (char *)NULL);
2897 * Now typedefs
2899 switch (typdef)
2901 case tnone:
2902 if (toktype == st_C_typedef)
2904 if (typedefs)
2905 typdef = tkeyseen;
2906 fvextern = FALSE;
2907 fvdef = fvnone;
2908 return FALSE;
2910 break;
2911 case tkeyseen:
2912 switch (toktype)
2914 case st_none:
2915 case st_C_class:
2916 case st_C_struct:
2917 case st_C_enum:
2918 typdef = ttypeseen;
2920 break;
2921 case ttypeseen:
2922 if (structdef == snone && fvdef == fvnone)
2924 fvdef = fvnameseen;
2925 return TRUE;
2927 break;
2928 case tend:
2929 switch (toktype)
2931 case st_C_class:
2932 case st_C_struct:
2933 case st_C_enum:
2934 return FALSE;
2936 return TRUE;
2940 * This structdef business is NOT invoked when we are ctags and the
2941 * file is plain C. This is because a struct tag may have the same
2942 * name as another tag, and this loses with ctags.
2944 switch (toktype)
2946 case st_C_javastruct:
2947 if (structdef == stagseen)
2948 structdef = scolonseen;
2949 return FALSE;
2950 case st_C_template:
2951 case st_C_class:
2952 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2953 && bracelev == 0
2954 && definedef == dnone && structdef == snone
2955 && typdef == tnone && fvdef == fvnone)
2956 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2957 if (toktype == st_C_template)
2958 break;
2959 /* FALLTHRU */
2960 case st_C_struct:
2961 case st_C_enum:
2962 if (parlev == 0
2963 && fvdef != vignore
2964 && (typdef == tkeyseen
2965 || (typedefs_or_cplusplus && structdef == snone)))
2967 structdef = skeyseen;
/* Remember which keyword opened the construct and at which brace
   level; the enum constant test further down relies on both. */
2968 structtype = toktype;
2969 structbracelev = bracelev;
2970 if (fvdef == fvnameseen)
2971 fvdef = fvnone;
2973 return FALSE;
/* The first token after a struct-like keyword is taken as its tag. */
2976 if (structdef == skeyseen)
2978 structdef = stagseen;
2979 return TRUE;
2982 if (typdef != tnone)
2983 definedef = dnone;
2985 /* Detect Objective C constructs. */
2986 switch (objdef)
2988 case onone:
2989 switch (toktype)
2991 case st_C_objprot:
2992 objdef = oprotocol;
2993 return FALSE;
2994 case st_C_objimpl:
2995 objdef = oimplementation;
2996 return FALSE;
2998 break;
2999 case oimplementation:
3000 /* Save the class tag for functions or variables defined inside. */
3001 objtag = savenstr (str, len);
3002 objdef = oinbody;
3003 return FALSE;
3004 case oprotocol:
3005 /* Save the class tag for categories. */
3006 objtag = savenstr (str, len);
3007 objdef = otagseen;
3008 *is_func_or_var = TRUE;
3009 return TRUE;
3010 case oparenseen:
3011 objdef = ocatseen;
3012 *is_func_or_var = TRUE;
3013 return TRUE;
3014 case oinbody:
3015 break;
3016 case omethodsign:
3017 if (parlev == 0)
3019 fvdef = fvnone;
3020 objdef = omethodtag;
/* Start the method name in token_name with this token. */
3021 linebuffer_setlen (&token_name, len);
3022 strncpy (token_name.buffer, str, len);
3023 token_name.buffer[len] = '\0';
3024 return TRUE;
3026 return FALSE;
3027 case omethodcolon:
3028 if (parlev == 0)
3029 objdef = omethodparm;
3030 return FALSE;
3031 case omethodparm:
3032 if (parlev == 0)
3034 fvdef = fvnone;
3035 objdef = omethodtag;
/* Append this token to the method name collected so far. */
3036 linebuffer_setlen (&token_name, token_name.len + len);
3037 strncat (token_name.buffer, str, len);
3038 return TRUE;
3040 return FALSE;
3041 case oignore:
3042 if (toktype == st_C_objend)
3044 /* Memory leakage here: the string pointed by objtag is
3045 never released, because many tests would be needed to
3046 avoid breaking on incorrect input code. The amount of
3047 memory leaked here is the sum of the lengths of the
3048 class tags.
3049 free (objtag); */
3050 objdef = onone;
3052 return FALSE;
3055 /* A function, variable or enum constant? */
3056 switch (toktype)
3058 case st_C_extern:
3059 fvextern = TRUE;
3060 switch (fvdef)
3062 case finlist:
3063 case flistseen:
3064 case fignore:
3065 case vignore:
3066 break;
3067 default:
3068 fvdef = fvnone;
3070 return FALSE;
3071 case st_C_ignore:
3072 fvextern = FALSE;
3073 fvdef = vignore;
3074 return FALSE;
3075 case st_C_operator:
3076 fvdef = foperator;
3077 *is_func_or_var = TRUE;
3078 return TRUE;
3079 case st_none:
3080 if (constantypedefs
3081 && structdef == snone
3082 && structtype == st_C_enum && bracelev > structbracelev)
3083 return TRUE; /* enum constant */
3084 switch (fvdef)
3086 case fdefunkey:
3087 if (bracelev > 0)
3088 break;
3089 fvdef = fdefunname; /* GNU macro */
3090 *is_func_or_var = TRUE;
3091 return TRUE;
3092 case fvnone:
3093 switch (typdef)
3095 case ttypeseen:
3096 return FALSE;
3097 case tnone:
/* `asm' and `__asm__' put the machine in the ignore-until-';' state. */
3098 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3099 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3101 fvdef = vignore;
3102 return FALSE;
3104 break;
3106 /* FALLTHRU */
3107 case fvnameseen:
/* A token ending in "::operator" is handled like the operator keyword. */
3108 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3110 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3111 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3112 fvdef = foperator;
3113 *is_func_or_var = TRUE;
3114 return TRUE;
3116 if (bracelev > 0 && !instruct)
3117 break;
3118 fvdef = fvnameseen; /* function or variable */
3119 *is_func_or_var = TRUE;
3120 return TRUE;
3122 break;
3125 return FALSE;
3130 * C_entries often keeps pointers to tokens or lines which are older than
3131 * the line currently read. By keeping two line buffers, and switching
3132 * them at end of line, it is possible to use those pointers.
3134 static struct
3136 long linepos;
3137 linebuffer lb;
3138 } lbs[2];
3140 #define current_lb_is_new (newndx == curndx)
3141 #define switch_line_buffers() (curndx = 1 - curndx)
3143 #define curlb (lbs[curndx].lb)
3144 #define newlb (lbs[newndx].lb)
3145 #define curlinepos (lbs[curndx].linepos)
3146 #define newlinepos (lbs[newndx].linepos)
3148 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3149 #define cplpl (c_ext & C_PLPL)
3150 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3152 #define CNL_SAVE_DEFINEDEF() \
3153 do { \
3154 curlinepos = charno; \
3155 readline (&curlb, inf); \
3156 lp = curlb.buffer; \
3157 quotednl = FALSE; \
3158 newndx = curndx; \
3159 } while (0)
3161 #define CNL() \
3162 do { \
3163 CNL_SAVE_DEFINEDEF(); \
3164 if (savetoken.valid) \
3166 token = savetoken; \
3167 savetoken.valid = FALSE; \
3169 definedef = dnone; \
3170 } while (0)
3173 static void
3174 make_C_tag (isfun)
3175 bool isfun;
3177 /* This function is never called when token.valid is FALSE, but
3178 we must protect against invalid input or internal errors. */
3179 if (!DEBUG && !token.valid)
3180 return;
3182 if (token.valid)
3183 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3184 token.offset+token.length+1, token.lineno, token.linepos);
3185 else /* this case is optimised away if !DEBUG */
3186 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3187 token_name.len + 17, isfun, token.line,
3188 token.offset+token.length+1, token.lineno, token.linepos);
3190 token.valid = FALSE;
3195 * C_entries ()
3196 * This routine finds functions, variables, typedefs,
3197 * #define's, enum constants and struct/union/enum definitions in
3198 * C syntax and adds them to the list.
/* C_entries: scan the input file INF one character at a time and call
   make_C_tag for the definitions it recognises.
   C_EXT holds the language-extension flag bits; this function tests it
   against YACC and C_AUTO, replaces C_AUTO by C_PLPL when a `::' is seen
   in the middle of an identifier, and passes its address to
   consider_token.
   The loop keeps track of comments, string and character literals,
   bracket, parenthesis, brace and template nesting, and of the
   definedef/fvdef/typdef/structdef/objdef state variables.
   The two line buffers in lbs[] are initialised on entry and their
   storage is freed before returning.
   NOTE(review): CNL () and CNL_SAVE_DEFINEDEF () are invoked wherever the
   end-of-line character '\0' is handled; their definitions lie outside
   this block, so confirm their exact effect there. */
3200 static void
3201 C_entries (c_ext, inf)
3202 int c_ext; /* extension of C */
3203 FILE *inf; /* input file */
3205 register char c; /* latest char read; '\0' for end of line */
3206 register char *lp; /* pointer one beyond the character `c' */
3207 int curndx, newndx; /* indices for current and new lb */
3208 register int tokoff; /* offset in line of start of current token */
3209 register int toklen; /* length of current token */
3210 char *qualifier; /* string used to qualify names */
3211 int qlen; /* length of qualifier */
3212 int bracelev; /* current brace level */
3213 int bracketlev; /* current bracket level */
3214 int parlev; /* current parenthesis level */
3215 int attrparlev; /* __attribute__ parenthesis level */
3216 int templatelev; /* current template level */
3217 int typdefbracelev; /* bracelev where a typedef struct body begun */
3218 bool incomm, inquote, inchar, quotednl, midtoken;
3219 bool yacc_rules; /* in the rules part of a yacc file */
3220 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3223 linebuffer_init (&lbs[0].lb);
3224 linebuffer_init (&lbs[1].lb);
/* The class stack is allocated only while its size is still 0:
   a single slot when DEBUG is set, four slots otherwise. */
3225 if (cstack.size == 0)
3227 cstack.size = (DEBUG) ? 1 : 4;
3228 cstack.nl = 0;
3229 cstack.cname = xnew (cstack.size, char *);
3230 cstack.bracelev = xnew (cstack.size, int);
3233 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3234 curndx = newndx = 0;
3235 lp = curlb.buffer;
3236 *lp = 0;
3238 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3239 structdef = snone; definedef = dnone; objdef = onone;
3240 yacc_rules = FALSE;
3241 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3242 token.valid = savetoken.valid = FALSE;
3243 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Java qualifies nested names with ".", everything else with "::". */
3244 if (cjava)
3245 { qualifier = "."; qlen = 1; }
3246 else
3247 { qualifier = "::"; qlen = 2; }
/* Main loop: each iteration starts by fetching the next character of the
   current line through LP. */
3250 while (!feof (inf))
3252 c = *lp++;
3253 if (c == '\\')
3255 /* If we are at the end of the line, the next character is a
3256 '\0'; do not skip it, because it is what tells us
3257 to read the next line. */
3258 if (*lp == '\0')
3260 quotednl = TRUE;
3261 continue;
3263 lp++;
3264 c = ' ';
3266 else if (incomm)
3268 switch (c)
3270 case '*':
3271 if (*lp == '/')
3273 c = *lp++;
3274 incomm = FALSE;
3276 break;
3277 case '\0':
3278 /* Newlines inside comments do not end macro definitions in
3279 traditional cpp. */
3280 CNL_SAVE_DEFINEDEF ();
3281 break;
3283 continue;
3285 else if (inquote)
3287 switch (c)
3289 case '"':
3290 inquote = FALSE;
3291 break;
3292 case '\0':
3293 /* Newlines inside strings do not end macro definitions
3294 in traditional cpp, even though compilers don't
3295 usually accept them. */
3296 CNL_SAVE_DEFINEDEF ();
3297 break;
3299 continue;
3301 else if (inchar)
3303 switch (c)
3305 case '\0':
3306 /* Hmmm, something went wrong. */
3307 CNL ();
3308 /* FALLTHRU */
3309 case '\'':
3310 inchar = FALSE;
3311 break;
3313 continue;
3315 else if (bracketlev > 0)
3317 switch (c)
3319 case ']':
3320 if (--bracketlev > 0)
3321 continue;
3322 break;
3323 case '\0':
3324 CNL_SAVE_DEFINEDEF ();
3325 break;
3327 continue;
3329 else switch (c)
3331 case '"':
3332 inquote = TRUE;
3333 if (inattribute)
3334 break;
3335 switch (fvdef)
3337 case fdefunkey:
3338 case fstartlist:
3339 case finlist:
3340 case fignore:
3341 case vignore:
3342 break;
3343 default:
3344 fvextern = FALSE;
3345 fvdef = fvnone;
3347 continue;
3348 case '\'':
3349 inchar = TRUE;
3350 if (inattribute)
3351 break;
3352 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3354 fvextern = FALSE;
3355 fvdef = fvnone;
3357 continue;
3358 case '/':
3359 if (*lp == '*')
3361 lp++;
3362 incomm = TRUE;
3363 continue;
3365 else if (/* cplpl && */ *lp == '/')
3367 c = '\0';
3368 break;
3370 else
3371 break;
3372 case '%':
3373 if ((c_ext & YACC) && *lp == '%')
3375 /* Entering or exiting rules section in yacc file. */
3376 lp++;
3377 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3378 typdef = tnone; structdef = snone;
3379 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3380 bracelev = 0;
3381 yacc_rules = !yacc_rules;
3382 continue;
3384 else
3385 break;
3386 case '#':
3387 if (definedef == dnone)
3389 char *cp;
3390 bool cpptoken = TRUE;
3392 /* Look back on this line. If all blanks, or nonblanks
3393 followed by an end of comment, this is a preprocessor
3394 token. */
3395 for (cp = newlb.buffer; cp < lp-1; cp++)
3396 if (!iswhite (*cp))
3398 if (*cp == '*' && *(cp+1) == '/')
3400 cp++;
3401 cpptoken = TRUE;
3403 else
3404 cpptoken = FALSE;
3406 if (cpptoken)
3407 definedef = dsharpseen;
3408 } /* if (definedef == dnone) */
3409 continue;
3410 case '[':
3411 bracketlev++;
3412 continue;
3413 } /* switch (c) */
3416 /* Consider token only if some involved conditions are satisfied. */
3417 if (typdef != tignore
3418 && definedef != dignorerest
3419 && fvdef != finlist
3420 && templatelev == 0
3421 && (definedef != dnone
3422 || structdef != scolonseen)
3423 && !inattribute)
3425 if (midtoken)
3427 if (endtoken (c))
3429 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3430 /* This handles :: in the middle,
3431 but not at the beginning of an identifier.
3432 Also, space-separated :: is not recognised. */
3434 if (c_ext & C_AUTO) /* automatic detection of C++ */
3435 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3436 lp += 2;
3437 toklen += 2;
3438 c = lp[-1];
3439 goto still_in_token;
3441 else
3443 bool funorvar = FALSE;
3445 if (yacc_rules
3446 || consider_token (newlb.buffer + tokoff, toklen, c,
3447 &c_ext, bracelev, parlev,
3448 &funorvar))
3450 if (fvdef == foperator)
3452 char *oldlp = lp;
3453 lp = skip_spaces (lp-1);
3454 if (*lp != '\0')
3455 lp += 1;
3456 while (*lp != '\0'
3457 && !iswhite (*lp) && *lp != '(')
3458 lp += 1;
3459 c = *lp++;
3460 toklen += lp - oldlp;
3462 token.named = FALSE;
3463 if (!plainc
3464 && nestlev > 0 && definedef == dnone)
3465 /* in struct body */
3467 write_classname (&token_name, qualifier);
3468 linebuffer_setlen (&token_name,
3469 token_name.len+qlen+toklen);
3470 strcat (token_name.buffer, qualifier);
3471 strncat (token_name.buffer,
3472 newlb.buffer + tokoff, toklen);
3473 token.named = TRUE;
3475 else if (objdef == ocatseen)
3476 /* Objective C category */
3478 int len = strlen (objtag) + 2 + toklen;
3479 linebuffer_setlen (&token_name, len);
3480 strcpy (token_name.buffer, objtag);
3481 strcat (token_name.buffer, "(");
3482 strncat (token_name.buffer,
3483 newlb.buffer + tokoff, toklen);
3484 strcat (token_name.buffer, ")");
3485 token.named = TRUE;
3487 else if (objdef == omethodtag
3488 || objdef == omethodparm)
3489 /* Objective C method */
3491 token.named = TRUE;
3493 else if (fvdef == fdefunname)
3494 /* GNU DEFUN and similar macros */
3496 bool defun = (newlb.buffer[tokoff] == 'F');
3497 int off = tokoff;
3498 int len = toklen;
3500 /* Rewrite the tag so that emacs lisp DEFUNs
3501 can be found by their elisp name */
3502 if (defun)
3504 off += 1;
3505 len -= 1;
3507 linebuffer_setlen (&token_name, len);
3508 strncpy (token_name.buffer,
3509 newlb.buffer + off, len);
3510 token_name.buffer[len] = '\0';
3511 if (defun)
3512 while (--len >= 0)
3513 if (token_name.buffer[len] == '_')
3514 token_name.buffer[len] = '-';
3515 token.named = defun;
3517 else
3519 linebuffer_setlen (&token_name, toklen);
3520 strncpy (token_name.buffer,
3521 newlb.buffer + tokoff, toklen);
3522 token_name.buffer[toklen] = '\0';
3523 /* Name macros and members. */
3524 token.named = (structdef == stagseen
3525 || typdef == ttypeseen
3526 || typdef == tend
3527 || (funorvar
3528 && definedef == dignorerest)
3529 || (funorvar
3530 && definedef == dnone
3531 && structdef == snone
3532 && bracelev > 0));
3534 token.lineno = lineno;
3535 token.offset = tokoff;
3536 token.length = toklen;
3537 token.line = newlb.buffer;
3538 token.linepos = newlinepos;
3539 token.valid = TRUE;
3541 if (definedef == dnone
3542 && (fvdef == fvnameseen
3543 || fvdef == foperator
3544 || structdef == stagseen
3545 || typdef == tend
3546 || typdef == ttypeseen
3547 || objdef != onone))
3549 if (current_lb_is_new)
3550 switch_line_buffers ();
3552 else if (definedef != dnone
3553 || fvdef == fdefunname
3554 || instruct)
3555 make_C_tag (funorvar);
3557 else /* not yacc and consider_token failed */
3559 if (inattribute && fvdef == fignore)
3561 /* We have just met __attribute__ after a
3562 function parameter list: do not tag the
3563 function again. */
3564 fvdef = fvnone;
3567 midtoken = FALSE;
3569 } /* if (endtoken (c)) */
3570 else if (intoken (c))
3571 still_in_token:
3573 toklen++;
3574 continue;
3576 } /* if (midtoken) */
3577 else if (begtoken (c))
3579 switch (definedef)
3581 case dnone:
3582 switch (fvdef)
3584 case fstartlist:
3585 /* This prevents tagging fb in
3586 void (__attribute__((noreturn)) *fb) (void);
3587 Fixing this is not easy and not very important. */
3588 fvdef = finlist;
3589 continue;
3590 case flistseen:
3591 if (plainc || declarations)
3593 make_C_tag (TRUE); /* a function */
3594 fvdef = fignore;
3596 break;
3598 if (structdef == stagseen && !cjava)
3600 popclass_above (bracelev);
3601 structdef = snone;
3603 break;
3604 case dsharpseen:
3605 savetoken = token;
3606 break;
3608 if (!yacc_rules || lp == newlb.buffer + 1)
3610 tokoff = lp - 1 - newlb.buffer;
3611 toklen = 1;
3612 midtoken = TRUE;
3614 continue;
3615 } /* if (begtoken) */
3616 } /* if must look at token */
3619 /* Detect end of line, colon, comma, semicolon and various braces
3620 after having handled a token.*/
3621 switch (c)
3623 case ':':
3624 if (inattribute)
3625 break;
3626 if (yacc_rules && token.offset == 0 && token.valid)
3628 make_C_tag (FALSE); /* a yacc function */
3629 break;
3631 if (definedef != dnone)
3632 break;
3633 switch (objdef)
3635 case otagseen:
3636 objdef = oignore;
3637 make_C_tag (TRUE); /* an Objective C class */
3638 break;
3639 case omethodtag:
3640 case omethodparm:
3641 objdef = omethodcolon;
3642 linebuffer_setlen (&token_name, token_name.len + 1);
3643 strcat (token_name.buffer, ":");
3644 break;
3646 if (structdef == stagseen)
3648 structdef = scolonseen;
3649 break;
3651 /* Should be useless, but may work as a safety net. */
3652 if (cplpl && fvdef == flistseen)
3654 make_C_tag (TRUE); /* a function */
3655 fvdef = fignore;
3656 break;
3658 break;
3659 case ';':
3660 if (definedef != dnone || inattribute)
3661 break;
3662 switch (typdef)
3664 case tend:
3665 case ttypeseen:
3666 make_C_tag (FALSE); /* a typedef */
3667 typdef = tnone;
3668 fvdef = fvnone;
3669 break;
3670 case tnone:
3671 case tinbody:
3672 case tignore:
3673 switch (fvdef)
3675 case fignore:
3676 if (typdef == tignore || cplpl)
3677 fvdef = fvnone;
3678 break;
3679 case fvnameseen:
3680 if ((globals && bracelev == 0 && (!fvextern || declarations))
3681 || (members && instruct))
3682 make_C_tag (FALSE); /* a variable */
3683 fvextern = FALSE;
3684 fvdef = fvnone;
3685 token.valid = FALSE;
3686 break;
3687 case flistseen:
3688 if ((declarations
3689 && (cplpl || !instruct)
3690 && (typdef == tnone || (typdef != tignore && instruct)))
3691 || (members
3692 && plainc && instruct))
3693 make_C_tag (TRUE); /* a function */
3694 /* FALLTHRU */
3695 default:
3696 fvextern = FALSE;
3697 fvdef = fvnone;
3698 if (declarations
3699 && cplpl && structdef == stagseen)
3700 make_C_tag (FALSE); /* forward declaration */
3701 else
3702 token.valid = FALSE;
3703 } /* switch (fvdef) */
3704 /* FALLTHRU */
3705 default:
3706 if (!instruct)
3707 typdef = tnone;
3709 if (structdef == stagseen)
3710 structdef = snone;
3711 break;
3712 case ',':
3713 if (definedef != dnone || inattribute)
3714 break;
3715 switch (objdef)
3717 case omethodtag:
3718 case omethodparm:
3719 make_C_tag (TRUE); /* an Objective C method */
3720 objdef = oinbody;
3721 break;
3723 switch (fvdef)
3725 case fdefunkey:
3726 case foperator:
3727 case fstartlist:
3728 case finlist:
3729 case fignore:
3730 case vignore:
3731 break;
3732 case fdefunname:
3733 fvdef = fignore;
3734 break;
3735 case fvnameseen:
3736 if (parlev == 0
3737 && ((globals
3738 && bracelev == 0
3739 && templatelev == 0
3740 && (!fvextern || declarations))
3741 || (members && instruct)))
3742 make_C_tag (FALSE); /* a variable */
3743 break;
3744 case flistseen:
3745 if ((declarations && typdef == tnone && !instruct)
3746 || (members && typdef != tignore && instruct))
3748 make_C_tag (TRUE); /* a function */
3749 fvdef = fvnameseen;
3751 else if (!declarations)
3752 fvdef = fvnone;
3753 token.valid = FALSE;
3754 break;
3755 default:
3756 fvdef = fvnone;
3758 if (structdef == stagseen)
3759 structdef = snone;
3760 break;
3761 case ']':
3762 if (definedef != dnone || inattribute)
3763 break;
3764 if (structdef == stagseen)
3765 structdef = snone;
3766 switch (typdef)
3768 case ttypeseen:
3769 case tend:
3770 typdef = tignore;
3771 make_C_tag (FALSE); /* a typedef */
3772 break;
3773 case tnone:
3774 case tinbody:
3775 switch (fvdef)
3777 case foperator:
3778 case finlist:
3779 case fignore:
3780 case vignore:
3781 break;
3782 case fvnameseen:
3783 if ((members && bracelev == 1)
3784 || (globals && bracelev == 0
3785 && (!fvextern || declarations)))
3786 make_C_tag (FALSE); /* a variable */
3787 /* FALLTHRU */
3788 default:
3789 fvdef = fvnone;
3791 break;
3793 break;
3794 case '(':
3795 if (inattribute)
3797 attrparlev++;
3798 break;
3800 if (definedef != dnone)
3801 break;
3802 if (objdef == otagseen && parlev == 0)
3803 objdef = oparenseen;
3804 switch (fvdef)
3806 case fvnameseen:
3807 if (typdef == ttypeseen
3808 && *lp != '*'
3809 && !instruct)
3811 /* This handles constructs like:
3812 typedef void OperatorFun (int fun); */
3813 make_C_tag (FALSE);
3814 typdef = tignore;
3815 fvdef = fignore;
3816 break;
3818 /* FALLTHRU */
3819 case foperator:
3820 fvdef = fstartlist;
3821 break;
3822 case flistseen:
3823 fvdef = finlist;
3824 break;
3826 parlev++;
3827 break;
3828 case ')':
3829 if (inattribute)
3831 if (--attrparlev == 0)
3832 inattribute = FALSE;
3833 break;
3835 if (definedef != dnone)
3836 break;
3837 if (objdef == ocatseen && parlev == 1)
3839 make_C_tag (TRUE); /* an Objective C category */
3840 objdef = oignore;
3842 if (--parlev == 0)
3844 switch (fvdef)
3846 case fstartlist:
3847 case finlist:
3848 fvdef = flistseen;
3849 break;
3851 if (!instruct
3852 && (typdef == tend
3853 || typdef == ttypeseen))
3855 typdef = tignore;
3856 make_C_tag (FALSE); /* a typedef */
3859 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3860 parlev = 0;
3861 break;
3862 case '{':
3863 if (definedef != dnone)
3864 break;
3865 if (typdef == ttypeseen)
3867 /* Whenever typdef is set to tinbody (currently only
3868 here), typdefbracelev should be set to bracelev. */
3869 typdef = tinbody;
3870 typdefbracelev = bracelev;
3872 switch (fvdef)
3874 case flistseen:
3875 make_C_tag (TRUE); /* a function */
3876 /* FALLTHRU */
3877 case fignore:
3878 fvdef = fvnone;
3879 break;
3880 case fvnone:
3881 switch (objdef)
3883 case otagseen:
3884 make_C_tag (TRUE); /* an Objective C class */
3885 objdef = oignore;
3886 break;
3887 case omethodtag:
3888 case omethodparm:
3889 make_C_tag (TRUE); /* an Objective C method */
3890 objdef = oinbody;
3891 break;
3892 default:
3893 /* Neutralize `extern "C" {' grot. */
3894 if (bracelev == 0 && structdef == snone && nestlev == 0
3895 && typdef == tnone)
3896 bracelev = -1;
3898 break;
3900 switch (structdef)
3902 case skeyseen: /* unnamed struct */
3903 pushclass_above (bracelev, NULL, 0);
3904 structdef = snone;
3905 break;
3906 case stagseen: /* named struct or enum */
3907 case scolonseen: /* a class */
3908 pushclass_above (bracelev,token.line+token.offset, token.length);
3909 structdef = snone;
3910 make_C_tag (FALSE); /* a struct or enum */
3911 break;
3913 bracelev++;
3914 break;
3915 case '*':
3916 if (definedef != dnone)
3917 break;
3918 if (fvdef == fstartlist)
3920 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3921 token.valid = FALSE;
3923 break;
3924 case '}':
3925 if (definedef != dnone)
3926 break;
3927 if (!ignoreindent && lp == newlb.buffer + 1)
3929 if (bracelev != 0)
3930 token.valid = FALSE;
3931 bracelev = 0; /* reset brace level if first column */
3932 parlev = 0; /* also reset paren level, just in case... */
3934 else if (bracelev > 0)
3935 bracelev--;
3936 else
3937 token.valid = FALSE; /* something gone amiss, token unreliable */
3938 popclass_above (bracelev);
3939 structdef = snone;
3940 /* Only if typdef == tinbody is typdefbracelev significant. */
3941 if (typdef == tinbody && bracelev <= typdefbracelev)
3943 assert (bracelev == typdefbracelev);
3944 typdef = tend;
3946 break;
3947 case '=':
3948 if (definedef != dnone)
3949 break;
3950 switch (fvdef)
3952 case foperator:
3953 case finlist:
3954 case fignore:
3955 case vignore:
3956 break;
3957 case fvnameseen:
3958 if ((members && bracelev == 1)
3959 || (globals && bracelev == 0 && (!fvextern || declarations)))
3960 make_C_tag (FALSE); /* a variable */
3961 /* FALLTHRU */
3962 default:
3963 fvdef = vignore;
3965 break;
3966 case '<':
3967 if (cplpl
3968 && (structdef == stagseen || fvdef == fvnameseen))
3970 templatelev++;
3971 break;
3973 goto resetfvdef;
3974 case '>':
3975 if (templatelev > 0)
3977 templatelev--;
3978 break;
3980 goto resetfvdef;
3981 case '+':
3982 case '-':
3983 if (objdef == oinbody && bracelev == 0)
3985 objdef = omethodsign;
3986 break;
3988 /* FALLTHRU */
3989 resetfvdef:
3990 case '#': case '~': case '&': case '%': case '/':
3991 case '|': case '^': case '!': case '.': case '?':
3992 if (definedef != dnone)
3993 break;
3994 /* These surely cannot follow a function tag in C. */
3995 switch (fvdef)
3997 case foperator:
3998 case finlist:
3999 case fignore:
4000 case vignore:
4001 break;
4002 default:
4003 fvdef = fvnone;
4005 break;
4006 case '\0':
4007 if (objdef == otagseen)
4009 make_C_tag (TRUE); /* an Objective C class */
4010 objdef = oignore;
4012 /* If a macro spans multiple lines don't reset its state. */
4013 if (quotednl)
4014 CNL_SAVE_DEFINEDEF ();
4015 else
4016 CNL ();
4017 break;
4018 } /* switch (c) */
4020 } /* while not eof */
/* Release the storage of both line buffers initialised on entry. */
4022 free (lbs[0].lb.buffer);
4023 free (lbs[1].lb.buffer);
4027 * Process either a C++ file or a C file depending on the setting
4028 * of a global flag.
4030 static void
4031 default_C_entries (inf)
4032 FILE *inf;
4034 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Scan INF as plain C: no extension flag is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
4045 /* Always do C++. */
4046 static void
4047 Cplusplus_entries (inf)
4048 FILE *inf;
4050 C_entries (C_PLPL, inf);
4053 /* Always do Java. */
4054 static void
4055 Cjava_entries (inf)
4056 FILE *inf;
4058 C_entries (C_JAVA, inf);
4061 /* Always do C*. */
4062 static void
4063 Cstar_entries (inf)
4064 FILE *inf;
4066 C_entries (C_STAR, inf);
4069 /* Always do Yacc. */
4070 static void
4071 Yacc_entries (inf)
4072 FILE *inf;
4074 C_entries (YACC, inf);
4078 /* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to a `for' loop header that runs once per
   input line: while feof (file_pointer) is false it calls readline to
   fill line_buffer and then points char_pointer at the start of that
   buffer.  The statement following the macro invocation is the loop
   body.  line_buffer and char_pointer must be lvalues; they are used
   unparenthesised in the expansion. */
4079 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4080 for (; /* loop initialization */ \
4081 !feof (file_pointer) /* loop test */ \
4082 && /* instructions at start of loop */ \
4083 (readline (&line_buffer, file_pointer), \
4084 char_pointer = line_buffer.buffer, \
4085 TRUE); \
/* LOOKING_AT is true when CP points at the keyword KW and the character
   right after the keyword satisfies notinname.  On success CP is moved
   past the keyword and past whatever skip_spaces skips; on failure CP is
   left untouched, because the assignment is the last operand of the &&
   chain.  CP is evaluated several times, so it must be a plain lvalue. */
4088 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4089 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4090 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4091 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4092 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4094 /* Similar to LOOKING_AT but does not use notinname, does not skip */
/* The comparison is done with strncaseeq; on success CP is advanced by
   exactly the length of KW and no following spaces are skipped. */
4095 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4096 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4097 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4098 && ((cp) += sizeof(kw)-1)) /* step over kw only */
4101 * Read a file, but do no processing. This is used to do regexp
4102 * matching on files that have no language defined.
4104 static void
4105 just_read_file (inf)
4106 FILE *inf;
4108 register char *dummy;
4110 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4111 continue;
4115 /* Fortran parsing */
4117 static void F_takeprec __P((void));
4118 static void F_getit __P((FILE *));
4120 static void
4121 F_takeprec ()
4123 dbp = skip_spaces (dbp);
4124 if (*dbp != '*')
4125 return;
4126 dbp++;
4127 dbp = skip_spaces (dbp);
4128 if (strneq (dbp, "(*)", 3))
4130 dbp += 3;
4131 return;
4133 if (!ISDIGIT (*dbp))
4135 --dbp; /* force failure */
4136 return;
4139 dbp++;
4140 while (ISDIGIT (*dbp));
4143 static void
4144 F_getit (inf)
4145 FILE *inf;
4147 register char *cp;
4149 dbp = skip_spaces (dbp);
4150 if (*dbp == '\0')
4152 readline (&lb, inf);
4153 dbp = lb.buffer;
4154 if (dbp[5] != '&')
4155 return;
4156 dbp += 6;
4157 dbp = skip_spaces (dbp);
4159 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4160 return;
4161 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4162 continue;
4163 make_tag (dbp, cp-dbp, TRUE,
4164 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4168 static void
4169 Fortran_functions (inf)
4170 FILE *inf;
4172 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4174 if (*dbp == '%')
4175 dbp++; /* Ratfor escape to fortran */
4176 dbp = skip_spaces (dbp);
4177 if (*dbp == '\0')
4178 continue;
4179 switch (lowcase (*dbp))
4181 case 'i':
4182 if (nocase_tail ("integer"))
4183 F_takeprec ();
4184 break;
4185 case 'r':
4186 if (nocase_tail ("real"))
4187 F_takeprec ();
4188 break;
4189 case 'l':
4190 if (nocase_tail ("logical"))
4191 F_takeprec ();
4192 break;
4193 case 'c':
4194 if (nocase_tail ("complex") || nocase_tail ("character"))
4195 F_takeprec ();
4196 break;
4197 case 'd':
4198 if (nocase_tail ("double"))
4200 dbp = skip_spaces (dbp);
4201 if (*dbp == '\0')
4202 continue;
4203 if (nocase_tail ("precision"))
4204 break;
4205 continue;
4207 break;
4209 dbp = skip_spaces (dbp);
4210 if (*dbp == '\0')
4211 continue;
4212 switch (lowcase (*dbp))
4214 case 'f':
4215 if (nocase_tail ("function"))
4216 F_getit (inf);
4217 continue;
4218 case 's':
4219 if (nocase_tail ("subroutine"))
4220 F_getit (inf);
4221 continue;
4222 case 'e':
4223 if (nocase_tail ("entry"))
4224 F_getit (inf);
4225 continue;
4226 case 'b':
4227 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4229 dbp = skip_spaces (dbp);
4230 if (*dbp == '\0') /* assume un-named */
4231 make_tag ("blockdata", 9, TRUE,
4232 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4233 else
4234 F_getit (inf); /* look for name */
4236 continue;
4243 * Ada parsing
4244 * Original code by
4245 * Philippe Waroquiers (1998)
4248 static void Ada_getit __P((FILE *, char *));
4250 /* Once we are positioned after an "interesting" keyword, let's get
4251 the real tag value necessary. */
4252 static void
4253 Ada_getit (inf, name_qualifier)
4254 FILE *inf;
4255 char *name_qualifier;
4257 register char *cp;
4258 char *name;
4259 char c;
4261 while (!feof (inf))
4263 dbp = skip_spaces (dbp);
4264 if (*dbp == '\0'
4265 || (dbp[0] == '-' && dbp[1] == '-'))
4267 readline (&lb, inf);
4268 dbp = lb.buffer;
4270 switch (lowcase(*dbp))
4272 case 'b':
4273 if (nocase_tail ("body"))
4275 /* Skipping body of procedure body or package body or ....
4276 resetting qualifier to body instead of spec. */
4277 name_qualifier = "/b";
4278 continue;
4280 break;
4281 case 't':
4282 /* Skipping type of task type or protected type ... */
4283 if (nocase_tail ("type"))
4284 continue;
4285 break;
4287 if (*dbp == '"')
4289 dbp += 1;
4290 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4291 continue;
4293 else
4295 dbp = skip_spaces (dbp);
4296 for (cp = dbp;
4297 (*cp != '\0'
4298 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4299 cp++)
4300 continue;
4301 if (cp == dbp)
4302 return;
4304 c = *cp;
4305 *cp = '\0';
4306 name = concat (dbp, name_qualifier, "");
4307 *cp = c;
4308 make_tag (name, strlen (name), TRUE,
4309 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4310 free (name);
4311 if (c == '"')
4312 dbp = cp + 1;
4313 return;
4317 static void
4318 Ada_funcs (inf)
4319 FILE *inf;
4321 bool inquote = FALSE;
4322 bool skip_till_semicolumn = FALSE;
4324 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4326 while (*dbp != '\0')
4328 /* Skip a string i.e. "abcd". */
4329 if (inquote || (*dbp == '"'))
4331 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4332 if (dbp != NULL)
4334 inquote = FALSE;
4335 dbp += 1;
4336 continue; /* advance char */
4338 else
4340 inquote = TRUE;
4341 break; /* advance line */
4345 /* Skip comments. */
4346 if (dbp[0] == '-' && dbp[1] == '-')
4347 break; /* advance line */
4349 /* Skip character enclosed in single quote i.e. 'a'
4350 and skip single quote starting an attribute i.e. 'Image. */
4351 if (*dbp == '\'')
4353 dbp++ ;
4354 if (*dbp != '\0')
4355 dbp++;
4356 continue;
4359 if (skip_till_semicolumn)
4361 if (*dbp == ';')
4362 skip_till_semicolumn = FALSE;
4363 dbp++;
4364 continue; /* advance char */
4367 /* Search for beginning of a token. */
4368 if (!begtoken (*dbp))
4370 dbp++;
4371 continue; /* advance char */
4374 /* We are at the beginning of a token. */
4375 switch (lowcase(*dbp))
4377 case 'f':
4378 if (!packages_only && nocase_tail ("function"))
4379 Ada_getit (inf, "/f");
4380 else
4381 break; /* from switch */
4382 continue; /* advance char */
4383 case 'p':
4384 if (!packages_only && nocase_tail ("procedure"))
4385 Ada_getit (inf, "/p");
4386 else if (nocase_tail ("package"))
4387 Ada_getit (inf, "/s");
4388 else if (nocase_tail ("protected")) /* protected type */
4389 Ada_getit (inf, "/t");
4390 else
4391 break; /* from switch */
4392 continue; /* advance char */
4394 case 'u':
4395 if (typedefs && !packages_only && nocase_tail ("use"))
4397 /* when tagging types, avoid tagging use type Pack.Typename;
4398 for this, we will skip everything till a ; */
4399 skip_till_semicolumn = TRUE;
4400 continue; /* advance char */
4403 case 't':
4404 if (!packages_only && nocase_tail ("task"))
4405 Ada_getit (inf, "/k");
4406 else if (typedefs && !packages_only && nocase_tail ("type"))
4408 Ada_getit (inf, "/t");
4409 while (*dbp != '\0')
4410 dbp += 1;
4412 else
4413 break; /* from switch */
4414 continue; /* advance char */
4417 /* Look for the end of the token. */
4418 while (!endtoken (*dbp))
4419 dbp++;
4421 } /* advance char */
4422 } /* advance line */
4427 * Unix and microcontroller assembly tag handling
4428 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4429 * Idea by Bob Weiner, Motorola Inc. (1994)
4431 static void
4432 Asm_labels (inf)
4433 FILE *inf;
4435 register char *cp;
4437 LOOP_ON_INPUT_LINES (inf, lb, cp)
4439 /* If first char is alphabetic or one of [_.$], test for colon
4440 following identifier. */
4441 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4443 /* Read past label. */
4444 cp++;
4445 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4446 cp++;
4447 if (*cp == ':' || iswhite (*cp))
4448 /* Found end of label, so copy it and add it to the table. */
4449 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4450 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4457 * Perl support
4458 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4459 * Perl variable names: /^(my|local).../
4460 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4461 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4462 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4464 static void
4465 Perl_functions (inf)
4466 FILE *inf;
4468 char *package = savestr ("main"); /* current package name */
4469 register char *cp;
4471 LOOP_ON_INPUT_LINES (inf, lb, cp)
4473 skip_spaces(cp);
4475 if (LOOKING_AT (cp, "package"))
4477 free (package);
4478 get_tag (cp, &package);
4480 else if (LOOKING_AT (cp, "sub"))
4482 char *pos;
4483 char *sp = cp;
4485 while (!notinname (*cp))
4486 cp++;
4487 if (cp == sp)
4488 continue; /* nothing found */
4489 if ((pos = etags_strchr (sp, ':')) != NULL
4490 && pos < cp && pos[1] == ':')
4491 /* The name is already qualified. */
4492 make_tag (sp, cp - sp, TRUE,
4493 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494 else
4495 /* Qualify it. */
4497 char savechar, *name;
4499 savechar = *cp;
4500 *cp = '\0';
4501 name = concat (package, "::", sp);
4502 *cp = savechar;
4503 make_tag (name, strlen(name), TRUE,
4504 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4505 free (name);
4508 else if (globals) /* only if we are tagging global vars */
4510 /* Skip a qualifier, if any. */
4511 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4512 /* After "my" or "local", but before any following paren or space. */
4513 char *varstart = cp;
4515 if (qual /* should this be removed? If yes, how? */
4516 && (*cp == '$' || *cp == '@' || *cp == '%'))
4518 varstart += 1;
4520 cp++;
4521 while (ISALNUM (*cp) || *cp == '_');
4523 else if (qual)
4525 /* Should be examining a variable list at this point;
4526 could insist on seeing an open parenthesis. */
4527 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4528 cp++;
4530 else
4531 continue;
4533 make_tag (varstart, cp - varstart, FALSE,
4534 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537 free (package);
4542 * Python support
4543 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4544 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4545 * More ideas by seb bacon <seb@jamkit.com> (2002)
4547 static void
4548 Python_functions (inf)
4549 FILE *inf;
4551 register char *cp;
4553 LOOP_ON_INPUT_LINES (inf, lb, cp)
4555 cp = skip_spaces (cp);
4556 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4558 char *name = cp;
4559 while (!notinname (*cp) && *cp != ':')
4560 cp++;
4561 make_tag (name, cp - name, TRUE,
4562 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4569 * PHP support
4570 * Look for:
4571 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4572 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4573 * - /^[ \t]*define\(\"[^\"]+/
4574 * Only with --members:
4575 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4576 * Idea by Diez B. Roggisch (2001)
4578 static void
4579 PHP_functions (inf)
4580 FILE *inf;
4582 register char *cp, *name;
4583 bool search_identifier = FALSE;
4585 LOOP_ON_INPUT_LINES (inf, lb, cp)
4587 cp = skip_spaces (cp);
4588 name = cp;
4589 if (search_identifier
4590 && *cp != '\0')
4592 while (!notinname (*cp))
4593 cp++;
4594 make_tag (name, cp - name, TRUE,
4595 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4596 search_identifier = FALSE;
4598 else if (LOOKING_AT (cp, "function"))
4600 if(*cp == '&')
4601 cp = skip_spaces (cp+1);
4602 if(*cp != '\0')
4604 name = cp;
4605 while (!notinname (*cp))
4606 cp++;
4607 make_tag (name, cp - name, TRUE,
4608 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4610 else
4611 search_identifier = TRUE;
4613 else if (LOOKING_AT (cp, "class"))
4615 if (*cp != '\0')
4617 name = cp;
4618 while (*cp != '\0' && !iswhite (*cp))
4619 cp++;
4620 make_tag (name, cp - name, FALSE,
4621 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4623 else
4624 search_identifier = TRUE;
4626 else if (strneq (cp, "define", 6)
4627 && (cp = skip_spaces (cp+6))
4628 && *cp++ == '('
4629 && (*cp == '"' || *cp == '\''))
4631 char quote = *cp++;
4632 name = cp;
4633 while (*cp != quote && *cp != '\0')
4634 cp++;
4635 make_tag (name, cp - name, FALSE,
4636 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4638 else if (members
4639 && LOOKING_AT (cp, "var")
4640 && *cp == '$')
4642 name = cp;
4643 while (!notinname(*cp))
4644 cp++;
4645 make_tag (name, cp - name, FALSE,
4646 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4653 * Cobol tag functions
4654 * We could look for anything that could be a paragraph name.
4655 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4656 * Idea by Corny de Souza (1993)
4658 static void
4659 Cobol_paragraphs (inf)
4660 FILE *inf;
4662 register char *bp, *ep;
4664 LOOP_ON_INPUT_LINES (inf, lb, bp)
4666 if (lb.len < 9)
4667 continue;
4668 bp += 8;
4670 /* If eoln, compiler option or comment ignore whole line. */
4671 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4672 continue;
4674 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4675 continue;
4676 if (*ep++ == '.')
4677 make_tag (bp, ep - bp, TRUE,
4678 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4684 * Makefile support
4685 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4687 static void
4688 Makefile_targets (inf)
4689 FILE *inf;
4691 register char *bp;
4693 LOOP_ON_INPUT_LINES (inf, lb, bp)
4695 if (*bp == '\t' || *bp == '#')
4696 continue;
4697 while (*bp != '\0' && *bp != '=' && *bp != ':')
4698 bp++;
4699 if (*bp == ':' || (globals && *bp == '='))
4701 /* We should detect if there is more than one tag, but we do not.
4702 We just skip initial and final spaces. */
4703 char * namestart = skip_spaces (lb.buffer);
4704 while (--bp > namestart)
4705 if (!notinname (*bp))
4706 break;
4707 make_tag (namestart, bp - namestart + 1, TRUE,
4708 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4715 * Pascal parsing
4716 * Original code by Mosur K. Mohan (1989)
4718 * Locates tags for procedures & functions. Doesn't do any type- or
4719 * var-definitions. It does look for the keyword "extern" or
4720 * "forward" immediately following the procedure statement; if found,
4721 * the tag is skipped.
4723 static void
4724 Pascal_functions (inf)
4725 FILE *inf;
4727 linebuffer tline; /* mostly copied from C_entries */
4728 long save_lcno;
4729 int save_lineno, namelen, taglen;
4730 char c, *name;
4732 bool /* each of these flags is TRUE iff: */
4733 incomment, /* point is inside a comment */
4734 inquote, /* point is inside '..' string */
4735 get_tagname, /* point is after PROCEDURE/FUNCTION
4736 keyword, so next item = potential tag */
4737 found_tag, /* point is after a potential tag */
4738 inparms, /* point is within parameter-list */
4739 verify_tag; /* point has passed the parm-list, so the
4740 next token will determine whether this
4741 is a FORWARD/EXTERN to be ignored, or
4742 whether it is a real tag */
4744 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4745 name = NULL; /* keep compiler quiet */
4746 dbp = lb.buffer;
4747 *dbp = '\0';
4748 linebuffer_init (&tline);
4750 incomment = inquote = FALSE;
4751 found_tag = FALSE; /* have a proc name; check if extern */
4752 get_tagname = FALSE; /* found "procedure" keyword */
4753 inparms = FALSE; /* found '(' after "proc" */
4754 verify_tag = FALSE; /* check if "extern" is ahead */
4757 while (!feof (inf)) /* long main loop to get next char */
4759 c = *dbp++;
4760 if (c == '\0') /* if end of line */
4762 readline (&lb, inf);
4763 dbp = lb.buffer;
4764 if (*dbp == '\0')
4765 continue;
4766 if (!((found_tag && verify_tag)
4767 || get_tagname))
4768 c = *dbp++; /* only if don't need *dbp pointing
4769 to the beginning of the name of
4770 the procedure or function */
4772 if (incomment)
4774 if (c == '}') /* within { } comments */
4775 incomment = FALSE;
4776 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4778 dbp++;
4779 incomment = FALSE;
4781 continue;
4783 else if (inquote)
4785 if (c == '\'')
4786 inquote = FALSE;
4787 continue;
4789 else
4790 switch (c)
4792 case '\'':
4793 inquote = TRUE; /* found first quote */
4794 continue;
4795 case '{': /* found open { comment */
4796 incomment = TRUE;
4797 continue;
4798 case '(':
4799 if (*dbp == '*') /* found open (* comment */
4801 incomment = TRUE;
4802 dbp++;
4804 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4805 inparms = TRUE;
4806 continue;
4807 case ')': /* end of parms list */
4808 if (inparms)
4809 inparms = FALSE;
4810 continue;
4811 case ';':
4812 if (found_tag && !inparms) /* end of proc or fn stmt */
4814 verify_tag = TRUE;
4815 break;
4817 continue;
4819 if (found_tag && verify_tag && (*dbp != ' '))
4821 /* Check if this is an "extern" declaration. */
4822 if (*dbp == '\0')
4823 continue;
4824 if (lowcase (*dbp == 'e'))
4826 if (nocase_tail ("extern")) /* superfluous, really! */
4828 found_tag = FALSE;
4829 verify_tag = FALSE;
4832 else if (lowcase (*dbp) == 'f')
4834 if (nocase_tail ("forward")) /* check for forward reference */
4836 found_tag = FALSE;
4837 verify_tag = FALSE;
4840 if (found_tag && verify_tag) /* not external proc, so make tag */
4842 found_tag = FALSE;
4843 verify_tag = FALSE;
4844 make_tag (name, namelen, TRUE,
4845 tline.buffer, taglen, save_lineno, save_lcno);
4846 continue;
4849 if (get_tagname) /* grab name of proc or fn */
4851 char *cp;
4853 if (*dbp == '\0')
4854 continue;
4856 /* Find block name. */
4857 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4858 continue;
4860 /* Save all values for later tagging. */
4861 linebuffer_setlen (&tline, lb.len);
4862 strcpy (tline.buffer, lb.buffer);
4863 save_lineno = lineno;
4864 save_lcno = linecharno;
4865 name = tline.buffer + (dbp - lb.buffer);
4866 namelen = cp - dbp;
4867 taglen = cp - lb.buffer + 1;
4869 dbp = cp; /* set dbp to e-o-token */
4870 get_tagname = FALSE;
4871 found_tag = TRUE;
4872 continue;
4874 /* And proceed to check for "extern". */
4876 else if (!incomment && !inquote && !found_tag)
4878 /* Check for proc/fn keywords. */
4879 switch (lowcase (c))
4881 case 'p':
4882 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4883 get_tagname = TRUE;
4884 continue;
4885 case 'f':
4886 if (nocase_tail ("unction"))
4887 get_tagname = TRUE;
4888 continue;
4891 } /* while not eof */
4893 free (tline.buffer);
4898 * Lisp tag functions
4899 * look for (def or (DEF, quote or QUOTE
4902 static void L_getit __P((void));
4904 static void
4905 L_getit ()
4907 if (*dbp == '\'') /* Skip prefix quote */
4908 dbp++;
4909 else if (*dbp == '(')
4911 dbp++;
4912 /* Try to skip "(quote " */
4913 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4914 /* Ok, then skip "(" before name in (defstruct (foo)) */
4915 dbp = skip_spaces (dbp);
4917 get_tag (dbp, NULL);
4920 static void
4921 Lisp_functions (inf)
4922 FILE *inf;
4924 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4926 if (dbp[0] != '(')
4927 continue;
4929 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4931 dbp = skip_non_spaces (dbp);
4932 dbp = skip_spaces (dbp);
4933 L_getit ();
4935 else
4937 /* Check for (foo::defmumble name-defined ... */
4939 dbp++;
4940 while (!notinname (*dbp) && *dbp != ':');
4941 if (*dbp == ':')
4944 dbp++;
4945 while (*dbp == ':');
4947 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4949 dbp = skip_non_spaces (dbp);
4950 dbp = skip_spaces (dbp);
4951 L_getit ();
4960 * Lua script language parsing
4961 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4963 * "function" and "local function" are tags if they start at column 1.
4965 static void
4966 Lua_functions (inf)
4967 FILE *inf;
4969 register char *bp;
4971 LOOP_ON_INPUT_LINES (inf, lb, bp)
4973 if (bp[0] != 'f' && bp[0] != 'l')
4974 continue;
4976 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4978 if (LOOKING_AT (bp, "function"))
4979 get_tag (bp, NULL);
4985 * Postscript tags
4986 * Just look for lines where the first character is '/'
4987 * Also look at "defineps" for PSWrap
4988 * Ideas by:
4989 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4990 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4992 static void
4993 PS_functions (inf)
4994 FILE *inf;
4996 register char *bp, *ep;
4998 LOOP_ON_INPUT_LINES (inf, lb, bp)
5000 if (bp[0] == '/')
5002 for (ep = bp+1;
5003 *ep != '\0' && *ep != ' ' && *ep != '{';
5004 ep++)
5005 continue;
5006 make_tag (bp, ep - bp, TRUE,
5007 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5009 else if (LOOKING_AT (bp, "defineps"))
5010 get_tag (bp, NULL);
5016 * Forth tags
5017 * Ignore anything after \ followed by space or in ( )
5018 * Look for words defined by :
5019 * Look for constant, code, create, defer, value, and variable
5020 * OBP extensions: Look for buffer:, field,
5021 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5023 static void
5024 Forth_words (inf)
5025 FILE *inf;
5027 register char *bp;
5029 LOOP_ON_INPUT_LINES (inf, lb, bp)
5030 while ((bp = skip_spaces (bp))[0] != '\0')
5031 if (bp[0] == '\\' && iswhite(bp[1]))
5032 break; /* read next line */
5033 else if (bp[0] == '(' && iswhite(bp[1]))
5034 do /* skip to ) or eol */
5035 bp++;
5036 while (*bp != ')' && *bp != '\0');
5037 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5038 || LOOKING_AT_NOCASE (bp, "constant")
5039 || LOOKING_AT_NOCASE (bp, "code")
5040 || LOOKING_AT_NOCASE (bp, "create")
5041 || LOOKING_AT_NOCASE (bp, "defer")
5042 || LOOKING_AT_NOCASE (bp, "value")
5043 || LOOKING_AT_NOCASE (bp, "variable")
5044 || LOOKING_AT_NOCASE (bp, "buffer:")
5045 || LOOKING_AT_NOCASE (bp, "field"))
5046 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5047 else
5048 bp = skip_non_spaces (bp);
5053 * Scheme tag functions
5054 * look for (def... xyzzy
5055 * (def... (xyzzy
5056 * (def ... ((...(xyzzy ....
5057 * (set! xyzzy
5058 * Original code by Ken Haase (1985?)
5060 static void
5061 Scheme_functions (inf)
5062 FILE *inf;
5064 register char *bp;
5066 LOOP_ON_INPUT_LINES (inf, lb, bp)
5068 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5070 bp = skip_non_spaces (bp+4);
5071 /* Skip over open parens and white space */
5072 while (notinname (*bp))
5073 bp++;
5074 get_tag (bp, NULL);
5076 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5077 get_tag (bp, NULL);
5082 /* Find tags in TeX and LaTeX input files. */
5084 /* TEX_toktab is a table of TeX control sequences that define tags.
5085 * Each entry records one such control sequence.
5087 * Original code from who knows whom.
5088 * Ideas by:
5089 * Stefan Monnier (2002)
5092 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5094 /* Default set of control sequences to put into TEX_toktab.
5095 The value of environment var TEXTAGS is prepended to this. */
5096 static char *TEX_defenv = "\
5097 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5098 :part:appendix:entry:index:def\
5099 :newcommand:renewcommand:newenvironment:renewenvironment";
5101 static void TEX_mode __P((FILE *));
5102 static void TEX_decode_env __P((char *, char *));
5104 static char TEX_esc = '\\';
5105 static char TEX_opgrp = '{';
5106 static char TEX_clgrp = '}';
5109 * TeX/LaTeX scanning loop.
5111 static void
5112 TeX_commands (inf)
5113 FILE *inf;
5115 char *cp;
5116 linebuffer *key;
5118 /* Select either \ or ! as escape character. */
5119 TEX_mode (inf);
5121 /* Initialize token table once from environment. */
5122 if (TEX_toktab == NULL)
5123 TEX_decode_env ("TEXTAGS", TEX_defenv);
5125 LOOP_ON_INPUT_LINES (inf, lb, cp)
5127 /* Look at each TEX keyword in line. */
5128 for (;;)
5130 /* Look for a TEX escape. */
5131 while (*cp++ != TEX_esc)
5132 if (cp[-1] == '\0' || cp[-1] == '%')
5133 goto tex_next_line;
5135 for (key = TEX_toktab; key->buffer != NULL; key++)
5136 if (strneq (cp, key->buffer, key->len))
5138 register char *p;
5139 int namelen, linelen;
5140 bool opgrp = FALSE;
5142 cp = skip_spaces (cp + key->len);
5143 if (*cp == TEX_opgrp)
5145 opgrp = TRUE;
5146 cp++;
5148 for (p = cp;
5149 (!iswhite (*p) && *p != '#' &&
5150 *p != TEX_opgrp && *p != TEX_clgrp);
5151 p++)
5152 continue;
5153 namelen = p - cp;
5154 linelen = lb.len;
5155 if (!opgrp || *p == TEX_clgrp)
5157 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5158 p++;
5159 linelen = p - lb.buffer + 1;
5161 make_tag (cp, namelen, TRUE,
5162 lb.buffer, linelen, lineno, linecharno);
5163 goto tex_next_line; /* We only tag a line once */
5166 tex_next_line:
5171 #define TEX_LESC '\\'
5172 #define TEX_SESC '!'
5174 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5175 chars accordingly. */
5176 static void
5177 TEX_mode (inf)
5178 FILE *inf;
5180 int c;
5182 while ((c = getc (inf)) != EOF)
5184 /* Skip to next line if we hit the TeX comment char. */
5185 if (c == '%')
5186 while (c != '\n' && c != EOF)
5187 c = getc (inf);
5188 else if (c == TEX_LESC || c == TEX_SESC )
5189 break;
5192 if (c == TEX_LESC)
5194 TEX_esc = TEX_LESC;
5195 TEX_opgrp = '{';
5196 TEX_clgrp = '}';
5198 else
5200 TEX_esc = TEX_SESC;
5201 TEX_opgrp = '<';
5202 TEX_clgrp = '>';
5204 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5205 No attempt is made to correct the situation. */
5206 rewind (inf);
5209 /* Read environment and prepend it to the default string.
5210 Build token table. */
5211 static void
5212 TEX_decode_env (evarname, defenv)
5213 char *evarname;
5214 char *defenv;
5216 register char *env, *p;
5217 int i, len;
5219 /* Append default string to environment. */
5220 env = getenv (evarname);
5221 if (!env)
5222 env = defenv;
5223 else
5225 char *oldenv = env;
5226 env = concat (oldenv, defenv, "");
5229 /* Allocate a token table */
5230 for (len = 1, p = env; p;)
5231 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5232 len++;
5233 TEX_toktab = xnew (len, linebuffer);
5235 /* Unpack environment string into token table. Be careful about */
5236 /* zero-length strings (leading ':', "::" and trailing ':') */
5237 for (i = 0; *env != '\0';)
5239 p = etags_strchr (env, ':');
5240 if (!p) /* End of environment string. */
5241 p = env + strlen (env);
5242 if (p - env > 0)
5243 { /* Only non-zero strings. */
5244 TEX_toktab[i].buffer = savenstr (env, p - env);
5245 TEX_toktab[i].len = p - env;
5246 i++;
5248 if (*p)
5249 env = p + 1;
5250 else
5252 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5253 TEX_toktab[i].len = 0;
5254 break;
5260 /* Texinfo support. Dave Love, Mar. 2000. */
5261 static void
5262 Texinfo_nodes (inf)
5263 FILE * inf;
5265 char *cp, *start;
5266 LOOP_ON_INPUT_LINES (inf, lb, cp)
5267 if (LOOKING_AT (cp, "@node"))
5269 start = cp;
5270 while (*cp != '\0' && *cp != ',')
5271 cp++;
5272 make_tag (start, cp - start, TRUE,
5273 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5279 * HTML support.
5280 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5281 * Contents of <a name=xxx> are tags with name xxx.
5283 * Francesco Potortì, 2002.
5285 static void
5286 HTML_labels (inf)
5287 FILE * inf;
5289 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5290 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5291 bool intag = FALSE; /* inside an html tag, looking for ID= */
5292 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5293 char *end;
5296 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5298 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5299 for (;;) /* loop on the same line */
5301 if (skiptag) /* skip HTML tag */
5303 while (*dbp != '\0' && *dbp != '>')
5304 dbp++;
5305 if (*dbp == '>')
5307 dbp += 1;
5308 skiptag = FALSE;
5309 continue; /* look on the same line */
5311 break; /* go to next line */
5314 else if (intag) /* look for "name=" or "id=" */
5316 while (*dbp != '\0' && *dbp != '>'
5317 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5318 dbp++;
5319 if (*dbp == '\0')
5320 break; /* go to next line */
5321 if (*dbp == '>')
5323 dbp += 1;
5324 intag = FALSE;
5325 continue; /* look on the same line */
5327 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5328 || LOOKING_AT_NOCASE (dbp, "id="))
5330 bool quoted = (dbp[0] == '"');
5332 if (quoted)
5333 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5334 continue;
5335 else
5336 for (end = dbp; *end != '\0' && intoken (*end); end++)
5337 continue;
5338 linebuffer_setlen (&token_name, end - dbp);
5339 strncpy (token_name.buffer, dbp, end - dbp);
5340 token_name.buffer[end - dbp] = '\0';
5342 dbp = end;
5343 intag = FALSE; /* we found what we looked for */
5344 skiptag = TRUE; /* skip to the end of the tag */
5345 getnext = TRUE; /* then grab the text */
5346 continue; /* look on the same line */
5348 dbp += 1;
5351 else if (getnext) /* grab next tokens and tag them */
5353 dbp = skip_spaces (dbp);
5354 if (*dbp == '\0')
5355 break; /* go to next line */
5356 if (*dbp == '<')
5358 intag = TRUE;
5359 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5360 continue; /* look on the same line */
5363 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5364 continue;
5365 make_tag (token_name.buffer, token_name.len, TRUE,
5366 dbp, end - dbp, lineno, linecharno);
5367 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5368 getnext = FALSE;
5369 break; /* go to next line */
5372 else /* look for an interesting HTML tag */
5374 while (*dbp != '\0' && *dbp != '<')
5375 dbp++;
5376 if (*dbp == '\0')
5377 break; /* go to next line */
5378 intag = TRUE;
5379 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5381 inanchor = TRUE;
5382 continue; /* look on the same line */
5384 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5385 || LOOKING_AT_NOCASE (dbp, "<h1>")
5386 || LOOKING_AT_NOCASE (dbp, "<h2>")
5387 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5389 intag = FALSE;
5390 getnext = TRUE;
5391 continue; /* look on the same line */
5393 dbp += 1;
5400 * Prolog support
5402 * Assumes that the predicate or rule starts at column 0.
5403 * Only the first clause of a predicate or rule is added.
5404 * Original code by Sunichirou Sugou (1989)
5405 * Rewritten by Anders Lindgren (1996)
5407 static int prolog_pr __P((char *, char *));
5408 static void prolog_skip_comment __P((linebuffer *, FILE *));
5409 static int prolog_atom __P((char *, int));
5411 static void
5412 Prolog_functions (inf)
5413 FILE *inf;
5415 char *cp, *last;
5416 int len;
5417 int allocated;
5419 allocated = 0;
5420 len = 0;
5421 last = NULL;
5423 LOOP_ON_INPUT_LINES (inf, lb, cp)
5425 if (cp[0] == '\0') /* Empty line */
5426 continue;
5427 else if (iswhite (cp[0])) /* Not a predicate */
5428 continue;
5429 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5430 prolog_skip_comment (&lb, inf);
5431 else if ((len = prolog_pr (cp, last)) > 0)
5433 /* Predicate or rule. Store the function name so that we
5434 only generate a tag for the first clause. */
5435 if (last == NULL)
5436 last = xnew(len + 1, char);
5437 else if (len + 1 > allocated)
5438 xrnew (last, len + 1, char);
5439 allocated = len + 1;
5440 strncpy (last, cp, len);
5441 last[len] = '\0';
5444 if (last != NULL)
5445 free (last);
5449 static void
5450 prolog_skip_comment (plb, inf)
5451 linebuffer *plb;
5452 FILE *inf;
5454 char *cp;
5458 for (cp = plb->buffer; *cp != '\0'; cp++)
5459 if (cp[0] == '*' && cp[1] == '/')
5460 return;
5461 readline (plb, inf);
5463 while (!feof(inf));
5467 * A predicate or rule definition is added if it matches:
5468 * <beginning of line><Prolog Atom><whitespace>(
5469 * or <beginning of line><Prolog Atom><whitespace>:-
5471 * It is added to the tags database if it doesn't match the
5472 * name of the previous clause header.
5474 * Return the size of the name of the predicate or rule, or 0 if no
5475 * header was found.
5477 static int
5478 prolog_pr (s, last)
5479 char *s;
5480 char *last; /* Name of last clause. */
5482 int pos;
5483 int len;
5485 pos = prolog_atom (s, 0);
5486 if (pos < 1)
5487 return 0;
5489 len = pos;
5490 pos = skip_spaces (s + pos) - s;
5492 if ((s[pos] == '.'
5493 || (s[pos] == '(' && (pos += 1))
5494 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5495 && (last == NULL /* save only the first clause */
5496 || len != (int)strlen (last)
5497 || !strneq (s, last, len)))
5499 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5500 return len;
5502 else
5503 return 0;
/*
 * Consume a Prolog atom starting at offset POS of S.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return (p + 1) - start; /* closing quote is part of the atom */
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5567 * Support for Erlang
5569 * Generates tags for functions, defines, and records.
5570 * Assumes that Erlang functions start at column 0.
5571 * Original code by Anders Lindgren (1996)
5573 static int erlang_func __P((char *, char *));
5574 static void erlang_attribute __P((char *));
5575 static int erlang_atom __P((char *));
5577 static void
5578 Erlang_functions (inf)
5579 FILE *inf;
5581 char *cp, *last;
5582 int len;
5583 int allocated;
5585 allocated = 0;
5586 len = 0;
5587 last = NULL;
5589 LOOP_ON_INPUT_LINES (inf, lb, cp)
5591 if (cp[0] == '\0') /* Empty line */
5592 continue;
5593 else if (iswhite (cp[0])) /* Not function nor attribute */
5594 continue;
5595 else if (cp[0] == '%') /* comment */
5596 continue;
5597 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5598 continue;
5599 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5601 erlang_attribute (cp);
5602 if (last != NULL)
5604 free (last);
5605 last = NULL;
5608 else if ((len = erlang_func (cp, last)) > 0)
5611 * Function. Store the function name so that we only
5612 * generates a tag for the first clause.
5614 if (last == NULL)
5615 last = xnew (len + 1, char);
5616 else if (len + 1 > allocated)
5617 xrnew (last, len + 1, char);
5618 allocated = len + 1;
5619 strncpy (last, cp, len);
5620 last[len] = '\0';
5623 if (last != NULL)
5624 free (last);
5629 * A function definition is added if it matches:
5630 * <beginning of line><Erlang Atom><whitespace>(
5632 * It is added to the tags database if it doesn't match the
5633 * name of the previous clause header.
5635 * Return the size of the name of the function, or 0 if no function
5636 * was found.
5638 static int
5639 erlang_func (s, last)
5640 char *s;
5641 char *last; /* Name of last clause. */
5643 int pos;
5644 int len;
5646 pos = erlang_atom (s);
5647 if (pos < 1)
5648 return 0;
5650 len = pos;
5651 pos = skip_spaces (s + pos) - s;
5653 /* Save only the first clause. */
5654 if (s[pos++] == '('
5655 && (last == NULL
5656 || len != (int)strlen (last)
5657 || !strneq (s, last, len)))
5659 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5660 return len;
5663 return 0;
5668 * Handle attributes. Currently, tags are generated for defines
5669 * and records.
5671 * They are on the form:
5672 * -define(foo, bar).
5673 * -define(Foo(M, N), M+N).
5674 * -record(graph, {vtab = notable, cyclic = true}).
5676 static void
5677 erlang_attribute (s)
5678 char *s;
5680 char *cp = s;
5682 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5683 && *cp++ == '(')
5685 int len = erlang_atom (skip_spaces (cp));
5686 if (len > 0)
5687 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5689 return;
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
      return p - s;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: find the closing quote.  A backslash makes the
     following character part of the atom.  */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\')
        {
          p++;
          if (*p == '\0')
            return 0;
        }
      p++;
    }

  return (p + 1) - s;           /* the closing quote is consumed too */
}
5723 static char *scan_separators __P((char *));
5724 static void add_regex __P((char *, language *));
5725 static char *substitute __P((char *, char *, struct re_registers *));
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character after it.  A backslash at
             the very end of the string is dropped.  */
          ++name;
          if (*name == '\0')
            break;

          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote.  */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
5787 /* Look at the argument of --regex or --no-regex and do the right
5788 thing. Same for each line of a regexp file. */
5789 static void
5790 analyse_regex (regex_arg)
5791 char *regex_arg;
5793 if (regex_arg == NULL)
5795 free_regexps (); /* --no-regex: remove existing regexps */
5796 return;
5799 /* A real --regexp option or a line in a regexp file. */
5800 switch (regex_arg[0])
5802 /* Comments in regexp file or null arg to --regex. */
5803 case '\0':
5804 case ' ':
5805 case '\t':
5806 break;
5808 /* Read a regex file. This is recursive and may result in a
5809 loop, which will stop when the file descriptors are exhausted. */
5810 case '@':
5812 FILE *regexfp;
5813 linebuffer regexbuf;
5814 char *regexfile = regex_arg + 1;
5816 /* regexfile is a file containing regexps, one per line. */
5817 regexfp = fopen (regexfile, "r");
5818 if (regexfp == NULL)
5820 pfatal (regexfile);
5821 return;
5823 linebuffer_init (&regexbuf);
5824 while (readline_internal (&regexbuf, regexfp) > 0)
5825 analyse_regex (regexbuf.buffer);
5826 free (regexbuf.buffer);
5827 fclose (regexfp);
5829 break;
5831 /* Regexp to be used for a specific language only. */
5832 case '{':
5834 language *lang;
5835 char *lang_name = regex_arg + 1;
5836 char *cp;
5838 for (cp = lang_name; *cp != '}'; cp++)
5839 if (*cp == '\0')
5841 error ("unterminated language name in regex: %s", regex_arg);
5842 return;
5844 *cp++ = '\0';
5845 lang = get_language_from_langname (lang_name);
5846 if (lang == NULL)
5847 return;
5848 add_regex (cp, lang);
5850 break;
5852 /* Regexp to be used for any language. */
5853 default:
5854 add_regex (regex_arg, NULL);
5855 break;
5859 /* Separate the regexp pattern, compile it,
5860 and care for optional name and modifiers. */
5861 static void
5862 add_regex (regexp_pattern, lang)
5863 char *regexp_pattern;
5864 language *lang;
5866 static struct re_pattern_buffer zeropattern;
5867 char sep, *pat, *name, *modifiers;
5868 const char *err;
5869 struct re_pattern_buffer *patbuf;
5870 regexp *rp;
5871 bool
5872 force_explicit_name = TRUE, /* do not use implicit tag names */
5873 ignore_case = FALSE, /* case is significant */
5874 multi_line = FALSE, /* matches are done one line at a time */
5875 single_line = FALSE; /* dot does not match newline */
5878 if (strlen(regexp_pattern) < 3)
5880 error ("null regexp", (char *)NULL);
5881 return;
5883 sep = regexp_pattern[0];
5884 name = scan_separators (regexp_pattern);
5885 if (name == NULL)
5887 error ("%s: unterminated regexp", regexp_pattern);
5888 return;
5890 if (name[1] == sep)
5892 error ("null name for regexp \"%s\"", regexp_pattern);
5893 return;
5895 modifiers = scan_separators (name);
5896 if (modifiers == NULL) /* no terminating separator --> no name */
5898 modifiers = name;
5899 name = "";
5901 else
5902 modifiers += 1; /* skip separator */
5904 /* Parse regex modifiers. */
5905 for (; modifiers[0] != '\0'; modifiers++)
5906 switch (modifiers[0])
5908 case 'N':
5909 if (modifiers == name)
5910 error ("forcing explicit tag name but no name, ignoring", NULL);
5911 force_explicit_name = TRUE;
5912 break;
5913 case 'i':
5914 ignore_case = TRUE;
5915 break;
5916 case 's':
5917 single_line = TRUE;
5918 /* FALLTHRU */
5919 case 'm':
5920 multi_line = TRUE;
5921 need_filebuf = TRUE;
5922 break;
5923 default:
5925 char wrongmod [2];
5926 wrongmod[0] = modifiers[0];
5927 wrongmod[1] = '\0';
5928 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5930 break;
5933 patbuf = xnew (1, struct re_pattern_buffer);
5934 *patbuf = zeropattern;
5935 if (ignore_case)
5937 static char lc_trans[CHARS];
5938 int i;
5939 for (i = 0; i < CHARS; i++)
5940 lc_trans[i] = lowcase (i);
5941 patbuf->translate = lc_trans; /* translation table to fold case */
5944 if (multi_line)
5945 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5946 else
5947 pat = regexp_pattern;
5949 if (single_line)
5950 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5951 else
5952 re_set_syntax (RE_SYNTAX_EMACS);
5954 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5955 if (multi_line)
5956 free (pat);
5957 if (err != NULL)
5959 error ("%s while compiling pattern", err);
5960 return;
5963 rp = p_head;
5964 p_head = xnew (1, regexp);
5965 p_head->pattern = savestr (regexp_pattern);
5966 p_head->p_next = rp;
5967 p_head->lang = lang;
5968 p_head->pat = patbuf;
5969 p_head->name = savestr (name);
5970 p_head->error_signaled = FALSE;
5971 p_head->force_explicit_name = force_explicit_name;
5972 p_head->ignore_case = ignore_case;
5973 p_head->multi_line = multi_line;
5977 * Do the substitutions indicated by the regular expression and
5978 * arguments.
5980 static char *
5981 substitute (in, out, regs)
5982 char *in, *out;
5983 struct re_registers *regs;
5985 char *result, *t;
5986 int size, dig, diglen;
5988 result = NULL;
5989 size = strlen (out);
5991 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5992 if (out[size - 1] == '\\')
5993 fatal ("pattern error in \"%s\"", out);
5994 for (t = etags_strchr (out, '\\');
5995 t != NULL;
5996 t = etags_strchr (t + 2, '\\'))
5997 if (ISDIGIT (t[1]))
5999 dig = t[1] - '0';
6000 diglen = regs->end[dig] - regs->start[dig];
6001 size += diglen - 2;
6003 else
6004 size -= 1;
6006 /* Allocate space and do the substitutions. */
6007 assert (size >= 0);
6008 result = xnew (size + 1, char);
6010 for (t = result; *out != '\0'; out++)
6011 if (*out == '\\' && ISDIGIT (*++out))
6013 dig = *out - '0';
6014 diglen = regs->end[dig] - regs->start[dig];
6015 strncpy (t, in + regs->start[dig], diglen);
6016 t += diglen;
6018 else
6019 *t++ = *out;
6020 *t = '\0';
6022 assert (t <= result + size);
6023 assert (t - result == (int)strlen (result));
6025 return result;
6028 /* Deallocate all regexps. */
6029 static void
6030 free_regexps ()
6032 regexp *rp;
6033 while (p_head != NULL)
6035 rp = p_head->p_next;
6036 free (p_head->pattern);
6037 free (p_head->name);
6038 free (p_head);
6039 p_head = rp;
6041 return;
6045 * Reads the whole file as a single string from `filebuf' and looks for
6046 * multi-line regular expressions, creating tags on matches.
6047 * readline already dealt with normal regexps.
6049 * Idea by Ben Wing <ben@666.com> (2002).
6051 static void
6052 regex_tag_multiline ()
6054 char *buffer = filebuf.buffer;
6055 regexp *rp;
6056 char *name;
6058 for (rp = p_head; rp != NULL; rp = rp->p_next)
6060 int match = 0;
6062 if (!rp->multi_line)
6063 continue; /* skip normal regexps */
6065 /* Generic initialisations before parsing file from memory. */
6066 lineno = 1; /* reset global line number */
6067 charno = 0; /* reset global char number */
6068 linecharno = 0; /* reset global char number of line start */
6070 /* Only use generic regexps or those for the current language. */
6071 if (rp->lang != NULL && rp->lang != curfdp->lang)
6072 continue;
6074 while (match >= 0 && match < filebuf.len)
6076 match = re_search (rp->pat, buffer, filebuf.len, charno,
6077 filebuf.len - match, &rp->regs);
6078 switch (match)
6080 case -2:
6081 /* Some error. */
6082 if (!rp->error_signaled)
6084 error ("regexp stack overflow while matching \"%s\"",
6085 rp->pattern);
6086 rp->error_signaled = TRUE;
6088 break;
6089 case -1:
6090 /* No match. */
6091 break;
6092 default:
6093 if (match == rp->regs.end[0])
6095 if (!rp->error_signaled)
6097 error ("regexp matches the empty string: \"%s\"",
6098 rp->pattern);
6099 rp->error_signaled = TRUE;
6101 match = -3; /* exit from while loop */
6102 break;
6105 /* Match occurred. Construct a tag. */
6106 while (charno < rp->regs.end[0])
6107 if (buffer[charno++] == '\n')
6108 lineno++, linecharno = charno;
6109 name = rp->name;
6110 if (name[0] == '\0')
6111 name = NULL;
6112 else /* make a named tag */
6113 name = substitute (buffer, rp->name, &rp->regs);
6114 if (rp->force_explicit_name)
6115 /* Force explicit tag name, if a name is there. */
6116 pfnote (name, TRUE, buffer + linecharno,
6117 charno - linecharno + 1, lineno, linecharno);
6118 else
6119 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6120 charno - linecharno + 1, lineno, linecharno);
6121 break;
6128 static bool
6129 nocase_tail (cp)
6130 char *cp;
6132 register int len = 0;
6134 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6135 cp++, len++;
6136 if (*cp == '\0' && !intoken (dbp[len]))
6138 dbp += len;
6139 return TRUE;
6141 return FALSE;
6144 static void
6145 get_tag (bp, namepp)
6146 register char *bp;
6147 char **namepp;
6149 register char *cp = bp;
6151 if (*bp != '\0')
6153 /* Go till you get to white space or a syntactic break */
6154 for (cp = bp + 1; !notinname (*cp); cp++)
6155 continue;
6156 make_tag (bp, cp - bp, TRUE,
6157 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6160 if (namepp != NULL)
6161 *namepp = savenstr (bp, cp - bp);
6165 * Read a line of text from `stream' into `lbp', excluding the
6166 * newline or CR-NL, if any. Return the number of characters read from
6167 * `stream', which is the length of the line including the newline.
6169 * On DOS or Windows we do not count the CR character, if any before the
6170 * NL, in the returned length; this mirrors the behavior of Emacs on those
6171 * platforms (for text files, it translates CR-NL to NL as it reads in the
6172 * file).
6174 * If multi-line regular expressions are requested, each line read is
6175 * appended to `filebuf'.
6177 static long
6178 readline_internal (lbp, stream)
6179 linebuffer *lbp;
6180 register FILE *stream;
6182 char *buffer = lbp->buffer;
6183 register char *p = lbp->buffer;
6184 register char *pend;
6185 int chars_deleted;
6187 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6189 for (;;)
6191 register int c = getc (stream);
6192 if (p == pend)
6194 /* We're at the end of linebuffer: expand it. */
6195 lbp->size *= 2;
6196 xrnew (buffer, lbp->size, char);
6197 p += buffer - lbp->buffer;
6198 pend = buffer + lbp->size;
6199 lbp->buffer = buffer;
6201 if (c == EOF)
6203 *p = '\0';
6204 chars_deleted = 0;
6205 break;
6207 if (c == '\n')
6209 if (p > buffer && p[-1] == '\r')
6211 p -= 1;
6212 #ifdef DOS_NT
6213 /* Assume CRLF->LF translation will be performed by Emacs
6214 when loading this file, so CRs won't appear in the buffer.
6215 It would be cleaner to compensate within Emacs;
6216 however, Emacs does not know how many CRs were deleted
6217 before any given point in the file. */
6218 chars_deleted = 1;
6219 #else
6220 chars_deleted = 2;
6221 #endif
6223 else
6225 chars_deleted = 1;
6227 *p = '\0';
6228 break;
6230 *p++ = c;
6232 lbp->len = p - buffer;
6234 if (need_filebuf /* we need filebuf for multi-line regexps */
6235 && chars_deleted > 0) /* not at EOF */
6237 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6239 /* Expand filebuf. */
6240 filebuf.size *= 2;
6241 xrnew (filebuf.buffer, filebuf.size, char);
6243 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6244 filebuf.len += lbp->len;
6245 filebuf.buffer[filebuf.len++] = '\n';
6246 filebuf.buffer[filebuf.len] = '\0';
6249 return lbp->len + chars_deleted;
6253 * Like readline_internal, above, but in addition try to match the
6254 * input line against relevant regular expressions and manage #line
6255 * directives.
6257 static void
6258 readline (lbp, stream)
6259 linebuffer *lbp;
6260 FILE *stream;
6262 long result;
6264 linecharno = charno; /* update global char number of line start */
6265 result = readline_internal (lbp, stream); /* read line */
6266 lineno += 1; /* increment global line number */
6267 charno += result; /* increment global char number */
6269 /* Honour #line directives. */
6270 if (!no_line_directive)
6272 static bool discard_until_line_directive;
6274 /* Check whether this is a #line directive. */
6275 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6277 unsigned int lno;
6278 int start = 0;
6280 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6281 && start > 0) /* double quote character found */
6283 char *endp = lbp->buffer + start;
6285 while ((endp = etags_strchr (endp, '"')) != NULL
6286 && endp[-1] == '\\')
6287 endp++;
6288 if (endp != NULL)
6289 /* Ok, this is a real #line directive. Let's deal with it. */
6291 char *taggedabsname; /* absolute name of original file */
6292 char *taggedfname; /* name of original file as given */
6293 char *name; /* temp var */
6295 discard_until_line_directive = FALSE; /* found it */
6296 name = lbp->buffer + start;
6297 *endp = '\0';
6298 canonicalize_filename (name); /* for DOS */
6299 taggedabsname = absolute_filename (name, tagfiledir);
6300 if (filename_is_absolute (name)
6301 || filename_is_absolute (curfdp->infname))
6302 taggedfname = savestr (taggedabsname);
6303 else
6304 taggedfname = relative_filename (taggedabsname,tagfiledir);
6306 if (streq (curfdp->taggedfname, taggedfname))
6307 /* The #line directive is only a line number change. We
6308 deal with this afterwards. */
6309 free (taggedfname);
6310 else
6311 /* The tags following this #line directive should be
6312 attributed to taggedfname. In order to do this, set
6313 curfdp accordingly. */
6315 fdesc *fdp; /* file description pointer */
6317 /* Go look for a file description already set up for the
6318 file indicated in the #line directive. If there is
6319 one, use it from now until the next #line
6320 directive. */
6321 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6322 if (streq (fdp->infname, curfdp->infname)
6323 && streq (fdp->taggedfname, taggedfname))
6324 /* If we remove the second test above (after the &&)
6325 then all entries pertaining to the same file are
6326 coalesced in the tags file. If we use it, then
6327 entries pertaining to the same file but generated
6328 from different files (via #line directives) will
6329 go into separate sections in the tags file. These
6330 alternatives look equivalent. The first one
6331 destroys some apparently useless information. */
6333 curfdp = fdp;
6334 free (taggedfname);
6335 break;
6337 /* Else, if we already tagged the real file, skip all
6338 input lines until the next #line directive. */
6339 if (fdp == NULL) /* not found */
6340 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6341 if (streq (fdp->infabsname, taggedabsname))
6343 discard_until_line_directive = TRUE;
6344 free (taggedfname);
6345 break;
6347 /* Else create a new file description and use that from
6348 now on, until the next #line directive. */
6349 if (fdp == NULL) /* not found */
6351 fdp = fdhead;
6352 fdhead = xnew (1, fdesc);
6353 *fdhead = *curfdp; /* copy curr. file description */
6354 fdhead->next = fdp;
6355 fdhead->infname = savestr (curfdp->infname);
6356 fdhead->infabsname = savestr (curfdp->infabsname);
6357 fdhead->infabsdir = savestr (curfdp->infabsdir);
6358 fdhead->taggedfname = taggedfname;
6359 fdhead->usecharno = FALSE;
6360 fdhead->prop = NULL;
6361 fdhead->written = FALSE;
6362 curfdp = fdhead;
6365 free (taggedabsname);
6366 lineno = lno - 1;
6367 readline (lbp, stream);
6368 return;
6369 } /* if a real #line directive */
6370 } /* if #line is followed by a a number */
6371 } /* if line begins with "#line " */
6373 /* If we are here, no #line directive was found. */
6374 if (discard_until_line_directive)
6376 if (result > 0)
6378 /* Do a tail recursion on ourselves, thus discarding the contents
6379 of the line buffer. */
6380 readline (lbp, stream);
6381 return;
6383 /* End of file. */
6384 discard_until_line_directive = FALSE;
6385 return;
6387 } /* if #line directives should be considered */
6390 int match;
6391 regexp *rp;
6392 char *name;
6394 /* Match against relevant regexps. */
6395 if (lbp->len > 0)
6396 for (rp = p_head; rp != NULL; rp = rp->p_next)
6398 /* Only use generic regexps or those for the current language.
6399 Also do not use multiline regexps, which is the job of
6400 regex_tag_multiline. */
6401 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6402 || rp->multi_line)
6403 continue;
6405 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6406 switch (match)
6408 case -2:
6409 /* Some error. */
6410 if (!rp->error_signaled)
6412 error ("regexp stack overflow while matching \"%s\"",
6413 rp->pattern);
6414 rp->error_signaled = TRUE;
6416 break;
6417 case -1:
6418 /* No match. */
6419 break;
6420 case 0:
6421 /* Empty string matched. */
6422 if (!rp->error_signaled)
6424 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6425 rp->error_signaled = TRUE;
6427 break;
6428 default:
6429 /* Match occurred. Construct a tag. */
6430 name = rp->name;
6431 if (name[0] == '\0')
6432 name = NULL;
6433 else /* make a named tag */
6434 name = substitute (lbp->buffer, rp->name, &rp->regs);
6435 if (rp->force_explicit_name)
6436 /* Force explicit tag name, if a name is there. */
6437 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6438 else
6439 make_tag (name, strlen (name), TRUE,
6440 lbp->buffer, match, lineno, linecharno);
6441 break;
/*
 * Duplicate the NUL-terminated string CP: the copy, made by savenstr,
 * lives in strlen(cp)+1 bytes obtained with xnew.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6460 * Return a pointer to a space of size LEN+1 allocated with xnew where
6461 * the string CP has been copied for at most the first LEN characters.
6463 static char *
6464 savenstr (cp, len)
6465 char *cp;
6466 int len;
6468 register char *dp;
6470 dp = xnew (len + 1, char);
6471 strncpy (dp, cp, len);
6472 dp[len] = '\0';
6473 return dp;
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to char before comparing, so byte
 * values above 127 are found even where plain char is signed, and the
 * terminating NUL is found when C is 0.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to char before comparing, so byte
 * values above 127 are found even where plain char is signed, and the
 * terminating NUL is found when C is 0.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  register char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * The result is zero when the strings are equal; otherwise it is the
 * difference between the first pair of characters that differ, taken
 * after lowcase when both are alphabetic.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * At most the first N characters take part in the comparison, so the
 * result is 0 whenever they are all equal -- including when N is 0 and
 * when S1 ends exactly after N characters.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still allowed to be compared; it is only
     decremented for characters that were found equal. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
/* Return a pointer to the first character at or after CP for which
   iswhite is false (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Return a pointer to the first character at or after CP that is
   either white space (iswhite) or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
/* Report a message through error (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print S1 followed by the system error message for the current errno
   (via perror), then terminate the program with EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6601 static void
6602 suggest_asking_for_help ()
6604 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6605 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6606 exit (EXIT_FAILURE);
6609 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6610 static void
6611 error (s1, s2)
6612 const char *s1, *s2;
6614 fprintf (stderr, "%s: ", progname);
6615 fprintf (stderr, s1, s2);
6616 fprintf (stderr, "\n");
6619 /* Return a newly-allocated string whose contents
6620 concatenate those of s1, s2, s3. */
6621 static char *
6622 concat (s1, s2, s3)
6623 char *s1, *s2, *s3;
6625 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6626 char *result = xnew (len1 + len2 + len3 + 1, char);
6628 strcpy (result, s1);
6629 strcpy (result + len1, s2);
6630 strcpy (result + len1 + len2, s3);
6631 result[len1 + len2 + len3] = '\0';
6633 return result;
6637 /* Does the same work as the system V getcwd, but does not need to
6638 guess the buffer size in advance. */
6639 static char *
6640 etags_getcwd ()
6642 #ifdef HAVE_GETCWD
6643 int bufsize = 200;
6644 char *path = xnew (bufsize, char);
6646 while (getcwd (path, bufsize) == NULL)
6648 if (errno != ERANGE)
6649 pfatal ("getcwd");
6650 bufsize *= 2;
6651 free (path);
6652 path = xnew (bufsize, char);
6655 canonicalize_filename (path);
6656 return path;
6658 #else /* not HAVE_GETCWD */
6659 #if MSDOS
6661 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6663 getwd (path);
6665 for (p = path; *p != '\0'; p++)
6666 if (*p == '\\')
6667 *p = '/';
6668 else
6669 *p = lowcase (*p);
6671 return strdup (path);
6672 #else /* not MSDOS */
6673 linebuffer path;
6674 FILE *pipe;
6676 linebuffer_init (&path);
6677 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6678 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6679 pfatal ("pwd");
6680 pclose (pipe);
6682 return path.buffer;
6683 #endif /* not MSDOS */
6684 #endif /* not HAVE_GETCWD */
6687 /* Return a newly allocated string containing the file name of FILE
6688 relative to the absolute directory DIR (which should end with a slash). */
6689 static char *
6690 relative_filename (file, dir)
6691 char *file, *dir;
6693 char *fp, *dp, *afn, *res;
6694 int i;
6696 /* Find the common root of file and dir (with a trailing slash). */
6697 afn = absolute_filename (file, cwd);
6698 fp = afn;
6699 dp = dir;
6700 while (*fp++ == *dp++)
6701 continue;
6702 fp--, dp--; /* back to the first differing char */
6703 #ifdef DOS_NT
6704 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6705 return afn;
6706 #endif
6707 do /* look at the equal chars until '/' */
6708 fp--, dp--;
6709 while (*fp != '/');
6711 /* Build a sequence of "../" strings for the resulting relative file name. */
6712 i = 0;
6713 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6714 i += 1;
6715 res = xnew (3*i + strlen (fp + 1) + 1, char);
6716 res[0] = '\0';
6717 while (i-- > 0)
6718 strcat (res, "../");
6720 /* Add the file name relative to the common root of file and dir. */
6721 strcat (res, fp + 1);
6722 free (afn);
6724 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  A FILE that is
   not already absolute is appended to DIR; "/dirname/.." and "/."
   components are then removed from the result in place. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *src, *dst;              /* cursors for the in-place deletions */

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle). */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'. */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow; the destination is the lower address, so a
                 forward copy is safe. */
              src = slashp + 3;
              dst = cp;
              while ((*dst++ = *src++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." with the same overlap-safe forward copy. */
              src = slashp + 2;
              dst = slashp;
              while ((*dst++ = *src++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6791 /* Return a newly allocated string containing the absolute
6792 file name of dir where FILE resides given DIR (which should
6793 end with a slash). */
6794 static char *
6795 absolute_dirname (file, dir)
6796 char *file, *dir;
6798 char *slashp, *res;
6799 char save;
6801 canonicalize_filename (file);
6802 slashp = etags_strrchr (file, '/');
6803 if (slashp == NULL)
6804 return savestr (dir);
6805 save = slashp[1];
6806 slashp[1] = '\0';
6807 res = absolute_filename (file, dir);
6808 slashp[1] = save;
6810 return res;
/* Whether the argument string is an absolute file name, that is, one
   starting with a slash or, under DOS_NT, with a drive letter followed
   by ":/".  The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
/* Translate backslashes into slashes.  Works in place.  Under DOS_NT a
   lower-case drive letter is converted to upper case as well; without
   DOS_NT the string is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes. */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
        *p = '/';
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6846 /* Initialize a linebuffer for use */
6847 static void
6848 linebuffer_init (lbp)
6849 linebuffer *lbp;
6851 lbp->size = (DEBUG) ? 3 : 200;
6852 lbp->buffer = xnew (lbp->size, char);
6853 lbp->buffer[0] = '\0';
6854 lbp->len = 0;
6857 /* Set the minimum size of a string contained in a linebuffer. */
6858 static void
6859 linebuffer_setlen (lbp, toksize)
6860 linebuffer *lbp;
6861 int toksize;
6863 while (lbp->size <= toksize)
6865 lbp->size *= 2;
6866 xrnew (lbp->buffer, lbp->size, char);
6868 lbp->len = toksize;
6871 /* Like malloc but get fatal error if memory is exhausted. */
6872 static PTR
6873 xmalloc (size)
6874 unsigned int size;
6876 PTR result = (PTR) malloc (size);
6877 if (result == NULL)
6878 fatal ("virtual memory exhausted", (char *)NULL);
6879 return result;
6882 static PTR
6883 xrealloc (ptr, size)
6884 char *ptr;
6885 unsigned int size;
6887 PTR result = (PTR) realloc (ptr, size);
6888 if (result == NULL)
6889 fatal ("virtual memory exhausted", (char *)NULL);
6890 return result;
6894 * Local Variables:
6895 * indent-tabs-mode: t
6896 * tab-width: 8
6897 * fill-column: 79
6898 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6899 * c-file-style: "gnu"
6900 * End:
6903 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6904 (do not change this comment) */
6906 /* etags.c ends here */