* buffer.c (struct sortstr.priority): Now EMACS_INT, not int.
[emacs.git] / lib-src / etags.c
blob693c999047fc06b30b3a480a48991fc8356c12ac
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
32 Free Software Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83 #define TRUE 1
84 #define FALSE 0
86 #ifdef DEBUG
87 # undef DEBUG
88 # define DEBUG TRUE
89 #else
90 # define DEBUG FALSE
91 # define NDEBUG /* disable assert */
92 #endif
94 #ifdef HAVE_CONFIG_H
95 # include <config.h>
96 /* This is probably not necessary any more. On some systems, config.h
97 used to define static as nothing for the sake of unexec. We don't
98 want that here since we don't use unexec. None of these systems
99 are supported any more, but the idea is still mentioned in
100 etc/PROBLEMS. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 #else /* no config.h */
106 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
107 # define PTR void * /* for generic pointers */
108 # else /* not standard C */
109 # define const /* remove const for old compilers' sake */
110 # define PTR long * /* don't use void* */
111 # endif
112 #endif /* !HAVE_CONFIG_H */
114 #ifndef _GNU_SOURCE
115 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
116 #endif
118 /* WIN32_NATIVE is for XEmacs.
119 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
120 #ifdef WIN32_NATIVE
121 # undef MSDOS
122 # undef WINDOWSNT
123 # define WINDOWSNT
124 #endif /* WIN32_NATIVE */
126 #ifdef MSDOS
127 # undef MSDOS
128 # define MSDOS TRUE
129 # include <fcntl.h>
130 # include <sys/param.h>
131 # include <io.h>
132 # ifndef HAVE_CONFIG_H
133 # define DOS_NT
134 # include <sys/config.h>
135 # endif
136 #else
137 # define MSDOS FALSE
138 #endif /* MSDOS */
140 #ifdef WINDOWSNT
141 # include <stdlib.h>
142 # include <fcntl.h>
143 # include <string.h>
144 # include <direct.h>
145 # include <io.h>
146 # define MAXPATHLEN _MAX_PATH
147 # undef HAVE_NTGUI
148 # undef DOS_NT
149 # define DOS_NT
150 # ifndef HAVE_GETCWD
151 # define HAVE_GETCWD
152 # endif /* undef HAVE_GETCWD */
153 #else /* not WINDOWSNT */
154 # ifdef STDC_HEADERS
155 # include <stdlib.h>
156 # include <string.h>
157 # else /* no standard C headers */
158 extern char *getenv (const char *);
159 extern char *strcpy (char *, const char *);
160 extern char *strncpy (char *, const char *, unsigned long);
161 extern char *strcat (char *, const char *);
162 extern char *strncat (char *, const char *, unsigned long);
163 extern int strcmp (const char *, const char *);
164 extern int strncmp (const char *, const char *, unsigned long);
165 extern int system (const char *);
166 extern unsigned long strlen (const char *);
167 extern void *malloc (unsigned long);
168 extern void *realloc (void *, unsigned long);
169 extern void exit (int);
170 extern void free (void *);
171 extern void *memmove (void *, const void *, unsigned long);
172 # define EXIT_SUCCESS 0
173 # define EXIT_FAILURE 1
174 # endif
175 #endif /* !WINDOWSNT */
177 #include <unistd.h>
178 #ifndef HAVE_UNISTD_H
179 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
180 extern char *getcwd (char *buf, size_t size);
181 # endif
182 #endif /* HAVE_UNISTD_H */
184 #include <stdio.h>
185 #include <ctype.h>
186 #include <errno.h>
187 #include <sys/types.h>
188 #include <sys/stat.h>
190 #include <assert.h>
191 #ifdef NDEBUG
192 # undef assert /* some systems have a buggy assert.h */
193 # define assert(x) ((void) 0)
194 #endif
196 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
197 # define NO_LONG_OPTIONS TRUE
198 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
199 extern char *optarg;
200 extern int optind, opterr;
201 #else
202 # define NO_LONG_OPTIONS FALSE
203 # include <getopt.h>
204 #endif /* NO_LONG_OPTIONS */
206 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
207 # ifdef __CYGWIN__ /* compiling on Cygwin */
208 !!! NOTICE !!!
209 the regex.h distributed with Cygwin is not compatible with etags, alas!
210 If you want regular expression support, you should delete this notice and
211 arrange to use the GNU regex.h and regex.c.
212 # endif
213 #endif
214 #include <regex.h>
216 /* Define CTAGS to make the program "ctags" compatible with the usual one.
217 Leave it undefined to make the program "etags", which makes emacs-style
218 tag tables and tags typedefs, #defines and struct/union/enum by default. */
219 #ifdef CTAGS
220 # undef CTAGS
221 # define CTAGS TRUE
222 #else
223 # define CTAGS FALSE
224 #endif
/*
 * String equality predicates.  Each evaluates to nonzero when the two
 * strings compare equal and to zero otherwise:
 *   streq       -- whole strings, case-sensitive (strcmp)
 *   strcaseeq   -- whole strings, case-insensitive (etags_strcasecmp)
 *   strneq      -- first N chars, case-sensitive (strncmp)
 *   strncaseeq  -- first N chars, case-insensitive (etags_strncasecmp)
 *
 * BOTH operands must be non-NULL; this is checked with assert, which
 * this file turns into a no-op when NDEBUG is defined.  The check has
 * to be a conjunction: with `||' a single NULL operand would pass the
 * assertion and still be handed to the comparison function.
 *
 * These are macros, so S and T are evaluated more than once; do not
 * pass expressions with side effects.
 */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s) != NULL && (t) != NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s) != NULL && (t) != NULL), !etags_strncasecmp (s, t, n))
231 #define CHARS 256 /* 2^CHAR_BIT, i.e. the number of distinct 8-bit char values */
232 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
233 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
234 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
235 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
236 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
237 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
239 #define ISALNUM(c) isalnum (CHAR(c))
240 #define ISALPHA(c) isalpha (CHAR(c))
241 #define ISDIGIT(c) isdigit (CHAR(c))
242 #define ISLOWER(c) islower (CHAR(c))
244 #define lowcase(c) tolower (CHAR(c))
248 * xnew, xrnew -- allocate, reallocate storage
250 * SYNOPSIS: Type *xnew (int n, Type);
251 * void xrnew (OldPointer, int n, Type);
253 #if DEBUG
254 # include "chkmalloc.h"
255 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
256 (n) * sizeof (Type)))
257 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
258 (char *) (op), (n) * sizeof (Type)))
259 #else
260 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
261 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
262 (char *) (op), (n) * sizeof (Type)))
263 #endif
265 #define bool int
267 typedef void Lang_function (FILE *);
269 typedef struct
271 const char *suffix; /* file name suffix for this compressor */
272 const char *command; /* takes one arg and decompresses to stdout */
273 } compressor;
275 typedef struct
277 const char *name; /* language name */
278 const char *help; /* detailed help for the language */
279 Lang_function *function; /* parse function */
280 const char **suffixes; /* name suffixes of this language's files */
281 const char **filenames; /* names of this language's files */
282 const char **interpreters; /* interpreters for this language */
283 bool metasource; /* source used to generate other sources */
284 } language;
286 typedef struct fdesc
288 struct fdesc *next; /* for the linked list */
289 char *infname; /* uncompressed input file name */
290 char *infabsname; /* absolute uncompressed input file name */
291 char *infabsdir; /* absolute dir of input file */
292 char *taggedfname; /* file name to write in tagfile */
293 language *lang; /* language of file */
294 char *prop; /* file properties to write in tagfile */
295 bool usecharno; /* etags tags shall contain char number */
296 bool written; /* entry written in the tags file */
297 } fdesc;
299 typedef struct node_st
300 { /* sorting structure */
301 struct node_st *left, *right; /* left and right sons */
302 fdesc *fdp; /* description of file to whom tag belongs */
303 char *name; /* tag name */
304 char *regex; /* search regexp */
305 bool valid; /* write this tag on the tag file */
306 bool is_func; /* function tag: use regexp in CTAGS mode */
307 bool been_warned; /* warning already given for duplicated tag */
308 int lno; /* line number tag is on */
309 long cno; /* character number line starts on */
310 } node;
313 * A `linebuffer' is a structure which holds a line of text.
314 * `readline_internal' reads a line from a stream into a linebuffer
315 * and works regardless of the length of the line.
316 * SIZE is the size of BUFFER, LEN is the length of the string in
317 * BUFFER after readline reads it.
319 typedef struct
321 long size;
322 int len;
323 char *buffer;
324 } linebuffer;
326 /* Used to support mixing of --lang and file names. */
327 typedef struct
329 enum {
330 at_language, /* a language specification */
331 at_regexp, /* a regular expression */
332 at_filename, /* a file name */
333 at_stdin, /* read from stdin here */
334 at_end /* stop parsing the list */
335 } arg_type; /* argument type */
336 language *lang; /* language associated with the argument */
337 char *what; /* the argument itself */
338 } argument;
340 /* Structure defining a regular expression. */
341 typedef struct regexp
343 struct regexp *p_next; /* pointer to next in list */
344 language *lang; /* if set, use only for this language */
345 char *pattern; /* the regexp pattern */
346 char *name; /* tag name */
347 struct re_pattern_buffer *pat; /* the compiled pattern */
348 struct re_registers regs; /* re registers */
349 bool error_signaled; /* already signaled for this regexp */
350 bool force_explicit_name; /* do not allow implicit tag name */
351 bool ignore_case; /* ignore case when matching */
352 bool multi_line; /* do a multi-line match on the whole file */
353 } regexp;
356 /* Many compilers barf on this:
357 Lang_function Ada_funcs;
358 so let's write it this way */
359 static void Ada_funcs (FILE *);
360 static void Asm_labels (FILE *);
361 static void C_entries (int c_ext, FILE *);
362 static void default_C_entries (FILE *);
363 static void plain_C_entries (FILE *);
364 static void Cjava_entries (FILE *);
365 static void Cobol_paragraphs (FILE *);
366 static void Cplusplus_entries (FILE *);
367 static void Cstar_entries (FILE *);
368 static void Erlang_functions (FILE *);
369 static void Forth_words (FILE *);
370 static void Fortran_functions (FILE *);
371 static void HTML_labels (FILE *);
372 static void Lisp_functions (FILE *);
373 static void Lua_functions (FILE *);
374 static void Makefile_targets (FILE *);
375 static void Pascal_functions (FILE *);
376 static void Perl_functions (FILE *);
377 static void PHP_functions (FILE *);
378 static void PS_functions (FILE *);
379 static void Prolog_functions (FILE *);
380 static void Python_functions (FILE *);
381 static void Scheme_functions (FILE *);
382 static void TeX_commands (FILE *);
383 static void Texinfo_nodes (FILE *);
384 static void Yacc_entries (FILE *);
385 static void just_read_file (FILE *);
387 static void print_language_names (void);
388 static void print_version (void);
389 static void print_help (argument *);
390 int main (int, char **);
392 static compressor *get_compressor_from_suffix (char *, char **);
393 static language *get_language_from_langname (const char *);
394 static language *get_language_from_interpreter (char *);
395 static language *get_language_from_filename (char *, bool);
396 static void readline (linebuffer *, FILE *);
397 static long readline_internal (linebuffer *, FILE *);
398 static bool nocase_tail (const char *);
399 static void get_tag (char *, char **);
401 static void analyse_regex (char *);
402 static void free_regexps (void);
403 static void regex_tag_multiline (void);
404 static void error (const char *, const char *);
405 static void suggest_asking_for_help (void) NO_RETURN;
406 void fatal (const char *, const char *) NO_RETURN;
407 static void pfatal (const char *) NO_RETURN;
408 static void add_node (node *, node **);
410 static void init (void);
411 static void process_file_name (char *, language *);
412 static void process_file (FILE *, char *, language *);
413 static void find_entries (FILE *);
414 static void free_tree (node *);
415 static void free_fdesc (fdesc *);
416 static void pfnote (char *, bool, char *, int, int, long);
417 static void make_tag (const char *, int, bool, char *, int, int, long);
418 static void invalidate_nodes (fdesc *, node **);
419 static void put_entries (node *);
421 static char *concat (const char *, const char *, const char *);
422 static char *skip_spaces (char *);
423 static char *skip_non_spaces (char *);
424 static char *savenstr (const char *, int);
425 static char *savestr (const char *);
426 static char *etags_strchr (const char *, int);
427 static char *etags_strrchr (const char *, int);
428 static int etags_strcasecmp (const char *, const char *);
429 static int etags_strncasecmp (const char *, const char *, int);
430 static char *etags_getcwd (void);
431 static char *relative_filename (char *, char *);
432 static char *absolute_filename (char *, char *);
433 static char *absolute_dirname (char *, char *);
434 static bool filename_is_absolute (char *f);
435 static void canonicalize_filename (char *);
436 static void linebuffer_init (linebuffer *);
437 static void linebuffer_setlen (linebuffer *, int);
438 static PTR xmalloc (unsigned int);
439 static PTR xrealloc (char *, unsigned int);
442 static char searchar = '/'; /* use /.../ searches */
444 static char *tagfile; /* output file */
445 static char *progname; /* name this program was invoked with */
446 static char *cwd; /* current working directory */
447 static char *tagfiledir; /* directory of tagfile */
448 static FILE *tagf; /* ioptr for tags file */
450 static fdesc *fdhead; /* head of file description list */
451 static fdesc *curfdp; /* current file description */
452 static int lineno; /* line number of current line */
453 static long charno; /* current character number */
454 static long linecharno; /* charno of start of current line */
455 static char *dbp; /* pointer to start of current tag */
457 static const int invalidcharno = -1;
459 static node *nodehead; /* the head of the binary tree of tags */
460 static node *last_node; /* the last node created */
462 static linebuffer lb; /* the current line */
463 static linebuffer filebuf; /* a buffer containing the whole file */
464 static linebuffer token_name; /* a buffer containing a tag name */
466 /* boolean "functions" (see init) */
467 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
468 static const char
469 /* white chars */
470 *white = " \f\t\n\r\v",
471 /* not in a name */
472 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
473 /* token ending chars */
474 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
475 /* token starting chars */
476 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
477 /* valid in-token chars */
478 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
480 static bool append_to_tagfile; /* -a: append to tags */
481 /* The next five default to TRUE in C and derived languages. */
482 static bool typedefs; /* -t: create tags for C and Ada typedefs */
483 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
484 /* 0 struct/enum/union decls, and C++ */
485 /* member functions. */
486 static bool constantypedefs; /* -d: create tags for C #define, enum */
487 /* constants and variables. */
488 /* -D: opposite of -d. Default under ctags. */
489 static bool globals; /* create tags for global variables */
490 static bool members; /* create tags for C member variables */
491 static bool declarations; /* --declarations: tag them and extern in C&Co*/
492 static bool no_line_directive; /* ignore #line directives (undocumented) */
493 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
494 static bool update; /* -u: update tags */
495 static bool vgrind_style; /* -v: create vgrind style index output */
496 static bool no_warnings; /* -w: suppress warnings (undocumented) */
497 static bool cxref_style; /* -x: create cxref style output */
498 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
499 static bool ignoreindent; /* -I: ignore indentation in C */
500 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
502 /* STDIN is defined in LynxOS system headers */
503 #ifdef STDIN
504 # undef STDIN
505 #endif
507 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
508 static bool parsing_stdin; /* --parse-stdin used */
510 static regexp *p_head; /* list of all regexps */
511 static bool need_filebuf; /* some regexes are multi-line */
513 static struct option longopts[] =
515 { "append", no_argument, NULL, 'a' },
516 { "packages-only", no_argument, &packages_only, TRUE },
517 { "c++", no_argument, NULL, 'C' },
518 { "declarations", no_argument, &declarations, TRUE },
519 { "no-line-directive", no_argument, &no_line_directive, TRUE },
520 { "no-duplicates", no_argument, &no_duplicates, TRUE },
521 { "help", no_argument, NULL, 'h' },
522 { "help", no_argument, NULL, 'H' },
523 { "ignore-indentation", no_argument, NULL, 'I' },
524 { "language", required_argument, NULL, 'l' },
525 { "members", no_argument, &members, TRUE },
526 { "no-members", no_argument, &members, FALSE },
527 { "output", required_argument, NULL, 'o' },
528 { "regex", required_argument, NULL, 'r' },
529 { "no-regex", no_argument, NULL, 'R' },
530 { "ignore-case-regex", required_argument, NULL, 'c' },
531 { "parse-stdin", required_argument, NULL, STDIN },
532 { "version", no_argument, NULL, 'V' },
534 #if CTAGS /* Ctags options */
535 { "backward-search", no_argument, NULL, 'B' },
536 { "cxref", no_argument, NULL, 'x' },
537 { "defines", no_argument, NULL, 'd' },
538 { "globals", no_argument, &globals, TRUE },
539 { "typedefs", no_argument, NULL, 't' },
540 { "typedefs-and-c++", no_argument, NULL, 'T' },
541 { "update", no_argument, NULL, 'u' },
542 { "vgrind", no_argument, NULL, 'v' },
543 { "no-warn", no_argument, NULL, 'w' },
545 #else /* Etags options */
546 { "no-defines", no_argument, NULL, 'D' },
547 { "no-globals", no_argument, &globals, FALSE },
548 { "include", required_argument, NULL, 'i' },
549 #endif
550 { NULL }
553 static compressor compressors[] =
555 { "z", "gzip -d -c"},
556 { "Z", "gzip -d -c"},
557 { "gz", "gzip -d -c"},
558 { "GZ", "gzip -d -c"},
559 { "bz2", "bzip2 -d -c" },
560 { "xz", "xz -d -c" },
561 { NULL }
565 * Language stuff.
568 /* Ada code */
569 static const char *Ada_suffixes [] =
570 { "ads", "adb", "ada", NULL };
571 static const char Ada_help [] =
572 "In Ada code, functions, procedures, packages, tasks and types are\n\
573 tags. Use the `--packages-only' option to create tags for\n\
574 packages only.\n\
575 Ada tag names have suffixes indicating the type of entity:\n\
576 Entity type: Qualifier:\n\
577 ------------ ----------\n\
578 function /f\n\
579 procedure /p\n\
580 package spec /s\n\
581 package body /b\n\
582 type /t\n\
583 task /k\n\
584 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
585 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
586 will just search for any tag `bidule'.";
588 /* Assembly code */
589 static const char *Asm_suffixes [] =
590 { "a", /* Unix assembler */
591 "asm", /* Microcontroller assembly */
592 "def", /* BSO/Tasking definition includes */
593 "inc", /* Microcontroller include files */
594 "ins", /* Microcontroller include files */
595 "s", "sa", /* Unix assembler */
596 "S", /* cpp-processed Unix assembler */
597 "src", /* BSO/Tasking C compiler output */
598 NULL
600 static const char Asm_help [] =
601 "In assembler code, labels appearing at the beginning of a line,\n\
602 followed by a colon, are tags.";
605 /* Note that .c and .h can be considered C++, if the --c++ flag was
606 given, or if the `class' or `template' keywords are met inside the file.
607 That is why default_C_entries is called for these. */
608 static const char *default_C_suffixes [] =
609 { "c", "h", NULL };
610 #if CTAGS /* C help for Ctags */
611 static const char default_C_help [] =
612 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
613 Use -T to tag definitions of `struct', `union' and `enum'.\n\
614 Use -d to tag `#define' macro definitions and `enum' constants.\n\
615 Use --globals to tag global variables.\n\
616 You can tag function declarations and external variables by\n\
617 using `--declarations', and struct members by using `--members'.";
618 #else /* C help for Etags */
619 static const char default_C_help [] =
620 "In C code, any C function or typedef is a tag, and so are\n\
621 definitions of `struct', `union' and `enum'. `#define' macro\n\
622 definitions and `enum' constants are tags unless you specify\n\
623 `--no-defines'. Global variables are tags unless you specify\n\
624 `--no-globals' and so are struct members unless you specify\n\
625 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
626 `--no-members' can make the tags table file much smaller.\n\
627 You can tag function declarations and external variables by\n\
628 using `--declarations'.";
629 #endif /* C help for Ctags and Etags */
631 static const char *Cplusplus_suffixes [] =
632 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
633 "M", /* Objective C++ */
634 "pdb", /* Postscript with C syntax */
635 NULL };
636 static const char Cplusplus_help [] =
637 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
638 --help --lang=c --lang=c++ for full help.)\n\
639 In addition to C tags, member functions are also recognized. Member\n\
640 variables are recognized unless you use the `--no-members' option.\n\
641 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
642 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
643 `operator+'.";
/* Java: file name suffixes and the text printed by --help --lang=java.
   The help text is read-only data, so it is const-qualified like the
   help strings of the other languages.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";

/* Cobol: file name suffixes and help text (read-only, hence const).  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
658 static const char *Cstar_suffixes [] =
659 { "cs", "hs", NULL };
661 static const char *Erlang_suffixes [] =
662 { "erl", "hrl", NULL };
663 static const char Erlang_help [] =
664 "In Erlang code, the tags are the functions, records and macros\n\
665 defined in the file.";
/* Forth: file name suffixes and help text.  The suffix table is only
   referenced from this file, so it gets internal linkage like every
   other language's suffix table.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
673 static const char *Fortran_suffixes [] =
674 { "F", "f", "f90", "for", NULL };
675 static const char Fortran_help [] =
676 "In Fortran code, functions, subroutines and block data are tags.";
678 static const char *HTML_suffixes [] =
679 { "htm", "html", "shtml", NULL };
680 static const char HTML_help [] =
681 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
682 `h3' headers. Also, tags are `name=' in anchors and all\n\
683 occurrences of `id='.";
685 static const char *Lisp_suffixes [] =
686 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
687 static const char Lisp_help [] =
688 "In Lisp code, any function defined with `defun', any variable\n\
689 defined with `defvar' or `defconst', and in general the first\n\
690 argument of any expression that starts with `(def' in column zero\n\
691 is a tag.";
693 static const char *Lua_suffixes [] =
694 { "lua", "LUA", NULL };
695 static const char Lua_help [] =
696 "In Lua scripts, all functions are tags.";
698 static const char *Makefile_filenames [] =
699 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
700 static const char Makefile_help [] =
701 "In makefiles, targets are tags; additionally, variables are tags\n\
702 unless you specify `--no-globals'.";
704 static const char *Objc_suffixes [] =
705 { "lm", /* Objective lex file */
706 "m", /* Objective C file */
707 NULL };
708 static const char Objc_help [] =
709 "In Objective C code, tags include Objective C definitions for classes,\n\
710 class categories, methods and protocols. Tags for variables and\n\
711 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
712 (Use --help --lang=c --lang=objc --lang=java for full help.)";
714 static const char *Pascal_suffixes [] =
715 { "p", "pas", NULL };
716 static const char Pascal_help [] =
717 "In Pascal code, the tags are the functions and procedures defined\n\
718 in the file.";
719 /* " // this is for working around an Emacs highlighting bug... */
721 static const char *Perl_suffixes [] =
722 { "pl", "pm", NULL };
723 static const char *Perl_interpreters [] =
724 { "perl", "@PERL@", NULL };
725 static const char Perl_help [] =
726 "In Perl code, the tags are the packages, subroutines and variables\n\
727 defined by the `package', `sub', `my' and `local' keywords. Use\n\
728 `--globals' if you want to tag global variables. Tags for\n\
729 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
730 defined in the default package is `main::SUB'.";
732 static const char *PHP_suffixes [] =
733 { "php", "php3", "php4", NULL };
734 static const char PHP_help [] =
735 "In PHP code, tags are functions, classes and defines. Unless you use\n\
736 the `--no-members' option, vars are tags too.";
738 static const char *plain_C_suffixes [] =
739 { "pc", /* Pro*C file */
740 NULL };
742 static const char *PS_suffixes [] =
743 { "ps", "psw", NULL }; /* .psw is for PSWrap */
744 static const char PS_help [] =
745 "In PostScript code, the tags are the functions.";
747 static const char *Prolog_suffixes [] =
748 { "prolog", NULL };
749 static const char Prolog_help [] =
750 "In Prolog code, tags are predicates and rules at the beginning of\n\
751 line.";
753 static const char *Python_suffixes [] =
754 { "py", NULL };
755 static const char Python_help [] =
756 "In Python code, `def' or `class' at the beginning of a line\n\
757 generate a tag.";
759 /* Can't do the `SCM' or `scm' prefix with a version number. */
760 static const char *Scheme_suffixes [] =
761 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
762 static const char Scheme_help [] =
763 "In Scheme code, tags include anything defined with `def' or with a\n\
764 construct whose name starts with `def'. They also include\n\
765 variables set with `set!' at top level in the file.";
767 static const char *TeX_suffixes [] =
768 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
769 static const char TeX_help [] =
770 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
771 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
772 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
773 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
774 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
776 Other commands can be specified by setting the environment variable\n\
777 `TEXTAGS' to a colon-separated list like, for example,\n\
778 TEXTAGS=\"mycommand:myothercommand\".";
781 static const char *Texinfo_suffixes [] =
782 { "texi", "texinfo", "txi", NULL };
783 static const char Texinfo_help [] =
784 "for texinfo files, lines starting with @node are tagged.";
786 static const char *Yacc_suffixes [] =
787 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
788 static const char Yacc_help [] =
789 "In Bison or Yacc input files, each rule defines as a tag the\n\
790 nonterminal it constructs. The portions of the file that contain\n\
791 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
792 for full help).";
794 static const char auto_help [] =
795 "`auto' is not a real language, it indicates to use\n\
796 a default language for files base on file name suffix and file contents.";
798 static const char none_help [] =
799 "`none' is not a real language, it indicates to only do\n\
800 regexp processing on files.";
802 static const char no_lang_help [] =
803 "No detailed help available for this language.";
807 * Table of languages.
809 * It is ok for a given function to be listed under more than one
810 * name. I just didn't.
813 static language lang_names [] =
815 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
816 { "asm", Asm_help, Asm_labels, Asm_suffixes },
817 { "c", default_C_help, default_C_entries, default_C_suffixes },
818 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
819 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
820 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
821 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
822 { "forth", Forth_help, Forth_words, Forth_suffixes },
823 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
824 { "html", HTML_help, HTML_labels, HTML_suffixes },
825 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
826 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
827 { "lua", Lua_help, Lua_functions, Lua_suffixes },
828 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
829 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
830 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
831 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
832 { "php", PHP_help, PHP_functions, PHP_suffixes },
833 { "postscript",PS_help, PS_functions, PS_suffixes },
834 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
835 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
836 { "python", Python_help, Python_functions, Python_suffixes },
837 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
838 { "tex", TeX_help, TeX_commands, TeX_suffixes },
839 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
840 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
841 { "auto", auto_help }, /* default guessing scheme */
842 { "none", none_help, just_read_file }, /* regexp matching only */
843 { NULL } /* end of list */
/* Print on standard output the list of supported languages: one line
   per entry of lang_names, holding the language name followed by its
   whole file names and its suffixes (each suffix preceded by a dot).
   The list is followed by a description of how the language of a file
   is chosen and of compressed file support. */
847 static void
848 print_language_names (void)
850 language *lang;
851 const char **name, **ext;
853 puts ("\nThese are the currently supported languages, along with the\n\
854 default file names and dot suffixes:");
/* The table ends at the first entry whose name is NULL. */
855 for (lang = lang_names; lang->name != NULL; lang++)
/* The language name is left-justified in a field of width 10. */
857 printf (" %-*s", 10, lang->name);
/* Both lists are optional and, when present, NULL-terminated. */
858 if (lang->filenames != NULL)
859 for (name = lang->filenames; *name != NULL; name++)
860 printf (" %s", *name);
861 if (lang->suffixes != NULL)
862 for (ext = lang->suffixes; *ext != NULL; ext++)
863 printf (" .%s", *ext);
/* Terminate the line of this language. */
864 puts ("");
866 puts ("where `auto' means use default language for files based on file\n\
867 name suffix, and `none' means only do regexp processing on files.\n\
868 If no language is specified and no matching suffix is found,\n\
869 the first line of the file is read for a sharp-bang (#!) sequence\n\
870 followed by the name of an interpreter. If no such sequence is found,\n\
871 Fortran is tried first; if no tags are found, C is tried next.\n\
872 When parsing any C file, a \"class\" or \"template\" keyword\n\
873 switches to C++.");
874 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
876 For detailed help on a given language use, for example,\n\
877 etags --help --lang=ada.");
880 #ifndef EMACS_NAME
881 # define EMACS_NAME "standalone"
882 #endif
883 #ifndef VERSION
884 # define VERSION "17.38.1.4"
885 #endif
886 static void
887 print_version (void)
889 /* Makes it easier to update automatically. */
890 char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
892 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
893 puts (emacs_copyright);
894 puts ("This program is distributed under the terms in ETAGS.README");
896 exit (EXIT_SUCCESS);
/* When true, print_help also describes the options that are guarded
   by this macro below; it defaults to FALSE. */
899 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
900 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
901 #endif
/* Print help on standard output and exit with EXIT_SUCCESS.
   ARGBUFFER is the array of parsed arguments, terminated by an entry
   of type at_end.  If it contains at_language entries, only the help
   text of each such language is printed (separated by empty lines).
   Otherwise the usage line, the description of the options, the list
   of languages and the bug report address are printed.  Which options
   are described depends on CTAGS, NO_LONG_OPTIONS and
   PRINT_UNDOCUMENTED_OPTIONS_HELP. */
903 static void
904 print_help (argument *argbuffer)
906 bool help_for_lang = FALSE;
/* Language-specific help: one help text per at_language argument. */
908 for (; argbuffer->arg_type != at_end; argbuffer++)
909 if (argbuffer->arg_type == at_language)
911 if (help_for_lang)
912 puts ("");
913 puts (argbuffer->lang->help);
914 help_for_lang = TRUE;
/* If any language help was printed, the general help is skipped. */
917 if (help_for_lang)
918 exit (EXIT_SUCCESS);
920 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
922 These are the options accepted by %s.\n", progname, progname);
923 if (NO_LONG_OPTIONS)
924 puts ("WARNING: long option names do not work with this executable,\n\
925 as it is not linked with GNU getopt.");
926 else
927 puts ("You may use unambiguous abbreviations for the long option names.");
928 puts (" A - as file name means read names from stdin (one per line).\n\
929 Absolute names are stored in the output file as they are.\n\
930 Relative ones are stored relative to the output file's directory.\n");
932 puts ("-a, --append\n\
933 Append tag entries to existing tags file.");
935 puts ("--packages-only\n\
936 For Ada files, only generate tags for packages.");
938 if (CTAGS)
939 puts ("-B, --backward-search\n\
940 Write the search commands for the tag entries using '?', the\n\
941 backward-search command instead of '/', the forward-search command.");
943 /* This option is mostly obsolete, because etags can now automatically
944 detect C++. Retained for backward compatibility and for debugging and
945 experimentation. In principle, we could want to tag as C++ even
946 before any "class" or "template" keyword.
947 puts ("-C, --c++\n\
948 Treat files whose name suffix defaults to C language as C++ files.");
951 puts ("--declarations\n\
952 In C and derived languages, create tags for function declarations,");
953 if (CTAGS)
954 puts ("\tand create tags for extern variables if --globals is used.");
955 else
956 puts
957 ("\tand create tags for extern variables unless --no-globals is used.");
959 if (CTAGS)
960 puts ("-d, --defines\n\
961 Create tag entries for C #define constants and enum constants, too.");
962 else
963 puts ("-D, --no-defines\n\
964 Don't create tag entries for C #define constants and enum constants.\n\
965 This makes the tags file smaller.");
967 if (!CTAGS)
968 puts ("-i FILE, --include=FILE\n\
969 Include a note in tag file indicating that, when searching for\n\
970 a tag, one should also consult the tags file FILE after\n\
971 checking the current file.");
973 puts ("-l LANG, --language=LANG\n\
974 Force the following files to be considered as written in the\n\
975 named language up to the next --language=LANG option.");
977 if (CTAGS)
978 puts ("--globals\n\
979 Create tag entries for global variables in some languages.");
980 else
981 puts ("--no-globals\n\
982 Do not create tag entries for global variables in some\n\
983 languages. This makes the tags file smaller.");
985 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
986 puts ("--no-line-directive\n\
987 Ignore #line preprocessor directives in C and derived languages.");
989 if (CTAGS)
990 puts ("--members\n\
991 Create tag entries for members of structures in some languages.");
992 else
993 puts ("--no-members\n\
994 Do not create tag entries for members of structures\n\
995 in some languages.");
997 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
998 Make a tag for each line matching a regular expression pattern\n\
999 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1000 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1001 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1002 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1003 puts (" If TAGNAME/ is present, the tags created are named.\n\
1004 For example Tcl named tags can be created with:\n\
1005 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1006 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1007 `m' means to allow multi-line matches, `s' implies `m' and\n\
1008 causes dot to match any character, including newline.");
1010 puts ("-R, --no-regex\n\
1011 Don't create tags from regexps for the following files.");
1013 puts ("-I, --ignore-indentation\n\
1014 In C and C++ do not assume that a closing brace in the first\n\
1015 column is the final brace of a function or structure definition.");
1017 puts ("-o FILE, --output=FILE\n\
1018 Write the tags to FILE.");
1020 puts ("--parse-stdin=NAME\n\
1021 Read from standard input and record tags as belonging to file NAME.");
1023 if (CTAGS)
1025 puts ("-t, --typedefs\n\
1026 Generate tag entries for C and Ada typedefs.");
1027 puts ("-T, --typedefs-and-c++\n\
1028 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1029 and C++ member functions.");
1032 if (CTAGS)
1033 puts ("-u, --update\n\
1034 Update the tag entries for the given files, leaving tag\n\
1035 entries for other files in place. Currently, this is\n\
1036 implemented by deleting the existing entries for the given\n\
1037 files and then rewriting the new entries at the end of the\n\
1038 tags file. It is often faster to simply rebuild the entire\n\
1039 tag file than to use this.");
1041 if (CTAGS)
1043 puts ("-v, --vgrind\n\
1044 Print on the standard output an index of items intended for\n\
1045 human consumption, similar to the output of vgrind. The index\n\
1046 is sorted, and gives the page number of each item.");
1048 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1049 puts ("-w, --no-duplicates\n\
1050 Do not create duplicate tag entries, for compatibility with\n\
1051 traditional ctags.");
1053 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1054 puts ("-w, --no-warn\n\
1055 Suppress warning messages about duplicate tag entries.");
1057 puts ("-x, --cxref\n\
1058 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1059 The output uses line numbers instead of page numbers, but\n\
1060 beyond that the differences are cosmetic; try both to see\n\
1061 which you like.");
1064 puts ("-V, --version\n\
1065 Print the version of the program.\n\
1066 -h, --help\n\
1067 Print this help message.\n\
1068 Followed by one or more `--language' options prints detailed\n\
1069 help about tag generation for the specified languages.");
1071 print_language_names ();
1073 puts ("");
1074 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1076 exit (EXIT_SUCCESS);
1081 main (int argc, char **argv)
1083 int i;
1084 unsigned int nincluded_files;
1085 char **included_files;
1086 argument *argbuffer;
1087 int current_arg, file_count;
1088 linebuffer filename_lb;
1089 bool help_asked = FALSE;
1090 char *optstring;
1091 int opt;
1094 #ifdef DOS_NT
1095 _fmode = O_BINARY; /* all of files are treated as binary files */
1096 #endif /* DOS_NT */
1098 progname = argv[0];
1099 nincluded_files = 0;
1100 included_files = xnew (argc, char *);
1101 current_arg = 0;
1102 file_count = 0;
1104 /* Allocate enough no matter what happens. Overkill, but each one
1105 is small. */
1106 argbuffer = xnew (argc, argument);
1109 * Always find typedefs and structure tags.
1110 * Also default to find macro constants, enum constants, struct
1111 * members and global variables. Do it for both etags and ctags.
1113 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1114 globals = members = TRUE;
1116 /* When the optstring begins with a '-' getopt_long does not rearrange the
1117 non-options arguments to be at the end, but leaves them alone. */
1118 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1119 "ac:Cf:Il:o:r:RSVhH",
1120 (CTAGS) ? "BxdtTuvw" : "Di:");
1122 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1123 switch (opt)
1125 case 0:
1126 /* If getopt returns 0, then it has already processed a
1127 long-named option. We should do nothing. */
1128 break;
1130 case 1:
1131 /* This means that a file name has been seen. Record it. */
1132 argbuffer[current_arg].arg_type = at_filename;
1133 argbuffer[current_arg].what = optarg;
1134 ++current_arg;
1135 ++file_count;
1136 break;
1138 case STDIN:
1139 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1140 argbuffer[current_arg].arg_type = at_stdin;
1141 argbuffer[current_arg].what = optarg;
1142 ++current_arg;
1143 ++file_count;
1144 if (parsing_stdin)
1145 fatal ("cannot parse standard input more than once", (char *)NULL);
1146 parsing_stdin = TRUE;
1147 break;
1149 /* Common options. */
1150 case 'a': append_to_tagfile = TRUE; break;
1151 case 'C': cplusplus = TRUE; break;
1152 case 'f': /* for compatibility with old makefiles */
1153 case 'o':
1154 if (tagfile)
1156 error ("-o option may only be given once.", (char *)NULL);
1157 suggest_asking_for_help ();
1158 /* NOTREACHED */
1160 tagfile = optarg;
1161 break;
1162 case 'I':
1163 case 'S': /* for backward compatibility */
1164 ignoreindent = TRUE;
1165 break;
1166 case 'l':
1168 language *lang = get_language_from_langname (optarg);
1169 if (lang != NULL)
1171 argbuffer[current_arg].lang = lang;
1172 argbuffer[current_arg].arg_type = at_language;
1173 ++current_arg;
1176 break;
1177 case 'c':
1178 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1179 optarg = concat (optarg, "i", ""); /* memory leak here */
1180 /* FALLTHRU */
1181 case 'r':
1182 argbuffer[current_arg].arg_type = at_regexp;
1183 argbuffer[current_arg].what = optarg;
1184 ++current_arg;
1185 break;
1186 case 'R':
1187 argbuffer[current_arg].arg_type = at_regexp;
1188 argbuffer[current_arg].what = NULL;
1189 ++current_arg;
1190 break;
1191 case 'V':
1192 print_version ();
1193 break;
1194 case 'h':
1195 case 'H':
1196 help_asked = TRUE;
1197 break;
1199 /* Etags options */
1200 case 'D': constantypedefs = FALSE; break;
1201 case 'i': included_files[nincluded_files++] = optarg; break;
1203 /* Ctags options. */
1204 case 'B': searchar = '?'; break;
1205 case 'd': constantypedefs = TRUE; break;
1206 case 't': typedefs = TRUE; break;
1207 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1208 case 'u': update = TRUE; break;
1209 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1210 case 'x': cxref_style = TRUE; break;
1211 case 'w': no_warnings = TRUE; break;
1212 default:
1213 suggest_asking_for_help ();
1214 /* NOTREACHED */
1217 /* No more options. Store the rest of arguments. */
1218 for (; optind < argc; optind++)
1220 argbuffer[current_arg].arg_type = at_filename;
1221 argbuffer[current_arg].what = argv[optind];
1222 ++current_arg;
1223 ++file_count;
1226 argbuffer[current_arg].arg_type = at_end;
1228 if (help_asked)
1229 print_help (argbuffer);
1230 /* NOTREACHED */
1232 if (nincluded_files == 0 && file_count == 0)
1234 error ("no input files specified.", (char *)NULL);
1235 suggest_asking_for_help ();
1236 /* NOTREACHED */
1239 if (tagfile == NULL)
1240 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1241 cwd = etags_getcwd (); /* the current working directory */
1242 if (cwd[strlen (cwd) - 1] != '/')
1244 char *oldcwd = cwd;
1245 cwd = concat (oldcwd, "/", "");
1246 free (oldcwd);
1249 /* Compute base directory for relative file names. */
1250 if (streq (tagfile, "-")
1251 || strneq (tagfile, "/dev/", 5))
1252 tagfiledir = cwd; /* relative file names are relative to cwd */
1253 else
1255 canonicalize_filename (tagfile);
1256 tagfiledir = absolute_dirname (tagfile, cwd);
1259 init (); /* set up boolean "functions" */
1261 linebuffer_init (&lb);
1262 linebuffer_init (&filename_lb);
1263 linebuffer_init (&filebuf);
1264 linebuffer_init (&token_name);
1266 if (!CTAGS)
1268 if (streq (tagfile, "-"))
1270 tagf = stdout;
1271 #ifdef DOS_NT
1272 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1273 doesn't take effect until after `stdout' is already open). */
1274 if (!isatty (fileno (stdout)))
1275 setmode (fileno (stdout), O_BINARY);
1276 #endif /* DOS_NT */
1278 else
1279 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1280 if (tagf == NULL)
1281 pfatal (tagfile);
1285 * Loop through files finding functions.
1287 for (i = 0; i < current_arg; i++)
1289 static language *lang; /* non-NULL if language is forced */
1290 char *this_file;
1292 switch (argbuffer[i].arg_type)
1294 case at_language:
1295 lang = argbuffer[i].lang;
1296 break;
1297 case at_regexp:
1298 analyse_regex (argbuffer[i].what);
1299 break;
1300 case at_filename:
1301 this_file = argbuffer[i].what;
1302 /* Input file named "-" means read file names from stdin
1303 (one per line) and use them. */
1304 if (streq (this_file, "-"))
1306 if (parsing_stdin)
1307 fatal ("cannot parse standard input AND read file names from it",
1308 (char *)NULL);
1309 while (readline_internal (&filename_lb, stdin) > 0)
1310 process_file_name (filename_lb.buffer, lang);
1312 else
1313 process_file_name (this_file, lang);
1314 break;
1315 case at_stdin:
1316 this_file = argbuffer[i].what;
1317 process_file (stdin, this_file, lang);
1318 break;
1322 free_regexps ();
1323 free (lb.buffer);
1324 free (filebuf.buffer);
1325 free (token_name.buffer);
1327 if (!CTAGS || cxref_style)
1329 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1330 put_entries (nodehead);
1331 free_tree (nodehead);
1332 nodehead = NULL;
1333 if (!CTAGS)
1335 fdesc *fdp;
1337 /* Output file entries that have no tags. */
1338 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1339 if (!fdp->written)
1340 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1342 while (nincluded_files-- > 0)
1343 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1345 if (fclose (tagf) == EOF)
1346 pfatal (tagfile);
1349 exit (EXIT_SUCCESS);
1352 /* From here on, we are in (CTAGS && !cxref_style) */
1353 if (update)
1355 char cmd[BUFSIZ];
1356 for (i = 0; i < current_arg; ++i)
1358 switch (argbuffer[i].arg_type)
1360 case at_filename:
1361 case at_stdin:
1362 break;
1363 default:
1364 continue; /* the for loop */
1366 sprintf (cmd,
1367 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1368 tagfile, argbuffer[i].what, tagfile);
1369 if (system (cmd) != EXIT_SUCCESS)
1370 fatal ("failed to execute shell command", (char *)NULL);
1372 append_to_tagfile = TRUE;
1375 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1376 if (tagf == NULL)
1377 pfatal (tagfile);
1378 put_entries (nodehead); /* write all the tags (CTAGS) */
1379 free_tree (nodehead);
1380 nodehead = NULL;
1381 if (fclose (tagf) == EOF)
1382 pfatal (tagfile);
1384 if (CTAGS)
1385 if (append_to_tagfile || update)
1387 char cmd[2*BUFSIZ+20];
1388 /* Maybe these should be used:
1389 setenv ("LC_COLLATE", "C", 1);
1390 setenv ("LC_ALL", "C", 1); */
1391 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1392 exit (system (cmd));
1394 return EXIT_SUCCESS;
1399 * Return a compressor given the file name. If EXTPTR is non-zero,
1400 * return a pointer into FILE where the compressor-specific
1401 * extension begins. If no compressor is found, NULL is returned
1402 * and EXTPTR is not significant.
1403 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1405 static compressor *
1406 get_compressor_from_suffix (char *file, char **extptr)
1408 compressor *compr;
1409 char *slash, *suffix;
1411 /* File has been processed by canonicalize_filename,
1412 so we don't need to consider backslashes on DOS_NT. */
1413 slash = etags_strrchr (file, '/');
1414 suffix = etags_strrchr (file, '.');
1415 if (suffix == NULL || suffix < slash)
1416 return NULL;
1417 if (extptr != NULL)
1418 *extptr = suffix;
1419 suffix += 1;
1420 /* Let those poor souls who live with DOS 8+3 file name limits get
1421 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1422 Only the first do loop is run if not MSDOS */
1425 for (compr = compressors; compr->suffix != NULL; compr++)
1426 if (streq (compr->suffix, suffix))
1427 return compr;
1428 if (!MSDOS)
1429 break; /* do it only once: not really a loop */
1430 if (extptr != NULL)
1431 *extptr = ++suffix;
1432 } while (*suffix != '\0');
1433 return NULL;
1439 * Return a language given the name.
1441 static language *
1442 get_language_from_langname (const char *name)
1444 language *lang;
1446 if (name == NULL)
1447 error ("empty language name", (char *)NULL);
1448 else
1450 for (lang = lang_names; lang->name != NULL; lang++)
1451 if (streq (name, lang->name))
1452 return lang;
1453 error ("unknown language \"%s\"", name);
1456 return NULL;
1461 * Return a language given the interpreter name.
1463 static language *
1464 get_language_from_interpreter (char *interpreter)
1466 language *lang;
1467 const char **iname;
1469 if (interpreter == NULL)
1470 return NULL;
1471 for (lang = lang_names; lang->name != NULL; lang++)
1472 if (lang->interpreters != NULL)
1473 for (iname = lang->interpreters; *iname != NULL; iname++)
1474 if (streq (*iname, interpreter))
1475 return lang;
1477 return NULL;
1483 * Return a language given the file name.
1485 static language *
1486 get_language_from_filename (char *file, int case_sensitive)
1488 language *lang;
1489 const char **name, **ext, *suffix;
1491 /* Try whole file name first. */
1492 for (lang = lang_names; lang->name != NULL; lang++)
1493 if (lang->filenames != NULL)
1494 for (name = lang->filenames; *name != NULL; name++)
1495 if ((case_sensitive)
1496 ? streq (*name, file)
1497 : strcaseeq (*name, file))
1498 return lang;
1500 /* If not found, try suffix after last dot. */
1501 suffix = etags_strrchr (file, '.');
1502 if (suffix == NULL)
1503 return NULL;
1504 suffix += 1;
1505 for (lang = lang_names; lang->name != NULL; lang++)
1506 if (lang->suffixes != NULL)
1507 for (ext = lang->suffixes; *ext != NULL; ext++)
1508 if ((case_sensitive)
1509 ? streq (*ext, suffix)
1510 : strcaseeq (*ext, suffix))
1511 return lang;
1512 return NULL;
1517 * This routine is called on each file argument.
/* Process the input file named FILE, using language LANG.
   FILE is canonicalized in place.  It is skipped if it is the tags
   file itself (unless the tags file is "-") or if its uncompressed
   name was already processed.  If FILE cannot be stat'ed, an
   alternative name is tried: the uncompressed name when FILE has a
   compressor suffix, else FILE followed by each known compressor
   suffix.  A compressed file is read through a pipe running the
   command of its compressor; any other file is opened directly.  The
   stream is handed to process_file under the uncompressed name.
   NOTE(review): the command given to popen is the compressor command
   followed by the unquoted file name -- confirm that file names with
   shell metacharacters are acceptable here. */
1519 static void
1520 process_file_name (char *file, language *lang)
1522 struct stat stat_buf;
1523 FILE *inf;
1524 fdesc *fdp;
1525 compressor *compr;
1526 char *compressed_name, *uncompressed_name;
1527 char *ext, *real_name;
1528 int retval;
1530 canonicalize_filename (file);
1531 if (streq (file, tagfile) && !streq (tagfile, "-"))
1533 error ("skipping inclusion of %s in self.", file);
1534 return;
/* Both names are heap copies; those still referenced at the `cleanup'
   label are freed there. */
1536 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1538 compressed_name = NULL;
1539 real_name = uncompressed_name = savestr (file);
1541 else
1543 real_name = compressed_name = savestr (file);
1544 uncompressed_name = savenstr (file, ext - file);
1547 /* If the canonicalized uncompressed name
1548 has already been dealt with, skip it silently. */
1549 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1551 assert (fdp->infname != NULL);
1552 if (streq (uncompressed_name, fdp->infname))
1553 goto cleanup;
1556 if (stat (real_name, &stat_buf) != 0)
1558 /* Reset real_name and try with a different name. */
1559 real_name = NULL;
1560 if (compressed_name != NULL) /* try with the given suffix */
1562 if (stat (uncompressed_name, &stat_buf) == 0)
1563 real_name = uncompressed_name;
1565 else /* try all possible suffixes */
1567 for (compr = compressors; compr->suffix != NULL; compr++)
1569 compressed_name = concat (file, ".", compr->suffix);
1570 if (stat (compressed_name, &stat_buf) != 0)
/* On MSDOS, retry with one more character removed from the front of
   the appended suffix (starting with the dot) at each iteration. */
1572 if (MSDOS)
1574 char *suf = compressed_name + strlen (file);
1575 size_t suflen = strlen (compr->suffix) + 1;
1576 for ( ; suf[1]; suf++, suflen--)
1578 memmove (suf, suf + 1, suflen);
1579 if (stat (compressed_name, &stat_buf) == 0)
1581 real_name = compressed_name;
1582 break;
1585 if (real_name != NULL)
1586 break;
1587 } /* MSDOS */
1588 free (compressed_name);
1589 compressed_name = NULL;
1591 else
1593 real_name = compressed_name;
1594 break;
1598 if (real_name == NULL)
1600 perror (file);
1601 goto cleanup;
1603 } /* try with a different name */
1605 if (!S_ISREG (stat_buf.st_mode))
1607 error ("skipping %s: it is not a regular file.", real_name);
1608 goto cleanup;
/* real_name equals compressed_name exactly when the file to read is
   compressed; the same test selects pclose or fclose further down. */
1610 if (real_name == compressed_name)
1612 char *cmd = concat (compr->command, " ", real_name);
1613 inf = (FILE *) popen (cmd, "r");
1614 free (cmd);
1616 else
1617 inf = fopen (real_name, "r");
1618 if (inf == NULL)
1620 perror (real_name);
1621 goto cleanup;
1624 process_file (inf, uncompressed_name, lang);
1626 if (real_name == compressed_name)
1627 retval = pclose (inf);
1628 else
1629 retval = fclose (inf);
1630 if (retval < 0)
1631 pfatal (file);
/* Common exit path: release the name copies and reset the globals
   last_node and curfdp. */
1633 cleanup:
1634 free (compressed_name);
1635 free (uncompressed_name);
1636 last_node = NULL;
1637 curfdp = NULL;
1638 return;
1641 static void
1642 process_file (FILE *fh, char *fn, language *lang)
1644 static const fdesc emptyfdesc;
1645 fdesc *fdp;
1647 /* Create a new input file description entry. */
1648 fdp = xnew (1, fdesc);
1649 *fdp = emptyfdesc;
1650 fdp->next = fdhead;
1651 fdp->infname = savestr (fn);
1652 fdp->lang = lang;
1653 fdp->infabsname = absolute_filename (fn, cwd);
1654 fdp->infabsdir = absolute_dirname (fn, cwd);
1655 if (filename_is_absolute (fn))
1657 /* An absolute file name. Canonicalize it. */
1658 fdp->taggedfname = absolute_filename (fn, NULL);
1660 else
1662 /* A file name relative to cwd. Make it relative
1663 to the directory of the tags file. */
1664 fdp->taggedfname = relative_filename (fn, tagfiledir);
1666 fdp->usecharno = TRUE; /* use char position when making tags */
1667 fdp->prop = NULL;
1668 fdp->written = FALSE; /* not written on tags file yet */
1670 fdhead = fdp;
1671 curfdp = fdhead; /* the current file description */
1673 find_entries (fh);
1675 /* If not Ctags, and if this is not metasource and if it contained no #line
1676 directives, we can write the tags and free all nodes pointing to
1677 curfdp. */
1678 if (!CTAGS
1679 && curfdp->usecharno /* no #line directives in this file */
1680 && !curfdp->lang->metasource)
1682 node *np, *prev;
1684 /* Look for the head of the sublist relative to this file. See add_node
1685 for the structure of the node tree. */
1686 prev = NULL;
1687 for (np = nodehead; np != NULL; prev = np, np = np->left)
1688 if (np->fdp == curfdp)
1689 break;
1691 /* If we generated tags for this file, write and delete them. */
1692 if (np != NULL)
1694 /* This is the head of the last sublist, if any. The following
1695 instructions depend on this being true. */
1696 assert (np->left == NULL);
1698 assert (fdhead == curfdp);
1699 assert (last_node->fdp == curfdp);
1700 put_entries (np); /* write tags for file curfdp->taggedfname */
1701 free_tree (np); /* remove the written nodes */
1702 if (prev == NULL)
1703 nodehead = NULL; /* no nodes left */
1704 else
1705 prev->left = NULL; /* delete the pointer to the sublist */
1711 * This routine sets up the boolean pseudo-functions which work
1712 * by setting boolean flags dependent upon the corresponding character.
1713 * Every char which is NOT in that string is not a white char. Therefore,
1714 * all of the array "_wht" is set to FALSE, and then the elements
1715 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1716 * of a char is TRUE if it is the string "white", else FALSE.
1718 static void
1719 init (void)
1721 register const char *sp;
1722 register int i;
1724 for (i = 0; i < CHARS; i++)
1725 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1726 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1727 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1728 notinname('\0') = notinname('\n');
1729 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1730 begtoken('\0') = begtoken('\n');
1731 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1732 intoken('\0') = intoken('\n');
1733 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1734 endtoken('\0') = endtoken('\n');
1738 * This routine opens the specified file and calls the function
1739 * which finds the function and type definitions.
/* Choose a parser for the current file description (curfdp) and run
   it on the stream INF.
   The parser is chosen by trying, in order: the language stored in
   curfdp->lang; the file name, compared case-sensitively; the
   interpreter named on a first line starting with "#!"; the file
   name, compared ignoring case.  If all of these fail, the function
   calls itself with the language forced to Fortran and, if that added
   no tag (last_node unchanged), once more with C or C++.
   Before parsing, the line and character counters and the file buffer
   are reset; after parsing, regex_tag_multiline is called. */
1741 static void
1742 find_entries (FILE *inf)
1744 char *cp;
1745 language *lang = curfdp->lang;
1746 Lang_function *parser = NULL;
1748 /* If user specified a language, use it. */
1749 if (lang != NULL && lang->function != NULL)
1751 parser = lang->function;
1754 /* Else try to guess the language given the file name. */
1755 if (parser == NULL)
1757 lang = get_language_from_filename (curfdp->infname, TRUE);
1758 if (lang != NULL && lang->function != NULL)
1760 curfdp->lang = lang;
1761 parser = lang->function;
1765 /* Else look for sharp-bang as the first two characters. */
1766 if (parser == NULL
1767 && readline_internal (&lb, inf) > 0
1768 && lb.len >= 2
1769 && lb.buffer[0] == '#'
1770 && lb.buffer[1] == '!')
1772 char *lp;
1774 /* Set lp to point at the first char after the last slash in the
1775 line or, if no slashes, at the first nonblank. Then set cp to
1776 the first successive blank and terminate the string. */
1777 lp = etags_strrchr (lb.buffer+2, '/');
1778 if (lp != NULL)
1779 lp += 1;
1780 else
1781 lp = skip_spaces (lb.buffer + 2);
1782 cp = skip_non_spaces (lp);
1783 *cp = '\0';
1785 if (strlen (lp) > 0)
1787 lang = get_language_from_interpreter (lp);
1788 if (lang != NULL && lang->function != NULL)
1790 curfdp->lang = lang;
1791 parser = lang->function;
1796 /* We rewind here, even if inf may be a pipe. We fail if the
1797 length of the first line is longer than the pipe block size,
1798 which is unlikely. */
1799 rewind (inf);
1801 /* Else try to guess the language given the case insensitive file name. */
1802 if (parser == NULL)
1804 lang = get_language_from_filename (curfdp->infname, FALSE);
1805 if (lang != NULL && lang->function != NULL)
1807 curfdp->lang = lang;
1808 parser = lang->function;
1812 /* Else try Fortran or C. */
1813 if (parser == NULL)
/* last_node is remembered so that the Fortran attempt can be checked
   for having added any tag. */
1815 node *old_last_node = last_node;
1817 curfdp->lang = get_language_from_langname ("fortran");
1818 find_entries (inf);
1820 if (old_last_node == last_node)
1821 /* No Fortran entries found. Try C. */
1823 /* We do not tag if rewind fails.
1824 Only the file name will be recorded in the tags file. */
1825 rewind (inf);
1826 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1827 find_entries (inf);
1829 return;
1832 if (!no_line_directive
1833 && curfdp->lang != NULL && curfdp->lang->metasource)
1834 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1835 file, or anyway we parsed a file that is automatically generated from
1836 this one. If this is the case, the bingo.c file contained #line
1837 directives that generated tags pointing to this file. Let's delete
1838 them all before parsing this file, which is the real source. */
/* fdpp points at the link being examined, so that a description can
   be unlinked without keeping track of its predecessor. */
1840 fdesc **fdpp = &fdhead;
1841 while (*fdpp != NULL)
1842 if (*fdpp != curfdp
1843 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1844 /* We found one of those! We must delete both the file description
1845 and all tags referring to it. */
1847 fdesc *badfdp = *fdpp;
1849 /* Delete the tags referring to badfdp->taggedfname
1850 that were obtained from badfdp->infname. */
1851 invalidate_nodes (badfdp, &nodehead);
1853 *fdpp = badfdp->next; /* remove the bad description from the list */
1854 free_fdesc (badfdp);
1856 else
1857 fdpp = &(*fdpp)->next; /* advance the list pointer */
1860 assert (parser != NULL);
1862 /* Generic initialisations before reading from file. */
1863 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1865 /* Generic initialisations before parsing file with readline. */
1866 lineno = 0; /* reset global line number */
1867 charno = 0; /* reset global char number */
1868 linecharno = 0; /* reset global char number of line start */
1870 parser (inf);
1872 regex_tag_multiline ();
1877 * Check whether an implicitly named tag should be created,
1878 * then call `pfnote'.
1879 * NAME is a string that is internally copied by this function.
1881 * TAGS format specification
1882 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1883 * The following is explained in some more detail in etc/ETAGS.EBNF.
1885 * make_tag creates tags with "implicit tag names" (unnamed tags)
1886 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1887 * 1. NAME does not contain any of the characters in NONAM;
1888 * 2. LINESTART contains name as either a rightmost, or rightmost but
1889 * one character, substring;
1890 * 3. the character, if any, immediately before NAME in LINESTART must
1891 * be a character in NONAM;
1892 * 4. the character, if any, immediately after NAME in LINESTART must
1893 * also be a character in NONAM.
1895 * The implementation uses the notinname() macro, which recognises the
1896 * characters stored in the string `nonam'.
1897 * etags.el needs to use the same characters that are in NONAM.
1899 static void
1900 make_tag (const char *name, /* tag name, or NULL if unnamed */
1901 int namelen, /* tag length */
1902 int is_func, /* tag is a function */
1903 char *linestart, /* start of the line where tag is */
1904 int linelen, /* length of the line where tag is */
1905 int lno, /* line number */
1906 long int cno) /* character number */
1908 bool named = (name != NULL && namelen > 0);
1909 char *nname = NULL;
1911 if (!CTAGS && named) /* maybe set named to false */
1912 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1913 such that etags.el can guess a name from it. */
1915 int i;
1916 register const char *cp = name;
1918 for (i = 0; i < namelen; i++)
1919 if (notinname (*cp++))
1920 break;
1921 if (i == namelen) /* rule #1 */
1923 cp = linestart + linelen - namelen;
1924 if (notinname (linestart[linelen-1]))
1925 cp -= 1; /* rule #4 */
1926 if (cp >= linestart /* rule #2 */
1927 && (cp == linestart
1928 || notinname (cp[-1])) /* rule #3 */
1929 && strneq (name, cp, namelen)) /* rule #2 */
1930 named = FALSE; /* use implicit tag name */
1934 if (named)
1935 nname = savenstr (name, namelen);
1937 pfnote (nname, is_func, linestart, linelen, lno, cno);
1940 /* Record a tag. */
1941 static void
1942 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1943 /* tag name, or NULL if unnamed */
1944 /* tag is a function */
1945 /* start of the line where tag is */
1946 /* length of the line where tag is */
1947 /* line number */
1948 /* character number */
1950 register node *np;
1952 assert (name == NULL || name[0] != '\0');
1953 if (CTAGS && name == NULL)
1954 return;
1956 np = xnew (1, node);
1958 /* If ctags mode, change name "main" to M<thisfilename>. */
1959 if (CTAGS && !cxref_style && streq (name, "main"))
1961 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1962 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1963 fp = etags_strrchr (np->name, '.');
1964 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1965 fp[0] = '\0';
1967 else
1968 np->name = name;
1969 np->valid = TRUE;
1970 np->been_warned = FALSE;
1971 np->fdp = curfdp;
1972 np->is_func = is_func;
1973 np->lno = lno;
1974 if (np->fdp->usecharno)
1975 /* Our char numbers are 0-base, because of C language tradition?
1976 ctags compatibility? old versions compatibility? I don't know.
1977 Anyway, since emacs's are 1-base we expect etags.el to take care
1978 of the difference. If we wanted to have 1-based numbers, we would
1979 uncomment the +1 below. */
1980 np->cno = cno /* + 1 */ ;
1981 else
1982 np->cno = invalidcharno;
1983 np->left = np->right = NULL;
1984 if (CTAGS && !cxref_style)
1986 if (strlen (linestart) < 50)
1987 np->regex = concat (linestart, "$", "");
1988 else
1989 np->regex = savenstr (linestart, 50);
1991 else
1992 np->regex = savenstr (linestart, linelen);
1994 add_node (np, &nodehead);
1998 * free_tree ()
1999 * recurse on left children, iterate on right children.
2001 static void
2002 free_tree (register node *np)
2004 while (np)
2006 register node *node_right = np->right;
2007 free_tree (np->left);
2008 free (np->name);
2009 free (np->regex);
2010 free (np);
2011 np = node_right;
2016 * free_fdesc ()
2017 * delete a file description
2019 static void
2020 free_fdesc (register fdesc *fdp)
2022 free (fdp->infname);
2023 free (fdp->infabsname);
2024 free (fdp->infabsdir);
2025 free (fdp->taggedfname);
2026 free (fdp->prop);
2027 free (fdp);
2031 * add_node ()
2032 * Adds a node to the tree of nodes. In etags mode, sort by file
2033 * name. In ctags mode, sort by tag name. Make no attempt at
2034 * balancing.
2036 * add_node is the only function allowed to add nodes, so it can
2037 * maintain state.
2039 static void
2040 add_node (node *np, node **cur_node_p)
2042 register int dif;
2043 register node *cur_node = *cur_node_p;
2045 if (cur_node == NULL)
2047 *cur_node_p = np;
2048 last_node = np;
2049 return;
2052 if (!CTAGS)
2053 /* Etags Mode */
2055 /* For each file name, tags are in a linked sublist on the right
2056 pointer. The first tags of different files are a linked list
2057 on the left pointer. last_node points to the end of the last
2058 used sublist. */
2059 if (last_node != NULL && last_node->fdp == np->fdp)
2061 /* Let's use the same sublist as the last added node. */
2062 assert (last_node->right == NULL);
2063 last_node->right = np;
2064 last_node = np;
2066 else if (cur_node->fdp == np->fdp)
2068 /* Scanning the list we found the head of a sublist which is
2069 good for us. Let's scan this sublist. */
2070 add_node (np, &cur_node->right);
2072 else
2073 /* The head of this sublist is not good for us. Let's try the
2074 next one. */
2075 add_node (np, &cur_node->left);
2076 } /* if ETAGS mode */
2078 else
2080 /* Ctags Mode */
2081 dif = strcmp (np->name, cur_node->name);
2084 * If this tag name matches an existing one, then
2085 * do not add the node, but maybe print a warning.
2087 if (no_duplicates && !dif)
2089 if (np->fdp == cur_node->fdp)
2091 if (!no_warnings)
2093 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2094 np->fdp->infname, lineno, np->name);
2095 fprintf (stderr, "Second entry ignored\n");
2098 else if (!cur_node->been_warned && !no_warnings)
2100 fprintf
2101 (stderr,
2102 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2103 np->fdp->infname, cur_node->fdp->infname, np->name);
2104 cur_node->been_warned = TRUE;
2106 return;
2109 /* Actually add the node */
2110 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2111 } /* if CTAGS mode */
2115 * invalidate_nodes ()
2116 * Scan the node tree and invalidate all nodes pointing to the
2117 * given file description (CTAGS case) or free them (ETAGS case).
2119 static void
2120 invalidate_nodes (fdesc *badfdp, node **npp)
2122 node *np = *npp;
2124 if (np == NULL)
2125 return;
2127 if (CTAGS)
2129 if (np->left != NULL)
2130 invalidate_nodes (badfdp, &np->left);
2131 if (np->fdp == badfdp)
2132 np->valid = FALSE;
2133 if (np->right != NULL)
2134 invalidate_nodes (badfdp, &np->right);
2136 else
2138 assert (np->fdp != NULL);
2139 if (np->fdp == badfdp)
2141 *npp = np->left; /* detach the sublist from the list */
2142 np->left = NULL; /* isolate it */
2143 free_tree (np); /* free it */
2144 invalidate_nodes (badfdp, npp);
2146 else
2147 invalidate_nodes (badfdp, &np->left);
2152 static int total_size_of_entries (node *);
2153 static int number_len (long);
/* Length of a non-negative number's decimal representation.
   Returns the number of decimal digits in NUM (1 for zero).  */
static int
number_len (long int num)
{
  int len = 1;

  /* Each division strips one digit; stop when none are left.  */
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2166 * Return total number of characters that put_entries will output for
2167 * the nodes in the linked list at the right of the specified node.
2168 * This count is irrelevant with etags.el since emacs 19.34 at least,
2169 * but is still supplied for backward compatibility.
2171 static int
2172 total_size_of_entries (register node *np)
2174 register int total = 0;
2176 for (; np != NULL; np = np->right)
2177 if (np->valid)
2179 total += strlen (np->regex) + 1; /* pat\177 */
2180 if (np->name != NULL)
2181 total += strlen (np->name) + 1; /* name\001 */
2182 total += number_len ((long) np->lno) + 1; /* lno, */
2183 if (np->cno != invalidcharno) /* cno */
2184 total += number_len (np->cno);
2185 total += 1; /* newline */
2188 return total;
2191 static void
2192 put_entries (register node *np)
2194 register char *sp;
2195 static fdesc *fdp = NULL;
2197 if (np == NULL)
2198 return;
2200 /* Output subentries that precede this one */
2201 if (CTAGS)
2202 put_entries (np->left);
2204 /* Output this entry */
2205 if (np->valid)
2207 if (!CTAGS)
2209 /* Etags mode */
2210 if (fdp != np->fdp)
2212 fdp = np->fdp;
2213 fprintf (tagf, "\f\n%s,%d\n",
2214 fdp->taggedfname, total_size_of_entries (np));
2215 fdp->written = TRUE;
2217 fputs (np->regex, tagf);
2218 fputc ('\177', tagf);
2219 if (np->name != NULL)
2221 fputs (np->name, tagf);
2222 fputc ('\001', tagf);
2224 fprintf (tagf, "%d,", np->lno);
2225 if (np->cno != invalidcharno)
2226 fprintf (tagf, "%ld", np->cno);
2227 fputs ("\n", tagf);
2229 else
2231 /* Ctags mode */
2232 if (np->name == NULL)
2233 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2235 if (cxref_style)
2237 if (vgrind_style)
2238 fprintf (stdout, "%s %s %d\n",
2239 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2240 else
2241 fprintf (stdout, "%-16s %3d %-16s %s\n",
2242 np->name, np->lno, np->fdp->taggedfname, np->regex);
2244 else
2246 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2248 if (np->is_func)
2249 { /* function or #define macro with args */
2250 putc (searchar, tagf);
2251 putc ('^', tagf);
2253 for (sp = np->regex; *sp; sp++)
2255 if (*sp == '\\' || *sp == searchar)
2256 putc ('\\', tagf);
2257 putc (*sp, tagf);
2259 putc (searchar, tagf);
2261 else
2262 { /* anything else; text pattern inadequate */
2263 fprintf (tagf, "%d", np->lno);
2265 putc ('\n', tagf);
2268 } /* if this node contains a valid tag */
2270 /* Output subentries that follow this one */
2271 put_entries (np->right);
2272 if (!CTAGS)
2273 put_entries (np->left);
/* C extensions.  Bit flags selecting which C dialect is being parsed;
   the low 12 bits (C_EXT) identify the language proper.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 * Classification of the keywords listed in the gperf word list; st_none
 * is what C_symtype returns for a token that is not a keyword applicable
 * to the current language.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2301 static unsigned int hash (const char *, unsigned int);
2302 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2303 static enum sym_type C_symtype (char *, int, int);
/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
GTY,            0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
2355 /*%<*/
2356 /* C code produced by gperf version 3.0.1 */
2357 /* Command-line: gperf -m 5 */
2358 /* Computed positions: -k'2-3' */
2360 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2361 /* maximum key range = 33, duplicates = 0 */
/* gperf-generated perfect hash for the C keyword table.
   Combines LEN with table values selected by the second and (for
   words longer than two characters) third character of STR.
   STR must have at least two readable characters, and three when
   LEN is not 2; in_word_set only calls this with LEN >= 2.  */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  /* Never modified, hence const.  */
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2409 static struct C_stab_entry *
2410 in_word_set (register const char *str, register unsigned int len)
2412 enum
2414 TOTAL_KEYWORDS = 33,
2415 MIN_WORD_LENGTH = 2,
2416 MAX_WORD_LENGTH = 15,
2417 MIN_HASH_VALUE = 2,
2418 MAX_HASH_VALUE = 34
2421 static struct C_stab_entry wordlist[] =
2423 {""}, {""},
2424 {"if", 0, st_C_ignore},
2425 {"GTY", 0, st_C_attribute},
2426 {"@end", 0, st_C_objend},
2427 {"union", 0, st_C_struct},
2428 {"define", 0, st_C_define},
2429 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2430 {"template", 0, st_C_template},
2431 {"operator", C_PLPL, st_C_operator},
2432 {"@interface", 0, st_C_objprot},
2433 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2434 {"friend", C_PLPL, st_C_ignore},
2435 {"typedef", 0, st_C_typedef},
2436 {"return", 0, st_C_ignore},
2437 {"@implementation",0, st_C_objimpl},
2438 {"@protocol", 0, st_C_objprot},
2439 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2440 {"extern", 0, st_C_extern},
2441 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2442 {"struct", 0, st_C_struct},
2443 {"domain", C_STAR, st_C_struct},
2444 {"switch", 0, st_C_ignore},
2445 {"enum", 0, st_C_enum},
2446 {"for", 0, st_C_ignore},
2447 {"namespace", C_PLPL, st_C_struct},
2448 {"class", 0, st_C_class},
2449 {"while", 0, st_C_ignore},
2450 {"undef", 0, st_C_define},
2451 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2452 {"__attribute__", 0, st_C_attribute},
2453 {"SYSCALL", 0, st_C_gnumacro},
2454 {"ENTRY", 0, st_C_gnumacro},
2455 {"PSEUDO", 0, st_C_gnumacro},
2456 {"DEFUN", 0, st_C_gnumacro}
2459 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2461 register int key = hash (str, len);
2463 if (key <= MAX_HASH_VALUE && key >= 0)
2465 register const char *s = wordlist[key].name;
2467 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2468 return &wordlist[key];
2471 return 0;
2473 /*%>*/
2475 static enum sym_type
2476 C_symtype (char *str, int len, int c_ext)
2478 register struct C_stab_entry *se = in_word_set (str, len);
2480 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2481 return st_none;
2482 return se->type;
2487 * Ignoring __attribute__ ((list))
2489 static bool inattribute; /* looking at an __attribute__ construct */
2492 * C functions and variables are recognized using a simple
2493 * finite automaton. fvdef is its state variable.
2495 static enum
2497 fvnone, /* nothing seen */
2498 fdefunkey, /* Emacs DEFUN keyword seen */
2499 fdefunname, /* Emacs DEFUN name seen */
2500 foperator, /* func: operator keyword seen (cplpl) */
2501 fvnameseen, /* function or variable name seen */
2502 fstartlist, /* func: just after open parenthesis */
2503 finlist, /* func: in parameter list */
2504 flistseen, /* func: after parameter list */
2505 fignore, /* func: before open brace */
2506 vignore /* var-like: ignore until ';' */
2507 } fvdef;
2509 static bool fvextern; /* func or var: extern keyword seen; */
2512 * typedefs are recognized using a simple finite automaton.
2513 * typdef is its state variable.
2515 static enum
2517 tnone, /* nothing seen */
2518 tkeyseen, /* typedef keyword seen */
2519 ttypeseen, /* defined type seen */
2520 tinbody, /* inside typedef body */
2521 tend, /* just before typedef tag */
2522 tignore /* junk after typedef tag */
2523 } typdef;
2526 * struct-like structures (enum, struct and union) are recognized
2527 * using another simple finite automaton. `structdef' is its state
2528 * variable.
2530 static enum
2532 snone, /* nothing seen yet,
2533 or in struct body if bracelev > 0 */
2534 skeyseen, /* struct-like keyword seen */
2535 stagseen, /* struct-like tag seen */
2536 scolonseen /* colon seen after struct-like tag */
2537 } structdef;
2540 * When objdef is different from onone, objtag is the name of the class.
2542 static const char *objtag = "<uninited>";
2545 * Yet another little state machine to deal with preprocessor lines.
2547 static enum
2549 dnone, /* nothing seen */
2550 dsharpseen, /* '#' seen as first char on line */
2551 ddefineseen, /* '#' and 'define' seen */
2552 dignorerest /* ignore rest of line */
2553 } definedef;
2556 * State machine for Objective C protocols and implementations.
2557 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2559 static enum
2561 onone, /* nothing seen */
2562 oprotocol, /* @interface or @protocol seen */
2563 oimplementation, /* @implementations seen */
2564 otagseen, /* class name seen */
2565 oparenseen, /* parenthesis before category seen */
2566 ocatseen, /* category name seen */
2567 oinbody, /* in @implementation body */
2568 omethodsign, /* in @implementation body, after +/- */
2569 omethodtag, /* after method name */
2570 omethodcolon, /* after method colon */
2571 omethodparm, /* after method parameter */
2572 oignore /* wait for @end */
2573 } objdef;
2577 * Use this structure to keep info about the token read, and how it
2578 * should be tagged. Used by the make_C_tag function to build a tag.
2580 static struct tok
2582 char *line; /* string containing the token */
2583 int offset; /* where the token starts in LINE */
2584 int length; /* token length */
2586 The previous members can be used to pass strings around for generic
2587 purposes. The following ones specifically refer to creating tags. In this
2588 case the token contained here is the pattern that will be used to create a
2589 tag.
2591 bool valid; /* do not create a tag; the token should be
2592 invalidated whenever a state machine is
2593 reset prematurely */
2594 bool named; /* create a named tag */
2595 int lineno; /* source line number of tag */
2596 long linepos; /* source char number of tag */
2597 } token; /* latest token read */
2600 * Variables and functions for dealing with nested structures.
2601 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2603 static void pushclass_above (int, char *, int);
2604 static void popclass_above (int);
2605 static void write_classname (linebuffer *, const char *qualifier);
2607 static struct {
2608 char **cname; /* nested class names */
2609 int *bracelev; /* nested class brace level */
2610 int nl; /* class nesting level (elements used) */
2611 int size; /* length of the array */
2612 } cstack; /* stack for nested declaration tags */
2613 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2614 #define nestlev (cstack.nl)
2615 /* After struct keyword or in struct body, not inside a nested function. */
2616 #define instruct (structdef == snone && nestlev > 0 \
2617 && bracelev == cstack.bracelev[nestlev-1] + 1)
2619 static void
2620 pushclass_above (int bracelev, char *str, int len)
2622 int nl;
2624 popclass_above (bracelev);
2625 nl = cstack.nl;
2626 if (nl >= cstack.size)
2628 int size = cstack.size *= 2;
2629 xrnew (cstack.cname, size, char *);
2630 xrnew (cstack.bracelev, size, int);
2632 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2633 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2634 cstack.bracelev[nl] = bracelev;
2635 cstack.nl = nl + 1;
2638 static void
2639 popclass_above (int bracelev)
2641 int nl;
2643 for (nl = cstack.nl - 1;
2644 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2645 nl--)
2647 free (cstack.cname[nl]);
2648 cstack.nl = nl;
2652 static void
2653 write_classname (linebuffer *cn, const char *qualifier)
2655 int i, len;
2656 int qlen = strlen (qualifier);
2658 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2660 len = 0;
2661 cn->len = 0;
2662 cn->buffer[0] = '\0';
2664 else
2666 len = strlen (cstack.cname[0]);
2667 linebuffer_setlen (cn, len);
2668 strcpy (cn->buffer, cstack.cname[0]);
2670 for (i = 1; i < cstack.nl; i++)
2672 char *s;
2673 int slen;
2675 s = cstack.cname[i];
2676 if (s == NULL)
2677 continue;
2678 slen = strlen (s);
2679 len += slen + qlen;
2680 linebuffer_setlen (cn, len);
2681 strncat (cn->buffer, qualifier, qlen);
2682 strncat (cn->buffer, s, slen);
2687 static bool consider_token (char *, int, int, int *, int, int, bool *);
2688 static void make_C_tag (bool);
2691 * consider_token ()
2692 * checks to see if the current token is at the start of a
2693 * function or variable, or corresponds to a typedef, or
2694 * is a struct/union/enum tag, or #define, or an enum constant.
2696 * *IS_FUNC gets TRUE if the token is a function or #define macro
2697 * with args. C_EXTP points to which language we are looking at.
2699 * Globals
2700 * fvdef IN OUT
2701 * structdef IN OUT
2702 * definedef IN OUT
2703 * typdef IN OUT
2704 * objdef IN OUT
2707 static bool
2708 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2709 /* IN: token pointer */
2710 /* IN: token length */
2711 /* IN: first char after the token */
2712 /* IN, OUT: C extensions mask */
2713 /* IN: brace level */
2714 /* IN: parenthesis level */
2715 /* OUT: function or variable found */
2717 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2718 structtype is the type of the preceding struct-like keyword, and
2719 structbracelev is the brace level where it has been seen. */
2720 static enum sym_type structtype;
2721 static int structbracelev;
2722 static enum sym_type toktype;
2725 toktype = C_symtype (str, len, *c_extp);
2728 * Skip __attribute__
2730 if (toktype == st_C_attribute)
2732 inattribute = TRUE;
2733 return FALSE;
2737 * Advance the definedef state machine.
2739 switch (definedef)
2741 case dnone:
2742 /* We're not on a preprocessor line. */
2743 if (toktype == st_C_gnumacro)
2745 fvdef = fdefunkey;
2746 return FALSE;
2748 break;
2749 case dsharpseen:
2750 if (toktype == st_C_define)
2752 definedef = ddefineseen;
2754 else
2756 definedef = dignorerest;
2758 return FALSE;
2759 case ddefineseen:
2761 * Make a tag for any macro, unless it is a constant
2762 * and constantypedefs is FALSE.
2764 definedef = dignorerest;
2765 *is_func_or_var = (c == '(');
2766 if (!*is_func_or_var && !constantypedefs)
2767 return FALSE;
2768 else
2769 return TRUE;
2770 case dignorerest:
2771 return FALSE;
2772 default:
2773 error ("internal error: definedef value.", (char *)NULL);
2777 * Now typedefs
2779 switch (typdef)
2781 case tnone:
2782 if (toktype == st_C_typedef)
2784 if (typedefs)
2785 typdef = tkeyseen;
2786 fvextern = FALSE;
2787 fvdef = fvnone;
2788 return FALSE;
2790 break;
2791 case tkeyseen:
2792 switch (toktype)
2794 case st_none:
2795 case st_C_class:
2796 case st_C_struct:
2797 case st_C_enum:
2798 typdef = ttypeseen;
2800 break;
2801 case ttypeseen:
2802 if (structdef == snone && fvdef == fvnone)
2804 fvdef = fvnameseen;
2805 return TRUE;
2807 break;
2808 case tend:
2809 switch (toktype)
2811 case st_C_class:
2812 case st_C_struct:
2813 case st_C_enum:
2814 return FALSE;
2816 return TRUE;
2819 switch (toktype)
2821 case st_C_javastruct:
2822 if (structdef == stagseen)
2823 structdef = scolonseen;
2824 return FALSE;
2825 case st_C_template:
2826 case st_C_class:
2827 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2828 && bracelev == 0
2829 && definedef == dnone && structdef == snone
2830 && typdef == tnone && fvdef == fvnone)
2831 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2832 if (toktype == st_C_template)
2833 break;
2834 /* FALLTHRU */
2835 case st_C_struct:
2836 case st_C_enum:
2837 if (parlev == 0
2838 && fvdef != vignore
2839 && (typdef == tkeyseen
2840 || (typedefs_or_cplusplus && structdef == snone)))
2842 structdef = skeyseen;
2843 structtype = toktype;
2844 structbracelev = bracelev;
2845 if (fvdef == fvnameseen)
2846 fvdef = fvnone;
2848 return FALSE;
2851 if (structdef == skeyseen)
2853 structdef = stagseen;
2854 return TRUE;
2857 if (typdef != tnone)
2858 definedef = dnone;
2860 /* Detect Objective C constructs. */
2861 switch (objdef)
2863 case onone:
2864 switch (toktype)
2866 case st_C_objprot:
2867 objdef = oprotocol;
2868 return FALSE;
2869 case st_C_objimpl:
2870 objdef = oimplementation;
2871 return FALSE;
2873 break;
2874 case oimplementation:
2875 /* Save the class tag for functions or variables defined inside. */
2876 objtag = savenstr (str, len);
2877 objdef = oinbody;
2878 return FALSE;
2879 case oprotocol:
2880 /* Save the class tag for categories. */
2881 objtag = savenstr (str, len);
2882 objdef = otagseen;
2883 *is_func_or_var = TRUE;
2884 return TRUE;
2885 case oparenseen:
2886 objdef = ocatseen;
2887 *is_func_or_var = TRUE;
2888 return TRUE;
2889 case oinbody:
2890 break;
2891 case omethodsign:
2892 if (parlev == 0)
2894 fvdef = fvnone;
2895 objdef = omethodtag;
2896 linebuffer_setlen (&token_name, len);
2897 strncpy (token_name.buffer, str, len);
2898 token_name.buffer[len] = '\0';
2899 return TRUE;
2901 return FALSE;
2902 case omethodcolon:
2903 if (parlev == 0)
2904 objdef = omethodparm;
2905 return FALSE;
2906 case omethodparm:
2907 if (parlev == 0)
2909 fvdef = fvnone;
2910 objdef = omethodtag;
2911 linebuffer_setlen (&token_name, token_name.len + len);
2912 strncat (token_name.buffer, str, len);
2913 return TRUE;
2915 return FALSE;
2916 case oignore:
2917 if (toktype == st_C_objend)
2919 /* Memory leakage here: the string pointed by objtag is
2920 never released, because many tests would be needed to
2921 avoid breaking on incorrect input code. The amount of
2922 memory leaked here is the sum of the lengths of the
2923 class tags.
2924 free (objtag); */
2925 objdef = onone;
2927 return FALSE;
2930 /* A function, variable or enum constant? */
2931 switch (toktype)
2933 case st_C_extern:
2934 fvextern = TRUE;
2935 switch (fvdef)
2937 case finlist:
2938 case flistseen:
2939 case fignore:
2940 case vignore:
2941 break;
2942 default:
2943 fvdef = fvnone;
2945 return FALSE;
2946 case st_C_ignore:
2947 fvextern = FALSE;
2948 fvdef = vignore;
2949 return FALSE;
2950 case st_C_operator:
2951 fvdef = foperator;
2952 *is_func_or_var = TRUE;
2953 return TRUE;
2954 case st_none:
2955 if (constantypedefs
2956 && structdef == snone
2957 && structtype == st_C_enum && bracelev > structbracelev)
2958 return TRUE; /* enum constant */
2959 switch (fvdef)
2961 case fdefunkey:
2962 if (bracelev > 0)
2963 break;
2964 fvdef = fdefunname; /* GNU macro */
2965 *is_func_or_var = TRUE;
2966 return TRUE;
2967 case fvnone:
2968 switch (typdef)
2970 case ttypeseen:
2971 return FALSE;
2972 case tnone:
2973 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2974 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2976 fvdef = vignore;
2977 return FALSE;
2979 break;
2981 /* FALLTHRU */
2982 case fvnameseen:
2983 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2985 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2986 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2987 fvdef = foperator;
2988 *is_func_or_var = TRUE;
2989 return TRUE;
2991 if (bracelev > 0 && !instruct)
2992 break;
2993 fvdef = fvnameseen; /* function or variable */
2994 *is_func_or_var = TRUE;
2995 return TRUE;
2997 break;
3000 return FALSE;
3005 * C_entries often keeps pointers to tokens or lines which are older than
3006 * the line currently read. By keeping two line buffers, and switching
3007 * them at end of line, it is possible to use those pointers.
3009 static struct
3011 long linepos;
3012 linebuffer lb;
3013 } lbs[2];
3015 #define current_lb_is_new (newndx == curndx)
3016 #define switch_line_buffers() (curndx = 1 - curndx)
3018 #define curlb (lbs[curndx].lb)
3019 #define newlb (lbs[newndx].lb)
3020 #define curlinepos (lbs[curndx].linepos)
3021 #define newlinepos (lbs[newndx].linepos)
3023 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3024 #define cplpl (c_ext & C_PLPL)
3025 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3027 #define CNL_SAVE_DEFINEDEF() \
3028 do { \
3029 curlinepos = charno; \
3030 readline (&curlb, inf); \
3031 lp = curlb.buffer; \
3032 quotednl = FALSE; \
3033 newndx = curndx; \
3034 } while (0)
3036 #define CNL() \
3037 do { \
3038 CNL_SAVE_DEFINEDEF(); \
3039 if (savetoken.valid) \
3041 token = savetoken; \
3042 savetoken.valid = FALSE; \
3044 definedef = dnone; \
3045 } while (0)
3048 static void
3049 make_C_tag (int isfun)
3051 /* This function is never called when token.valid is FALSE, but
3052 we must protect against invalid input or internal errors. */
3053 if (token.valid)
3054 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3055 token.offset+token.length+1, token.lineno, token.linepos);
3056 else if (DEBUG)
3057 { /* this branch is optimised away if !DEBUG */
3058 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3059 token_name.len + 17, isfun, token.line,
3060 token.offset+token.length+1, token.lineno, token.linepos);
3061 error ("INVALID TOKEN", NULL);
3064 token.valid = FALSE;
3069 * C_entries ()
3070 * This routine finds functions, variables, typedefs,
3071 * #define's, enum constants and struct/union/enum definitions in
3072 * C syntax and adds them to the list.
3074 static void
3075 C_entries (int c_ext, FILE *inf)
3076 /* extension of C */
3077 /* input file */
3079 register char c; /* latest char read; '\0' for end of line */
3080 register char *lp; /* pointer one beyond the character `c' */
3081 int curndx, newndx; /* indices for current and new lb */
3082 register int tokoff; /* offset in line of start of current token */
3083 register int toklen; /* length of current token */
3084 const char *qualifier; /* string used to qualify names */
3085 int qlen; /* length of qualifier */
3086 int bracelev; /* current brace level */
3087 int bracketlev; /* current bracket level */
3088 int parlev; /* current parenthesis level */
3089 int attrparlev; /* __attribute__ parenthesis level */
3090 int templatelev; /* current template level */
3091 int typdefbracelev; /* bracelev where a typedef struct body begun */
3092 bool incomm, inquote, inchar, quotednl, midtoken;
3093 bool yacc_rules; /* in the rules part of a yacc file */
3094 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3097 linebuffer_init (&lbs[0].lb);
3098 linebuffer_init (&lbs[1].lb);
3099 if (cstack.size == 0)
3101 cstack.size = (DEBUG) ? 1 : 4;
3102 cstack.nl = 0;
3103 cstack.cname = xnew (cstack.size, char *);
3104 cstack.bracelev = xnew (cstack.size, int);
3107 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3108 curndx = newndx = 0;
3109 lp = curlb.buffer;
3110 *lp = 0;
3112 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3113 structdef = snone; definedef = dnone; objdef = onone;
3114 yacc_rules = FALSE;
3115 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3116 token.valid = savetoken.valid = FALSE;
3117 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3118 if (cjava)
3119 { qualifier = "."; qlen = 1; }
3120 else
3121 { qualifier = "::"; qlen = 2; }
3124 while (!feof (inf))
3126 c = *lp++;
3127 if (c == '\\')
3129 /* If we are at the end of the line, the next character is a
3130 '\0'; do not skip it, because it is what tells us
3131 to read the next line. */
3132 if (*lp == '\0')
3134 quotednl = TRUE;
3135 continue;
3137 lp++;
3138 c = ' ';
3140 else if (incomm)
3142 switch (c)
3144 case '*':
3145 if (*lp == '/')
3147 c = *lp++;
3148 incomm = FALSE;
3150 break;
3151 case '\0':
3152 /* Newlines inside comments do not end macro definitions in
3153 traditional cpp. */
3154 CNL_SAVE_DEFINEDEF ();
3155 break;
3157 continue;
3159 else if (inquote)
3161 switch (c)
3163 case '"':
3164 inquote = FALSE;
3165 break;
3166 case '\0':
3167 /* Newlines inside strings do not end macro definitions
3168 in traditional cpp, even though compilers don't
3169 usually accept them. */
3170 CNL_SAVE_DEFINEDEF ();
3171 break;
3173 continue;
3175 else if (inchar)
3177 switch (c)
3179 case '\0':
3180 /* Hmmm, something went wrong. */
3181 CNL ();
3182 /* FALLTHRU */
3183 case '\'':
3184 inchar = FALSE;
3185 break;
3187 continue;
3189 else if (bracketlev > 0)
3191 switch (c)
3193 case ']':
3194 if (--bracketlev > 0)
3195 continue;
3196 break;
3197 case '\0':
3198 CNL_SAVE_DEFINEDEF ();
3199 break;
3201 continue;
3203 else switch (c)
3205 case '"':
3206 inquote = TRUE;
3207 if (inattribute)
3208 break;
3209 switch (fvdef)
3211 case fdefunkey:
3212 case fstartlist:
3213 case finlist:
3214 case fignore:
3215 case vignore:
3216 break;
3217 default:
3218 fvextern = FALSE;
3219 fvdef = fvnone;
3221 continue;
3222 case '\'':
3223 inchar = TRUE;
3224 if (inattribute)
3225 break;
3226 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3228 fvextern = FALSE;
3229 fvdef = fvnone;
3231 continue;
3232 case '/':
3233 if (*lp == '*')
3235 incomm = TRUE;
3236 lp++;
3237 c = ' ';
3239 else if (/* cplpl && */ *lp == '/')
3241 c = '\0';
3243 break;
3244 case '%':
3245 if ((c_ext & YACC) && *lp == '%')
3247 /* Entering or exiting rules section in yacc file. */
3248 lp++;
3249 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3250 typdef = tnone; structdef = snone;
3251 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3252 bracelev = 0;
3253 yacc_rules = !yacc_rules;
3254 continue;
3256 else
3257 break;
3258 case '#':
3259 if (definedef == dnone)
3261 char *cp;
3262 bool cpptoken = TRUE;
3264 /* Look back on this line. If all blanks, or nonblanks
3265 followed by an end of comment, this is a preprocessor
3266 token. */
3267 for (cp = newlb.buffer; cp < lp-1; cp++)
3268 if (!iswhite (*cp))
3270 if (*cp == '*' && *(cp+1) == '/')
3272 cp++;
3273 cpptoken = TRUE;
3275 else
3276 cpptoken = FALSE;
3278 if (cpptoken)
3279 definedef = dsharpseen;
3280 } /* if (definedef == dnone) */
3281 continue;
3282 case '[':
3283 bracketlev++;
3284 continue;
3285 } /* switch (c) */
3288 /* Consider token only if some involved conditions are satisfied. */
3289 if (typdef != tignore
3290 && definedef != dignorerest
3291 && fvdef != finlist
3292 && templatelev == 0
3293 && (definedef != dnone
3294 || structdef != scolonseen)
3295 && !inattribute)
3297 if (midtoken)
3299 if (endtoken (c))
3301 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3302 /* This handles :: in the middle,
3303 but not at the beginning of an identifier.
3304 Also, space-separated :: is not recognised. */
3306 if (c_ext & C_AUTO) /* automatic detection of C++ */
3307 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3308 lp += 2;
3309 toklen += 2;
3310 c = lp[-1];
3311 goto still_in_token;
3313 else
3315 bool funorvar = FALSE;
3317 if (yacc_rules
3318 || consider_token (newlb.buffer + tokoff, toklen, c,
3319 &c_ext, bracelev, parlev,
3320 &funorvar))
3322 if (fvdef == foperator)
3324 char *oldlp = lp;
3325 lp = skip_spaces (lp-1);
3326 if (*lp != '\0')
3327 lp += 1;
3328 while (*lp != '\0'
3329 && !iswhite (*lp) && *lp != '(')
3330 lp += 1;
3331 c = *lp++;
3332 toklen += lp - oldlp;
3334 token.named = FALSE;
3335 if (!plainc
3336 && nestlev > 0 && definedef == dnone)
3337 /* in struct body */
3339 write_classname (&token_name, qualifier);
3340 linebuffer_setlen (&token_name,
3341 token_name.len+qlen+toklen);
3342 strcat (token_name.buffer, qualifier);
3343 strncat (token_name.buffer,
3344 newlb.buffer + tokoff, toklen);
3345 token.named = TRUE;
3347 else if (objdef == ocatseen)
3348 /* Objective C category */
3350 int len = strlen (objtag) + 2 + toklen;
3351 linebuffer_setlen (&token_name, len);
3352 strcpy (token_name.buffer, objtag);
3353 strcat (token_name.buffer, "(");
3354 strncat (token_name.buffer,
3355 newlb.buffer + tokoff, toklen);
3356 strcat (token_name.buffer, ")");
3357 token.named = TRUE;
3359 else if (objdef == omethodtag
3360 || objdef == omethodparm)
3361 /* Objective C method */
3363 token.named = TRUE;
3365 else if (fvdef == fdefunname)
3366 /* GNU DEFUN and similar macros */
3368 bool defun = (newlb.buffer[tokoff] == 'F');
3369 int off = tokoff;
3370 int len = toklen;
3372 /* Rewrite the tag so that emacs lisp DEFUNs
3373 can be found by their elisp name */
3374 if (defun)
3376 off += 1;
3377 len -= 1;
3379 linebuffer_setlen (&token_name, len);
3380 strncpy (token_name.buffer,
3381 newlb.buffer + off, len);
3382 token_name.buffer[len] = '\0';
3383 if (defun)
3384 while (--len >= 0)
3385 if (token_name.buffer[len] == '_')
3386 token_name.buffer[len] = '-';
3387 token.named = defun;
3389 else
3391 linebuffer_setlen (&token_name, toklen);
3392 strncpy (token_name.buffer,
3393 newlb.buffer + tokoff, toklen);
3394 token_name.buffer[toklen] = '\0';
3395 /* Name macros and members. */
3396 token.named = (structdef == stagseen
3397 || typdef == ttypeseen
3398 || typdef == tend
3399 || (funorvar
3400 && definedef == dignorerest)
3401 || (funorvar
3402 && definedef == dnone
3403 && structdef == snone
3404 && bracelev > 0));
3406 token.lineno = lineno;
3407 token.offset = tokoff;
3408 token.length = toklen;
3409 token.line = newlb.buffer;
3410 token.linepos = newlinepos;
3411 token.valid = TRUE;
3413 if (definedef == dnone
3414 && (fvdef == fvnameseen
3415 || fvdef == foperator
3416 || structdef == stagseen
3417 || typdef == tend
3418 || typdef == ttypeseen
3419 || objdef != onone))
3421 if (current_lb_is_new)
3422 switch_line_buffers ();
3424 else if (definedef != dnone
3425 || fvdef == fdefunname
3426 || instruct)
3427 make_C_tag (funorvar);
3429 else /* not yacc and consider_token failed */
3431 if (inattribute && fvdef == fignore)
3433 /* We have just met __attribute__ after a
3434 function parameter list: do not tag the
3435 function again. */
3436 fvdef = fvnone;
3439 midtoken = FALSE;
3441 } /* if (endtoken (c)) */
3442 else if (intoken (c))
3443 still_in_token:
3445 toklen++;
3446 continue;
3448 } /* if (midtoken) */
3449 else if (begtoken (c))
3451 switch (definedef)
3453 case dnone:
3454 switch (fvdef)
3456 case fstartlist:
3457 /* This prevents tagging fb in
3458 void (__attribute__((noreturn)) *fb) (void);
3459 Fixing this is not easy and not very important. */
3460 fvdef = finlist;
3461 continue;
3462 case flistseen:
3463 if (plainc || declarations)
3465 make_C_tag (TRUE); /* a function */
3466 fvdef = fignore;
3468 break;
3470 if (structdef == stagseen && !cjava)
3472 popclass_above (bracelev);
3473 structdef = snone;
3475 break;
3476 case dsharpseen:
3477 savetoken = token;
3478 break;
3480 if (!yacc_rules || lp == newlb.buffer + 1)
3482 tokoff = lp - 1 - newlb.buffer;
3483 toklen = 1;
3484 midtoken = TRUE;
3486 continue;
3487 } /* if (begtoken) */
3488 } /* if must look at token */
3491 /* Detect end of line, colon, comma, semicolon and various braces
3492 after having handled a token.*/
3493 switch (c)
3495 case ':':
3496 if (inattribute)
3497 break;
3498 if (yacc_rules && token.offset == 0 && token.valid)
3500 make_C_tag (FALSE); /* a yacc function */
3501 break;
3503 if (definedef != dnone)
3504 break;
3505 switch (objdef)
3507 case otagseen:
3508 objdef = oignore;
3509 make_C_tag (TRUE); /* an Objective C class */
3510 break;
3511 case omethodtag:
3512 case omethodparm:
3513 objdef = omethodcolon;
3514 linebuffer_setlen (&token_name, token_name.len + 1);
3515 strcat (token_name.buffer, ":");
3516 break;
3518 if (structdef == stagseen)
3520 structdef = scolonseen;
3521 break;
3523 /* Should be useless, but may be work as a safety net. */
3524 if (cplpl && fvdef == flistseen)
3526 make_C_tag (TRUE); /* a function */
3527 fvdef = fignore;
3528 break;
3530 break;
3531 case ';':
3532 if (definedef != dnone || inattribute)
3533 break;
3534 switch (typdef)
3536 case tend:
3537 case ttypeseen:
3538 make_C_tag (FALSE); /* a typedef */
3539 typdef = tnone;
3540 fvdef = fvnone;
3541 break;
3542 case tnone:
3543 case tinbody:
3544 case tignore:
3545 switch (fvdef)
3547 case fignore:
3548 if (typdef == tignore || cplpl)
3549 fvdef = fvnone;
3550 break;
3551 case fvnameseen:
3552 if ((globals && bracelev == 0 && (!fvextern || declarations))
3553 || (members && instruct))
3554 make_C_tag (FALSE); /* a variable */
3555 fvextern = FALSE;
3556 fvdef = fvnone;
3557 token.valid = FALSE;
3558 break;
3559 case flistseen:
3560 if ((declarations
3561 && (cplpl || !instruct)
3562 && (typdef == tnone || (typdef != tignore && instruct)))
3563 || (members
3564 && plainc && instruct))
3565 make_C_tag (TRUE); /* a function */
3566 /* FALLTHRU */
3567 default:
3568 fvextern = FALSE;
3569 fvdef = fvnone;
3570 if (declarations
3571 && cplpl && structdef == stagseen)
3572 make_C_tag (FALSE); /* forward declaration */
3573 else
3574 token.valid = FALSE;
3575 } /* switch (fvdef) */
3576 /* FALLTHRU */
3577 default:
3578 if (!instruct)
3579 typdef = tnone;
3581 if (structdef == stagseen)
3582 structdef = snone;
3583 break;
3584 case ',':
3585 if (definedef != dnone || inattribute)
3586 break;
3587 switch (objdef)
3589 case omethodtag:
3590 case omethodparm:
3591 make_C_tag (TRUE); /* an Objective C method */
3592 objdef = oinbody;
3593 break;
3595 switch (fvdef)
3597 case fdefunkey:
3598 case foperator:
3599 case fstartlist:
3600 case finlist:
3601 case fignore:
3602 case vignore:
3603 break;
3604 case fdefunname:
3605 fvdef = fignore;
3606 break;
3607 case fvnameseen:
3608 if (parlev == 0
3609 && ((globals
3610 && bracelev == 0
3611 && templatelev == 0
3612 && (!fvextern || declarations))
3613 || (members && instruct)))
3614 make_C_tag (FALSE); /* a variable */
3615 break;
3616 case flistseen:
3617 if ((declarations && typdef == tnone && !instruct)
3618 || (members && typdef != tignore && instruct))
3620 make_C_tag (TRUE); /* a function */
3621 fvdef = fvnameseen;
3623 else if (!declarations)
3624 fvdef = fvnone;
3625 token.valid = FALSE;
3626 break;
3627 default:
3628 fvdef = fvnone;
3630 if (structdef == stagseen)
3631 structdef = snone;
3632 break;
3633 case ']':
3634 if (definedef != dnone || inattribute)
3635 break;
3636 if (structdef == stagseen)
3637 structdef = snone;
3638 switch (typdef)
3640 case ttypeseen:
3641 case tend:
3642 typdef = tignore;
3643 make_C_tag (FALSE); /* a typedef */
3644 break;
3645 case tnone:
3646 case tinbody:
3647 switch (fvdef)
3649 case foperator:
3650 case finlist:
3651 case fignore:
3652 case vignore:
3653 break;
3654 case fvnameseen:
3655 if ((members && bracelev == 1)
3656 || (globals && bracelev == 0
3657 && (!fvextern || declarations)))
3658 make_C_tag (FALSE); /* a variable */
3659 /* FALLTHRU */
3660 default:
3661 fvdef = fvnone;
3663 break;
3665 break;
3666 case '(':
3667 if (inattribute)
3669 attrparlev++;
3670 break;
3672 if (definedef != dnone)
3673 break;
3674 if (objdef == otagseen && parlev == 0)
3675 objdef = oparenseen;
3676 switch (fvdef)
3678 case fvnameseen:
3679 if (typdef == ttypeseen
3680 && *lp != '*'
3681 && !instruct)
3683 /* This handles constructs like:
3684 typedef void OperatorFun (int fun); */
3685 make_C_tag (FALSE);
3686 typdef = tignore;
3687 fvdef = fignore;
3688 break;
3690 /* FALLTHRU */
3691 case foperator:
3692 fvdef = fstartlist;
3693 break;
3694 case flistseen:
3695 fvdef = finlist;
3696 break;
3698 parlev++;
3699 break;
3700 case ')':
3701 if (inattribute)
3703 if (--attrparlev == 0)
3704 inattribute = FALSE;
3705 break;
3707 if (definedef != dnone)
3708 break;
3709 if (objdef == ocatseen && parlev == 1)
3711 make_C_tag (TRUE); /* an Objective C category */
3712 objdef = oignore;
3714 if (--parlev == 0)
3716 switch (fvdef)
3718 case fstartlist:
3719 case finlist:
3720 fvdef = flistseen;
3721 break;
3723 if (!instruct
3724 && (typdef == tend
3725 || typdef == ttypeseen))
3727 typdef = tignore;
3728 make_C_tag (FALSE); /* a typedef */
3731 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3732 parlev = 0;
3733 break;
3734 case '{':
3735 if (definedef != dnone)
3736 break;
3737 if (typdef == ttypeseen)
3739 /* Whenever typdef is set to tinbody (currently only
3740 here), typdefbracelev should be set to bracelev. */
3741 typdef = tinbody;
3742 typdefbracelev = bracelev;
3744 switch (fvdef)
3746 case flistseen:
3747 make_C_tag (TRUE); /* a function */
3748 /* FALLTHRU */
3749 case fignore:
3750 fvdef = fvnone;
3751 break;
3752 case fvnone:
3753 switch (objdef)
3755 case otagseen:
3756 make_C_tag (TRUE); /* an Objective C class */
3757 objdef = oignore;
3758 break;
3759 case omethodtag:
3760 case omethodparm:
3761 make_C_tag (TRUE); /* an Objective C method */
3762 objdef = oinbody;
3763 break;
3764 default:
3765 /* Neutralize `extern "C" {' grot. */
3766 if (bracelev == 0 && structdef == snone && nestlev == 0
3767 && typdef == tnone)
3768 bracelev = -1;
3770 break;
3772 switch (structdef)
3774 case skeyseen: /* unnamed struct */
3775 pushclass_above (bracelev, NULL, 0);
3776 structdef = snone;
3777 break;
3778 case stagseen: /* named struct or enum */
3779 case scolonseen: /* a class */
3780 pushclass_above (bracelev,token.line+token.offset, token.length);
3781 structdef = snone;
3782 make_C_tag (FALSE); /* a struct or enum */
3783 break;
3785 bracelev += 1;
3786 break;
3787 case '*':
3788 if (definedef != dnone)
3789 break;
3790 if (fvdef == fstartlist)
3792 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3793 token.valid = FALSE;
3795 break;
3796 case '}':
3797 if (definedef != dnone)
3798 break;
3799 bracelev -= 1;
3800 if (!ignoreindent && lp == newlb.buffer + 1)
3802 if (bracelev != 0)
3803 token.valid = FALSE; /* unexpected value, token unreliable */
3804 bracelev = 0; /* reset brace level if first column */
3805 parlev = 0; /* also reset paren level, just in case... */
3807 else if (bracelev < 0)
3809 token.valid = FALSE; /* something gone amiss, token unreliable */
3810 bracelev = 0;
3812 if (bracelev == 0 && fvdef == vignore)
3813 fvdef = fvnone; /* end of function */
3814 popclass_above (bracelev);
3815 structdef = snone;
3816 /* Only if typdef == tinbody is typdefbracelev significant. */
3817 if (typdef == tinbody && bracelev <= typdefbracelev)
3819 assert (bracelev == typdefbracelev);
3820 typdef = tend;
3822 break;
3823 case '=':
3824 if (definedef != dnone)
3825 break;
3826 switch (fvdef)
3828 case foperator:
3829 case finlist:
3830 case fignore:
3831 case vignore:
3832 break;
3833 case fvnameseen:
3834 if ((members && bracelev == 1)
3835 || (globals && bracelev == 0 && (!fvextern || declarations)))
3836 make_C_tag (FALSE); /* a variable */
3837 /* FALLTHRU */
3838 default:
3839 fvdef = vignore;
3841 break;
3842 case '<':
3843 if (cplpl
3844 && (structdef == stagseen || fvdef == fvnameseen))
3846 templatelev++;
3847 break;
3849 goto resetfvdef;
3850 case '>':
3851 if (templatelev > 0)
3853 templatelev--;
3854 break;
3856 goto resetfvdef;
3857 case '+':
3858 case '-':
3859 if (objdef == oinbody && bracelev == 0)
3861 objdef = omethodsign;
3862 break;
3864 /* FALLTHRU */
3865 resetfvdef:
3866 case '#': case '~': case '&': case '%': case '/':
3867 case '|': case '^': case '!': case '.': case '?':
3868 if (definedef != dnone)
3869 break;
3870 /* These surely cannot follow a function tag in C. */
3871 switch (fvdef)
3873 case foperator:
3874 case finlist:
3875 case fignore:
3876 case vignore:
3877 break;
3878 default:
3879 fvdef = fvnone;
3881 break;
3882 case '\0':
3883 if (objdef == otagseen)
3885 make_C_tag (TRUE); /* an Objective C class */
3886 objdef = oignore;
3888 /* If a macro spans multiple lines don't reset its state. */
3889 if (quotednl)
3890 CNL_SAVE_DEFINEDEF ();
3891 else
3892 CNL ();
3893 break;
3894 } /* switch (c) */
3896 } /* while not eof */
3898 free (lbs[0].lb.buffer);
3899 free (lbs[1].lb.buffer);
3903 * Process either a C++ file or a C file depending on the setting
3904 * of a global flag.
3906 static void
3907 default_C_entries (FILE *inf)
3909 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
/* Always do plain C: parse INF with no language-extension flag set.  */
static void
plain_C_entries (FILE *inf)
{
  C_entries (0, inf);
}
3919 /* Always do C++. */
3920 static void
3921 Cplusplus_entries (FILE *inf)
3923 C_entries (C_PLPL, inf);
3926 /* Always do Java. */
3927 static void
3928 Cjava_entries (FILE *inf)
3930 C_entries (C_JAVA, inf);
3933 /* Always do C*. */
3934 static void
3935 Cstar_entries (FILE *inf)
3937 C_entries (C_STAR, inf);
3940 /* Always do Yacc. */
3941 static void
3942 Yacc_entries (FILE *inf)
3944 C_entries (YACC, inf);
/* Useful macros. */

/* Loop header that runs the following statement once per input line.
   Before each iteration, and only while feof (FILE_POINTER) is false,
   the next line is read into LINE_BUFFER with readline and
   CHAR_POINTER is set to the start of that line.  The increment part
   is empty, so a `continue' in the body goes straight to reading the
   next line.  Arguments are parenthesized so that any lvalue
   expression can be passed safely.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
/* True if the text at CP starts with the keyword KW and the character
   right after it satisfies notinname.  On a match, CP is advanced past
   the keyword and then through skip_spaces; on a mismatch CP is left
   untouched because the assignment is the last `&&' operand.
   KW must be a string literal: it is pasted next to "" and measured
   with sizeof.  CP is evaluated several times, so it must be free of
   side effects.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
/* Similar to LOOKING_AT, but the comparison is done with strncaseeq,
   the character following the keyword is not checked with notinname,
   and trailing spaces are not skipped: on a match CP is simply
   advanced to the character right after the keyword.  On a mismatch
   CP is left untouched.  CP is evaluated several times.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE) /* syntax error if not a literal string */     \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* advance past kw */
3971 * Read a file, but do no processing. This is used to do regexp
3972 * matching on files that have no language defined.
3974 static void
3975 just_read_file (FILE *inf)
3977 while (!feof (inf))
3978 readline (&lb, inf);
3982 /* Fortran parsing */
3984 static void F_takeprec (void);
3985 static void F_getit (FILE *);
3987 static void
3988 F_takeprec (void)
3990 dbp = skip_spaces (dbp);
3991 if (*dbp != '*')
3992 return;
3993 dbp++;
3994 dbp = skip_spaces (dbp);
3995 if (strneq (dbp, "(*)", 3))
3997 dbp += 3;
3998 return;
4000 if (!ISDIGIT (*dbp))
4002 --dbp; /* force failure */
4003 return;
4006 dbp++;
4007 while (ISDIGIT (*dbp));
4010 static void
4011 F_getit (FILE *inf)
4013 register char *cp;
4015 dbp = skip_spaces (dbp);
4016 if (*dbp == '\0')
4018 readline (&lb, inf);
4019 dbp = lb.buffer;
4020 if (dbp[5] != '&')
4021 return;
4022 dbp += 6;
4023 dbp = skip_spaces (dbp);
4025 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4026 return;
4027 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4028 continue;
4029 make_tag (dbp, cp-dbp, TRUE,
4030 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4034 static void
4035 Fortran_functions (FILE *inf)
4037 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4039 if (*dbp == '%')
4040 dbp++; /* Ratfor escape to fortran */
4041 dbp = skip_spaces (dbp);
4042 if (*dbp == '\0')
4043 continue;
4045 if (LOOKING_AT_NOCASE (dbp, "recursive"))
4046 dbp = skip_spaces (dbp);
4048 switch (lowcase (*dbp))
4050 case 'i':
4051 if (nocase_tail ("integer"))
4052 F_takeprec ();
4053 break;
4054 case 'r':
4055 if (nocase_tail ("real"))
4056 F_takeprec ();
4057 break;
4058 case 'l':
4059 if (nocase_tail ("logical"))
4060 F_takeprec ();
4061 break;
4062 case 'c':
4063 if (nocase_tail ("complex") || nocase_tail ("character"))
4064 F_takeprec ();
4065 break;
4066 case 'd':
4067 if (nocase_tail ("double"))
4069 dbp = skip_spaces (dbp);
4070 if (*dbp == '\0')
4071 continue;
4072 if (nocase_tail ("precision"))
4073 break;
4074 continue;
4076 break;
4078 dbp = skip_spaces (dbp);
4079 if (*dbp == '\0')
4080 continue;
4081 switch (lowcase (*dbp))
4083 case 'f':
4084 if (nocase_tail ("function"))
4085 F_getit (inf);
4086 continue;
4087 case 's':
4088 if (nocase_tail ("subroutine"))
4089 F_getit (inf);
4090 continue;
4091 case 'e':
4092 if (nocase_tail ("entry"))
4093 F_getit (inf);
4094 continue;
4095 case 'b':
4096 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4098 dbp = skip_spaces (dbp);
4099 if (*dbp == '\0') /* assume un-named */
4100 make_tag ("blockdata", 9, TRUE,
4101 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4102 else
4103 F_getit (inf); /* look for name */
4105 continue;
4112 * Ada parsing
4113 * Original code by
4114 * Philippe Waroquiers (1998)
4117 /* Once we are positioned after an "interesting" keyword, let's get
4118 the real tag value necessary. */
4119 static void
4120 Ada_getit (FILE *inf, const char *name_qualifier)
4122 register char *cp;
4123 char *name;
4124 char c;
4126 while (!feof (inf))
4128 dbp = skip_spaces (dbp);
4129 if (*dbp == '\0'
4130 || (dbp[0] == '-' && dbp[1] == '-'))
4132 readline (&lb, inf);
4133 dbp = lb.buffer;
4135 switch (lowcase(*dbp))
4137 case 'b':
4138 if (nocase_tail ("body"))
4140 /* Skipping body of procedure body or package body or ....
4141 resetting qualifier to body instead of spec. */
4142 name_qualifier = "/b";
4143 continue;
4145 break;
4146 case 't':
4147 /* Skipping type of task type or protected type ... */
4148 if (nocase_tail ("type"))
4149 continue;
4150 break;
4152 if (*dbp == '"')
4154 dbp += 1;
4155 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4156 continue;
4158 else
4160 dbp = skip_spaces (dbp);
4161 for (cp = dbp;
4162 (*cp != '\0'
4163 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4164 cp++)
4165 continue;
4166 if (cp == dbp)
4167 return;
4169 c = *cp;
4170 *cp = '\0';
4171 name = concat (dbp, name_qualifier, "");
4172 *cp = c;
4173 make_tag (name, strlen (name), TRUE,
4174 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4175 free (name);
4176 if (c == '"')
4177 dbp = cp + 1;
4178 return;
4182 static void
4183 Ada_funcs (FILE *inf)
4185 bool inquote = FALSE;
4186 bool skip_till_semicolumn = FALSE;
4188 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4190 while (*dbp != '\0')
4192 /* Skip a string i.e. "abcd". */
4193 if (inquote || (*dbp == '"'))
4195 dbp = etags_strchr (dbp + !inquote, '"');
4196 if (dbp != NULL)
4198 inquote = FALSE;
4199 dbp += 1;
4200 continue; /* advance char */
4202 else
4204 inquote = TRUE;
4205 break; /* advance line */
4209 /* Skip comments. */
4210 if (dbp[0] == '-' && dbp[1] == '-')
4211 break; /* advance line */
4213 /* Skip character enclosed in single quote i.e. 'a'
4214 and skip single quote starting an attribute i.e. 'Image. */
4215 if (*dbp == '\'')
4217 dbp++ ;
4218 if (*dbp != '\0')
4219 dbp++;
4220 continue;
4223 if (skip_till_semicolumn)
4225 if (*dbp == ';')
4226 skip_till_semicolumn = FALSE;
4227 dbp++;
4228 continue; /* advance char */
4231 /* Search for beginning of a token. */
4232 if (!begtoken (*dbp))
4234 dbp++;
4235 continue; /* advance char */
4238 /* We are at the beginning of a token. */
4239 switch (lowcase(*dbp))
4241 case 'f':
4242 if (!packages_only && nocase_tail ("function"))
4243 Ada_getit (inf, "/f");
4244 else
4245 break; /* from switch */
4246 continue; /* advance char */
4247 case 'p':
4248 if (!packages_only && nocase_tail ("procedure"))
4249 Ada_getit (inf, "/p");
4250 else if (nocase_tail ("package"))
4251 Ada_getit (inf, "/s");
4252 else if (nocase_tail ("protected")) /* protected type */
4253 Ada_getit (inf, "/t");
4254 else
4255 break; /* from switch */
4256 continue; /* advance char */
4258 case 'u':
4259 if (typedefs && !packages_only && nocase_tail ("use"))
4261 /* when tagging types, avoid tagging use type Pack.Typename;
4262 for this, we will skip everything till a ; */
4263 skip_till_semicolumn = TRUE;
4264 continue; /* advance char */
4267 case 't':
4268 if (!packages_only && nocase_tail ("task"))
4269 Ada_getit (inf, "/k");
4270 else if (typedefs && !packages_only && nocase_tail ("type"))
4272 Ada_getit (inf, "/t");
4273 while (*dbp != '\0')
4274 dbp += 1;
4276 else
4277 break; /* from switch */
4278 continue; /* advance char */
4281 /* Look for the end of the token. */
4282 while (!endtoken (*dbp))
4283 dbp++;
4285 } /* advance char */
4286 } /* advance line */
4291 * Unix and microcontroller assembly tag handling
4292 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4293 * Idea by Bob Weiner, Motorola Inc. (1994)
4295 static void
4296 Asm_labels (FILE *inf)
4298 register char *cp;
4300 LOOP_ON_INPUT_LINES (inf, lb, cp)
4302 /* If first char is alphabetic or one of [_.$], test for colon
4303 following identifier. */
4304 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4306 /* Read past label. */
4307 cp++;
4308 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4309 cp++;
4310 if (*cp == ':' || iswhite (*cp))
4311 /* Found end of label, so copy it and add it to the table. */
4312 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4313 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4320 * Perl support
4321 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4322 * Perl variable names: /^(my|local).../
4323 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4324 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4325 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4327 static void
4328 Perl_functions (FILE *inf)
4330 char *package = savestr ("main"); /* current package name */
4331 register char *cp;
4333 LOOP_ON_INPUT_LINES (inf, lb, cp)
4335 cp = skip_spaces (cp);
4337 if (LOOKING_AT (cp, "package"))
4339 free (package);
4340 get_tag (cp, &package);
4342 else if (LOOKING_AT (cp, "sub"))
4344 char *pos;
4345 char *sp = cp;
4347 while (!notinname (*cp))
4348 cp++;
4349 if (cp == sp)
4350 continue; /* nothing found */
4351 if ((pos = etags_strchr (sp, ':')) != NULL
4352 && pos < cp && pos[1] == ':')
4353 /* The name is already qualified. */
4354 make_tag (sp, cp - sp, TRUE,
4355 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4356 else
4357 /* Qualify it. */
4359 char savechar, *name;
4361 savechar = *cp;
4362 *cp = '\0';
4363 name = concat (package, "::", sp);
4364 *cp = savechar;
4365 make_tag (name, strlen(name), TRUE,
4366 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4367 free (name);
4370 else if (globals) /* only if we are tagging global vars */
4372 /* Skip a qualifier, if any. */
4373 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4374 /* After "my" or "local", but before any following paren or space. */
4375 char *varstart = cp;
4377 if (qual /* should this be removed? If yes, how? */
4378 && (*cp == '$' || *cp == '@' || *cp == '%'))
4380 varstart += 1;
4382 cp++;
4383 while (ISALNUM (*cp) || *cp == '_');
4385 else if (qual)
4387 /* Should be examining a variable list at this point;
4388 could insist on seeing an open parenthesis. */
4389 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4390 cp++;
4392 else
4393 continue;
4395 make_tag (varstart, cp - varstart, FALSE,
4396 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4399 free (package);
4404 * Python support
4405 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4406 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4407 * More ideas by seb bacon <seb@jamkit.com> (2002)
4409 static void
4410 Python_functions (FILE *inf)
4412 register char *cp;
4414 LOOP_ON_INPUT_LINES (inf, lb, cp)
4416 cp = skip_spaces (cp);
4417 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4419 char *name = cp;
4420 while (!notinname (*cp) && *cp != ':')
4421 cp++;
4422 make_tag (name, cp - name, TRUE,
4423 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4430 * PHP support
4431 * Look for:
4432 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4433 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4434 * - /^[ \t]*define\(\"[^\"]+/
4435 * Only with --members:
4436 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4437 * Idea by Diez B. Roggisch (2001)
4439 static void
4440 PHP_functions (FILE *inf)
4442 register char *cp, *name;
4443 bool search_identifier = FALSE;
4445 LOOP_ON_INPUT_LINES (inf, lb, cp)
4447 cp = skip_spaces (cp);
4448 name = cp;
4449 if (search_identifier
4450 && *cp != '\0')
4452 while (!notinname (*cp))
4453 cp++;
4454 make_tag (name, cp - name, TRUE,
4455 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4456 search_identifier = FALSE;
4458 else if (LOOKING_AT (cp, "function"))
4460 if(*cp == '&')
4461 cp = skip_spaces (cp+1);
4462 if(*cp != '\0')
4464 name = cp;
4465 while (!notinname (*cp))
4466 cp++;
4467 make_tag (name, cp - name, TRUE,
4468 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4470 else
4471 search_identifier = TRUE;
4473 else if (LOOKING_AT (cp, "class"))
4475 if (*cp != '\0')
4477 name = cp;
4478 while (*cp != '\0' && !iswhite (*cp))
4479 cp++;
4480 make_tag (name, cp - name, FALSE,
4481 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4483 else
4484 search_identifier = TRUE;
4486 else if (strneq (cp, "define", 6)
4487 && (cp = skip_spaces (cp+6))
4488 && *cp++ == '('
4489 && (*cp == '"' || *cp == '\''))
4491 char quote = *cp++;
4492 name = cp;
4493 while (*cp != quote && *cp != '\0')
4494 cp++;
4495 make_tag (name, cp - name, FALSE,
4496 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4498 else if (members
4499 && LOOKING_AT (cp, "var")
4500 && *cp == '$')
4502 name = cp;
4503 while (!notinname(*cp))
4504 cp++;
4505 make_tag (name, cp - name, FALSE,
4506 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4513 * Cobol tag functions
4514 * We could look for anything that could be a paragraph name.
4515 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4516 * Idea by Corny de Souza (1993)
4518 static void
4519 Cobol_paragraphs (FILE *inf)
4521 register char *bp, *ep;
4523 LOOP_ON_INPUT_LINES (inf, lb, bp)
4525 if (lb.len < 9)
4526 continue;
4527 bp += 8;
4529 /* If eoln, compiler option or comment ignore whole line. */
4530 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4531 continue;
4533 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4534 continue;
4535 if (*ep++ == '.')
4536 make_tag (bp, ep - bp, TRUE,
4537 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4543 * Makefile support
4544 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4546 static void
4547 Makefile_targets (FILE *inf)
4549 register char *bp;
4551 LOOP_ON_INPUT_LINES (inf, lb, bp)
4553 if (*bp == '\t' || *bp == '#')
4554 continue;
4555 while (*bp != '\0' && *bp != '=' && *bp != ':')
4556 bp++;
4557 if (*bp == ':' || (globals && *bp == '='))
4559 /* We should detect if there is more than one tag, but we do not.
4560 We just skip initial and final spaces. */
4561 char * namestart = skip_spaces (lb.buffer);
4562 while (--bp > namestart)
4563 if (!notinname (*bp))
4564 break;
4565 make_tag (namestart, bp - namestart + 1, TRUE,
4566 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4573 * Pascal parsing
4574 * Original code by Mosur K. Mohan (1989)
4576 * Locates tags for procedures & functions. Doesn't do any type- or
4577 * var-definitions. It does look for the keyword "extern" or
4578 * "forward" immediately following the procedure statement; if found,
4579 * the tag is skipped.
4581 static void
4582 Pascal_functions (FILE *inf)
4584 linebuffer tline; /* mostly copied from C_entries */
4585 long save_lcno;
4586 int save_lineno, namelen, taglen;
4587 char c, *name;
4589 bool /* each of these flags is TRUE if: */
4590 incomment, /* point is inside a comment */
4591 inquote, /* point is inside '..' string */
4592 get_tagname, /* point is after PROCEDURE/FUNCTION
4593 keyword, so next item = potential tag */
4594 found_tag, /* point is after a potential tag */
4595 inparms, /* point is within parameter-list */
4596 verify_tag; /* point has passed the parm-list, so the
4597 next token will determine whether this
4598 is a FORWARD/EXTERN to be ignored, or
4599 whether it is a real tag */
4601 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4602 name = NULL; /* keep compiler quiet */
4603 dbp = lb.buffer;
4604 *dbp = '\0';
4605 linebuffer_init (&tline);
4607 incomment = inquote = FALSE;
4608 found_tag = FALSE; /* have a proc name; check if extern */
4609 get_tagname = FALSE; /* found "procedure" keyword */
4610 inparms = FALSE; /* found '(' after "proc" */
4611 verify_tag = FALSE; /* check if "extern" is ahead */
4614 while (!feof (inf)) /* long main loop to get next char */
4616 c = *dbp++;
4617 if (c == '\0') /* if end of line */
4619 readline (&lb, inf);
4620 dbp = lb.buffer;
4621 if (*dbp == '\0')
4622 continue;
4623 if (!((found_tag && verify_tag)
4624 || get_tagname))
4625 c = *dbp++; /* only if don't need *dbp pointing
4626 to the beginning of the name of
4627 the procedure or function */
4629 if (incomment)
4631 if (c == '}') /* within { } comments */
4632 incomment = FALSE;
4633 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4635 dbp++;
4636 incomment = FALSE;
4638 continue;
4640 else if (inquote)
4642 if (c == '\'')
4643 inquote = FALSE;
4644 continue;
4646 else
4647 switch (c)
4649 case '\'':
4650 inquote = TRUE; /* found first quote */
4651 continue;
4652 case '{': /* found open { comment */
4653 incomment = TRUE;
4654 continue;
4655 case '(':
4656 if (*dbp == '*') /* found open (* comment */
4658 incomment = TRUE;
4659 dbp++;
4661 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4662 inparms = TRUE;
4663 continue;
4664 case ')': /* end of parms list */
4665 if (inparms)
4666 inparms = FALSE;
4667 continue;
4668 case ';':
4669 if (found_tag && !inparms) /* end of proc or fn stmt */
4671 verify_tag = TRUE;
4672 break;
4674 continue;
4676 if (found_tag && verify_tag && (*dbp != ' '))
4678 /* Check if this is an "extern" declaration. */
4679 if (*dbp == '\0')
4680 continue;
4681 if (lowcase (*dbp == 'e'))
4683 if (nocase_tail ("extern")) /* superfluous, really! */
4685 found_tag = FALSE;
4686 verify_tag = FALSE;
4689 else if (lowcase (*dbp) == 'f')
4691 if (nocase_tail ("forward")) /* check for forward reference */
4693 found_tag = FALSE;
4694 verify_tag = FALSE;
4697 if (found_tag && verify_tag) /* not external proc, so make tag */
4699 found_tag = FALSE;
4700 verify_tag = FALSE;
4701 make_tag (name, namelen, TRUE,
4702 tline.buffer, taglen, save_lineno, save_lcno);
4703 continue;
4706 if (get_tagname) /* grab name of proc or fn */
4708 char *cp;
4710 if (*dbp == '\0')
4711 continue;
4713 /* Find block name. */
4714 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4715 continue;
4717 /* Save all values for later tagging. */
4718 linebuffer_setlen (&tline, lb.len);
4719 strcpy (tline.buffer, lb.buffer);
4720 save_lineno = lineno;
4721 save_lcno = linecharno;
4722 name = tline.buffer + (dbp - lb.buffer);
4723 namelen = cp - dbp;
4724 taglen = cp - lb.buffer + 1;
4726 dbp = cp; /* set dbp to e-o-token */
4727 get_tagname = FALSE;
4728 found_tag = TRUE;
4729 continue;
4731 /* And proceed to check for "extern". */
4733 else if (!incomment && !inquote && !found_tag)
4735 /* Check for proc/fn keywords. */
4736 switch (lowcase (c))
4738 case 'p':
4739 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4740 get_tagname = TRUE;
4741 continue;
4742 case 'f':
4743 if (nocase_tail ("unction"))
4744 get_tagname = TRUE;
4745 continue;
4748 } /* while not eof */
4750 free (tline.buffer);
4755 * Lisp tag functions
4756 * look for (def or (DEF, quote or QUOTE
4759 static void L_getit (void);
4761 static void
4762 L_getit (void)
4764 if (*dbp == '\'') /* Skip prefix quote */
4765 dbp++;
4766 else if (*dbp == '(')
4768 dbp++;
4769 /* Try to skip "(quote " */
4770 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4771 /* Ok, then skip "(" before name in (defstruct (foo)) */
4772 dbp = skip_spaces (dbp);
4774 get_tag (dbp, NULL);
4777 static void
4778 Lisp_functions (FILE *inf)
4780 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4782 if (dbp[0] != '(')
4783 continue;
4785 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4787 dbp = skip_non_spaces (dbp);
4788 dbp = skip_spaces (dbp);
4789 L_getit ();
4791 else
4793 /* Check for (foo::defmumble name-defined ... */
4795 dbp++;
4796 while (!notinname (*dbp) && *dbp != ':');
4797 if (*dbp == ':')
4800 dbp++;
4801 while (*dbp == ':');
4803 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4805 dbp = skip_non_spaces (dbp);
4806 dbp = skip_spaces (dbp);
4807 L_getit ();
4816 * Lua script language parsing
4817 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4819 * "function" and "local function" are tags if they start at column 1.
4821 static void
4822 Lua_functions (FILE *inf)
4824 register char *bp;
4826 LOOP_ON_INPUT_LINES (inf, lb, bp)
4828 if (bp[0] != 'f' && bp[0] != 'l')
4829 continue;
4831 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4833 if (LOOKING_AT (bp, "function"))
4834 get_tag (bp, NULL);
4840 * Postscript tags
4841 * Just look for lines where the first character is '/'
4842 * Also look at "defineps" for PSWrap
4843 * Ideas by:
4844 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4845 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4847 static void
4848 PS_functions (FILE *inf)
4850 register char *bp, *ep;
4852 LOOP_ON_INPUT_LINES (inf, lb, bp)
4854 if (bp[0] == '/')
4856 for (ep = bp+1;
4857 *ep != '\0' && *ep != ' ' && *ep != '{';
4858 ep++)
4859 continue;
4860 make_tag (bp, ep - bp, TRUE,
4861 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4863 else if (LOOKING_AT (bp, "defineps"))
4864 get_tag (bp, NULL);
4870 * Forth tags
4871 * Ignore anything after \ followed by space or in ( )
4872 * Look for words defined by :
4873 * Look for constant, code, create, defer, value, and variable
4874 * OBP extensions: Look for buffer:, field,
4875 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4877 static void
4878 Forth_words (FILE *inf)
4880 register char *bp;
4882 LOOP_ON_INPUT_LINES (inf, lb, bp)
4883 while ((bp = skip_spaces (bp))[0] != '\0')
4884 if (bp[0] == '\\' && iswhite(bp[1]))
4885 break; /* read next line */
4886 else if (bp[0] == '(' && iswhite(bp[1]))
4887 do /* skip to ) or eol */
4888 bp++;
4889 while (*bp != ')' && *bp != '\0');
4890 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4891 || LOOKING_AT_NOCASE (bp, "constant")
4892 || LOOKING_AT_NOCASE (bp, "code")
4893 || LOOKING_AT_NOCASE (bp, "create")
4894 || LOOKING_AT_NOCASE (bp, "defer")
4895 || LOOKING_AT_NOCASE (bp, "value")
4896 || LOOKING_AT_NOCASE (bp, "variable")
4897 || LOOKING_AT_NOCASE (bp, "buffer:")
4898 || LOOKING_AT_NOCASE (bp, "field"))
4899 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4900 else
4901 bp = skip_non_spaces (bp);
4906 * Scheme tag functions
4907 * look for (def... xyzzy
4908 * (def... (xyzzy
4909 * (def ... ((...(xyzzy ....
4910 * (set! xyzzy
4911 * Original code by Ken Haase (1985?)
4913 static void
4914 Scheme_functions (FILE *inf)
4916 register char *bp;
4918 LOOP_ON_INPUT_LINES (inf, lb, bp)
4920 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4922 bp = skip_non_spaces (bp+4);
4923 /* Skip over open parens and white space. Don't continue past
4924 '\0'. */
4925 while (*bp && notinname (*bp))
4926 bp++;
4927 get_tag (bp, NULL);
4929 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4930 get_tag (bp, NULL);
4935 /* Find tags in TeX and LaTeX input files. */
4937 /* TEX_toktab is a table of TeX control sequences that define tags.
4938 * Each entry records one such control sequence.
4940 * Original code from who knows whom.
4941 * Ideas by:
4942 * Stefan Monnier (2002)
4945 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4947 /* Default set of control sequences to put into TEX_toktab.
4948 The value of environment var TEXTAGS is prepended to this. */
4949 static const char *TEX_defenv = "\
4950 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4951 :part:appendix:entry:index:def\
4952 :newcommand:renewcommand:newenvironment:renewenvironment";
4954 static void TEX_mode (FILE *);
4955 static void TEX_decode_env (const char *, const char *);
4957 static char TEX_esc = '\\';
4958 static char TEX_opgrp = '{';
4959 static char TEX_clgrp = '}';
4962 * TeX/LaTeX scanning loop.
4964 static void
4965 TeX_commands (FILE *inf)
4967 char *cp;
4968 linebuffer *key;
4970 /* Select either \ or ! as escape character. */
4971 TEX_mode (inf);
4973 /* Initialize token table once from environment. */
4974 if (TEX_toktab == NULL)
4975 TEX_decode_env ("TEXTAGS", TEX_defenv);
4977 LOOP_ON_INPUT_LINES (inf, lb, cp)
4979 /* Look at each TEX keyword in line. */
4980 for (;;)
4982 /* Look for a TEX escape. */
4983 while (*cp++ != TEX_esc)
4984 if (cp[-1] == '\0' || cp[-1] == '%')
4985 goto tex_next_line;
4987 for (key = TEX_toktab; key->buffer != NULL; key++)
4988 if (strneq (cp, key->buffer, key->len))
4990 register char *p;
4991 int namelen, linelen;
4992 bool opgrp = FALSE;
4994 cp = skip_spaces (cp + key->len);
4995 if (*cp == TEX_opgrp)
4997 opgrp = TRUE;
4998 cp++;
5000 for (p = cp;
5001 (!iswhite (*p) && *p != '#' &&
5002 *p != TEX_opgrp && *p != TEX_clgrp);
5003 p++)
5004 continue;
5005 namelen = p - cp;
5006 linelen = lb.len;
5007 if (!opgrp || *p == TEX_clgrp)
5009 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5010 p++;
5011 linelen = p - lb.buffer + 1;
5013 make_tag (cp, namelen, TRUE,
5014 lb.buffer, linelen, lineno, linecharno);
5015 goto tex_next_line; /* We only tag a line once */
5018 tex_next_line:
5023 #define TEX_LESC '\\'
5024 #define TEX_SESC '!'
5026 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5027 chars accordingly. */
5028 static void
5029 TEX_mode (FILE *inf)
5031 int c;
5033 while ((c = getc (inf)) != EOF)
5035 /* Skip to next line if we hit the TeX comment char. */
5036 if (c == '%')
5037 while (c != '\n' && c != EOF)
5038 c = getc (inf);
5039 else if (c == TEX_LESC || c == TEX_SESC )
5040 break;
5043 if (c == TEX_LESC)
5045 TEX_esc = TEX_LESC;
5046 TEX_opgrp = '{';
5047 TEX_clgrp = '}';
5049 else
5051 TEX_esc = TEX_SESC;
5052 TEX_opgrp = '<';
5053 TEX_clgrp = '>';
5055 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5056 No attempt is made to correct the situation. */
5057 rewind (inf);
5060 /* Read environment and prepend it to the default string.
5061 Build token table. */
5062 static void
5063 TEX_decode_env (const char *evarname, const char *defenv)
5065 register const char *env, *p;
5066 int i, len;
5068 /* Append default string to environment. */
5069 env = getenv (evarname);
5070 if (!env)
5071 env = defenv;
5072 else
5073 env = concat (env, defenv, "");
5075 /* Allocate a token table */
5076 for (len = 1, p = env; p;)
5077 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5078 len++;
5079 TEX_toktab = xnew (len, linebuffer);
5081 /* Unpack environment string into token table. Be careful about */
5082 /* zero-length strings (leading ':', "::" and trailing ':') */
5083 for (i = 0; *env != '\0';)
5085 p = etags_strchr (env, ':');
5086 if (!p) /* End of environment string. */
5087 p = env + strlen (env);
5088 if (p - env > 0)
5089 { /* Only non-zero strings. */
5090 TEX_toktab[i].buffer = savenstr (env, p - env);
5091 TEX_toktab[i].len = p - env;
5092 i++;
5094 if (*p)
5095 env = p + 1;
5096 else
5098 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5099 TEX_toktab[i].len = 0;
5100 break;
5106 /* Texinfo support. Dave Love, Mar. 2000. */
5107 static void
5108 Texinfo_nodes (FILE *inf)
5110 char *cp, *start;
5111 LOOP_ON_INPUT_LINES (inf, lb, cp)
5112 if (LOOKING_AT (cp, "@node"))
5114 start = cp;
5115 while (*cp != '\0' && *cp != ',')
5116 cp++;
5117 make_tag (start, cp - start, TRUE,
5118 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5124 * HTML support.
5125 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5126 * Contents of <a name=xxx> are tags with name xxx.
5128 * Francesco Potortì, 2002.
5130 static void
5131 HTML_labels (FILE *inf)
5133 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5134 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5135 bool intag = FALSE; /* inside an html tag, looking for ID= */
5136 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5137 char *end;
5140 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5142 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5143 for (;;) /* loop on the same line */
5145 if (skiptag) /* skip HTML tag */
5147 while (*dbp != '\0' && *dbp != '>')
5148 dbp++;
5149 if (*dbp == '>')
5151 dbp += 1;
5152 skiptag = FALSE;
5153 continue; /* look on the same line */
5155 break; /* go to next line */
5158 else if (intag) /* look for "name=" or "id=" */
5160 while (*dbp != '\0' && *dbp != '>'
5161 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5162 dbp++;
5163 if (*dbp == '\0')
5164 break; /* go to next line */
5165 if (*dbp == '>')
5167 dbp += 1;
5168 intag = FALSE;
5169 continue; /* look on the same line */
5171 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5172 || LOOKING_AT_NOCASE (dbp, "id="))
5174 bool quoted = (dbp[0] == '"');
5176 if (quoted)
5177 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5178 continue;
5179 else
5180 for (end = dbp; *end != '\0' && intoken (*end); end++)
5181 continue;
5182 linebuffer_setlen (&token_name, end - dbp);
5183 strncpy (token_name.buffer, dbp, end - dbp);
5184 token_name.buffer[end - dbp] = '\0';
5186 dbp = end;
5187 intag = FALSE; /* we found what we looked for */
5188 skiptag = TRUE; /* skip to the end of the tag */
5189 getnext = TRUE; /* then grab the text */
5190 continue; /* look on the same line */
5192 dbp += 1;
5195 else if (getnext) /* grab next tokens and tag them */
5197 dbp = skip_spaces (dbp);
5198 if (*dbp == '\0')
5199 break; /* go to next line */
5200 if (*dbp == '<')
5202 intag = TRUE;
5203 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5204 continue; /* look on the same line */
5207 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5208 continue;
5209 make_tag (token_name.buffer, token_name.len, TRUE,
5210 dbp, end - dbp, lineno, linecharno);
5211 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5212 getnext = FALSE;
5213 break; /* go to next line */
5216 else /* look for an interesting HTML tag */
5218 while (*dbp != '\0' && *dbp != '<')
5219 dbp++;
5220 if (*dbp == '\0')
5221 break; /* go to next line */
5222 intag = TRUE;
5223 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5225 inanchor = TRUE;
5226 continue; /* look on the same line */
5228 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5229 || LOOKING_AT_NOCASE (dbp, "<h1>")
5230 || LOOKING_AT_NOCASE (dbp, "<h2>")
5231 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5233 intag = FALSE;
5234 getnext = TRUE;
5235 continue; /* look on the same line */
5237 dbp += 1;
5244 * Prolog support
5246 * Assumes that the predicate or rule starts at column 0.
5247 * Only the first clause of a predicate or rule is added.
5248 * Original code by Sunichirou Sugou (1989)
5249 * Rewritten by Anders Lindgren (1996)
5251 static size_t prolog_pr (char *, char *);
5252 static void prolog_skip_comment (linebuffer *, FILE *);
5253 static size_t prolog_atom (char *, size_t);
5255 static void
5256 Prolog_functions (FILE *inf)
5258 char *cp, *last;
5259 size_t len;
5260 size_t allocated;
5262 allocated = 0;
5263 len = 0;
5264 last = NULL;
5266 LOOP_ON_INPUT_LINES (inf, lb, cp)
5268 if (cp[0] == '\0') /* Empty line */
5269 continue;
5270 else if (iswhite (cp[0])) /* Not a predicate */
5271 continue;
5272 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5273 prolog_skip_comment (&lb, inf);
5274 else if ((len = prolog_pr (cp, last)) > 0)
5276 /* Predicate or rule. Store the function name so that we
5277 only generate a tag for the first clause. */
5278 if (last == NULL)
5279 last = xnew(len + 1, char);
5280 else if (len + 1 > allocated)
5281 xrnew (last, len + 1, char);
5282 allocated = len + 1;
5283 strncpy (last, cp, len);
5284 last[len] = '\0';
5287 free (last);
5291 static void
5292 prolog_skip_comment (linebuffer *plb, FILE *inf)
5294 char *cp;
5298 for (cp = plb->buffer; *cp != '\0'; cp++)
5299 if (cp[0] == '*' && cp[1] == '/')
5300 return;
5301 readline (plb, inf);
5303 while (!feof(inf));
5307 * A predicate or rule definition is added if it matches:
5308 * <beginning of line><Prolog Atom><whitespace>(
5309 * or <beginning of line><Prolog Atom><whitespace>:-
5311 * It is added to the tags database if it doesn't match the
5312 * name of the previous clause header.
5314 * Return the size of the name of the predicate or rule, or 0 if no
5315 * header was found.
5317 static size_t
5318 prolog_pr (char *s, char *last)
5320 /* Name of last clause. */
5322 size_t pos;
5323 size_t len;
5325 pos = prolog_atom (s, 0);
5326 if (! pos)
5327 return 0;
5329 len = pos;
5330 pos = skip_spaces (s + pos) - s;
5332 if ((s[pos] == '.'
5333 || (s[pos] == '(' && (pos += 1))
5334 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5335 && (last == NULL /* save only the first clause */
5336 || len != strlen (last)
5337 || !strneq (s, last, len)))
5339 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5340 return len;
5342 else
5343 return 0;
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and '_', starting with a lower case
 *   letter or '_'.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *const start = s + pos;
  const char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  p++;                          /* step over the opening quote */
  for (;;)
    switch (*p)
      {
      case '\'':
        if (*++p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5405 * Support for Erlang
5407 * Generates tags for functions, defines, and records.
5408 * Assumes that Erlang functions start at column 0.
5409 * Original code by Anders Lindgren (1996)
5411 static int erlang_func (char *, char *);
5412 static void erlang_attribute (char *);
5413 static int erlang_atom (char *);
5415 static void
5416 Erlang_functions (FILE *inf)
5418 char *cp, *last;
5419 int len;
5420 int allocated;
5422 allocated = 0;
5423 len = 0;
5424 last = NULL;
5426 LOOP_ON_INPUT_LINES (inf, lb, cp)
5428 if (cp[0] == '\0') /* Empty line */
5429 continue;
5430 else if (iswhite (cp[0])) /* Not function nor attribute */
5431 continue;
5432 else if (cp[0] == '%') /* comment */
5433 continue;
5434 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5435 continue;
5436 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5438 erlang_attribute (cp);
5439 if (last != NULL)
5441 free (last);
5442 last = NULL;
5445 else if ((len = erlang_func (cp, last)) > 0)
5448 * Function. Store the function name so that we only
5449 * generates a tag for the first clause.
5451 if (last == NULL)
5452 last = xnew (len + 1, char);
5453 else if (len + 1 > allocated)
5454 xrnew (last, len + 1, char);
5455 allocated = len + 1;
5456 strncpy (last, cp, len);
5457 last[len] = '\0';
5460 free (last);
5465 * A function definition is added if it matches:
5466 * <beginning of line><Erlang Atom><whitespace>(
5468 * It is added to the tags database if it doesn't match the
5469 * name of the previous clause header.
5471 * Return the size of the name of the function, or 0 if no function
5472 * was found.
5474 static int
5475 erlang_func (char *s, char *last)
5477 /* Name of last clause. */
5479 int pos;
5480 int len;
5482 pos = erlang_atom (s);
5483 if (pos < 1)
5484 return 0;
5486 len = pos;
5487 pos = skip_spaces (s + pos) - s;
5489 /* Save only the first clause. */
5490 if (s[pos++] == '('
5491 && (last == NULL
5492 || len != (int)strlen (last)
5493 || !strneq (s, last, len)))
5495 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5496 return len;
5499 return 0;
5504 * Handle attributes. Currently, tags are generated for defines
5505 * and records.
5507 * They are on the form:
5508 * -define(foo, bar).
5509 * -define(Foo(M, N), M+N).
5510 * -record(graph, {vtab = notable, cyclic = true}).
5512 static void
5513 erlang_attribute (char *s)
5515 char *cp = s;
5517 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5518 && *cp++ == '(')
5520 int len = erlang_atom (skip_spaces (cp));
5521 if (len > 0)
5522 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5524 return;
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: a backslash protects the character after it.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\' && s[++pos] == '\0')
            return 0;
          pos++;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5557 static char *scan_separators (char *);
5558 static void add_regex (char *, language *);
5559 static char *substitute (char *, char *, struct re_registers *);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* the input char being examined */
  int escaped = 0;              /* previous char was an unquoted backslash */

  for (src = name + 1; *src != '\0'; src++)
    {
      const char ch = *src;

      if (escaped)
        {
          escaped = 0;
          switch (ch)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;   /* BS (back space) */
            case 'd': *dst++ = 0177; break;   /* DEL (delete) */
            case 'e': *dst++ = 033; break;    /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;   /* NL (new line) */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator loses its backslash.  Something
                 else is quoted, so preserve the quote.  */
              if (ch != sep)
                *dst++ = '\\';
              *dst++ = ch;
              break;
            }
          continue;
        }

      if (ch == '\\')
        {
          escaped = 1;
          continue;
        }
      if (ch == sep)
        break;
      *dst++ = ch;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5620 /* Look at the argument of --regex or --no-regex and do the right
5621 thing. Same for each line of a regexp file. */
5622 static void
5623 analyse_regex (char *regex_arg)
5625 if (regex_arg == NULL)
5627 free_regexps (); /* --no-regex: remove existing regexps */
5628 return;
5631 /* A real --regexp option or a line in a regexp file. */
5632 switch (regex_arg[0])
5634 /* Comments in regexp file or null arg to --regex. */
5635 case '\0':
5636 case ' ':
5637 case '\t':
5638 break;
5640 /* Read a regex file. This is recursive and may result in a
5641 loop, which will stop when the file descriptors are exhausted. */
5642 case '@':
5644 FILE *regexfp;
5645 linebuffer regexbuf;
5646 char *regexfile = regex_arg + 1;
5648 /* regexfile is a file containing regexps, one per line. */
5649 regexfp = fopen (regexfile, "r");
5650 if (regexfp == NULL)
5652 pfatal (regexfile);
5653 return;
5655 linebuffer_init (&regexbuf);
5656 while (readline_internal (&regexbuf, regexfp) > 0)
5657 analyse_regex (regexbuf.buffer);
5658 free (regexbuf.buffer);
5659 fclose (regexfp);
5661 break;
5663 /* Regexp to be used for a specific language only. */
5664 case '{':
5666 language *lang;
5667 char *lang_name = regex_arg + 1;
5668 char *cp;
5670 for (cp = lang_name; *cp != '}'; cp++)
5671 if (*cp == '\0')
5673 error ("unterminated language name in regex: %s", regex_arg);
5674 return;
5676 *cp++ = '\0';
5677 lang = get_language_from_langname (lang_name);
5678 if (lang == NULL)
5679 return;
5680 add_regex (cp, lang);
5682 break;
5684 /* Regexp to be used for any language. */
5685 default:
5686 add_regex (regex_arg, NULL);
5687 break;
5691 /* Separate the regexp pattern, compile it,
5692 and care for optional name and modifiers. */
5693 static void
5694 add_regex (char *regexp_pattern, language *lang)
5696 static struct re_pattern_buffer zeropattern;
5697 char sep, *pat, *name, *modifiers;
5698 char empty[] = "";
5699 const char *err;
5700 struct re_pattern_buffer *patbuf;
5701 regexp *rp;
5702 bool
5703 force_explicit_name = TRUE, /* do not use implicit tag names */
5704 ignore_case = FALSE, /* case is significant */
5705 multi_line = FALSE, /* matches are done one line at a time */
5706 single_line = FALSE; /* dot does not match newline */
5709 if (strlen(regexp_pattern) < 3)
5711 error ("null regexp", (char *)NULL);
5712 return;
5714 sep = regexp_pattern[0];
5715 name = scan_separators (regexp_pattern);
5716 if (name == NULL)
5718 error ("%s: unterminated regexp", regexp_pattern);
5719 return;
5721 if (name[1] == sep)
5723 error ("null name for regexp \"%s\"", regexp_pattern);
5724 return;
5726 modifiers = scan_separators (name);
5727 if (modifiers == NULL) /* no terminating separator --> no name */
5729 modifiers = name;
5730 name = empty;
5732 else
5733 modifiers += 1; /* skip separator */
5735 /* Parse regex modifiers. */
5736 for (; modifiers[0] != '\0'; modifiers++)
5737 switch (modifiers[0])
5739 case 'N':
5740 if (modifiers == name)
5741 error ("forcing explicit tag name but no name, ignoring", NULL);
5742 force_explicit_name = TRUE;
5743 break;
5744 case 'i':
5745 ignore_case = TRUE;
5746 break;
5747 case 's':
5748 single_line = TRUE;
5749 /* FALLTHRU */
5750 case 'm':
5751 multi_line = TRUE;
5752 need_filebuf = TRUE;
5753 break;
5754 default:
5756 char wrongmod [2];
5757 wrongmod[0] = modifiers[0];
5758 wrongmod[1] = '\0';
5759 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5761 break;
5764 patbuf = xnew (1, struct re_pattern_buffer);
5765 *patbuf = zeropattern;
5766 if (ignore_case)
5768 static char lc_trans[CHARS];
5769 int i;
5770 for (i = 0; i < CHARS; i++)
5771 lc_trans[i] = lowcase (i);
5772 patbuf->translate = lc_trans; /* translation table to fold case */
5775 if (multi_line)
5776 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5777 else
5778 pat = regexp_pattern;
5780 if (single_line)
5781 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5782 else
5783 re_set_syntax (RE_SYNTAX_EMACS);
5785 err = re_compile_pattern (pat, strlen (pat), patbuf);
5786 if (multi_line)
5787 free (pat);
5788 if (err != NULL)
5790 error ("%s while compiling pattern", err);
5791 return;
5794 rp = p_head;
5795 p_head = xnew (1, regexp);
5796 p_head->pattern = savestr (regexp_pattern);
5797 p_head->p_next = rp;
5798 p_head->lang = lang;
5799 p_head->pat = patbuf;
5800 p_head->name = savestr (name);
5801 p_head->error_signaled = FALSE;
5802 p_head->force_explicit_name = force_explicit_name;
5803 p_head->ignore_case = ignore_case;
5804 p_head->multi_line = multi_line;
5808 * Do the substitutions indicated by the regular expression and
5809 * arguments.
5811 static char *
5812 substitute (char *in, char *out, struct re_registers *regs)
5814 char *result, *t;
5815 int size, dig, diglen;
5817 result = NULL;
5818 size = strlen (out);
5820 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5821 if (out[size - 1] == '\\')
5822 fatal ("pattern error in \"%s\"", out);
5823 for (t = etags_strchr (out, '\\');
5824 t != NULL;
5825 t = etags_strchr (t + 2, '\\'))
5826 if (ISDIGIT (t[1]))
5828 dig = t[1] - '0';
5829 diglen = regs->end[dig] - regs->start[dig];
5830 size += diglen - 2;
5832 else
5833 size -= 1;
5835 /* Allocate space and do the substitutions. */
5836 assert (size >= 0);
5837 result = xnew (size + 1, char);
5839 for (t = result; *out != '\0'; out++)
5840 if (*out == '\\' && ISDIGIT (*++out))
5842 dig = *out - '0';
5843 diglen = regs->end[dig] - regs->start[dig];
5844 strncpy (t, in + regs->start[dig], diglen);
5845 t += diglen;
5847 else
5848 *t++ = *out;
5849 *t = '\0';
5851 assert (t <= result + size);
5852 assert (t - result == (int)strlen (result));
5854 return result;
5857 /* Deallocate all regexps. */
5858 static void
5859 free_regexps (void)
5861 regexp *rp;
5862 while (p_head != NULL)
5864 rp = p_head->p_next;
5865 free (p_head->pattern);
5866 free (p_head->name);
5867 free (p_head);
5868 p_head = rp;
5870 return;
5874 * Reads the whole file as a single string from `filebuf' and looks for
5875 * multi-line regular expressions, creating tags on matches.
5876 * readline already dealt with normal regexps.
5878 * Idea by Ben Wing <ben@666.com> (2002).
5880 static void
5881 regex_tag_multiline (void)
5883 char *buffer = filebuf.buffer;
5884 regexp *rp;
5885 char *name;
5887 for (rp = p_head; rp != NULL; rp = rp->p_next)
5889 int match = 0;
5891 if (!rp->multi_line)
5892 continue; /* skip normal regexps */
5894 /* Generic initialisations before parsing file from memory. */
5895 lineno = 1; /* reset global line number */
5896 charno = 0; /* reset global char number */
5897 linecharno = 0; /* reset global char number of line start */
5899 /* Only use generic regexps or those for the current language. */
5900 if (rp->lang != NULL && rp->lang != curfdp->lang)
5901 continue;
5903 while (match >= 0 && match < filebuf.len)
5905 match = re_search (rp->pat, buffer, filebuf.len, charno,
5906 filebuf.len - match, &rp->regs);
5907 switch (match)
5909 case -2:
5910 /* Some error. */
5911 if (!rp->error_signaled)
5913 error ("regexp stack overflow while matching \"%s\"",
5914 rp->pattern);
5915 rp->error_signaled = TRUE;
5917 break;
5918 case -1:
5919 /* No match. */
5920 break;
5921 default:
5922 if (match == rp->regs.end[0])
5924 if (!rp->error_signaled)
5926 error ("regexp matches the empty string: \"%s\"",
5927 rp->pattern);
5928 rp->error_signaled = TRUE;
5930 match = -3; /* exit from while loop */
5931 break;
5934 /* Match occurred. Construct a tag. */
5935 while (charno < rp->regs.end[0])
5936 if (buffer[charno++] == '\n')
5937 lineno++, linecharno = charno;
5938 name = rp->name;
5939 if (name[0] == '\0')
5940 name = NULL;
5941 else /* make a named tag */
5942 name = substitute (buffer, rp->name, &rp->regs);
5943 if (rp->force_explicit_name)
5944 /* Force explicit tag name, if a name is there. */
5945 pfnote (name, TRUE, buffer + linecharno,
5946 charno - linecharno + 1, lineno, linecharno);
5947 else
5948 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5949 charno - linecharno + 1, lineno, linecharno);
5950 break;
5957 static bool
5958 nocase_tail (const char *cp)
5960 register int len = 0;
5962 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5963 cp++, len++;
5964 if (*cp == '\0' && !intoken (dbp[len]))
5966 dbp += len;
5967 return TRUE;
5969 return FALSE;
5972 static void
5973 get_tag (register char *bp, char **namepp)
5975 register char *cp = bp;
5977 if (*bp != '\0')
5979 /* Go till you get to white space or a syntactic break */
5980 for (cp = bp + 1; !notinname (*cp); cp++)
5981 continue;
5982 make_tag (bp, cp - bp, TRUE,
5983 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5986 if (namepp != NULL)
5987 *namepp = savenstr (bp, cp - bp);
5991 * Read a line of text from `stream' into `lbp', excluding the
5992 * newline or CR-NL, if any. Return the number of characters read from
5993 * `stream', which is the length of the line including the newline.
5995 * On DOS or Windows we do not count the CR character, if any before the
5996 * NL, in the returned length; this mirrors the behavior of Emacs on those
5997 * platforms (for text files, it translates CR-NL to NL as it reads in the
5998 * file).
6000 * If multi-line regular expressions are requested, each line read is
6001 * appended to `filebuf'.
6003 static long
6004 readline_internal (linebuffer *lbp, register FILE *stream)
6006 char *buffer = lbp->buffer;
6007 register char *p = lbp->buffer;
6008 register char *pend;
6009 int chars_deleted;
6011 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6013 for (;;)
6015 register int c = getc (stream);
6016 if (p == pend)
6018 /* We're at the end of linebuffer: expand it. */
6019 lbp->size *= 2;
6020 xrnew (buffer, lbp->size, char);
6021 p += buffer - lbp->buffer;
6022 pend = buffer + lbp->size;
6023 lbp->buffer = buffer;
6025 if (c == EOF)
6027 *p = '\0';
6028 chars_deleted = 0;
6029 break;
6031 if (c == '\n')
6033 if (p > buffer && p[-1] == '\r')
6035 p -= 1;
6036 #ifdef DOS_NT
6037 /* Assume CRLF->LF translation will be performed by Emacs
6038 when loading this file, so CRs won't appear in the buffer.
6039 It would be cleaner to compensate within Emacs;
6040 however, Emacs does not know how many CRs were deleted
6041 before any given point in the file. */
6042 chars_deleted = 1;
6043 #else
6044 chars_deleted = 2;
6045 #endif
6047 else
6049 chars_deleted = 1;
6051 *p = '\0';
6052 break;
6054 *p++ = c;
6056 lbp->len = p - buffer;
6058 if (need_filebuf /* we need filebuf for multi-line regexps */
6059 && chars_deleted > 0) /* not at EOF */
6061 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6063 /* Expand filebuf. */
6064 filebuf.size *= 2;
6065 xrnew (filebuf.buffer, filebuf.size, char);
6067 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6068 filebuf.len += lbp->len;
6069 filebuf.buffer[filebuf.len++] = '\n';
6070 filebuf.buffer[filebuf.len] = '\0';
6073 return lbp->len + chars_deleted;
6077 * Like readline_internal, above, but in addition try to match the
6078 * input line against relevant regular expressions and manage #line
6079 * directives.
6081 static void
6082 readline (linebuffer *lbp, FILE *stream)
6084 long result;
6086 linecharno = charno; /* update global char number of line start */
6087 result = readline_internal (lbp, stream); /* read line */
6088 lineno += 1; /* increment global line number */
6089 charno += result; /* increment global char number */
6091 /* Honour #line directives. */
6092 if (!no_line_directive)
6094 static bool discard_until_line_directive;
6096 /* Check whether this is a #line directive. */
6097 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6099 unsigned int lno;
6100 int start = 0;
6102 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6103 && start > 0) /* double quote character found */
6105 char *endp = lbp->buffer + start;
6107 while ((endp = etags_strchr (endp, '"')) != NULL
6108 && endp[-1] == '\\')
6109 endp++;
6110 if (endp != NULL)
6111 /* Ok, this is a real #line directive. Let's deal with it. */
6113 char *taggedabsname; /* absolute name of original file */
6114 char *taggedfname; /* name of original file as given */
6115 char *name; /* temp var */
6117 discard_until_line_directive = FALSE; /* found it */
6118 name = lbp->buffer + start;
6119 *endp = '\0';
6120 canonicalize_filename (name);
6121 taggedabsname = absolute_filename (name, tagfiledir);
6122 if (filename_is_absolute (name)
6123 || filename_is_absolute (curfdp->infname))
6124 taggedfname = savestr (taggedabsname);
6125 else
6126 taggedfname = relative_filename (taggedabsname,tagfiledir);
6128 if (streq (curfdp->taggedfname, taggedfname))
6129 /* The #line directive is only a line number change. We
6130 deal with this afterwards. */
6131 free (taggedfname);
6132 else
6133 /* The tags following this #line directive should be
6134 attributed to taggedfname. In order to do this, set
6135 curfdp accordingly. */
6137 fdesc *fdp; /* file description pointer */
6139 /* Go look for a file description already set up for the
6140 file indicated in the #line directive. If there is
6141 one, use it from now until the next #line
6142 directive. */
6143 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6144 if (streq (fdp->infname, curfdp->infname)
6145 && streq (fdp->taggedfname, taggedfname))
6146 /* If we remove the second test above (after the &&)
6147 then all entries pertaining to the same file are
6148 coalesced in the tags file. If we use it, then
6149 entries pertaining to the same file but generated
6150 from different files (via #line directives) will
6151 go into separate sections in the tags file. These
6152 alternatives look equivalent. The first one
6153 destroys some apparently useless information. */
6155 curfdp = fdp;
6156 free (taggedfname);
6157 break;
6159 /* Else, if we already tagged the real file, skip all
6160 input lines until the next #line directive. */
6161 if (fdp == NULL) /* not found */
6162 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6163 if (streq (fdp->infabsname, taggedabsname))
6165 discard_until_line_directive = TRUE;
6166 free (taggedfname);
6167 break;
6169 /* Else create a new file description and use that from
6170 now on, until the next #line directive. */
6171 if (fdp == NULL) /* not found */
6173 fdp = fdhead;
6174 fdhead = xnew (1, fdesc);
6175 *fdhead = *curfdp; /* copy curr. file description */
6176 fdhead->next = fdp;
6177 fdhead->infname = savestr (curfdp->infname);
6178 fdhead->infabsname = savestr (curfdp->infabsname);
6179 fdhead->infabsdir = savestr (curfdp->infabsdir);
6180 fdhead->taggedfname = taggedfname;
6181 fdhead->usecharno = FALSE;
6182 fdhead->prop = NULL;
6183 fdhead->written = FALSE;
6184 curfdp = fdhead;
6187 free (taggedabsname);
6188 lineno = lno - 1;
6189 readline (lbp, stream);
6190 return;
6191 } /* if a real #line directive */
6192 } /* if #line is followed by a number */
6193 } /* if line begins with "#line " */
6195 /* If we are here, no #line directive was found. */
6196 if (discard_until_line_directive)
6198 if (result > 0)
6200 /* Do a tail recursion on ourselves, thus discarding the contents
6201 of the line buffer. */
6202 readline (lbp, stream);
6203 return;
6205 /* End of file. */
6206 discard_until_line_directive = FALSE;
6207 return;
6209 } /* if #line directives should be considered */
6212 int match;
6213 regexp *rp;
6214 char *name;
6216 /* Match against relevant regexps. */
6217 if (lbp->len > 0)
6218 for (rp = p_head; rp != NULL; rp = rp->p_next)
6220 /* Only use generic regexps or those for the current language.
6221 Also do not use multiline regexps, which is the job of
6222 regex_tag_multiline. */
6223 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6224 || rp->multi_line)
6225 continue;
6227 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6228 switch (match)
6230 case -2:
6231 /* Some error. */
6232 if (!rp->error_signaled)
6234 error ("regexp stack overflow while matching \"%s\"",
6235 rp->pattern);
6236 rp->error_signaled = TRUE;
6238 break;
6239 case -1:
6240 /* No match. */
6241 break;
6242 case 0:
6243 /* Empty string matched. */
6244 if (!rp->error_signaled)
6246 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6247 rp->error_signaled = TRUE;
6249 break;
6250 default:
6251 /* Match occurred. Construct a tag. */
6252 name = rp->name;
6253 if (name[0] == '\0')
6254 name = NULL;
6255 else /* make a named tag */
6256 name = substitute (lbp->buffer, rp->name, &rp->regs);
6257 if (rp->force_explicit_name)
6258 /* Force explicit tag name, if a name is there. */
6259 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6260 else
6261 make_tag (name, strlen (name), TRUE,
6262 lbp->buffer, match, lineno, linecharno);
6263 break;
/*
 * Duplicate the NUL-terminated string CP.  The copy lives in freshly
 * allocated space of size strlen (CP) + 1, obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6281 * Return a pointer to a space of size LEN+1 allocated with xnew where
6282 * the string CP has been copied for at most the first LEN characters.
6284 static char *
6285 savenstr (const char *cp, int len)
6287 register char *dp;
6289 dp = xnew (len + 1, char);
6290 strncpy (dp, cp, len);
6291 dp[len] = '\0';
6292 return dp;
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not appear.  The terminating NUL is
 * part of the search, so looking for '\0' finds the end of SP.
 *
 * Counterpart of POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not appear.  The terminating NUL is
 * part of the search, so looking for '\0' finds the end of SP.
 *
 * Counterpart of POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero when they are equal, otherwise the difference between
 * the first pair of characters that differ (case-folded when both are
 * alphabetic).
 *
 * Counterpart of BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (;; s1++, s2++)
    {
      int diff;

      /* Case is folded only when both characters are letters.  */
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop at the first difference or at the end of S1.  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * Return zero when the first N characters are equal (or both strings
 * end, equal, before that; or N is not positive).  Otherwise return the
 * difference between the first pair of characters that differ,
 * case-folded when both are alphabetic.
 *
 * Counterpart of BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* The count is checked before any character is looked at, so nothing
     beyond the first N characters can influence the result, even when
     S1 ends exactly at the limit.  */
  for (; n > 0; s1++, s2++, n--)
    {
      /* <ctype.h> functions need an unsigned char value.  */
      int c1 = (unsigned char) *s1;
      int c2 = (unsigned char) *s2;

      if (isalpha (c1) && isalpha (c2))
        {
          c1 = tolower (c1);
          c2 = tolower (c2);
          if (c1 != c2)
            return c1 - c2;
        }
      else if (*s1 != *s2)
        return *s1 - *s2;

      /* Both strings ended together before the limit.  */
      if (*s1 == '\0')
        return 0;
    }
  return 0;
}
/* Advance CP over leading white space (as defined by `iswhite'; the
   end of string is not space) and return the new pointer.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
/* Advance CP up to the next white space character (as defined by
   `iswhite') or the end of the string, and return the new pointer.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print S1 together with the system error message for the current
   `errno' (via perror), then terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6407 static void
6408 suggest_asking_for_help (void)
6410 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6411 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6412 exit (EXIT_FAILURE);
6415 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6416 static void
6417 error (const char *s1, const char *s2)
6419 fprintf (stderr, "%s: ", progname);
6420 fprintf (stderr, s1, s2);
6421 fprintf (stderr, "\n");
6424 /* Return a newly-allocated string whose contents
6425 concatenate those of s1, s2, s3. */
6426 static char *
6427 concat (const char *s1, const char *s2, const char *s3)
6429 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6430 char *result = xnew (len1 + len2 + len3 + 1, char);
6432 strcpy (result, s1);
6433 strcpy (result + len1, s2);
6434 strcpy (result + len1 + len2, s3);
6435 result[len1 + len2 + len3] = '\0';
6437 return result;
6441 /* Does the same work as the system V getcwd, but does not need to
6442 guess the buffer size in advance. */
6443 static char *
6444 etags_getcwd (void)
6446 #ifdef HAVE_GETCWD
6447 int bufsize = 200;
6448 char *path = xnew (bufsize, char);
6450 while (getcwd (path, bufsize) == NULL)
6452 if (errno != ERANGE)
6453 pfatal ("getcwd");
6454 bufsize *= 2;
6455 free (path);
6456 path = xnew (bufsize, char);
6459 canonicalize_filename (path);
6460 return path;
6462 #else /* not HAVE_GETCWD */
6463 #if MSDOS
6465 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6467 getwd (path);
6469 for (p = path; *p != '\0'; p++)
6470 if (*p == '\\')
6471 *p = '/';
6472 else
6473 *p = lowcase (*p);
6475 return strdup (path);
6476 #else /* not MSDOS */
6477 linebuffer path;
6478 FILE *pipe;
6480 linebuffer_init (&path);
6481 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6482 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6483 pfatal ("pwd");
6484 pclose (pipe);
6486 return path.buffer;
6487 #endif /* not MSDOS */
6488 #endif /* not HAVE_GETCWD */
6491 /* Return a newly allocated string containing the file name of FILE
6492 relative to the absolute directory DIR (which should end with a slash). */
6493 static char *
6494 relative_filename (char *file, char *dir)
6496 char *fp, *dp, *afn, *res;
6497 int i;
6499 /* Find the common root of file and dir (with a trailing slash). */
6500 afn = absolute_filename (file, cwd);
6501 fp = afn;
6502 dp = dir;
6503 while (*fp++ == *dp++)
6504 continue;
6505 fp--, dp--; /* back to the first differing char */
6506 #ifdef DOS_NT
6507 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6508 return afn;
6509 #endif
6510 do /* look at the equal chars until '/' */
6511 fp--, dp--;
6512 while (*fp != '/');
6514 /* Build a sequence of "../" strings for the resulting relative file name. */
6515 i = 0;
6516 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6517 i += 1;
6518 res = xnew (3*i + strlen (fp + 1) + 1, char);
6519 res[0] = '\0';
6520 while (i-- > 0)
6521 strcat (res, "../");
6523 /* Add the file name relative to the common root of file and dir. */
6524 strcat (res, fp + 1);
6525 free (afn);
6527 return res;
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is; otherwise DIR is prepended.  The
   "/dirname/.." and "/." components are then removed in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *parent, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/.." component: walk back to the start of the
                 preceding component so both can be dropped.  */
              parent = slashp;
              do
                parent--;
              while (parent >= res && !filename_is_absolute (parent));
              if (parent < res)
                parent = slashp; /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (parent[0] != '/')
                parent = slashp;
#endif
#ifdef HAVE_MEMMOVE
              /* strlen (slashp + 2) counts the tail after "/.." plus
                 its terminating NUL.  */
              memmove (parent, slashp + 3, strlen (slashp + 2));
#else
              /* Overlapping copy isn't really okay */
              strcpy (parent, slashp + 3);
#endif
              /* Rescan from the spot where the tail now starts.  */
              slashp = parent;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/." component: drop it and look at the same spot again.  */
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              strcpy (slashp, slashp + 2);
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }

  return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   When FILE contains no slash the result is a copy of DIR.  FILE is
   modified temporarily while computing the result and is restored
   before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *after_slash;
  char saved;
  char *res;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash so only the directory part is
     made absolute, then put the overwritten character back.  */
  after_slash = last_slash + 1;
  saved = *after_slash;
  *after_slash = '\0';
  res = absolute_filename (file, dir);
  *after_slash = saved;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.

   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.

   The separator is '/' by default and '\\' under DOS_NT; every run of
   separators is replaced by one '/'.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads the original name while FN writes the compacted one; FN
     never gets ahead of SRC.  */
  src = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *fn++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators.  */
          *fn++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *fn = '\0';
}
6665 /* Initialize a linebuffer for use. */
6666 static void
6667 linebuffer_init (linebuffer *lbp)
6669 lbp->size = (DEBUG) ? 3 : 200;
6670 lbp->buffer = xnew (lbp->size, char);
6671 lbp->buffer[0] = '\0';
6672 lbp->len = 0;
6675 /* Set the minimum size of a string contained in a linebuffer. */
6676 static void
6677 linebuffer_setlen (linebuffer *lbp, int toksize)
6679 while (lbp->size <= toksize)
6681 lbp->size *= 2;
6682 xrnew (lbp->buffer, lbp->size, char);
6684 lbp->len = toksize;
6687 /* Like malloc but get fatal error if memory is exhausted. */
6688 static PTR
6689 xmalloc (unsigned int size)
6691 PTR result = (PTR) malloc (size);
6692 if (result == NULL)
6693 fatal ("virtual memory exhausted", (char *)NULL);
6694 return result;
6697 static PTR
6698 xrealloc (char *ptr, unsigned int size)
6700 PTR result = (PTR) realloc (ptr, size);
6701 if (result == NULL)
6702 fatal ("virtual memory exhausted", (char *)NULL);
6703 return result;
/*
 * Local Variables:
 * indent-tabs-mode: t
 * tab-width: 8
 * fill-column: 79
 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
 * c-file-style: "gnu"
 * End:
 */

/* etags.c ends here */