1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version
[] = "@(#) pot revision number is 17.38.1.4";
91 # define NDEBUG /* disable assert */
97 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
100 /* WIN32_NATIVE is for XEmacs.
101 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
106 #endif /* WIN32_NATIVE */
112 # include <sys/param.h>
122 # define MAXPATHLEN _MAX_PATH
126 #endif /* WINDOWSNT */
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <c-strcase.h>
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
148 /* Define CTAGS to make the program "ctags" compatible with the usual one.
149 Leave it undefined to make the program "etags", which makes emacs-style
150 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* String equality helpers.  Each macro first asserts that BOTH
   arguments are non-NULL and then yields nonzero when the strings
   compare equal.  The check must use `&&': with `||' a single NULL
   argument would pass the assertion and still be handed to the
   comparison function.  The assertion disappears when assert is
   compiled out.  With assertions enabled each argument is evaluated
   twice, so arguments must not have side effects.
     streq, strneq          -- case-sensitive (strcmp, strncmp)
     strcaseeq, strncaseeq  -- case-insensitive (c_strcasecmp,
                               c_strncasecmp)  */
#define streq(s,t) (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
/* Character classification.

   CHARS is the number of entries in each lookup table, and CHAR (ch)
   reduces any integer to a valid table index by keeping only the bits
   selected by CHARS - 1.  The five table-backed predicates below read
   the boolean arrays _wht, _nin, _btk, _itk and _etk.  */
#define CHARS 256               /* table size; CHAR masks with CHARS - 1 */
#define CHAR(ch) ((unsigned int)(ch) & (CHARS - 1))
#define iswhite(ch) (_wht[CHAR (ch)])   /* ch is white (see white) */
#define notinname(ch) (_nin[CHAR (ch)]) /* ch is not in a name (see nonam) */
#define begtoken(ch) (_btk[CHAR (ch)])  /* ch can start token (see begtk) */
#define intoken(ch) (_itk[CHAR (ch)])   /* ch can be in token (see midtk) */
#define endtoken(ch) (_etk[CHAR (ch)])  /* ch ends tokens (see endtk) */

/* <ctype.h> wrappers: the argument goes through CHAR first, so the
   library function always receives a value in 0 .. CHARS - 1.  */
#define ISALNUM(ch) isalnum (CHAR (ch))
#define ISALPHA(ch) isalpha (CHAR (ch))
#define ISDIGIT(ch) isdigit (CHAR (ch))
#define ISLOWER(ch) islower (CHAR (ch))

#define lowcase(ch) tolower (CHAR (ch))
180 * xnew, xrnew -- allocate, reallocate storage
182 * SYNOPSIS: Type *xnew (int n, Type);
183 * void xrnew (OldPointer, int n, Type);
186 # include "chkmalloc.h"
187 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
188 (n) * sizeof (Type)))
189 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
190 (char *) (op), (n) * sizeof (Type)))
192 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
193 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
194 (char *) (op), (n) * sizeof (Type)))
199 typedef void Lang_function (FILE *);
203 const char *suffix
; /* file name suffix for this compressor */
204 const char *command
; /* takes one arg and decompresses to stdout */
209 const char *name
; /* language name */
210 const char *help
; /* detailed help for the language */
211 Lang_function
*function
; /* parse function */
212 const char **suffixes
; /* name suffixes of this language's files */
213 const char **filenames
; /* names of this language's files */
214 const char **interpreters
; /* interpreters for this language */
215 bool metasource
; /* source used to generate other sources */
220 struct fdesc
*next
; /* for the linked list */
221 char *infname
; /* uncompressed input file name */
222 char *infabsname
; /* absolute uncompressed input file name */
223 char *infabsdir
; /* absolute dir of input file */
224 char *taggedfname
; /* file name to write in tagfile */
225 language
*lang
; /* language of file */
226 char *prop
; /* file properties to write in tagfile */
227 bool usecharno
; /* etags tags shall contain char number */
228 bool written
; /* entry written in the tags file */
231 typedef struct node_st
232 { /* sorting structure */
233 struct node_st
*left
, *right
; /* left and right sons */
234 fdesc
*fdp
; /* description of file to whom tag belongs */
235 char *name
; /* tag name */
236 char *regex
; /* search regexp */
237 bool valid
; /* write this tag on the tag file */
238 bool is_func
; /* function tag: use regexp in CTAGS mode */
239 bool been_warned
; /* warning already given for duplicated tag */
240 int lno
; /* line number tag is on */
241 long cno
; /* character number line starts on */
245 * A `linebuffer' is a structure which holds a line of text.
246 * `readline_internal' reads a line from a stream into a linebuffer
247 * and works regardless of the length of the line.
248 * SIZE is the size of BUFFER, LEN is the length of the string in
249 * BUFFER after readline reads it.
258 /* Used to support mixing of --lang and file names. */
262 at_language
, /* a language specification */
263 at_regexp
, /* a regular expression */
264 at_filename
, /* a file name */
265 at_stdin
, /* read from stdin here */
266 at_end
/* stop parsing the list */
267 } arg_type
; /* argument type */
268 language
*lang
; /* language associated with the argument */
269 char *what
; /* the argument itself */
272 /* Structure defining a regular expression. */
273 typedef struct regexp
275 struct regexp
*p_next
; /* pointer to next in list */
276 language
*lang
; /* if set, use only for this language */
277 char *pattern
; /* the regexp pattern */
278 char *name
; /* tag name */
279 struct re_pattern_buffer
*pat
; /* the compiled pattern */
280 struct re_registers regs
; /* re registers */
281 bool error_signaled
; /* already signaled for this regexp */
282 bool force_explicit_name
; /* do not allow implicit tag name */
283 bool ignore_case
; /* ignore case when matching */
284 bool multi_line
; /* do a multi-line match on the whole file */
288 /* Many compilers barf on this:
289 Lang_function Ada_funcs;
290 so let's write it this way */
/* Forward declarations of the per-language tag parsers.  All of them
   except C_entries match the Lang_function type (one FILE * argument,
   no result); C_entries takes an extra int first.  Parameter names are
   given for documentation only.  */
static void Ada_funcs (FILE *inf);
static void Asm_labels (FILE *inf);
static void C_entries (int c_ext, FILE *inf);
static void default_C_entries (FILE *inf);
static void plain_C_entries (FILE *inf);
static void Cjava_entries (FILE *inf);
static void Cobol_paragraphs (FILE *inf);
static void Cplusplus_entries (FILE *inf);
static void Cstar_entries (FILE *inf);
static void Erlang_functions (FILE *inf);
static void Forth_words (FILE *inf);
static void Fortran_functions (FILE *inf);
static void HTML_labels (FILE *inf);
static void Lisp_functions (FILE *inf);
static void Lua_functions (FILE *inf);
static void Makefile_targets (FILE *inf);
static void Pascal_functions (FILE *inf);
static void Perl_functions (FILE *inf);
static void PHP_functions (FILE *inf);
static void PS_functions (FILE *inf);
static void Prolog_functions (FILE *inf);
static void Python_functions (FILE *inf);
static void Scheme_functions (FILE *inf);
static void TeX_commands (FILE *inf);
static void Texinfo_nodes (FILE *inf);
static void Yacc_entries (FILE *inf);
static void just_read_file (FILE *inf);
319 static language
*get_language_from_langname (const char *);
320 static void readline (linebuffer
*, FILE *);
321 static long readline_internal (linebuffer
*, FILE *);
322 static bool nocase_tail (const char *);
323 static void get_tag (char *, char **);
325 static void analyse_regex (char *);
326 static void free_regexps (void);
327 static void regex_tag_multiline (void);
328 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
329 static _Noreturn
void suggest_asking_for_help (void);
330 _Noreturn
void fatal (const char *, const char *);
331 static _Noreturn
void pfatal (const char *);
332 static void add_node (node
*, node
**);
334 static void init (void);
335 static void process_file_name (char *, language
*);
336 static void process_file (FILE *, char *, language
*);
337 static void find_entries (FILE *);
338 static void free_tree (node
*);
339 static void free_fdesc (fdesc
*);
340 static void pfnote (char *, bool, char *, int, int, long);
341 static void invalidate_nodes (fdesc
*, node
**);
342 static void put_entries (node
*);
344 static char *concat (const char *, const char *, const char *);
345 static char *skip_spaces (char *);
346 static char *skip_non_spaces (char *);
347 static char *skip_name (char *);
348 static char *savenstr (const char *, int);
349 static char *savestr (const char *);
350 static char *etags_strchr (const char *, int);
351 static char *etags_strrchr (const char *, int);
352 static char *etags_getcwd (void);
353 static char *relative_filename (char *, char *);
354 static char *absolute_filename (char *, char *);
355 static char *absolute_dirname (char *, char *);
356 static bool filename_is_absolute (char *f
);
357 static void canonicalize_filename (char *);
358 static void linebuffer_init (linebuffer
*);
359 static void linebuffer_setlen (linebuffer
*, int);
360 static void *xmalloc (size_t);
361 static void *xrealloc (char *, size_t);
364 static char searchar
= '/'; /* use /.../ searches */
366 static char *tagfile
; /* output file */
367 static char *progname
; /* name this program was invoked with */
368 static char *cwd
; /* current working directory */
369 static char *tagfiledir
; /* directory of tagfile */
370 static FILE *tagf
; /* ioptr for tags file */
371 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
373 static fdesc
*fdhead
; /* head of file description list */
374 static fdesc
*curfdp
; /* current file description */
375 static int lineno
; /* line number of current line */
376 static long charno
; /* current character number */
377 static long linecharno
; /* charno of start of current line */
378 static char *dbp
; /* pointer to start of current tag */
380 static const int invalidcharno
= -1;
382 static node
*nodehead
; /* the head of the binary tree of tags */
383 static node
*last_node
; /* the last node created */
385 static linebuffer lb
; /* the current line */
386 static linebuffer filebuf
; /* a buffer containing the whole file */
387 static linebuffer token_name
; /* a buffer containing a tag name */
389 /* boolean "functions" (see init) */
390 static bool _wht
[CHARS
], _nin
[CHARS
], _itk
[CHARS
], _btk
[CHARS
], _etk
[CHARS
];
393 *white
= " \f\t\n\r\v",
395 *nonam
= " \f\t\n\r()=,;", /* look at make_tag before modifying! */
396 /* token ending chars */
397 *endtk
= " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
398 /* token starting chars */
399 *begtk
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
400 /* valid in-token chars */
401 *midtk
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
403 static bool append_to_tagfile
; /* -a: append to tags */
404 /* The next five default to TRUE in C and derived languages. */
405 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
406 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
407 /* 0 struct/enum/union decls, and C++ */
408 /* member functions. */
409 static bool constantypedefs
; /* -d: create tags for C #define, enum */
410 /* constants and variables. */
411 /* -D: opposite of -d. Default under ctags. */
412 static bool globals
; /* create tags for global variables */
413 static bool members
; /* create tags for C member variables */
414 static bool declarations
; /* --declarations: tag them and extern in C&Co*/
415 static bool no_line_directive
; /* ignore #line directives (undocumented) */
416 static bool no_duplicates
; /* no duplicate tags for ctags (undocumented) */
417 static bool update
; /* -u: update tags */
418 static bool vgrind_style
; /* -v: create vgrind style index output */
419 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
420 static bool cxref_style
; /* -x: create cxref style output */
421 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
422 static bool ignoreindent
; /* -I: ignore indentation in C */
423 static bool packages_only
; /* --packages-only: in Ada, only tag packages*/
425 /* STDIN is defined in LynxOS system headers */
430 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
431 static bool parsing_stdin
; /* --parse-stdin used */
433 static regexp
*p_head
; /* list of all regexps */
434 static bool need_filebuf
; /* some regexes are multi-line */
436 static struct option longopts
[] =
438 { "append", no_argument
, NULL
, 'a' },
439 { "packages-only", no_argument
, &packages_only
, TRUE
},
440 { "c++", no_argument
, NULL
, 'C' },
441 { "declarations", no_argument
, &declarations
, TRUE
},
442 { "no-line-directive", no_argument
, &no_line_directive
, TRUE
},
443 { "no-duplicates", no_argument
, &no_duplicates
, TRUE
},
444 { "help", no_argument
, NULL
, 'h' },
445 { "help", no_argument
, NULL
, 'H' },
446 { "ignore-indentation", no_argument
, NULL
, 'I' },
447 { "language", required_argument
, NULL
, 'l' },
448 { "members", no_argument
, &members
, TRUE
},
449 { "no-members", no_argument
, &members
, FALSE
},
450 { "output", required_argument
, NULL
, 'o' },
451 { "regex", required_argument
, NULL
, 'r' },
452 { "no-regex", no_argument
, NULL
, 'R' },
453 { "ignore-case-regex", required_argument
, NULL
, 'c' },
454 { "parse-stdin", required_argument
, NULL
, STDIN
},
455 { "version", no_argument
, NULL
, 'V' },
457 #if CTAGS /* Ctags options */
458 { "backward-search", no_argument
, NULL
, 'B' },
459 { "cxref", no_argument
, NULL
, 'x' },
460 { "defines", no_argument
, NULL
, 'd' },
461 { "globals", no_argument
, &globals
, TRUE
},
462 { "typedefs", no_argument
, NULL
, 't' },
463 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
464 { "update", no_argument
, NULL
, 'u' },
465 { "vgrind", no_argument
, NULL
, 'v' },
466 { "no-warn", no_argument
, NULL
, 'w' },
468 #else /* Etags options */
469 { "no-defines", no_argument
, NULL
, 'D' },
470 { "no-globals", no_argument
, &globals
, FALSE
},
471 { "include", required_argument
, NULL
, 'i' },
476 static compressor compressors
[] =
478 { "z", "gzip -d -c"},
479 { "Z", "gzip -d -c"},
480 { "gz", "gzip -d -c"},
481 { "GZ", "gzip -d -c"},
482 { "bz2", "bzip2 -d -c" },
483 { "xz", "xz -d -c" },
492 static const char *Ada_suffixes
[] =
493 { "ads", "adb", "ada", NULL
};
494 static const char Ada_help
[] =
495 "In Ada code, functions, procedures, packages, tasks and types are\n\
496 tags. Use the `--packages-only' option to create tags for\n\
498 Ada tag names have suffixes indicating the type of entity:\n\
499 Entity type: Qualifier:\n\
500 ------------ ----------\n\
507 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
508 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
509 will just search for any tag `bidule'.";
512 static const char *Asm_suffixes
[] =
513 { "a", /* Unix assembler */
514 "asm", /* Microcontroller assembly */
515 "def", /* BSO/Tasking definition includes */
516 "inc", /* Microcontroller include files */
517 "ins", /* Microcontroller include files */
518 "s", "sa", /* Unix assembler */
519 "S", /* cpp-processed Unix assembler */
520 "src", /* BSO/Tasking C compiler output */
523 static const char Asm_help
[] =
524 "In assembler code, labels appearing at the beginning of a line,\n\
525 followed by a colon, are tags.";
528 /* Note that .c and .h can be considered C++, if the --c++ flag was
529 given, or if the `class' or `template' keywords are met inside the file.
530 That is why default_C_entries is called for these. */
531 static const char *default_C_suffixes
[] =
533 #if CTAGS /* C help for Ctags */
534 static const char default_C_help
[] =
535 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
536 Use -T to tag definitions of `struct', `union' and `enum'.\n\
537 Use -d to tag `#define' macro definitions and `enum' constants.\n\
538 Use --globals to tag global variables.\n\
539 You can tag function declarations and external variables by\n\
540 using `--declarations', and struct members by using `--members'.";
541 #else /* C help for Etags */
542 static const char default_C_help
[] =
543 "In C code, any C function or typedef is a tag, and so are\n\
544 definitions of `struct', `union' and `enum'. `#define' macro\n\
545 definitions and `enum' constants are tags unless you specify\n\
546 `--no-defines'. Global variables are tags unless you specify\n\
547 `--no-globals' and so are struct members unless you specify\n\
548 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
549 `--no-members' can make the tags table file much smaller.\n\
550 You can tag function declarations and external variables by\n\
551 using `--declarations'.";
552 #endif /* C help for Ctags and Etags */
554 static const char *Cplusplus_suffixes
[] =
555 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
556 "M", /* Objective C++ */
557 "pdb", /* PostScript with C syntax */
559 static const char Cplusplus_help
[] =
560 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
561 --help --lang=c --lang=c++ for full help.)\n\
562 In addition to C tags, member functions are also recognized. Member\n\
563 variables are recognized unless you use the `--no-members' option.\n\
564 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
565 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
568 static const char *Cjava_suffixes
[] =
/* Detailed help text for the Java language entry.  Declared const like
   the other *_help strings: it is stored through the `const char *help'
   member of the language table and is not modified.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
575 static const char *Cobol_suffixes
[] =
576 { "COB", "cob", NULL
};
/* Detailed help text for the Cobol language entry.  Declared const like
   the other *_help strings: it is stored through the `const char *help'
   member of the language table and is not modified.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
581 static const char *Cstar_suffixes
[] =
582 { "cs", "hs", NULL
};
584 static const char *Erlang_suffixes
[] =
585 { "erl", "hrl", NULL
};
586 static const char Erlang_help
[] =
587 "In Erlang code, the tags are the functions, records and macros\n\
588 defined in the file.";
/* File name suffixes that select the Forth parser; NULL-terminated.
   Given internal linkage like every other suffix table in this file,
   so the name is not exported from the translation unit.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
592 static const char Forth_help
[] =
593 "In Forth code, tags are words defined by `:',\n\
594 constant, code, create, defer, value, variable, buffer:, field.";
/* Fortran: NULL-terminated list of file name suffixes, followed by the
   text printed as detailed help for the language.  */
static const char *Fortran_suffixes [] =
  {
    "F",
    "f",
    "f90",
    "for",
    NULL
  };
static const char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";
601 static const char *HTML_suffixes
[] =
602 { "htm", "html", "shtml", NULL
};
603 static const char HTML_help
[] =
604 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
605 `h3' headers. Also, tags are `name=' in anchors and all\n\
606 occurrences of `id='.";
608 static const char *Lisp_suffixes
[] =
609 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
610 static const char Lisp_help
[] =
611 "In Lisp code, any function defined with `defun', any variable\n\
612 defined with `defvar' or `defconst', and in general the first\n\
613 argument of any expression that starts with `(def' in column zero\n\
615 The `--declarations' option tags \"(defvar foo)\" constructs too.";
/* Lua: NULL-terminated list of file name suffixes, followed by the
   text printed as detailed help for the language.  */
static const char *Lua_suffixes [] =
  {
    "lua",
    "LUA",
    NULL
  };
static const char Lua_help [] =
  "In Lua scripts, all functions are tags.";
622 static const char *Makefile_filenames
[] =
623 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
624 static const char Makefile_help
[] =
625 "In makefiles, targets are tags; additionally, variables are tags\n\
626 unless you specify `--no-globals'.";
628 static const char *Objc_suffixes
[] =
629 { "lm", /* Objective lex file */
630 "m", /* Objective C file */
632 static const char Objc_help
[] =
633 "In Objective C code, tags include Objective C definitions for classes,\n\
634 class categories, methods and protocols. Tags for variables and\n\
635 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
636 (Use --help --lang=c --lang=objc --lang=java for full help.)";
638 static const char *Pascal_suffixes
[] =
639 { "p", "pas", NULL
};
640 static const char Pascal_help
[] =
641 "In Pascal code, the tags are the functions and procedures defined\n\
643 /* " // this is for working around an Emacs highlighting bug... */
645 static const char *Perl_suffixes
[] =
646 { "pl", "pm", NULL
};
647 static const char *Perl_interpreters
[] =
648 { "perl", "@PERL@", NULL
};
649 static const char Perl_help
[] =
650 "In Perl code, the tags are the packages, subroutines and variables\n\
651 defined by the `package', `sub', `my' and `local' keywords. Use\n\
652 `--globals' if you want to tag global variables. Tags for\n\
653 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
654 defined in the default package is `main::SUB'.";
656 static const char *PHP_suffixes
[] =
657 { "php", "php3", "php4", NULL
};
658 static const char PHP_help
[] =
659 "In PHP code, tags are functions, classes and defines. Unless you use\n\
660 the `--no-members' option, vars are tags too.";
662 static const char *plain_C_suffixes
[] =
663 { "pc", /* Pro*C file */
666 static const char *PS_suffixes
[] =
667 { "ps", "psw", NULL
}; /* .psw is for PSWrap */
668 static const char PS_help
[] =
669 "In PostScript code, the tags are the functions.";
671 static const char *Prolog_suffixes
[] =
673 static const char Prolog_help
[] =
674 "In Prolog code, tags are predicates and rules at the beginning of\n\
677 static const char *Python_suffixes
[] =
679 static const char Python_help
[] =
680 "In Python code, `def' or `class' at the beginning of a line\n\
683 /* Can't do the `SCM' or `scm' prefix with a version number. */
684 static const char *Scheme_suffixes
[] =
685 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
686 static const char Scheme_help
[] =
687 "In Scheme code, tags include anything defined with `def' or with a\n\
688 construct whose name starts with `def'. They also include\n\
689 variables set with `set!' at top level in the file.";
691 static const char *TeX_suffixes
[] =
692 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
693 static const char TeX_help
[] =
694 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
695 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
696 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
697 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
698 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
700 Other commands can be specified by setting the environment variable\n\
701 `TEXTAGS' to a colon-separated list like, for example,\n\
702 TEXTAGS=\"mycommand:myothercommand\".";
705 static const char *Texinfo_suffixes
[] =
706 { "texi", "texinfo", "txi", NULL
};
707 static const char Texinfo_help
[] =
708 "for texinfo files, lines starting with @node are tagged.";
710 static const char *Yacc_suffixes
[] =
711 { "y", "y++", "ym", "yxx", "yy", NULL
}; /* .ym is Objective yacc file */
712 static const char Yacc_help
[] =
713 "In Bison or Yacc input files, each rule defines as a tag the\n\
714 nonterminal it constructs. The portions of the file that contain\n\
715 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
718 static const char auto_help
[] =
719 "`auto' is not a real language, it indicates to use\n\
720 a default language for files base on file name suffix and file contents.";
722 static const char none_help
[] =
723 "`none' is not a real language, it indicates to only do\n\
724 regexp processing on files.";
726 static const char no_lang_help
[] =
727 "No detailed help available for this language.";
731 * Table of languages.
733 * It is ok for a given function to be listed under more than one
734 * name. I just didn't.
737 static language lang_names
[] =
739 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
740 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
741 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
742 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
743 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
744 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
745 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
746 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
747 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
748 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
749 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
750 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
751 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
752 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
753 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
754 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
755 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
756 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
757 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
758 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
759 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
760 { "python", Python_help
, Python_functions
, Python_suffixes
},
761 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
762 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
763 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
764 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,TRUE
},
765 { "auto", auto_help
}, /* default guessing scheme */
766 { "none", none_help
, just_read_file
}, /* regexp matching only */
767 { NULL
} /* end of list */
772 print_language_names (void)
775 const char **name
, **ext
;
777 puts ("\nThese are the currently supported languages, along with the\n\
778 default file names and dot suffixes:");
779 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
781 printf (" %-*s", 10, lang
->name
);
782 if (lang
->filenames
!= NULL
)
783 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
784 printf (" %s", *name
);
785 if (lang
->suffixes
!= NULL
)
786 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
787 printf (" .%s", *ext
);
790 puts ("where `auto' means use default language for files based on file\n\
791 name suffix, and `none' means only do regexp processing on files.\n\
792 If no language is specified and no matching suffix is found,\n\
793 the first line of the file is read for a sharp-bang (#!) sequence\n\
794 followed by the name of an interpreter. If no such sequence is found,\n\
795 Fortran is tried first; if no tags are found, C is tried next.\n\
796 When parsing any C file, a \"class\" or \"template\" keyword\n\
798 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
800 For detailed help on a given language use, for example,\n\
801 etags --help --lang=ada.");
805 # define EMACS_NAME "standalone"
808 # define VERSION "17.38.1.4"
810 static _Noreturn
void
813 char emacs_copyright
[] = COPYRIGHT
;
815 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
816 puts (emacs_copyright
);
817 puts ("This program is distributed under the terms in ETAGS.README");
822 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
823 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
826 static _Noreturn
void
827 print_help (argument
*argbuffer
)
829 bool help_for_lang
= FALSE
;
831 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
832 if (argbuffer
->arg_type
== at_language
)
836 puts (argbuffer
->lang
->help
);
837 help_for_lang
= TRUE
;
843 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
845 These are the options accepted by %s.\n", progname
, progname
);
846 puts ("You may use unambiguous abbreviations for the long option names.");
847 puts (" A - as file name means read names from stdin (one per line).\n\
848 Absolute names are stored in the output file as they are.\n\
849 Relative ones are stored relative to the output file's directory.\n");
851 puts ("-a, --append\n\
852 Append tag entries to existing tags file.");
854 puts ("--packages-only\n\
855 For Ada files, only generate tags for packages.");
858 puts ("-B, --backward-search\n\
859 Write the search commands for the tag entries using '?', the\n\
860 backward-search command instead of '/', the forward-search command.");
862 /* This option is mostly obsolete, because etags can now automatically
863 detect C++. Retained for backward compatibility and for debugging and
864 experimentation. In principle, we could want to tag as C++ even
865 before any "class" or "template" keyword.
867 Treat files whose name suffix defaults to C language as C++ files.");
870 puts ("--declarations\n\
871 In C and derived languages, create tags for function declarations,");
873 puts ("\tand create tags for extern variables if --globals is used.");
876 ("\tand create tags for extern variables unless --no-globals is used.");
879 puts ("-d, --defines\n\
880 Create tag entries for C #define constants and enum constants, too.");
882 puts ("-D, --no-defines\n\
883 Don't create tag entries for C #define constants and enum constants.\n\
884 This makes the tags file smaller.");
887 puts ("-i FILE, --include=FILE\n\
888 Include a note in tag file indicating that, when searching for\n\
889 a tag, one should also consult the tags file FILE after\n\
890 checking the current file.");
892 puts ("-l LANG, --language=LANG\n\
893 Force the following files to be considered as written in the\n\
894 named language up to the next --language=LANG option.");
898 Create tag entries for global variables in some languages.");
900 puts ("--no-globals\n\
901 Do not create tag entries for global variables in some\n\
902 languages. This makes the tags file smaller.");
904 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
905 puts ("--no-line-directive\n\
906 Ignore #line preprocessor directives in C and derived languages.");
910 Create tag entries for members of structures in some languages.");
912 puts ("--no-members\n\
913 Do not create tag entries for members of structures\n\
914 in some languages.");
916 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
917 Make a tag for each line matching a regular expression pattern\n\
918 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
919 files only. REGEXFILE is a file containing one REGEXP per line.\n\
920 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
921 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
922 puts (" If TAGNAME/ is present, the tags created are named.\n\
923 For example Tcl named tags can be created with:\n\
924 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
925 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
926 `m' means to allow multi-line matches, `s' implies `m' and\n\
927 causes dot to match any character, including newline.");
929 puts ("-R, --no-regex\n\
930 Don't create tags from regexps for the following files.");
932 puts ("-I, --ignore-indentation\n\
933 In C and C++ do not assume that a closing brace in the first\n\
934 column is the final brace of a function or structure definition.");
936 puts ("-o FILE, --output=FILE\n\
937 Write the tags to FILE.");
939 puts ("--parse-stdin=NAME\n\
940 Read from standard input and record tags as belonging to file NAME.");
944 puts ("-t, --typedefs\n\
945 Generate tag entries for C and Ada typedefs.");
946 puts ("-T, --typedefs-and-c++\n\
947 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
948 and C++ member functions.");
952 puts ("-u, --update\n\
953 Update the tag entries for the given files, leaving tag\n\
954 entries for other files in place. Currently, this is\n\
955 implemented by deleting the existing entries for the given\n\
956 files and then rewriting the new entries at the end of the\n\
957 tags file. It is often faster to simply rebuild the entire\n\
958 tag file than to use this.");
962 puts ("-v, --vgrind\n\
963 Print on the standard output an index of items intended for\n\
964 human consumption, similar to the output of vgrind. The index\n\
965 is sorted, and gives the page number of each item.");
967 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
968 puts ("-w, --no-duplicates\n\
969 Do not create duplicate tag entries, for compatibility with\n\
970 traditional ctags.");
972 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
973 puts ("-w, --no-warn\n\
974 Suppress warning messages about duplicate tag entries.");
976 puts ("-x, --cxref\n\
977 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
978 The output uses line numbers instead of page numbers, but\n\
979 beyond that the differences are cosmetic; try both to see\n\
983 puts ("-V, --version\n\
984 Print the version of the program.\n\
986 Print this help message.\n\
987 Followed by one or more `--language' options prints detailed\n\
988 help about tag generation for the specified languages.");
990 print_language_names ();
993 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1000 main (int argc
, char **argv
)
1003 unsigned int nincluded_files
;
1004 char **included_files
;
1005 argument
*argbuffer
;
1006 int current_arg
, file_count
;
1007 linebuffer filename_lb
;
1008 bool help_asked
= FALSE
;
1015 _fmode
= O_BINARY
; /* all of files are treated as binary files */
1019 nincluded_files
= 0;
1020 included_files
= xnew (argc
, char *);
1024 /* Allocate enough no matter what happens. Overkill, but each one
1026 argbuffer
= xnew (argc
, argument
);
1029 * Always find typedefs and structure tags.
1030 * Also default to find macro constants, enum constants, struct
1031 * members and global variables. Do it for both etags and ctags.
1033 typedefs
= typedefs_or_cplusplus
= constantypedefs
= TRUE
;
1034 globals
= members
= TRUE
;
1036 /* When the optstring begins with a '-' getopt_long does not rearrange the
1037 non-options arguments to be at the end, but leaves them alone. */
1038 optstring
= concat ("-ac:Cf:Il:o:r:RSVhH",
1039 (CTAGS
) ? "BxdtTuvw" : "Di:",
1042 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1046 /* If getopt returns 0, then it has already processed a
1047 long-named option. We should do nothing. */
1051 /* This means that a file name has been seen. Record it. */
1052 argbuffer
[current_arg
].arg_type
= at_filename
;
1053 argbuffer
[current_arg
].what
= optarg
;
1054 len
= strlen (optarg
);
1055 if (whatlen_max
< len
)
1062 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1063 argbuffer
[current_arg
].arg_type
= at_stdin
;
1064 argbuffer
[current_arg
].what
= optarg
;
1065 len
= strlen (optarg
);
1066 if (whatlen_max
< len
)
1071 fatal ("cannot parse standard input more than once", (char *)NULL
);
1072 parsing_stdin
= TRUE
;
1075 /* Common options. */
1076 case 'a': append_to_tagfile
= TRUE
; break;
1077 case 'C': cplusplus
= TRUE
; break;
1078 case 'f': /* for compatibility with old makefiles */
1082 error ("-o option may only be given once.");
1083 suggest_asking_for_help ();
1089 case 'S': /* for backward compatibility */
1090 ignoreindent
= TRUE
;
1094 language
*lang
= get_language_from_langname (optarg
);
1097 argbuffer
[current_arg
].lang
= lang
;
1098 argbuffer
[current_arg
].arg_type
= at_language
;
1104 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1108 argbuffer
[current_arg
].arg_type
= at_regexp
;
1109 argbuffer
[current_arg
].what
= optarg
;
1110 len
= strlen (optarg
);
1111 if (whatlen_max
< len
)
1116 argbuffer
[current_arg
].arg_type
= at_regexp
;
1117 argbuffer
[current_arg
].what
= NULL
;
1129 case 'D': constantypedefs
= FALSE
; break;
1130 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1132 /* Ctags options. */
1133 case 'B': searchar
= '?'; break;
1134 case 'd': constantypedefs
= TRUE
; break;
1135 case 't': typedefs
= TRUE
; break;
1136 case 'T': typedefs
= typedefs_or_cplusplus
= TRUE
; break;
1137 case 'u': update
= TRUE
; break;
1138 case 'v': vgrind_style
= TRUE
; /*FALLTHRU*/
1139 case 'x': cxref_style
= TRUE
; break;
1140 case 'w': no_warnings
= TRUE
; break;
1142 suggest_asking_for_help ();
1146 /* No more options. Store the rest of arguments. */
1147 for (; optind
< argc
; optind
++)
1149 argbuffer
[current_arg
].arg_type
= at_filename
;
1150 argbuffer
[current_arg
].what
= argv
[optind
];
1151 len
= strlen (argv
[optind
]);
1152 if (whatlen_max
< len
)
1158 argbuffer
[current_arg
].arg_type
= at_end
;
1161 print_help (argbuffer
);
1164 if (nincluded_files
== 0 && file_count
== 0)
1166 error ("no input files specified.");
1167 suggest_asking_for_help ();
1171 if (tagfile
== NULL
)
1172 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1173 cwd
= etags_getcwd (); /* the current working directory */
1174 if (cwd
[strlen (cwd
) - 1] != '/')
1177 cwd
= concat (oldcwd
, "/", "");
1181 /* Compute base directory for relative file names. */
1182 if (streq (tagfile
, "-")
1183 || strneq (tagfile
, "/dev/", 5))
1184 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1187 canonicalize_filename (tagfile
);
1188 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1191 init (); /* set up boolean "functions" */
1193 linebuffer_init (&lb
);
1194 linebuffer_init (&filename_lb
);
1195 linebuffer_init (&filebuf
);
1196 linebuffer_init (&token_name
);
1200 if (streq (tagfile
, "-"))
1204 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205 doesn't take effect until after `stdout' is already open). */
1206 if (!isatty (fileno (stdout
)))
1207 setmode (fileno (stdout
), O_BINARY
);
1211 tagf
= fopen (tagfile
, append_to_tagfile
? "a" : "w");
1217 * Loop through files finding functions.
1219 for (i
= 0; i
< current_arg
; i
++)
1221 static language
*lang
; /* non-NULL if language is forced */
1224 switch (argbuffer
[i
].arg_type
)
1227 lang
= argbuffer
[i
].lang
;
1230 analyse_regex (argbuffer
[i
].what
);
1233 this_file
= argbuffer
[i
].what
;
1234 /* Input file named "-" means read file names from stdin
1235 (one per line) and use them. */
1236 if (streq (this_file
, "-"))
1239 fatal ("cannot parse standard input AND read file names from it",
1241 while (readline_internal (&filename_lb
, stdin
) > 0)
1242 process_file_name (filename_lb
.buffer
, lang
);
1245 process_file_name (this_file
, lang
);
1248 this_file
= argbuffer
[i
].what
;
1249 process_file (stdin
, this_file
, lang
);
1256 free (filebuf
.buffer
);
1257 free (token_name
.buffer
);
1259 if (!CTAGS
|| cxref_style
)
1261 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262 put_entries (nodehead
);
1263 free_tree (nodehead
);
1269 /* Output file entries that have no tags. */
1270 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1272 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1274 while (nincluded_files
-- > 0)
1275 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1277 if (fclose (tagf
) == EOF
)
1281 exit (EXIT_SUCCESS
);
1284 /* From here on, we are in (CTAGS && !cxref_style) */
1288 xmalloc (strlen (tagfile
) + whatlen_max
+
1289 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290 for (i
= 0; i
< current_arg
; ++i
)
1292 switch (argbuffer
[i
].arg_type
)
1298 continue; /* the for loop */
1300 strcpy (cmd
, "mv ");
1301 strcat (cmd
, tagfile
);
1302 strcat (cmd
, " OTAGS;fgrep -v '\t");
1303 strcat (cmd
, argbuffer
[i
].what
);
1304 strcat (cmd
, "\t' OTAGS >");
1305 strcat (cmd
, tagfile
);
1306 strcat (cmd
, ";rm OTAGS");
1307 if (system (cmd
) != EXIT_SUCCESS
)
1308 fatal ("failed to execute shell command", (char *)NULL
);
1311 append_to_tagfile
= TRUE
;
1314 tagf
= fopen (tagfile
, append_to_tagfile
? "a" : "w");
1317 put_entries (nodehead
); /* write all the tags (CTAGS) */
1318 free_tree (nodehead
);
1320 if (fclose (tagf
) == EOF
)
1324 if (append_to_tagfile
|| update
)
1326 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1327 /* Maybe these should be used:
1328 setenv ("LC_COLLATE", "C", 1);
1329 setenv ("LC_ALL", "C", 1); */
1330 strcpy (cmd
, "sort -u -o ");
1331 strcat (cmd
, tagfile
);
1333 strcat (cmd
, tagfile
);
1334 exit (system (cmd
));
1336 return EXIT_SUCCESS
;
1341 * Return a compressor given the file name. If EXTPTR is non-zero,
1342 * return a pointer into FILE where the compressor-specific
1343 * extension begins. If no compressor is found, NULL is returned
1344 * and EXTPTR is not significant.
1345 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1348 get_compressor_from_suffix (char *file
, char **extptr
)
1351 char *slash
, *suffix
;
1353 /* File has been processed by canonicalize_filename,
1354 so we don't need to consider backslashes on DOS_NT. */
1355 slash
= etags_strrchr (file
, '/');
1356 suffix
= etags_strrchr (file
, '.');
1357 if (suffix
== NULL
|| suffix
< slash
)
1362 /* Let those poor souls who live with DOS 8+3 file name limits get
1363 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364 Only the first do loop is run if not MSDOS */
1367 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1368 if (streq (compr
->suffix
, suffix
))
1371 break; /* do it only once: not really a loop */
1374 } while (*suffix
!= '\0');
1381 * Return a language given the name.
1384 get_language_from_langname (const char *name
)
1389 error ("empty language name");
1392 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1393 if (streq (name
, lang
->name
))
1395 error ("unknown language \"%s\"", name
);
1403 * Return a language given the interpreter name.
1406 get_language_from_interpreter (char *interpreter
)
1411 if (interpreter
== NULL
)
1413 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1414 if (lang
->interpreters
!= NULL
)
1415 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1416 if (streq (*iname
, interpreter
))
1425 * Return a language given the file name.
1428 get_language_from_filename (char *file
, int case_sensitive
)
1431 const char **name
, **ext
, *suffix
;
1433 /* Try whole file name first. */
1434 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1435 if (lang
->filenames
!= NULL
)
1436 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1437 if ((case_sensitive
)
1438 ? streq (*name
, file
)
1439 : strcaseeq (*name
, file
))
1442 /* If not found, try suffix after last dot. */
1443 suffix
= etags_strrchr (file
, '.');
1447 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1448 if (lang
->suffixes
!= NULL
)
1449 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1450 if ((case_sensitive
)
1451 ? streq (*ext
, suffix
)
1452 : strcaseeq (*ext
, suffix
))
1459 * This routine is called on each file argument.
1462 process_file_name (char *file
, language
*lang
)
1464 struct stat stat_buf
;
1468 char *compressed_name
, *uncompressed_name
;
1469 char *ext
, *real_name
;
1472 canonicalize_filename (file
);
1473 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1475 error ("skipping inclusion of %s in self.", file
);
1478 if ((compr
= get_compressor_from_suffix (file
, &ext
)) == NULL
)
1480 compressed_name
= NULL
;
1481 real_name
= uncompressed_name
= savestr (file
);
1485 real_name
= compressed_name
= savestr (file
);
1486 uncompressed_name
= savenstr (file
, ext
- file
);
1489 /* If the canonicalized uncompressed name
1490 has already been dealt with, skip it silently. */
1491 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1493 assert (fdp
->infname
!= NULL
);
1494 if (streq (uncompressed_name
, fdp
->infname
))
1498 if (stat (real_name
, &stat_buf
) != 0)
1500 /* Reset real_name and try with a different name. */
1502 if (compressed_name
!= NULL
) /* try with the given suffix */
1504 if (stat (uncompressed_name
, &stat_buf
) == 0)
1505 real_name
= uncompressed_name
;
1507 else /* try all possible suffixes */
1509 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1511 compressed_name
= concat (file
, ".", compr
->suffix
);
1512 if (stat (compressed_name
, &stat_buf
) != 0)
1516 char *suf
= compressed_name
+ strlen (file
);
1517 size_t suflen
= strlen (compr
->suffix
) + 1;
1518 for ( ; suf
[1]; suf
++, suflen
--)
1520 memmove (suf
, suf
+ 1, suflen
);
1521 if (stat (compressed_name
, &stat_buf
) == 0)
1523 real_name
= compressed_name
;
1527 if (real_name
!= NULL
)
1530 free (compressed_name
);
1531 compressed_name
= NULL
;
1535 real_name
= compressed_name
;
1540 if (real_name
== NULL
)
1545 } /* try with a different name */
1547 if (!S_ISREG (stat_buf
.st_mode
))
1549 error ("skipping %s: it is not a regular file.", real_name
);
1552 if (real_name
== compressed_name
)
1554 char *cmd
= concat (compr
->command
, " ", real_name
);
1555 inf
= (FILE *) popen (cmd
, "r");
1559 inf
= fopen (real_name
, "r");
1566 process_file (inf
, uncompressed_name
, lang
);
1568 if (real_name
== compressed_name
)
1569 retval
= pclose (inf
);
1571 retval
= fclose (inf
);
1576 free (compressed_name
);
1577 free (uncompressed_name
);
1584 process_file (FILE *fh
, char *fn
, language
*lang
)
1586 static const fdesc emptyfdesc
;
1589 /* Create a new input file description entry. */
1590 fdp
= xnew (1, fdesc
);
1593 fdp
->infname
= savestr (fn
);
1595 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1596 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1597 if (filename_is_absolute (fn
))
1599 /* An absolute file name. Canonicalize it. */
1600 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1604 /* A file name relative to cwd. Make it relative
1605 to the directory of the tags file. */
1606 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1608 fdp
->usecharno
= TRUE
; /* use char position when making tags */
1610 fdp
->written
= FALSE
; /* not written on tags file yet */
1613 curfdp
= fdhead
; /* the current file description */
1617 /* If not Ctags, and if this is not metasource and if it contained no #line
1618 directives, we can write the tags and free all nodes pointing to
1621 && curfdp
->usecharno
/* no #line directives in this file */
1622 && !curfdp
->lang
->metasource
)
1626 /* Look for the head of the sublist relative to this file. See add_node
1627 for the structure of the node tree. */
1629 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1630 if (np
->fdp
== curfdp
)
1633 /* If we generated tags for this file, write and delete them. */
1636 /* This is the head of the last sublist, if any. The following
1637 instructions depend on this being true. */
1638 assert (np
->left
== NULL
);
1640 assert (fdhead
== curfdp
);
1641 assert (last_node
->fdp
== curfdp
);
1642 put_entries (np
); /* write tags for file curfdp->taggedfname */
1643 free_tree (np
); /* remove the written nodes */
1645 nodehead
= NULL
; /* no nodes left */
1647 prev
->left
= NULL
; /* delete the pointer to the sublist */
1653 * This routine sets up the boolean pseudo-functions which work
1654 * by setting boolean flags dependent upon the corresponding character.
1655 * Every char which is NOT in that string is not a white char. Therefore,
1656 * all of the array "_wht" is set to FALSE, and then the elements
1657 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1658 * of a char is TRUE if it is the string "white", else FALSE.
1663 register const char *sp
;
1666 for (i
= 0; i
< CHARS
; i
++)
1667 iswhite (i
) = notinname (i
) = begtoken (i
) = intoken (i
) = endtoken (i
) = FALSE
;
1668 for (sp
= white
; *sp
!= '\0'; sp
++) iswhite (*sp
) = TRUE
;
1669 for (sp
= nonam
; *sp
!= '\0'; sp
++) notinname (*sp
) = TRUE
;
1670 notinname ('\0') = notinname ('\n');
1671 for (sp
= begtk
; *sp
!= '\0'; sp
++) begtoken (*sp
) = TRUE
;
1672 begtoken ('\0') = begtoken ('\n');
1673 for (sp
= midtk
; *sp
!= '\0'; sp
++) intoken (*sp
) = TRUE
;
1674 intoken ('\0') = intoken ('\n');
1675 for (sp
= endtk
; *sp
!= '\0'; sp
++) endtoken (*sp
) = TRUE
;
1676 endtoken ('\0') = endtoken ('\n');
1680 * This routine opens the specified file and calls the function
1681 * which finds the function and type definitions.
1684 find_entries (FILE *inf
)
1687 language
*lang
= curfdp
->lang
;
1688 Lang_function
*parser
= NULL
;
1690 /* If user specified a language, use it. */
1691 if (lang
!= NULL
&& lang
->function
!= NULL
)
1693 parser
= lang
->function
;
1696 /* Else try to guess the language given the file name. */
1699 lang
= get_language_from_filename (curfdp
->infname
, TRUE
);
1700 if (lang
!= NULL
&& lang
->function
!= NULL
)
1702 curfdp
->lang
= lang
;
1703 parser
= lang
->function
;
1707 /* Else look for sharp-bang as the first two characters. */
1709 && readline_internal (&lb
, inf
) > 0
1711 && lb
.buffer
[0] == '#'
1712 && lb
.buffer
[1] == '!')
1716 /* Set lp to point at the first char after the last slash in the
1717 line or, if no slashes, at the first nonblank. Then set cp to
1718 the first successive blank and terminate the string. */
1719 lp
= etags_strrchr (lb
.buffer
+2, '/');
1723 lp
= skip_spaces (lb
.buffer
+ 2);
1724 cp
= skip_non_spaces (lp
);
1727 if (strlen (lp
) > 0)
1729 lang
= get_language_from_interpreter (lp
);
1730 if (lang
!= NULL
&& lang
->function
!= NULL
)
1732 curfdp
->lang
= lang
;
1733 parser
= lang
->function
;
1738 /* We rewind here, even if inf may be a pipe. We fail if the
1739 length of the first line is longer than the pipe block size,
1740 which is unlikely. */
1743 /* Else try to guess the language given the case insensitive file name. */
1746 lang
= get_language_from_filename (curfdp
->infname
, FALSE
);
1747 if (lang
!= NULL
&& lang
->function
!= NULL
)
1749 curfdp
->lang
= lang
;
1750 parser
= lang
->function
;
1754 /* Else try Fortran or C. */
1757 node
*old_last_node
= last_node
;
1759 curfdp
->lang
= get_language_from_langname ("fortran");
1762 if (old_last_node
== last_node
)
1763 /* No Fortran entries found. Try C. */
1765 /* We do not tag if rewind fails.
1766 Only the file name will be recorded in the tags file. */
1768 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1774 if (!no_line_directive
1775 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1776 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777 file, or anyway we parsed a file that is automatically generated from
1778 this one. If this is the case, the bingo.c file contained #line
1779 directives that generated tags pointing to this file. Let's delete
1780 them all before parsing this file, which is the real source. */
1782 fdesc
**fdpp
= &fdhead
;
1783 while (*fdpp
!= NULL
)
1785 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1786 /* We found one of those! We must delete both the file description
1787 and all tags referring to it. */
1789 fdesc
*badfdp
= *fdpp
;
1791 /* Delete the tags referring to badfdp->taggedfname
1792 that were obtained from badfdp->infname. */
1793 invalidate_nodes (badfdp
, &nodehead
);
1795 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1796 free_fdesc (badfdp
);
1799 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1802 assert (parser
!= NULL
);
1804 /* Generic initializations before reading from file. */
1805 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1807 /* Generic initializations before parsing file with readline. */
1808 lineno
= 0; /* reset global line number */
1809 charno
= 0; /* reset global char number */
1810 linecharno
= 0; /* reset global char number of line start */
1814 regex_tag_multiline ();
1819 * Check whether an implicitly named tag should be created,
1820 * then call `pfnote'.
1821 * NAME is a string that is internally copied by this function.
1823 * TAGS format specification
1824 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825 * The following is explained in some more detail in etc/ETAGS.EBNF.
1827 * make_tag creates tags with "implicit tag names" (unnamed tags)
1828 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829 * 1. NAME does not contain any of the characters in NONAM;
1830 * 2. LINESTART contains name as either a rightmost, or rightmost but
1831 * one character, substring;
1832 * 3. the character, if any, immediately before NAME in LINESTART must
1833 * be a character in NONAM;
1834 * 4. the character, if any, immediately after NAME in LINESTART must
1835 * also be a character in NONAM.
1837 * The implementation uses the notinname() macro, which recognizes the
1838 * characters stored in the string `nonam'.
1839 * etags.el needs to use the same characters that are in NONAM.
1842 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1843 int namelen
, /* tag length */
1844 int is_func
, /* tag is a function */
1845 char *linestart
, /* start of the line where tag is */
1846 int linelen
, /* length of the line where tag is */
1847 int lno
, /* line number */
1848 long int cno
) /* character number */
1850 bool named
= (name
!= NULL
&& namelen
> 0);
1853 if (!CTAGS
&& named
) /* maybe set named to false */
1854 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855 such that etags.el can guess a name from it. */
1858 register const char *cp
= name
;
1860 for (i
= 0; i
< namelen
; i
++)
1861 if (notinname (*cp
++))
1863 if (i
== namelen
) /* rule #1 */
1865 cp
= linestart
+ linelen
- namelen
;
1866 if (notinname (linestart
[linelen
-1]))
1867 cp
-= 1; /* rule #4 */
1868 if (cp
>= linestart
/* rule #2 */
1870 || notinname (cp
[-1])) /* rule #3 */
1871 && strneq (name
, cp
, namelen
)) /* rule #2 */
1872 named
= FALSE
; /* use implicit tag name */
1877 nname
= savenstr (name
, namelen
);
1879 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
1884 pfnote (char *name
, int is_func
, char *linestart
, int linelen
, int lno
, long int cno
)
1885 /* tag name, or NULL if unnamed */
1886 /* tag is a function */
1887 /* start of the line where tag is */
1888 /* length of the line where tag is */
1890 /* character number */
1894 assert (name
== NULL
|| name
[0] != '\0');
1895 if (CTAGS
&& name
== NULL
)
1898 np
= xnew (1, node
);
1900 /* If ctags mode, change name "main" to M<thisfilename>. */
1901 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1903 register char *fp
= etags_strrchr (curfdp
->taggedfname
, '/');
1904 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1905 fp
= etags_strrchr (np
->name
, '.');
1906 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1912 np
->been_warned
= FALSE
;
1914 np
->is_func
= is_func
;
1916 if (np
->fdp
->usecharno
)
1917 /* Our char numbers are 0-base, because of C language tradition?
1918 ctags compatibility? old versions compatibility? I don't know.
1919 Anyway, since emacs's are 1-base we expect etags.el to take care
1920 of the difference. If we wanted to have 1-based numbers, we would
1921 uncomment the +1 below. */
1922 np
->cno
= cno
/* + 1 */ ;
1924 np
->cno
= invalidcharno
;
1925 np
->left
= np
->right
= NULL
;
1926 if (CTAGS
&& !cxref_style
)
1928 if (strlen (linestart
) < 50)
1929 np
->regex
= concat (linestart
, "$", "");
1931 np
->regex
= savenstr (linestart
, 50);
1934 np
->regex
= savenstr (linestart
, linelen
);
1936 add_node (np
, &nodehead
);
1941 * recurse on left children, iterate on right children.
1944 free_tree (register node
*np
)
1948 register node
*node_right
= np
->right
;
1949 free_tree (np
->left
);
1959 * delete a file description
1962 free_fdesc (register fdesc
*fdp
)
1964 free (fdp
->infname
);
1965 free (fdp
->infabsname
);
1966 free (fdp
->infabsdir
);
1967 free (fdp
->taggedfname
);
1974 * Adds a node to the tree of nodes. In etags mode, sort by file
1975 * name. In ctags mode, sort by tag name. Make no attempt at
1978 * add_node is the only function allowed to add nodes, so it can
1982 add_node (node
*np
, node
**cur_node_p
)
1985 register node
*cur_node
= *cur_node_p
;
1987 if (cur_node
== NULL
)
1997 /* For each file name, tags are in a linked sublist on the right
1998 pointer. The first tags of different files are a linked list
1999 on the left pointer. last_node points to the end of the last
2001 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2003 /* Let's use the same sublist as the last added node. */
2004 assert (last_node
->right
== NULL
);
2005 last_node
->right
= np
;
2008 else if (cur_node
->fdp
== np
->fdp
)
2010 /* Scanning the list we found the head of a sublist which is
2011 good for us. Let's scan this sublist. */
2012 add_node (np
, &cur_node
->right
);
2015 /* The head of this sublist is not good for us. Let's try the
2017 add_node (np
, &cur_node
->left
);
2018 } /* if ETAGS mode */
2023 dif
= strcmp (np
->name
, cur_node
->name
);
2026 * If this tag name matches an existing one, then
2027 * do not add the node, but maybe print a warning.
2029 if (no_duplicates
&& !dif
)
2031 if (np
->fdp
== cur_node
->fdp
)
2035 fprintf (stderr
, "Duplicate entry in file %s, line %d: %s\n",
2036 np
->fdp
->infname
, lineno
, np
->name
);
2037 fprintf (stderr
, "Second entry ignored\n");
2040 else if (!cur_node
->been_warned
&& !no_warnings
)
2044 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2046 cur_node
->been_warned
= TRUE
;
2051 /* Actually add the node */
2052 add_node (np
, dif
< 0 ? &cur_node
->left
: &cur_node
->right
);
2053 } /* if CTAGS mode */
2057 * invalidate_nodes ()
2058 * Scan the node tree and invalidate all nodes pointing to the
2059 * given file description (CTAGS case) or free them (ETAGS case).
2062 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2071 if (np
->left
!= NULL
)
2072 invalidate_nodes (badfdp
, &np
->left
);
2073 if (np
->fdp
== badfdp
)
2075 if (np
->right
!= NULL
)
2076 invalidate_nodes (badfdp
, &np
->right
);
2080 assert (np
->fdp
!= NULL
);
2081 if (np
->fdp
== badfdp
)
2083 *npp
= np
->left
; /* detach the sublist from the list */
2084 np
->left
= NULL
; /* isolate it */
2085 free_tree (np
); /* free it */
2086 invalidate_nodes (badfdp
, npp
);
2089 invalidate_nodes (badfdp
, &np
->left
);
2094 static int total_size_of_entries (node
*);
2095 static int number_len (long) ATTRIBUTE_CONST
;
/* Length of a non-negative number's decimal representation.
   NUM is expected to be >= 0; the result is at least 1.  */
static int
number_len (long int num)
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2108 * Return total number of characters that put_entries will output for
2109 * the nodes in the linked list at the right of the specified node.
2110 * This count is irrelevant with etags.el since emacs 19.34 at least,
2111 * but is still supplied for backward compatibility.
2114 total_size_of_entries (register node
*np
)
2116 register int total
= 0;
2118 for (; np
!= NULL
; np
= np
->right
)
2121 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2122 if (np
->name
!= NULL
)
2123 total
+= strlen (np
->name
) + 1; /* name\001 */
2124 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2125 if (np
->cno
!= invalidcharno
) /* cno */
2126 total
+= number_len (np
->cno
);
2127 total
+= 1; /* newline */
2134 put_entries (register node
*np
)
2137 static fdesc
*fdp
= NULL
;
2142 /* Output subentries that precede this one */
2144 put_entries (np
->left
);
2146 /* Output this entry */
2155 fprintf (tagf
, "\f\n%s,%d\n",
2156 fdp
->taggedfname
, total_size_of_entries (np
));
2157 fdp
->written
= TRUE
;
2159 fputs (np
->regex
, tagf
);
2160 fputc ('\177', tagf
);
2161 if (np
->name
!= NULL
)
2163 fputs (np
->name
, tagf
);
2164 fputc ('\001', tagf
);
2166 fprintf (tagf
, "%d,", np
->lno
);
2167 if (np
->cno
!= invalidcharno
)
2168 fprintf (tagf
, "%ld", np
->cno
);
2174 if (np
->name
== NULL
)
2175 error ("internal error: NULL name in ctags mode.");
2180 fprintf (stdout
, "%s %s %d\n",
2181 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2183 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2184 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2188 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2191 { /* function or #define macro with args */
2192 putc (searchar
, tagf
);
2195 for (sp
= np
->regex
; *sp
; sp
++)
2197 if (*sp
== '\\' || *sp
== searchar
)
2201 putc (searchar
, tagf
);
2204 { /* anything else; text pattern inadequate */
2205 fprintf (tagf
, "%d", np
->lno
);
2210 } /* if this node contains a valid tag */
2212 /* Output subentries that follow this one */
2213 put_entries (np
->right
);
2215 put_entries (np
->left
);
/* Flags describing which C dialect is being parsed.  C_EXT is the mask
   covering the language-extension bits.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classification of the reserved words listed in the gperf table
 * that follows.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2243 /* Feed stuff between (but not including) %[ and %] lines to:
2249 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2253 while, 0, st_C_ignore
2254 switch, 0, st_C_ignore
2255 return, 0, st_C_ignore
2256 __attribute__, 0, st_C_attribute
2257 GTY, 0, st_C_attribute
2258 @interface, 0, st_C_objprot
2259 @protocol, 0, st_C_objprot
2260 @implementation,0, st_C_objimpl
2261 @end, 0, st_C_objend
2262 import, (C_JAVA & ~C_PLPL), st_C_ignore
2263 package, (C_JAVA & ~C_PLPL), st_C_ignore
2264 friend, C_PLPL, st_C_ignore
2265 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2266 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2267 interface, (C_JAVA & ~C_PLPL), st_C_struct
2268 class, 0, st_C_class
2269 namespace, C_PLPL, st_C_struct
2270 domain, C_STAR, st_C_struct
2271 union, 0, st_C_struct
2272 struct, 0, st_C_struct
2273 extern, 0, st_C_extern
2275 typedef, 0, st_C_typedef
2276 define, 0, st_C_define
2277 undef, 0, st_C_define
2278 operator, C_PLPL, st_C_operator
2279 template, 0, st_C_template
2280 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2281 DEFUN, 0, st_C_gnumacro
2282 SYSCALL, 0, st_C_gnumacro
2283 ENTRY, 0, st_C_gnumacro
2284 PSEUDO, 0, st_C_gnumacro
2285 # These are defined inside C functions, so currently they are not met.
2286 # EXFUN used in glibc, DEFVAR_* in emacs.
2287 #EXFUN, 0, st_C_gnumacro
2288 #DEFVAR_, 0, st_C_gnumacro
2290 and replace lines between %< and %> with its output, then:
2291 - remove the #if characterset check
2292 - make in_word_set static and not inline. */
2294 /* C code produced by gperf version 3.0.1 */
2295 /* Command-line: gperf -m 5 */
2296 /* Computed positions: -k'2-3' */
2298 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
2299 /* maximum key range = 33, duplicates = 0 */
2302 hash (const char *str
, int len
)
2304 static char const asso_values
[] =
2306 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2307 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2308 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2309 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2310 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2311 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2312 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2313 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2314 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2315 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2316 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2317 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2318 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2319 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2320 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2321 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2322 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2323 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2324 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2325 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2326 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2327 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2328 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2329 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2330 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2331 35, 35, 35, 35, 35, 35
2338 hval
+= asso_values
[(unsigned char) str
[2]];
2341 hval
+= asso_values
[(unsigned char) str
[1]];
2347 static struct C_stab_entry
*
2348 in_word_set (register const char *str
, register unsigned int len
)
2352 TOTAL_KEYWORDS
= 33,
2353 MIN_WORD_LENGTH
= 2,
2354 MAX_WORD_LENGTH
= 15,
2359 static struct C_stab_entry wordlist
[] =
2362 {"if", 0, st_C_ignore
},
2363 {"GTY", 0, st_C_attribute
},
2364 {"@end", 0, st_C_objend
},
2365 {"union", 0, st_C_struct
},
2366 {"define", 0, st_C_define
},
2367 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2368 {"template", 0, st_C_template
},
2369 {"operator", C_PLPL
, st_C_operator
},
2370 {"@interface", 0, st_C_objprot
},
2371 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2372 {"friend", C_PLPL
, st_C_ignore
},
2373 {"typedef", 0, st_C_typedef
},
2374 {"return", 0, st_C_ignore
},
2375 {"@implementation",0, st_C_objimpl
},
2376 {"@protocol", 0, st_C_objprot
},
2377 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2378 {"extern", 0, st_C_extern
},
2379 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2380 {"struct", 0, st_C_struct
},
2381 {"domain", C_STAR
, st_C_struct
},
2382 {"switch", 0, st_C_ignore
},
2383 {"enum", 0, st_C_enum
},
2384 {"for", 0, st_C_ignore
},
2385 {"namespace", C_PLPL
, st_C_struct
},
2386 {"class", 0, st_C_class
},
2387 {"while", 0, st_C_ignore
},
2388 {"undef", 0, st_C_define
},
2389 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2390 {"__attribute__", 0, st_C_attribute
},
2391 {"SYSCALL", 0, st_C_gnumacro
},
2392 {"ENTRY", 0, st_C_gnumacro
},
2393 {"PSEUDO", 0, st_C_gnumacro
},
2394 {"DEFUN", 0, st_C_gnumacro
}
2397 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2399 int key
= hash (str
, len
);
2401 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2403 const char *s
= wordlist
[key
].name
;
2405 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2406 return &wordlist
[key
];
2413 static enum sym_type
2414 C_symtype (char *str
, int len
, int c_ext
)
2416 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2418 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
2425 * Ignoring __attribute__ ((list))
2427 static bool inattribute
; /* looking at an __attribute__ construct */
2430 * C functions and variables are recognized using a simple
2431 * finite automaton. fvdef is its state variable.
2435 fvnone
, /* nothing seen */
2436 fdefunkey
, /* Emacs DEFUN keyword seen */
2437 fdefunname
, /* Emacs DEFUN name seen */
2438 foperator
, /* func: operator keyword seen (cplpl) */
2439 fvnameseen
, /* function or variable name seen */
2440 fstartlist
, /* func: just after open parenthesis */
2441 finlist
, /* func: in parameter list */
2442 flistseen
, /* func: after parameter list */
2443 fignore
, /* func: before open brace */
2444 vignore
/* var-like: ignore until ';' */
2447 static bool fvextern
; /* func or var: extern keyword seen; */
2450 * typedefs are recognized using a simple finite automaton.
2451 * typdef is its state variable.
2455 tnone
, /* nothing seen */
2456 tkeyseen
, /* typedef keyword seen */
2457 ttypeseen
, /* defined type seen */
2458 tinbody
, /* inside typedef body */
2459 tend
, /* just before typedef tag */
2460 tignore
/* junk after typedef tag */
2464 * struct-like structures (enum, struct and union) are recognized
2465 * using another simple finite automaton. `structdef' is its state
2470 snone
, /* nothing seen yet,
2471 or in struct body if bracelev > 0 */
2472 skeyseen
, /* struct-like keyword seen */
2473 stagseen
, /* struct-like tag seen */
2474 scolonseen
/* colon seen after struct-like tag */
2478 * When objdef is different from onone, objtag is the name of the class.
2480 static const char *objtag
= "<uninited>";
2483 * Yet another little state machine to deal with preprocessor lines.
2487 dnone
, /* nothing seen */
2488 dsharpseen
, /* '#' seen as first char on line */
2489 ddefineseen
, /* '#' and 'define' seen */
2490 dignorerest
/* ignore rest of line */
2494 * State machine for Objective C protocols and implementations.
2495 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2499 onone
, /* nothing seen */
2500 oprotocol
, /* @interface or @protocol seen */
2501 oimplementation
, /* @implementation seen */
2502 otagseen
, /* class name seen */
2503 oparenseen
, /* parenthesis before category seen */
2504 ocatseen
, /* category name seen */
2505 oinbody
, /* in @implementation body */
2506 omethodsign
, /* in @implementation body, after +/- */
2507 omethodtag
, /* after method name */
2508 omethodcolon
, /* after method colon */
2509 omethodparm
, /* after method parameter */
2510 oignore
/* wait for @end */
2515 * Use this structure to keep info about the token read, and how it
2516 * should be tagged. Used by the make_C_tag function to build a tag.
2520 char *line
; /* string containing the token */
2521 int offset
; /* where the token starts in LINE */
2522 int length
; /* token length */
2524 The previous members can be used to pass strings around for generic
2525 purposes. The following ones specifically refer to creating tags. In this
2526 case the token contained here is the pattern that will be used to create a
2529 bool valid
; /* if FALSE, do not create a tag; the token should be
2530 invalidated whenever a state machine is
2531 reset prematurely */
2532 bool named
; /* create a named tag */
2533 int lineno
; /* source line number of tag */
2534 long linepos
; /* source char number of tag */
2535 } token
; /* latest token read */
2538 * Variables and functions for dealing with nested structures.
2539 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2541 static void pushclass_above (int, char *, int);
2542 static void popclass_above (int);
2543 static void write_classname (linebuffer
*, const char *qualifier
);
2546 char **cname
; /* nested class names */
2547 int *bracelev
; /* nested class brace level */
2548 int nl
; /* class nesting level (elements used) */
2549 int size
; /* length of the array */
2550 } cstack
; /* stack for nested declaration tags */
2551 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2552 #define nestlev (cstack.nl)
2553 /* After struct keyword or in struct body, not inside a nested function. */
2554 #define instruct (structdef == snone && nestlev > 0 \
2555 && bracelev == cstack.bracelev[nestlev-1] + 1)
2558 pushclass_above (int bracelev
, char *str
, int len
)
2562 popclass_above (bracelev
);
2564 if (nl
>= cstack
.size
)
2566 int size
= cstack
.size
*= 2;
2567 xrnew (cstack
.cname
, size
, char *);
2568 xrnew (cstack
.bracelev
, size
, int);
2570 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2571 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2572 cstack
.bracelev
[nl
] = bracelev
;
2577 popclass_above (int bracelev
)
2581 for (nl
= cstack
.nl
- 1;
2582 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2585 free (cstack
.cname
[nl
]);
2591 write_classname (linebuffer
*cn
, const char *qualifier
)
2594 int qlen
= strlen (qualifier
);
2596 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2600 cn
->buffer
[0] = '\0';
2604 len
= strlen (cstack
.cname
[0]);
2605 linebuffer_setlen (cn
, len
);
2606 strcpy (cn
->buffer
, cstack
.cname
[0]);
2608 for (i
= 1; i
< cstack
.nl
; i
++)
2610 char *s
= cstack
.cname
[i
];
2613 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2614 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2619 static bool consider_token (char *, int, int, int *, int, int, bool *);
2620 static void make_C_tag (bool);
2624 * checks to see if the current token is at the start of a
2625 * function or variable, or corresponds to a typedef, or
2626 * is a struct/union/enum tag, or #define, or an enum constant.
2628 * *IS_FUNC_OR_VAR gets TRUE if the token is a function, a variable or a
2629 * #define macro with args. C_EXTP points to which language we are looking at.
2640 consider_token (register char *str
, register int len
, register int c
, int *c_extp
, int bracelev
, int parlev
, int *is_func_or_var
)
2641 /* IN: token pointer */
2642 /* IN: token length */
2643 /* IN: first char after the token */
2644 /* IN, OUT: C extensions mask */
2645 /* IN: brace level */
2646 /* IN: parenthesis level */
2647 /* OUT: function or variable found */
2649 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2650 structtype is the type of the preceding struct-like keyword, and
2651 structbracelev is the brace level where it has been seen. */
2652 static enum sym_type structtype
;
2653 static int structbracelev
;
2654 static enum sym_type toktype
;
2657 toktype
= C_symtype (str
, len
, *c_extp
);
2660 * Skip __attribute__
2662 if (toktype
== st_C_attribute
)
2669 * Advance the definedef state machine.
2674 /* We're not on a preprocessor line. */
2675 if (toktype
== st_C_gnumacro
)
2682 if (toktype
== st_C_define
)
2684 definedef
= ddefineseen
;
2688 definedef
= dignorerest
;
2693 * Make a tag for any macro, unless it is a constant
2694 * and constantypedefs is FALSE.
2696 definedef
= dignorerest
;
2697 *is_func_or_var
= (c
== '(');
2698 if (!*is_func_or_var
&& !constantypedefs
)
2705 error ("internal error: definedef value.");
2714 if (toktype
== st_C_typedef
)
2734 if (structdef
== snone
&& fvdef
== fvnone
)
2753 case st_C_javastruct
:
2754 if (structdef
== stagseen
)
2755 structdef
= scolonseen
;
2759 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
2761 && definedef
== dnone
&& structdef
== snone
2762 && typdef
== tnone
&& fvdef
== fvnone
)
2763 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2764 if (toktype
== st_C_template
)
2771 && (typdef
== tkeyseen
2772 || (typedefs_or_cplusplus
&& structdef
== snone
)))
2774 structdef
= skeyseen
;
2775 structtype
= toktype
;
2776 structbracelev
= bracelev
;
2777 if (fvdef
== fvnameseen
)
2783 if (structdef
== skeyseen
)
2785 structdef
= stagseen
;
2789 if (typdef
!= tnone
)
2792 /* Detect Objective C constructs. */
2802 objdef
= oimplementation
;
2806 case oimplementation
:
2807 /* Save the class tag for functions or variables defined inside. */
2808 objtag
= savenstr (str
, len
);
2812 /* Save the class tag for categories. */
2813 objtag
= savenstr (str
, len
);
2815 *is_func_or_var
= TRUE
;
2819 *is_func_or_var
= TRUE
;
2827 objdef
= omethodtag
;
2828 linebuffer_setlen (&token_name
, len
);
2829 memcpy (token_name
.buffer
, str
, len
);
2830 token_name
.buffer
[len
] = '\0';
2836 objdef
= omethodparm
;
2841 int oldlen
= token_name
.len
;
2843 objdef
= omethodtag
;
2844 linebuffer_setlen (&token_name
, oldlen
+ len
);
2845 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
2846 token_name
.buffer
[oldlen
+ len
] = '\0';
2851 if (toktype
== st_C_objend
)
2853 /* Memory leakage here: the string pointed to by objtag is
2854 never released, because many tests would be needed to
2855 avoid breaking on incorrect input code. The amount of
2856 memory leaked here is the sum of the lengths of the
2864 /* A function, variable or enum constant? */
2886 *is_func_or_var
= TRUE
;
2890 && structdef
== snone
2891 && structtype
== st_C_enum
&& bracelev
> structbracelev
)
2892 return TRUE
; /* enum constant */
2898 fvdef
= fdefunname
; /* GNU macro */
2899 *is_func_or_var
= TRUE
;
2907 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
2908 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
2917 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
2919 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
2920 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2922 *is_func_or_var
= TRUE
;
2925 if (bracelev
> 0 && !instruct
)
2927 fvdef
= fvnameseen
; /* function or variable */
2928 *is_func_or_var
= TRUE
;
2939 * C_entries often keeps pointers to tokens or lines which are older than
2940 * the line currently read. By keeping two line buffers, and switching
2941 * them at end of line, it is possible to use those pointers.
2949 #define current_lb_is_new (newndx == curndx)
2950 #define switch_line_buffers() (curndx = 1 - curndx)
2952 #define curlb (lbs[curndx].lb)
2953 #define newlb (lbs[newndx].lb)
2954 #define curlinepos (lbs[curndx].linepos)
2955 #define newlinepos (lbs[newndx].linepos)
2957 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2958 #define cplpl (c_ext & C_PLPL)
2959 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2961 #define CNL_SAVE_DEFINEDEF() \
2963 curlinepos = charno; \
2964 readline (&curlb, inf); \
2965 lp = curlb.buffer; \
2972 CNL_SAVE_DEFINEDEF(); \
2973 if (savetoken.valid) \
2975 token = savetoken; \
2976 savetoken.valid = FALSE; \
2978 definedef = dnone; \
2983 make_C_tag (int isfun
)
2985 /* This function is never called when token.valid is FALSE, but
2986 we must protect against invalid input or internal errors. */
2988 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
2989 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
2991 { /* this branch is optimized away if !DEBUG */
2992 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
2993 token_name
.len
+ 17, isfun
, token
.line
,
2994 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
2995 error ("INVALID TOKEN");
2998 token
.valid
= FALSE
;
3004 * This routine finds functions, variables, typedefs,
3005 * #define's, enum constants and struct/union/enum definitions in
3006 * C syntax and adds them to the list.
3009 C_entries (int c_ext
, FILE *inf
)
3010 /* extension of C */
3013 register char c
; /* latest char read; '\0' for end of line */
3014 register char *lp
; /* pointer one beyond the character `c' */
3015 int curndx
, newndx
; /* indices for current and new lb */
3016 register int tokoff
; /* offset in line of start of current token */
3017 register int toklen
; /* length of current token */
3018 const char *qualifier
; /* string used to qualify names */
3019 int qlen
; /* length of qualifier */
3020 int bracelev
; /* current brace level */
3021 int bracketlev
; /* current bracket level */
3022 int parlev
; /* current parenthesis level */
3023 int attrparlev
; /* __attribute__ parenthesis level */
3024 int templatelev
; /* current template level */
3025 int typdefbracelev
; /* bracelev where a typedef struct body began */
3026 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3027 bool yacc_rules
; /* in the rules part of a yacc file */
3028 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
3031 linebuffer_init (&lbs
[0].lb
);
3032 linebuffer_init (&lbs
[1].lb
);
3033 if (cstack
.size
== 0)
3035 cstack
.size
= (DEBUG
) ? 1 : 4;
3037 cstack
.cname
= xnew (cstack
.size
, char *);
3038 cstack
.bracelev
= xnew (cstack
.size
, int);
3041 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3042 curndx
= newndx
= 0;
3046 fvdef
= fvnone
; fvextern
= FALSE
; typdef
= tnone
;
3047 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3049 midtoken
= inquote
= inchar
= incomm
= quotednl
= FALSE
;
3050 token
.valid
= savetoken
.valid
= FALSE
;
3051 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3053 { qualifier
= "."; qlen
= 1; }
3055 { qualifier
= "::"; qlen
= 2; }
3063 /* If we are at the end of the line, the next character is a
3064 '\0'; do not skip it, because it is what tells us
3065 to read the next line. */
3086 /* Newlines inside comments do not end macro definitions in
3088 CNL_SAVE_DEFINEDEF ();
3101 /* Newlines inside strings do not end macro definitions
3102 in traditional cpp, even though compilers don't
3103 usually accept them. */
3104 CNL_SAVE_DEFINEDEF ();
3114 /* Hmmm, something went wrong. */
3150 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3165 else if (/* cplpl && */ *lp
== '/')
3171 if ((c_ext
& YACC
) && *lp
== '%')
3173 /* Entering or exiting rules section in yacc file. */
3175 definedef
= dnone
; fvdef
= fvnone
; fvextern
= FALSE
;
3176 typdef
= tnone
; structdef
= snone
;
3177 midtoken
= inquote
= inchar
= incomm
= quotednl
= FALSE
;
3179 yacc_rules
= !yacc_rules
;
3185 if (definedef
== dnone
)
3188 bool cpptoken
= TRUE
;
3190 /* Look back on this line. If all blanks, or nonblanks
3191 followed by an end of comment, this is a preprocessor
3193 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3196 if (*cp
== '*' && cp
[1] == '/')
3205 definedef
= dsharpseen
;
3206 } /* if (definedef == dnone) */
3217 CNL_SAVE_DEFINEDEF ();
3224 /* Consider token only if some involved conditions are satisfied. */
3225 if (typdef
!= tignore
3226 && definedef
!= dignorerest
3229 && (definedef
!= dnone
3230 || structdef
!= scolonseen
)
3237 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3238 /* This handles :: in the middle,
3239 but not at the beginning of an identifier.
3240 Also, space-separated :: is not recognized. */
3242 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3243 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3247 goto still_in_token
;
3251 bool funorvar
= FALSE
;
3254 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3255 &c_ext
, bracelev
, parlev
,
3258 if (fvdef
== foperator
)
3261 lp
= skip_spaces (lp
-1);
3265 && !iswhite (*lp
) && *lp
!= '(')
3268 toklen
+= lp
- oldlp
;
3270 token
.named
= FALSE
;
3272 && nestlev
> 0 && definedef
== dnone
)
3273 /* in struct body */
3276 write_classname (&token_name
, qualifier
);
3277 len
= token_name
.len
;
3278 linebuffer_setlen (&token_name
, len
+qlen
+toklen
);
3279 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3280 qualifier
, toklen
, newlb
.buffer
+ tokoff
);
3283 else if (objdef
== ocatseen
)
3284 /* Objective C category */
3286 int len
= strlen (objtag
) + 2 + toklen
;
3287 linebuffer_setlen (&token_name
, len
);
3288 sprintf (token_name
.buffer
, "%s(%.*s)",
3289 objtag
, toklen
, newlb
.buffer
+ tokoff
);
3292 else if (objdef
== omethodtag
3293 || objdef
== omethodparm
)
3294 /* Objective C method */
3298 else if (fvdef
== fdefunname
)
3299 /* GNU DEFUN and similar macros */
3301 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3305 /* Rewrite the tag so that emacs lisp DEFUNs
3306 can be found by their elisp name */
3312 linebuffer_setlen (&token_name
, len
);
3313 memcpy (token_name
.buffer
,
3314 newlb
.buffer
+ off
, len
);
3315 token_name
.buffer
[len
] = '\0';
3318 if (token_name
.buffer
[len
] == '_')
3319 token_name
.buffer
[len
] = '-';
3320 token
.named
= defun
;
3324 linebuffer_setlen (&token_name
, toklen
);
3325 memcpy (token_name
.buffer
,
3326 newlb
.buffer
+ tokoff
, toklen
);
3327 token_name
.buffer
[toklen
] = '\0';
3328 /* Name macros and members. */
3329 token
.named
= (structdef
== stagseen
3330 || typdef
== ttypeseen
3333 && definedef
== dignorerest
)
3335 && definedef
== dnone
3336 && structdef
== snone
3339 token
.lineno
= lineno
;
3340 token
.offset
= tokoff
;
3341 token
.length
= toklen
;
3342 token
.line
= newlb
.buffer
;
3343 token
.linepos
= newlinepos
;
3346 if (definedef
== dnone
3347 && (fvdef
== fvnameseen
3348 || fvdef
== foperator
3349 || structdef
== stagseen
3351 || typdef
== ttypeseen
3352 || objdef
!= onone
))
3354 if (current_lb_is_new
)
3355 switch_line_buffers ();
3357 else if (definedef
!= dnone
3358 || fvdef
== fdefunname
3360 make_C_tag (funorvar
);
3362 else /* not yacc and consider_token failed */
3364 if (inattribute
&& fvdef
== fignore
)
3366 /* We have just met __attribute__ after a
3367 function parameter list: do not tag the
3374 } /* if (endtoken (c)) */
3375 else if (intoken (c
))
3381 } /* if (midtoken) */
3382 else if (begtoken (c
))
3390 /* This prevents tagging fb in
3391 void (__attribute__((noreturn)) *fb) (void);
3392 Fixing this is not easy and not very important. */
3396 if (plainc
|| declarations
)
3398 make_C_tag (TRUE
); /* a function */
3403 if (structdef
== stagseen
&& !cjava
)
3405 popclass_above (bracelev
);
3413 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3415 tokoff
= lp
- 1 - newlb
.buffer
;
3420 } /* if (begtoken) */
3421 } /* if must look at token */
3424 /* Detect end of line, colon, comma, semicolon and various braces
3425 after having handled a token.*/
3431 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3433 make_C_tag (FALSE
); /* a yacc function */
3436 if (definedef
!= dnone
)
3442 make_C_tag (TRUE
); /* an Objective C class */
3446 objdef
= omethodcolon
;
3447 linebuffer_setlen (&token_name
, token_name
.len
+ 1);
3448 strcat (token_name
.buffer
, ":");
3451 if (structdef
== stagseen
)
3453 structdef
= scolonseen
;
3456 /* Should be useless, but may work as a safety net. */
3457 if (cplpl
&& fvdef
== flistseen
)
3459 make_C_tag (TRUE
); /* a function */
3465 if (definedef
!= dnone
|| inattribute
)
3471 make_C_tag (FALSE
); /* a typedef */
3481 if (typdef
== tignore
|| cplpl
)
3485 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3486 || (members
&& instruct
))
3487 make_C_tag (FALSE
); /* a variable */
3490 token
.valid
= FALSE
;
3494 && (cplpl
|| !instruct
)
3495 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3497 && plainc
&& instruct
))
3498 make_C_tag (TRUE
); /* a function */
3504 && cplpl
&& structdef
== stagseen
)
3505 make_C_tag (FALSE
); /* forward declaration */
3507 token
.valid
= FALSE
;
3508 } /* switch (fvdef) */
3514 if (structdef
== stagseen
)
3518 if (definedef
!= dnone
|| inattribute
)
3524 make_C_tag (TRUE
); /* an Objective C method */
3545 && (!fvextern
|| declarations
))
3546 || (members
&& instruct
)))
3547 make_C_tag (FALSE
); /* a variable */
3550 if ((declarations
&& typdef
== tnone
&& !instruct
)
3551 || (members
&& typdef
!= tignore
&& instruct
))
3553 make_C_tag (TRUE
); /* a function */
3556 else if (!declarations
)
3558 token
.valid
= FALSE
;
3563 if (structdef
== stagseen
)
3567 if (definedef
!= dnone
|| inattribute
)
3569 if (structdef
== stagseen
)
3576 make_C_tag (FALSE
); /* a typedef */
3588 if ((members
&& bracelev
== 1)
3589 || (globals
&& bracelev
== 0
3590 && (!fvextern
|| declarations
)))
3591 make_C_tag (FALSE
); /* a variable */
3605 if (definedef
!= dnone
)
3607 if (objdef
== otagseen
&& parlev
== 0)
3608 objdef
= oparenseen
;
3612 if (typdef
== ttypeseen
3616 /* This handles constructs like:
3617 typedef void OperatorFun (int fun); */
3636 if (--attrparlev
== 0)
3637 inattribute
= FALSE
;
3640 if (definedef
!= dnone
)
3642 if (objdef
== ocatseen
&& parlev
== 1)
3644 make_C_tag (TRUE
); /* an Objective C category */
3658 || typdef
== ttypeseen
))
3661 make_C_tag (FALSE
); /* a typedef */
3664 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
3668 if (definedef
!= dnone
)
3670 if (typdef
== ttypeseen
)
3672 /* Whenever typdef is set to tinbody (currently only
3673 here), typdefbracelev should be set to bracelev. */
3675 typdefbracelev
= bracelev
;
3680 make_C_tag (TRUE
); /* a function */
3689 make_C_tag (TRUE
); /* an Objective C class */
3694 make_C_tag (TRUE
); /* an Objective C method */
3698 /* Neutralize `extern "C" {' grot. */
3699 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
3707 case skeyseen
: /* unnamed struct */
3708 pushclass_above (bracelev
, NULL
, 0);
3711 case stagseen
: /* named struct or enum */
3712 case scolonseen
: /* a class */
3713 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
3715 make_C_tag (FALSE
); /* a struct or enum */
3721 if (definedef
!= dnone
)
3723 if (fvdef
== fstartlist
)
3725 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
3726 token
.valid
= FALSE
;
3730 if (definedef
!= dnone
)
3733 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
3736 token
.valid
= FALSE
; /* unexpected value, token unreliable */
3737 bracelev
= 0; /* reset brace level if first column */
3738 parlev
= 0; /* also reset paren level, just in case... */
3740 else if (bracelev
< 0)
3742 token
.valid
= FALSE
; /* something gone amiss, token unreliable */
3745 if (bracelev
== 0 && fvdef
== vignore
)
3746 fvdef
= fvnone
; /* end of function */
3747 popclass_above (bracelev
);
3749 /* Only if typdef == tinbody is typdefbracelev significant. */
3750 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
3752 assert (bracelev
== typdefbracelev
);
3757 if (definedef
!= dnone
)
3767 if ((members
&& bracelev
== 1)
3768 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
3769 make_C_tag (FALSE
); /* a variable */
3777 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
3784 if (templatelev
> 0)
3792 if (objdef
== oinbody
&& bracelev
== 0)
3794 objdef
= omethodsign
;
3799 case '#': case '~': case '&': case '%': case '/':
3800 case '|': case '^': case '!': case '.': case '?':
3801 if (definedef
!= dnone
)
3803 /* These surely cannot follow a function tag in C. */
3816 if (objdef
== otagseen
)
3818 make_C_tag (TRUE
); /* an Objective C class */
3821 /* If a macro spans multiple lines don't reset its state. */
3823 CNL_SAVE_DEFINEDEF ();
3829 } /* while not eof */
3831 free (lbs
[0].lb
.buffer
);
3832 free (lbs
[1].lb
.buffer
);
3836 * Process either a C++ file or a C file depending on the setting
3840 default_C_entries (FILE *inf
)
3842 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
3845 /* Always do plain C. */
3847 plain_C_entries (FILE *inf
)
3852 /* Always do C++. */
3854 Cplusplus_entries (FILE *inf
)
3856 C_entries (C_PLPL
, inf
);
3859 /* Always do Java. */
3861 Cjava_entries (FILE *inf
)
3863 C_entries (C_JAVA
, inf
);
3868 Cstar_entries (FILE *inf
)
3870 C_entries (C_STAR
, inf
);
3873 /* Always do Yacc. */
3875 Yacc_entries (FILE *inf
)
3877 C_entries (YACC
, inf
);
3881 /* Useful macros. */
3882 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
3883 for (; /* loop initialization */ \
3884 !feof (file_pointer) /* loop test */ \
3885 && /* instructions at start of loop */ \
3886 (readline (&line_buffer, file_pointer), \
3887 char_pointer = line_buffer.buffer, \
3891 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
3892 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3893 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3894 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
3895 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3897 /* Similar to LOOKING_AT but does not use notinname, does not skip */
3898 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3899 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3900 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3901 && ((cp) += sizeof (kw)-1)) /* advance past kw */
3904 * Read a file, but do no processing. This is used to do regexp
3905 * matching on files that have no language defined.
3908 just_read_file (FILE *inf
)
3911 readline (&lb
, inf
);
3915 /* Fortran parsing */
3917 static void F_takeprec (void);
3918 static void F_getit (FILE *);
3923 dbp
= skip_spaces (dbp
);
3927 dbp
= skip_spaces (dbp
);
3928 if (strneq (dbp
, "(*)", 3))
3933 if (!ISDIGIT (*dbp
))
3935 --dbp
; /* force failure */
3940 while (ISDIGIT (*dbp
));
3948 dbp
= skip_spaces (dbp
);
3951 readline (&lb
, inf
);
3956 dbp
= skip_spaces (dbp
);
3958 if (!ISALPHA (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
3960 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
3962 make_tag (dbp
, cp
-dbp
, TRUE
,
3963 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
3968 Fortran_functions (FILE *inf
)
3970 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
3973 dbp
++; /* Ratfor escape to fortran */
3974 dbp
= skip_spaces (dbp
);
3978 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
3979 dbp
= skip_spaces (dbp
);
3981 if (LOOKING_AT_NOCASE (dbp
, "pure"))
3982 dbp
= skip_spaces (dbp
);
3984 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
3985 dbp
= skip_spaces (dbp
);
3987 switch (lowcase (*dbp
))
3990 if (nocase_tail ("integer"))
3994 if (nocase_tail ("real"))
3998 if (nocase_tail ("logical"))
4002 if (nocase_tail ("complex") || nocase_tail ("character"))
4006 if (nocase_tail ("double"))
4008 dbp
= skip_spaces (dbp
);
4011 if (nocase_tail ("precision"))
4017 dbp
= skip_spaces (dbp
);
4020 switch (lowcase (*dbp
))
4023 if (nocase_tail ("function"))
4027 if (nocase_tail ("subroutine"))
4031 if (nocase_tail ("entry"))
4035 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4037 dbp
= skip_spaces (dbp
);
4038 if (*dbp
== '\0') /* assume un-named */
4039 make_tag ("blockdata", 9, TRUE
,
4040 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4042 F_getit (inf
); /* look for name */
4053 * Philippe Waroquiers (1998)
4056 /* Once we are positioned after an "interesting" keyword, let's get
4057 the real tag value necessary. */
4059 Ada_getit (FILE *inf
, const char *name_qualifier
)
4067 dbp
= skip_spaces (dbp
);
4069 || (dbp
[0] == '-' && dbp
[1] == '-'))
4071 readline (&lb
, inf
);
4074 switch (lowcase (*dbp
))
4077 if (nocase_tail ("body"))
4079 /* Skipping body of procedure body or package body or ....
4080 resetting qualifier to body instead of spec. */
4081 name_qualifier
= "/b";
4086 /* Skipping type of task type or protected type ... */
4087 if (nocase_tail ("type"))
4094 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4099 dbp
= skip_spaces (dbp
);
4102 && (ISALPHA (*cp
) || ISDIGIT (*cp
) || *cp
== '_' || *cp
== '.'));
4110 name
= concat (dbp
, name_qualifier
, "");
4112 make_tag (name
, strlen (name
), TRUE
,
4113 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4122 Ada_funcs (FILE *inf
)
4124 bool inquote
= FALSE
;
4125 bool skip_till_semicolumn
= FALSE
;
4127 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4129 while (*dbp
!= '\0')
4131 /* Skip a string i.e. "abcd". */
4132 if (inquote
|| (*dbp
== '"'))
4134 dbp
= etags_strchr (dbp
+ !inquote
, '"');
4139 continue; /* advance char */
4144 break; /* advance line */
4148 /* Skip comments. */
4149 if (dbp
[0] == '-' && dbp
[1] == '-')
4150 break; /* advance line */
4152 /* Skip character enclosed in single quote i.e. 'a'
4153 and skip single quote starting an attribute i.e. 'Image. */
4162 if (skip_till_semicolumn
)
4165 skip_till_semicolumn
= FALSE
;
4167 continue; /* advance char */
4170 /* Search for beginning of a token. */
4171 if (!begtoken (*dbp
))
4174 continue; /* advance char */
4177 /* We are at the beginning of a token. */
4178 switch (lowcase (*dbp
))
4181 if (!packages_only
&& nocase_tail ("function"))
4182 Ada_getit (inf
, "/f");
4184 break; /* from switch */
4185 continue; /* advance char */
4187 if (!packages_only
&& nocase_tail ("procedure"))
4188 Ada_getit (inf
, "/p");
4189 else if (nocase_tail ("package"))
4190 Ada_getit (inf
, "/s");
4191 else if (nocase_tail ("protected")) /* protected type */
4192 Ada_getit (inf
, "/t");
4194 break; /* from switch */
4195 continue; /* advance char */
4198 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4200 /* When tagging types, avoid tagging "use type Pack.Typename;":
4201 to do that, skip everything up to the next ';'. */
4202 skip_till_semicolumn
= TRUE
;
4203 continue; /* advance char */
4207 if (!packages_only
&& nocase_tail ("task"))
4208 Ada_getit (inf
, "/k");
4209 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4211 Ada_getit (inf
, "/t");
4212 while (*dbp
!= '\0')
4216 break; /* from switch */
4217 continue; /* advance char */
4220 /* Look for the end of the token. */
4221 while (!endtoken (*dbp
))
4224 } /* advance char */
4225 } /* advance line */
4230 * Unix and microcontroller assembly tag handling
4231 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4232 * Idea by Bob Weiner, Motorola Inc. (1994)
4235 Asm_labels (FILE *inf
)
4239 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4241 /* If first char is alphabetic or one of [_.$], test for colon
4242 or whitespace following identifier. */
4243 if (ISALPHA (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4245 /* Read past label. */
4247 while (ISALNUM (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4249 if (*cp
== ':' || iswhite (*cp
))
4250 /* Found end of label, so copy it and add it to the table. */
4251 make_tag (lb
.buffer
, cp
- lb
.buffer
, TRUE
,
4252 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4260 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4261 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4262 * Perl variable names: /^(my|local).../
4263 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4264 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4265 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4268 Perl_functions (FILE *inf
)
4270 char *package
= savestr ("main"); /* current package name */
4273 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4275 cp
= skip_spaces (cp
);
4277 if (LOOKING_AT (cp
, "package"))
4280 get_tag (cp
, &package
);
4282 else if (LOOKING_AT (cp
, "sub"))
4288 while (!notinname (*cp
))
4291 continue; /* nothing found */
4292 if ((pos
= etags_strchr (sp
, ':')) != NULL
4293 && pos
< cp
&& pos
[1] == ':')
4294 /* The name is already qualified. */
4295 make_tag (sp
, cp
- sp
, TRUE
,
4296 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4300 char savechar
, *name
;
4304 name
= concat (package
, "::", sp
);
4306 make_tag (name
, strlen (name
), TRUE
,
4307 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4311 else if (LOOKING_AT (cp
, "use constant")
4312 || LOOKING_AT (cp
, "use constant::defer"))
4314 /* For hash style multi-constant like
4315 use constant { FOO => 123,
4317 only the first FOO is picked up. Parsing across the value
4318 expressions would be difficult in general, due to possible nested
4319 hashes, here-documents, etc. */
4321 cp
= skip_spaces (cp
+1);
4324 else if (globals
) /* only if we are tagging global vars */
4326 /* Skip a qualifier, if any. */
4327 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4328 /* After "my" or "local", but before any following paren or space. */
4329 char *varstart
= cp
;
4331 if (qual
/* should this be removed? If yes, how? */
4332 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4337 while (ISALNUM (*cp
) || *cp
== '_');
4341 /* Should be examining a variable list at this point;
4342 could insist on seeing an open parenthesis. */
4343 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4349 make_tag (varstart
, cp
- varstart
, FALSE
,
4350 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4359 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4360 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4361 * More ideas by seb bacon <seb@jamkit.com> (2002)
4364 Python_functions (FILE *inf
)
4368 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4370 cp
= skip_spaces (cp
);
4371 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4374 while (!notinname (*cp
) && *cp
!= ':')
4376 make_tag (name
, cp
- name
, TRUE
,
4377 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4386 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4387 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4388 * - /^[ \t]*define\(\"[^\"]+/
4389 * Only with --members:
4390 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4391 * Idea by Diez B. Roggisch (2001)
4394 PHP_functions (FILE *inf
)
4396 register char *cp
, *name
;
4397 bool search_identifier
= FALSE
;
4399 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4401 cp
= skip_spaces (cp
);
4403 if (search_identifier
4406 while (!notinname (*cp
))
4408 make_tag (name
, cp
- name
, TRUE
,
4409 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4410 search_identifier
= FALSE
;
4412 else if (LOOKING_AT (cp
, "function"))
4415 cp
= skip_spaces (cp
+1);
4419 while (!notinname (*cp
))
4421 make_tag (name
, cp
- name
, TRUE
,
4422 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4425 search_identifier
= TRUE
;
4427 else if (LOOKING_AT (cp
, "class"))
4432 while (*cp
!= '\0' && !iswhite (*cp
))
4434 make_tag (name
, cp
- name
, FALSE
,
4435 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4438 search_identifier
= TRUE
;
4440 else if (strneq (cp
, "define", 6)
4441 && (cp
= skip_spaces (cp
+6))
4443 && (*cp
== '"' || *cp
== '\''))
4447 while (*cp
!= quote
&& *cp
!= '\0')
4449 make_tag (name
, cp
- name
, FALSE
,
4450 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4453 && LOOKING_AT (cp
, "var")
4457 while (!notinname (*cp
))
4459 make_tag (name
, cp
- name
, FALSE
,
4460 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4467 * Cobol tag functions
4468 * We could look for anything that could be a paragraph name.
4469 * i.e. anything that starts in column 8, is one word, and ends in a full stop.
4470 * Idea by Corny de Souza (1993)
4473 Cobol_paragraphs (FILE *inf
)
4475 register char *bp
, *ep
;
4477 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4483 /* If eoln, compiler option or comment ignore whole line. */
4484 if (bp
[-1] != ' ' || !ISALNUM (bp
[0]))
4487 for (ep
= bp
; ISALNUM (*ep
) || *ep
== '-'; ep
++)
4490 make_tag (bp
, ep
- bp
, TRUE
,
4491 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4498 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4501 Makefile_targets (FILE *inf
)
4505 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4507 if (*bp
== '\t' || *bp
== '#')
4509 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
4511 if (*bp
== ':' || (globals
&& *bp
== '='))
4513 /* We should detect if there is more than one tag, but we do not.
4514 We just skip initial and final spaces. */
4515 char * namestart
= skip_spaces (lb
.buffer
);
4516 while (--bp
> namestart
)
4517 if (!notinname (*bp
))
4519 make_tag (namestart
, bp
- namestart
+ 1, TRUE
,
4520 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
4528 * Original code by Mosur K. Mohan (1989)
4530 * Locates tags for procedures & functions. Doesn't do any type- or
4531 * var-definitions. It does look for the keyword "extern" or
4532 * "forward" immediately following the procedure statement; if found,
4533 * the tag is skipped.
4536 Pascal_functions (FILE *inf
)
4538 linebuffer tline
; /* mostly copied from C_entries */
4540 int save_lineno
, namelen
, taglen
;
4543 bool /* each of these flags is TRUE if: */
4544 incomment
, /* point is inside a comment */
4545 inquote
, /* point is inside '..' string */
4546 get_tagname
, /* point is after PROCEDURE/FUNCTION
4547 keyword, so next item = potential tag */
4548 found_tag
, /* point is after a potential tag */
4549 inparms
, /* point is within parameter-list */
4550 verify_tag
; /* point has passed the parm-list, so the
4551 next token will determine whether this
4552 is a FORWARD/EXTERN to be ignored, or
4553 whether it is a real tag */
4555 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
4556 name
= NULL
; /* keep compiler quiet */
4559 linebuffer_init (&tline
);
4561 incomment
= inquote
= FALSE
;
4562 found_tag
= FALSE
; /* have a proc name; check if extern */
4563 get_tagname
= FALSE
; /* found "procedure" keyword */
4564 inparms
= FALSE
; /* found '(' after "proc" */
4565 verify_tag
= FALSE
; /* check if "extern" is ahead */
4568 while (!feof (inf
)) /* long main loop to get next char */
4571 if (c
== '\0') /* if end of line */
4573 readline (&lb
, inf
);
4577 if (!((found_tag
&& verify_tag
)
4579 c
= *dbp
++; /* only if don't need *dbp pointing
4580 to the beginning of the name of
4581 the procedure or function */
4585 if (c
== '}') /* within { } comments */
4587 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
4604 inquote
= TRUE
; /* found first quote */
4606 case '{': /* found open { comment */
4610 if (*dbp
== '*') /* found open (* comment */
4615 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
4618 case ')': /* end of parms list */
4623 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
4630 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
4632 /* Check if this is an "extern" declaration. */
4635 if (lowcase (*dbp
) == 'e')
4637 if (nocase_tail ("extern")) /* superfluous, really! */
4643 else if (lowcase (*dbp
) == 'f')
4645 if (nocase_tail ("forward")) /* check for forward reference */
4651 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
4655 make_tag (name
, namelen
, TRUE
,
4656 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
4660 if (get_tagname
) /* grab name of proc or fn */
4667 /* Find block name. */
4668 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
4671 /* Save all values for later tagging. */
4672 linebuffer_setlen (&tline
, lb
.len
);
4673 strcpy (tline
.buffer
, lb
.buffer
);
4674 save_lineno
= lineno
;
4675 save_lcno
= linecharno
;
4676 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
4678 taglen
= cp
- lb
.buffer
+ 1;
4680 dbp
= cp
; /* set dbp to e-o-token */
4681 get_tagname
= FALSE
;
4685 /* And proceed to check for "extern". */
4687 else if (!incomment
&& !inquote
&& !found_tag
)
4689 /* Check for proc/fn keywords. */
4690 switch (lowcase (c
))
4693 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4697 if (nocase_tail ("unction"))
4702 } /* while not eof */
4704 free (tline
.buffer
);
4709 * Lisp tag functions
4710 * look for (def or (DEF, quote or QUOTE
4713 static void L_getit (void);
4718 if (*dbp
== '\'') /* Skip prefix quote */
4720 else if (*dbp
== '(')
4723 /* Try to skip "(quote " */
4724 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
4725 /* Ok, then skip "(" before name in (defstruct (foo)) */
4726 dbp
= skip_spaces (dbp
);
4728 get_tag (dbp
, NULL
);
4732 Lisp_functions (FILE *inf
)
4734 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4739 /* "(defvar foo)" is a declaration rather than a definition. */
4743 if (LOOKING_AT (p
, "defvar"))
4745 p
= skip_name (p
); /* past var name */
4746 p
= skip_spaces (p
);
4752 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
4754 dbp
= skip_non_spaces (dbp
);
4755 dbp
= skip_spaces (dbp
);
4760 /* Check for (foo::defmumble name-defined ... */
4763 while (!notinname (*dbp
) && *dbp
!= ':');
4768 while (*dbp
== ':');
4770 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
4772 dbp
= skip_non_spaces (dbp
);
4773 dbp
= skip_spaces (dbp
);
4783 * Lua script language parsing
4784 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4786 * "function" and "local function" are tags if they start at column 1.
4789 Lua_functions (FILE *inf
)
4793 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4795 if (bp
[0] != 'f' && bp
[0] != 'l')
4798 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
4800 if (LOOKING_AT (bp
, "function"))
4808 * Just look for lines where the first character is '/'
4809 * Also look at "defineps" for PSWrap
4811 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4812 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4815 PS_functions (FILE *inf
)
4817 register char *bp
, *ep
;
4819 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4824 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
4827 make_tag (bp
, ep
- bp
, TRUE
,
4828 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4830 else if (LOOKING_AT (bp
, "defineps"))
4838 * Ignore anything after \ followed by space or in ( )
4839 * Look for words defined by :
4840 * Look for constant, code, create, defer, value, and variable
4841 * OBP extensions: Look for buffer:, field,
4842 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4845 Forth_words (FILE *inf
)
4849 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4850 while ((bp
= skip_spaces (bp
))[0] != '\0')
4851 if (bp
[0] == '\\' && iswhite (bp
[1]))
4852 break; /* read next line */
4853 else if (bp
[0] == '(' && iswhite (bp
[1]))
4854 do /* skip to ) or eol */
4856 while (*bp
!= ')' && *bp
!= '\0');
4857 else if ((bp
[0] == ':' && iswhite (bp
[1]) && bp
++)
4858 || LOOKING_AT_NOCASE (bp
, "constant")
4859 || LOOKING_AT_NOCASE (bp
, "code")
4860 || LOOKING_AT_NOCASE (bp
, "create")
4861 || LOOKING_AT_NOCASE (bp
, "defer")
4862 || LOOKING_AT_NOCASE (bp
, "value")
4863 || LOOKING_AT_NOCASE (bp
, "variable")
4864 || LOOKING_AT_NOCASE (bp
, "buffer:")
4865 || LOOKING_AT_NOCASE (bp
, "field"))
4866 get_tag (skip_spaces (bp
), NULL
); /* Yay! A definition! */
4868 bp
= skip_non_spaces (bp
);
4873 * Scheme tag functions
4874 * look for (def... xyzzy
4876 * (def ... ((...(xyzzy ....
4878 * Original code by Ken Haase (1985?)
4881 Scheme_functions (FILE *inf
)
4885 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4887 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
4889 bp
= skip_non_spaces (bp
+4);
4890 /* Skip over open parens and white space. Don't continue past
4892 while (*bp
&& notinname (*bp
))
4896 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
4902 /* Find tags in TeX and LaTeX input files. */
4904 /* TEX_toktab is a table of TeX control sequences that define tags.
4905 * Each entry records one such control sequence.
4907 * Original code from who knows whom.
4909 * Stefan Monnier (2002)
4912 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
4914 /* Default set of control sequences to put into TEX_toktab.
4915 The value of environment var TEXTAGS is prepended to this. */
4916 static const char *TEX_defenv
= "\
4917 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4918 :part:appendix:entry:index:def\
4919 :newcommand:renewcommand:newenvironment:renewenvironment";
4921 static void TEX_mode (FILE *);
4922 static void TEX_decode_env (const char *, const char *);
4924 static char TEX_esc
= '\\';
4925 static char TEX_opgrp
= '{';
4926 static char TEX_clgrp
= '}';
4929 * TeX/LaTeX scanning loop.
4932 TeX_commands (FILE *inf
)
4937 /* Select either \ or ! as escape character. */
4940 /* Initialize token table once from environment. */
4941 if (TEX_toktab
== NULL
)
4942 TEX_decode_env ("TEXTAGS", TEX_defenv
);
4944 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4946 /* Look at each TEX keyword in line. */
4949 /* Look for a TEX escape. */
4950 while (*cp
++ != TEX_esc
)
4951 if (cp
[-1] == '\0' || cp
[-1] == '%')
4954 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
4955 if (strneq (cp
, key
->buffer
, key
->len
))
4958 int namelen
, linelen
;
4961 cp
= skip_spaces (cp
+ key
->len
);
4962 if (*cp
== TEX_opgrp
)
4968 (!iswhite (*p
) && *p
!= '#' &&
4969 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
4974 if (!opgrp
|| *p
== TEX_clgrp
)
4976 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
4978 linelen
= p
- lb
.buffer
+ 1;
4980 make_tag (cp
, namelen
, TRUE
,
4981 lb
.buffer
, linelen
, lineno
, linecharno
);
4982 goto tex_next_line
; /* We only tag a line once */
4990 #define TEX_LESC '\\'
4991 #define TEX_SESC '!'
4993 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4994 chars accordingly. */
4996 TEX_mode (FILE *inf
)
5000 while ((c
= getc (inf
)) != EOF
)
5002 /* Skip to next line if we hit the TeX comment char. */
5004 while (c
!= '\n' && c
!= EOF
)
5006 else if (c
== TEX_LESC
|| c
== TEX_SESC
)
5022 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5023 No attempt is made to correct the situation. */
5027 /* Read environment and prepend it to the default string.
5028 Build token table. */
5030 TEX_decode_env (const char *evarname
, const char *defenv
)
5032 register const char *env
, *p
;
5035 /* Append default string to environment. */
5036 env
= getenv (evarname
);
5040 env
= concat (env
, defenv
, "");
5042 /* Allocate a token table */
5043 for (len
= 1, p
= env
; p
;)
5044 if ((p
= etags_strchr (p
, ':')) && *++p
!= '\0')
5046 TEX_toktab
= xnew (len
, linebuffer
);
5048 /* Unpack environment string into token table. Be careful about */
5049 /* zero-length strings (leading ':', "::" and trailing ':') */
5050 for (i
= 0; *env
!= '\0';)
5052 p
= etags_strchr (env
, ':');
5053 if (!p
) /* End of environment string. */
5054 p
= env
+ strlen (env
);
5056 { /* Only non-zero strings. */
5057 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5058 TEX_toktab
[i
].len
= p
- env
;
5065 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5066 TEX_toktab
[i
].len
= 0;
5073 /* Texinfo support. Dave Love, Mar. 2000. */
5075 Texinfo_nodes (FILE *inf
)
5078 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5079 if (LOOKING_AT (cp
, "@node"))
5082 while (*cp
!= '\0' && *cp
!= ',')
5084 make_tag (start
, cp
- start
, TRUE
,
5085 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5092 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5093 * Contents of <a name=xxx> are tags with name xxx.
5095 * Francesco Potortì, 2002.
5098 HTML_labels (FILE *inf
)
5100 bool getnext
= FALSE
; /* next text outside of HTML tags is a tag */
5101 bool skiptag
= FALSE
; /* skip to the end of the current HTML tag */
5102 bool intag
= FALSE
; /* inside an html tag, looking for ID= */
5103 bool inanchor
= FALSE
; /* when INTAG, is an anchor, look for NAME= */
5107 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5109 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5110 for (;;) /* loop on the same line */
5112 if (skiptag
) /* skip HTML tag */
5114 while (*dbp
!= '\0' && *dbp
!= '>')
5120 continue; /* look on the same line */
5122 break; /* go to next line */
5125 else if (intag
) /* look for "name=" or "id=" */
5127 while (*dbp
!= '\0' && *dbp
!= '>'
5128 && lowcase (*dbp
) != 'n' && lowcase (*dbp
) != 'i')
5131 break; /* go to next line */
5136 continue; /* look on the same line */
5138 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5139 || LOOKING_AT_NOCASE (dbp
, "id="))
5141 bool quoted
= (dbp
[0] == '"');
5144 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5147 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5149 linebuffer_setlen (&token_name
, end
- dbp
);
5150 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5151 token_name
.buffer
[end
- dbp
] = '\0';
5154 intag
= FALSE
; /* we found what we looked for */
5155 skiptag
= TRUE
; /* skip to the end of the tag */
5156 getnext
= TRUE
; /* then grab the text */
5157 continue; /* look on the same line */
5162 else if (getnext
) /* grab next tokens and tag them */
5164 dbp
= skip_spaces (dbp
);
5166 break; /* go to next line */
5170 inanchor
= (lowcase (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5171 continue; /* look on the same line */
5174 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5176 make_tag (token_name
.buffer
, token_name
.len
, TRUE
,
5177 dbp
, end
- dbp
, lineno
, linecharno
);
5178 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5180 break; /* go to next line */
5183 else /* look for an interesting HTML tag */
5185 while (*dbp
!= '\0' && *dbp
!= '<')
5188 break; /* go to next line */
5190 if (lowcase (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5193 continue; /* look on the same line */
5195 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5196 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5197 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5198 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5202 continue; /* look on the same line */
5213 * Assumes that the predicate or rule starts at column 0.
5214 * Only the first clause of a predicate or rule is added.
5215 * Original code by Sunichirou Sugou (1989)
5216 * Rewritten by Anders Lindgren (1996)
5218 static size_t prolog_pr (char *, char *);
5219 static void prolog_skip_comment (linebuffer
*, FILE *);
5220 static size_t prolog_atom (char *, size_t);
5223 Prolog_functions (FILE *inf
)
5233 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5235 if (cp
[0] == '\0') /* Empty line */
5237 else if (iswhite (cp
[0])) /* Not a predicate */
5239 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5240 prolog_skip_comment (&lb
, inf
);
5241 else if ((len
= prolog_pr (cp
, last
)) > 0)
5243 /* Predicate or rule. Store the function name so that we
5244 only generate a tag for the first clause. */
5246 last
= xnew (len
+ 1, char);
5247 else if (len
+ 1 > allocated
)
5248 xrnew (last
, len
+ 1, char);
5249 allocated
= len
+ 1;
5250 memcpy (last
, cp
, len
);
5259 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5265 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5266 if (cp
[0] == '*' && cp
[1] == '/')
5268 readline (plb
, inf
);
5270 while (!feof (inf
));
5274 * A predicate or rule definition is added if it matches:
5275 * <beginning of line><Prolog Atom><whitespace>(
5276 * or <beginning of line><Prolog Atom><whitespace>:-
5278 * It is added to the tags database if it doesn't match the
5279 * name of the previous clause header.
5281 * Return the size of the name of the predicate or rule, or 0 if no
5285 prolog_pr (char *s
, char *last
)
5287 /* Name of last clause. */
5292 pos
= prolog_atom (s
, 0);
5297 pos
= skip_spaces (s
+ pos
) - s
;
5300 || (s
[pos
] == '(' && (pos
+= 1))
5301 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5302 && (last
== NULL
/* save only the first clause */
5303 || len
!= strlen (last
)
5304 || !strneq (s
, last
, len
)))
5306 make_tag (s
, len
, TRUE
, s
, pos
, lineno
, linecharno
);
5314 * Consume a Prolog atom.
5315 * Return the number of bytes consumed, or 0 if there was an error.
5317 * A prolog atom, in this context, could be one of:
5318 * - An alphanumeric sequence (underscores allowed), starting with a lower case letter or an underscore.
5319 * - A quoted arbitrary string. Single quotes can escape themselves.
5320 * Backslash quotes everything.
5323 prolog_atom (char *s
, size_t pos
)
5329 if (ISLOWER (s
[pos
]) || (s
[pos
] == '_'))
5331 /* The atom is unquoted. */
5333 while (ISALNUM (s
[pos
]) || (s
[pos
] == '_'))
5337 return pos
- origpos
;
5339 else if (s
[pos
] == '\'')
5350 pos
++; /* A double quote */
5352 else if (s
[pos
] == '\0')
5353 /* Multiline quoted atoms are ignored. */
5355 else if (s
[pos
] == '\\')
5357 if (s
[pos
+1] == '\0')
5364 return pos
- origpos
;
5372 * Support for Erlang
5374 * Generates tags for functions, defines, and records.
5375 * Assumes that Erlang functions start at column 0.
5376 * Original code by Anders Lindgren (1996)
5378 static int erlang_func (char *, char *);
5379 static void erlang_attribute (char *);
5380 static int erlang_atom (char *);
5383 Erlang_functions (FILE *inf
)
5393 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5395 if (cp
[0] == '\0') /* Empty line */
5397 else if (iswhite (cp
[0])) /* Not function nor attribute */
5399 else if (cp
[0] == '%') /* comment */
5401 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
5403 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
5405 erlang_attribute (cp
);
5412 else if ((len
= erlang_func (cp
, last
)) > 0)
5415 * Function. Store the function name so that we only
5416 * generate a tag for the first clause.
5419 last
= xnew (len
+ 1, char);
5420 else if (len
+ 1 > allocated
)
5421 xrnew (last
, len
+ 1, char);
5422 allocated
= len
+ 1;
5423 memcpy (last
, cp
, len
);
5432 * A function definition is added if it matches:
5433 * <beginning of line><Erlang Atom><whitespace>(
5435 * It is added to the tags database if it doesn't match the
5436 * name of the previous clause header.
5438 * Return the size of the name of the function, or 0 if no function
5442 erlang_func (char *s
, char *last
)
5444 /* Name of last clause. */
5449 pos
= erlang_atom (s
);
5454 pos
= skip_spaces (s
+ pos
) - s
;
5456 /* Save only the first clause. */
5459 || len
!= (int)strlen (last
)
5460 || !strneq (s
, last
, len
)))
5462 make_tag (s
, len
, TRUE
, s
, pos
, lineno
, linecharno
);
5471 * Handle attributes. Currently, tags are generated for defines
5474 * They are of the form:
5475 * -define(foo, bar).
5476 * -define(Foo(M, N), M+N).
5477 * -record(graph, {vtab = notable, cyclic = true}).
5480 erlang_attribute (char *s
)
5484 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
5487 int len
= erlang_atom (skip_spaces (cp
));
5489 make_tag (cp
, len
, TRUE
, s
, cp
+ len
- s
, lineno
, linecharno
);
5496 * Consume an Erlang atom (or variable).
5497 * Return the number of bytes consumed, or -1 if there was an error.
5500 erlang_atom (char *s
)
5504 if (ISALPHA (s
[pos
]) || s
[pos
] == '_')
5506 /* The atom is unquoted. */
5509 while (ISALNUM (s
[pos
]) || s
[pos
] == '_');
5511 else if (s
[pos
] == '\'')
5513 for (pos
++; s
[pos
] != '\''; pos
++)
5514 if (s
[pos
] == '\0' /* multiline quoted atoms are ignored */
5515 || (s
[pos
] == '\\' && s
[++pos
] == '\0'))
5524 static char *scan_separators (char *);
5525 static void add_regex (char *, language
*);
5526 static char *substitute (char *, char *, struct re_registers
*);
5529 * Take a string like "/blah/" and turn it into "blah", verifying
5530 * that the first and last characters are the same, and handling
5531 * quoted separator characters. Actually, stops on the occurrence of
5532 * an unquoted separator. Also process \t, \n, etc. and turn into
5533 * appropriate characters. Works in place. Null terminates name string.
5534 * Returns pointer to terminating separator, or NULL for
5535 * unterminated regexps.
5538 scan_separators (char *name
)
5541 char *copyto
= name
;
5542 bool quoted
= FALSE
;
5544 for (++name
; *name
!= '\0'; ++name
)
5550 case 'a': *copyto
++ = '\007'; break; /* BEL (bell) */
5551 case 'b': *copyto
++ = '\b'; break; /* BS (back space) */
5552 case 'd': *copyto
++ = 0177; break; /* DEL (delete) */
5553 case 'e': *copyto
++ = 033; break; /* ESC (escape) */
5554 case 'f': *copyto
++ = '\f'; break; /* FF (form feed) */
5555 case 'n': *copyto
++ = '\n'; break; /* NL (new line) */
5556 case 'r': *copyto
++ = '\r'; break; /* CR (carriage return) */
5557 case 't': *copyto
++ = '\t'; break; /* TAB (horizontal tab) */
5558 case 'v': *copyto
++ = '\v'; break; /* VT (vertical tab) */
5564 /* Something else is quoted, so preserve the quote. */
5572 else if (*name
== '\\')
5574 else if (*name
== sep
)
5580 name
= NULL
; /* signal unterminated regexp */
5582 /* Terminate copied string. */
5587 /* Look at the argument of --regex or --no-regex and do the right
5588 thing. Same for each line of a regexp file. */
5590 analyse_regex (char *regex_arg
)
5592 if (regex_arg
== NULL
)
5594 free_regexps (); /* --no-regex: remove existing regexps */
5598 /* A real --regexp option or a line in a regexp file. */
5599 switch (regex_arg
[0])
5601 /* Comments in regexp file or null arg to --regex. */
5607 /* Read a regex file. This is recursive and may result in a
5608 loop, which will stop when the file descriptors are exhausted. */
5612 linebuffer regexbuf
;
5613 char *regexfile
= regex_arg
+ 1;
5615 /* regexfile is a file containing regexps, one per line. */
5616 regexfp
= fopen (regexfile
, "r");
5617 if (regexfp
== NULL
)
5619 linebuffer_init (&regexbuf
);
5620 while (readline_internal (&regexbuf
, regexfp
) > 0)
5621 analyse_regex (regexbuf
.buffer
);
5622 free (regexbuf
.buffer
);
5627 /* Regexp to be used for a specific language only. */
5631 char *lang_name
= regex_arg
+ 1;
5634 for (cp
= lang_name
; *cp
!= '}'; cp
++)
5637 error ("unterminated language name in regex: %s", regex_arg
);
5641 lang
= get_language_from_langname (lang_name
);
5644 add_regex (cp
, lang
);
5648 /* Regexp to be used for any language. */
5650 add_regex (regex_arg
, NULL
);
5655 /* Separate the regexp pattern, compile it,
5656 and care for optional name and modifiers. */
5658 add_regex (char *regexp_pattern
, language
*lang
)
5660 static struct re_pattern_buffer zeropattern
;
5661 char sep
, *pat
, *name
, *modifiers
;
5664 struct re_pattern_buffer
*patbuf
;
5667 force_explicit_name
= TRUE
, /* do not use implicit tag names */
5668 ignore_case
= FALSE
, /* case is significant */
5669 multi_line
= FALSE
, /* matches are done one line at a time */
5670 single_line
= FALSE
; /* dot does not match newline */
5673 if (strlen (regexp_pattern
) < 3)
5675 error ("null regexp");
5678 sep
= regexp_pattern
[0];
5679 name
= scan_separators (regexp_pattern
);
5682 error ("%s: unterminated regexp", regexp_pattern
);
5687 error ("null name for regexp \"%s\"", regexp_pattern
);
5690 modifiers
= scan_separators (name
);
5691 if (modifiers
== NULL
) /* no terminating separator --> no name */
5697 modifiers
+= 1; /* skip separator */
5699 /* Parse regex modifiers. */
5700 for (; modifiers
[0] != '\0'; modifiers
++)
5701 switch (modifiers
[0])
5704 if (modifiers
== name
)
5705 error ("forcing explicit tag name but no name, ignoring");
5706 force_explicit_name
= TRUE
;
5716 need_filebuf
= TRUE
;
5719 error ("invalid regexp modifier `%c', ignoring", modifiers
[0]);
5723 patbuf
= xnew (1, struct re_pattern_buffer
);
5724 *patbuf
= zeropattern
;
5727 static char lc_trans
[CHARS
];
5729 for (i
= 0; i
< CHARS
; i
++)
5730 lc_trans
[i
] = lowcase (i
);
5731 patbuf
->translate
= lc_trans
; /* translation table to fold case */
5735 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
5737 pat
= regexp_pattern
;
5740 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
5742 re_set_syntax (RE_SYNTAX_EMACS
);
5744 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
5749 error ("%s while compiling pattern", err
);
5754 p_head
= xnew (1, regexp
);
5755 p_head
->pattern
= savestr (regexp_pattern
);
5756 p_head
->p_next
= rp
;
5757 p_head
->lang
= lang
;
5758 p_head
->pat
= patbuf
;
5759 p_head
->name
= savestr (name
);
5760 p_head
->error_signaled
= FALSE
;
5761 p_head
->force_explicit_name
= force_explicit_name
;
5762 p_head
->ignore_case
= ignore_case
;
5763 p_head
->multi_line
= multi_line
;
5767 * Do the substitutions indicated by the regular expression and
5771 substitute (char *in
, char *out
, struct re_registers
*regs
)
5774 int size
, dig
, diglen
;
5777 size
= strlen (out
);
5779 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5780 if (out
[size
- 1] == '\\')
5781 fatal ("pattern error in \"%s\"", out
);
5782 for (t
= etags_strchr (out
, '\\');
5784 t
= etags_strchr (t
+ 2, '\\'))
5788 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5794 /* Allocate space and do the substitutions. */
5796 result
= xnew (size
+ 1, char);
5798 for (t
= result
; *out
!= '\0'; out
++)
5799 if (*out
== '\\' && ISDIGIT (*++out
))
5802 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5803 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
5810 assert (t
<= result
+ size
);
5811 assert (t
- result
== (int)strlen (result
));
5816 /* Deallocate all regexps. */
5821 while (p_head
!= NULL
)
5823 rp
= p_head
->p_next
;
5824 free (p_head
->pattern
);
5825 free (p_head
->name
);
5833 * Reads the whole file as a single string from `filebuf' and looks for
5834 * multi-line regular expressions, creating tags on matches.
5835 * readline already dealt with normal regexps.
5837 * Idea by Ben Wing <ben@666.com> (2002).
5840 regex_tag_multiline (void)
5842 char *buffer
= filebuf
.buffer
;
5846 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
5850 if (!rp
->multi_line
)
5851 continue; /* skip normal regexps */
5853 /* Generic initializations before parsing file from memory. */
5854 lineno
= 1; /* reset global line number */
5855 charno
= 0; /* reset global char number */
5856 linecharno
= 0; /* reset global char number of line start */
5858 /* Only use generic regexps or those for the current language. */
5859 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
5862 while (match
>= 0 && match
< filebuf
.len
)
5864 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
5865 filebuf
.len
- match
, &rp
->regs
);
5870 if (!rp
->error_signaled
)
5872 error ("regexp stack overflow while matching \"%s\"",
5874 rp
->error_signaled
= TRUE
;
5881 if (match
== rp
->regs
.end
[0])
5883 if (!rp
->error_signaled
)
5885 error ("regexp matches the empty string: \"%s\"",
5887 rp
->error_signaled
= TRUE
;
5889 match
= -3; /* exit from while loop */
5893 /* Match occurred. Construct a tag. */
5894 while (charno
< rp
->regs
.end
[0])
5895 if (buffer
[charno
++] == '\n')
5896 lineno
++, linecharno
= charno
;
5898 if (name
[0] == '\0')
5900 else /* make a named tag */
5901 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
5902 if (rp
->force_explicit_name
)
5903 /* Force explicit tag name, if a name is there. */
5904 pfnote (name
, TRUE
, buffer
+ linecharno
,
5905 charno
- linecharno
+ 1, lineno
, linecharno
);
5907 make_tag (name
, strlen (name
), TRUE
, buffer
+ linecharno
,
5908 charno
- linecharno
+ 1, lineno
, linecharno
);
5917 nocase_tail (const char *cp
)
5919 register int len
= 0;
5921 while (*cp
!= '\0' && lowcase (*cp
) == lowcase (dbp
[len
]))
5923 if (*cp
== '\0' && !intoken (dbp
[len
]))
5932 get_tag (register char *bp
, char **namepp
)
5934 register char *cp
= bp
;
5938 /* Go till you get to white space or a syntactic break */
5939 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
5941 make_tag (bp
, cp
- bp
, TRUE
,
5942 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5946 *namepp
= savenstr (bp
, cp
- bp
);
5950 * Read a line of text from `stream' into `lbp', excluding the
5951 * newline or CR-NL, if any. Return the number of characters read from
5952 * `stream', which is the length of the line including the newline.
5954 * On DOS or Windows we do not count the CR character, if any before the
5955 * NL, in the returned length; this mirrors the behavior of Emacs on those
5956 * platforms (for text files, it translates CR-NL to NL as it reads in the
5959 * If multi-line regular expressions are requested, each line read is
5960 * appended to `filebuf'.
5963 readline_internal (linebuffer
*lbp
, register FILE *stream
)
5965 char *buffer
= lbp
->buffer
;
5966 register char *p
= lbp
->buffer
;
5967 register char *pend
;
5970 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
5974 register int c
= getc (stream
);
5977 /* We're at the end of linebuffer: expand it. */
5979 xrnew (buffer
, lbp
->size
, char);
5980 p
+= buffer
- lbp
->buffer
;
5981 pend
= buffer
+ lbp
->size
;
5982 lbp
->buffer
= buffer
;
5992 if (p
> buffer
&& p
[-1] == '\r')
5996 /* Assume CRLF->LF translation will be performed by Emacs
5997 when loading this file, so CRs won't appear in the buffer.
5998 It would be cleaner to compensate within Emacs;
5999 however, Emacs does not know how many CRs were deleted
6000 before any given point in the file. */
6015 lbp
->len
= p
- buffer
;
6017 if (need_filebuf
/* we need filebuf for multi-line regexps */
6018 && chars_deleted
> 0) /* not at EOF */
6020 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6022 /* Expand filebuf. */
6024 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6026 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6027 filebuf
.len
+= lbp
->len
;
6028 filebuf
.buffer
[filebuf
.len
++] = '\n';
6029 filebuf
.buffer
[filebuf
.len
] = '\0';
6032 return lbp
->len
+ chars_deleted
;
6036 * Like readline_internal, above, but in addition try to match the
6037 * input line against relevant regular expressions and manage #line
6041 readline (linebuffer
*lbp
, FILE *stream
)
6045 linecharno
= charno
; /* update global char number of line start */
6046 result
= readline_internal (lbp
, stream
); /* read line */
6047 lineno
+= 1; /* increment global line number */
6048 charno
+= result
; /* increment global char number */
6050 /* Honor #line directives. */
6051 if (!no_line_directive
)
6053 static bool discard_until_line_directive
;
6055 /* Check whether this is a #line directive. */
6056 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6061 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6062 && start
> 0) /* double quote character found */
6064 char *endp
= lbp
->buffer
+ start
;
6066 while ((endp
= etags_strchr (endp
, '"')) != NULL
6067 && endp
[-1] == '\\')
6070 /* Ok, this is a real #line directive. Let's deal with it. */
6072 char *taggedabsname
; /* absolute name of original file */
6073 char *taggedfname
; /* name of original file as given */
6074 char *name
; /* temp var */
6076 discard_until_line_directive
= FALSE
; /* found it */
6077 name
= lbp
->buffer
+ start
;
6079 canonicalize_filename (name
);
6080 taggedabsname
= absolute_filename (name
, tagfiledir
);
6081 if (filename_is_absolute (name
)
6082 || filename_is_absolute (curfdp
->infname
))
6083 taggedfname
= savestr (taggedabsname
);
6085 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6087 if (streq (curfdp
->taggedfname
, taggedfname
))
6088 /* The #line directive is only a line number change. We
6089 deal with this afterwards. */
6092 /* The tags following this #line directive should be
6093 attributed to taggedfname. In order to do this, set
6094 curfdp accordingly. */
6096 fdesc
*fdp
; /* file description pointer */
6098 /* Go look for a file description already set up for the
6099 file indicated in the #line directive. If there is
6100 one, use it from now until the next #line
6102 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6103 if (streq (fdp
->infname
, curfdp
->infname
)
6104 && streq (fdp
->taggedfname
, taggedfname
))
6105 /* If we remove the second test above (after the &&)
6106 then all entries pertaining to the same file are
6107 coalesced in the tags file. If we use it, then
6108 entries pertaining to the same file but generated
6109 from different files (via #line directives) will
6110 go into separate sections in the tags file. These
6111 alternatives look equivalent. The first one
6112 destroys some apparently useless information. */
6118 /* Else, if we already tagged the real file, skip all
6119 input lines until the next #line directive. */
6120 if (fdp
== NULL
) /* not found */
6121 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6122 if (streq (fdp
->infabsname
, taggedabsname
))
6124 discard_until_line_directive
= TRUE
;
6128 /* Else create a new file description and use that from
6129 now on, until the next #line directive. */
6130 if (fdp
== NULL
) /* not found */
6133 fdhead
= xnew (1, fdesc
);
6134 *fdhead
= *curfdp
; /* copy curr. file description */
6136 fdhead
->infname
= savestr (curfdp
->infname
);
6137 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6138 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6139 fdhead
->taggedfname
= taggedfname
;
6140 fdhead
->usecharno
= FALSE
;
6141 fdhead
->prop
= NULL
;
6142 fdhead
->written
= FALSE
;
6146 free (taggedabsname
);
6148 readline (lbp
, stream
);
6150 } /* if a real #line directive */
6151 } /* if #line is followed by a number */
6152 } /* if line begins with "#line " */
6154 /* If we are here, no #line directive was found. */
6155 if (discard_until_line_directive
)
6159 /* Do a tail recursion on ourselves, thus discarding the contents
6160 of the line buffer. */
6161 readline (lbp
, stream
);
6165 discard_until_line_directive
= FALSE
;
6168 } /* if #line directives should be considered */
6175 /* Match against relevant regexps. */
6177 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6179 /* Only use generic regexps or those for the current language.
6180 Also do not use multiline regexps, which is the job of
6181 regex_tag_multiline. */
6182 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6186 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6191 if (!rp
->error_signaled
)
6193 error ("regexp stack overflow while matching \"%s\"",
6195 rp
->error_signaled
= TRUE
;
6202 /* Empty string matched. */
6203 if (!rp
->error_signaled
)
6205 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6206 rp
->error_signaled
= TRUE
;
6210 /* Match occurred. Construct a tag. */
6212 if (name
[0] == '\0')
6214 else /* make a named tag */
6215 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6216 if (rp
->force_explicit_name
)
6217 /* Force explicit tag name, if a name is there. */
6218 pfnote (name
, TRUE
, lbp
->buffer
, match
, lineno
, linecharno
);
6220 make_tag (name
, strlen (name
), TRUE
,
6221 lbp
->buffer
, match
, lineno
, linecharno
);
6230 * Return a pointer to a space of size strlen(cp)+1 allocated
6231 * with xnew where the string CP has been copied.
6234 savestr (const char *cp
)
6236 return savenstr (cp
, strlen (cp
));
6240 * Return a pointer to a space of size LEN+1 allocated with xnew where
6241 * the string CP has been copied for at most the first LEN characters.
6244 savenstr (const char *cp
, int len
)
6246 char *dp
= xnew (len
+ 1, char);
6248 return memcpy (dp
, cp
, len
);
6252 * Return the ptr in sp at which the character c last
6253 * appears; NULL if not found
6255 * Identical to POSIX strrchr, included for portability.
6258 etags_strrchr (register const char *sp
, register int c
)
6260 register const char *r
;
6272 * Return the ptr in sp at which the character c first
6273 * appears; NULL if not found
6275 * Identical to POSIX strchr, included for portability.
6278 etags_strchr (register const char *sp
, register int c
)
6288 /* Skip spaces (end of string is not space), return new pointer. */
6290 skip_spaces (char *cp
)
6292 while (iswhite (*cp
))
6297 /* Skip non spaces, except end of string, return new pointer. */
6299 skip_non_spaces (char *cp
)
6301 while (*cp
!= '\0' && !iswhite (*cp
))
6306 /* Skip any chars in the "name" class.*/
6308 skip_name (char *cp
)
6310 /* '\0' is a notinname() so loop stops there too */
6311 while (! notinname (*cp
))
6316 /* Print error message and exit. */
6318 fatal (const char *s1
, const char *s2
)
6321 exit (EXIT_FAILURE
);
6325 pfatal (const char *s1
)
6328 exit (EXIT_FAILURE
);
6332 suggest_asking_for_help (void)
6334 fprintf (stderr
, "\tTry `%s --help' for a complete list of options.\n",
6336 exit (EXIT_FAILURE
);
6339 /* Output a diagnostic with printf-style FORMAT and args. */
6341 error (const char *format
, ...)
6344 va_start (ap
, format
);
6345 fprintf (stderr
, "%s: ", progname
);
6346 vfprintf (stderr
, format
, ap
);
6347 fprintf (stderr
, "\n");
6351 /* Return a newly-allocated string whose contents
6352 concatenate those of s1, s2, s3. */
6354 concat (const char *s1
, const char *s2
, const char *s3
)
6356 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
6357 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
6359 strcpy (result
, s1
);
6360 strcpy (result
+ len1
, s2
);
6361 strcpy (result
+ len1
+ len2
, s3
);
6367 /* Does the same work as the system V getcwd, but does not need to
6368 guess the buffer size in advance. */
6373 char *path
= xnew (bufsize
, char);
6375 while (getcwd (path
, bufsize
) == NULL
)
6377 if (errno
!= ERANGE
)
6381 path
= xnew (bufsize
, char);
6384 canonicalize_filename (path
);
6388 /* Return a newly allocated string containing the file name of FILE
6389 relative to the absolute directory DIR (which should end with a slash). */
6391 relative_filename (char *file
, char *dir
)
6393 char *fp
, *dp
, *afn
, *res
;
6396 /* Find the common root of file and dir (with a trailing slash). */
6397 afn
= absolute_filename (file
, cwd
);
6400 while (*fp
++ == *dp
++)
6402 fp
--, dp
--; /* back to the first differing char */
6404 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
6407 do /* look at the equal chars until '/' */
6411 /* Build a sequence of "../" strings for the resulting relative file name. */
6413 while ((dp
= etags_strchr (dp
+ 1, '/')) != NULL
)
6415 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
6418 strcat (res
, "../");
6420 /* Add the file name relative to the common root of file and dir. */
6421 strcat (res
, fp
+ 1);
6427 /* Return a newly allocated string containing the absolute file name
6428 of FILE given DIR (which should end with a slash). */
6430 absolute_filename (char *file
, char *dir
)
6432 char *slashp
, *cp
, *res
;
6434 if (filename_is_absolute (file
))
6435 res
= savestr (file
);
6437 /* We don't support non-absolute file names with a drive
6438 letter, like `d:NAME' (it's too much hassle). */
6439 else if (file
[1] == ':')
6440 fatal ("%s: relative file names with drive letters not supported", file
);
6443 res
= concat (dir
, file
, "");
6445 /* Delete the "/dirname/.." and "/." substrings. */
6446 slashp
= etags_strchr (res
, '/');
6447 while (slashp
!= NULL
&& slashp
[0] != '\0')
6449 if (slashp
[1] == '.')
6451 if (slashp
[2] == '.'
6452 && (slashp
[3] == '/' || slashp
[3] == '\0'))
6457 while (cp
>= res
&& !filename_is_absolute (cp
));
6459 cp
= slashp
; /* the absolute name begins with "/.." */
6461 /* Under MSDOS and NT we get `d:/NAME' as absolute
6462 file name, so the luser could say `d:/../NAME'.
6463 We silently treat this as `d:/NAME'. */
6464 else if (cp
[0] != '/')
6467 memmove (cp
, slashp
+ 3, strlen (slashp
+ 2));
6471 else if (slashp
[2] == '/' || slashp
[2] == '\0')
6473 memmove (slashp
, slashp
+ 2, strlen (slashp
+ 1));
6478 slashp
= etags_strchr (slashp
+ 1, '/');
6481 if (res
[0] == '\0') /* just a safety net: should never happen */
6484 return savestr ("/");
6490 /* Return a newly allocated string containing the absolute
6491 file name of dir where FILE resides given DIR (which should
6492 end with a slash). */
6494 absolute_dirname (char *file
, char *dir
)
6499 slashp
= etags_strrchr (file
, '/');
6501 return savestr (dir
);
6504 res
= absolute_filename (file
, dir
);
6510 /* Whether the argument string is an absolute file name. The argument
6511 string must have been canonicalized with canonicalize_filename. */
6513 filename_is_absolute (char *fn
)
6515 return (fn
[0] == '/'
6517 || (ISALPHA (fn
[0]) && fn
[1] == ':' && fn
[2] == '/')
6522 /* Downcase DOS drive letter and collapse separators into single slashes.
6525 canonicalize_filename (register char *fn
)
6531 /* Canonicalize drive letter case. */
6532 # define ISUPPER(c) isupper (CHAR (c))
6533 if (fn
[0] != '\0' && fn
[1] == ':' && ISUPPER (fn
[0]))
6534 fn
[0] = lowcase (fn
[0]);
6539 /* Collapse multiple separators into a single slash. */
6540 for (cp
= fn
; *cp
!= '\0'; cp
++, fn
++)
6544 while (cp
[1] == sep
)
6553 /* Initialize a linebuffer for use. */
6555 linebuffer_init (linebuffer
*lbp
)
6557 lbp
->size
= (DEBUG
) ? 3 : 200;
6558 lbp
->buffer
= xnew (lbp
->size
, char);
6559 lbp
->buffer
[0] = '\0';
6563 /* Set the minimum size of a string contained in a linebuffer. */
6565 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
6567 while (lbp
->size
<= toksize
)
6570 xrnew (lbp
->buffer
, lbp
->size
, char);
6575 /* Like malloc but get fatal error if memory is exhausted. */
6577 xmalloc (size_t size
)
6579 void *result
= malloc (size
);
6581 fatal ("virtual memory exhausted", (char *)NULL
);
6586 xrealloc (char *ptr
, size_t size
)
6588 void *result
= realloc (ptr
, size
);
6590 fatal ("virtual memory exhausted", (char *)NULL
);
6596 * indent-tabs-mode: t
6599 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6600 * c-file-style: "gnu"
6604 /* etags.c ends here */