1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version
[] = "@(#) pot revision number is 17.38.1.4";
88 # define NDEBUG /* disable assert */
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
103 #endif /* WIN32_NATIVE */
108 # include <sys/param.h>
115 # define MAXPATHLEN _MAX_PATH
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
127 #include <sysstdio.h>
130 #include <binary-io.h>
132 #include <c-strcase.h>
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
144 Leave it undefined to make the program "etags", which makes emacs-style
145 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* String-equality helpers.  Each macro evaluates to nonzero when the
   two strings compare equal:
     streq       -- whole strings, compared with strcmp
     strcaseeq   -- whole strings, compared with c_strcasecmp
     strneq      -- at most N characters, compared with strncmp
     strncaseeq  -- at most N characters, compared with c_strncasecmp
   Every comparison function dereferences both of its arguments, so the
   assertion requires BOTH S and T to be non-null.  When assertions are
   enabled the arguments are expanded more than once; do not pass
   expressions with side effects.  */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
158 /* C is not in a name. */
160 notinname (unsigned char c
)
162 /* Look at make_tag before modifying! */
163 static bool const table
[UCHAR_MAX
+ 1] = {
164 ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
165 ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
170 /* C can start a token. */
172 begtoken (unsigned char c
)
174 static bool const table
[UCHAR_MAX
+ 1] = {
176 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
177 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
178 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
181 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
182 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
183 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
190 /* C can be in the middle of a token. */
192 intoken (unsigned char c
)
194 static bool const table
[UCHAR_MAX
+ 1] = {
196 ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
197 ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
198 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
199 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
200 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
203 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
204 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
205 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
211 /* C can end a token. */
213 endtoken (unsigned char c
)
215 static bool const table
[UCHAR_MAX
+ 1] = {
216 ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
217 ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
218 ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
219 ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
220 ['{']=1, ['|']=1, ['}']=1, ['~']=1
226 * xnew, xrnew -- allocate, reallocate storage
228 * SYNOPSIS: Type *xnew (int n, Type);
229 * void xrnew (OldPointer, int n, Type);
/* xnew yields a `Type *' to storage obtained from xmalloc for N objects
   of type Type.  xrnew hands OP and the new byte count to xrealloc and
   assigns the result back to OP, so OP must be a modifiable lvalue and
   is expanded twice (keep it free of side effects).  Neither macro
   checks the product (n) * sizeof (Type) for overflow.  */
231 #define xnew(n, Type) ((Type *) xmalloc ((n) * sizeof (Type)))
232 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
/* Type of a per-language parse function: it takes the input stream and
   returns nothing.  The `function' member of a language description
   points to one of these.  Note that C_entries does not have this type,
   because it takes an additional int argument before the stream.  */
234 typedef void Lang_function (FILE *);
238 const char *suffix
; /* file name suffix for this compressor */
239 const char *command
; /* takes one arg and decompresses to stdout */
244 const char *name
; /* language name */
245 const char *help
; /* detailed help for the language */
246 Lang_function
*function
; /* parse function */
247 const char **suffixes
; /* name suffixes of this language's files */
248 const char **filenames
; /* names of this language's files */
249 const char **interpreters
; /* interpreters for this language */
250 bool metasource
; /* source used to generate other sources */
255 struct fdesc
*next
; /* for the linked list */
256 char *infname
; /* uncompressed input file name */
257 char *infabsname
; /* absolute uncompressed input file name */
258 char *infabsdir
; /* absolute dir of input file */
259 char *taggedfname
; /* file name to write in tagfile */
260 language
*lang
; /* language of file */
261 char *prop
; /* file properties to write in tagfile */
262 bool usecharno
; /* etags tags shall contain char number */
263 bool written
; /* entry written in the tags file */
266 typedef struct node_st
267 { /* sorting structure */
268 struct node_st
*left
, *right
; /* left and right sons */
269 fdesc
*fdp
; /* description of file to whom tag belongs */
270 char *name
; /* tag name */
271 char *regex
; /* search regexp */
272 bool valid
; /* write this tag on the tag file */
273 bool is_func
; /* function tag: use regexp in CTAGS mode */
274 bool been_warned
; /* warning already given for duplicated tag */
275 int lno
; /* line number tag is on */
276 long cno
; /* character number line starts on */
280 * A `linebuffer' is a structure which holds a line of text.
281 * `readline_internal' reads a line from a stream into a linebuffer
282 * and works regardless of the length of the line.
283 * SIZE is the size of BUFFER, LEN is the length of the string in
284 * BUFFER after readline reads it.
293 /* Used to support mixing of --lang and file names. */
297 at_language
, /* a language specification */
298 at_regexp
, /* a regular expression */
299 at_filename
, /* a file name */
300 at_stdin
, /* read from stdin here */
301 at_end
/* stop parsing the list */
302 } arg_type
; /* argument type */
303 language
*lang
; /* language associated with the argument */
304 char *what
; /* the argument itself */
307 /* Structure defining a regular expression. */
308 typedef struct regexp
310 struct regexp
*p_next
; /* pointer to next in list */
311 language
*lang
; /* if set, use only for this language */
312 char *pattern
; /* the regexp pattern */
313 char *name
; /* tag name */
314 struct re_pattern_buffer
*pat
; /* the compiled pattern */
315 struct re_registers regs
; /* re registers */
316 bool error_signaled
; /* already signaled for this regexp */
317 bool force_explicit_name
; /* do not allow implicit tag name */
318 bool ignore_case
; /* ignore case when matching */
319 bool multi_line
; /* do a multi-line match on the whole file */
323 /* Many compilers barf on this:
324 Lang_function Ada_funcs;
325 so let's write it this way */
326 static void Ada_funcs (FILE *);
327 static void Asm_labels (FILE *);
328 static void C_entries (int c_ext
, FILE *);
329 static void default_C_entries (FILE *);
330 static void plain_C_entries (FILE *);
331 static void Cjava_entries (FILE *);
332 static void Cobol_paragraphs (FILE *);
333 static void Cplusplus_entries (FILE *);
334 static void Cstar_entries (FILE *);
335 static void Erlang_functions (FILE *);
336 static void Forth_words (FILE *);
337 static void Fortran_functions (FILE *);
338 static void HTML_labels (FILE *);
339 static void Lisp_functions (FILE *);
340 static void Lua_functions (FILE *);
341 static void Makefile_targets (FILE *);
342 static void Pascal_functions (FILE *);
343 static void Perl_functions (FILE *);
344 static void PHP_functions (FILE *);
345 static void PS_functions (FILE *);
346 static void Prolog_functions (FILE *);
347 static void Python_functions (FILE *);
348 static void Scheme_functions (FILE *);
349 static void TeX_commands (FILE *);
350 static void Texinfo_nodes (FILE *);
351 static void Yacc_entries (FILE *);
352 static void just_read_file (FILE *);
354 static language
*get_language_from_langname (const char *);
355 static void readline (linebuffer
*, FILE *);
356 static long readline_internal (linebuffer
*, FILE *, char const *);
357 static bool nocase_tail (const char *);
358 static void get_tag (char *, char **);
360 static void analyze_regex (char *);
361 static void free_regexps (void);
362 static void regex_tag_multiline (void);
363 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
364 static _Noreturn
void suggest_asking_for_help (void);
365 _Noreturn
void fatal (const char *, const char *);
366 static _Noreturn
void pfatal (const char *);
367 static void add_node (node
*, node
**);
369 static void process_file_name (char *, language
*);
370 static void process_file (FILE *, char *, language
*);
371 static void find_entries (FILE *);
372 static void free_tree (node
*);
373 static void free_fdesc (fdesc
*);
374 static void pfnote (char *, bool, char *, int, int, long);
375 static void invalidate_nodes (fdesc
*, node
**);
376 static void put_entries (node
*);
378 static char *concat (const char *, const char *, const char *);
379 static char *skip_spaces (char *);
380 static char *skip_non_spaces (char *);
381 static char *skip_name (char *);
382 static char *savenstr (const char *, int);
383 static char *savestr (const char *);
384 static char *etags_getcwd (void);
385 static char *relative_filename (char *, char *);
386 static char *absolute_filename (char *, char *);
387 static char *absolute_dirname (char *, char *);
388 static bool filename_is_absolute (char *f
);
389 static void canonicalize_filename (char *);
390 static char *etags_mktmp (void);
391 static void linebuffer_init (linebuffer
*);
392 static void linebuffer_setlen (linebuffer
*, int);
393 static void *xmalloc (size_t);
394 static void *xrealloc (void *, size_t);
397 static char searchar
= '/'; /* use /.../ searches */
399 static char *tagfile
; /* output file */
400 static char *progname
; /* name this program was invoked with */
401 static char *cwd
; /* current working directory */
402 static char *tagfiledir
; /* directory of tagfile */
403 static FILE *tagf
; /* ioptr for tags file */
404 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
406 static fdesc
*fdhead
; /* head of file description list */
407 static fdesc
*curfdp
; /* current file description */
408 static char *infilename
; /* current input file name */
409 static int lineno
; /* line number of current line */
410 static long charno
; /* current character number */
411 static long linecharno
; /* charno of start of current line */
412 static char *dbp
; /* pointer to start of current tag */
414 static const int invalidcharno
= -1;
416 static node
*nodehead
; /* the head of the binary tree of tags */
417 static node
*last_node
; /* the last node created */
419 static linebuffer lb
; /* the current line */
420 static linebuffer filebuf
; /* a buffer containing the whole file */
421 static linebuffer token_name
; /* a buffer containing a tag name */
423 static bool append_to_tagfile
; /* -a: append to tags */
424 /* The next five default to true in C and derived languages. */
425 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
426 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
427 /* 0 struct/enum/union decls, and C++ */
428 /* member functions. */
429 static bool constantypedefs
; /* -d: create tags for C #define, enum */
430 /* constants and variables. */
431 /* -D: opposite of -d. Default under ctags. */
432 static int globals
; /* create tags for global variables */
433 static int members
; /* create tags for C member variables */
434 static int declarations
; /* --declarations: tag them and extern in C&Co*/
435 static int no_line_directive
; /* ignore #line directives (undocumented) */
436 static int no_duplicates
; /* no duplicate tags for ctags (undocumented) */
437 static bool update
; /* -u: update tags */
438 static bool vgrind_style
; /* -v: create vgrind style index output */
439 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
440 static bool cxref_style
; /* -x: create cxref style output */
441 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
442 static bool ignoreindent
; /* -I: ignore indentation in C */
443 static int packages_only
; /* --packages-only: in Ada, only tag packages*/
444 static int class_qualify
; /* -Q: produce class-qualified tags in C++/Java */
446 /* STDIN is defined in LynxOS system headers */
451 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
452 static bool parsing_stdin
; /* --parse-stdin used */
454 static regexp
*p_head
; /* list of all regexps */
455 static bool need_filebuf
; /* some regexes are multi-line */
457 static struct option longopts
[] =
459 { "append", no_argument
, NULL
, 'a' },
460 { "packages-only", no_argument
, &packages_only
, 1 },
461 { "c++", no_argument
, NULL
, 'C' },
462 { "declarations", no_argument
, &declarations
, 1 },
463 { "no-line-directive", no_argument
, &no_line_directive
, 1 },
464 { "no-duplicates", no_argument
, &no_duplicates
, 1 },
465 { "help", no_argument
, NULL
, 'h' },
466 { "help", no_argument
, NULL
, 'H' },
467 { "ignore-indentation", no_argument
, NULL
, 'I' },
468 { "language", required_argument
, NULL
, 'l' },
469 { "members", no_argument
, &members
, 1 },
470 { "no-members", no_argument
, &members
, 0 },
471 { "output", required_argument
, NULL
, 'o' },
472 { "class-qualify", no_argument
, &class_qualify
, 'Q' },
473 { "regex", required_argument
, NULL
, 'r' },
474 { "no-regex", no_argument
, NULL
, 'R' },
475 { "ignore-case-regex", required_argument
, NULL
, 'c' },
476 { "parse-stdin", required_argument
, NULL
, STDIN
},
477 { "version", no_argument
, NULL
, 'V' },
479 #if CTAGS /* Ctags options */
480 { "backward-search", no_argument
, NULL
, 'B' },
481 { "cxref", no_argument
, NULL
, 'x' },
482 { "defines", no_argument
, NULL
, 'd' },
483 { "globals", no_argument
, &globals
, 1 },
484 { "typedefs", no_argument
, NULL
, 't' },
485 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
486 { "update", no_argument
, NULL
, 'u' },
487 { "vgrind", no_argument
, NULL
, 'v' },
488 { "no-warn", no_argument
, NULL
, 'w' },
490 #else /* Etags options */
491 { "no-defines", no_argument
, NULL
, 'D' },
492 { "no-globals", no_argument
, &globals
, 0 },
493 { "include", required_argument
, NULL
, 'i' },
498 static compressor compressors
[] =
500 { "z", "gzip -d -c"},
501 { "Z", "gzip -d -c"},
502 { "gz", "gzip -d -c"},
503 { "GZ", "gzip -d -c"},
504 { "bz2", "bzip2 -d -c" },
505 { "xz", "xz -d -c" },
514 static const char *Ada_suffixes
[] =
515 { "ads", "adb", "ada", NULL
};
516 static const char Ada_help
[] =
517 "In Ada code, functions, procedures, packages, tasks and types are\n\
518 tags. Use the `--packages-only' option to create tags for\n\
520 Ada tag names have suffixes indicating the type of entity:\n\
521 Entity type: Qualifier:\n\
522 ------------ ----------\n\
529 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
530 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
531 will just search for any tag `bidule'.";
534 static const char *Asm_suffixes
[] =
535 { "a", /* Unix assembler */
536 "asm", /* Microcontroller assembly */
537 "def", /* BSO/Tasking definition includes */
538 "inc", /* Microcontroller include files */
539 "ins", /* Microcontroller include files */
540 "s", "sa", /* Unix assembler */
541 "S", /* cpp-processed Unix assembler */
542 "src", /* BSO/Tasking C compiler output */
545 static const char Asm_help
[] =
546 "In assembler code, labels appearing at the beginning of a line,\n\
547 followed by a colon, are tags.";
550 /* Note that .c and .h can be considered C++, if the --c++ flag was
551 given, or if the `class' or `template' keywords are met inside the file.
552 That is why default_C_entries is called for these. */
553 static const char *default_C_suffixes
[] =
555 #if CTAGS /* C help for Ctags */
556 static const char default_C_help
[] =
557 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
558 Use -T to tag definitions of `struct', `union' and `enum'.\n\
559 Use -d to tag `#define' macro definitions and `enum' constants.\n\
560 Use --globals to tag global variables.\n\
561 You can tag function declarations and external variables by\n\
562 using `--declarations', and struct members by using `--members'.";
563 #else /* C help for Etags */
564 static const char default_C_help
[] =
565 "In C code, any C function or typedef is a tag, and so are\n\
566 definitions of `struct', `union' and `enum'. `#define' macro\n\
567 definitions and `enum' constants are tags unless you specify\n\
568 `--no-defines'. Global variables are tags unless you specify\n\
569 `--no-globals' and so are struct members unless you specify\n\
570 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
571 `--no-members' can make the tags table file much smaller.\n\
572 You can tag function declarations and external variables by\n\
573 using `--declarations'.";
574 #endif /* C help for Ctags and Etags */
576 static const char *Cplusplus_suffixes
[] =
577 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
578 "M", /* Objective C++ */
579 "pdb", /* PostScript with C syntax */
581 static const char Cplusplus_help
[] =
582 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
583 --help --lang=c --lang=c++ for full help.)\n\
584 In addition to C tags, member functions are also recognized. Member\n\
585 variables are recognized unless you use the `--no-members' option.\n\
586 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
587 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
590 static const char *Cjava_suffixes
[] =
/* Help text for the "java" entry of lang_names.  Declared const like
   the other *_help texts: it is only ever read through the
   `const char *help' member of a language description.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
597 static const char *Cobol_suffixes
[] =
598 { "COB", "cob", NULL
};
/* Help text for the "cobol" entry of lang_names.  Declared const like
   the other *_help texts: it is only ever read through the
   `const char *help' member of a language description.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
603 static const char *Cstar_suffixes
[] =
604 { "cs", "hs", NULL
};
606 static const char *Erlang_suffixes
[] =
607 { "erl", "hrl", NULL
};
608 static const char Erlang_help
[] =
609 "In Erlang code, the tags are the functions, records and macros\n\
610 defined in the file.";
/* NULL-terminated list of file name suffixes for Forth, referenced from
   the "forth" entry of lang_names.  Given internal linkage like every
   other suffix table in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
614 static const char Forth_help
[] =
615 "In Forth code, tags are words defined by `:',\n\
616 constant, code, create, defer, value, variable, buffer:, field.";
618 static const char *Fortran_suffixes
[] =
619 { "F", "f", "f90", "for", NULL
};
620 static const char Fortran_help
[] =
621 "In Fortran code, functions, subroutines and block data are tags.";
623 static const char *HTML_suffixes
[] =
624 { "htm", "html", "shtml", NULL
};
625 static const char HTML_help
[] =
626 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
627 `h3' headers. Also, tags are `name=' in anchors and all\n\
628 occurrences of `id='.";
630 static const char *Lisp_suffixes
[] =
631 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
632 static const char Lisp_help
[] =
633 "In Lisp code, any function defined with `defun', any variable\n\
634 defined with `defvar' or `defconst', and in general the first\n\
635 argument of any expression that starts with `(def' in column zero\n\
637 The `--declarations' option tags \"(defvar foo)\" constructs too.";
/* Lua: NULL-terminated list of file name suffixes, followed by the help
   text.  Both are referenced from the "lua" entry of lang_names, whose
   parse function is Lua_functions.  */
639 static const char *Lua_suffixes
[] =
640 { "lua", "LUA", NULL
};
641 static const char Lua_help
[] =
642 "In Lua scripts, all functions are tags.";
644 static const char *Makefile_filenames
[] =
645 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
646 static const char Makefile_help
[] =
647 "In makefiles, targets are tags; additionally, variables are tags\n\
648 unless you specify `--no-globals'.";
650 static const char *Objc_suffixes
[] =
651 { "lm", /* Objective lex file */
652 "m", /* Objective C file */
654 static const char Objc_help
[] =
655 "In Objective C code, tags include Objective C definitions for classes,\n\
656 class categories, methods and protocols. Tags for variables and\n\
657 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
658 (Use --help --lang=c --lang=objc --lang=java for full help.)";
660 static const char *Pascal_suffixes
[] =
661 { "p", "pas", NULL
};
662 static const char Pascal_help
[] =
663 "In Pascal code, the tags are the functions and procedures defined\n\
665 /* " // this is for working around an Emacs highlighting bug... */
667 static const char *Perl_suffixes
[] =
668 { "pl", "pm", NULL
};
669 static const char *Perl_interpreters
[] =
670 { "perl", "@PERL@", NULL
};
671 static const char Perl_help
[] =
672 "In Perl code, the tags are the packages, subroutines and variables\n\
673 defined by the `package', `sub', `my' and `local' keywords. Use\n\
674 `--globals' if you want to tag global variables. Tags for\n\
675 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
676 defined in the default package is `main::SUB'.";
678 static const char *PHP_suffixes
[] =
679 { "php", "php3", "php4", NULL
};
680 static const char PHP_help
[] =
681 "In PHP code, tags are functions, classes and defines. Unless you use\n\
682 the `--no-members' option, vars are tags too.";
684 static const char *plain_C_suffixes
[] =
685 { "pc", /* Pro*C file */
688 static const char *PS_suffixes
[] =
689 { "ps", "psw", NULL
}; /* .psw is for PSWrap */
690 static const char PS_help
[] =
691 "In PostScript code, the tags are the functions.";
693 static const char *Prolog_suffixes
[] =
695 static const char Prolog_help
[] =
696 "In Prolog code, tags are predicates and rules at the beginning of\n\
699 static const char *Python_suffixes
[] =
701 static const char Python_help
[] =
702 "In Python code, `def' or `class' at the beginning of a line\n\
705 /* Can't do the `SCM' or `scm' prefix with a version number. */
706 static const char *Scheme_suffixes
[] =
707 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
708 static const char Scheme_help
[] =
709 "In Scheme code, tags include anything defined with `def' or with a\n\
710 construct whose name starts with `def'. They also include\n\
711 variables set with `set!' at top level in the file.";
713 static const char *TeX_suffixes
[] =
714 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
715 static const char TeX_help
[] =
716 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
717 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
718 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
719 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
720 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
722 Other commands can be specified by setting the environment variable\n\
723 `TEXTAGS' to a colon-separated list like, for example,\n\
724 TEXTAGS=\"mycommand:myothercommand\".";
727 static const char *Texinfo_suffixes
[] =
728 { "texi", "texinfo", "txi", NULL
};
729 static const char Texinfo_help
[] =
730 "for texinfo files, lines starting with @node are tagged.";
732 static const char *Yacc_suffixes
[] =
733 { "y", "y++", "ym", "yxx", "yy", NULL
}; /* .ym is Objective yacc file */
734 static const char Yacc_help
[] =
735 "In Bison or Yacc input files, each rule defines as a tag the\n\
736 nonterminal it constructs. The portions of the file that contain\n\
737 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
/* Help text for the pseudo-language "auto" (the default guessing
   scheme entry of lang_names, which has no parse function of its own).
   Wording kept in step with the summary printed by
   print_language_names ("... for files based on file name suffix").  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
/* Help text for the pseudo-language "none".  Its lang_names entry uses
   just_read_file as the parse function, i.e. only regexp matching is
   applied to such files.  */
744 static const char none_help
[] =
745 "`none' is not a real language, it indicates to only do\n\
746 regexp processing on files.";
/* Placeholder help text shared by the lang_names entries that have no
   dedicated description (the "c*" and "proc" entries).  */
748 static const char no_lang_help
[] =
749 "No detailed help available for this language.";
753 * Table of languages.
755 * It is ok for a given function to be listed under more than one
756 * name. I just didn't.
759 static language lang_names
[] =
761 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
762 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
763 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
764 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
765 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
766 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
767 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
768 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
769 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
770 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
771 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
772 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
773 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
774 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
775 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
776 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
777 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
778 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
779 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
780 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
781 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
782 { "python", Python_help
, Python_functions
, Python_suffixes
},
783 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
784 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
785 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
786 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,true},
787 { "auto", auto_help
}, /* default guessing scheme */
788 { "none", none_help
, just_read_file
}, /* regexp matching only */
789 { NULL
} /* end of list */
794 print_language_names (void)
797 const char **name
, **ext
;
799 puts ("\nThese are the currently supported languages, along with the\n\
800 default file names and dot suffixes:");
801 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
803 printf (" %-*s", 10, lang
->name
);
804 if (lang
->filenames
!= NULL
)
805 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
806 printf (" %s", *name
);
807 if (lang
->suffixes
!= NULL
)
808 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
809 printf (" .%s", *ext
);
812 puts ("where `auto' means use default language for files based on file\n\
813 name suffix, and `none' means only do regexp processing on files.\n\
814 If no language is specified and no matching suffix is found,\n\
815 the first line of the file is read for a sharp-bang (#!) sequence\n\
816 followed by the name of an interpreter. If no such sequence is found,\n\
817 Fortran is tried first; if no tags are found, C is tried next.\n\
818 When parsing any C file, a \"class\" or \"template\" keyword\n\
820 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
822 For detailed help on a given language use, for example,\n\
823 etags --help --lang=ada.");
827 # define EMACS_NAME "standalone"
830 # define VERSION "17.38.1.4"
832 static _Noreturn
void
835 char emacs_copyright
[] = COPYRIGHT
;
837 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
838 puts (emacs_copyright
);
839 puts ("This program is distributed under the terms in ETAGS.README");
844 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
845 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
848 static _Noreturn
void
849 print_help (argument
*argbuffer
)
851 bool help_for_lang
= false;
853 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
854 if (argbuffer
->arg_type
== at_language
)
858 puts (argbuffer
->lang
->help
);
859 help_for_lang
= true;
865 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
867 These are the options accepted by %s.\n", progname
, progname
);
868 puts ("You may use unambiguous abbreviations for the long option names.");
869 puts (" A - as file name means read names from stdin (one per line).\n\
870 Absolute names are stored in the output file as they are.\n\
871 Relative ones are stored relative to the output file's directory.\n");
873 puts ("-a, --append\n\
874 Append tag entries to existing tags file.");
876 puts ("--packages-only\n\
877 For Ada files, only generate tags for packages.");
880 puts ("-B, --backward-search\n\
881 Write the search commands for the tag entries using '?', the\n\
882 backward-search command instead of '/', the forward-search command.");
884 /* This option is mostly obsolete, because etags can now automatically
885 detect C++. Retained for backward compatibility and for debugging and
886 experimentation. In principle, we could want to tag as C++ even
887 before any "class" or "template" keyword.
889 Treat files whose name suffix defaults to C language as C++ files.");
892 puts ("--declarations\n\
893 In C and derived languages, create tags for function declarations,");
895 puts ("\tand create tags for extern variables if --globals is used.");
898 ("\tand create tags for extern variables unless --no-globals is used.");
901 puts ("-d, --defines\n\
902 Create tag entries for C #define constants and enum constants, too.");
904 puts ("-D, --no-defines\n\
905 Don't create tag entries for C #define constants and enum constants.\n\
906 This makes the tags file smaller.");
909 puts ("-i FILE, --include=FILE\n\
910 Include a note in tag file indicating that, when searching for\n\
911 a tag, one should also consult the tags file FILE after\n\
912 checking the current file.");
914 puts ("-l LANG, --language=LANG\n\
915 Force the following files to be considered as written in the\n\
916 named language up to the next --language=LANG option.");
920 Create tag entries for global variables in some languages.");
922 puts ("--no-globals\n\
923 Do not create tag entries for global variables in some\n\
924 languages. This makes the tags file smaller.");
926 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
927 puts ("--no-line-directive\n\
928 Ignore #line preprocessor directives in C and derived languages.");
932 Create tag entries for members of structures in some languages.");
934 puts ("--no-members\n\
935 Do not create tag entries for members of structures\n\
936 in some languages.");
938 puts ("-Q, --class-qualify\n\
939 Qualify tag names with their class name in C++, ObjC, and Java.\n\
940 This produces tag names of the form \"class::member\" for C++,\n\
941 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
942 For Objective C, this also produces class methods qualified with\n\
943 their arguments, as in \"foo:bar:baz:more\".");
944 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
945 Make a tag for each line matching a regular expression pattern\n\
946 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
947 files only. REGEXFILE is a file containing one REGEXP per line.\n\
948 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
949 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
950 puts (" If TAGNAME/ is present, the tags created are named.\n\
951 For example Tcl named tags can be created with:\n\
952 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
953 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
954 `m' means to allow multi-line matches, `s' implies `m' and\n\
955 causes dot to match any character, including newline.");
957 puts ("-R, --no-regex\n\
958 Don't create tags from regexps for the following files.");
960 puts ("-I, --ignore-indentation\n\
961 In C and C++ do not assume that a closing brace in the first\n\
962 column is the final brace of a function or structure definition.");
964 puts ("-o FILE, --output=FILE\n\
965 Write the tags to FILE.");
967 puts ("--parse-stdin=NAME\n\
968 Read from standard input and record tags as belonging to file NAME.");
972 puts ("-t, --typedefs\n\
973 Generate tag entries for C and Ada typedefs.");
974 puts ("-T, --typedefs-and-c++\n\
975 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
976 and C++ member functions.");
980 puts ("-u, --update\n\
981 Update the tag entries for the given files, leaving tag\n\
982 entries for other files in place. Currently, this is\n\
983 implemented by deleting the existing entries for the given\n\
984 files and then rewriting the new entries at the end of the\n\
985 tags file. It is often faster to simply rebuild the entire\n\
986 tag file than to use this.");
990 puts ("-v, --vgrind\n\
991 Print on the standard output an index of items intended for\n\
992 human consumption, similar to the output of vgrind. The index\n\
993 is sorted, and gives the page number of each item.");
995 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
996 puts ("-w, --no-duplicates\n\
997 Do not create duplicate tag entries, for compatibility with\n\
998 traditional ctags.");
1000 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1001 puts ("-w, --no-warn\n\
1002 Suppress warning messages about duplicate tag entries.");
1004 puts ("-x, --cxref\n\
1005 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1006 The output uses line numbers instead of page numbers, but\n\
1007 beyond that the differences are cosmetic; try both to see\n\
1011 puts ("-V, --version\n\
1012 Print the version of the program.\n\
1014 Print this help message.\n\
1015 Followed by one or more `--language' options prints detailed\n\
1016 help about tag generation for the specified languages.");
1018 print_language_names ();
1021 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1023 exit (EXIT_SUCCESS
);
1028 main (int argc
, char **argv
)
1031 unsigned int nincluded_files
;
1032 char **included_files
;
1033 argument
*argbuffer
;
1034 int current_arg
, file_count
;
1035 linebuffer filename_lb
;
1036 bool help_asked
= false;
1042 nincluded_files
= 0;
1043 included_files
= xnew (argc
, char *);
1047 /* Allocate enough no matter what happens. Overkill, but each one
1049 argbuffer
= xnew (argc
, argument
);
1052 * Always find typedefs and structure tags.
1053 * Also default to find macro constants, enum constants, struct
1054 * members and global variables. Do it for both etags and ctags.
1056 typedefs
= typedefs_or_cplusplus
= constantypedefs
= true;
1057 globals
= members
= true;
1059 /* When the optstring begins with a '-' getopt_long does not rearrange the
1060 non-options arguments to be at the end, but leaves them alone. */
1061 optstring
= concat ("-ac:Cf:Il:o:Qr:RSVhH",
1062 (CTAGS
) ? "BxdtTuvw" : "Di:",
1065 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1069 /* If getopt returns 0, then it has already processed a
1070 long-named option. We should do nothing. */
1074 /* This means that a file name has been seen. Record it. */
1075 argbuffer
[current_arg
].arg_type
= at_filename
;
1076 argbuffer
[current_arg
].what
= optarg
;
1077 len
= strlen (optarg
);
1078 if (whatlen_max
< len
)
1085 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1086 argbuffer
[current_arg
].arg_type
= at_stdin
;
1087 argbuffer
[current_arg
].what
= optarg
;
1088 len
= strlen (optarg
);
1089 if (whatlen_max
< len
)
1094 fatal ("cannot parse standard input more than once", (char *)NULL
);
1095 parsing_stdin
= true;
1098 /* Common options. */
1099 case 'a': append_to_tagfile
= true; break;
1100 case 'C': cplusplus
= true; break;
1101 case 'f': /* for compatibility with old makefiles */
1105 error ("-o option may only be given once.");
1106 suggest_asking_for_help ();
1112 case 'S': /* for backward compatibility */
1113 ignoreindent
= true;
1117 language
*lang
= get_language_from_langname (optarg
);
1120 argbuffer
[current_arg
].lang
= lang
;
1121 argbuffer
[current_arg
].arg_type
= at_language
;
1127 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1128 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1131 argbuffer
[current_arg
].arg_type
= at_regexp
;
1132 argbuffer
[current_arg
].what
= optarg
;
1133 len
= strlen (optarg
);
1134 if (whatlen_max
< len
)
1139 argbuffer
[current_arg
].arg_type
= at_regexp
;
1140 argbuffer
[current_arg
].what
= NULL
;
1155 case 'D': constantypedefs
= false; break;
1156 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1158 /* Ctags options. */
1159 case 'B': searchar
= '?'; break;
1160 case 'd': constantypedefs
= true; break;
1161 case 't': typedefs
= true; break;
1162 case 'T': typedefs
= typedefs_or_cplusplus
= true; break;
1163 case 'u': update
= true; break;
1164 case 'v': vgrind_style
= true; /*FALLTHRU*/
1165 case 'x': cxref_style
= true; break;
1166 case 'w': no_warnings
= true; break;
1168 suggest_asking_for_help ();
1172 /* No more options. Store the rest of arguments. */
1173 for (; optind
< argc
; optind
++)
1175 argbuffer
[current_arg
].arg_type
= at_filename
;
1176 argbuffer
[current_arg
].what
= argv
[optind
];
1177 len
= strlen (argv
[optind
]);
1178 if (whatlen_max
< len
)
1184 argbuffer
[current_arg
].arg_type
= at_end
;
1187 print_help (argbuffer
);
1190 if (nincluded_files
== 0 && file_count
== 0)
1192 error ("no input files specified.");
1193 suggest_asking_for_help ();
1197 if (tagfile
== NULL
)
1198 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1199 cwd
= etags_getcwd (); /* the current working directory */
1200 if (cwd
[strlen (cwd
) - 1] != '/')
1203 cwd
= concat (oldcwd
, "/", "");
1207 /* Compute base directory for relative file names. */
1208 if (streq (tagfile
, "-")
1209 || strneq (tagfile
, "/dev/", 5))
1210 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1213 canonicalize_filename (tagfile
);
1214 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1217 linebuffer_init (&lb
);
1218 linebuffer_init (&filename_lb
);
1219 linebuffer_init (&filebuf
);
1220 linebuffer_init (&token_name
);
1224 if (streq (tagfile
, "-"))
1227 SET_BINARY (fileno (stdout
));
1230 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1236 * Loop through files finding functions.
1238 for (i
= 0; i
< current_arg
; i
++)
1240 static language
*lang
; /* non-NULL if language is forced */
1243 switch (argbuffer
[i
].arg_type
)
1246 lang
= argbuffer
[i
].lang
;
1249 analyze_regex (argbuffer
[i
].what
);
1252 this_file
= argbuffer
[i
].what
;
1253 /* Input file named "-" means read file names from stdin
1254 (one per line) and use them. */
1255 if (streq (this_file
, "-"))
1258 fatal ("cannot parse standard input AND read file names from it",
1260 while (readline_internal (&filename_lb
, stdin
, "-") > 0)
1261 process_file_name (filename_lb
.buffer
, lang
);
1264 process_file_name (this_file
, lang
);
1267 this_file
= argbuffer
[i
].what
;
1268 process_file (stdin
, this_file
, lang
);
1275 free (filebuf
.buffer
);
1276 free (token_name
.buffer
);
1278 if (!CTAGS
|| cxref_style
)
1280 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1281 put_entries (nodehead
);
1282 free_tree (nodehead
);
1288 /* Output file entries that have no tags. */
1289 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1291 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1293 while (nincluded_files
-- > 0)
1294 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1296 if (fclose (tagf
) == EOF
)
1300 exit (EXIT_SUCCESS
);
1303 /* From here on, we are in (CTAGS && !cxref_style) */
1307 xmalloc (strlen (tagfile
) + whatlen_max
+
1308 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1309 for (i
= 0; i
< current_arg
; ++i
)
1311 switch (argbuffer
[i
].arg_type
)
1317 continue; /* the for loop */
1319 char *z
= stpcpy (cmd
, "mv ");
1320 z
= stpcpy (z
, tagfile
);
1321 z
= stpcpy (z
, " OTAGS;fgrep -v '\t");
1322 z
= stpcpy (z
, argbuffer
[i
].what
);
1323 z
= stpcpy (z
, "\t' OTAGS >");
1324 z
= stpcpy (z
, tagfile
);
1325 strcpy (z
, ";rm OTAGS");
1326 if (system (cmd
) != EXIT_SUCCESS
)
1327 fatal ("failed to execute shell command", (char *)NULL
);
1330 append_to_tagfile
= true;
1333 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1336 put_entries (nodehead
); /* write all the tags (CTAGS) */
1337 free_tree (nodehead
);
1339 if (fclose (tagf
) == EOF
)
1343 if (append_to_tagfile
|| update
)
1345 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1346 /* Maybe these should be used:
1347 setenv ("LC_COLLATE", "C", 1);
1348 setenv ("LC_ALL", "C", 1); */
1349 char *z
= stpcpy (cmd
, "sort -u -o ");
1350 z
= stpcpy (z
, tagfile
);
1352 strcpy (z
, tagfile
);
1353 exit (system (cmd
));
1355 return EXIT_SUCCESS
;
1360 * Return a compressor given the file name. If EXTPTR is non-zero,
1361 * return a pointer into FILE where the compressor-specific
1362 * extension begins. If no compressor is found, NULL is returned
1363 * and EXTPTR is not significant.
1364 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1367 get_compressor_from_suffix (char *file
, char **extptr
)
1370 char *slash
, *suffix
;
1372 /* File has been processed by canonicalize_filename,
1373 so we don't need to consider backslashes on DOS_NT. */
1374 slash
= strrchr (file
, '/');
1375 suffix
= strrchr (file
, '.');
1376 if (suffix
== NULL
|| suffix
< slash
)
1381 /* Let those poor souls who live with DOS 8+3 file name limits get
1382 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1383 Only the first do loop is run if not MSDOS */
1386 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1387 if (streq (compr
->suffix
, suffix
))
1390 break; /* do it only once: not really a loop */
1393 } while (*suffix
!= '\0');
1400 * Return a language given the name.
1403 get_language_from_langname (const char *name
)
1408 error ("empty language name");
1411 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1412 if (streq (name
, lang
->name
))
1414 error ("unknown language \"%s\"", name
);
1422 * Return a language given the interpreter name.
1425 get_language_from_interpreter (char *interpreter
)
1430 if (interpreter
== NULL
)
1432 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1433 if (lang
->interpreters
!= NULL
)
1434 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1435 if (streq (*iname
, interpreter
))
1444 * Return a language given the file name.
1447 get_language_from_filename (char *file
, int case_sensitive
)
1450 const char **name
, **ext
, *suffix
;
1452 /* Try whole file name first. */
1453 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1454 if (lang
->filenames
!= NULL
)
1455 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1456 if ((case_sensitive
)
1457 ? streq (*name
, file
)
1458 : strcaseeq (*name
, file
))
1461 /* If not found, try suffix after last dot. */
1462 suffix
= strrchr (file
, '.');
1466 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1467 if (lang
->suffixes
!= NULL
)
1468 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1469 if ((case_sensitive
)
1470 ? streq (*ext
, suffix
)
1471 : strcaseeq (*ext
, suffix
))
1478 * This routine is called on each file argument.
1481 process_file_name (char *file
, language
*lang
)
1486 char *compressed_name
, *uncompressed_name
;
1487 char *ext
, *real_name
, *tmp_name
;
1490 canonicalize_filename (file
);
1491 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1493 error ("skipping inclusion of %s in self.", file
);
1496 compr
= get_compressor_from_suffix (file
, &ext
);
1499 compressed_name
= file
;
1500 uncompressed_name
= savenstr (file
, ext
- file
);
1504 compressed_name
= NULL
;
1505 uncompressed_name
= file
;
1508 /* If the canonicalized uncompressed name
1509 has already been dealt with, skip it silently. */
1510 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1512 assert (fdp
->infname
!= NULL
);
1513 if (streq (uncompressed_name
, fdp
->infname
))
1517 inf
= fopen (file
, "r" FOPEN_BINARY
);
1522 int file_errno
= errno
;
1523 if (compressed_name
)
1525 /* Try with the given suffix. */
1526 inf
= fopen (uncompressed_name
, "r" FOPEN_BINARY
);
1528 real_name
= uncompressed_name
;
1532 /* Try all possible suffixes. */
1533 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1535 compressed_name
= concat (file
, ".", compr
->suffix
);
1536 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1539 real_name
= compressed_name
;
1544 char *suf
= compressed_name
+ strlen (file
);
1545 size_t suflen
= strlen (compr
->suffix
) + 1;
1546 for ( ; suf
[1]; suf
++, suflen
--)
1548 memmove (suf
, suf
+ 1, suflen
);
1549 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1552 real_name
= compressed_name
;
1559 free (compressed_name
);
1560 compressed_name
= NULL
;
1571 if (real_name
== compressed_name
)
1574 tmp_name
= etags_mktmp ();
1579 #if MSDOS || defined (DOS_NT)
1580 char *cmd1
= concat (compr
->command
, " \"", real_name
);
1581 char *cmd
= concat (cmd1
, "\" > ", tmp_name
);
1583 char *cmd1
= concat (compr
->command
, " '", real_name
);
1584 char *cmd
= concat (cmd1
, "' > ", tmp_name
);
1588 if (system (cmd
) == -1)
1595 inf
= fopen (tmp_name
, "r" FOPEN_BINARY
);
1609 process_file (inf
, uncompressed_name
, lang
);
1611 retval
= fclose (inf
);
1612 if (real_name
== compressed_name
)
1621 if (compressed_name
!= file
)
1622 free (compressed_name
);
1623 if (uncompressed_name
!= file
)
1624 free (uncompressed_name
);
1631 process_file (FILE *fh
, char *fn
, language
*lang
)
1633 static const fdesc emptyfdesc
;
1637 /* Create a new input file description entry. */
1638 fdp
= xnew (1, fdesc
);
1641 fdp
->infname
= savestr (fn
);
1643 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1644 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1645 if (filename_is_absolute (fn
))
1647 /* An absolute file name. Canonicalize it. */
1648 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1652 /* A file name relative to cwd. Make it relative
1653 to the directory of the tags file. */
1654 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1656 fdp
->usecharno
= true; /* use char position when making tags */
1658 fdp
->written
= false; /* not written on tags file yet */
1661 curfdp
= fdhead
; /* the current file description */
1665 /* If not Ctags, and if this is not metasource and if it contained no #line
1666 directives, we can write the tags and free all nodes pointing to
1669 && curfdp
->usecharno
/* no #line directives in this file */
1670 && !curfdp
->lang
->metasource
)
1674 /* Look for the head of the sublist relative to this file. See add_node
1675 for the structure of the node tree. */
1677 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1678 if (np
->fdp
== curfdp
)
1681 /* If we generated tags for this file, write and delete them. */
1684 /* This is the head of the last sublist, if any. The following
1685 instructions depend on this being true. */
1686 assert (np
->left
== NULL
);
1688 assert (fdhead
== curfdp
);
1689 assert (last_node
->fdp
== curfdp
);
1690 put_entries (np
); /* write tags for file curfdp->taggedfname */
1691 free_tree (np
); /* remove the written nodes */
1693 nodehead
= NULL
; /* no nodes left */
1695 prev
->left
= NULL
; /* delete the pointer to the sublist */
1701 reset_input (FILE *inf
)
1703 if (fseek (inf
, 0, SEEK_SET
) != 0)
1704 perror (infilename
);
1708 * This routine opens the specified file and calls the function
1709 * which finds the function and type definitions.
1712 find_entries (FILE *inf
)
1715 language
*lang
= curfdp
->lang
;
1716 Lang_function
*parser
= NULL
;
1718 /* If user specified a language, use it. */
1719 if (lang
!= NULL
&& lang
->function
!= NULL
)
1721 parser
= lang
->function
;
1724 /* Else try to guess the language given the file name. */
1727 lang
= get_language_from_filename (curfdp
->infname
, true);
1728 if (lang
!= NULL
&& lang
->function
!= NULL
)
1730 curfdp
->lang
= lang
;
1731 parser
= lang
->function
;
1735 /* Else look for sharp-bang as the first two characters. */
1737 && readline_internal (&lb
, inf
, infilename
) > 0
1739 && lb
.buffer
[0] == '#'
1740 && lb
.buffer
[1] == '!')
1744 /* Set lp to point at the first char after the last slash in the
1745 line or, if no slashes, at the first nonblank. Then set cp to
1746 the first successive blank and terminate the string. */
1747 lp
= strrchr (lb
.buffer
+2, '/');
1751 lp
= skip_spaces (lb
.buffer
+ 2);
1752 cp
= skip_non_spaces (lp
);
1755 if (strlen (lp
) > 0)
1757 lang
= get_language_from_interpreter (lp
);
1758 if (lang
!= NULL
&& lang
->function
!= NULL
)
1760 curfdp
->lang
= lang
;
1761 parser
= lang
->function
;
1768 /* Else try to guess the language given the case insensitive file name. */
1771 lang
= get_language_from_filename (curfdp
->infname
, false);
1772 if (lang
!= NULL
&& lang
->function
!= NULL
)
1774 curfdp
->lang
= lang
;
1775 parser
= lang
->function
;
1779 /* Else try Fortran or C. */
1782 node
*old_last_node
= last_node
;
1784 curfdp
->lang
= get_language_from_langname ("fortran");
1787 if (old_last_node
== last_node
)
1788 /* No Fortran entries found. Try C. */
1791 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1797 if (!no_line_directive
1798 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1799 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1800 file, or anyway we parsed a file that is automatically generated from
1801 this one. If this is the case, the bingo.c file contained #line
1802 directives that generated tags pointing to this file. Let's delete
1803 them all before parsing this file, which is the real source. */
1805 fdesc
**fdpp
= &fdhead
;
1806 while (*fdpp
!= NULL
)
1808 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1809 /* We found one of those! We must delete both the file description
1810 and all tags referring to it. */
1812 fdesc
*badfdp
= *fdpp
;
1814 /* Delete the tags referring to badfdp->taggedfname
1815 that were obtained from badfdp->infname. */
1816 invalidate_nodes (badfdp
, &nodehead
);
1818 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1819 free_fdesc (badfdp
);
1822 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1825 assert (parser
!= NULL
);
1827 /* Generic initializations before reading from file. */
1828 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1830 /* Generic initializations before parsing file with readline. */
1831 lineno
= 0; /* reset global line number */
1832 charno
= 0; /* reset global char number */
1833 linecharno
= 0; /* reset global char number of line start */
1837 regex_tag_multiline ();
1842 * Check whether an implicitly named tag should be created,
1843 * then call `pfnote'.
1844 * NAME is a string that is internally copied by this function.
1846 * TAGS format specification
1847 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1848 * The following is explained in some more detail in etc/ETAGS.EBNF.
1850 * make_tag creates tags with "implicit tag names" (unnamed tags)
1851 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1852 * 1. NAME does not contain any of the characters in NONAM;
1853 * 2. LINESTART contains name as either a rightmost, or rightmost but
1854 * one character, substring;
1855 * 3. the character, if any, immediately before NAME in LINESTART must
1856 * be a character in NONAM;
1857 * 4. the character, if any, immediately after NAME in LINESTART must
1858 * also be a character in NONAM.
1860 * The implementation uses the notinname() macro, which recognizes the
1861 * characters stored in the string `nonam'.
1862 * etags.el needs to use the same characters that are in NONAM.
1865 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1866 int namelen
, /* tag length */
1867 bool is_func
, /* tag is a function */
1868 char *linestart
, /* start of the line where tag is */
1869 int linelen
, /* length of the line where tag is */
1870 int lno
, /* line number */
1871 long int cno
) /* character number */
1873 bool named
= (name
!= NULL
&& namelen
> 0);
1876 if (!CTAGS
&& named
) /* maybe set named to false */
1877 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1878 such that etags.el can guess a name from it. */
1881 register const char *cp
= name
;
1883 for (i
= 0; i
< namelen
; i
++)
1884 if (notinname (*cp
++))
1886 if (i
== namelen
) /* rule #1 */
1888 cp
= linestart
+ linelen
- namelen
;
1889 if (notinname (linestart
[linelen
-1]))
1890 cp
-= 1; /* rule #4 */
1891 if (cp
>= linestart
/* rule #2 */
1893 || notinname (cp
[-1])) /* rule #3 */
1894 && strneq (name
, cp
, namelen
)) /* rule #2 */
1895 named
= false; /* use implicit tag name */
1900 nname
= savenstr (name
, namelen
);
1902 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
1907 pfnote (char *name
, bool is_func
, char *linestart
, int linelen
, int lno
,
1909 /* tag name, or NULL if unnamed */
1910 /* tag is a function */
1911 /* start of the line where tag is */
1912 /* length of the line where tag is */
1914 /* character number */
1918 assert (name
== NULL
|| name
[0] != '\0');
1919 if (CTAGS
&& name
== NULL
)
1922 np
= xnew (1, node
);
1924 /* If ctags mode, change name "main" to M<thisfilename>. */
1925 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1927 char *fp
= strrchr (curfdp
->taggedfname
, '/');
1928 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1929 fp
= strrchr (np
->name
, '.');
1930 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1936 np
->been_warned
= false;
1938 np
->is_func
= is_func
;
1940 if (np
->fdp
->usecharno
)
1941 /* Our char numbers are 0-base, because of C language tradition?
1942 ctags compatibility? old versions compatibility? I don't know.
1943 Anyway, since emacs's are 1-base we expect etags.el to take care
1944 of the difference. If we wanted to have 1-based numbers, we would
1945 uncomment the +1 below. */
1946 np
->cno
= cno
/* + 1 */ ;
1948 np
->cno
= invalidcharno
;
1949 np
->left
= np
->right
= NULL
;
1950 if (CTAGS
&& !cxref_style
)
1952 if (strlen (linestart
) < 50)
1953 np
->regex
= concat (linestart
, "$", "");
1955 np
->regex
= savenstr (linestart
, 50);
1958 np
->regex
= savenstr (linestart
, linelen
);
1960 add_node (np
, &nodehead
);
1965 * recurse on left children, iterate on right children.
1968 free_tree (register node
*np
)
1972 register node
*node_right
= np
->right
;
1973 free_tree (np
->left
);
1983 * delete a file description
1986 free_fdesc (register fdesc
*fdp
)
1988 free (fdp
->infname
);
1989 free (fdp
->infabsname
);
1990 free (fdp
->infabsdir
);
1991 free (fdp
->taggedfname
);
1998 * Adds a node to the tree of nodes. In etags mode, sort by file
1999 * name. In ctags mode, sort by tag name. Make no attempt at
2002 * add_node is the only function allowed to add nodes, so it can
2006 add_node (node
*np
, node
**cur_node_p
)
2009 register node
*cur_node
= *cur_node_p
;
2011 if (cur_node
== NULL
)
2021 /* For each file name, tags are in a linked sublist on the right
2022 pointer. The first tags of different files are a linked list
2023 on the left pointer. last_node points to the end of the last
2025 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2027 /* Let's use the same sublist as the last added node. */
2028 assert (last_node
->right
== NULL
);
2029 last_node
->right
= np
;
2032 else if (cur_node
->fdp
== np
->fdp
)
2034 /* Scanning the list we found the head of a sublist which is
2035 good for us. Let's scan this sublist. */
2036 add_node (np
, &cur_node
->right
);
2039 /* The head of this sublist is not good for us. Let's try the
2041 add_node (np
, &cur_node
->left
);
2042 } /* if ETAGS mode */
2047 dif
= strcmp (np
->name
, cur_node
->name
);
2050 * If this tag name matches an existing one, then
2051 * do not add the node, but maybe print a warning.
2053 if (no_duplicates
&& !dif
)
2055 if (np
->fdp
== cur_node
->fdp
)
2059 fprintf (stderr
, "Duplicate entry in file %s, line %d: %s\n",
2060 np
->fdp
->infname
, lineno
, np
->name
);
2061 fprintf (stderr
, "Second entry ignored\n");
2064 else if (!cur_node
->been_warned
&& !no_warnings
)
2068 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2069 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2070 cur_node
->been_warned
= true;
2075 /* Actually add the node */
2076 add_node (np
, dif
< 0 ? &cur_node
->left
: &cur_node
->right
);
2077 } /* if CTAGS mode */
2081 * invalidate_nodes ()
2082 * Scan the node tree and invalidate all nodes pointing to the
2083 * given file description (CTAGS case) or free them (ETAGS case).
2086 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2095 if (np
->left
!= NULL
)
2096 invalidate_nodes (badfdp
, &np
->left
);
2097 if (np
->fdp
== badfdp
)
2099 if (np
->right
!= NULL
)
2100 invalidate_nodes (badfdp
, &np
->right
);
2104 assert (np
->fdp
!= NULL
);
2105 if (np
->fdp
== badfdp
)
2107 *npp
= np
->left
; /* detach the sublist from the list */
2108 np
->left
= NULL
; /* isolate it */
2109 free_tree (np
); /* free it */
2110 invalidate_nodes (badfdp
, npp
);
2113 invalidate_nodes (badfdp
, &np
->left
);
2118 static int total_size_of_entries (node
*);
2119 static int number_len (long) ATTRIBUTE_CONST
;
/* Length of a non-negative number's decimal representation.
   NUM is the number to measure; the result is at least 1, so 0 counts
   as one digit.  */
static int
number_len (long int num)
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2132 * Return total number of characters that put_entries will output for
2133 * the nodes in the linked list at the right of the specified node.
2134 * This count is irrelevant with etags.el since emacs 19.34 at least,
2135 * but is still supplied for backward compatibility.
2138 total_size_of_entries (register node
*np
)
2140 register int total
= 0;
2142 for (; np
!= NULL
; np
= np
->right
)
2145 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2146 if (np
->name
!= NULL
)
2147 total
+= strlen (np
->name
) + 1; /* name\001 */
2148 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2149 if (np
->cno
!= invalidcharno
) /* cno */
2150 total
+= number_len (np
->cno
);
2151 total
+= 1; /* newline */
2158 put_entries (register node
*np
)
2161 static fdesc
*fdp
= NULL
;
2166 /* Output subentries that precede this one */
2168 put_entries (np
->left
);
2170 /* Output this entry */
2179 fprintf (tagf
, "\f\n%s,%d\n",
2180 fdp
->taggedfname
, total_size_of_entries (np
));
2181 fdp
->written
= true;
2183 fputs (np
->regex
, tagf
);
2184 fputc ('\177', tagf
);
2185 if (np
->name
!= NULL
)
2187 fputs (np
->name
, tagf
);
2188 fputc ('\001', tagf
);
2190 fprintf (tagf
, "%d,", np
->lno
);
2191 if (np
->cno
!= invalidcharno
)
2192 fprintf (tagf
, "%ld", np
->cno
);
2198 if (np
->name
== NULL
)
2199 error ("internal error: NULL name in ctags mode.");
2204 fprintf (stdout
, "%s %s %d\n",
2205 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2207 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2208 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2212 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2215 { /* function or #define macro with args */
2216 putc (searchar
, tagf
);
2219 for (sp
= np
->regex
; *sp
; sp
++)
2221 if (*sp
== '\\' || *sp
== searchar
)
2225 putc (searchar
, tagf
);
2228 { /* anything else; text pattern inadequate */
2229 fprintf (tagf
, "%d", np
->lno
);
2234 } /* if this node contains a valid tag */
2236 /* Output subentries that follow this one */
2237 put_entries (np
->right
);
2239 put_entries (np
->left
);
/* C dialect flags.  The c_ext column of the keyword table is written
   in terms of these values.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
/*
 * The C symbol tables.
 *
 * Classification attached to each keyword in the keyword table;
 * st_none is the first enumerator and therefore has the value 0.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2267 /* Feed stuff between (but not including) %[ and %] lines to:
2273 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2277 while, 0, st_C_ignore
2278 switch, 0, st_C_ignore
2279 return, 0, st_C_ignore
2280 __attribute__, 0, st_C_attribute
2281 GTY, 0, st_C_attribute
2282 @interface, 0, st_C_objprot
2283 @protocol, 0, st_C_objprot
2284 @implementation,0, st_C_objimpl
2285 @end, 0, st_C_objend
2286 import, (C_JAVA & ~C_PLPL), st_C_ignore
2287 package, (C_JAVA & ~C_PLPL), st_C_ignore
2288 friend, C_PLPL, st_C_ignore
2289 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2290 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2291 interface, (C_JAVA & ~C_PLPL), st_C_struct
2292 class, 0, st_C_class
2293 namespace, C_PLPL, st_C_struct
2294 domain, C_STAR, st_C_struct
2295 union, 0, st_C_struct
2296 struct, 0, st_C_struct
2297 extern, 0, st_C_extern
2299 typedef, 0, st_C_typedef
2300 define, 0, st_C_define
2301 undef, 0, st_C_define
2302 operator, C_PLPL, st_C_operator
2303 template, 0, st_C_template
2304 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2305 DEFUN, 0, st_C_gnumacro
2306 SYSCALL, 0, st_C_gnumacro
2307 ENTRY, 0, st_C_gnumacro
2308 PSEUDO, 0, st_C_gnumacro
2309 # These are defined inside C functions, so currently they are not met.
2310 # EXFUN used in glibc, DEFVAR_* in emacs.
2311 #EXFUN, 0, st_C_gnumacro
2312 #DEFVAR_, 0, st_C_gnumacro
2314 and replace lines between %< and %> with its output, then:
2315 - remove the #if characterset check
2316 - make in_word_set static and not inline. */
2318 /* C code produced by gperf version 3.0.1 */
2319 /* Command-line: gperf -m 5 */
2320 /* Computed positions: -k'2-3' */
2322 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
/* maximum key range = 33, duplicates = 0 */

/* Perfect-hash function generated by gperf for the keyword table.
   The value is LEN plus the weight of the second character, plus the
   weight of the third character unless LEN is exactly 2.  The caller
   must make sure STR is long enough for the positions read.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  const unsigned char *key = (const unsigned char *) str;
  int hval = len;

  /* Only a two-character word skips the third-character weight.  */
  if (len != 2)
    hval += asso_values[key[2]];
  hval += asso_values[key[1]];

  return hval;
}
2371 static struct C_stab_entry
*
2372 in_word_set (register const char *str
, register unsigned int len
)
2376 TOTAL_KEYWORDS
= 33,
2377 MIN_WORD_LENGTH
= 2,
2378 MAX_WORD_LENGTH
= 15,
2383 static struct C_stab_entry wordlist
[] =
2386 {"if", 0, st_C_ignore
},
2387 {"GTY", 0, st_C_attribute
},
2388 {"@end", 0, st_C_objend
},
2389 {"union", 0, st_C_struct
},
2390 {"define", 0, st_C_define
},
2391 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2392 {"template", 0, st_C_template
},
2393 {"operator", C_PLPL
, st_C_operator
},
2394 {"@interface", 0, st_C_objprot
},
2395 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2396 {"friend", C_PLPL
, st_C_ignore
},
2397 {"typedef", 0, st_C_typedef
},
2398 {"return", 0, st_C_ignore
},
2399 {"@implementation",0, st_C_objimpl
},
2400 {"@protocol", 0, st_C_objprot
},
2401 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2402 {"extern", 0, st_C_extern
},
2403 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2404 {"struct", 0, st_C_struct
},
2405 {"domain", C_STAR
, st_C_struct
},
2406 {"switch", 0, st_C_ignore
},
2407 {"enum", 0, st_C_enum
},
2408 {"for", 0, st_C_ignore
},
2409 {"namespace", C_PLPL
, st_C_struct
},
2410 {"class", 0, st_C_class
},
2411 {"while", 0, st_C_ignore
},
2412 {"undef", 0, st_C_define
},
2413 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2414 {"__attribute__", 0, st_C_attribute
},
2415 {"SYSCALL", 0, st_C_gnumacro
},
2416 {"ENTRY", 0, st_C_gnumacro
},
2417 {"PSEUDO", 0, st_C_gnumacro
},
2418 {"DEFUN", 0, st_C_gnumacro
}
2421 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2423 int key
= hash (str
, len
);
2425 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2427 const char *s
= wordlist
[key
].name
;
2429 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2430 return &wordlist
[key
];
2437 static enum sym_type
2438 C_symtype (char *str
, int len
, int c_ext
)
2440 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2442 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;	/* currently inside an __attribute__ construct */

/*
 * Function/variable recognizer.  fvdef holds the current state of a
 * small finite automaton.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen */

/*
 * Typedef recognizer.  typdef holds the current state.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;

/*
 * Recognizer for struct-like constructs (enum, struct and union).
 * structdef holds the current state.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static const char *objtag = "<uninited>";

/*
 * Recognizer for preprocessor lines.  definedef holds the current state.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
/*
 * Description of the most recently read token and of how it should be
 * tagged.  make_C_tag builds a tag from this record.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The members above can be used to pass strings around for generic
    purposes.  The members below specifically refer to creating tags;
    in that case the token held here is the pattern used for the tag.
  */
  bool valid;			/* true when a tag may be created from this
				   token; cleared whenever a state machine
				   is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2562 * Variables and functions for dealing with nested structures.
2563 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2565 static void pushclass_above (int, char *, int);
2566 static void popclass_above (int);
2567 static void write_classname (linebuffer
*, const char *qualifier
);
2570 char **cname
; /* nested class names */
2571 int *bracelev
; /* nested class brace level */
2572 int nl
; /* class nesting level (elements used) */
2573 int size
; /* length of the array */
2574 } cstack
; /* stack for nested declaration tags */
2575 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2576 #define nestlev (cstack.nl)
2577 /* After struct keyword or in struct body, not inside a nested function. */
2578 #define instruct (structdef == snone && nestlev > 0 \
2579 && bracelev == cstack.bracelev[nestlev-1] + 1)
2582 pushclass_above (int bracelev
, char *str
, int len
)
2586 popclass_above (bracelev
);
2588 if (nl
>= cstack
.size
)
2590 int size
= cstack
.size
*= 2;
2591 xrnew (cstack
.cname
, size
, char *);
2592 xrnew (cstack
.bracelev
, size
, int);
2594 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2595 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2596 cstack
.bracelev
[nl
] = bracelev
;
2601 popclass_above (int bracelev
)
2605 for (nl
= cstack
.nl
- 1;
2606 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2609 free (cstack
.cname
[nl
]);
2615 write_classname (linebuffer
*cn
, const char *qualifier
)
2618 int qlen
= strlen (qualifier
);
2620 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2624 cn
->buffer
[0] = '\0';
2628 len
= strlen (cstack
.cname
[0]);
2629 linebuffer_setlen (cn
, len
);
2630 strcpy (cn
->buffer
, cstack
.cname
[0]);
2632 for (i
= 1; i
< cstack
.nl
; i
++)
2634 char *s
= cstack
.cname
[i
];
2637 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2638 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2643 static bool consider_token (char *, int, int, int *, int, int, bool *);
2644 static void make_C_tag (bool);
2648 * checks to see if the current token is at the start of a
2649 * function or variable, or corresponds to a typedef, or
2650 * is a struct/union/enum tag, or #define, or an enum constant.
2652 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2653 * with args. C_EXTP points to which language we are looking at.
/* consider_token: decide whether the token STR/LEN deserves a tag.
   The token is classified with C_symtype for the language mask
   *C_EXTP, and the file-level state machines (definedef, typdef,
   structdef, objdef, fvdef) are advanced accordingly.
   *IS_FUNC_OR_VAR is set when the token looks like a function, a
   variable, or a macro followed by '('.  *C_EXTP may be changed from
   auto-detection (C_AUTO) to C++ (C_PLPL).  structtype,
   structbracelev and toktype are static, so their values persist
   from one call to the next.  */
2664 consider_token (char *str
, int len
, int c
, int *c_extp
,
2665 int bracelev
, int parlev
, bool *is_func_or_var
)
2666 /* IN: token pointer */
2667 /* IN: token length */
2668 /* IN: first char after the token */
2669 /* IN, OUT: C extensions mask */
2670 /* IN: brace level */
2671 /* IN: parenthesis level */
2672 /* OUT: function or variable found */
2674 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2675 structtype is the type of the preceding struct-like keyword, and
2676 structbracelev is the brace level where it has been seen. */
2677 static enum sym_type structtype
;
2678 static int structbracelev
;
2679 static enum sym_type toktype
;
/* Classify the token against the keyword table for the current
   language mask.  */
2682 toktype
= C_symtype (str
, len
, *c_extp
);
2685 * Skip __attribute__
2687 if (toktype
== st_C_attribute
)
2694 * Advance the definedef state machine.
2699 /* We're not on a preprocessor line. */
2700 if (toktype
== st_C_gnumacro
)
2707 if (toktype
== st_C_define
)
2709 definedef
= ddefineseen
;
2713 definedef
= dignorerest
;
2718 * Make a tag for any macro, unless it is a constant
2719 * and constantypedefs is false.
2721 definedef
= dignorerest
;
2722 *is_func_or_var
= (c
== '(');
2723 if (!*is_func_or_var
&& !constantypedefs
)
2730 error ("internal error: definedef value.");
2739 if (toktype
== st_C_typedef
)
2759 if (structdef
== snone
&& fvdef
== fvnone
)
2778 case st_C_javastruct
:
2779 if (structdef
== stagseen
)
2780 structdef
= scolonseen
;
/* Leaving auto-detection: the assignment below sets C_PLPL and
   clears C_AUTO in the caller's language mask.  */
2784 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
2786 && definedef
== dnone
&& structdef
== snone
2787 && typdef
== tnone
&& fvdef
== fvnone
)
2788 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2789 if (toktype
== st_C_template
)
2796 && (typdef
== tkeyseen
2797 || (typedefs_or_cplusplus
&& structdef
== snone
)))
2799 structdef
= skeyseen
;
2800 structtype
= toktype
;
2801 structbracelev
= bracelev
;
2802 if (fvdef
== fvnameseen
)
2808 if (structdef
== skeyseen
)
2810 structdef
= stagseen
;
2814 if (typdef
!= tnone
)
2817 /* Detect Objective C constructs. */
2827 objdef
= oimplementation
;
2831 case oimplementation
:
2832 /* Save the class tag for functions or variables defined inside. */
2833 objtag
= savenstr (str
, len
);
2837 /* Save the class tag for categories. */
2838 objtag
= savenstr (str
, len
);
2840 *is_func_or_var
= true;
2844 *is_func_or_var
= true;
2852 objdef
= omethodtag
;
2853 linebuffer_setlen (&token_name
, len
);
2854 memcpy (token_name
.buffer
, str
, len
);
2855 token_name
.buffer
[len
] = '\0';
2861 objdef
= omethodparm
;
2866 objdef
= omethodtag
;
2869 int oldlen
= token_name
.len
;
2871 linebuffer_setlen (&token_name
, oldlen
+ len
);
2872 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
2873 token_name
.buffer
[oldlen
+ len
] = '\0';
2879 if (toktype
== st_C_objend
)
2881 /* Memory leakage here: the string pointed by objtag is
2882 never released, because many tests would be needed to
2883 avoid breaking on incorrect input code. The amount of
2884 memory leaked here is the sum of the lengths of the
2892 /* A function, variable or enum constant? */
2914 *is_func_or_var
= true;
2918 && structdef
== snone
2919 && structtype
== st_C_enum
&& bracelev
> structbracelev
2920 /* Don't tag tokens in expressions that assign values to enum
2922 && fvdef
!= vignore
)
2923 return true; /* enum constant */
2929 fvdef
= fdefunname
; /* GNU macro */
2930 *is_func_or_var
= true;
2938 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
2939 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
2948 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
2950 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
2951 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2953 *is_func_or_var
= true;
2956 if (bracelev
> 0 && !instruct
)
2958 fvdef
= fvnameseen
; /* function or variable */
2959 *is_func_or_var
= true;
2970 * C_entries often keeps pointers to tokens or lines which are older than
2971 * the line currently read. By keeping two line buffers, and switching
2972 * them at end of line, it is possible to use those pointers.
/* Helper macros for C_entries.  They expand in terms of names such as
   curndx, newndx, c_ext, lp, inf and savetoken, so they are only
   meaningful where those names are in scope.  curlb/newlb and
   curlinepos/newlinepos select the `current' and `new' slots of the
   lbs array; plainc, cplpl and cjava test the language mask.  */
2980 #define current_lb_is_new (newndx == curndx)
2981 #define switch_line_buffers() (curndx = 1 - curndx)
2983 #define curlb (lbs[curndx].lb)
2984 #define newlb (lbs[newndx].lb)
2985 #define curlinepos (lbs[curndx].linepos)
2986 #define newlinepos (lbs[newndx].linepos)
2988 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2989 #define cplpl (c_ext & C_PLPL)
2990 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2992 #define CNL_SAVE_DEFINEDEF() \
2994 curlinepos = charno; \
2995 readline (&curlb, inf); \
2996 lp = curlb.buffer; \
3003 CNL_SAVE_DEFINEDEF (); \
3004 if (savetoken.valid) \
3006 token = savetoken; \
3007 savetoken.valid = false; \
3009 definedef = dnone; \
3014 make_C_tag (bool isfun
)
3016 /* This function is never called when token.valid is false, but
3017 we must protect against invalid input or internal errors. */
3019 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
3020 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3022 { /* this branch is optimized away if !DEBUG */
3023 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
3024 token_name
.len
+ 17, isfun
, token
.line
,
3025 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3026 error ("INVALID TOKEN");
3029 token
.valid
= false;
/* Return true unless INF has reached end of file or has its error
   indicator set.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  if (ferror (inf))
    return false;
  return true;
}
3041 * This routine finds functions, variables, typedefs,
3042 * #define's, enum constants and struct/union/enum definitions in
3043 * C syntax and adds them to the list.
/* C_entries: scan the stream INF as C-family source and create tags.
   C_EXT is the language mask built from the C_* flags.  The function
   works on two line buffers (lbs[0] and lbs[1]), allocates the
   class-nesting stack the first time it is needed, resets all the
   file-level state machines, and then loops for as long as
   perhaps_more_input (inf) holds.  Tags are emitted through
   make_C_tag; both line buffers are freed before returning.  */
3046 C_entries (int c_ext
, FILE *inf
)
3047 /* extension of C */
3050 register char c
; /* latest char read; '\0' for end of line */
3051 register char *lp
; /* pointer one beyond the character `c' */
3052 int curndx
, newndx
; /* indices for current and new lb */
3053 register int tokoff
; /* offset in line of start of current token */
3054 register int toklen
; /* length of current token */
3055 const char *qualifier
; /* string used to qualify names */
3056 int qlen
; /* length of qualifier */
3057 int bracelev
; /* current brace level */
3058 int bracketlev
; /* current bracket level */
3059 int parlev
; /* current parenthesis level */
3060 int attrparlev
; /* __attribute__ parenthesis level */
3061 int templatelev
; /* current template level */
3062 int typdefbracelev
; /* bracelev where a typedef struct body begun */
3063 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3064 bool yacc_rules
; /* in the rules part of a yacc file */
3065 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
/* Set up the two line buffers used alternately while scanning.  */
3068 linebuffer_init (&lbs
[0].lb
);
3069 linebuffer_init (&lbs
[1].lb
);
/* The nesting stack is allocated only once, the first time its size
   is found to be zero.  */
3070 if (cstack
.size
== 0)
3072 cstack
.size
= (DEBUG
) ? 1 : 4;
3074 cstack
.cname
= xnew (cstack
.size
, char *);
3075 cstack
.bracelev
= xnew (cstack
.size
, int);
3078 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3079 curndx
= newndx
= 0;
/* Put every state machine and nesting counter in its initial state.  */
3083 fvdef
= fvnone
; fvextern
= false; typdef
= tnone
;
3084 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3086 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3087 token
.valid
= savetoken
.valid
= false;
3088 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3090 { qualifier
= "."; qlen
= 1; }
3092 { qualifier
= "::"; qlen
= 2; }
/* Main scanning loop.  */
3095 while (perhaps_more_input (inf
))
3100 /* If we are at the end of the line, the next character is a
3101 '\0'; do not skip it, because it is what tells us
3102 to read the next line. */
3123 /* Newlines inside comments do not end macro definitions in
3125 CNL_SAVE_DEFINEDEF ();
3138 /* Newlines inside strings do not end macro definitions
3139 in traditional cpp, even though compilers don't
3140 usually accept them. */
3141 CNL_SAVE_DEFINEDEF ();
3151 /* Hmmm, something went wrong. */
3187 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3202 else if (/* cplpl && */ *lp
== '/')
3208 if ((c_ext
& YACC
) && *lp
== '%')
3210 /* Entering or exiting rules section in yacc file. */
3212 definedef
= dnone
; fvdef
= fvnone
; fvextern
= false;
3213 typdef
= tnone
; structdef
= snone
;
3214 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3216 yacc_rules
= !yacc_rules
;
3222 if (definedef
== dnone
)
3225 bool cpptoken
= true;
3227 /* Look back on this line. If all blanks, or nonblanks
3228 followed by an end of comment, this is a preprocessor
3230 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3231 if (!c_isspace (*cp
))
3233 if (*cp
== '*' && cp
[1] == '/')
3243 definedef
= dsharpseen
;
3244 /* This is needed for tagging enum values: when there are
3245 preprocessor conditionals inside the enum, we need to
3246 reset the value of fvdef so that the next enum value is
3247 tagged even though the one before it did not end in a
3249 if (fvdef
== vignore
&& instruct
&& parlev
== 0)
3251 if (strneq (cp
, "#if", 3) || strneq (cp
, "#el", 3))
3255 } /* if (definedef == dnone) */
3266 CNL_SAVE_DEFINEDEF ();
3273 /* Consider token only if some involved conditions are satisfied. */
3274 if (typdef
!= tignore
3275 && definedef
!= dignorerest
3278 && (definedef
!= dnone
3279 || structdef
!= scolonseen
)
3286 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3287 /* This handles :: in the middle,
3288 but not at the beginning of an identifier.
3289 Also, space-separated :: is not recognized. */
3291 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3292 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3296 goto still_in_token
;
3300 bool funorvar
= false;
3303 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3304 &c_ext
, bracelev
, parlev
,
3307 if (fvdef
== foperator
)
3310 lp
= skip_spaces (lp
-1);
3314 && !c_isspace (*lp
) && *lp
!= '(')
3317 toklen
+= lp
- oldlp
;
3319 token
.named
= false;
3321 && nestlev
> 0 && definedef
== dnone
)
3322 /* in struct body */
3327 write_classname (&token_name
, qualifier
);
3328 len
= token_name
.len
;
3329 linebuffer_setlen (&token_name
,
3330 len
+ qlen
+ toklen
);
3331 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3333 newlb
.buffer
+ tokoff
);
3337 linebuffer_setlen (&token_name
, toklen
);
3338 sprintf (token_name
.buffer
, "%.*s",
3339 toklen
, newlb
.buffer
+ tokoff
);
3343 else if (objdef
== ocatseen
)
3344 /* Objective C category */
3348 int len
= strlen (objtag
) + 2 + toklen
;
3349 linebuffer_setlen (&token_name
, len
);
3350 sprintf (token_name
.buffer
, "%s(%.*s)",
3352 newlb
.buffer
+ tokoff
);
3356 linebuffer_setlen (&token_name
, toklen
);
3357 sprintf (token_name
.buffer
, "%.*s",
3358 toklen
, newlb
.buffer
+ tokoff
);
3362 else if (objdef
== omethodtag
3363 || objdef
== omethodparm
)
3364 /* Objective C method */
3368 else if (fvdef
== fdefunname
)
3369 /* GNU DEFUN and similar macros */
3371 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3375 /* Rewrite the tag so that emacs lisp DEFUNs
3376 can be found by their elisp name */
3382 linebuffer_setlen (&token_name
, len
);
3383 memcpy (token_name
.buffer
,
3384 newlb
.buffer
+ off
, len
);
3385 token_name
.buffer
[len
] = '\0';
3388 if (token_name
.buffer
[len
] == '_')
3389 token_name
.buffer
[len
] = '-';
3390 token
.named
= defun
;
3394 linebuffer_setlen (&token_name
, toklen
);
3395 memcpy (token_name
.buffer
,
3396 newlb
.buffer
+ tokoff
, toklen
);
3397 token_name
.buffer
[toklen
] = '\0';
3398 /* Name macros and members. */
3399 token
.named
= (structdef
== stagseen
3400 || typdef
== ttypeseen
3403 && definedef
== dignorerest
)
3405 && definedef
== dnone
3406 && structdef
== snone
3409 token
.lineno
= lineno
;
3410 token
.offset
= tokoff
;
3411 token
.length
= toklen
;
3412 token
.line
= newlb
.buffer
;
3413 token
.linepos
= newlinepos
;
3416 if (definedef
== dnone
3417 && (fvdef
== fvnameseen
3418 || fvdef
== foperator
3419 || structdef
== stagseen
3421 || typdef
== ttypeseen
3422 || objdef
!= onone
))
3424 if (current_lb_is_new
)
3425 switch_line_buffers ();
3427 else if (definedef
!= dnone
3428 || fvdef
== fdefunname
3430 make_C_tag (funorvar
);
3432 else /* not yacc and consider_token failed */
3434 if (inattribute
&& fvdef
== fignore
)
3436 /* We have just met __attribute__ after a
3437 function parameter list: do not tag the
3444 } /* if (endtoken (c)) */
3445 else if (intoken (c
))
3451 } /* if (midtoken) */
3452 else if (begtoken (c
))
3460 /* This prevents tagging fb in
3461 void (__attribute__((noreturn)) *fb) (void);
3462 Fixing this is not easy and not very important. */
3466 if (plainc
|| declarations
)
3468 make_C_tag (true); /* a function */
3473 if (structdef
== stagseen
&& !cjava
)
3475 popclass_above (bracelev
);
3483 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3485 tokoff
= lp
- 1 - newlb
.buffer
;
3490 } /* if (begtoken) */
3491 } /* if must look at token */
3494 /* Detect end of line, colon, comma, semicolon and various braces
3495 after having handled a token.*/
3501 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3503 make_C_tag (false); /* a yacc function */
3506 if (definedef
!= dnone
)
3512 make_C_tag (true); /* an Objective C class */
3516 objdef
= omethodcolon
;
3519 int toklen
= token_name
.len
;
3520 linebuffer_setlen (&token_name
, toklen
+ 1);
3521 strcpy (token_name
.buffer
+ toklen
, ":");
3525 if (structdef
== stagseen
)
3527 structdef
= scolonseen
;
3530 /* Should be useless, but may be work as a safety net. */
3531 if (cplpl
&& fvdef
== flistseen
)
3533 make_C_tag (true); /* a function */
3539 if (definedef
!= dnone
|| inattribute
)
3545 make_C_tag (false); /* a typedef */
3555 if (typdef
== tignore
|| cplpl
)
3559 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3560 || (members
&& instruct
))
3561 make_C_tag (false); /* a variable */
3564 token
.valid
= false;
3568 && (cplpl
|| !instruct
)
3569 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3571 && plainc
&& instruct
))
3572 make_C_tag (true); /* a function */
3578 && cplpl
&& structdef
== stagseen
)
3579 make_C_tag (false); /* forward declaration */
3581 token
.valid
= false;
3582 } /* switch (fvdef) */
3588 if (structdef
== stagseen
)
3592 if (definedef
!= dnone
|| inattribute
)
3598 make_C_tag (true); /* an Objective C method */
3611 if (instruct
&& parlev
== 0)
3622 && (!fvextern
|| declarations
))
3623 || (members
&& instruct
)))
3624 make_C_tag (false); /* a variable */
3627 if ((declarations
&& typdef
== tnone
&& !instruct
)
3628 || (members
&& typdef
!= tignore
&& instruct
))
3630 make_C_tag (true); /* a function */
3633 else if (!declarations
)
3635 token
.valid
= false;
3640 if (structdef
== stagseen
)
3644 if (definedef
!= dnone
|| inattribute
)
3646 if (structdef
== stagseen
)
3653 make_C_tag (false); /* a typedef */
3665 if ((members
&& bracelev
== 1)
3666 || (globals
&& bracelev
== 0
3667 && (!fvextern
|| declarations
)))
3668 make_C_tag (false); /* a variable */
3682 if (definedef
!= dnone
)
3684 if (objdef
== otagseen
&& parlev
== 0)
3685 objdef
= oparenseen
;
3689 if (typdef
== ttypeseen
3693 /* This handles constructs like:
3694 typedef void OperatorFun (int fun); */
3713 if (--attrparlev
== 0)
3714 inattribute
= false;
3717 if (definedef
!= dnone
)
3719 if (objdef
== ocatseen
&& parlev
== 1)
3721 make_C_tag (true); /* an Objective C category */
3735 || typdef
== ttypeseen
))
3738 make_C_tag (false); /* a typedef */
3741 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
3745 if (definedef
!= dnone
)
3747 if (typdef
== ttypeseen
)
3749 /* Whenever typdef is set to tinbody (currently only
3750 here), typdefbracelev should be set to bracelev. */
3752 typdefbracelev
= bracelev
;
3757 if (cplpl
&& !class_qualify
)
3759 /* Remove class and namespace qualifiers from the token,
3760 leaving only the method/member name. */
3761 char *cc
, *uqname
= token_name
.buffer
;
3762 char *tok_end
= token_name
.buffer
+ token_name
.len
;
3764 for (cc
= token_name
.buffer
; cc
< tok_end
; cc
++)
3766 if (*cc
== ':' && cc
[1] == ':')
3772 if (uqname
> token_name
.buffer
)
3774 int uqlen
= strlen (uqname
);
3775 linebuffer_setlen (&token_name
, uqlen
);
3776 memmove (token_name
.buffer
, uqname
, uqlen
+ 1);
3779 make_C_tag (true); /* a function */
3788 make_C_tag (true); /* an Objective C class */
3793 make_C_tag (true); /* an Objective C method */
3797 /* Neutralize `extern "C" {' grot. */
3798 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
3806 case skeyseen
: /* unnamed struct */
3807 pushclass_above (bracelev
, NULL
, 0);
3810 case stagseen
: /* named struct or enum */
3811 case scolonseen
: /* a class */
3812 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
3814 make_C_tag (false); /* a struct or enum */
3820 if (definedef
!= dnone
)
3822 if (fvdef
== fstartlist
)
3824 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
3825 token
.valid
= false;
3829 if (definedef
!= dnone
)
3832 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
3835 token
.valid
= false; /* unexpected value, token unreliable */
3836 bracelev
= 0; /* reset brace level if first column */
3837 parlev
= 0; /* also reset paren level, just in case... */
3839 else if (bracelev
< 0)
3841 token
.valid
= false; /* something gone amiss, token unreliable */
3844 if (bracelev
== 0 && fvdef
== vignore
)
3845 fvdef
= fvnone
; /* end of function */
3846 popclass_above (bracelev
);
3848 /* Only if typdef == tinbody is typdefbracelev significant. */
3849 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
3851 assert (bracelev
== typdefbracelev
);
3856 if (definedef
!= dnone
)
3866 if ((members
&& bracelev
== 1)
3867 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
3868 make_C_tag (false); /* a variable */
3876 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
3883 if (templatelev
> 0)
3891 if (objdef
== oinbody
&& bracelev
== 0)
3893 objdef
= omethodsign
;
3898 case '#': case '~': case '&': case '%': case '/':
3899 case '|': case '^': case '!': case '.': case '?':
3900 if (definedef
!= dnone
)
3902 /* These surely cannot follow a function tag in C. */
3915 if (objdef
== otagseen
)
3917 make_C_tag (true); /* an Objective C class */
3920 /* If a macro spans multiple lines don't reset its state. */
3922 CNL_SAVE_DEFINEDEF ();
3928 } /* while not eof */
3930 free (lbs
[0].lb
.buffer
);
3931 free (lbs
[1].lb
.buffer
);
3935 * Process either a C++ file or a C file depending on the setting
3939 default_C_entries (FILE *inf
)
3941 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
3944 /* Always do plain C. */
3946 plain_C_entries (FILE *inf
)
3951 /* Always do C++. */
3953 Cplusplus_entries (FILE *inf
)
3955 C_entries (C_PLPL
, inf
);
3958 /* Always do Java. */
3960 Cjava_entries (FILE *inf
)
3962 C_entries (C_JAVA
, inf
);
3967 Cstar_entries (FILE *inf
)
3969 C_entries (C_STAR
, inf
);
3972 /* Always do Yacc. */
3974 Yacc_entries (FILE *inf
)
3976 C_entries (YACC
, inf
);
3980 /* Useful macros.
   LOOP_ON_INPUT_LINES is a loop header: while more input may be
   available it reads one line into line_buffer and points
   char_pointer at the start of that line.
   LOOKING_AT tests whether cp points at the literal keyword kw
   followed by a character for which notinname holds; on success it
   advances cp past the keyword and past any following spaces.  */
3981 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
3982 while (perhaps_more_input (file_pointer) \
3983 && (readline (&(line_buffer), file_pointer), \
3984 (char_pointer) = (line_buffer).buffer, \
3987 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
3988 ((assert ("" kw), true) /* syntax error if not a literal string */ \
3989 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3990 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
3991 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3993 /* Similar to LOOKING_AT, but the comparison ignores case, notinname
   is not used, and following spaces are not skipped: on success cp is
   left just after the keyword. */
3994 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3995 ((assert ("" kw), true) /* syntax error if not a literal string */ \
3996 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3997 && ((cp) += sizeof (kw)-1)) /* advance past kw only */
4000 * Read a file, but do no processing. This is used to do regexp
4001 * matching on files that have no language defined.
4004 just_read_file (FILE *inf
)
4006 while (perhaps_more_input (inf
))
4007 readline (&lb
, inf
);
4011 /* Fortran parsing */
4013 static void F_takeprec (void);
4014 static void F_getit (FILE *);
4019 dbp
= skip_spaces (dbp
);
4023 dbp
= skip_spaces (dbp
);
4024 if (strneq (dbp
, "(*)", 3))
4029 if (!c_isdigit (*dbp
))
4031 --dbp
; /* force failure */
4036 while (c_isdigit (*dbp
));
4044 dbp
= skip_spaces (dbp
);
4047 readline (&lb
, inf
);
4052 dbp
= skip_spaces (dbp
);
4054 if (!c_isalpha (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
4056 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
4058 make_tag (dbp
, cp
-dbp
, true,
4059 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4064 Fortran_functions (FILE *inf
)
4066 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4069 dbp
++; /* Ratfor escape to fortran */
4070 dbp
= skip_spaces (dbp
);
4074 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
4075 dbp
= skip_spaces (dbp
);
4077 if (LOOKING_AT_NOCASE (dbp
, "pure"))
4078 dbp
= skip_spaces (dbp
);
4080 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
4081 dbp
= skip_spaces (dbp
);
4083 switch (c_tolower (*dbp
))
4086 if (nocase_tail ("integer"))
4090 if (nocase_tail ("real"))
4094 if (nocase_tail ("logical"))
4098 if (nocase_tail ("complex") || nocase_tail ("character"))
4102 if (nocase_tail ("double"))
4104 dbp
= skip_spaces (dbp
);
4107 if (nocase_tail ("precision"))
4113 dbp
= skip_spaces (dbp
);
4116 switch (c_tolower (*dbp
))
4119 if (nocase_tail ("function"))
4123 if (nocase_tail ("subroutine"))
4127 if (nocase_tail ("entry"))
4131 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4133 dbp
= skip_spaces (dbp
);
4134 if (*dbp
== '\0') /* assume un-named */
4135 make_tag ("blockdata", 9, true,
4136 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4138 F_getit (inf
); /* look for name */
4149 * Philippe Waroquiers (1998)
4152 /* Once we are positioned after an "interesting" keyword, let's get
4153 the real tag value necessary. */
4155 Ada_getit (FILE *inf
, const char *name_qualifier
)
4161 while (perhaps_more_input (inf
))
4163 dbp
= skip_spaces (dbp
);
4165 || (dbp
[0] == '-' && dbp
[1] == '-'))
4167 readline (&lb
, inf
);
4170 switch (c_tolower (*dbp
))
4173 if (nocase_tail ("body"))
4175 /* Skipping body of procedure body or package body or ....
4176 resetting qualifier to body instead of spec. */
4177 name_qualifier
= "/b";
4182 /* Skipping type of task type or protected type ... */
4183 if (nocase_tail ("type"))
4190 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4195 dbp
= skip_spaces (dbp
);
4197 c_isalnum (*cp
) || *cp
== '_' || *cp
== '.';
4205 name
= concat (dbp
, name_qualifier
, "");
4207 make_tag (name
, strlen (name
), true,
4208 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4217 Ada_funcs (FILE *inf
)
4219 bool inquote
= false;
4220 bool skip_till_semicolumn
= false;
4222 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4224 while (*dbp
!= '\0')
4226 /* Skip a string i.e. "abcd". */
4227 if (inquote
|| (*dbp
== '"'))
4229 dbp
= strchr (dbp
+ !inquote
, '"');
4234 continue; /* advance char */
4239 break; /* advance line */
4243 /* Skip comments. */
4244 if (dbp
[0] == '-' && dbp
[1] == '-')
4245 break; /* advance line */
4247 /* Skip character enclosed in single quote i.e. 'a'
4248 and skip single quote starting an attribute i.e. 'Image. */
4257 if (skip_till_semicolumn
)
4260 skip_till_semicolumn
= false;
4262 continue; /* advance char */
4265 /* Search for beginning of a token. */
4266 if (!begtoken (*dbp
))
4269 continue; /* advance char */
4272 /* We are at the beginning of a token. */
4273 switch (c_tolower (*dbp
))
4276 if (!packages_only
&& nocase_tail ("function"))
4277 Ada_getit (inf
, "/f");
4279 break; /* from switch */
4280 continue; /* advance char */
4282 if (!packages_only
&& nocase_tail ("procedure"))
4283 Ada_getit (inf
, "/p");
4284 else if (nocase_tail ("package"))
4285 Ada_getit (inf
, "/s");
4286 else if (nocase_tail ("protected")) /* protected type */
4287 Ada_getit (inf
, "/t");
4289 break; /* from switch */
4290 continue; /* advance char */
4293 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4295 /* when tagging types, avoid tagging use type Pack.Typename;
4296 for this, we will skip everything till a ; */
4297 skip_till_semicolumn
= true;
4298 continue; /* advance char */
4302 if (!packages_only
&& nocase_tail ("task"))
4303 Ada_getit (inf
, "/k");
4304 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4306 Ada_getit (inf
, "/t");
4307 while (*dbp
!= '\0')
4311 break; /* from switch */
4312 continue; /* advance char */
4315 /* Look for the end of the token. */
4316 while (!endtoken (*dbp
))
4319 } /* advance char */
4320 } /* advance line */
4325 * Unix and microcontroller assembly tag handling
4326 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4327 * Idea by Bob Weiner, Motorola Inc. (1994)
4330 Asm_labels (FILE *inf
)
4334 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4336 /* If first char is alphabetic or one of [_.$], test for colon
4337 following identifier. */
4338 if (c_isalpha (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4340 /* Read past label. */
4342 while (c_isalnum (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4344 if (*cp
== ':' || c_isspace (*cp
))
4345 /* Found end of label, so copy it and add it to the table. */
4346 make_tag (lb
.buffer
, cp
- lb
.buffer
, true,
4347 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4355 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4356 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4357 * Perl variable names: /^(my|local).../
4358 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4359 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4360 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4363 Perl_functions (FILE *inf
)
4365 char *package
= savestr ("main"); /* current package name */
4368 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4370 cp
= skip_spaces (cp
);
4372 if (LOOKING_AT (cp
, "package"))
4375 get_tag (cp
, &package
);
4377 else if (LOOKING_AT (cp
, "sub"))
4383 while (!notinname (*cp
))
4386 continue; /* nothing found */
4387 pos
= strchr (sp
, ':');
4388 if (pos
&& pos
< cp
&& pos
[1] == ':')
4389 /* The name is already qualified. */
4390 make_tag (sp
, cp
- sp
, true,
4391 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4395 char savechar
, *name
;
4399 name
= concat (package
, "::", sp
);
4401 make_tag (name
, strlen (name
), true,
4402 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4406 else if (LOOKING_AT (cp
, "use constant")
4407 || LOOKING_AT (cp
, "use constant::defer"))
4409 /* For hash style multi-constant like
4410 use constant { FOO => 123,
4412 only the first FOO is picked up. Parsing across the value
4413 expressions would be difficult in general, due to possible nested
4414 hashes, here-documents, etc. */
4416 cp
= skip_spaces (cp
+1);
4419 else if (globals
) /* only if we are tagging global vars */
4421 /* Skip a qualifier, if any. */
4422 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4423 /* After "my" or "local", but before any following paren or space. */
4424 char *varstart
= cp
;
4426 if (qual
/* should this be removed? If yes, how? */
4427 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4432 while (c_isalnum (*cp
) || *cp
== '_');
4436 /* Should be examining a variable list at this point;
4437 could insist on seeing an open parenthesis. */
4438 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4444 make_tag (varstart
, cp
- varstart
, false,
4445 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4454 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4455 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4456 * More ideas by seb bacon <seb@jamkit.com> (2002)
4459 Python_functions (FILE *inf
)
4463 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4465 cp
= skip_spaces (cp
);
4466 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4469 while (!notinname (*cp
) && *cp
!= ':')
4471 make_tag (name
, cp
- name
, true,
4472 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4481 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4482 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4483 * - /^[ \t]*define\(\"[^\"]+/
4484 * Only with --members:
4485 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4486 * Idea by Diez B. Roggisch (2001)
4489 PHP_functions (FILE *inf
)
4492 bool search_identifier
= false;
4494 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4496 cp
= skip_spaces (cp
);
4498 if (search_identifier
4501 while (!notinname (*cp
))
4503 make_tag (name
, cp
- name
, true,
4504 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4505 search_identifier
= false;
4507 else if (LOOKING_AT (cp
, "function"))
4510 cp
= skip_spaces (cp
+1);
4514 while (!notinname (*cp
))
4516 make_tag (name
, cp
- name
, true,
4517 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4520 search_identifier
= true;
4522 else if (LOOKING_AT (cp
, "class"))
4527 while (*cp
!= '\0' && !c_isspace (*cp
))
4529 make_tag (name
, cp
- name
, false,
4530 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4533 search_identifier
= true;
4535 else if (strneq (cp
, "define", 6)
4536 && (cp
= skip_spaces (cp
+6))
4538 && (*cp
== '"' || *cp
== '\''))
4542 while (*cp
!= quote
&& *cp
!= '\0')
4544 make_tag (name
, cp
- name
, false,
4545 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4548 && LOOKING_AT (cp
, "var")
4552 while (!notinname (*cp
))
4554 make_tag (name
, cp
- name
, false,
4555 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4562 * Cobol tag functions
4563 * We could look for anything that could be a paragraph name.
4564 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4565 * Idea by Corny de Souza (1993)
4568 Cobol_paragraphs (FILE *inf
)
4570 register char *bp
, *ep
;
4572 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4578 /* If eoln, compiler option or comment ignore whole line. */
4579 if (bp
[-1] != ' ' || !c_isalnum (bp
[0]))
4582 for (ep
= bp
; c_isalnum (*ep
) || *ep
== '-'; ep
++)
4585 make_tag (bp
, ep
- bp
, true,
4586 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4593 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4596 Makefile_targets (FILE *inf
)
4600 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4602 if (*bp
== '\t' || *bp
== '#')
4604 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
4606 if (*bp
== ':' || (globals
&& *bp
== '='))
4608 /* We should detect if there is more than one tag, but we do not.
4609 We just skip initial and final spaces. */
4610 char * namestart
= skip_spaces (lb
.buffer
);
4611 while (--bp
> namestart
)
4612 if (!notinname (*bp
))
4614 make_tag (namestart
, bp
- namestart
+ 1, true,
4615 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
4623 * Original code by Mosur K. Mohan (1989)
4625 * Locates tags for procedures & functions. Doesn't do any type- or
4626 * var-definitions. It does look for the keyword "extern" or
4627 * "forward" immediately following the procedure statement; if found,
4628 * the tag is skipped.
4631 Pascal_functions (FILE *inf
)
4633 linebuffer tline
; /* mostly copied from C_entries */
4635 int save_lineno
, namelen
, taglen
;
4638 bool /* each of these flags is true if: */
4639 incomment
, /* point is inside a comment */
4640 inquote
, /* point is inside '..' string */
4641 get_tagname
, /* point is after PROCEDURE/FUNCTION
4642 keyword, so next item = potential tag */
4643 found_tag
, /* point is after a potential tag */
4644 inparms
, /* point is within parameter-list */
4645 verify_tag
; /* point has passed the parm-list, so the
4646 next token will determine whether this
4647 is a FORWARD/EXTERN to be ignored, or
4648 whether it is a real tag */
4650 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
4651 name
= NULL
; /* keep compiler quiet */
4654 linebuffer_init (&tline
);
4656 incomment
= inquote
= false;
4657 found_tag
= false; /* have a proc name; check if extern */
4658 get_tagname
= false; /* found "procedure" keyword */
4659 inparms
= false; /* found '(' after "proc" */
4660 verify_tag
= false; /* check if "extern" is ahead */
4663 while (perhaps_more_input (inf
)) /* long main loop to get next char */
4666 if (c
== '\0') /* if end of line */
4668 readline (&lb
, inf
);
4672 if (!((found_tag
&& verify_tag
)
4674 c
= *dbp
++; /* only if don't need *dbp pointing
4675 to the beginning of the name of
4676 the procedure or function */
4680 if (c
== '}') /* within { } comments */
4682 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
4699 inquote
= true; /* found first quote */
4701 case '{': /* found open { comment */
4705 if (*dbp
== '*') /* found open (* comment */
4710 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
4713 case ')': /* end of parms list */
4718 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
4725 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
4727 /* Check if this is an "extern" declaration. */
4730 if (c_tolower (*dbp
) == 'e')
4732 if (nocase_tail ("extern")) /* superfluous, really! */
4738 else if (c_tolower (*dbp
) == 'f')
4740 if (nocase_tail ("forward")) /* check for forward reference */
4746 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
4750 make_tag (name
, namelen
, true,
4751 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
4755 if (get_tagname
) /* grab name of proc or fn */
4762 /* Find block name. */
4763 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
4766 /* Save all values for later tagging. */
4767 linebuffer_setlen (&tline
, lb
.len
);
4768 strcpy (tline
.buffer
, lb
.buffer
);
4769 save_lineno
= lineno
;
4770 save_lcno
= linecharno
;
4771 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
4773 taglen
= cp
- lb
.buffer
+ 1;
4775 dbp
= cp
; /* set dbp to e-o-token */
4776 get_tagname
= false;
4780 /* And proceed to check for "extern". */
4782 else if (!incomment
&& !inquote
&& !found_tag
)
4784 /* Check for proc/fn keywords. */
4785 switch (c_tolower (c
))
4788 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4792 if (nocase_tail ("unction"))
4797 } /* while not eof */
4799 free (tline
.buffer
);
4804 * Lisp tag functions
4805 * look for (def or (DEF, quote or QUOTE
4808 static void L_getit (void);
4813 if (*dbp
== '\'') /* Skip prefix quote */
4815 else if (*dbp
== '(')
4818 /* Try to skip "(quote " */
4819 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
4820 /* Ok, then skip "(" before name in (defstruct (foo)) */
4821 dbp
= skip_spaces (dbp
);
4823 get_tag (dbp
, NULL
);
4827 Lisp_functions (FILE *inf
)
4829 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4834 /* "(defvar foo)" is a declaration rather than a definition. */
4838 if (LOOKING_AT (p
, "defvar"))
4840 p
= skip_name (p
); /* past var name */
4841 p
= skip_spaces (p
);
4847 if (strneq (dbp
+ 1, "cl-", 3) || strneq (dbp
+ 1, "CL-", 3))
4850 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
4852 dbp
= skip_non_spaces (dbp
);
4853 dbp
= skip_spaces (dbp
);
4858 /* Check for (foo::defmumble name-defined ... */
4861 while (!notinname (*dbp
) && *dbp
!= ':');
4866 while (*dbp
== ':');
4868 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
4870 dbp
= skip_non_spaces (dbp
);
4871 dbp
= skip_spaces (dbp
);
4881 * Lua script language parsing
4882 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4884 * "function" and "local function" are tags if they start at column 1.
4887 Lua_functions (FILE *inf
)
4891 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4893 if (bp
[0] != 'f' && bp
[0] != 'l')
4896 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
4898 if (LOOKING_AT (bp
, "function"))
4906 * Just look for lines where the first character is '/'
4907 * Also look at "defineps" for PSWrap
4909 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4910 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4913 PS_functions (FILE *inf
)
4915 register char *bp
, *ep
;
4917 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4922 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
4925 make_tag (bp
, ep
- bp
, true,
4926 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4928 else if (LOOKING_AT (bp
, "defineps"))
4936 * Ignore anything after \ followed by space or in ( )
4937 * Look for words defined by :
4938 * Look for constant, code, create, defer, value, and variable
4939 * OBP extensions: Look for buffer:, field,
4940 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4943 Forth_words (FILE *inf
)
4947 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4948 while ((bp
= skip_spaces (bp
))[0] != '\0')
4949 if (bp
[0] == '\\' && c_isspace (bp
[1]))
4950 break; /* read next line */
4951 else if (bp
[0] == '(' && c_isspace (bp
[1]))
4952 do /* skip to ) or eol */
4954 while (*bp
!= ')' && *bp
!= '\0');
4955 else if ((bp
[0] == ':' && c_isspace (bp
[1]) && bp
++)
4956 || LOOKING_AT_NOCASE (bp
, "constant")
4957 || LOOKING_AT_NOCASE (bp
, "code")
4958 || LOOKING_AT_NOCASE (bp
, "create")
4959 || LOOKING_AT_NOCASE (bp
, "defer")
4960 || LOOKING_AT_NOCASE (bp
, "value")
4961 || LOOKING_AT_NOCASE (bp
, "variable")
4962 || LOOKING_AT_NOCASE (bp
, "buffer:")
4963 || LOOKING_AT_NOCASE (bp
, "field"))
4964 get_tag (skip_spaces (bp
), NULL
); /* Yay! A definition! */
4966 bp
= skip_non_spaces (bp
);
4971 * Scheme tag functions
4972 * look for (def... xyzzy
4974 * (def ... ((...(xyzzy ....
4976 * Original code by Ken Haase (1985?)
4979 Scheme_functions (FILE *inf
)
4983 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4985 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
4987 bp
= skip_non_spaces (bp
+4);
4988 /* Skip over open parens and white space. Don't continue past
4990 while (*bp
&& notinname (*bp
))
4994 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
5000 /* Find tags in TeX and LaTeX input files. */
5002 /* TEX_toktab is a table of TeX control sequences that define tags.
5003 * Each entry records one such control sequence.
5005 * Original code from who knows whom.
5007 * Stefan Monnier (2002)
5010 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
5012 /* Default set of control sequences to put into TEX_toktab.
5013 The value of environment var TEXTAGS is prepended to this. */
5014 static const char *TEX_defenv
= "\
5015 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5016 :part:appendix:entry:index:def\
5017 :newcommand:renewcommand:newenvironment:renewenvironment";
5019 static void TEX_decode_env (const char *, const char *);
5022 * TeX/LaTeX scanning loop.
5025 TeX_commands (FILE *inf
)
5030 char TEX_esc
= '\0';
5031 char TEX_opgrp
, TEX_clgrp
;
5033 /* Initialize token table once from environment. */
5034 if (TEX_toktab
== NULL
)
5035 TEX_decode_env ("TEXTAGS", TEX_defenv
);
5037 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5039 /* Look at each TEX keyword in line. */
5042 /* Look for a TEX escape. */
5046 if (c
== '\0' || c
== '%')
5049 /* Select either \ or ! as escape character, whichever comes
5050 first outside a comment. */
5071 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
5072 if (strneq (cp
, key
->buffer
, key
->len
))
5075 int namelen
, linelen
;
5078 cp
= skip_spaces (cp
+ key
->len
);
5079 if (*cp
== TEX_opgrp
)
5085 (!c_isspace (*p
) && *p
!= '#' &&
5086 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
5091 if (!opgrp
|| *p
== TEX_clgrp
)
5093 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
5095 linelen
= p
- lb
.buffer
+ 1;
5097 make_tag (cp
, namelen
, true,
5098 lb
.buffer
, linelen
, lineno
, linecharno
);
5099 goto tex_next_line
; /* We only tag a line once */
5107 /* Read environment and prepend it to the default string.
5108 Build token table. */
5110 TEX_decode_env (const char *evarname
, const char *defenv
)
5112 register const char *env
, *p
;
5115 /* Append default string to environment. */
5116 env
= getenv (evarname
);
5120 env
= concat (env
, defenv
, "");
5122 /* Allocate a token table */
5123 for (len
= 1, p
= env
; (p
= strchr (p
, ':')); )
5126 TEX_toktab
= xnew (len
, linebuffer
);
5128 /* Unpack environment string into token table. Be careful about */
5129 /* zero-length strings (leading ':', "::" and trailing ':') */
5130 for (i
= 0; *env
!= '\0';)
5132 p
= strchr (env
, ':');
5133 if (!p
) /* End of environment string. */
5134 p
= env
+ strlen (env
);
5136 { /* Only non-zero strings. */
5137 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5138 TEX_toktab
[i
].len
= p
- env
;
5145 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5146 TEX_toktab
[i
].len
= 0;
5153 /* Texinfo support. Dave Love, Mar. 2000. */
5155 Texinfo_nodes (FILE *inf
)
5158 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5159 if (LOOKING_AT (cp
, "@node"))
5162 while (*cp
!= '\0' && *cp
!= ',')
5164 make_tag (start
, cp
- start
, true,
5165 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5172 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5173 * Contents of <a name=xxx> are tags with name xxx.
5175 * Francesco Potortì, 2002.
5178 HTML_labels (FILE *inf
)
5180 bool getnext
= false; /* next text outside of HTML tags is a tag */
5181 bool skiptag
= false; /* skip to the end of the current HTML tag */
5182 bool intag
= false; /* inside an html tag, looking for ID= */
5183 bool inanchor
= false; /* when INTAG, is an anchor, look for NAME= */
5187 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5189 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5190 for (;;) /* loop on the same line */
5192 if (skiptag
) /* skip HTML tag */
5194 while (*dbp
!= '\0' && *dbp
!= '>')
5200 continue; /* look on the same line */
5202 break; /* go to next line */
5205 else if (intag
) /* look for "name=" or "id=" */
5207 while (*dbp
!= '\0' && *dbp
!= '>'
5208 && c_tolower (*dbp
) != 'n' && c_tolower (*dbp
) != 'i')
5211 break; /* go to next line */
5216 continue; /* look on the same line */
5218 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5219 || LOOKING_AT_NOCASE (dbp
, "id="))
5221 bool quoted
= (dbp
[0] == '"');
5224 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5227 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5229 linebuffer_setlen (&token_name
, end
- dbp
);
5230 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5231 token_name
.buffer
[end
- dbp
] = '\0';
5234 intag
= false; /* we found what we looked for */
5235 skiptag
= true; /* skip to the end of the tag */
5236 getnext
= true; /* then grab the text */
5237 continue; /* look on the same line */
5242 else if (getnext
) /* grab next tokens and tag them */
5244 dbp
= skip_spaces (dbp
);
5246 break; /* go to next line */
5250 inanchor
= (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5251 continue; /* look on the same line */
5254 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5256 make_tag (token_name
.buffer
, token_name
.len
, true,
5257 dbp
, end
- dbp
, lineno
, linecharno
);
5258 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5260 break; /* go to next line */
5263 else /* look for an interesting HTML tag */
5265 while (*dbp
!= '\0' && *dbp
!= '<')
5268 break; /* go to next line */
5270 if (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5273 continue; /* look on the same line */
5275 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5276 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5277 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5278 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5282 continue; /* look on the same line */
5293 * Assumes that the predicate or rule starts at column 0.
5294 * Only the first clause of a predicate or rule is added.
5295 * Original code by Sunichirou Sugou (1989)
5296 * Rewritten by Anders Lindgren (1996)
5298 static size_t prolog_pr (char *, char *);
5299 static void prolog_skip_comment (linebuffer
*, FILE *);
5300 static size_t prolog_atom (char *, size_t);
5303 Prolog_functions (FILE *inf
)
5313 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5315 if (cp
[0] == '\0') /* Empty line */
5317 else if (c_isspace (cp
[0])) /* Not a predicate */
5319 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5320 prolog_skip_comment (&lb
, inf
);
5321 else if ((len
= prolog_pr (cp
, last
)) > 0)
5323 /* Predicate or rule. Store the function name so that we
5324 only generate a tag for the first clause. */
5326 last
= xnew (len
+ 1, char);
5327 else if (len
+ 1 > allocated
)
5328 xrnew (last
, len
+ 1, char);
5329 allocated
= len
+ 1;
5330 memcpy (last
, cp
, len
);
5339 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5345 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5346 if (cp
[0] == '*' && cp
[1] == '/')
5348 readline (plb
, inf
);
5350 while (perhaps_more_input (inf
));
5354 * A predicate or rule definition is added if it matches:
5355 * <beginning of line><Prolog Atom><whitespace>(
5356 * or <beginning of line><Prolog Atom><whitespace>:-
5358 * It is added to the tags database if it doesn't match the
5359 * name of the previous clause header.
5361 * Return the size of the name of the predicate or rule, or 0 if no
5365 prolog_pr (char *s
, char *last
)
5367 /* Name of last clause. */
5372 pos
= prolog_atom (s
, 0);
5377 pos
= skip_spaces (s
+ pos
) - s
;
5380 || (s
[pos
] == '(' && (pos
+= 1))
5381 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5382 && (last
== NULL
/* save only the first clause */
5383 || len
!= strlen (last
)
5384 || !strneq (s
, last
, len
)))
5386 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
5394 * Consume a Prolog atom.
5395 * Return the number of bytes consumed, or 0 if there was an error.
5397 * A prolog atom, in this context, could be one of:
5398 * - An alphanumeric sequence, starting with a lower case letter.
5399 * - A quoted arbitrary string. Single quotes can escape themselves.
5400 * Backslash quotes everything.
5403 prolog_atom (char *s
, size_t pos
)
5409 if (c_islower (s
[pos
]) || s
[pos
] == '_')
5411 /* The atom is unquoted. */
5413 while (c_isalnum (s
[pos
]) || s
[pos
] == '_')
5417 return pos
- origpos
;
5419 else if (s
[pos
] == '\'')
5430 pos
++; /* A double quote */
5432 else if (s
[pos
] == '\0')
5433 /* Multiline quoted atoms are ignored. */
5435 else if (s
[pos
] == '\\')
5437 if (s
[pos
+1] == '\0')
5444 return pos
- origpos
;
5452 * Support for Erlang
5454 * Generates tags for functions, defines, and records.
5455 * Assumes that Erlang functions start at column 0.
5456 * Original code by Anders Lindgren (1996)
5458 static int erlang_func (char *, char *);
5459 static void erlang_attribute (char *);
5460 static int erlang_atom (char *);
5463 Erlang_functions (FILE *inf
)
5473 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5475 if (cp
[0] == '\0') /* Empty line */
5477 else if (c_isspace (cp
[0])) /* Not function nor attribute */
5479 else if (cp
[0] == '%') /* comment */
5481 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
5483 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
5485 erlang_attribute (cp
);
5492 else if ((len
= erlang_func (cp
, last
)) > 0)
5495 * Function. Store the function name so that we only
5496 * generate a tag for the first clause.
5499 last
= xnew (len
+ 1, char);
5500 else if (len
+ 1 > allocated
)
5501 xrnew (last
, len
+ 1, char);
5502 allocated
= len
+ 1;
5503 memcpy (last
, cp
, len
);
5512 * A function definition is added if it matches:
5513 * <beginning of line><Erlang Atom><whitespace>(
5515 * It is added to the tags database if it doesn't match the
5516 * name of the previous clause header.
5518 * Return the size of the name of the function, or 0 if no function
5522 erlang_func (char *s
, char *last
)
5524 /* Name of last clause. */
5529 pos
= erlang_atom (s
);
5534 pos
= skip_spaces (s
+ pos
) - s
;
5536 /* Save only the first clause. */
5539 || len
!= (int)strlen (last
)
5540 || !strneq (s
, last
, len
)))
5542 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
5551 * Handle attributes. Currently, tags are generated for defines
5554 * They are of the form:
5555 * -define(foo, bar).
5556 * -define(Foo(M, N), M+N).
5557 * -record(graph, {vtab = notable, cyclic = true}).
5560 erlang_attribute (char *s
)
5564 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
5567 int len
= erlang_atom (skip_spaces (cp
));
5569 make_tag (cp
, len
, true, s
, cp
+ len
- s
, lineno
, linecharno
);
5576 * Consume an Erlang atom (or variable).
5577 * Return the number of bytes consumed, or -1 if there was an error.
5580 erlang_atom (char *s
)
5584 if (c_isalpha (s
[pos
]) || s
[pos
] == '_')
5586 /* The atom is unquoted. */
5589 while (c_isalnum (s
[pos
]) || s
[pos
] == '_');
5591 else if (s
[pos
] == '\'')
5593 for (pos
++; s
[pos
] != '\''; pos
++)
5594 if (s
[pos
] == '\0' /* multiline quoted atoms are ignored */
5595 || (s
[pos
] == '\\' && s
[++pos
] == '\0'))
5604 static char *scan_separators (char *);
5605 static void add_regex (char *, language
*);
5606 static char *substitute (char *, char *, struct re_registers
*);
5609 * Take a string like "/blah/" and turn it into "blah", verifying
5610 * that the first and last characters are the same, and handling
5611 * quoted separator characters. Actually, stops on the occurrence of
5612 * an unquoted separator. Also process \t, \n, etc. and turn into
5613 * appropriate characters. Works in place. Null terminates name string.
5614 * Returns pointer to terminating separator, or NULL for
5615 * unterminated regexps.
5618 scan_separators (char *name
)
5621 char *copyto
= name
;
5622 bool quoted
= false;
5624 for (++name
; *name
!= '\0'; ++name
)
5630 case 'a': *copyto
++ = '\007'; break; /* BEL (bell) */
5631 case 'b': *copyto
++ = '\b'; break; /* BS (back space) */
5632 case 'd': *copyto
++ = 0177; break; /* DEL (delete) */
5633 case 'e': *copyto
++ = 033; break; /* ESC (escape) */
5634 case 'f': *copyto
++ = '\f'; break; /* FF (form feed) */
5635 case 'n': *copyto
++ = '\n'; break; /* NL (new line) */
5636 case 'r': *copyto
++ = '\r'; break; /* CR (carriage return) */
5637 case 't': *copyto
++ = '\t'; break; /* TAB (horizontal tab) */
5638 case 'v': *copyto
++ = '\v'; break; /* VT (vertical tab) */
5644 /* Something else is quoted, so preserve the quote. */
5652 else if (*name
== '\\')
5654 else if (*name
== sep
)
5660 name
= NULL
; /* signal unterminated regexp */
5662 /* Terminate copied string. */
5667 /* Look at the argument of --regex or --no-regex and do the right
5668 thing. Same for each line of a regexp file. */
5670 analyze_regex (char *regex_arg
)
5672 if (regex_arg
== NULL
)
5674 free_regexps (); /* --no-regex: remove existing regexps */
5678 /* A real --regexp option or a line in a regexp file. */
5679 switch (regex_arg
[0])
5681 /* Comments in regexp file or null arg to --regex. */
5687 /* Read a regex file. This is recursive and may result in a
5688 loop, which will stop when the file descriptors are exhausted. */
5692 linebuffer regexbuf
;
5693 char *regexfile
= regex_arg
+ 1;
5695 /* regexfile is a file containing regexps, one per line. */
5696 regexfp
= fopen (regexfile
, "r" FOPEN_BINARY
);
5697 if (regexfp
== NULL
)
5699 linebuffer_init (®exbuf
);
5700 while (readline_internal (®exbuf
, regexfp
, regexfile
) > 0)
5701 analyze_regex (regexbuf
.buffer
);
5702 free (regexbuf
.buffer
);
5703 if (fclose (regexfp
) != 0)
5708 /* Regexp to be used for a specific language only. */
5712 char *lang_name
= regex_arg
+ 1;
5715 for (cp
= lang_name
; *cp
!= '}'; cp
++)
5718 error ("unterminated language name in regex: %s", regex_arg
);
5722 lang
= get_language_from_langname (lang_name
);
5725 add_regex (cp
, lang
);
5729 /* Regexp to be used for any language. */
5731 add_regex (regex_arg
, NULL
);
5736 /* Separate the regexp pattern, compile it,
5737 and care for optional name and modifiers. */
5739 add_regex (char *regexp_pattern
, language
*lang
)
5741 static struct re_pattern_buffer zeropattern
;
5742 char sep
, *pat
, *name
, *modifiers
;
5745 struct re_pattern_buffer
*patbuf
;
5748 force_explicit_name
= true, /* do not use implicit tag names */
5749 ignore_case
= false, /* case is significant */
5750 multi_line
= false, /* matches are done one line at a time */
5751 single_line
= false; /* dot does not match newline */
5754 if (strlen (regexp_pattern
) < 3)
5756 error ("null regexp");
5759 sep
= regexp_pattern
[0];
5760 name
= scan_separators (regexp_pattern
);
5763 error ("%s: unterminated regexp", regexp_pattern
);
5768 error ("null name for regexp \"%s\"", regexp_pattern
);
5771 modifiers
= scan_separators (name
);
5772 if (modifiers
== NULL
) /* no terminating separator --> no name */
5778 modifiers
+= 1; /* skip separator */
5780 /* Parse regex modifiers. */
5781 for (; modifiers
[0] != '\0'; modifiers
++)
5782 switch (modifiers
[0])
5785 if (modifiers
== name
)
5786 error ("forcing explicit tag name but no name, ignoring");
5787 force_explicit_name
= true;
5797 need_filebuf
= true;
5800 error ("invalid regexp modifier `%c', ignoring", modifiers
[0]);
5804 patbuf
= xnew (1, struct re_pattern_buffer
);
5805 *patbuf
= zeropattern
;
5808 static char lc_trans
[UCHAR_MAX
+ 1];
5810 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++)
5811 lc_trans
[i
] = c_tolower (i
);
5812 patbuf
->translate
= lc_trans
; /* translation table to fold case */
5816 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
5818 pat
= regexp_pattern
;
5821 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
5823 re_set_syntax (RE_SYNTAX_EMACS
);
5825 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
5830 error ("%s while compiling pattern", err
);
5835 p_head
= xnew (1, regexp
);
5836 p_head
->pattern
= savestr (regexp_pattern
);
5837 p_head
->p_next
= rp
;
5838 p_head
->lang
= lang
;
5839 p_head
->pat
= patbuf
;
5840 p_head
->name
= savestr (name
);
5841 p_head
->error_signaled
= false;
5842 p_head
->force_explicit_name
= force_explicit_name
;
5843 p_head
->ignore_case
= ignore_case
;
5844 p_head
->multi_line
= multi_line
;
5848 * Do the substitutions indicated by the regular expression and
5852 substitute (char *in
, char *out
, struct re_registers
*regs
)
5855 int size
, dig
, diglen
;
5858 size
= strlen (out
);
5860 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5861 if (out
[size
- 1] == '\\')
5862 fatal ("pattern error in \"%s\"", out
);
5863 for (t
= strchr (out
, '\\');
5865 t
= strchr (t
+ 2, '\\'))
5866 if (c_isdigit (t
[1]))
5869 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5875 /* Allocate space and do the substitutions. */
5877 result
= xnew (size
+ 1, char);
5879 for (t
= result
; *out
!= '\0'; out
++)
5880 if (*out
== '\\' && c_isdigit (*++out
))
5883 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5884 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
5891 assert (t
<= result
+ size
);
5892 assert (t
- result
== (int)strlen (result
));
5897 /* Deallocate all regexps. */
5902 while (p_head
!= NULL
)
5904 rp
= p_head
->p_next
;
5905 free (p_head
->pattern
);
5906 free (p_head
->name
);
5914 * Reads the whole file as a single string from `filebuf' and looks for
5915 * multi-line regular expressions, creating tags on matches.
5916 * readline already dealt with normal regexps.
5918 * Idea by Ben Wing <ben@666.com> (2002).
5921 regex_tag_multiline (void)
5923 char *buffer
= filebuf
.buffer
;
5927 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
5931 if (!rp
->multi_line
)
5932 continue; /* skip normal regexps */
5934 /* Generic initializations before parsing file from memory. */
5935 lineno
= 1; /* reset global line number */
5936 charno
= 0; /* reset global char number */
5937 linecharno
= 0; /* reset global char number of line start */
5939 /* Only use generic regexps or those for the current language. */
5940 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
5943 while (match
>= 0 && match
< filebuf
.len
)
5945 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
5946 filebuf
.len
- match
, &rp
->regs
);
5951 if (!rp
->error_signaled
)
5953 error ("regexp stack overflow while matching \"%s\"",
5955 rp
->error_signaled
= true;
5962 if (match
== rp
->regs
.end
[0])
5964 if (!rp
->error_signaled
)
5966 error ("regexp matches the empty string: \"%s\"",
5968 rp
->error_signaled
= true;
5970 match
= -3; /* exit from while loop */
5974 /* Match occurred. Construct a tag. */
5975 while (charno
< rp
->regs
.end
[0])
5976 if (buffer
[charno
++] == '\n')
5977 lineno
++, linecharno
= charno
;
5979 if (name
[0] == '\0')
5981 else /* make a named tag */
5982 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
5983 if (rp
->force_explicit_name
)
5984 /* Force explicit tag name, if a name is there. */
5985 pfnote (name
, true, buffer
+ linecharno
,
5986 charno
- linecharno
+ 1, lineno
, linecharno
);
5988 make_tag (name
, strlen (name
), true, buffer
+ linecharno
,
5989 charno
- linecharno
+ 1, lineno
, linecharno
);
5998 nocase_tail (const char *cp
)
6002 while (*cp
!= '\0' && c_tolower (*cp
) == c_tolower (dbp
[len
]))
6004 if (*cp
== '\0' && !intoken (dbp
[len
]))
6013 get_tag (register char *bp
, char **namepp
)
6015 register char *cp
= bp
;
6019 /* Go till you get to white space or a syntactic break */
6020 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
6022 make_tag (bp
, cp
- bp
, true,
6023 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6027 *namepp
= savenstr (bp
, cp
- bp
);
6031 * Read a line of text from `stream' into `lbp', excluding the
6032 * newline or CR-NL, if any. Return the number of characters read from
6033 * `stream', which is the length of the line including the newline.
6035 * On DOS or Windows we do not count the CR character, if any before the
6036 * NL, in the returned length; this mirrors the behavior of Emacs on those
6037 * platforms (for text files, it translates CR-NL to NL as it reads in the
6040 * If multi-line regular expressions are requested, each line read is
6041 * appended to `filebuf'.
6044 readline_internal (linebuffer
*lbp
, FILE *stream
, char const *filename
)
6046 char *buffer
= lbp
->buffer
;
6047 char *p
= lbp
->buffer
;
6051 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
6055 register int c
= getc (stream
);
6058 /* We're at the end of linebuffer: expand it. */
6060 xrnew (buffer
, lbp
->size
, char);
6061 p
+= buffer
- lbp
->buffer
;
6062 pend
= buffer
+ lbp
->size
;
6063 lbp
->buffer
= buffer
;
6067 if (ferror (stream
))
6075 if (p
> buffer
&& p
[-1] == '\r')
6089 lbp
->len
= p
- buffer
;
6091 if (need_filebuf
/* we need filebuf for multi-line regexps */
6092 && chars_deleted
> 0) /* not at EOF */
6094 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6096 /* Expand filebuf. */
6098 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6100 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6101 filebuf
.len
+= lbp
->len
;
6102 filebuf
.buffer
[filebuf
.len
++] = '\n';
6103 filebuf
.buffer
[filebuf
.len
] = '\0';
6106 return lbp
->len
+ chars_deleted
;
6110 * Like readline_internal, above, but in addition try to match the
6111 * input line against relevant regular expressions and manage #line
6115 readline (linebuffer
*lbp
, FILE *stream
)
6119 linecharno
= charno
; /* update global char number of line start */
6120 result
= readline_internal (lbp
, stream
, infilename
); /* read line */
6121 lineno
+= 1; /* increment global line number */
6122 charno
+= result
; /* increment global char number */
6124 /* Honor #line directives. */
6125 if (!no_line_directive
)
6127 static bool discard_until_line_directive
;
6129 /* Check whether this is a #line directive. */
6130 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6135 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6136 && start
> 0) /* double quote character found */
6138 char *endp
= lbp
->buffer
+ start
;
6140 while ((endp
= strchr (endp
, '"')) != NULL
6141 && endp
[-1] == '\\')
6144 /* Ok, this is a real #line directive. Let's deal with it. */
6146 char *taggedabsname
; /* absolute name of original file */
6147 char *taggedfname
; /* name of original file as given */
6148 char *name
; /* temp var */
6150 discard_until_line_directive
= false; /* found it */
6151 name
= lbp
->buffer
+ start
;
6153 canonicalize_filename (name
);
6154 taggedabsname
= absolute_filename (name
, tagfiledir
);
6155 if (filename_is_absolute (name
)
6156 || filename_is_absolute (curfdp
->infname
))
6157 taggedfname
= savestr (taggedabsname
);
6159 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6161 if (streq (curfdp
->taggedfname
, taggedfname
))
6162 /* The #line directive is only a line number change. We
6163 deal with this afterwards. */
6166 /* The tags following this #line directive should be
6167 attributed to taggedfname. In order to do this, set
6168 curfdp accordingly. */
6170 fdesc
*fdp
; /* file description pointer */
6172 /* Go look for a file description already set up for the
6173 file indicated in the #line directive. If there is
6174 one, use it from now until the next #line
6176 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6177 if (streq (fdp
->infname
, curfdp
->infname
)
6178 && streq (fdp
->taggedfname
, taggedfname
))
6179 /* If we remove the second test above (after the &&)
6180 then all entries pertaining to the same file are
6181 coalesced in the tags file. If we use it, then
6182 entries pertaining to the same file but generated
6183 from different files (via #line directives) will
6184 go into separate sections in the tags file. These
6185 alternatives look equivalent. The first one
6186 destroys some apparently useless information. */
6192 /* Else, if we already tagged the real file, skip all
6193 input lines until the next #line directive. */
6194 if (fdp
== NULL
) /* not found */
6195 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6196 if (streq (fdp
->infabsname
, taggedabsname
))
6198 discard_until_line_directive
= true;
6202 /* Else create a new file description and use that from
6203 now on, until the next #line directive. */
6204 if (fdp
== NULL
) /* not found */
6207 fdhead
= xnew (1, fdesc
);
6208 *fdhead
= *curfdp
; /* copy curr. file description */
6210 fdhead
->infname
= savestr (curfdp
->infname
);
6211 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6212 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6213 fdhead
->taggedfname
= taggedfname
;
6214 fdhead
->usecharno
= false;
6215 fdhead
->prop
= NULL
;
6216 fdhead
->written
= false;
6220 free (taggedabsname
);
6222 readline (lbp
, stream
);
6224 } /* if a real #line directive */
6225 } /* if #line is followed by a number */
6226 } /* if line begins with "#line " */
6228 /* If we are here, no #line directive was found. */
6229 if (discard_until_line_directive
)
6233 /* Do a tail recursion on ourselves, thus discarding the contents
6234 of the line buffer. */
6235 readline (lbp
, stream
);
6239 discard_until_line_directive
= false;
6242 } /* if #line directives should be considered */
6249 /* Match against relevant regexps. */
6251 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6253 /* Only use generic regexps or those for the current language.
6254 Also do not use multiline regexps, which is the job of
6255 regex_tag_multiline. */
6256 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6260 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6265 if (!rp
->error_signaled
)
6267 error ("regexp stack overflow while matching \"%s\"",
6269 rp
->error_signaled
= true;
6276 /* Empty string matched. */
6277 if (!rp
->error_signaled
)
6279 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6280 rp
->error_signaled
= true;
6284 /* Match occurred. Construct a tag. */
6286 if (name
[0] == '\0')
6288 else /* make a named tag */
6289 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6290 if (rp
->force_explicit_name
)
6291 /* Force explicit tag name, if a name is there. */
6292 pfnote (name
, true, lbp
->buffer
, match
, lineno
, linecharno
);
6294 make_tag (name
, strlen (name
), true,
6295 lbp
->buffer
, match
, lineno
, linecharno
);
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6314 * Return a pointer to a space of size LEN+1 allocated with xnew where
6315 * the string CP has been copied for at most the first LEN characters.
6318 savenstr (const char *cp
, int len
)
6320 char *dp
= xnew (len
+ 1, char);
6322 return memcpy (dp
, cp
, len
);
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (char *cp)
{
  while (c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  while (! notinname (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  S1 is handed to `error' as the format
   string and S2 as its single argument.  */
static void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Print S1 followed by the system error message for errno (via perror)
   and exit with a failure status.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6369 suggest_asking_for_help (void)
6371 fprintf (stderr
, "\tTry `%s --help' for a complete list of options.\n",
6373 exit (EXIT_FAILURE
);
6376 /* Output a diagnostic with printf-style FORMAT and args. */
6378 error (const char *format
, ...)
6381 va_start (ap
, format
);
6382 fprintf (stderr
, "%s: ", progname
);
6383 vfprintf (stderr
, format
, ap
);
6384 fprintf (stderr
, "\n");
6388 /* Return a newly-allocated string whose contents
6389 concatenate those of s1, s2, s3. */
6391 concat (const char *s1
, const char *s2
, const char *s3
)
6393 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
6394 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
6396 strcpy (result
, s1
);
6397 strcpy (result
+ len1
, s2
);
6398 strcpy (result
+ len1
+ len2
, s3
);
6404 /* Does the same work as the system V getcwd, but does not need to
6405 guess the buffer size in advance. */
6410 char *path
= xnew (bufsize
, char);
6412 while (getcwd (path
, bufsize
) == NULL
)
6414 if (errno
!= ERANGE
)
6418 path
= xnew (bufsize
, char);
6421 canonicalize_filename (path
);
6425 /* Return a newly allocated string containing a name of a temporary file. */
6429 const char *tmpdir
= getenv ("TMPDIR");
6430 const char *slash
= "/";
6432 #if MSDOS || defined (DOS_NT)
6434 tmpdir
= getenv ("TEMP");
6436 tmpdir
= getenv ("TMP");
6439 if (tmpdir
[strlen (tmpdir
) - 1] == '/'
6440 || tmpdir
[strlen (tmpdir
) - 1] == '\\')
6445 if (tmpdir
[strlen (tmpdir
) - 1] == '/')
6449 char *templt
= concat (tmpdir
, slash
, "etXXXXXX");
6450 int fd
= mkostemp (templt
, O_CLOEXEC
);
6451 if (fd
< 0 || close (fd
) != 0)
6453 int temp_errno
= errno
;
6459 #if defined (DOS_NT)
6460 /* The file name will be used in shell redirection, so it needs to have
6461 DOS-style backslashes, or else the Windows shell will barf. */
6463 for (p
= templt
; *p
; p
++)
6471 /* Return a newly allocated string containing the file name of FILE
6472 relative to the absolute directory DIR (which should end with a slash). */
6474 relative_filename (char *file
, char *dir
)
6476 char *fp
, *dp
, *afn
, *res
;
6479 /* Find the common root of file and dir (with a trailing slash). */
6480 afn
= absolute_filename (file
, cwd
);
6483 while (*fp
++ == *dp
++)
6485 fp
--, dp
--; /* back to the first differing char */
6487 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
6490 do /* look at the equal chars until '/' */
6494 /* Build a sequence of "../" strings for the resulting relative file name. */
6496 while ((dp
= strchr (dp
+ 1, '/')) != NULL
)
6498 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
6501 z
= stpcpy (z
, "../");
6503 /* Add the file name relative to the common root of file and dir. */
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is cut after its last slash while the
   name is computed and restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   With DOS_NT defined, a drive-letter prefix such as "d:/" also counts
   as absolute.  */
static bool
filename_is_absolute (char *fn)
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place. */
static void
canonicalize_filename (register char *fn)
{
  register char* cp;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == '\\')
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == '\\')
          cp++;
      }
    else
      *fn = *cp;

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/')
      {
        *fn = *cp;
        while (cp[1] == '/')
          cp++;
      }
    else
      *fn = *cp;

#endif /* !DOS_NT */

  /* FN trails CP whenever separators were dropped: terminate there.  */
  *fn = '\0';
}
6648 /* Initialize a linebuffer for use. */
6650 linebuffer_init (linebuffer
*lbp
)
6652 lbp
->size
= (DEBUG
) ? 3 : 200;
6653 lbp
->buffer
= xnew (lbp
->size
, char);
6654 lbp
->buffer
[0] = '\0';
6658 /* Set the minimum size of a string contained in a linebuffer. */
6660 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
6662 while (lbp
->size
<= toksize
)
6665 xrnew (lbp
->buffer
, lbp
->size
, char);
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *result = realloc (ptr, size);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
6691 * indent-tabs-mode: t
6694 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6695 * c-file-style: "gnu"
6699 /* etags.c ends here */