1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2015 Free Software
Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README. That file should be updated when the
above notices are.
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
/* Revision identification string.  It has external linkage and is not
   const.  NOTE(review): the "@(#)" prefix looks like the marker that
   what(1)-style tools search for in binaries -- confirm before
   changing linkage or qualifiers.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
88 # define NDEBUG /* disable assert */
94 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
97 /* WIN32_NATIVE is for XEmacs.
98 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
103 #endif /* WIN32_NATIVE */
108 # include <sys/param.h>
115 # define MAXPATHLEN _MAX_PATH
119 # define O_CLOEXEC O_NOINHERIT
120 #endif /* WINDOWSNT */
127 #include <sysstdio.h>
130 #include <binary-io.h>
132 #include <c-strcase.h>
136 # undef assert /* some systems have a buggy assert.h */
137 # define assert(x) ((void) 0)
143 /* Define CTAGS to make the program "ctags" compatible with the usual one.
144 Leave it undefined to make the program "etags", which makes emacs-style
145 tag tables and tags typedefs, #defines and struct/union/enum by default. */
154 streq (char const *s
, char const *t
)
156 return strcmp (s
, t
) == 0;
160 strcaseeq (char const *s
, char const *t
)
162 return c_strcasecmp (s
, t
) == 0;
166 strneq (char const *s
, char const *t
, size_t n
)
168 return strncmp (s
, t
, n
) == 0;
172 strncaseeq (char const *s
, char const *t
, size_t n
)
174 return c_strncasecmp (s
, t
, n
) == 0;
177 /* C is not in a name. */
179 notinname (unsigned char c
)
181 /* Look at make_tag before modifying! */
182 static bool const table
[UCHAR_MAX
+ 1] = {
183 ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
184 ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
189 /* C can start a token. */
191 begtoken (unsigned char c
)
193 static bool const table
[UCHAR_MAX
+ 1] = {
195 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
196 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
197 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
200 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
201 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
202 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
209 /* C can be in the middle of a token. */
211 intoken (unsigned char c
)
213 static bool const table
[UCHAR_MAX
+ 1] = {
215 ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
216 ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
217 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
218 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
219 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
222 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
223 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
224 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
230 /* C can end a token. */
232 endtoken (unsigned char c
)
234 static bool const table
[UCHAR_MAX
+ 1] = {
235 ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
236 ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
237 ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
238 ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
239 ['{']=1, ['|']=1, ['}']=1, ['~']=1
245 * xnew, xrnew -- allocate, reallocate storage
247 * SYNOPSIS: Type *xnew (int n, Type);
248 * void xrnew (OldPointer, int n, Type);
/* xnew (N, TYPE): call xmalloc for N objects of TYPE and yield the
   result cast to `TYPE *'.  */
#define xnew(n, Type) \
  ((Type *) xmalloc ((n) * sizeof (Type)))

/* xrnew (OP, N, TYPE): call xrealloc on OP for N objects of TYPE and
   assign the result (cast to `TYPE *') back to OP.  OP is evaluated
   twice, so it must be free of side effects.  */
#define xrnew(op, n, Type) \
  ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
/* Signature shared by the per-language parse routines: one stdio
   stream in, nothing returned.  */
typedef void Lang_function (FILE *inf);
257 const char *suffix
; /* file name suffix for this compressor */
258 const char *command
; /* takes one arg and decompresses to stdout */
263 const char *name
; /* language name */
264 const char *help
; /* detailed help for the language */
265 Lang_function
*function
; /* parse function */
266 const char **suffixes
; /* name suffixes of this language's files */
267 const char **filenames
; /* names of this language's files */
268 const char **interpreters
; /* interpreters for this language */
269 bool metasource
; /* source used to generate other sources */
274 struct fdesc
*next
; /* for the linked list */
275 char *infname
; /* uncompressed input file name */
276 char *infabsname
; /* absolute uncompressed input file name */
277 char *infabsdir
; /* absolute dir of input file */
278 char *taggedfname
; /* file name to write in tagfile */
279 language
*lang
; /* language of file */
280 char *prop
; /* file properties to write in tagfile */
281 bool usecharno
; /* etags tags shall contain char number */
282 bool written
; /* entry written in the tags file */
285 typedef struct node_st
286 { /* sorting structure */
287 struct node_st
*left
, *right
; /* left and right sons */
288 fdesc
*fdp
; /* description of file to whom tag belongs */
289 char *name
; /* tag name */
290 char *regex
; /* search regexp */
291 bool valid
; /* write this tag on the tag file */
292 bool is_func
; /* function tag: use regexp in CTAGS mode */
293 bool been_warned
; /* warning already given for duplicated tag */
294 int lno
; /* line number tag is on */
295 long cno
; /* character number line starts on */
299 * A `linebuffer' is a structure which holds a line of text.
300 * `readline_internal' reads a line from a stream into a linebuffer
301 * and works regardless of the length of the line.
302 * SIZE is the size of BUFFER, LEN is the length of the string in
303 * BUFFER after readline reads it.
312 /* Used to support mixing of --lang and file names. */
316 at_language
, /* a language specification */
317 at_regexp
, /* a regular expression */
318 at_filename
, /* a file name */
319 at_stdin
, /* read from stdin here */
320 at_end
/* stop parsing the list */
321 } arg_type
; /* argument type */
322 language
*lang
; /* language associated with the argument */
323 char *what
; /* the argument itself */
326 /* Structure defining a regular expression. */
327 typedef struct regexp
329 struct regexp
*p_next
; /* pointer to next in list */
330 language
*lang
; /* if set, use only for this language */
331 char *pattern
; /* the regexp pattern */
332 char *name
; /* tag name */
333 struct re_pattern_buffer
*pat
; /* the compiled pattern */
334 struct re_registers regs
; /* re registers */
335 bool error_signaled
; /* already signaled for this regexp */
336 bool force_explicit_name
; /* do not allow implicit tag name */
337 bool ignore_case
; /* ignore case when matching */
338 bool multi_line
; /* do a multi-line match on the whole file */
/* Forward declarations of the per-language parsers.  They are spelled
   out in full because many compilers reject the shorter form
       Lang_function Ada_funcs;
   C_entries is the odd one out: it takes an extra `c_ext' argument.  */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);

static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);

static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
373 static language
*get_language_from_langname (const char *);
374 static void readline (linebuffer
*, FILE *);
375 static long readline_internal (linebuffer
*, FILE *, char const *);
376 static bool nocase_tail (const char *);
377 static void get_tag (char *, char **);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn
void suggest_asking_for_help (void);
385 static _Noreturn
void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn
void pfatal (const char *);
387 static void add_node (node
*, node
**);
389 static void process_file_name (char *, language
*);
390 static void process_file (FILE *, char *, language
*);
391 static void find_entries (FILE *);
392 static void free_tree (node
*);
393 static void free_fdesc (fdesc
*);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc
*, node
**);
396 static void put_entries (node
*);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f
);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer
*);
412 static void linebuffer_setlen (linebuffer
*, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar
= '/'; /* use /.../ searches */
419 static char *tagfile
; /* output file */
420 static char *progname
; /* name this program was invoked with */
421 static char *cwd
; /* current working directory */
422 static char *tagfiledir
; /* directory of tagfile */
423 static FILE *tagf
; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
426 static fdesc
*fdhead
; /* head of file description list */
427 static fdesc
*curfdp
; /* current file description */
428 static char *infilename
; /* current input file name */
429 static int lineno
; /* line number of current line */
430 static long charno
; /* current character number */
431 static long linecharno
; /* charno of start of current line */
432 static char *dbp
; /* pointer to start of current tag */
434 static const int invalidcharno
= -1;
436 static node
*nodehead
; /* the head of the binary tree of tags */
437 static node
*last_node
; /* the last node created */
439 static linebuffer lb
; /* the current line */
440 static linebuffer filebuf
; /* a buffer containing the whole file */
441 static linebuffer token_name
; /* a buffer containing a tag name */
443 static bool append_to_tagfile
; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs
; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals
; /* create tags for global variables */
453 static int members
; /* create tags for C member variables */
454 static int declarations
; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive
; /* ignore #line directives (undocumented) */
456 static int no_duplicates
; /* no duplicate tags for ctags (undocumented) */
457 static bool update
; /* -u: update tags */
458 static bool vgrind_style
; /* -v: create vgrind style index output */
459 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style
; /* -x: create cxref style output */
461 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent
; /* -I: ignore indentation in C */
463 static int packages_only
; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify
; /* -Q: produce class-qualified tags in C++/Java */
466 /* STDIN is defined in LynxOS system headers */
471 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
472 static bool parsing_stdin
; /* --parse-stdin used */
474 static regexp
*p_head
; /* list of all regexps */
475 static bool need_filebuf
; /* some regexes are multi-line */
477 static struct option longopts
[] =
479 { "append", no_argument
, NULL
, 'a' },
480 { "packages-only", no_argument
, &packages_only
, 1 },
481 { "c++", no_argument
, NULL
, 'C' },
482 { "declarations", no_argument
, &declarations
, 1 },
483 { "no-line-directive", no_argument
, &no_line_directive
, 1 },
484 { "no-duplicates", no_argument
, &no_duplicates
, 1 },
485 { "help", no_argument
, NULL
, 'h' },
486 { "help", no_argument
, NULL
, 'H' },
487 { "ignore-indentation", no_argument
, NULL
, 'I' },
488 { "language", required_argument
, NULL
, 'l' },
489 { "members", no_argument
, &members
, 1 },
490 { "no-members", no_argument
, &members
, 0 },
491 { "output", required_argument
, NULL
, 'o' },
492 { "class-qualify", no_argument
, &class_qualify
, 'Q' },
493 { "regex", required_argument
, NULL
, 'r' },
494 { "no-regex", no_argument
, NULL
, 'R' },
495 { "ignore-case-regex", required_argument
, NULL
, 'c' },
496 { "parse-stdin", required_argument
, NULL
, STDIN
},
497 { "version", no_argument
, NULL
, 'V' },
499 #if CTAGS /* Ctags options */
500 { "backward-search", no_argument
, NULL
, 'B' },
501 { "cxref", no_argument
, NULL
, 'x' },
502 { "defines", no_argument
, NULL
, 'd' },
503 { "globals", no_argument
, &globals
, 1 },
504 { "typedefs", no_argument
, NULL
, 't' },
505 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
506 { "update", no_argument
, NULL
, 'u' },
507 { "vgrind", no_argument
, NULL
, 'v' },
508 { "no-warn", no_argument
, NULL
, 'w' },
510 #else /* Etags options */
511 { "no-defines", no_argument
, NULL
, 'D' },
512 { "no-globals", no_argument
, &globals
, 0 },
513 { "include", required_argument
, NULL
, 'i' },
518 static compressor compressors
[] =
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { "xz", "xz -d -c" },
/* NULL-terminated list of file name suffixes for the "ada" language.  */
static const char *Ada_suffixes[] = {
  "ads",
  "adb",
  "ada",
  NULL
};
536 static const char Ada_help
[] =
537 "In Ada code, functions, procedures, packages, tasks and types are\n\
538 tags. Use the '--packages-only' option to create tags for\n\
540 Ada tag names have suffixes indicating the type of entity:\n\
541 Entity type: Qualifier:\n\
542 ------------ ----------\n\
549 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
550 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
551 will just search for any tag 'bidule'.";
554 static const char *Asm_suffixes
[] =
555 { "a", /* Unix assembler */
556 "asm", /* Microcontroller assembly */
557 "def", /* BSO/Tasking definition includes */
558 "inc", /* Microcontroller include files */
559 "ins", /* Microcontroller include files */
560 "s", "sa", /* Unix assembler */
561 "S", /* cpp-processed Unix assembler */
562 "src", /* BSO/Tasking C compiler output */
/* Detailed help text for the "asm" language.  */
static const char Asm_help[] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
570 /* Note that .c and .h can be considered C++, if the --c++ flag was
571 given, or if the `class' or `template' keywords are met inside the file.
572 That is why default_C_entries is called for these. */
573 static const char *default_C_suffixes
[] =
/* Detailed help text for the "c" language.  The wording depends on
   whether the program is built as ctags or as etags.  */
static const char default_C_help[] =
#if CTAGS				/* C help for Ctags */
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of 'struct', 'union' and 'enum'.\n"
  "Use -d to tag '#define' macro definitions and 'enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using '--declarations', and struct members by using '--members'."
#else					/* C help for Etags */
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of 'struct', 'union' and 'enum'. '#define' macro\n"
  "definitions and 'enum' constants are tags unless you specify\n"
  "'--no-defines'. Global variables are tags unless you specify\n"
  "'--no-globals' and so are struct members unless you specify\n"
  "'--no-members'. Use of '--no-globals', '--no-defines' and\n"
  "'--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using '--declarations'."
#endif /* C help for Ctags and Etags */
  ;
596 static const char *Cplusplus_suffixes
[] =
597 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
598 "M", /* Objective C++ */
599 "pdb", /* PostScript with C syntax */
601 static const char Cplusplus_help
[] =
602 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
603 --help --lang=c --lang=c++ for full help.)\n\
604 In addition to C tags, member functions are also recognized. Member\n\
605 variables are recognized unless you use the '--no-members' option.\n\
606 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
607 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
610 static const char *Cjava_suffixes
[] =
/* Detailed help text for the "java" language.  It is never written
   to, so it is const like the other *_help strings; the wording
   "tag constructs" matches the C++ help text.  */
static const char Cjava_help[] =
  "In Java code, all the tag constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
/* NULL-terminated list of file name suffixes for the "cobol" language.  */
static const char *Cobol_suffixes[] = {
  "COB",
  "cob",
  NULL
};

/* Detailed help text for the "cobol" language.  Read-only, so it is
   declared const like the other *_help strings.  */
static const char Cobol_help[] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
/* NULL-terminated list of file name suffixes for the "c*" language.  */
static const char *Cstar_suffixes[] = {
  "cs",
  "hs",
  NULL
};
/* NULL-terminated list of file name suffixes for the "erlang" language.  */
static const char *Erlang_suffixes[] = {
  "erl",
  "hrl",
  NULL
};

/* Detailed help text for the "erlang" language.  */
static const char Erlang_help[] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
/* NULL-terminated list of file name suffixes for the "forth" language.
   Given internal linkage like every other suffix table in this file.  */
static const char *Forth_suffixes[] = {
  "fth",
  "tok",
  NULL
};

/* Detailed help text for the "forth" language.  */
static const char Forth_help[] =
  "In Forth code, tags are words defined by ':',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
/* NULL-terminated list of file name suffixes for the "fortran" language.  */
static const char *Fortran_suffixes[] = {
  "F",
  "f",
  "f90",
  "for",
  NULL
};

/* Detailed help text for the "fortran" language.  */
static const char Fortran_help[] =
  "In Fortran code, functions, subroutines and block data are tags.";
/* NULL-terminated list of file name suffixes for the "html" language.  */
static const char *HTML_suffixes[] = {
  "htm",
  "html",
  "shtml",
  NULL
};

/* Detailed help text for the "html" language.  */
static const char HTML_help[] =
  "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n"
  "'h3' headers. Also, tags are 'name=' in anchors and all\n"
  "occurrences of 'id='.";
/* NULL-terminated list of file name suffixes for the "lisp" language.  */
static const char *Lisp_suffixes[] = {
  "cl",
  "clisp",
  "el",
  "l",
  "lisp",
  "LSP",
  "lsp",
  "ml",
  NULL
};
652 static const char Lisp_help
[] =
653 "In Lisp code, any function defined with 'defun', any variable\n\
654 defined with 'defvar' or 'defconst', and in general the first\n\
655 argument of any expression that starts with '(def' in column zero\n\
657 The '--declarations' option tags \"(defvar foo)\" constructs too.";
/* NULL-terminated list of file name suffixes for the "lua" language.  */
static const char *Lua_suffixes[] = {
  "lua",
  "LUA",
  NULL
};

/* Detailed help text for the "lua" language.  */
static const char Lua_help[] = "In Lua scripts, all functions are tags.";
/* NULL-terminated list of whole file names for the "makefile"
   language.  */
static const char *Makefile_filenames[] = {
  "Makefile",
  "makefile",
  "GNUMakefile",
  "Makefile.in",
  "Makefile.am",
  NULL
};

/* Detailed help text for the "makefile" language.  */
static const char Makefile_help[] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify '--no-globals'.";
670 static const char *Objc_suffixes
[] =
671 { "lm", /* Objective lex file */
672 "m", /* Objective C file */
/* Detailed help text for the "objc" language.  */
static const char Objc_help[] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
/* NULL-terminated list of file name suffixes for the "pascal" language.  */
static const char *Pascal_suffixes[] = {
  "p",
  "pas",
  NULL
};
682 static const char Pascal_help
[] =
683 "In Pascal code, the tags are the functions and procedures defined\n\
685 /* " // this is for working around an Emacs highlighting bug... */
/* NULL-terminated list of file name suffixes for the "perl" language.  */
static const char *Perl_suffixes[] = {
  "pl",
  "pm",
  NULL
};

/* NULL-terminated list of interpreter names for the "perl" language.  */
static const char *Perl_interpreters[] = {
  "perl",
  "@PERL@",
  NULL
};

/* Detailed help text for the "perl" language.  */
static const char Perl_help[] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n"
  "'--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named 'PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is 'main::SUB'.";
/* NULL-terminated list of file name suffixes for the "php" language.  */
static const char *PHP_suffixes[] = {
  "php",
  "php3",
  "php4",
  NULL
};

/* Detailed help text for the "php" language.  */
static const char PHP_help[] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the '--no-members' option, vars are tags too.";
704 static const char *plain_C_suffixes
[] =
705 { "pc", /* Pro*C file */
/* NULL-terminated list of file name suffixes for the "postscript"
   language; .psw is for PSWrap.  */
static const char *PS_suffixes[] = {
  "ps",
  "psw",
  NULL
};

/* Detailed help text for the "postscript" language.  */
static const char PS_help[] = "In PostScript code, the tags are the functions.";
713 static const char *Prolog_suffixes
[] =
715 static const char Prolog_help
[] =
716 "In Prolog code, tags are predicates and rules at the beginning of\n\
719 static const char *Python_suffixes
[] =
721 static const char Python_help
[] =
722 "In Python code, 'def' or 'class' at the beginning of a line\n\
/* NULL-terminated list of file name suffixes for the "scheme" language.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static const char *Scheme_suffixes[] = {
  "oak",
  "sch",
  "scheme",
  "SCM",
  "scm",
  "SM",
  "sm",
  "ss",
  "t",
  NULL
};

/* Detailed help text for the "scheme" language.  */
static const char Scheme_help[] =
  "In Scheme code, tags include anything defined with 'def' or with a\n"
  "construct whose name starts with 'def'. They also include\n"
  "variables set with 'set!' at top level in the file.";
/* NULL-terminated list of file name suffixes for the "tex" language.  */
static const char *TeX_suffixes[] = {
  "bib",
  "clo",
  "cls",
  "ltx",
  "sty",
  "TeX",
  "tex",
  NULL
};
735 static const char TeX_help
[] =
736 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
737 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
738 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
739 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
740 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
742 Other commands can be specified by setting the environment variable\n\
743 'TEXTAGS' to a colon-separated list like, for example,\n\
744 TEXTAGS=\"mycommand:myothercommand\".";
/* NULL-terminated list of file name suffixes for the "texinfo"
   language.  */
static const char *Texinfo_suffixes[] = {
  "texi",
  "texinfo",
  "txi",
  NULL
};

/* Detailed help text for the "texinfo" language.  */
static const char Texinfo_help[] =
  "for texinfo files, lines starting with @node are tagged.";
/* NULL-terminated list of file name suffixes for the "yacc" language;
   .ym is Objective yacc file.  */
static const char *Yacc_suffixes[] = {
  "y",
  "y++",
  "ym",
  "yxx",
  "yy",
  NULL
};
754 static const char Yacc_help
[] =
755 "In Bison or Yacc input files, each rule defines as a tag the\n\
756 nonterminal it constructs. The portions of the file that contain\n\
757 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
/* Detailed help text for the "auto" pseudo-language.  The wording
   "based on" fixes the earlier "base on" typo in the text shown to the
   user.  */
static const char auto_help[] =
  "'auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
/* Detailed help text for the "none" pseudo-language.  */
static const char none_help[] =
  "'none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Placeholder help text for languages without a detailed description.  */
static const char no_lang_help[] =
  "No detailed help available for this language.";
773 * Table of languages.
775 * It is ok for a given function to be listed under more than one
776 * name. I just didn't.
779 static language lang_names
[] =
781 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
782 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
783 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
784 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
785 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
786 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
787 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
788 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
789 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
790 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
791 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
792 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
793 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
794 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
795 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
796 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
797 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
798 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
799 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
800 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
801 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
802 { "python", Python_help
, Python_functions
, Python_suffixes
},
803 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
804 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
805 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
806 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,true},
807 { "auto", auto_help
}, /* default guessing scheme */
808 { "none", none_help
, just_read_file
}, /* regexp matching only */
809 { NULL
} /* end of list */
814 print_language_names (void)
817 const char **name
, **ext
;
819 puts ("\nThese are the currently supported languages, along with the\n\
820 default file names and dot suffixes:");
821 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
823 printf (" %-*s", 10, lang
->name
);
824 if (lang
->filenames
!= NULL
)
825 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
826 printf (" %s", *name
);
827 if (lang
->suffixes
!= NULL
)
828 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
829 printf (" .%s", *ext
);
832 puts ("where 'auto' means use default language for files based on file\n\
833 name suffix, and 'none' means only do regexp processing on files.\n\
834 If no language is specified and no matching suffix is found,\n\
835 the first line of the file is read for a sharp-bang (#!) sequence\n\
836 followed by the name of an interpreter. If no such sequence is found,\n\
837 Fortran is tried first; if no tags are found, C is tried next.\n\
838 When parsing any C file, a \"class\" or \"template\" keyword\n\
840 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
842 For detailed help on a given language use, for example,\n\
843 etags --help --lang=ada.");
847 # define EMACS_NAME "standalone"
850 # define VERSION "17.38.1.4"
852 static _Noreturn
void
855 char emacs_copyright
[] = COPYRIGHT
;
857 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
858 puts (emacs_copyright
);
859 puts ("This program is distributed under the terms in ETAGS.README");
864 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
865 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
868 static _Noreturn
void
869 print_help (argument
*argbuffer
)
871 bool help_for_lang
= false;
873 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
874 if (argbuffer
->arg_type
== at_language
)
878 puts (argbuffer
->lang
->help
);
879 help_for_lang
= true;
885 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
887 These are the options accepted by %s.\n", progname
, progname
);
888 puts ("You may use unambiguous abbreviations for the long option names.");
889 puts (" A - as file name means read names from stdin (one per line).\n\
890 Absolute names are stored in the output file as they are.\n\
891 Relative ones are stored relative to the output file's directory.\n");
893 puts ("-a, --append\n\
894 Append tag entries to existing tags file.");
896 puts ("--packages-only\n\
897 For Ada files, only generate tags for packages.");
900 puts ("-B, --backward-search\n\
901 Write the search commands for the tag entries using '?', the\n\
902 backward-search command instead of '/', the forward-search command.");
904 /* This option is mostly obsolete, because etags can now automatically
905 detect C++. Retained for backward compatibility and for debugging and
906 experimentation. In principle, we could want to tag as C++ even
907 before any "class" or "template" keyword.
909 Treat files whose name suffix defaults to C language as C++ files.");
912 puts ("--declarations\n\
913 In C and derived languages, create tags for function declarations,");
915 puts ("\tand create tags for extern variables if --globals is used.");
918 ("\tand create tags for extern variables unless --no-globals is used.");
921 puts ("-d, --defines\n\
922 Create tag entries for C #define constants and enum constants, too.");
924 puts ("-D, --no-defines\n\
925 Don't create tag entries for C #define constants and enum constants.\n\
926 This makes the tags file smaller.");
929 puts ("-i FILE, --include=FILE\n\
930 Include a note in tag file indicating that, when searching for\n\
931 a tag, one should also consult the tags file FILE after\n\
932 checking the current file.");
934 puts ("-l LANG, --language=LANG\n\
935 Force the following files to be considered as written in the\n\
936 named language up to the next --language=LANG option.");
940 Create tag entries for global variables in some languages.");
942 puts ("--no-globals\n\
943 Do not create tag entries for global variables in some\n\
944 languages. This makes the tags file smaller.");
946 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
947 puts ("--no-line-directive\n\
948 Ignore #line preprocessor directives in C and derived languages.");
952 Create tag entries for members of structures in some languages.");
954 puts ("--no-members\n\
955 Do not create tag entries for members of structures\n\
956 in some languages.");
958 puts ("-Q, --class-qualify\n\
959 Qualify tag names with their class name in C++, ObjC, and Java.\n\
960 This produces tag names of the form \"class::member\" for C++,\n\
961 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
962 For Objective C, this also produces class methods qualified with\n\
963 their arguments, as in \"foo:bar:baz:more\".");
964 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
965 Make a tag for each line matching a regular expression pattern\n\
966 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
967 files only. REGEXFILE is a file containing one REGEXP per line.\n\
968 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
969 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
970 puts (" If TAGNAME/ is present, the tags created are named.\n\
971 For example Tcl named tags can be created with:\n\
972 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
973 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
974 'm' means to allow multi-line matches, 's' implies 'm' and\n\
975 causes dot to match any character, including newline.");
977 puts ("-R, --no-regex\n\
978 Don't create tags from regexps for the following files.");
980 puts ("-I, --ignore-indentation\n\
981 In C and C++ do not assume that a closing brace in the first\n\
982 column is the final brace of a function or structure definition.");
984 puts ("-o FILE, --output=FILE\n\
985 Write the tags to FILE.");
987 puts ("--parse-stdin=NAME\n\
988 Read from standard input and record tags as belonging to file NAME.");
992 puts ("-t, --typedefs\n\
993 Generate tag entries for C and Ada typedefs.");
994 puts ("-T, --typedefs-and-c++\n\
995 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
996 and C++ member functions.");
1000 puts ("-u, --update\n\
1001 Update the tag entries for the given files, leaving tag\n\
1002 entries for other files in place. Currently, this is\n\
1003 implemented by deleting the existing entries for the given\n\
1004 files and then rewriting the new entries at the end of the\n\
1005 tags file. It is often faster to simply rebuild the entire\n\
1006 tag file than to use this.");
1010 puts ("-v, --vgrind\n\
1011 Print on the standard output an index of items intended for\n\
1012 human consumption, similar to the output of vgrind. The index\n\
1013 is sorted, and gives the page number of each item.");
1015 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1016 puts ("-w, --no-duplicates\n\
1017 Do not create duplicate tag entries, for compatibility with\n\
1018 traditional ctags.");
1020 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1021 puts ("-w, --no-warn\n\
1022 Suppress warning messages about duplicate tag entries.");
1024 puts ("-x, --cxref\n\
1025 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1026 The output uses line numbers instead of page numbers, but\n\
1027 beyond that the differences are cosmetic; try both to see\n\
1031 puts ("-V, --version\n\
1032 Print the version of the program.\n\
1034 Print this help message.\n\
1035 Followed by one or more '--language' options prints detailed\n\
1036 help about tag generation for the specified languages.");
1038 print_language_names ();
1041 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1043 exit (EXIT_SUCCESS
);
1048 main (int argc
, char **argv
)
1051 unsigned int nincluded_files
;
1052 char **included_files
;
1053 argument
*argbuffer
;
1054 int current_arg
, file_count
;
1055 linebuffer filename_lb
;
1056 bool help_asked
= false;
1062 nincluded_files
= 0;
1063 included_files
= xnew (argc
, char *);
1067 /* Allocate enough no matter what happens. Overkill, but each one
1069 argbuffer
= xnew (argc
, argument
);
1072 * Always find typedefs and structure tags.
1073 * Also default to find macro constants, enum constants, struct
1074 * members and global variables. Do it for both etags and ctags.
1076 typedefs
= typedefs_or_cplusplus
= constantypedefs
= true;
1077 globals
= members
= true;
1079 /* When the optstring begins with a '-' getopt_long does not rearrange the
1080 non-options arguments to be at the end, but leaves them alone. */
1081 optstring
= concat ("-ac:Cf:Il:o:Qr:RSVhH",
1082 (CTAGS
) ? "BxdtTuvw" : "Di:",
1085 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1089 /* If getopt returns 0, then it has already processed a
1090 long-named option. We should do nothing. */
1094 /* This means that a file name has been seen. Record it. */
1095 argbuffer
[current_arg
].arg_type
= at_filename
;
1096 argbuffer
[current_arg
].what
= optarg
;
1097 len
= strlen (optarg
);
1098 if (whatlen_max
< len
)
1105 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1106 argbuffer
[current_arg
].arg_type
= at_stdin
;
1107 argbuffer
[current_arg
].what
= optarg
;
1108 len
= strlen (optarg
);
1109 if (whatlen_max
< len
)
1114 fatal ("cannot parse standard input more than once");
1115 parsing_stdin
= true;
1118 /* Common options. */
1119 case 'a': append_to_tagfile
= true; break;
1120 case 'C': cplusplus
= true; break;
1121 case 'f': /* for compatibility with old makefiles */
1125 error ("-o option may only be given once.");
1126 suggest_asking_for_help ();
1132 case 'S': /* for backward compatibility */
1133 ignoreindent
= true;
1137 language
*lang
= get_language_from_langname (optarg
);
1140 argbuffer
[current_arg
].lang
= lang
;
1141 argbuffer
[current_arg
].arg_type
= at_language
;
1147 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1148 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1151 argbuffer
[current_arg
].arg_type
= at_regexp
;
1152 argbuffer
[current_arg
].what
= optarg
;
1153 len
= strlen (optarg
);
1154 if (whatlen_max
< len
)
1159 argbuffer
[current_arg
].arg_type
= at_regexp
;
1160 argbuffer
[current_arg
].what
= NULL
;
1175 case 'D': constantypedefs
= false; break;
1176 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1178 /* Ctags options. */
1179 case 'B': searchar
= '?'; break;
1180 case 'd': constantypedefs
= true; break;
1181 case 't': typedefs
= true; break;
1182 case 'T': typedefs
= typedefs_or_cplusplus
= true; break;
1183 case 'u': update
= true; break;
1184 case 'v': vgrind_style
= true; /*FALLTHRU*/
1185 case 'x': cxref_style
= true; break;
1186 case 'w': no_warnings
= true; break;
1188 suggest_asking_for_help ();
1192 /* No more options. Store the rest of arguments. */
1193 for (; optind
< argc
; optind
++)
1195 argbuffer
[current_arg
].arg_type
= at_filename
;
1196 argbuffer
[current_arg
].what
= argv
[optind
];
1197 len
= strlen (argv
[optind
]);
1198 if (whatlen_max
< len
)
1204 argbuffer
[current_arg
].arg_type
= at_end
;
1207 print_help (argbuffer
);
1210 if (nincluded_files
== 0 && file_count
== 0)
1212 error ("no input files specified.");
1213 suggest_asking_for_help ();
1217 if (tagfile
== NULL
)
1218 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1219 cwd
= etags_getcwd (); /* the current working directory */
1220 if (cwd
[strlen (cwd
) - 1] != '/')
1223 cwd
= concat (oldcwd
, "/", "");
1227 /* Compute base directory for relative file names. */
1228 if (streq (tagfile
, "-")
1229 || strneq (tagfile
, "/dev/", 5))
1230 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1233 canonicalize_filename (tagfile
);
1234 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1237 linebuffer_init (&lb
);
1238 linebuffer_init (&filename_lb
);
1239 linebuffer_init (&filebuf
);
1240 linebuffer_init (&token_name
);
1244 if (streq (tagfile
, "-"))
1247 SET_BINARY (fileno (stdout
));
1250 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1256 * Loop through files finding functions.
1258 for (i
= 0; i
< current_arg
; i
++)
1260 static language
*lang
; /* non-NULL if language is forced */
1263 switch (argbuffer
[i
].arg_type
)
1266 lang
= argbuffer
[i
].lang
;
1269 analyze_regex (argbuffer
[i
].what
);
1272 this_file
= argbuffer
[i
].what
;
1273 /* Input file named "-" means read file names from stdin
1274 (one per line) and use them. */
1275 if (streq (this_file
, "-"))
1278 fatal ("cannot parse standard input "
1279 "AND read file names from it");
1280 while (readline_internal (&filename_lb
, stdin
, "-") > 0)
1281 process_file_name (filename_lb
.buffer
, lang
);
1284 process_file_name (this_file
, lang
);
1287 this_file
= argbuffer
[i
].what
;
1288 process_file (stdin
, this_file
, lang
);
1291 error ("internal error: arg_type");
1297 free (filebuf
.buffer
);
1298 free (token_name
.buffer
);
1300 if (!CTAGS
|| cxref_style
)
1302 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1303 put_entries (nodehead
);
1304 free_tree (nodehead
);
1310 /* Output file entries that have no tags. */
1311 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1313 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1315 while (nincluded_files
-- > 0)
1316 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1318 if (fclose (tagf
) == EOF
)
1322 exit (EXIT_SUCCESS
);
1325 /* From here on, we are in (CTAGS && !cxref_style) */
1329 xmalloc (strlen (tagfile
) + whatlen_max
+
1330 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1331 for (i
= 0; i
< current_arg
; ++i
)
1333 switch (argbuffer
[i
].arg_type
)
1339 continue; /* the for loop */
1341 char *z
= stpcpy (cmd
, "mv ");
1342 z
= stpcpy (z
, tagfile
);
1343 z
= stpcpy (z
, " OTAGS;fgrep -v '\t");
1344 z
= stpcpy (z
, argbuffer
[i
].what
);
1345 z
= stpcpy (z
, "\t' OTAGS >");
1346 z
= stpcpy (z
, tagfile
);
1347 strcpy (z
, ";rm OTAGS");
1348 if (system (cmd
) != EXIT_SUCCESS
)
1349 fatal ("failed to execute shell command");
1352 append_to_tagfile
= true;
1355 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1358 put_entries (nodehead
); /* write all the tags (CTAGS) */
1359 free_tree (nodehead
);
1361 if (fclose (tagf
) == EOF
)
1365 if (append_to_tagfile
|| update
)
1367 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1368 /* Maybe these should be used:
1369 setenv ("LC_COLLATE", "C", 1);
1370 setenv ("LC_ALL", "C", 1); */
1371 char *z
= stpcpy (cmd
, "sort -u -o ");
1372 z
= stpcpy (z
, tagfile
);
1374 strcpy (z
, tagfile
);
1375 exit (system (cmd
));
1377 return EXIT_SUCCESS
;
1382 * Return a compressor given the file name. If EXTPTR is non-zero,
1383 * return a pointer into FILE where the compressor-specific
1384 * extension begins. If no compressor is found, NULL is returned
1385 * and EXTPTR is not significant.
1386 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1389 get_compressor_from_suffix (char *file
, char **extptr
)
1392 char *slash
, *suffix
;
1394 /* File has been processed by canonicalize_filename,
1395 so we don't need to consider backslashes on DOS_NT. */
1396 slash
= strrchr (file
, '/');
1397 suffix
= strrchr (file
, '.');
1398 if (suffix
== NULL
|| suffix
< slash
)
1403 /* Let those poor souls who live with DOS 8+3 file name limits get
1404 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1405 Only the first do loop is run if not MSDOS */
1408 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1409 if (streq (compr
->suffix
, suffix
))
1412 break; /* do it only once: not really a loop */
1415 } while (*suffix
!= '\0');
1422 * Return a language given the name.
1425 get_language_from_langname (const char *name
)
1430 error ("empty language name");
1433 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1434 if (streq (name
, lang
->name
))
1436 error ("unknown language \"%s\"", name
);
1444 * Return a language given the interpreter name.
1447 get_language_from_interpreter (char *interpreter
)
1452 if (interpreter
== NULL
)
1454 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1455 if (lang
->interpreters
!= NULL
)
1456 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1457 if (streq (*iname
, interpreter
))
1466 * Return a language given the file name.
1469 get_language_from_filename (char *file
, int case_sensitive
)
1472 const char **name
, **ext
, *suffix
;
1474 /* Try whole file name first. */
1475 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1476 if (lang
->filenames
!= NULL
)
1477 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1478 if ((case_sensitive
)
1479 ? streq (*name
, file
)
1480 : strcaseeq (*name
, file
))
1483 /* If not found, try suffix after last dot. */
1484 suffix
= strrchr (file
, '.');
1488 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1489 if (lang
->suffixes
!= NULL
)
1490 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1491 if ((case_sensitive
)
1492 ? streq (*ext
, suffix
)
1493 : strcaseeq (*ext
, suffix
))
1500 * This routine is called on each file argument.
1503 process_file_name (char *file
, language
*lang
)
1508 char *compressed_name
, *uncompressed_name
;
1509 char *ext
, *real_name
, *tmp_name
;
1512 canonicalize_filename (file
);
1513 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1515 error ("skipping inclusion of %s in self.", file
);
1518 compr
= get_compressor_from_suffix (file
, &ext
);
1521 compressed_name
= file
;
1522 uncompressed_name
= savenstr (file
, ext
- file
);
1526 compressed_name
= NULL
;
1527 uncompressed_name
= file
;
1530 /* If the canonicalized uncompressed name
1531 has already been dealt with, skip it silently. */
1532 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1534 assert (fdp
->infname
!= NULL
);
1535 if (streq (uncompressed_name
, fdp
->infname
))
1539 inf
= fopen (file
, "r" FOPEN_BINARY
);
1544 int file_errno
= errno
;
1545 if (compressed_name
)
1547 /* Try with the given suffix. */
1548 inf
= fopen (uncompressed_name
, "r" FOPEN_BINARY
);
1550 real_name
= uncompressed_name
;
1554 /* Try all possible suffixes. */
1555 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1557 compressed_name
= concat (file
, ".", compr
->suffix
);
1558 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1561 real_name
= compressed_name
;
1566 char *suf
= compressed_name
+ strlen (file
);
1567 size_t suflen
= strlen (compr
->suffix
) + 1;
1568 for ( ; suf
[1]; suf
++, suflen
--)
1570 memmove (suf
, suf
+ 1, suflen
);
1571 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1574 real_name
= compressed_name
;
1581 free (compressed_name
);
1582 compressed_name
= NULL
;
1593 if (real_name
== compressed_name
)
1596 tmp_name
= etags_mktmp ();
1601 #if MSDOS || defined (DOS_NT)
1602 char *cmd1
= concat (compr
->command
, " \"", real_name
);
1603 char *cmd
= concat (cmd1
, "\" > ", tmp_name
);
1605 char *cmd1
= concat (compr
->command
, " '", real_name
);
1606 char *cmd
= concat (cmd1
, "' > ", tmp_name
);
1610 if (system (cmd
) == -1)
1617 inf
= fopen (tmp_name
, "r" FOPEN_BINARY
);
1631 process_file (inf
, uncompressed_name
, lang
);
1633 retval
= fclose (inf
);
1634 if (real_name
== compressed_name
)
1643 if (compressed_name
!= file
)
1644 free (compressed_name
);
1645 if (uncompressed_name
!= file
)
1646 free (uncompressed_name
);
1653 process_file (FILE *fh
, char *fn
, language
*lang
)
1655 static const fdesc emptyfdesc
;
1659 /* Create a new input file description entry. */
1660 fdp
= xnew (1, fdesc
);
1663 fdp
->infname
= savestr (fn
);
1665 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1666 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1667 if (filename_is_absolute (fn
))
1669 /* An absolute file name. Canonicalize it. */
1670 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1674 /* A file name relative to cwd. Make it relative
1675 to the directory of the tags file. */
1676 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1678 fdp
->usecharno
= true; /* use char position when making tags */
1680 fdp
->written
= false; /* not written on tags file yet */
1683 curfdp
= fdhead
; /* the current file description */
1687 /* If not Ctags, and if this is not metasource and if it contained no #line
1688 directives, we can write the tags and free all nodes pointing to
1691 && curfdp
->usecharno
/* no #line directives in this file */
1692 && !curfdp
->lang
->metasource
)
1696 /* Look for the head of the sublist relative to this file. See add_node
1697 for the structure of the node tree. */
1699 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1700 if (np
->fdp
== curfdp
)
1703 /* If we generated tags for this file, write and delete them. */
1706 /* This is the head of the last sublist, if any. The following
1707 instructions depend on this being true. */
1708 assert (np
->left
== NULL
);
1710 assert (fdhead
== curfdp
);
1711 assert (last_node
->fdp
== curfdp
);
1712 put_entries (np
); /* write tags for file curfdp->taggedfname */
1713 free_tree (np
); /* remove the written nodes */
1715 nodehead
= NULL
; /* no nodes left */
1717 prev
->left
= NULL
; /* delete the pointer to the sublist */
1723 reset_input (FILE *inf
)
1725 if (fseek (inf
, 0, SEEK_SET
) != 0)
1726 perror (infilename
);
1730 * This routine opens the specified file and calls the function
1731 * which finds the function and type definitions.
1734 find_entries (FILE *inf
)
1737 language
*lang
= curfdp
->lang
;
1738 Lang_function
*parser
= NULL
;
1740 /* If user specified a language, use it. */
1741 if (lang
!= NULL
&& lang
->function
!= NULL
)
1743 parser
= lang
->function
;
1746 /* Else try to guess the language given the file name. */
1749 lang
= get_language_from_filename (curfdp
->infname
, true);
1750 if (lang
!= NULL
&& lang
->function
!= NULL
)
1752 curfdp
->lang
= lang
;
1753 parser
= lang
->function
;
1757 /* Else look for sharp-bang as the first two characters. */
1759 && readline_internal (&lb
, inf
, infilename
) > 0
1761 && lb
.buffer
[0] == '#'
1762 && lb
.buffer
[1] == '!')
1766 /* Set lp to point at the first char after the last slash in the
1767 line or, if no slashes, at the first nonblank. Then set cp to
1768 the first successive blank and terminate the string. */
1769 lp
= strrchr (lb
.buffer
+2, '/');
1773 lp
= skip_spaces (lb
.buffer
+ 2);
1774 cp
= skip_non_spaces (lp
);
1777 if (strlen (lp
) > 0)
1779 lang
= get_language_from_interpreter (lp
);
1780 if (lang
!= NULL
&& lang
->function
!= NULL
)
1782 curfdp
->lang
= lang
;
1783 parser
= lang
->function
;
1790 /* Else try to guess the language given the case insensitive file name. */
1793 lang
= get_language_from_filename (curfdp
->infname
, false);
1794 if (lang
!= NULL
&& lang
->function
!= NULL
)
1796 curfdp
->lang
= lang
;
1797 parser
= lang
->function
;
1801 /* Else try Fortran or C. */
1804 node
*old_last_node
= last_node
;
1806 curfdp
->lang
= get_language_from_langname ("fortran");
1809 if (old_last_node
== last_node
)
1810 /* No Fortran entries found. Try C. */
1813 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1819 if (!no_line_directive
1820 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1821 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1822 file, or anyway we parsed a file that is automatically generated from
1823 this one. If this is the case, the bingo.c file contained #line
1824 directives that generated tags pointing to this file. Let's delete
1825 them all before parsing this file, which is the real source. */
1827 fdesc
**fdpp
= &fdhead
;
1828 while (*fdpp
!= NULL
)
1830 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1831 /* We found one of those! We must delete both the file description
1832 and all tags referring to it. */
1834 fdesc
*badfdp
= *fdpp
;
1836 /* Delete the tags referring to badfdp->taggedfname
1837 that were obtained from badfdp->infname. */
1838 invalidate_nodes (badfdp
, &nodehead
);
1840 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1841 free_fdesc (badfdp
);
1844 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1847 assert (parser
!= NULL
);
1849 /* Generic initializations before reading from file. */
1850 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1852 /* Generic initializations before parsing file with readline. */
1853 lineno
= 0; /* reset global line number */
1854 charno
= 0; /* reset global char number */
1855 linecharno
= 0; /* reset global char number of line start */
1859 regex_tag_multiline ();
1864 * Check whether an implicitly named tag should be created,
1865 * then call `pfnote'.
1866 * NAME is a string that is internally copied by this function.
1868 * TAGS format specification
1869 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1870 * The following is explained in some more detail in etc/ETAGS.EBNF.
1872 * make_tag creates tags with "implicit tag names" (unnamed tags)
1873 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1874 * 1. NAME does not contain any of the characters in NONAM;
1875 * 2. LINESTART contains name as either a rightmost, or rightmost but
1876 * one character, substring;
1877 * 3. the character, if any, immediately before NAME in LINESTART must
1878 * be a character in NONAM;
1879 * 4. the character, if any, immediately after NAME in LINESTART must
1880 * also be a character in NONAM.
1882 * The implementation uses the notinname() macro, which recognizes the
1883 * characters stored in the string `nonam'.
1884 * etags.el needs to use the same characters that are in NONAM.
1887 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1888 int namelen
, /* tag length */
1889 bool is_func
, /* tag is a function */
1890 char *linestart
, /* start of the line where tag is */
1891 int linelen
, /* length of the line where tag is */
1892 int lno
, /* line number */
1893 long int cno
) /* character number */
1895 bool named
= (name
!= NULL
&& namelen
> 0);
1898 if (!CTAGS
&& named
) /* maybe set named to false */
1899 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1900 such that etags.el can guess a name from it. */
1903 register const char *cp
= name
;
1905 for (i
= 0; i
< namelen
; i
++)
1906 if (notinname (*cp
++))
1908 if (i
== namelen
) /* rule #1 */
1910 cp
= linestart
+ linelen
- namelen
;
1911 if (notinname (linestart
[linelen
-1]))
1912 cp
-= 1; /* rule #4 */
1913 if (cp
>= linestart
/* rule #2 */
1915 || notinname (cp
[-1])) /* rule #3 */
1916 && strneq (name
, cp
, namelen
)) /* rule #2 */
1917 named
= false; /* use implicit tag name */
1922 nname
= savenstr (name
, namelen
);
1924 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
1929 pfnote (char *name
, bool is_func
, char *linestart
, int linelen
, int lno
,
1931 /* tag name, or NULL if unnamed */
1932 /* tag is a function */
1933 /* start of the line where tag is */
1934 /* length of the line where tag is */
1936 /* character number */
1940 assert (name
== NULL
|| name
[0] != '\0');
1941 if (CTAGS
&& name
== NULL
)
1944 np
= xnew (1, node
);
1946 /* If ctags mode, change name "main" to M<thisfilename>. */
1947 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1949 char *fp
= strrchr (curfdp
->taggedfname
, '/');
1950 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1951 fp
= strrchr (np
->name
, '.');
1952 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1958 np
->been_warned
= false;
1960 np
->is_func
= is_func
;
1962 if (np
->fdp
->usecharno
)
1963 /* Our char numbers are 0-base, because of C language tradition?
1964 ctags compatibility? old versions compatibility? I don't know.
1965 Anyway, since emacs's are 1-base we expect etags.el to take care
1966 of the difference. If we wanted to have 1-based numbers, we would
1967 uncomment the +1 below. */
1968 np
->cno
= cno
/* + 1 */ ;
1970 np
->cno
= invalidcharno
;
1971 np
->left
= np
->right
= NULL
;
1972 if (CTAGS
&& !cxref_style
)
1974 if (strlen (linestart
) < 50)
1975 np
->regex
= concat (linestart
, "$", "");
1977 np
->regex
= savenstr (linestart
, 50);
1980 np
->regex
= savenstr (linestart
, linelen
);
1982 add_node (np
, &nodehead
);
1987 * recurse on left children, iterate on right children.
1990 free_tree (register node
*np
)
1994 register node
*node_right
= np
->right
;
1995 free_tree (np
->left
);
2005 * delete a file description
2008 free_fdesc (register fdesc
*fdp
)
2010 free (fdp
->infname
);
2011 free (fdp
->infabsname
);
2012 free (fdp
->infabsdir
);
2013 free (fdp
->taggedfname
);
2020 * Adds a node to the tree of nodes. In etags mode, sort by file
2021 * name. In ctags mode, sort by tag name. Make no attempt at
2024 * add_node is the only function allowed to add nodes, so it can
2028 add_node (node
*np
, node
**cur_node_p
)
2031 register node
*cur_node
= *cur_node_p
;
2033 if (cur_node
== NULL
)
2043 /* For each file name, tags are in a linked sublist on the right
2044 pointer. The first tags of different files are a linked list
2045 on the left pointer. last_node points to the end of the last
2047 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2049 /* Let's use the same sublist as the last added node. */
2050 assert (last_node
->right
== NULL
);
2051 last_node
->right
= np
;
2054 else if (cur_node
->fdp
== np
->fdp
)
2056 /* Scanning the list we found the head of a sublist which is
2057 good for us. Let's scan this sublist. */
2058 add_node (np
, &cur_node
->right
);
2061 /* The head of this sublist is not good for us. Let's try the
2063 add_node (np
, &cur_node
->left
);
2064 } /* if ETAGS mode */
2069 dif
= strcmp (np
->name
, cur_node
->name
);
2072 * If this tag name matches an existing one, then
2073 * do not add the node, but maybe print a warning.
2075 if (no_duplicates
&& !dif
)
2077 if (np
->fdp
== cur_node
->fdp
)
2081 fprintf (stderr
, "Duplicate entry in file %s, line %d: %s\n",
2082 np
->fdp
->infname
, lineno
, np
->name
);
2083 fprintf (stderr
, "Second entry ignored\n");
2086 else if (!cur_node
->been_warned
&& !no_warnings
)
2090 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2091 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2092 cur_node
->been_warned
= true;
2097 /* Actually add the node */
2098 add_node (np
, dif
< 0 ? &cur_node
->left
: &cur_node
->right
);
2099 } /* if CTAGS mode */
2103 * invalidate_nodes ()
2104 * Scan the node tree and invalidate all nodes pointing to the
2105 * given file description (CTAGS case) or free them (ETAGS case).
2108 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2117 if (np
->left
!= NULL
)
2118 invalidate_nodes (badfdp
, &np
->left
);
2119 if (np
->fdp
== badfdp
)
2121 if (np
->right
!= NULL
)
2122 invalidate_nodes (badfdp
, &np
->right
);
2126 assert (np
->fdp
!= NULL
);
2127 if (np
->fdp
== badfdp
)
2129 *npp
= np
->left
; /* detach the sublist from the list */
2130 np
->left
= NULL
; /* isolate it */
2131 free_tree (np
); /* free it */
2132 invalidate_nodes (badfdp
, npp
);
2135 invalidate_nodes (badfdp
, &np
->left
);
2140 static int total_size_of_entries (node
*);
2141 static int number_len (long) ATTRIBUTE_CONST
;
2143 /* Length of a non-negative number's decimal representation. */
2145 number_len (long int num
)
2148 while ((num
/= 10) > 0)
2154 * Return total number of characters that put_entries will output for
2155 * the nodes in the linked list at the right of the specified node.
2156 * This count is irrelevant with etags.el since emacs 19.34 at least,
2157 * but is still supplied for backward compatibility.
2160 total_size_of_entries (register node
*np
)
2162 register int total
= 0;
2164 for (; np
!= NULL
; np
= np
->right
)
2167 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2168 if (np
->name
!= NULL
)
2169 total
+= strlen (np
->name
) + 1; /* name\001 */
2170 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2171 if (np
->cno
!= invalidcharno
) /* cno */
2172 total
+= number_len (np
->cno
);
2173 total
+= 1; /* newline */
2180 put_entries (register node
*np
)
2183 static fdesc
*fdp
= NULL
;
2188 /* Output subentries that precede this one */
2190 put_entries (np
->left
);
2192 /* Output this entry */
2201 fprintf (tagf
, "\f\n%s,%d\n",
2202 fdp
->taggedfname
, total_size_of_entries (np
));
2203 fdp
->written
= true;
2205 fputs (np
->regex
, tagf
);
2206 fputc ('\177', tagf
);
2207 if (np
->name
!= NULL
)
2209 fputs (np
->name
, tagf
);
2210 fputc ('\001', tagf
);
2212 fprintf (tagf
, "%d,", np
->lno
);
2213 if (np
->cno
!= invalidcharno
)
2214 fprintf (tagf
, "%ld", np
->cno
);
2220 if (np
->name
== NULL
)
2221 error ("internal error: NULL name in ctags mode.");
2226 fprintf (stdout
, "%s %s %d\n",
2227 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2229 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2230 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2234 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2237 { /* function or #define macro with args */
2238 putc (searchar
, tagf
);
2241 for (sp
= np
->regex
; *sp
; sp
++)
2243 if (*sp
== '\\' || *sp
== searchar
)
2247 putc (searchar
, tagf
);
2250 { /* anything else; text pattern inadequate */
2251 fprintf (tagf
, "%d", np
->lno
);
2256 } /* if this node contains a valid tag */
2258 /* Output subentries that follow this one */
2259 put_entries (np
->right
);
2261 put_entries (np
->left
);
2266 #define C_EXT 0x00fff /* C extensions */
2267 #define C_PLAIN 0x00000 /* C */
2268 #define C_PLPL 0x00001 /* C++ */
2269 #define C_STAR 0x00003 /* C* */
2270 #define C_JAVA 0x00005 /* JAVA */
2271 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2272 #define YACC 0x10000 /* yacc file */
2275 * The C symbol tables.
2280 st_C_objprot
, st_C_objimpl
, st_C_objend
,
2282 st_C_ignore
, st_C_attribute
,
2285 st_C_class
, st_C_template
,
2286 st_C_struct
, st_C_extern
, st_C_enum
, st_C_define
, st_C_typedef
2289 /* Feed stuff between (but not including) %[ and %] lines to:
2295 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2299 while, 0, st_C_ignore
2300 switch, 0, st_C_ignore
2301 return, 0, st_C_ignore
2302 __attribute__, 0, st_C_attribute
2303 GTY, 0, st_C_attribute
2304 @interface, 0, st_C_objprot
2305 @protocol, 0, st_C_objprot
2306 @implementation,0, st_C_objimpl
2307 @end, 0, st_C_objend
2308 import, (C_JAVA & ~C_PLPL), st_C_ignore
2309 package, (C_JAVA & ~C_PLPL), st_C_ignore
2310 friend, C_PLPL, st_C_ignore
2311 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2312 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2313 interface, (C_JAVA & ~C_PLPL), st_C_struct
2314 class, 0, st_C_class
2315 namespace, C_PLPL, st_C_struct
2316 domain, C_STAR, st_C_struct
2317 union, 0, st_C_struct
2318 struct, 0, st_C_struct
2319 extern, 0, st_C_extern
2321 typedef, 0, st_C_typedef
2322 define, 0, st_C_define
2323 undef, 0, st_C_define
2324 operator, C_PLPL, st_C_operator
2325 template, 0, st_C_template
2326 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2327 DEFUN, 0, st_C_gnumacro
2328 SYSCALL, 0, st_C_gnumacro
2329 ENTRY, 0, st_C_gnumacro
2330 PSEUDO, 0, st_C_gnumacro
2331 # These are defined inside C functions, so currently they are not met.
2332 # EXFUN used in glibc, DEFVAR_* in emacs.
2333 #EXFUN, 0, st_C_gnumacro
2334 #DEFVAR_, 0, st_C_gnumacro
2336 and replace lines between %< and %> with its output, then:
2337 - remove the #if characterset check
2338 - make in_word_set static and not inline. */
2340 /* C code produced by gperf version 3.0.1 */
2341 /* Command-line: gperf -m 5 */
2342 /* Computed positions: -k'2-3' */
2344 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
2345 /* maximum key range = 33, duplicates = 0 */
/* Perfect hash for the C keyword table (originally produced by
   gperf 3.0.1 with key positions 2-3).

   STR is the candidate word and LEN its length in characters.  The
   result is LEN plus the association values of the second and third
   characters of STR; for every keyword stored in in_word_set's table
   it is that keyword's slot.

   The generated code read STR[2] for every LEN other than 2 and so
   relied on the caller rejecting words shorter than two characters.
   The reads are now bounded by LEN, so a short word simply hashes to
   LEN (slot 0 or 1, both empty) instead of looking past its end.
   Results for LEN >= 2 are unchanged.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Third character contributes only when the word has one.  */
  if (len > 2)
    hval += asso_values[(unsigned char) str[2]];
  /* Likewise for the second character.  */
  if (len > 1)
    hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2393 static struct C_stab_entry
*
2394 in_word_set (register const char *str
, register unsigned int len
)
2398 TOTAL_KEYWORDS
= 33,
2399 MIN_WORD_LENGTH
= 2,
2400 MAX_WORD_LENGTH
= 15,
2405 static struct C_stab_entry wordlist
[] =
2408 {"if", 0, st_C_ignore
},
2409 {"GTY", 0, st_C_attribute
},
2410 {"@end", 0, st_C_objend
},
2411 {"union", 0, st_C_struct
},
2412 {"define", 0, st_C_define
},
2413 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2414 {"template", 0, st_C_template
},
2415 {"operator", C_PLPL
, st_C_operator
},
2416 {"@interface", 0, st_C_objprot
},
2417 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2418 {"friend", C_PLPL
, st_C_ignore
},
2419 {"typedef", 0, st_C_typedef
},
2420 {"return", 0, st_C_ignore
},
2421 {"@implementation",0, st_C_objimpl
},
2422 {"@protocol", 0, st_C_objprot
},
2423 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2424 {"extern", 0, st_C_extern
},
2425 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2426 {"struct", 0, st_C_struct
},
2427 {"domain", C_STAR
, st_C_struct
},
2428 {"switch", 0, st_C_ignore
},
2429 {"enum", 0, st_C_enum
},
2430 {"for", 0, st_C_ignore
},
2431 {"namespace", C_PLPL
, st_C_struct
},
2432 {"class", 0, st_C_class
},
2433 {"while", 0, st_C_ignore
},
2434 {"undef", 0, st_C_define
},
2435 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2436 {"__attribute__", 0, st_C_attribute
},
2437 {"SYSCALL", 0, st_C_gnumacro
},
2438 {"ENTRY", 0, st_C_gnumacro
},
2439 {"PSEUDO", 0, st_C_gnumacro
},
2440 {"DEFUN", 0, st_C_gnumacro
}
2443 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2445 int key
= hash (str
, len
);
2447 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2449 const char *s
= wordlist
[key
].name
;
2451 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2452 return &wordlist
[key
];
2459 static enum sym_type
2460 C_symtype (char *str
, int len
, int c_ext
)
2462 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2464 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
2471 * Ignoring __attribute__ ((list))
2473 static bool inattribute
; /* looking at an __attribute__ construct */
2476 * C functions and variables are recognized using a simple
2477 * finite automaton. fvdef is its state variable.
2481 fvnone
, /* nothing seen */
2482 fdefunkey
, /* Emacs DEFUN keyword seen */
2483 fdefunname
, /* Emacs DEFUN name seen */
2484 foperator
, /* func: operator keyword seen (cplpl) */
2485 fvnameseen
, /* function or variable name seen */
2486 fstartlist
, /* func: just after open parenthesis */
2487 finlist
, /* func: in parameter list */
2488 flistseen
, /* func: after parameter list */
2489 fignore
, /* func: before open brace */
2490 vignore
/* var-like: ignore until ';' */
2493 static bool fvextern
; /* func or var: extern keyword seen; */
2496 * typedefs are recognized using a simple finite automaton.
2497 * typdef is its state variable.
2501 tnone
, /* nothing seen */
2502 tkeyseen
, /* typedef keyword seen */
2503 ttypeseen
, /* defined type seen */
2504 tinbody
, /* inside typedef body */
2505 tend
, /* just before typedef tag */
2506 tignore
/* junk after typedef tag */
2510 * struct-like structures (enum, struct and union) are recognized
2511 * using another simple finite automaton. `structdef' is its state
2516 snone
, /* nothing seen yet,
2517 or in struct body if bracelev > 0 */
2518 skeyseen
, /* struct-like keyword seen */
2519 stagseen
, /* struct-like tag seen */
2520 scolonseen
/* colon seen after struct-like tag */
2524 * When objdef is different from onone, objtag is the name of the class.
2526 static const char *objtag
= "<uninited>";
2529 * Yet another little state machine to deal with preprocessor lines.
2533 dnone
, /* nothing seen */
2534 dsharpseen
, /* '#' seen as first char on line */
2535 ddefineseen
, /* '#' and 'define' seen */
2536 dignorerest
/* ignore rest of line */
2540 * State machine for Objective C protocols and implementations.
2541 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2545 onone
, /* nothing seen */
2546 oprotocol
, /* @interface or @protocol seen */
2547 oimplementation
, /* @implementation seen */
2548 otagseen
, /* class name seen */
2549 oparenseen
, /* parenthesis before category seen */
2550 ocatseen
, /* category name seen */
2551 oinbody
, /* in @implementation body */
2552 omethodsign
, /* in @implementation body, after +/- */
2553 omethodtag
, /* after method name */
2554 omethodcolon
, /* after method colon */
2555 omethodparm
, /* after method parameter */
2556 oignore
/* wait for @end */
2561 * Use this structure to keep info about the token read, and how it
2562 * should be tagged. Used by the make_C_tag function to build a tag.
2566 char *line
; /* string containing the token */
2567 int offset
; /* where the token starts in LINE */
2568 int length
; /* token length */
2570 The previous members can be used to pass strings around for generic
2571 purposes. The following ones specifically refer to creating tags. In this
2572 case the token contained here is the pattern that will be used to create a
2575 bool valid
; /* do not create a tag; the token should be
2576 invalidated whenever a state machine is
2577 reset prematurely */
2578 bool named
; /* create a named tag */
2579 int lineno
; /* source line number of tag */
2580 long linepos
; /* source char number of tag */
2581 } token
; /* latest token read */
2584 * Variables and functions for dealing with nested structures.
2585 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2587 static void pushclass_above (int, char *, int);
2588 static void popclass_above (int);
2589 static void write_classname (linebuffer
*, const char *qualifier
);
2592 char **cname
; /* nested class names */
2593 int *bracelev
; /* nested class brace level */
2594 int nl
; /* class nesting level (elements used) */
2595 int size
; /* length of the array */
2596 } cstack
; /* stack for nested declaration tags */
2597 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2598 #define nestlev (cstack.nl)
2599 /* After struct keyword or in struct body, not inside a nested function. */
2600 #define instruct (structdef == snone && nestlev > 0 \
2601 && bracelev == cstack.bracelev[nestlev-1] + 1)
2604 pushclass_above (int bracelev
, char *str
, int len
)
2608 popclass_above (bracelev
);
2610 if (nl
>= cstack
.size
)
2612 int size
= cstack
.size
*= 2;
2613 xrnew (cstack
.cname
, size
, char *);
2614 xrnew (cstack
.bracelev
, size
, int);
2616 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2617 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2618 cstack
.bracelev
[nl
] = bracelev
;
2623 popclass_above (int bracelev
)
2627 for (nl
= cstack
.nl
- 1;
2628 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2631 free (cstack
.cname
[nl
]);
2637 write_classname (linebuffer
*cn
, const char *qualifier
)
2640 int qlen
= strlen (qualifier
);
2642 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2646 cn
->buffer
[0] = '\0';
2650 len
= strlen (cstack
.cname
[0]);
2651 linebuffer_setlen (cn
, len
);
2652 strcpy (cn
->buffer
, cstack
.cname
[0]);
2654 for (i
= 1; i
< cstack
.nl
; i
++)
2656 char *s
= cstack
.cname
[i
];
2659 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2660 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2665 static bool consider_token (char *, int, int, int *, int, int, bool *);
2666 static void make_C_tag (bool);
2670 * checks to see if the current token is at the start of a
2671 * function or variable, or corresponds to a typedef, or
2672 * is a struct/union/enum tag, or #define, or an enum constant.
2674 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2675 * with args. C_EXTP points to which language we are looking at.
2686 consider_token (char *str
, int len
, int c
, int *c_extp
,
2687 int bracelev
, int parlev
, bool *is_func_or_var
)
2688 /* IN: token pointer */
2689 /* IN: token length */
2690 /* IN: first char after the token */
2691 /* IN, OUT: C extensions mask */
2692 /* IN: brace level */
2693 /* IN: parenthesis level */
2694 /* OUT: function or variable found */
2696 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2697 structtype is the type of the preceding struct-like keyword, and
2698 structbracelev is the brace level where it has been seen. */
2699 static enum sym_type structtype
;
2700 static int structbracelev
;
2701 static enum sym_type toktype
;
2704 toktype
= C_symtype (str
, len
, *c_extp
);
2707 * Skip __attribute__
2709 if (toktype
== st_C_attribute
)
2716 * Advance the definedef state machine.
2721 /* We're not on a preprocessor line. */
2722 if (toktype
== st_C_gnumacro
)
2729 if (toktype
== st_C_define
)
2731 definedef
= ddefineseen
;
2735 definedef
= dignorerest
;
2740 * Make a tag for any macro, unless it is a constant
2741 * and constantypedefs is false.
2743 definedef
= dignorerest
;
2744 *is_func_or_var
= (c
== '(');
2745 if (!*is_func_or_var
&& !constantypedefs
)
2752 error ("internal error: definedef value.");
2761 if (toktype
== st_C_typedef
)
2784 if (structdef
== snone
&& fvdef
== fvnone
)
2806 case st_C_javastruct
:
2807 if (structdef
== stagseen
)
2808 structdef
= scolonseen
;
2812 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
2814 && definedef
== dnone
&& structdef
== snone
2815 && typdef
== tnone
&& fvdef
== fvnone
)
2816 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2817 if (toktype
== st_C_template
)
2824 && (typdef
== tkeyseen
2825 || (typedefs_or_cplusplus
&& structdef
== snone
)))
2827 structdef
= skeyseen
;
2828 structtype
= toktype
;
2829 structbracelev
= bracelev
;
2830 if (fvdef
== fvnameseen
)
2838 if (structdef
== skeyseen
)
2840 structdef
= stagseen
;
2844 if (typdef
!= tnone
)
2847 /* Detect Objective C constructs. */
2857 objdef
= oimplementation
;
2863 case oimplementation
:
2864 /* Save the class tag for functions or variables defined inside. */
2865 objtag
= savenstr (str
, len
);
2869 /* Save the class tag for categories. */
2870 objtag
= savenstr (str
, len
);
2872 *is_func_or_var
= true;
2876 *is_func_or_var
= true;
2884 objdef
= omethodtag
;
2885 linebuffer_setlen (&token_name
, len
);
2886 memcpy (token_name
.buffer
, str
, len
);
2887 token_name
.buffer
[len
] = '\0';
2893 objdef
= omethodparm
;
2898 objdef
= omethodtag
;
2901 int oldlen
= token_name
.len
;
2903 linebuffer_setlen (&token_name
, oldlen
+ len
);
2904 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
2905 token_name
.buffer
[oldlen
+ len
] = '\0';
2911 if (toktype
== st_C_objend
)
2913 /* Memory leakage here: the string pointed by objtag is
2914 never released, because many tests would be needed to
2915 avoid breaking on incorrect input code. The amount of
2916 memory leaked here is the sum of the lengths of the
2926 /* A function, variable or enum constant? */
2948 *is_func_or_var
= true;
2952 && structdef
== snone
2953 && structtype
== st_C_enum
&& bracelev
> structbracelev
2954 /* Don't tag tokens in expressions that assign values to enum
2956 && fvdef
!= vignore
)
2957 return true; /* enum constant */
2963 fvdef
= fdefunname
; /* GNU macro */
2964 *is_func_or_var
= true;
2972 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
2973 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
2984 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
2986 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
2987 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2989 *is_func_or_var
= true;
2992 if (bracelev
> 0 && !instruct
)
2994 fvdef
= fvnameseen
; /* function or variable */
2995 *is_func_or_var
= true;
3010 * C_entries often keeps pointers to tokens or lines which are older than
3011 * the line currently read. By keeping two line buffers, and switching
3012 * them at end of line, it is possible to use those pointers.
3020 #define current_lb_is_new (newndx == curndx)
3021 #define switch_line_buffers() (curndx = 1 - curndx)
3023 #define curlb (lbs[curndx].lb)
3024 #define newlb (lbs[newndx].lb)
3025 #define curlinepos (lbs[curndx].linepos)
3026 #define newlinepos (lbs[newndx].linepos)
3028 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3029 #define cplpl (c_ext & C_PLPL)
3030 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3032 #define CNL_SAVE_DEFINEDEF() \
3034 curlinepos = charno; \
3035 readline (&curlb, inf); \
3036 lp = curlb.buffer; \
3043 CNL_SAVE_DEFINEDEF (); \
3044 if (savetoken.valid) \
3046 token = savetoken; \
3047 savetoken.valid = false; \
3049 definedef = dnone; \
3054 make_C_tag (bool isfun
)
3056 /* This function is never called when token.valid is false, but
3057 we must protect against invalid input or internal errors. */
3059 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
3060 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3062 { /* this branch is optimized away if !DEBUG */
3063 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
3064 token_name
.len
+ 17, isfun
, token
.line
,
3065 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3066 error ("INVALID TOKEN");
3069 token
.valid
= false;
/* Return true if INF is neither at end of file nor in an error
   state, i.e. another read from it may still yield data.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3081 * This routine finds functions, variables, typedefs,
3082 * #define's, enum constants and struct/union/enum definitions in
3083 * C syntax and adds them to the list.
3086 C_entries (int c_ext
, FILE *inf
)
3087 /* extension of C */
3090 register char c
; /* latest char read; '\0' for end of line */
3091 register char *lp
; /* pointer one beyond the character `c' */
3092 int curndx
, newndx
; /* indices for current and new lb */
3093 register int tokoff
; /* offset in line of start of current token */
3094 register int toklen
; /* length of current token */
3095 const char *qualifier
; /* string used to qualify names */
3096 int qlen
; /* length of qualifier */
3097 int bracelev
; /* current brace level */
3098 int bracketlev
; /* current bracket level */
3099 int parlev
; /* current parenthesis level */
3100 int attrparlev
; /* __attribute__ parenthesis level */
3101 int templatelev
; /* current template level */
3102 int typdefbracelev
; /* bracelev where a typedef struct body begun */
3103 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3104 bool yacc_rules
; /* in the rules part of a yacc file */
3105 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
3108 linebuffer_init (&lbs
[0].lb
);
3109 linebuffer_init (&lbs
[1].lb
);
3110 if (cstack
.size
== 0)
3112 cstack
.size
= (DEBUG
) ? 1 : 4;
3114 cstack
.cname
= xnew (cstack
.size
, char *);
3115 cstack
.bracelev
= xnew (cstack
.size
, int);
3118 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3119 curndx
= newndx
= 0;
3123 fvdef
= fvnone
; fvextern
= false; typdef
= tnone
;
3124 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3126 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3127 token
.valid
= savetoken
.valid
= false;
3128 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3130 { qualifier
= "."; qlen
= 1; }
3132 { qualifier
= "::"; qlen
= 2; }
3135 while (perhaps_more_input (inf
))
3140 /* If we are at the end of the line, the next character is a
3141 '\0'; do not skip it, because it is what tells us
3142 to read the next line. */
3163 /* Newlines inside comments do not end macro definitions in
3165 CNL_SAVE_DEFINEDEF ();
3178 /* Newlines inside strings do not end macro definitions
3179 in traditional cpp, even though compilers don't
3180 usually accept them. */
3181 CNL_SAVE_DEFINEDEF ();
3191 /* Hmmm, something went wrong. */
3227 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3242 else if (/* cplpl && */ *lp
== '/')
3248 if ((c_ext
& YACC
) && *lp
== '%')
3250 /* Entering or exiting rules section in yacc file. */
3252 definedef
= dnone
; fvdef
= fvnone
; fvextern
= false;
3253 typdef
= tnone
; structdef
= snone
;
3254 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3256 yacc_rules
= !yacc_rules
;
3262 if (definedef
== dnone
)
3265 bool cpptoken
= true;
3267 /* Look back on this line. If all blanks, or nonblanks
3268 followed by an end of comment, this is a preprocessor
3270 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3271 if (!c_isspace (*cp
))
3273 if (*cp
== '*' && cp
[1] == '/')
3283 definedef
= dsharpseen
;
3284 /* This is needed for tagging enum values: when there are
3285 preprocessor conditionals inside the enum, we need to
3286 reset the value of fvdef so that the next enum value is
3287 tagged even though the one before it did not end in a
3289 if (fvdef
== vignore
&& instruct
&& parlev
== 0)
3291 if (strneq (cp
, "#if", 3) || strneq (cp
, "#el", 3))
3295 } /* if (definedef == dnone) */
3306 CNL_SAVE_DEFINEDEF ();
3313 /* Consider token only if some involved conditions are satisfied. */
3314 if (typdef
!= tignore
3315 && definedef
!= dignorerest
3318 && (definedef
!= dnone
3319 || structdef
!= scolonseen
)
3326 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3327 /* This handles :: in the middle,
3328 but not at the beginning of an identifier.
3329 Also, space-separated :: is not recognized. */
3331 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3332 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3336 goto still_in_token
;
3340 bool funorvar
= false;
3343 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3344 &c_ext
, bracelev
, parlev
,
3347 if (fvdef
== foperator
)
3350 lp
= skip_spaces (lp
-1);
3354 && !c_isspace (*lp
) && *lp
!= '(')
3357 toklen
+= lp
- oldlp
;
3359 token
.named
= false;
3361 && nestlev
> 0 && definedef
== dnone
)
3362 /* in struct body */
3367 write_classname (&token_name
, qualifier
);
3368 len
= token_name
.len
;
3369 linebuffer_setlen (&token_name
,
3370 len
+ qlen
+ toklen
);
3371 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3373 newlb
.buffer
+ tokoff
);
3377 linebuffer_setlen (&token_name
, toklen
);
3378 sprintf (token_name
.buffer
, "%.*s",
3379 toklen
, newlb
.buffer
+ tokoff
);
3383 else if (objdef
== ocatseen
)
3384 /* Objective C category */
3388 int len
= strlen (objtag
) + 2 + toklen
;
3389 linebuffer_setlen (&token_name
, len
);
3390 sprintf (token_name
.buffer
, "%s(%.*s)",
3392 newlb
.buffer
+ tokoff
);
3396 linebuffer_setlen (&token_name
, toklen
);
3397 sprintf (token_name
.buffer
, "%.*s",
3398 toklen
, newlb
.buffer
+ tokoff
);
3402 else if (objdef
== omethodtag
3403 || objdef
== omethodparm
)
3404 /* Objective C method */
3408 else if (fvdef
== fdefunname
)
3409 /* GNU DEFUN and similar macros */
3411 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3415 /* Rewrite the tag so that emacs lisp DEFUNs
3416 can be found by their elisp name */
3422 linebuffer_setlen (&token_name
, len
);
3423 memcpy (token_name
.buffer
,
3424 newlb
.buffer
+ off
, len
);
3425 token_name
.buffer
[len
] = '\0';
3428 if (token_name
.buffer
[len
] == '_')
3429 token_name
.buffer
[len
] = '-';
3430 token
.named
= defun
;
3434 linebuffer_setlen (&token_name
, toklen
);
3435 memcpy (token_name
.buffer
,
3436 newlb
.buffer
+ tokoff
, toklen
);
3437 token_name
.buffer
[toklen
] = '\0';
3438 /* Name macros and members. */
3439 token
.named
= (structdef
== stagseen
3440 || typdef
== ttypeseen
3443 && definedef
== dignorerest
)
3445 && definedef
== dnone
3446 && structdef
== snone
3449 token
.lineno
= lineno
;
3450 token
.offset
= tokoff
;
3451 token
.length
= toklen
;
3452 token
.line
= newlb
.buffer
;
3453 token
.linepos
= newlinepos
;
3456 if (definedef
== dnone
3457 && (fvdef
== fvnameseen
3458 || fvdef
== foperator
3459 || structdef
== stagseen
3461 || typdef
== ttypeseen
3462 || objdef
!= onone
))
3464 if (current_lb_is_new
)
3465 switch_line_buffers ();
3467 else if (definedef
!= dnone
3468 || fvdef
== fdefunname
3470 make_C_tag (funorvar
);
3472 else /* not yacc and consider_token failed */
3474 if (inattribute
&& fvdef
== fignore
)
3476 /* We have just met __attribute__ after a
3477 function parameter list: do not tag the
3484 } /* if (endtoken (c)) */
3485 else if (intoken (c
))
3491 } /* if (midtoken) */
3492 else if (begtoken (c
))
3500 /* This prevents tagging fb in
3501 void (__attribute__((noreturn)) *fb) (void);
3502 Fixing this is not easy and not very important. */
3506 if (plainc
|| declarations
)
3508 make_C_tag (true); /* a function */
3515 if (structdef
== stagseen
&& !cjava
)
3517 popclass_above (bracelev
);
3527 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3529 tokoff
= lp
- 1 - newlb
.buffer
;
3534 } /* if (begtoken) */
3535 } /* if must look at token */
3538 /* Detect end of line, colon, comma, semicolon and various braces
3539 after having handled a token.*/
3545 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3547 make_C_tag (false); /* a yacc function */
3550 if (definedef
!= dnone
)
3556 make_C_tag (true); /* an Objective C class */
3560 objdef
= omethodcolon
;
3563 int toklen
= token_name
.len
;
3564 linebuffer_setlen (&token_name
, toklen
+ 1);
3565 strcpy (token_name
.buffer
+ toklen
, ":");
3571 if (structdef
== stagseen
)
3573 structdef
= scolonseen
;
3576 /* Should be useless, but may work as a safety net. */
3577 if (cplpl
&& fvdef
== flistseen
)
3579 make_C_tag (true); /* a function */
3585 if (definedef
!= dnone
|| inattribute
)
3591 make_C_tag (false); /* a typedef */
3601 if (typdef
== tignore
|| cplpl
)
3605 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3606 || (members
&& instruct
))
3607 make_C_tag (false); /* a variable */
3610 token
.valid
= false;
3614 && (cplpl
|| !instruct
)
3615 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3617 && plainc
&& instruct
))
3618 make_C_tag (true); /* a function */
3624 && cplpl
&& structdef
== stagseen
)
3625 make_C_tag (false); /* forward declaration */
3627 token
.valid
= false;
3628 } /* switch (fvdef) */
3634 if (structdef
== stagseen
)
3638 if (definedef
!= dnone
|| inattribute
)
3644 make_C_tag (true); /* an Objective C method */
3659 if (instruct
&& parlev
== 0)
3670 && (!fvextern
|| declarations
))
3671 || (members
&& instruct
)))
3672 make_C_tag (false); /* a variable */
3675 if ((declarations
&& typdef
== tnone
&& !instruct
)
3676 || (members
&& typdef
!= tignore
&& instruct
))
3678 make_C_tag (true); /* a function */
3681 else if (!declarations
)
3683 token
.valid
= false;
3688 if (structdef
== stagseen
)
3692 if (definedef
!= dnone
|| inattribute
)
3694 if (structdef
== stagseen
)
3701 make_C_tag (false); /* a typedef */
3713 if ((members
&& bracelev
== 1)
3714 || (globals
&& bracelev
== 0
3715 && (!fvextern
|| declarations
)))
3716 make_C_tag (false); /* a variable */
3732 if (definedef
!= dnone
)
3734 if (objdef
== otagseen
&& parlev
== 0)
3735 objdef
= oparenseen
;
3739 if (typdef
== ttypeseen
3743 /* This handles constructs like:
3744 typedef void OperatorFun (int fun); */
3765 if (--attrparlev
== 0)
3766 inattribute
= false;
3769 if (definedef
!= dnone
)
3771 if (objdef
== ocatseen
&& parlev
== 1)
3773 make_C_tag (true); /* an Objective C category */
3789 || typdef
== ttypeseen
))
3792 make_C_tag (false); /* a typedef */
3795 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
3799 if (definedef
!= dnone
)
3801 if (typdef
== ttypeseen
)
3803 /* Whenever typdef is set to tinbody (currently only
3804 here), typdefbracelev should be set to bracelev. */
3806 typdefbracelev
= bracelev
;
3811 if (cplpl
&& !class_qualify
)
3813 /* Remove class and namespace qualifiers from the token,
3814 leaving only the method/member name. */
3815 char *cc
, *uqname
= token_name
.buffer
;
3816 char *tok_end
= token_name
.buffer
+ token_name
.len
;
3818 for (cc
= token_name
.buffer
; cc
< tok_end
; cc
++)
3820 if (*cc
== ':' && cc
[1] == ':')
3826 if (uqname
> token_name
.buffer
)
3828 int uqlen
= strlen (uqname
);
3829 linebuffer_setlen (&token_name
, uqlen
);
3830 memmove (token_name
.buffer
, uqname
, uqlen
+ 1);
3833 make_C_tag (true); /* a function */
3842 make_C_tag (true); /* an Objective C class */
3847 make_C_tag (true); /* an Objective C method */
3851 /* Neutralize `extern "C" {' grot. */
3852 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
3862 case skeyseen
: /* unnamed struct */
3863 pushclass_above (bracelev
, NULL
, 0);
3866 case stagseen
: /* named struct or enum */
3867 case scolonseen
: /* a class */
3868 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
3870 make_C_tag (false); /* a struct or enum */
3878 if (definedef
!= dnone
)
3880 if (fvdef
== fstartlist
)
3882 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
3883 token
.valid
= false;
3887 if (definedef
!= dnone
)
3890 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
3893 token
.valid
= false; /* unexpected value, token unreliable */
3894 bracelev
= 0; /* reset brace level if first column */
3895 parlev
= 0; /* also reset paren level, just in case... */
3897 else if (bracelev
< 0)
3899 token
.valid
= false; /* something gone amiss, token unreliable */
3902 if (bracelev
== 0 && fvdef
== vignore
)
3903 fvdef
= fvnone
; /* end of function */
3904 popclass_above (bracelev
);
3906 /* Only if typdef == tinbody is typdefbracelev significant. */
3907 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
3909 assert (bracelev
== typdefbracelev
);
3914 if (definedef
!= dnone
)
3924 if ((members
&& bracelev
== 1)
3925 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
3926 make_C_tag (false); /* a variable */
3934 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
3941 if (templatelev
> 0)
3949 if (objdef
== oinbody
&& bracelev
== 0)
3951 objdef
= omethodsign
;
3956 case '#': case '~': case '&': case '%': case '/':
3957 case '|': case '^': case '!': case '.': case '?':
3958 if (definedef
!= dnone
)
3960 /* These surely cannot follow a function tag in C. */
3973 if (objdef
== otagseen
)
3975 make_C_tag (true); /* an Objective C class */
3978 /* If a macro spans multiple lines don't reset its state. */
3980 CNL_SAVE_DEFINEDEF ();
3986 } /* while not eof */
3988 free (lbs
[0].lb
.buffer
);
3989 free (lbs
[1].lb
.buffer
);
3993 * Process either a C++ file or a C file depending on the setting
3997 default_C_entries (FILE *inf
)
3999 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
4002 /* Always do plain C. */
4004 plain_C_entries (FILE *inf
)
4009 /* Always do C++. */
4011 Cplusplus_entries (FILE *inf
)
4013 C_entries (C_PLPL
, inf
);
4016 /* Always do Java. */
4018 Cjava_entries (FILE *inf
)
4020 C_entries (C_JAVA
, inf
);
4025 Cstar_entries (FILE *inf
)
4027 C_entries (C_STAR
, inf
);
4030 /* Always do Yacc. */
4032 Yacc_entries (FILE *inf
)
4034 C_entries (YACC
, inf
);
4038 /* Useful macros. */
4039 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4040 while (perhaps_more_input (file_pointer) \
4041 && (readline (&(line_buffer), file_pointer), \
4042 (char_pointer) = (line_buffer).buffer, \
4045 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4046 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4047 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4048 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4049 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
4051 /* Like LOOKING_AT, but case-insensitive, without the notinname check, and without skipping spaces after KW. */
4052 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4053 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4054 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4055 && ((cp) += sizeof (kw)-1)) /* advance cp just past kw */
4058 * Read a file, but do no processing. This is used to do regexp
4059 * matching on files that have no language defined.
4062 just_read_file (FILE *inf
)
4064 while (perhaps_more_input (inf
))
4065 readline (&lb
, inf
);
4069 /* Fortran parsing */
4071 static void F_takeprec (void);
4072 static void F_getit (FILE *);
4077 dbp
= skip_spaces (dbp
);
4081 dbp
= skip_spaces (dbp
);
4082 if (strneq (dbp
, "(*)", 3))
4087 if (!c_isdigit (*dbp
))
4089 --dbp
; /* force failure */
4094 while (c_isdigit (*dbp
));
4102 dbp
= skip_spaces (dbp
);
4105 readline (&lb
, inf
);
4110 dbp
= skip_spaces (dbp
);
4112 if (!c_isalpha (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
4114 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
4116 make_tag (dbp
, cp
-dbp
, true,
4117 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4122 Fortran_functions (FILE *inf
)
4124 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4127 dbp
++; /* Ratfor escape to fortran */
4128 dbp
= skip_spaces (dbp
);
4132 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
4133 dbp
= skip_spaces (dbp
);
4135 if (LOOKING_AT_NOCASE (dbp
, "pure"))
4136 dbp
= skip_spaces (dbp
);
4138 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
4139 dbp
= skip_spaces (dbp
);
4141 switch (c_tolower (*dbp
))
4144 if (nocase_tail ("integer"))
4148 if (nocase_tail ("real"))
4152 if (nocase_tail ("logical"))
4156 if (nocase_tail ("complex") || nocase_tail ("character"))
4160 if (nocase_tail ("double"))
4162 dbp
= skip_spaces (dbp
);
4165 if (nocase_tail ("precision"))
4171 dbp
= skip_spaces (dbp
);
4174 switch (c_tolower (*dbp
))
4177 if (nocase_tail ("function"))
4181 if (nocase_tail ("subroutine"))
4185 if (nocase_tail ("entry"))
4189 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4191 dbp
= skip_spaces (dbp
);
4192 if (*dbp
== '\0') /* assume un-named */
4193 make_tag ("blockdata", 9, true,
4194 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4196 F_getit (inf
); /* look for name */
4207 * Philippe Waroquiers (1998)
4210 /* Once we are positioned after an "interesting" keyword, let's get
4211 the real tag value necessary. */
4213 Ada_getit (FILE *inf
, const char *name_qualifier
)
4219 while (perhaps_more_input (inf
))
4221 dbp
= skip_spaces (dbp
);
4223 || (dbp
[0] == '-' && dbp
[1] == '-'))
4225 readline (&lb
, inf
);
4228 switch (c_tolower (*dbp
))
4231 if (nocase_tail ("body"))
4233 /* Skip the "body" keyword of a procedure body, package body, etc.,
4234 and reset the qualifier so the tag denotes a body instead of a spec. */
4235 name_qualifier
= "/b";
4240 /* Skipping type of task type or protected type ... */
4241 if (nocase_tail ("type"))
4248 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4253 dbp
= skip_spaces (dbp
);
4255 c_isalnum (*cp
) || *cp
== '_' || *cp
== '.';
4263 name
= concat (dbp
, name_qualifier
, "");
4265 make_tag (name
, strlen (name
), true,
4266 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4275 Ada_funcs (FILE *inf
)
4277 bool inquote
= false;
4278 bool skip_till_semicolumn
= false;
4280 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4282 while (*dbp
!= '\0')
4284 /* Skip a string i.e. "abcd". */
4285 if (inquote
|| (*dbp
== '"'))
4287 dbp
= strchr (dbp
+ !inquote
, '"');
4292 continue; /* advance char */
4297 break; /* advance line */
4301 /* Skip comments. */
4302 if (dbp
[0] == '-' && dbp
[1] == '-')
4303 break; /* advance line */
4305 /* Skip character enclosed in single quote i.e. 'a'
4306 and skip single quote starting an attribute i.e. 'Image. */
4315 if (skip_till_semicolumn
)
4318 skip_till_semicolumn
= false;
4320 continue; /* advance char */
4323 /* Search for beginning of a token. */
4324 if (!begtoken (*dbp
))
4327 continue; /* advance char */
4330 /* We are at the beginning of a token. */
4331 switch (c_tolower (*dbp
))
4334 if (!packages_only
&& nocase_tail ("function"))
4335 Ada_getit (inf
, "/f");
4337 break; /* from switch */
4338 continue; /* advance char */
4340 if (!packages_only
&& nocase_tail ("procedure"))
4341 Ada_getit (inf
, "/p");
4342 else if (nocase_tail ("package"))
4343 Ada_getit (inf
, "/s");
4344 else if (nocase_tail ("protected")) /* protected type */
4345 Ada_getit (inf
, "/t");
4347 break; /* from switch */
4348 continue; /* advance char */
4351 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4353 /* When tagging types, avoid tagging "use type Pack.Typename;":
4354 to do so, skip everything up to the next ';'. */
4355 skip_till_semicolumn
= true;
4356 continue; /* advance char */
4360 if (!packages_only
&& nocase_tail ("task"))
4361 Ada_getit (inf
, "/k");
4362 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4364 Ada_getit (inf
, "/t");
4365 while (*dbp
!= '\0')
4369 break; /* from switch */
4370 continue; /* advance char */
4373 /* Look for the end of the token. */
4374 while (!endtoken (*dbp
))
4377 } /* advance char */
4378 } /* advance line */
4383 * Unix and microcontroller assembly tag handling
4384 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4385 * Idea by Bob Weiner, Motorola Inc. (1994)
4388 Asm_labels (FILE *inf
)
4392 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4394 /* If first char is alphabetic or one of [_.$], test for colon
4395 following identifier. */
4396 if (c_isalpha (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4398 /* Read past label. */
4400 while (c_isalnum (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4402 if (*cp
== ':' || c_isspace (*cp
))
4403 /* Found end of label, so copy it and add it to the table. */
4404 make_tag (lb
.buffer
, cp
- lb
.buffer
, true,
4405 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4413 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4414 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4415 * Perl variable names: /^(my|local).../
4416 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4417 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4418 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4421 Perl_functions (FILE *inf
)
4423 char *package
= savestr ("main"); /* current package name */
4426 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4428 cp
= skip_spaces (cp
);
4430 if (LOOKING_AT (cp
, "package"))
4433 get_tag (cp
, &package
);
4435 else if (LOOKING_AT (cp
, "sub"))
4441 while (!notinname (*cp
))
4444 continue; /* nothing found */
4445 pos
= strchr (sp
, ':');
4446 if (pos
&& pos
< cp
&& pos
[1] == ':')
4447 /* The name is already qualified. */
4448 make_tag (sp
, cp
- sp
, true,
4449 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4453 char savechar
, *name
;
4457 name
= concat (package
, "::", sp
);
4459 make_tag (name
, strlen (name
), true,
4460 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4464 else if (LOOKING_AT (cp
, "use constant")
4465 || LOOKING_AT (cp
, "use constant::defer"))
4467 /* For hash style multi-constant like
4468 use constant { FOO => 123,
4470 only the first FOO is picked up. Parsing across the value
4471 expressions would be difficult in general, due to possible nested
4472 hashes, here-documents, etc. */
4474 cp
= skip_spaces (cp
+1);
4477 else if (globals
) /* only if we are tagging global vars */
4479 /* Skip a qualifier, if any. */
4480 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4481 /* After "my" or "local", but before any following paren or space. */
4482 char *varstart
= cp
;
4484 if (qual
/* should this be removed? If yes, how? */
4485 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4490 while (c_isalnum (*cp
) || *cp
== '_');
4494 /* Should be examining a variable list at this point;
4495 could insist on seeing an open parenthesis. */
4496 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4502 make_tag (varstart
, cp
- varstart
, false,
4503 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4512 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4513 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4514 * More ideas by seb bacon <seb@jamkit.com> (2002)
4517 Python_functions (FILE *inf
)
4521 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4523 cp
= skip_spaces (cp
);
4524 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4527 while (!notinname (*cp
) && *cp
!= ':')
4529 make_tag (name
, cp
- name
, true,
4530 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4539 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4540 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4541 * - /^[ \t]*define\(\"[^\"]+/
4542 * Only with --members:
4543 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4544 * Idea by Diez B. Roggisch (2001)
4547 PHP_functions (FILE *inf
)
4550 bool search_identifier
= false;
4552 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4554 cp
= skip_spaces (cp
);
4556 if (search_identifier
4559 while (!notinname (*cp
))
4561 make_tag (name
, cp
- name
, true,
4562 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4563 search_identifier
= false;
4565 else if (LOOKING_AT (cp
, "function"))
4568 cp
= skip_spaces (cp
+1);
4572 while (!notinname (*cp
))
4574 make_tag (name
, cp
- name
, true,
4575 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4578 search_identifier
= true;
4580 else if (LOOKING_AT (cp
, "class"))
4585 while (*cp
!= '\0' && !c_isspace (*cp
))
4587 make_tag (name
, cp
- name
, false,
4588 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4591 search_identifier
= true;
4593 else if (strneq (cp
, "define", 6)
4594 && (cp
= skip_spaces (cp
+6))
4596 && (*cp
== '"' || *cp
== '\''))
4600 while (*cp
!= quote
&& *cp
!= '\0')
4602 make_tag (name
, cp
- name
, false,
4603 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4606 && LOOKING_AT (cp
, "var")
4610 while (!notinname (*cp
))
4612 make_tag (name
, cp
- name
, false,
4613 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4620 * Cobol tag functions
4621 * We could look for anything that could be a paragraph name.
4622 * i.e. anything that starts in column 8, is one word, and ends in a full stop.
4623 * Idea by Corny de Souza (1993)
4626 Cobol_paragraphs (FILE *inf
)
4628 register char *bp
, *ep
;
4630 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4636 /* If eoln, compiler option or comment ignore whole line. */
4637 if (bp
[-1] != ' ' || !c_isalnum (bp
[0]))
4640 for (ep
= bp
; c_isalnum (*ep
) || *ep
== '-'; ep
++)
4643 make_tag (bp
, ep
- bp
, true,
4644 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4651 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4654 Makefile_targets (FILE *inf
)
4658 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4660 if (*bp
== '\t' || *bp
== '#')
4662 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
4664 if (*bp
== ':' || (globals
&& *bp
== '='))
4666 /* We should detect if there is more than one tag, but we do not.
4667 We just skip initial and final spaces. */
4668 char * namestart
= skip_spaces (lb
.buffer
);
4669 while (--bp
> namestart
)
4670 if (!notinname (*bp
))
4672 make_tag (namestart
, bp
- namestart
+ 1, true,
4673 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
4681 * Original code by Mosur K. Mohan (1989)
4683 * Locates tags for procedures & functions. Doesn't do any type- or
4684 * var-definitions. It does look for the keyword "extern" or
4685 * "forward" immediately following the procedure statement; if found,
4686 * the tag is skipped.
4689 Pascal_functions (FILE *inf
)
4691 linebuffer tline
; /* mostly copied from C_entries */
4693 int save_lineno
, namelen
, taglen
;
4696 bool /* each of these flags is true if: */
4697 incomment
, /* point is inside a comment */
4698 inquote
, /* point is inside '..' string */
4699 get_tagname
, /* point is after PROCEDURE/FUNCTION
4700 keyword, so next item = potential tag */
4701 found_tag
, /* point is after a potential tag */
4702 inparms
, /* point is within parameter-list */
4703 verify_tag
; /* point has passed the parm-list, so the
4704 next token will determine whether this
4705 is a FORWARD/EXTERN to be ignored, or
4706 whether it is a real tag */
4708 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
4709 name
= NULL
; /* keep compiler quiet */
4712 linebuffer_init (&tline
);
4714 incomment
= inquote
= false;
4715 found_tag
= false; /* have a proc name; check if extern */
4716 get_tagname
= false; /* found "procedure" keyword */
4717 inparms
= false; /* found '(' after "proc" */
4718 verify_tag
= false; /* check if "extern" is ahead */
4721 while (perhaps_more_input (inf
)) /* long main loop to get next char */
4724 if (c
== '\0') /* if end of line */
4726 readline (&lb
, inf
);
4730 if (!((found_tag
&& verify_tag
)
4732 c
= *dbp
++; /* only if don't need *dbp pointing
4733 to the beginning of the name of
4734 the procedure or function */
4738 if (c
== '}') /* within { } comments */
4740 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
4757 inquote
= true; /* found first quote */
4759 case '{': /* found open { comment */
4763 if (*dbp
== '*') /* found open (* comment */
4768 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
4771 case ')': /* end of parms list */
4776 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
4783 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
4785 /* Check if this is an "extern" declaration. */
4788 if (c_tolower (*dbp
) == 'e')
4790 if (nocase_tail ("extern")) /* superfluous, really! */
4796 else if (c_tolower (*dbp
) == 'f')
4798 if (nocase_tail ("forward")) /* check for forward reference */
4804 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
4808 make_tag (name
, namelen
, true,
4809 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
4813 if (get_tagname
) /* grab name of proc or fn */
4820 /* Find block name. */
4821 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
4824 /* Save all values for later tagging. */
4825 linebuffer_setlen (&tline
, lb
.len
);
4826 strcpy (tline
.buffer
, lb
.buffer
);
4827 save_lineno
= lineno
;
4828 save_lcno
= linecharno
;
4829 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
4831 taglen
= cp
- lb
.buffer
+ 1;
4833 dbp
= cp
; /* set dbp to e-o-token */
4834 get_tagname
= false;
4838 /* And proceed to check for "extern". */
4840 else if (!incomment
&& !inquote
&& !found_tag
)
4842 /* Check for proc/fn keywords. */
4843 switch (c_tolower (c
))
4846 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4850 if (nocase_tail ("unction"))
4855 } /* while not eof */
4857 free (tline
.buffer
);
4862 * Lisp tag functions
4863 * look for (def or (DEF, quote or QUOTE
4866 static void L_getit (void);
4871 if (*dbp
== '\'') /* Skip prefix quote */
4873 else if (*dbp
== '(')
4876 /* Try to skip "(quote " */
4877 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
4878 /* Ok, then skip "(" before name in (defstruct (foo)) */
4879 dbp
= skip_spaces (dbp
);
4881 get_tag (dbp
, NULL
);
4885 Lisp_functions (FILE *inf
)
4887 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4892 /* "(defvar foo)" is a declaration rather than a definition. */
4896 if (LOOKING_AT (p
, "defvar"))
4898 p
= skip_name (p
); /* past var name */
4899 p
= skip_spaces (p
);
4905 if (strneq (dbp
+ 1, "cl-", 3) || strneq (dbp
+ 1, "CL-", 3))
4908 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
4910 dbp
= skip_non_spaces (dbp
);
4911 dbp
= skip_spaces (dbp
);
4916 /* Check for (foo::defmumble name-defined ... */
4919 while (!notinname (*dbp
) && *dbp
!= ':');
4924 while (*dbp
== ':');
4926 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
4928 dbp
= skip_non_spaces (dbp
);
4929 dbp
= skip_spaces (dbp
);
4939 * Lua script language parsing
4940 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4942 * "function" and "local function" are tags if they start at column 1.
4945 Lua_functions (FILE *inf
)
4949 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4951 if (bp
[0] != 'f' && bp
[0] != 'l')
4954 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
4956 if (LOOKING_AT (bp
, "function"))
4964 * Just look for lines where the first character is '/'
4965 * Also look at "defineps" for PSWrap
4967 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4968 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4971 PS_functions (FILE *inf
)
4973 register char *bp
, *ep
;
4975 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4980 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
4983 make_tag (bp
, ep
- bp
, true,
4984 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4986 else if (LOOKING_AT (bp
, "defineps"))
4994 * Ignore anything after \ followed by space or in ( )
4995 * Look for words defined by :
4996 * Look for constant, code, create, defer, value, and variable
4997 * OBP extensions: Look for buffer:, field,
4998 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5001 Forth_words (FILE *inf
)
5005 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5006 while ((bp
= skip_spaces (bp
))[0] != '\0')
5007 if (bp
[0] == '\\' && c_isspace (bp
[1]))
5008 break; /* read next line */
5009 else if (bp
[0] == '(' && c_isspace (bp
[1]))
5010 do /* skip to ) or eol */
5012 while (*bp
!= ')' && *bp
!= '\0');
5013 else if ((bp
[0] == ':' && c_isspace (bp
[1]) && bp
++)
5014 || LOOKING_AT_NOCASE (bp
, "constant")
5015 || LOOKING_AT_NOCASE (bp
, "code")
5016 || LOOKING_AT_NOCASE (bp
, "create")
5017 || LOOKING_AT_NOCASE (bp
, "defer")
5018 || LOOKING_AT_NOCASE (bp
, "value")
5019 || LOOKING_AT_NOCASE (bp
, "variable")
5020 || LOOKING_AT_NOCASE (bp
, "buffer:")
5021 || LOOKING_AT_NOCASE (bp
, "field"))
5022 get_tag (skip_spaces (bp
), NULL
); /* Yay! A definition! */
5024 bp
= skip_non_spaces (bp
);
5029 * Scheme tag functions
5030 * look for (def... xyzzy
5032 * (def ... ((...(xyzzy ....
5034 * Original code by Ken Haase (1985?)
5037 Scheme_functions (FILE *inf
)
5041 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5043 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
5045 bp
= skip_non_spaces (bp
+4);
5046 /* Skip over open parens and white space. Don't continue past
5048 while (*bp
&& notinname (*bp
))
5052 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
5058 /* Find tags in TeX and LaTeX input files. */
5060 /* TEX_toktab is a table of TeX control sequences that define tags.
5061 * Each entry records one such control sequence.
5063 * Original code from who knows whom.
5065 * Stefan Monnier (2002)
5068 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
5070 /* Default set of control sequences to put into TEX_toktab.
5071 The value of environment var TEXTAGS is prepended to this. */
5072 static const char *TEX_defenv
= "\
5073 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5074 :part:appendix:entry:index:def\
5075 :newcommand:renewcommand:newenvironment:renewenvironment";
5077 static void TEX_decode_env (const char *, const char *);
5080 * TeX/LaTeX scanning loop.
5083 TeX_commands (FILE *inf
)
5088 char TEX_esc
= '\0';
5089 char TEX_opgrp
, TEX_clgrp
;
5091 /* Initialize token table once from environment. */
5092 if (TEX_toktab
== NULL
)
5093 TEX_decode_env ("TEXTAGS", TEX_defenv
);
5095 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5097 /* Look at each TEX keyword in line. */
5100 /* Look for a TEX escape. */
5104 if (c
== '\0' || c
== '%')
5107 /* Select either \ or ! as escape character, whichever comes
5108 first outside a comment. */
5129 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
5130 if (strneq (cp
, key
->buffer
, key
->len
))
5133 int namelen
, linelen
;
5136 cp
= skip_spaces (cp
+ key
->len
);
5137 if (*cp
== TEX_opgrp
)
5143 (!c_isspace (*p
) && *p
!= '#' &&
5144 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
5149 if (!opgrp
|| *p
== TEX_clgrp
)
5151 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
5153 linelen
= p
- lb
.buffer
+ 1;
5155 make_tag (cp
, namelen
, true,
5156 lb
.buffer
, linelen
, lineno
, linecharno
);
5157 goto tex_next_line
; /* We only tag a line once */
5165 /* Read environment and prepend it to the default string.
5166 Build token table. */
5168 TEX_decode_env (const char *evarname
, const char *defenv
)
5170 register const char *env
, *p
;
5173 /* Append default string to environment. */
5174 env
= getenv (evarname
);
5178 env
= concat (env
, defenv
, "");
5180 /* Allocate a token table */
5181 for (len
= 1, p
= env
; (p
= strchr (p
, ':')); )
5184 TEX_toktab
= xnew (len
, linebuffer
);
5186 /* Unpack environment string into token table. Be careful about */
5187 /* zero-length strings (leading ':', "::" and trailing ':') */
5188 for (i
= 0; *env
!= '\0';)
5190 p
= strchr (env
, ':');
5191 if (!p
) /* End of environment string. */
5192 p
= env
+ strlen (env
);
5194 { /* Only non-zero strings. */
5195 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5196 TEX_toktab
[i
].len
= p
- env
;
5203 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5204 TEX_toktab
[i
].len
= 0;
5211 /* Texinfo support. Dave Love, Mar. 2000. */
5213 Texinfo_nodes (FILE *inf
)
5216 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5217 if (LOOKING_AT (cp
, "@node"))
5220 while (*cp
!= '\0' && *cp
!= ',')
5222 make_tag (start
, cp
- start
, true,
5223 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5230 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5231 * Contents of <a name=xxx> are tags with name xxx.
5233 * Francesco Potortì, 2002.
5236 HTML_labels (FILE *inf
)
5238 bool getnext
= false; /* next text outside of HTML tags is a tag */
5239 bool skiptag
= false; /* skip to the end of the current HTML tag */
5240 bool intag
= false; /* inside an html tag, looking for ID= */
5241 bool inanchor
= false; /* when INTAG, is an anchor, look for NAME= */
5245 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5247 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5248 for (;;) /* loop on the same line */
5250 if (skiptag
) /* skip HTML tag */
5252 while (*dbp
!= '\0' && *dbp
!= '>')
5258 continue; /* look on the same line */
5260 break; /* go to next line */
5263 else if (intag
) /* look for "name=" or "id=" */
5265 while (*dbp
!= '\0' && *dbp
!= '>'
5266 && c_tolower (*dbp
) != 'n' && c_tolower (*dbp
) != 'i')
5269 break; /* go to next line */
5274 continue; /* look on the same line */
5276 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5277 || LOOKING_AT_NOCASE (dbp
, "id="))
5279 bool quoted
= (dbp
[0] == '"');
5282 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5285 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5287 linebuffer_setlen (&token_name
, end
- dbp
);
5288 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5289 token_name
.buffer
[end
- dbp
] = '\0';
5292 intag
= false; /* we found what we looked for */
5293 skiptag
= true; /* skip to the end of the tag */
5294 getnext
= true; /* then grab the text */
5295 continue; /* look on the same line */
5300 else if (getnext
) /* grab next tokens and tag them */
5302 dbp
= skip_spaces (dbp
);
5304 break; /* go to next line */
5308 inanchor
= (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5309 continue; /* look on the same line */
5312 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5314 make_tag (token_name
.buffer
, token_name
.len
, true,
5315 dbp
, end
- dbp
, lineno
, linecharno
);
5316 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5318 break; /* go to next line */
5321 else /* look for an interesting HTML tag */
5323 while (*dbp
!= '\0' && *dbp
!= '<')
5326 break; /* go to next line */
5328 if (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5331 continue; /* look on the same line */
5333 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5334 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5335 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5336 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5340 continue; /* look on the same line */
5351 * Assumes that the predicate or rule starts at column 0.
5352 * Only the first clause of a predicate or rule is added.
5353 * Original code by Sunichirou Sugou (1989)
5354 * Rewritten by Anders Lindgren (1996)
5356 static size_t prolog_pr (char *, char *);
5357 static void prolog_skip_comment (linebuffer
*, FILE *);
5358 static size_t prolog_atom (char *, size_t);
5361 Prolog_functions (FILE *inf
)
5371 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5373 if (cp
[0] == '\0') /* Empty line */
5375 else if (c_isspace (cp
[0])) /* Not a predicate */
5377 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5378 prolog_skip_comment (&lb
, inf
);
5379 else if ((len
= prolog_pr (cp
, last
)) > 0)
5381 /* Predicate or rule. Store the function name so that we
5382 only generate a tag for the first clause. */
5384 last
= xnew (len
+ 1, char);
5385 else if (len
+ 1 > allocated
)
5386 xrnew (last
, len
+ 1, char);
5387 allocated
= len
+ 1;
5388 memcpy (last
, cp
, len
);
5397 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5403 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5404 if (cp
[0] == '*' && cp
[1] == '/')
5406 readline (plb
, inf
);
5408 while (perhaps_more_input (inf
));
5412 * A predicate or rule definition is added if it matches:
5413 * <beginning of line><Prolog Atom><whitespace>(
5414 * or <beginning of line><Prolog Atom><whitespace>:-
5416 * It is added to the tags database if it doesn't match the
5417 * name of the previous clause header.
5419 * Return the size of the name of the predicate or rule, or 0 if no
5423 prolog_pr (char *s
, char *last
)
5425 /* Name of last clause. */
5430 pos
= prolog_atom (s
, 0);
5435 pos
= skip_spaces (s
+ pos
) - s
;
5438 || (s
[pos
] == '(' && (pos
+= 1))
5439 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5440 && (last
== NULL
/* save only the first clause */
5441 || len
!= strlen (last
)
5442 || !strneq (s
, last
, len
)))
5444 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
5452 * Consume a Prolog atom.
5453 * Return the number of bytes consumed, or 0 if there was an error.
5455 * A prolog atom, in this context, could be one of:
5456 * - An alphanumeric sequence, starting with a lower case letter.
5457 * - A quoted arbitrary string. Single quotes can escape themselves.
5458 * Backslash quotes everything.
5461 prolog_atom (char *s
, size_t pos
)
5467 if (c_islower (s
[pos
]) || s
[pos
] == '_')
5469 /* The atom is unquoted. */
5471 while (c_isalnum (s
[pos
]) || s
[pos
] == '_')
5475 return pos
- origpos
;
5477 else if (s
[pos
] == '\'')
5488 pos
++; /* A double quote */
5490 else if (s
[pos
] == '\0')
5491 /* Multiline quoted atoms are ignored. */
5493 else if (s
[pos
] == '\\')
5495 if (s
[pos
+1] == '\0')
5502 return pos
- origpos
;
5510 * Support for Erlang
5512 * Generates tags for functions, defines, and records.
5513 * Assumes that Erlang functions start at column 0.
5514 * Original code by Anders Lindgren (1996)
5516 static int erlang_func (char *, char *);
5517 static void erlang_attribute (char *);
5518 static int erlang_atom (char *);
5521 Erlang_functions (FILE *inf
)
5531 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5533 if (cp
[0] == '\0') /* Empty line */
5535 else if (c_isspace (cp
[0])) /* Not function nor attribute */
5537 else if (cp
[0] == '%') /* comment */
5539 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
5541 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
5543 erlang_attribute (cp
);
5550 else if ((len
= erlang_func (cp
, last
)) > 0)
5553 * Function. Store the function name so that we only
5554 * generate a tag for the first clause.
5557 last
= xnew (len
+ 1, char);
5558 else if (len
+ 1 > allocated
)
5559 xrnew (last
, len
+ 1, char);
5560 allocated
= len
+ 1;
5561 memcpy (last
, cp
, len
);
5570 * A function definition is added if it matches:
5571 * <beginning of line><Erlang Atom><whitespace>(
5573 * It is added to the tags database if it doesn't match the
5574 * name of the previous clause header.
5576 * Return the size of the name of the function, or 0 if no function
5580 erlang_func (char *s
, char *last
)
5582 /* Name of last clause. */
5587 pos
= erlang_atom (s
);
5592 pos
= skip_spaces (s
+ pos
) - s
;
5594 /* Save only the first clause. */
5597 || len
!= (int)strlen (last
)
5598 || !strneq (s
, last
, len
)))
5600 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
5609 * Handle attributes. Currently, tags are generated for defines
5612 * They are of the form:
5613 * -define(foo, bar).
5614 * -define(Foo(M, N), M+N).
5615 * -record(graph, {vtab = notable, cyclic = true}).
5618 erlang_attribute (char *s
)
5622 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
5625 int len
= erlang_atom (skip_spaces (cp
));
5627 make_tag (cp
, len
, true, s
, cp
+ len
- s
, lineno
, linecharno
);
5634 * Consume an Erlang atom (or variable).
5635 * Return the number of bytes consumed, or -1 if there was an error.
5638 erlang_atom (char *s
)
5642 if (c_isalpha (s
[pos
]) || s
[pos
] == '_')
5644 /* The atom is unquoted. */
5647 while (c_isalnum (s
[pos
]) || s
[pos
] == '_');
5649 else if (s
[pos
] == '\'')
5651 for (pos
++; s
[pos
] != '\''; pos
++)
5652 if (s
[pos
] == '\0' /* multiline quoted atoms are ignored */
5653 || (s
[pos
] == '\\' && s
[++pos
] == '\0'))
5662 static char *scan_separators (char *);
5663 static void add_regex (char *, language
*);
5664 static char *substitute (char *, char *, struct re_registers
*);
5667 * Take a string like "/blah/" and turn it into "blah", verifying
5668 * that the first and last characters are the same, and handling
5669 * quoted separator characters. Actually, stops on the occurrence of
5670 * an unquoted separator. Also process \t, \n, etc. and turn into
5671 * appropriate characters. Works in place. Null terminates name string.
5672 * Returns pointer to terminating separator, or NULL for
5673 * unterminated regexps.
5676 scan_separators (char *name
)
5679 char *copyto
= name
;
5680 bool quoted
= false;
5682 for (++name
; *name
!= '\0'; ++name
)
5688 case 'a': *copyto
++ = '\007'; break; /* BEL (bell) */
5689 case 'b': *copyto
++ = '\b'; break; /* BS (back space) */
5690 case 'd': *copyto
++ = 0177; break; /* DEL (delete) */
5691 case 'e': *copyto
++ = 033; break; /* ESC (escape) */
5692 case 'f': *copyto
++ = '\f'; break; /* FF (form feed) */
5693 case 'n': *copyto
++ = '\n'; break; /* NL (new line) */
5694 case 'r': *copyto
++ = '\r'; break; /* CR (carriage return) */
5695 case 't': *copyto
++ = '\t'; break; /* TAB (horizontal tab) */
5696 case 'v': *copyto
++ = '\v'; break; /* VT (vertical tab) */
5702 /* Something else is quoted, so preserve the quote. */
5710 else if (*name
== '\\')
5712 else if (*name
== sep
)
5718 name
= NULL
; /* signal unterminated regexp */
5720 /* Terminate copied string. */
5725 /* Look at the argument of --regex or --no-regex and do the right
5726 thing. Same for each line of a regexp file. */
5728 analyze_regex (char *regex_arg
)
5730 if (regex_arg
== NULL
)
5732 free_regexps (); /* --no-regex: remove existing regexps */
5736 /* A real --regexp option or a line in a regexp file. */
5737 switch (regex_arg
[0])
5739 /* Comments in regexp file or null arg to --regex. */
5745 /* Read a regex file. This is recursive and may result in a
5746 loop, which will stop when the file descriptors are exhausted. */
5750 linebuffer regexbuf
;
5751 char *regexfile
= regex_arg
+ 1;
5753 /* regexfile is a file containing regexps, one per line. */
5754 regexfp
= fopen (regexfile
, "r" FOPEN_BINARY
);
5755 if (regexfp
== NULL
)
5757 linebuffer_init (&regexbuf
);
5758 while (readline_internal (&regexbuf
, regexfp
, regexfile
) > 0)
5759 analyze_regex (regexbuf
.buffer
);
5760 free (regexbuf
.buffer
);
5761 if (fclose (regexfp
) != 0)
5766 /* Regexp to be used for a specific language only. */
5770 char *lang_name
= regex_arg
+ 1;
5773 for (cp
= lang_name
; *cp
!= '}'; cp
++)
5776 error ("unterminated language name in regex: %s", regex_arg
);
5780 lang
= get_language_from_langname (lang_name
);
5783 add_regex (cp
, lang
);
5787 /* Regexp to be used for any language. */
5789 add_regex (regex_arg
, NULL
);
5794 /* Separate the regexp pattern, compile it,
5795 and care for optional name and modifiers. */
5797 add_regex (char *regexp_pattern
, language
*lang
)
5799 static struct re_pattern_buffer zeropattern
;
5800 char sep
, *pat
, *name
, *modifiers
;
5803 struct re_pattern_buffer
*patbuf
;
5806 force_explicit_name
= true, /* do not use implicit tag names */
5807 ignore_case
= false, /* case is significant */
5808 multi_line
= false, /* matches are done one line at a time */
5809 single_line
= false; /* dot does not match newline */
5812 if (strlen (regexp_pattern
) < 3)
5814 error ("null regexp");
5817 sep
= regexp_pattern
[0];
5818 name
= scan_separators (regexp_pattern
);
5821 error ("%s: unterminated regexp", regexp_pattern
);
5826 error ("null name for regexp \"%s\"", regexp_pattern
);
5829 modifiers
= scan_separators (name
);
5830 if (modifiers
== NULL
) /* no terminating separator --> no name */
5836 modifiers
+= 1; /* skip separator */
5838 /* Parse regex modifiers. */
5839 for (; modifiers
[0] != '\0'; modifiers
++)
5840 switch (modifiers
[0])
5843 if (modifiers
== name
)
5844 error ("forcing explicit tag name but no name, ignoring");
5845 force_explicit_name
= true;
5855 need_filebuf
= true;
5858 error ("invalid regexp modifier '%c', ignoring", modifiers
[0]);
5862 patbuf
= xnew (1, struct re_pattern_buffer
);
5863 *patbuf
= zeropattern
;
5866 static char lc_trans
[UCHAR_MAX
+ 1];
5868 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++)
5869 lc_trans
[i
] = c_tolower (i
);
5870 patbuf
->translate
= lc_trans
; /* translation table to fold case */
5874 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
5876 pat
= regexp_pattern
;
5879 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
5881 re_set_syntax (RE_SYNTAX_EMACS
);
5883 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
5888 error ("%s while compiling pattern", err
);
5893 p_head
= xnew (1, regexp
);
5894 p_head
->pattern
= savestr (regexp_pattern
);
5895 p_head
->p_next
= rp
;
5896 p_head
->lang
= lang
;
5897 p_head
->pat
= patbuf
;
5898 p_head
->name
= savestr (name
);
5899 p_head
->error_signaled
= false;
5900 p_head
->force_explicit_name
= force_explicit_name
;
5901 p_head
->ignore_case
= ignore_case
;
5902 p_head
->multi_line
= multi_line
;
5906 * Do the substitutions indicated by the regular expression and
5910 substitute (char *in
, char *out
, struct re_registers
*regs
)
5913 int size
, dig
, diglen
;
5916 size
= strlen (out
);
5918 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5919 if (out
[size
- 1] == '\\')
5920 fatal ("pattern error in \"%s\"", out
);
5921 for (t
= strchr (out
, '\\');
5923 t
= strchr (t
+ 2, '\\'))
5924 if (c_isdigit (t
[1]))
5927 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5933 /* Allocate space and do the substitutions. */
5935 result
= xnew (size
+ 1, char);
5937 for (t
= result
; *out
!= '\0'; out
++)
5938 if (*out
== '\\' && c_isdigit (*++out
))
5941 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5942 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
5949 assert (t
<= result
+ size
);
5950 assert (t
- result
== (int)strlen (result
));
5955 /* Deallocate all regexps. */
5960 while (p_head
!= NULL
)
5962 rp
= p_head
->p_next
;
5963 free (p_head
->pattern
);
5964 free (p_head
->name
);
5972 * Reads the whole file as a single string from `filebuf' and looks for
5973 * multi-line regular expressions, creating tags on matches.
5974 * readline already dealt with normal regexps.
5976 * Idea by Ben Wing <ben@666.com> (2002).
5979 regex_tag_multiline (void)
5981 char *buffer
= filebuf
.buffer
;
5985 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
5989 if (!rp
->multi_line
)
5990 continue; /* skip normal regexps */
5992 /* Generic initializations before parsing file from memory. */
5993 lineno
= 1; /* reset global line number */
5994 charno
= 0; /* reset global char number */
5995 linecharno
= 0; /* reset global char number of line start */
5997 /* Only use generic regexps or those for the current language. */
5998 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
6001 while (match
>= 0 && match
< filebuf
.len
)
6003 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
6004 filebuf
.len
- match
, &rp
->regs
);
6009 if (!rp
->error_signaled
)
6011 error ("regexp stack overflow while matching \"%s\"",
6013 rp
->error_signaled
= true;
6020 if (match
== rp
->regs
.end
[0])
6022 if (!rp
->error_signaled
)
6024 error ("regexp matches the empty string: \"%s\"",
6026 rp
->error_signaled
= true;
6028 match
= -3; /* exit from while loop */
6032 /* Match occurred. Construct a tag. */
6033 while (charno
< rp
->regs
.end
[0])
6034 if (buffer
[charno
++] == '\n')
6035 lineno
++, linecharno
= charno
;
6037 if (name
[0] == '\0')
6039 else /* make a named tag */
6040 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
6041 if (rp
->force_explicit_name
)
6042 /* Force explicit tag name, if a name is there. */
6043 pfnote (name
, true, buffer
+ linecharno
,
6044 charno
- linecharno
+ 1, lineno
, linecharno
);
6046 make_tag (name
, strlen (name
), true, buffer
+ linecharno
,
6047 charno
- linecharno
+ 1, lineno
, linecharno
);
6056 nocase_tail (const char *cp
)
6060 while (*cp
!= '\0' && c_tolower (*cp
) == c_tolower (dbp
[len
]))
6062 if (*cp
== '\0' && !intoken (dbp
[len
]))
6071 get_tag (register char *bp
, char **namepp
)
6073 register char *cp
= bp
;
6077 /* Go till you get to white space or a syntactic break */
6078 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
6080 make_tag (bp
, cp
- bp
, true,
6081 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6085 *namepp
= savenstr (bp
, cp
- bp
);
6089 * Read a line of text from `stream' into `lbp', excluding the
6090 * newline or CR-NL, if any. Return the number of characters read from
6091 * `stream', which is the length of the line including the newline.
6093 * On DOS or Windows we do not count the CR character, if any before the
6094 * NL, in the returned length; this mirrors the behavior of Emacs on those
6095 * platforms (for text files, it translates CR-NL to NL as it reads in the
6098 * If multi-line regular expressions are requested, each line read is
6099 * appended to `filebuf'.
6102 readline_internal (linebuffer
*lbp
, FILE *stream
, char const *filename
)
6104 char *buffer
= lbp
->buffer
;
6105 char *p
= lbp
->buffer
;
6109 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
6113 register int c
= getc (stream
);
6116 /* We're at the end of linebuffer: expand it. */
6118 xrnew (buffer
, lbp
->size
, char);
6119 p
+= buffer
- lbp
->buffer
;
6120 pend
= buffer
+ lbp
->size
;
6121 lbp
->buffer
= buffer
;
6125 if (ferror (stream
))
6133 if (p
> buffer
&& p
[-1] == '\r')
6147 lbp
->len
= p
- buffer
;
6149 if (need_filebuf
/* we need filebuf for multi-line regexps */
6150 && chars_deleted
> 0) /* not at EOF */
6152 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6154 /* Expand filebuf. */
6156 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6158 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6159 filebuf
.len
+= lbp
->len
;
6160 filebuf
.buffer
[filebuf
.len
++] = '\n';
6161 filebuf
.buffer
[filebuf
.len
] = '\0';
6164 return lbp
->len
+ chars_deleted
;
6168 * Like readline_internal, above, but in addition try to match the
6169 * input line against relevant regular expressions and manage #line
6173 readline (linebuffer
*lbp
, FILE *stream
)
6177 linecharno
= charno
; /* update global char number of line start */
6178 result
= readline_internal (lbp
, stream
, infilename
); /* read line */
6179 lineno
+= 1; /* increment global line number */
6180 charno
+= result
; /* increment global char number */
6182 /* Honor #line directives. */
6183 if (!no_line_directive
)
6185 static bool discard_until_line_directive
;
6187 /* Check whether this is a #line directive. */
6188 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6193 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6194 && start
> 0) /* double quote character found */
6196 char *endp
= lbp
->buffer
+ start
;
6198 while ((endp
= strchr (endp
, '"')) != NULL
6199 && endp
[-1] == '\\')
6202 /* Ok, this is a real #line directive. Let's deal with it. */
6204 char *taggedabsname
; /* absolute name of original file */
6205 char *taggedfname
; /* name of original file as given */
6206 char *name
; /* temp var */
6208 discard_until_line_directive
= false; /* found it */
6209 name
= lbp
->buffer
+ start
;
6211 canonicalize_filename (name
);
6212 taggedabsname
= absolute_filename (name
, tagfiledir
);
6213 if (filename_is_absolute (name
)
6214 || filename_is_absolute (curfdp
->infname
))
6215 taggedfname
= savestr (taggedabsname
);
6217 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6219 if (streq (curfdp
->taggedfname
, taggedfname
))
6220 /* The #line directive is only a line number change. We
6221 deal with this afterwards. */
6224 /* The tags following this #line directive should be
6225 attributed to taggedfname. In order to do this, set
6226 curfdp accordingly. */
6228 fdesc
*fdp
; /* file description pointer */
6230 /* Go look for a file description already set up for the
6231 file indicated in the #line directive. If there is
6232 one, use it from now until the next #line
6234 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6235 if (streq (fdp
->infname
, curfdp
->infname
)
6236 && streq (fdp
->taggedfname
, taggedfname
))
6237 /* If we remove the second test above (after the &&)
6238 then all entries pertaining to the same file are
6239 coalesced in the tags file. If we use it, then
6240 entries pertaining to the same file but generated
6241 from different files (via #line directives) will
6242 go into separate sections in the tags file. These
6243 alternatives look equivalent. The first one
6244 destroys some apparently useless information. */
6250 /* Else, if we already tagged the real file, skip all
6251 input lines until the next #line directive. */
6252 if (fdp
== NULL
) /* not found */
6253 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6254 if (streq (fdp
->infabsname
, taggedabsname
))
6256 discard_until_line_directive
= true;
6260 /* Else create a new file description and use that from
6261 now on, until the next #line directive. */
6262 if (fdp
== NULL
) /* not found */
6265 fdhead
= xnew (1, fdesc
);
6266 *fdhead
= *curfdp
; /* copy curr. file description */
6268 fdhead
->infname
= savestr (curfdp
->infname
);
6269 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6270 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6271 fdhead
->taggedfname
= taggedfname
;
6272 fdhead
->usecharno
= false;
6273 fdhead
->prop
= NULL
;
6274 fdhead
->written
= false;
6278 free (taggedabsname
);
6280 readline (lbp
, stream
);
6282 } /* if a real #line directive */
6283 } /* if #line is followed by a number */
6284 } /* if line begins with "#line " */
6286 /* If we are here, no #line directive was found. */
6287 if (discard_until_line_directive
)
6291 /* Do a tail recursion on ourselves, thus discarding the contents
6292 of the line buffer. */
6293 readline (lbp
, stream
);
6297 discard_until_line_directive
= false;
6300 } /* if #line directives should be considered */
6307 /* Match against relevant regexps. */
6309 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6311 /* Only use generic regexps or those for the current language.
6312 Also do not use multiline regexps, which is the job of
6313 regex_tag_multiline. */
6314 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6318 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6323 if (!rp
->error_signaled
)
6325 error ("regexp stack overflow while matching \"%s\"",
6327 rp
->error_signaled
= true;
6334 /* Empty string matched. */
6335 if (!rp
->error_signaled
)
6337 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6338 rp
->error_signaled
= true;
6342 /* Match occurred. Construct a tag. */
6344 if (name
[0] == '\0')
6346 else /* make a named tag */
6347 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6348 if (rp
->force_explicit_name
)
6349 /* Force explicit tag name, if a name is there. */
6350 pfnote (name
, true, lbp
->buffer
, match
, lineno
, linecharno
);
6352 make_tag (name
, strlen (name
), true,
6353 lbp
->buffer
, match
, lineno
, linecharno
);
6362 * Return a pointer to a space of size strlen(cp)+1 allocated
6363 * with xnew where the string CP has been copied.
6366 savestr (const char *cp
)
6368 return savenstr (cp
, strlen (cp
));
6372 * Return a pointer to a space of size LEN+1 allocated with xnew where
6373 * the string CP has been copied for at most the first LEN characters.
6376 savenstr (const char *cp
, int len
)
6378 char *dp
= xnew (len
+ 1, char);
6380 return memcpy (dp
, cp
, len
);
6383 /* Skip spaces (end of string is not space), return new pointer. */
6385 skip_spaces (char *cp
)
6387 while (c_isspace (*cp
))
6392 /* Skip non spaces, except end of string, return new pointer. */
6394 skip_non_spaces (char *cp
)
6396 while (*cp
!= '\0' && !c_isspace (*cp
))
6401 /* Skip any chars in the "name" class.*/
6403 skip_name (char *cp
)
6405 /* '\0' is a notinname() so loop stops there too */
6406 while (! notinname (*cp
))
6411 /* Print error message and exit. */
6413 fatal (char const *format
, ...)
6416 va_start (ap
, format
);
6417 verror (format
, ap
);
6419 exit (EXIT_FAILURE
);
6423 pfatal (const char *s1
)
6426 exit (EXIT_FAILURE
);
6430 suggest_asking_for_help (void)
6432 fprintf (stderr
, "\tTry '%s --help' for a complete list of options.\n",
6434 exit (EXIT_FAILURE
);
6437 /* Output a diagnostic with printf-style FORMAT and args. */
6439 error (const char *format
, ...)
6442 va_start (ap
, format
);
6443 verror (format
, ap
);
6448 verror (char const *format
, va_list ap
)
6450 fprintf (stderr
, "%s: ", progname
);
6451 vfprintf (stderr
, format
, ap
);
6452 fprintf (stderr
, "\n");
6455 /* Return a newly-allocated string whose contents
6456 concatenate those of s1, s2, s3. */
6458 concat (const char *s1
, const char *s2
, const char *s3
)
6460 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
6461 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
6463 strcpy (result
, s1
);
6464 strcpy (result
+ len1
, s2
);
6465 strcpy (result
+ len1
+ len2
, s3
);
6471 /* Does the same work as the system V getcwd, but does not need to
6472 guess the buffer size in advance. */
6477 char *path
= xnew (bufsize
, char);
6479 while (getcwd (path
, bufsize
) == NULL
)
6481 if (errno
!= ERANGE
)
6485 path
= xnew (bufsize
, char);
6488 canonicalize_filename (path
);
6492 /* Return a newly allocated string containing a name of a temporary file. */
6496 const char *tmpdir
= getenv ("TMPDIR");
6497 const char *slash
= "/";
6499 #if MSDOS || defined (DOS_NT)
6501 tmpdir
= getenv ("TEMP");
6503 tmpdir
= getenv ("TMP");
6506 if (tmpdir
[strlen (tmpdir
) - 1] == '/'
6507 || tmpdir
[strlen (tmpdir
) - 1] == '\\')
6512 if (tmpdir
[strlen (tmpdir
) - 1] == '/')
6516 char *templt
= concat (tmpdir
, slash
, "etXXXXXX");
6517 int fd
= mkostemp (templt
, O_CLOEXEC
);
6518 if (fd
< 0 || close (fd
) != 0)
6520 int temp_errno
= errno
;
6526 #if defined (DOS_NT)
6527 /* The file name will be used in shell redirection, so it needs to have
6528 DOS-style backslashes, or else the Windows shell will barf. */
6530 for (p
= templt
; *p
; p
++)
6538 /* Return a newly allocated string containing the file name of FILE
6539 relative to the absolute directory DIR (which should end with a slash). */
6541 relative_filename (char *file
, char *dir
)
6543 char *fp
, *dp
, *afn
, *res
;
6546 /* Find the common root of file and dir (with a trailing slash). */
6547 afn
= absolute_filename (file
, cwd
);
6550 while (*fp
++ == *dp
++)
6552 fp
--, dp
--; /* back to the first differing char */
6554 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
6557 do /* look at the equal chars until '/' */
6561 /* Build a sequence of "../" strings for the resulting relative file name. */
6563 while ((dp
= strchr (dp
+ 1, '/')) != NULL
)
6565 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
6568 z
= stpcpy (z
, "../");
6570 /* Add the file name relative to the common root of file and dir. */
6577 /* Return a newly allocated string containing the absolute file name
6578 of FILE given DIR (which should end with a slash). */
6580 absolute_filename (char *file
, char *dir
)
6582 char *slashp
, *cp
, *res
;
6584 if (filename_is_absolute (file
))
6585 res
= savestr (file
);
6587 /* We don't support non-absolute file names with a drive
6588 letter, like `d:NAME' (it's too much hassle). */
6589 else if (file
[1] == ':')
6590 fatal ("%s: relative file names with drive letters not supported", file
);
6593 res
= concat (dir
, file
, "");
6595 /* Delete the "/dirname/.." and "/." substrings. */
6596 slashp
= strchr (res
, '/');
6597 while (slashp
!= NULL
&& slashp
[0] != '\0')
6599 if (slashp
[1] == '.')
6601 if (slashp
[2] == '.'
6602 && (slashp
[3] == '/' || slashp
[3] == '\0'))
6607 while (cp
>= res
&& !filename_is_absolute (cp
));
6609 cp
= slashp
; /* the absolute name begins with "/.." */
6611 /* Under MSDOS and NT we get `d:/NAME' as absolute
6612 file name, so the luser could say `d:/../NAME'.
6613 We silently treat this as `d:/NAME'. */
6614 else if (cp
[0] != '/')
6617 memmove (cp
, slashp
+ 3, strlen (slashp
+ 2));
6621 else if (slashp
[2] == '/' || slashp
[2] == '\0')
6623 memmove (slashp
, slashp
+ 2, strlen (slashp
+ 1));
6628 slashp
= strchr (slashp
+ 1, '/');
6631 if (res
[0] == '\0') /* just a safety net: should never happen */
6634 return savestr ("/");
6640 /* Return a newly allocated string containing the absolute
6641 file name of dir where FILE resides given DIR (which should
6642 end with a slash). */
6644 absolute_dirname (char *file
, char *dir
)
6649 slashp
= strrchr (file
, '/');
6651 return savestr (dir
);
6654 res
= absolute_filename (file
, dir
);
6660 /* Whether the argument string is an absolute file name. The argument
6661 string must have been canonicalized with canonicalize_filename. */
6663 filename_is_absolute (char *fn
)
6665 return (fn
[0] == '/'
6667 || (c_isalpha (fn
[0]) && fn
[1] == ':' && fn
[2] == '/')
6672 /* Downcase DOS drive letter and collapse separators into single slashes.
6675 canonicalize_filename (register char *fn
)
6680 /* Canonicalize drive letter case. */
6681 if (c_isupper (fn
[0]) && fn
[1] == ':')
6682 fn
[0] = c_tolower (fn
[0]);
6684 /* Collapse multiple forward- and back-slashes into a single forward
6686 for (cp
= fn
; *cp
!= '\0'; cp
++, fn
++)
6687 if (*cp
== '/' || *cp
== '\\')
6690 while (cp
[1] == '/' || cp
[1] == '\\')
6698 /* Collapse multiple slashes into a single slash. */
6699 for (cp
= fn
; *cp
!= '\0'; cp
++, fn
++)
6703 while (cp
[1] == '/')
6709 #endif /* !DOS_NT */
6715 /* Initialize a linebuffer for use. */
6717 linebuffer_init (linebuffer
*lbp
)
6719 lbp
->size
= (DEBUG
) ? 3 : 200;
6720 lbp
->buffer
= xnew (lbp
->size
, char);
6721 lbp
->buffer
[0] = '\0';
6725 /* Set the minimum size of a string contained in a linebuffer. */
6727 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
6729 while (lbp
->size
<= toksize
)
6732 xrnew (lbp
->buffer
, lbp
->size
, char);
6737 /* Like malloc but get fatal error if memory is exhausted. */
6739 xmalloc (size_t size
)
6741 void *result
= malloc (size
);
6743 fatal ("virtual memory exhausted");
6748 xrealloc (void *ptr
, size_t size
)
6750 void *result
= realloc (ptr
, size
);
6752 fatal ("virtual memory exhausted");
6758 * indent-tabs-mode: t
6761 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6762 * c-file-style: "gnu"
6766 /* etags.c ends here */