1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.
3. Neither the name of the University nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
Foundation, Inc.
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <https://www.gnu.org/licenses/>. */
/* NB To comply with the above BSD license, copyright information is
reproduced in etc/ETAGS.README.  That file should be updated when the
above notices are.

To the best of our knowledge, this code was originally based on the
ctags.c distributed with BSD4.2, which was copyrighted by the
University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version
[] = "@(#) pot revision number is 17.38.1.4";
88 # define NDEBUG /* disable assert */
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #endif /* WIN32_NATIVE */
104 # include <sys/param.h>
114 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
115 # define O_CLOEXEC O_NOINHERIT
116 #endif /* WINDOWSNT */
123 #include <sysstdio.h>
126 #include <binary-io.h>
127 #include <unlocked-io.h>
129 #include <c-strcase.h>
133 # undef assert /* some systems have a buggy assert.h */
134 # define assert(x) ((void) 0)
140 /* Define CTAGS to make the program "ctags" compatible with the usual one.
141 Leave it undefined to make the program "etags", which makes emacs-style
142 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* Return true if the NUL-terminated strings S and T compare equal
   with strcmp.  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if the NUL-terminated strings S and T compare equal
   under c_strcasecmp, i.e. ignoring letter case.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if the first N characters of S and T compare equal
   with strncmp.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if the first N characters of S and T compare equal
   under c_strncasecmp, i.e. ignoring letter case.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* C is not in a name.  Return true for the NUL byte, the whitespace
   characters and the punctuation listed below; false for every other
   character.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };

  /* Entries not named in the initializer are zero, i.e. false.  */
  return table[c];
}
186 /* C can start a token. */
188 begtoken (unsigned char c
)
190 static bool const table
[UCHAR_MAX
+ 1] = {
192 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
193 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
194 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
197 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
198 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
199 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
206 /* C can be in the middle of a token. */
208 intoken (unsigned char c
)
210 static bool const table
[UCHAR_MAX
+ 1] = {
212 ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
213 ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
214 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
215 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
216 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
219 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
220 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
221 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
/* C can end a token.  Return true for the NUL byte, the listed
   whitespace characters and the listed punctuation; false for every
   other character.  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };

  /* Entries not named in the initializer are zero, i.e. false.  */
  return table[c];
}
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type obtained from xmalloc.
 * xrnew resizes the storage OP points to so that it holds N objects of
 * type Type, using xrealloc, and stores the result back into OP.
 */
#define xnew(n, Type)      ((Type *) xmalloc ((n) * sizeof (Type)))
#define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
250 typedef void Lang_function (FILE *);
254 const char *suffix
; /* file name suffix for this compressor */
255 const char *command
; /* takes one arg and decompresses to stdout */
260 const char *name
; /* language name */
261 const char *help
; /* detailed help for the language */
262 Lang_function
*function
; /* parse function */
263 const char **suffixes
; /* name suffixes of this language's files */
264 const char **filenames
; /* names of this language's files */
265 const char **interpreters
; /* interpreters for this language */
266 bool metasource
; /* source used to generate other sources */
271 struct fdesc
*next
; /* for the linked list */
272 char *infname
; /* uncompressed input file name */
273 char *infabsname
; /* absolute uncompressed input file name */
274 char *infabsdir
; /* absolute dir of input file */
275 char *taggedfname
; /* file name to write in tagfile */
276 language
*lang
; /* language of file */
277 char *prop
; /* file properties to write in tagfile */
278 bool usecharno
; /* etags tags shall contain char number */
279 bool written
; /* entry written in the tags file */
282 typedef struct node_st
283 { /* sorting structure */
284 struct node_st
*left
, *right
; /* left and right sons */
285 fdesc
*fdp
; /* description of file to whom tag belongs */
286 char *name
; /* tag name */
287 char *regex
; /* search regexp */
288 bool valid
; /* write this tag on the tag file */
289 bool is_func
; /* function tag: use regexp in CTAGS mode */
290 bool been_warned
; /* warning already given for duplicated tag */
291 int lno
; /* line number tag is on */
292 long cno
; /* character number line starts on */
296 * A `linebuffer' is a structure which holds a line of text.
297 * `readline_internal' reads a line from a stream into a linebuffer
298 * and works regardless of the length of the line.
299 * SIZE is the size of BUFFER, LEN is the length of the string in
300 * BUFFER after readline reads it.
309 /* Used to support mixing of --lang and file names. */
313 at_language
, /* a language specification */
314 at_regexp
, /* a regular expression */
315 at_filename
, /* a file name */
316 at_stdin
, /* read from stdin here */
317 at_end
/* stop parsing the list */
318 } arg_type
; /* argument type */
319 language
*lang
; /* language associated with the argument */
320 char *what
; /* the argument itself */
323 /* Structure defining a regular expression. */
324 typedef struct regexp
326 struct regexp
*p_next
; /* pointer to next in list */
327 language
*lang
; /* if set, use only for this language */
328 char *pattern
; /* the regexp pattern */
329 char *name
; /* tag name */
330 struct re_pattern_buffer
*pat
; /* the compiled pattern */
331 struct re_registers regs
; /* re registers */
332 bool error_signaled
; /* already signaled for this regexp */
333 bool force_explicit_name
; /* do not allow implicit tag name */
334 bool ignore_case
; /* ignore case when matching */
335 bool multi_line
; /* do a multi-line match on the whole file */
339 /* Many compilers barf on this:
340 Lang_function Ada_funcs;
341 so let's write it this way */
342 static void Ada_funcs (FILE *);
343 static void Asm_labels (FILE *);
344 static void C_entries (int c_ext
, FILE *);
345 static void default_C_entries (FILE *);
346 static void plain_C_entries (FILE *);
347 static void Cjava_entries (FILE *);
348 static void Cobol_paragraphs (FILE *);
349 static void Cplusplus_entries (FILE *);
350 static void Cstar_entries (FILE *);
351 static void Erlang_functions (FILE *);
352 static void Forth_words (FILE *);
353 static void Fortran_functions (FILE *);
354 static void Go_functions (FILE *);
355 static void HTML_labels (FILE *);
356 static void Lisp_functions (FILE *);
357 static void Lua_functions (FILE *);
358 static void Makefile_targets (FILE *);
359 static void Pascal_functions (FILE *);
360 static void Perl_functions (FILE *);
361 static void PHP_functions (FILE *);
362 static void PS_functions (FILE *);
363 static void Prolog_functions (FILE *);
364 static void Python_functions (FILE *);
365 static void Ruby_functions (FILE *);
366 static void Scheme_functions (FILE *);
367 static void TeX_commands (FILE *);
368 static void Texinfo_nodes (FILE *);
369 static void Yacc_entries (FILE *);
370 static void just_read_file (FILE *);
372 static language
*get_language_from_langname (const char *);
373 static void readline (linebuffer
*, FILE *);
374 static long readline_internal (linebuffer
*, FILE *, char const *);
375 static bool nocase_tail (const char *);
376 static void get_tag (char *, char **);
377 static void get_lispy_tag (char *);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn
void suggest_asking_for_help (void);
385 static _Noreturn
void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn
void pfatal (const char *);
387 static void add_node (node
*, node
**);
389 static void process_file_name (char *, language
*);
390 static void process_file (FILE *, char *, language
*);
391 static void find_entries (FILE *);
392 static void free_tree (node
*);
393 static void free_fdesc (fdesc
*);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc
*, node
**);
396 static void put_entries (node
*);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f
);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer
*);
412 static void linebuffer_setlen (linebuffer
*, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar
= '/'; /* use /.../ searches */
419 static char *tagfile
; /* output file */
420 static char *progname
; /* name this program was invoked with */
421 static char *cwd
; /* current working directory */
422 static char *tagfiledir
; /* directory of tagfile */
423 static FILE *tagf
; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
426 static fdesc
*fdhead
; /* head of file description list */
427 static fdesc
*curfdp
; /* current file description */
428 static char *infilename
; /* current input file name */
429 static int lineno
; /* line number of current line */
430 static long charno
; /* current character number */
431 static long linecharno
; /* charno of start of current line */
432 static char *dbp
; /* pointer to start of current tag */
434 static const int invalidcharno
= -1;
436 static node
*nodehead
; /* the head of the binary tree of tags */
437 static node
*last_node
; /* the last node created */
439 static linebuffer lb
; /* the current line */
440 static linebuffer filebuf
; /* a buffer containing the whole file */
441 static linebuffer token_name
; /* a buffer containing a tag name */
443 static bool append_to_tagfile
; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs
; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals
; /* create tags for global variables */
453 static int members
; /* create tags for C member variables */
454 static int declarations
; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive
; /* ignore #line directives (undocumented) */
456 static int no_duplicates
; /* no duplicate tags for ctags (undocumented) */
457 static bool update
; /* -u: update tags */
458 static bool vgrind_style
; /* -v: create vgrind style index output */
459 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style
; /* -x: create cxref style output */
461 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent
; /* -I: ignore indentation in C */
463 static int packages_only
; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify
; /* -Q: produce class-qualified tags in C++/Java */
465 static int debug
; /* --debug */
467 /* STDIN is defined in LynxOS system headers */
472 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
473 static bool parsing_stdin
; /* --parse-stdin used */
475 static regexp
*p_head
; /* list of all regexps */
476 static bool need_filebuf
; /* some regexes are multi-line */
478 static struct option longopts
[] =
480 { "append", no_argument
, NULL
, 'a' },
481 { "packages-only", no_argument
, &packages_only
, 1 },
482 { "c++", no_argument
, NULL
, 'C' },
483 { "debug", no_argument
, &debug
, 1 },
484 { "declarations", no_argument
, &declarations
, 1 },
485 { "no-line-directive", no_argument
, &no_line_directive
, 1 },
486 { "no-duplicates", no_argument
, &no_duplicates
, 1 },
487 { "help", no_argument
, NULL
, 'h' },
488 { "help", no_argument
, NULL
, 'H' },
489 { "ignore-indentation", no_argument
, NULL
, 'I' },
490 { "language", required_argument
, NULL
, 'l' },
491 { "members", no_argument
, &members
, 1 },
492 { "no-members", no_argument
, &members
, 0 },
493 { "output", required_argument
, NULL
, 'o' },
494 { "class-qualify", no_argument
, &class_qualify
, 'Q' },
495 { "regex", required_argument
, NULL
, 'r' },
496 { "no-regex", no_argument
, NULL
, 'R' },
497 { "ignore-case-regex", required_argument
, NULL
, 'c' },
498 { "parse-stdin", required_argument
, NULL
, STDIN
},
499 { "version", no_argument
, NULL
, 'V' },
501 #if CTAGS /* Ctags options */
502 { "backward-search", no_argument
, NULL
, 'B' },
503 { "cxref", no_argument
, NULL
, 'x' },
504 { "defines", no_argument
, NULL
, 'd' },
505 { "globals", no_argument
, &globals
, 1 },
506 { "typedefs", no_argument
, NULL
, 't' },
507 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
508 { "update", no_argument
, NULL
, 'u' },
509 { "vgrind", no_argument
, NULL
, 'v' },
510 { "no-warn", no_argument
, NULL
, 'w' },
512 #else /* Etags options */
513 { "no-defines", no_argument
, NULL
, 'D' },
514 { "no-globals", no_argument
, &globals
, 0 },
515 { "include", required_argument
, NULL
, 'i' },
520 static compressor compressors
[] =
522 { "z", "gzip -d -c"},
523 { "Z", "gzip -d -c"},
524 { "gz", "gzip -d -c"},
525 { "GZ", "gzip -d -c"},
526 { "bz2", "bzip2 -d -c" },
527 { "xz", "xz -d -c" },
536 static const char *Ada_suffixes
[] =
537 { "ads", "adb", "ada", NULL
};
538 static const char Ada_help
[] =
539 "In Ada code, functions, procedures, packages, tasks and types are\n\
540 tags. Use the '--packages-only' option to create tags for\n\
542 Ada tag names have suffixes indicating the type of entity:\n\
543 Entity type: Qualifier:\n\
544 ------------ ----------\n\
551 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
552 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
553 will just search for any tag 'bidule'.";
556 static const char *Asm_suffixes
[] =
557 { "a", /* Unix assembler */
558 "asm", /* Microcontroller assembly */
559 "def", /* BSO/Tasking definition includes */
560 "inc", /* Microcontroller include files */
561 "ins", /* Microcontroller include files */
562 "s", "sa", /* Unix assembler */
563 "S", /* cpp-processed Unix assembler */
564 "src", /* BSO/Tasking C compiler output */
567 static const char Asm_help
[] =
568 "In assembler code, labels appearing at the beginning of a line,\n\
569 followed by a colon, are tags.";
572 /* Note that .c and .h can be considered C++, if the --c++ flag was
573 given, or if the `class' or `template' keywords are met inside the file.
574 That is why default_C_entries is called for these. */
575 static const char *default_C_suffixes
[] =
577 #if CTAGS /* C help for Ctags */
578 static const char default_C_help
[] =
579 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
580 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
581 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
582 Use --globals to tag global variables.\n\
583 You can tag function declarations and external variables by\n\
584 using '--declarations', and struct members by using '--members'.";
585 #else /* C help for Etags */
586 static const char default_C_help
[] =
587 "In C code, any C function or typedef is a tag, and so are\n\
588 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
589 definitions and 'enum' constants are tags unless you specify\n\
590 '--no-defines'. Global variables are tags unless you specify\n\
591 '--no-globals' and so are struct members unless you specify\n\
592 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
593 '--no-members' can make the tags table file much smaller.\n\
594 You can tag function declarations and external variables by\n\
595 using '--declarations'.";
596 #endif /* C help for Ctags and Etags */
598 static const char *Cplusplus_suffixes
[] =
599 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
600 "M", /* Objective C++ */
601 "pdb", /* PostScript with C syntax */
603 static const char Cplusplus_help
[] =
604 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
605 --help --lang=c --lang=c++ for full help.)\n\
606 In addition to C tags, member functions are also recognized. Member\n\
607 variables are recognized unless you use the '--no-members' option.\n\
608 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
609 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
612 static const char *Cjava_suffixes
[] =
/* Detailed help for the "java" entry of the language table.  Declared
   const like the other *_help strings; it is only ever read.  */
static const char Cjava_help [] =
"In Java code, all the tag constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
619 static const char *Cobol_suffixes
[] =
620 { "COB", "cob", NULL
};
/* Detailed help for the "cobol" entry of the language table.  Declared
   const like the other *_help strings; it is only ever read.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
625 static const char *Cstar_suffixes
[] =
626 { "cs", "hs", NULL
};
628 static const char *Erlang_suffixes
[] =
629 { "erl", "hrl", NULL
};
630 static const char Erlang_help
[] =
631 "In Erlang code, the tags are the functions, records and macros\n\
632 defined in the file.";
/* File name suffixes for the "forth" entry of the language table.
   Given internal linkage like every other suffix table in this file.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
636 static const char Forth_help
[] =
637 "In Forth code, tags are words defined by ':',\n\
638 constant, code, create, defer, value, variable, buffer:, field.";
640 static const char *Fortran_suffixes
[] =
641 { "F", "f", "f90", "for", NULL
};
642 static const char Fortran_help
[] =
643 "In Fortran code, functions, subroutines and block data are tags.";
645 static const char *Go_suffixes
[] = {"go", NULL
};
646 static const char Go_help
[] =
647 "In Go code, functions, interfaces and packages are tags.";
649 static const char *HTML_suffixes
[] =
650 { "htm", "html", "shtml", NULL
};
651 static const char HTML_help
[] =
652 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
653 'h3' headers. Also, tags are 'name=' in anchors and all\n\
654 occurrences of 'id='.";
656 static const char *Lisp_suffixes
[] =
657 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
658 static const char Lisp_help
[] =
659 "In Lisp code, any function defined with 'defun', any variable\n\
660 defined with 'defvar' or 'defconst', and in general the first\n\
661 argument of any expression that starts with '(def' in column zero\n\
663 The '--declarations' option tags \"(defvar foo)\" constructs too.";
665 static const char *Lua_suffixes
[] =
666 { "lua", "LUA", NULL
};
667 static const char Lua_help
[] =
668 "In Lua scripts, all functions are tags.";
670 static const char *Makefile_filenames
[] =
671 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
672 static const char Makefile_help
[] =
673 "In makefiles, targets are tags; additionally, variables are tags\n\
674 unless you specify '--no-globals'.";
676 static const char *Objc_suffixes
[] =
677 { "lm", /* Objective lex file */
678 "m", /* Objective C file */
680 static const char Objc_help
[] =
681 "In Objective C code, tags include Objective C definitions for classes,\n\
682 class categories, methods and protocols. Tags for variables and\n\
683 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
684 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
686 static const char *Pascal_suffixes
[] =
687 { "p", "pas", NULL
};
688 static const char Pascal_help
[] =
689 "In Pascal code, the tags are the functions and procedures defined\n\
691 /* " // this is for working around an Emacs highlighting bug... */
693 static const char *Perl_suffixes
[] =
694 { "pl", "pm", NULL
};
695 static const char *Perl_interpreters
[] =
696 { "perl", "@PERL@", NULL
};
697 static const char Perl_help
[] =
698 "In Perl code, the tags are the packages, subroutines and variables\n\
699 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
700 '--globals' if you want to tag global variables. Tags for\n\
701 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
702 defined in the default package is 'main::SUB'.";
704 static const char *PHP_suffixes
[] =
705 { "php", "php3", "php4", NULL
};
706 static const char PHP_help
[] =
707 "In PHP code, tags are functions, classes and defines. Unless you use\n\
708 the '--no-members' option, vars are tags too.";
710 static const char *plain_C_suffixes
[] =
711 { "pc", /* Pro*C file */
714 static const char *PS_suffixes
[] =
715 { "ps", "psw", NULL
}; /* .psw is for PSWrap */
716 static const char PS_help
[] =
717 "In PostScript code, the tags are the functions.";
719 static const char *Prolog_suffixes
[] =
721 static const char Prolog_help
[] =
722 "In Prolog code, tags are predicates and rules at the beginning of\n\
725 static const char *Python_suffixes
[] =
727 static const char Python_help
[] =
728 "In Python code, 'def' or 'class' at the beginning of a line\n\
731 static const char *Ruby_suffixes
[] =
732 { "rb", "ru", "rbw", NULL
};
733 static const char *Ruby_filenames
[] =
734 { "Rakefile", "Thorfile", NULL
};
735 static const char Ruby_help
[] =
736 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
737 a line generate a tag. Constants also generate a tag.";
739 /* Can't do the `SCM' or `scm' prefix with a version number. */
740 static const char *Scheme_suffixes
[] =
741 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
742 static const char Scheme_help
[] =
743 "In Scheme code, tags include anything defined with 'def' or with a\n\
744 construct whose name starts with 'def'. They also include\n\
745 variables set with 'set!' at top level in the file.";
747 static const char *TeX_suffixes
[] =
748 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
749 static const char TeX_help
[] =
750 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
751 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
752 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
753 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
754 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
756 Other commands can be specified by setting the environment variable\n\
757 'TEXTAGS' to a colon-separated list like, for example,\n\
758 TEXTAGS=\"mycommand:myothercommand\".";
761 static const char *Texinfo_suffixes
[] =
762 { "texi", "texinfo", "txi", NULL
};
763 static const char Texinfo_help
[] =
764 "for texinfo files, lines starting with @node are tagged.";
766 static const char *Yacc_suffixes
[] =
767 { "y", "y++", "ym", "yxx", "yy", NULL
}; /* .ym is Objective yacc file */
768 static const char Yacc_help
[] =
769 "In Bison or Yacc input files, each rule defines as a tag the\n\
770 nonterminal it constructs. The portions of the file that contain\n\
771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
/* Detailed help for the pseudo-language "auto" in the language table.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
778 static const char none_help
[] =
779 "'none' is not a real language, it indicates to only do\n\
780 regexp processing on files.";
782 static const char no_lang_help
[] =
783 "No detailed help available for this language.";
787 * Table of languages.
789 * It is ok for a given function to be listed under more than one
790 * name. I just didn't.
793 static language lang_names
[] =
795 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
796 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
797 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
798 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
799 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
800 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
801 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
802 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
803 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
804 { "go", Go_help
, Go_functions
, Go_suffixes
},
805 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
806 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
807 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
808 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
809 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
810 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
811 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
812 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
813 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
814 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
815 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
816 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
817 { "python", Python_help
, Python_functions
, Python_suffixes
},
818 { "ruby", Ruby_help
,Ruby_functions
,Ruby_suffixes
,Ruby_filenames
},
819 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
820 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
821 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
822 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,true},
823 { "auto", auto_help
}, /* default guessing scheme */
824 { "none", none_help
, just_read_file
}, /* regexp matching only */
825 { NULL
} /* end of list */
830 print_language_names (void)
833 const char **name
, **ext
;
835 puts ("\nThese are the currently supported languages, along with the\n\
836 default file names and dot suffixes:");
837 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
839 printf (" %-*s", 10, lang
->name
);
840 if (lang
->filenames
!= NULL
)
841 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
842 printf (" %s", *name
);
843 if (lang
->suffixes
!= NULL
)
844 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
845 printf (" .%s", *ext
);
848 puts ("where 'auto' means use default language for files based on file\n\
849 name suffix, and 'none' means only do regexp processing on files.\n\
850 If no language is specified and no matching suffix is found,\n\
851 the first line of the file is read for a sharp-bang (#!) sequence\n\
852 followed by the name of an interpreter. If no such sequence is found,\n\
853 Fortran is tried first; if no tags are found, C is tried next.\n\
854 When parsing any C file, a \"class\" or \"template\" keyword\n\
856 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
858 For detailed help on a given language use, for example,\n\
859 etags --help --lang=ada.");
863 # define EMACS_NAME "standalone"
866 # define VERSION "17.38.1.4"
868 static _Noreturn
void
871 char emacs_copyright
[] = COPYRIGHT
;
873 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
874 puts (emacs_copyright
);
875 puts ("This program is distributed under the terms in ETAGS.README");
880 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
881 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
/* Print help text to stdout and exit with EXIT_SUCCESS.
   ARGBUFFER is the parsed argument list, terminated by an entry whose
   arg_type is at_end.  For each at_language entry the language's own
   help string is printed and help_for_lang is set.  The remainder
   prints the usage line (using progname), a description of each
   option, the language table via print_language_names, and the
   bug-report address.  Never returns (declared _Noreturn).
   NOTE(review): many lines are absent from this copy, including the
   conditions that select between alternative option texts and whatever
   is done with help_for_lang.  One comment further down has lost its
   terminator, so a C lexer sees much of the rest of this function as
   comment text.  Consult the full file before editing.  */
884 static _Noreturn
void
885 print_help (argument
*argbuffer
)
887 bool help_for_lang
= false;
/* Language-specific help first: scan up to the at_end sentinel.  */
889 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
890 if (argbuffer
->arg_type
== at_language
)
894 puts (argbuffer
->lang
->help
);
895 help_for_lang
= true;
/* General usage line and per-option descriptions.  */
901 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
903 These are the options accepted by %s.\n", progname
, progname
);
904 puts ("You may use unambiguous abbreviations for the long option names.");
905 puts (" A - as file name means read names from stdin (one per line).\n\
906 Absolute names are stored in the output file as they are.\n\
907 Relative ones are stored relative to the output file's directory.\n");
909 puts ("-a, --append\n\
910 Append tag entries to existing tags file.");
912 puts ("--packages-only\n\
913 For Ada files, only generate tags for packages.");
916 puts ("-B, --backward-search\n\
917 Write the search commands for the tag entries using '?', the\n\
918 backward-search command instead of '/', the forward-search command.");
920 /* This option is mostly obsolete, because etags can now automatically
921 detect C++. Retained for backward compatibility and for debugging and
922 experimentation. In principle, we could want to tag as C++ even
923 before any "class" or "template" keyword.
925 Treat files whose name suffix defaults to C language as C++ files.");
928 puts ("--declarations\n\
929 In C and derived languages, create tags for function declarations,");
931 puts ("\tand create tags for extern variables if --globals is used.");
934 ("\tand create tags for extern variables unless --no-globals is used.");
937 puts ("-d, --defines\n\
938 Create tag entries for C #define constants and enum constants, too.");
940 puts ("-D, --no-defines\n\
941 Don't create tag entries for C #define constants and enum constants.\n\
942 This makes the tags file smaller.");
945 puts ("-i FILE, --include=FILE\n\
946 Include a note in tag file indicating that, when searching for\n\
947 a tag, one should also consult the tags file FILE after\n\
948 checking the current file.");
950 puts ("-l LANG, --language=LANG\n\
951 Force the following files to be considered as written in the\n\
952 named language up to the next --language=LANG option.");
956 Create tag entries for global variables in some languages.");
958 puts ("--no-globals\n\
959 Do not create tag entries for global variables in some\n\
960 languages. This makes the tags file smaller.");
962 puts ("--no-line-directive\n\
963 Ignore #line preprocessor directives in C and derived languages.");
967 Create tag entries for members of structures in some languages.");
969 puts ("--no-members\n\
970 Do not create tag entries for members of structures\n\
971 in some languages.");
973 puts ("-Q, --class-qualify\n\
974 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
975 This produces tag names of the form \"class::member\" for C++,\n\
976 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
977 For Objective C, this also produces class methods qualified with\n\
978 their arguments, as in \"foo:bar:baz:more\".\n\
979 For Perl, this produces \"package::member\".");
980 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
981 Make a tag for each line matching a regular expression pattern\n\
982 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
983 files only. REGEXFILE is a file containing one REGEXP per line.\n\
984 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
985 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
986 puts (" If TAGNAME/ is present, the tags created are named.\n\
987 For example Tcl named tags can be created with:\n\
988 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
989 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
990 'm' means to allow multi-line matches, 's' implies 'm' and\n\
991 causes dot to match any character, including newline.");
993 puts ("-R, --no-regex\n\
994 Don't create tags from regexps for the following files.");
996 puts ("-I, --ignore-indentation\n\
997 In C and C++ do not assume that a closing brace in the first\n\
998 column is the final brace of a function or structure definition.");
1000 puts ("-o FILE, --output=FILE\n\
1001 Write the tags to FILE.");
1003 puts ("--parse-stdin=NAME\n\
1004 Read from standard input and record tags as belonging to file NAME.");
1008 puts ("-t, --typedefs\n\
1009 Generate tag entries for C and Ada typedefs.");
1010 puts ("-T, --typedefs-and-c++\n\
1011 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1012 and C++ member functions.");
1016 puts ("-u, --update\n\
1017 Update the tag entries for the given files, leaving tag\n\
1018 entries for other files in place. Currently, this is\n\
1019 implemented by deleting the existing entries for the given\n\
1020 files and then rewriting the new entries at the end of the\n\
1021 tags file. It is often faster to simply rebuild the entire\n\
1022 tag file than to use this.");
1026 puts ("-v, --vgrind\n\
1027 Print on the standard output an index of items intended for\n\
1028 human consumption, similar to the output of vgrind. The index\n\
1029 is sorted, and gives the page number of each item.");
1031 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1032 puts ("-w, --no-duplicates\n\
1033 Do not create duplicate tag entries, for compatibility with\n\
1034 traditional ctags.");
1036 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1037 puts ("-w, --no-warn\n\
1038 Suppress warning messages about duplicate tag entries.");
1040 puts ("-x, --cxref\n\
1041 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1042 The output uses line numbers instead of page numbers, but\n\
1043 beyond that the differences are cosmetic; try both to see\n\
1047 puts ("-V, --version\n\
1048 Print the version of the program.\n\
1050 Print this help message.\n\
1051 Followed by one or more '--language' options prints detailed\n\
1052 help about tag generation for the specified languages.");
1054 print_language_names ();
1057 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1059 exit (EXIT_SUCCESS
);
// Program entry point.  Steps visible in this copy, in order:
//  - allocate included_files and argbuffer with ARGC slots each;
//  - default typedefs, typedefs_or_cplusplus, constantypedefs, globals
//    and members to true;
//  - walk the options with getopt_long, turning file names, languages,
//    regexps and stdin requests into argbuffer entries and setting the
//    option flags;
//  - append any remaining arguments as at_filename entries and end the
//    list with an at_end entry;
//  - show help via print_help, or complain when no input was named;
//  - choose the tags file name and the base directory for relative
//    names, open the output, and process every argbuffer entry;
//  - when not CTAGS, or in cxref style, write the tags plus the
//    records for files without tags and for included files, close the
//    output and return EXIT_SUCCESS;
//  - otherwise (ctags) filter old entries out through a shell
//    pipeline, write the tags, and for append or update re-sort the
//    tags file with sort -u, returning the result of system.
// NOTE(review): many lines (braces, case labels, conditions) are absent
// from this copy, so the control flow above is partly inferred from the
// statements that remain.
1064 main (int argc
, char **argv
)
1067 unsigned int nincluded_files
;
1068 char **included_files
;
1069 argument
*argbuffer
;
1070 int current_arg
, file_count
;
1071 linebuffer filename_lb
;
1072 bool help_asked
= false;
1078 nincluded_files
= 0;
1079 included_files
= xnew (argc
, char *);
1083 /* Allocate enough no matter what happens. Overkill, but each one
1085 argbuffer
= xnew (argc
, argument
);
1088 * Always find typedefs and structure tags.
1089 * Also default to find macro constants, enum constants, struct
1090 * members and global variables. Do it for both etags and ctags.
1092 typedefs
= typedefs_or_cplusplus
= constantypedefs
= true;
1093 globals
= members
= true;
1095 /* When the optstring begins with a '-' getopt_long does not rearrange the
1096 non-options arguments to be at the end, but leaves them alone. */
1097 optstring
= concat ("-ac:Cf:Il:o:Qr:RSVhH",
1098 (CTAGS
) ? "BxdtTuvw" : "Di:",
/* Translate each value returned by getopt_long into an option flag or
   an argbuffer entry; whatlen_max is compared with the length of each
   stored string.  */
1101 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1105 /* If getopt returns 0, then it has already processed a
1106 long-named option. We should do nothing. */
1110 /* This means that a file name has been seen. Record it. */
1111 argbuffer
[current_arg
].arg_type
= at_filename
;
1112 argbuffer
[current_arg
].what
= optarg
;
1113 len
= strlen (optarg
);
1114 if (whatlen_max
< len
)
1121 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1122 argbuffer
[current_arg
].arg_type
= at_stdin
;
1123 argbuffer
[current_arg
].what
= optarg
;
1124 len
= strlen (optarg
);
1125 if (whatlen_max
< len
)
1130 fatal ("cannot parse standard input more than once");
1131 parsing_stdin
= true;
1134 /* Common options. */
1135 case 'a': append_to_tagfile
= true; break;
1136 case 'C': cplusplus
= true; break;
1137 case 'f': /* for compatibility with old makefiles */
1141 error ("-o option may only be given once.");
1142 suggest_asking_for_help ();
1148 case 'S': /* for backward compatibility */
1149 ignoreindent
= true;
1153 language
*lang
= get_language_from_langname (optarg
);
1156 argbuffer
[current_arg
].lang
= lang
;
1157 argbuffer
[current_arg
].arg_type
= at_language
;
1163 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1164 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1167 argbuffer
[current_arg
].arg_type
= at_regexp
;
1168 argbuffer
[current_arg
].what
= optarg
;
1169 len
= strlen (optarg
);
1170 if (whatlen_max
< len
)
1175 argbuffer
[current_arg
].arg_type
= at_regexp
;
1176 argbuffer
[current_arg
].what
= NULL
;
1191 case 'D': constantypedefs
= false; break;
1192 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1194 /* Ctags options. */
1195 case 'B': searchar
= '?'; break;
1196 case 'd': constantypedefs
= true; break;
1197 case 't': typedefs
= true; break;
1198 case 'T': typedefs
= typedefs_or_cplusplus
= true; break;
1199 case 'u': update
= true; break;
1200 case 'v': vgrind_style
= true; FALLTHROUGH
;
1201 case 'x': cxref_style
= true; break;
1202 case 'w': no_warnings
= true; break;
1204 suggest_asking_for_help ();
1208 /* No more options. Store the rest of arguments. */
1209 for (; optind
< argc
; optind
++)
1211 argbuffer
[current_arg
].arg_type
= at_filename
;
1212 argbuffer
[current_arg
].what
= argv
[optind
];
1213 len
= strlen (argv
[optind
]);
1214 if (whatlen_max
< len
)
/* Terminate argbuffer with the at_end sentinel that print_help scans
   for.  */
1220 argbuffer
[current_arg
].arg_type
= at_end
;
1223 print_help (argbuffer
);
1226 if (nincluded_files
== 0 && file_count
== 0)
1228 error ("no input files specified.");
1229 suggest_asking_for_help ();
1233 if (tagfile
== NULL
)
1234 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1235 cwd
= etags_getcwd (); /* the current working directory */
/* NOTE(review): indexes cwd at strlen (cwd) - 1, which assumes
   etags_getcwd never returns an empty string; confirm.  */
1236 if (cwd
[strlen (cwd
) - 1] != '/')
1239 cwd
= concat (oldcwd
, "/", "");
1243 /* Compute base directory for relative file names. */
1244 if (streq (tagfile
, "-")
1245 || strneq (tagfile
, "/dev/", 5))
1246 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1249 canonicalize_filename (tagfile
);
1250 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1253 linebuffer_init (&lb
);
1254 linebuffer_init (&filename_lb
);
1255 linebuffer_init (&filebuf
);
1256 linebuffer_init (&token_name
);
1260 if (streq (tagfile
, "-"))
1263 set_binary_mode (STDOUT_FILENO
, O_BINARY
);
1266 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1272 * Loop through files finding functions.
1274 for (i
= 0; i
< current_arg
; i
++)
1276 static language
*lang
; /* non-NULL if language is forced */
1279 switch (argbuffer
[i
].arg_type
)
1282 lang
= argbuffer
[i
].lang
;
1285 analyze_regex (argbuffer
[i
].what
);
1288 this_file
= argbuffer
[i
].what
;
1289 /* Input file named "-" means read file names from stdin
1290 (one per line) and use them. */
1291 if (streq (this_file
, "-"))
1294 fatal ("cannot parse standard input "
1295 "AND read file names from it");
1296 while (readline_internal (&filename_lb
, stdin
, "-") > 0)
1297 process_file_name (filename_lb
.buffer
, lang
);
1300 process_file_name (this_file
, lang
);
1303 this_file
= argbuffer
[i
].what
;
1304 process_file (stdin
, this_file
, lang
);
1307 error ("internal error: arg_type");
1313 free (filebuf
.buffer
);
1314 free (token_name
.buffer
);
1316 if (!CTAGS
|| cxref_style
)
1318 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1319 put_entries (nodehead
);
1320 free_tree (nodehead
);
1326 /* Output file entries that have no tags. */
1327 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1329 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1331 while (nincluded_files
-- > 0)
1332 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1334 if (fclose (tagf
) == EOF
)
1338 return EXIT_SUCCESS
;
1341 /* From here on, we are in (CTAGS && !cxref_style) */
/* NOTE(review): the command assembled below copies tagfile into cmd
   twice, but this allocation adds strlen (tagfile) only once, relying
   on whatlen_max for the remainder; verify the buffer cannot overflow.
   NOTE(review): tagfile and argbuffer[i].what are pasted into a shell
   command without escaping before being passed to system; names that
   contain quote or shell metacharacters would change the command.  */
1345 xmalloc (strlen (tagfile
) + whatlen_max
+
1346 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1347 for (i
= 0; i
< current_arg
; ++i
)
1349 switch (argbuffer
[i
].arg_type
)
1355 continue; /* the for loop */
1357 char *z
= stpcpy (cmd
, "mv ");
1358 z
= stpcpy (z
, tagfile
);
1359 z
= stpcpy (z
, " OTAGS;grep -Fv '\t");
1360 z
= stpcpy (z
, argbuffer
[i
].what
);
1361 z
= stpcpy (z
, "\t' OTAGS >");
1362 z
= stpcpy (z
, tagfile
);
1363 strcpy (z
, ";rm OTAGS");
1364 if (system (cmd
) != EXIT_SUCCESS
)
1365 fatal ("failed to execute shell command");
1368 append_to_tagfile
= true;
1371 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1374 put_entries (nodehead
); /* write all the tags (CTAGS) */
1375 free_tree (nodehead
);
1377 if (fclose (tagf
) == EOF
)
1381 if (append_to_tagfile
|| update
)
/* Re-sort the tags file with sort -u; room is reserved for two copies
   of tagfile, and the exit status of system becomes the return value.  */
1383 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1384 /* Maybe these should be used:
1385 setenv ("LC_COLLATE", "C", 1);
1386 setenv ("LC_ALL", "C", 1); */
1387 char *z
= stpcpy (cmd
, "sort -u -o ");
1388 z
= stpcpy (z
, tagfile
);
1390 strcpy (z
, tagfile
);
1391 return system (cmd
);
1393 return EXIT_SUCCESS
;
/*
1398 * Return a compressor given the file name. If EXTPTR is non-zero,
1399 * return a pointer into FILE where the compressor-specific
1400 * extension begins. If no compressor is found, NULL is returned
1401 * and EXTPTR is not significant.
1402 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
 */
/* Find the compressor whose suffix matches FILE's extension.
   FILE is a file name already passed through canonicalize_filename,
   so only '/' is treated as a directory separator.  The last '/' and
   the last '.' are located with strrchr; the suffix is then compared
   with streq against each entry of the compressors table, up to the
   entry whose suffix is NULL.
   EXTPTR is an output parameter.  How it is stored, how suffix is
   advanced between iterations, and what is returned are on lines
   absent from this copy.  NOTE(review): confirm against the full
   file.  */
1405 get_compressor_from_suffix (char *file
, char **extptr
)
1408 char *slash
, *suffix
;
1410 /* File has been processed by canonicalize_filename,
1411 so we don't need to consider backslashes on DOS_NT. */
1412 slash
= strrchr (file
, '/');
1413 suffix
= strrchr (file
, '.');
/* No dot at all, or the last dot lies before the last slash and so
   belongs to a directory component.  NOTE(review): when FILE has no
   slash this compares suffix against a NULL pointer.  */
1414 if (suffix
== NULL
|| suffix
< slash
)
1419 /* Let those poor souls who live with DOS 8+3 file name limits get
1420 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1421 Only the first do loop is run if not MSDOS */
1424 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1425 if (streq (compr
->suffix
, suffix
))
1428 break; /* do it only once: not really a loop */
1431 } while (*suffix
!= '\0');
/*
1438 * Return a language given the name.
 */
/* Look up a language by its name.
   NAME is compared with streq against the name of each lang_names
   entry, up to the terminating entry whose name is NULL.  Two
   diagnostics are issued through error: "empty language name" and,
   with NAME substituted, "unknown language".
   NOTE(review): the tests guarding those diagnostics and the return
   statements are absent from this copy.  */
1441 get_language_from_langname (const char *name
)
1446 error ("empty language name");
1449 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1450 if (streq (name
, lang
->name
))
1452 error ("unknown language \"%s\"", name
);
/*
1460 * Return a language given the interpreter name.
 */
/* Look up a language by interpreter name; find_entries passes the
   name it extracts from a sharp-bang first line.
   INTERPRETER is first tested against NULL.  Then, for every
   lang_names entry that has an interpreters list, each element of that
   list up to its NULL terminator is compared to INTERPRETER with
   streq.
   NOTE(review): the return statements are absent from this copy.  */
1463 get_language_from_interpreter (char *interpreter
)
1468 if (interpreter
== NULL
)
1470 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1471 if (lang
->interpreters
!= NULL
)
1472 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1473 if (streq (*iname
, interpreter
))
/*
1482 * Return a language given the file name.
 */
/* Guess a language from a file name.
   FILE is the file name.  CASE_SENSITIVE selects the comparison used
   throughout: streq when nonzero, strcaseeq when zero.
   First pass: compare FILE against every entry of each language's
   filenames list.  Second pass: take the suffix found with
   strrchr (file, '.') and compare it against every entry of each
   language's suffixes list.
   NOTE(review): the lines that strip the directory part (after the
   last '/' or a leading drive letter), step past the dot, and return
   the match are absent from this copy.  */
1485 get_language_from_filename (char *file
, int case_sensitive
)
1488 const char **name
, **ext
, *suffix
;
1491 /* Try whole file name first. */
1492 slash
= strrchr (file
, '/');
1496 else if (file
[0] && file
[1] == ':')
1499 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1500 if (lang
->filenames
!= NULL
)
1501 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1502 if ((case_sensitive
)
1503 ? streq (*name
, file
)
1504 : strcaseeq (*name
, file
))
1507 /* If not found, try suffix after last dot. */
1508 suffix
= strrchr (file
, '.');
1512 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1513 if (lang
->suffixes
!= NULL
)
1514 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1515 if ((case_sensitive
)
1516 ? streq (*ext
, suffix
)
1517 : strcaseeq (*ext
, suffix
))
/*
1524 * This routine is called on each file argument.
 */
/* Process one file argument: locate the file, decompress it when
   needed, and hand the open stream to process_file.
   FILE is the name as given and is canonicalized in place first; LANG
   is passed through to process_file unchanged.
   Visible steps: report and skip the tags file itself; split off a
   compressor suffix with get_compressor_from_suffix; skip names whose
   uncompressed form already appears in the fdhead list; open FILE,
   falling back to the uncompressed name or to FILE plus each known
   compressor suffix; when the name that opened is the compressed one,
   run the compressor's command with output redirected to a temporary
   file and read that file instead; call process_file; close the
   stream; free the names allocated here.
   NOTE(review): error branches, the cleanup of the temporary file and
   most braces are absent from this copy.  */
1527 process_file_name (char *file
, language
*lang
)
1532 char *compressed_name
, *uncompressed_name
;
1533 char *ext
, *real_name UNINIT
, *tmp_name UNINIT
;
1536 canonicalize_filename (file
);
1537 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1539 error ("skipping inclusion of %s in self.", file
);
1542 compr
= get_compressor_from_suffix (file
, &ext
);
1545 compressed_name
= file
;
1546 uncompressed_name
= savenstr (file
, ext
- file
);
1550 compressed_name
= NULL
;
1551 uncompressed_name
= file
;
1554 /* If the canonicalized uncompressed name
1555 has already been dealt with, skip it silently. */
1556 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1558 assert (fdp
->infname
!= NULL
);
1559 if (streq (uncompressed_name
, fdp
->infname
))
1563 inf
= fopen (file
, "r" FOPEN_BINARY
);
1568 int file_errno
= errno
;
1569 if (compressed_name
)
1571 /* Try with the given suffix. */
1572 inf
= fopen (uncompressed_name
, "r" FOPEN_BINARY
);
1574 real_name
= uncompressed_name
;
1578 /* Try all possible suffixes. */
1579 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1581 compressed_name
= concat (file
, ".", compr
->suffix
);
1582 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1585 real_name
= compressed_name
;
/* Delete one character at a time from the appended suffix (memmove
   shifts the tail left by one) and retry the open with the shortened
   name.  */
1590 char *suf
= compressed_name
+ strlen (file
);
1591 size_t suflen
= strlen (compr
->suffix
) + 1;
1592 for ( ; suf
[1]; suf
++, suflen
--)
1594 memmove (suf
, suf
+ 1, suflen
);
1595 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1598 real_name
= compressed_name
;
1605 free (compressed_name
);
1606 compressed_name
= NULL
;
1617 if (real_name
== compressed_name
)
1620 tmp_name
= etags_mktmp ();
/* NOTE(review): real_name is placed between quote characters but is
   not escaped before the command goes to system, so a name containing
   that quote character would change the shell command; verify.  Only a
   system result of -1 is tested in the line visible here.  */
1625 #if MSDOS || defined (DOS_NT)
1626 char *cmd1
= concat (compr
->command
, " \"", real_name
);
1627 char *cmd
= concat (cmd1
, "\" > ", tmp_name
);
1629 char *cmd1
= concat (compr
->command
, " '", real_name
);
1630 char *cmd
= concat (cmd1
, "' > ", tmp_name
);
1634 if (system (cmd
) == -1)
1641 inf
= fopen (tmp_name
, "r" FOPEN_BINARY
);
1655 process_file (inf
, uncompressed_name
, lang
);
1657 retval
= fclose (inf
);
1658 if (real_name
== compressed_name
)
/* Only names allocated in this function differ from FILE, so only
   those are freed.  */
1667 if (compressed_name
!= file
)
1668 free (compressed_name
);
1669 if (uncompressed_name
!= file
)
1670 free (uncompressed_name
);
/* Record a file description for FN and, in the etags case, flush the
   tags collected for it.
   FH is the open input stream, FN the file name to record, and LANG
   the language supplied by the caller.
   Visible steps: allocate an fdesc with xnew; fill infname,
   infabsname and infabsdir from FN and cwd; set taggedfname to the
   canonical absolute name when FN is absolute, otherwise to FN made
   relative to tagfiledir; set usecharno to true and written to false;
   make curfdp the head of the fdhead list.  Then, under the condition
   described by the in-line comment, walk nodehead along the left links
   to the sublist belonging to curfdp, write it with put_entries, free
   it with free_tree, and unlink it.
   NOTE(review): the call that parses FH, the list insertion, and the
   first part of the flush condition are absent from this copy.  */
1677 process_file (FILE *fh
, char *fn
, language
*lang
)
1679 static const fdesc emptyfdesc
;
1683 /* Create a new input file description entry. */
1684 fdp
= xnew (1, fdesc
);
1687 fdp
->infname
= savestr (fn
);
1689 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1690 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1691 if (filename_is_absolute (fn
))
1693 /* An absolute file name. Canonicalize it. */
1694 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1698 /* A file name relative to cwd. Make it relative
1699 to the directory of the tags file. */
1700 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1702 fdp
->usecharno
= true; /* use char position when making tags */
1704 fdp
->written
= false; /* not written on tags file yet */
1707 curfdp
= fdhead
; /* the current file description */
1711 /* If not Ctags, and if this is not metasource and if it contained no #line
1712 directives, we can write the tags and free all nodes pointing to
1715 && curfdp
->usecharno
/* no #line directives in this file */
1716 && !curfdp
->lang
->metasource
)
1720 /* Look for the head of the sublist relative to this file. See add_node
1721 for the structure of the node tree. */
1723 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1724 if (np
->fdp
== curfdp
)
1727 /* If we generated tags for this file, write and delete them. */
1730 /* This is the head of the last sublist, if any. The following
1731 instructions depend on this being true. */
1732 assert (np
->left
== NULL
);
1734 assert (fdhead
== curfdp
);
1735 assert (last_node
->fdp
== curfdp
);
1736 put_entries (np
); /* write tags for file curfdp->taggedfname */
1737 free_tree (np
); /* remove the written nodes */
1739 nodehead
= NULL
; /* no nodes left */
1741 prev
->left
= NULL
; /* delete the pointer to the sublist */
/* Rewind INF to its beginning with fseek.  When fseek reports failure
   the error is printed through perror, labelled with infilename.  */
1747 reset_input (FILE *inf
)
1749 if (fseek (inf
, 0, SEEK_SET
) != 0)
1750 perror (infilename
);
/*
1754 * This routine opens the specified file and calls the function
1755 * which finds the function and type definitions.
 */
/* Choose a parser for the current file (curfdp) and prepare the global
   state for reading INF, an input stream that is already open.
   The parser is chosen by the first of these that yields a language
   with a non-NULL function: the language already stored in
   curfdp->lang; the file name, compared case-sensitively; the
   interpreter named on a sharp-bang first line; the file name again,
   compared case-insensitively.  Failing all of those, Fortran is
   tried, and if last_node is unchanged afterwards, C or C++ according
   to cplusplus.
   Then, unless no_line_directive is set, a metasource language causes
   earlier file descriptions with the same taggedfname to be removed
   along with their nodes.  Finally filebuf, lineno, charno and
   linecharno are reset and regex_tag_multiline is called.
   NOTE(review): the guards between the guessing stages, the actual
   parser invocations and most braces are absent from this copy.  */
1758 find_entries (FILE *inf
)
1761 language
*lang
= curfdp
->lang
;
1762 Lang_function
*parser
= NULL
;
1764 /* If user specified a language, use it. */
1765 if (lang
!= NULL
&& lang
->function
!= NULL
)
1767 parser
= lang
->function
;
1770 /* Else try to guess the language given the file name. */
1773 lang
= get_language_from_filename (curfdp
->infname
, true);
1774 if (lang
!= NULL
&& lang
->function
!= NULL
)
1776 curfdp
->lang
= lang
;
1777 parser
= lang
->function
;
1781 /* Else look for sharp-bang as the first two characters. */
1783 && readline_internal (&lb
, inf
, infilename
) > 0
1785 && lb
.buffer
[0] == '#'
1786 && lb
.buffer
[1] == '!')
1790 /* Set lp to point at the first char after the last slash in the
1791 line or, if no slashes, at the first nonblank. Then set cp to
1792 the first successive blank and terminate the string. */
1793 lp
= strrchr (lb
.buffer
+2, '/');
1797 lp
= skip_spaces (lb
.buffer
+ 2);
1798 cp
= skip_non_spaces (lp
);
1801 if (strlen (lp
) > 0)
1803 lang
= get_language_from_interpreter (lp
);
1804 if (lang
!= NULL
&& lang
->function
!= NULL
)
1806 curfdp
->lang
= lang
;
1807 parser
= lang
->function
;
1814 /* Else try to guess the language given the case insensitive file name. */
1817 lang
= get_language_from_filename (curfdp
->infname
, false);
1818 if (lang
!= NULL
&& lang
->function
!= NULL
)
1820 curfdp
->lang
= lang
;
1821 parser
= lang
->function
;
1825 /* Else try Fortran or C. */
1828 node
*old_last_node
= last_node
;
1830 curfdp
->lang
= get_language_from_langname ("fortran");
1833 if (old_last_node
== last_node
)
1834 /* No Fortran entries found. Try C. */
1837 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1843 if (!no_line_directive
1844 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1845 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1846 file, or anyway we parsed a file that is automatically generated from
1847 this one. If this is the case, the bingo.c file contained #line
1848 directives that generated tags pointing to this file. Let's delete
1849 them all before parsing this file, which is the real source. */
1851 fdesc
**fdpp
= &fdhead
;
1852 while (*fdpp
!= NULL
)
1854 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1855 /* We found one of those! We must delete both the file description
1856 and all tags referring to it. */
1858 fdesc
*badfdp
= *fdpp
;
1860 /* Delete the tags referring to badfdp->taggedfname
1861 that were obtained from badfdp->infname. */
1862 invalidate_nodes (badfdp
, &nodehead
);
1864 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1865 free_fdesc (badfdp
);
1868 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1871 assert (parser
!= NULL
);
1873 /* Generic initializations before reading from file. */
1874 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1876 /* Generic initializations before parsing file with readline. */
1877 lineno
= 0; /* reset global line number */
1878 charno
= 0; /* reset global char number */
1879 linecharno
= 0; /* reset global char number of line start */
1883 regex_tag_multiline ();
/*
1888 * Check whether an implicitly named tag should be created,
1889 * then call `pfnote'.
1890 * NAME is a string that is internally copied by this function.
1892 * TAGS format specification
1893 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1894 * The following is explained in some more detail in etc/ETAGS.EBNF.
1896 * make_tag creates tags with "implicit tag names" (unnamed tags)
1897 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1898 * 1. NAME does not contain any of the characters in NONAM;
1899 * 2. LINESTART contains name as either a rightmost, or rightmost but
1900 * one character, substring;
1901 * 3. the character, if any, immediately before NAME in LINESTART must
1902 * be a character in NONAM;
1903 * 4. the character, if any, immediately after NAME in LINESTART must
1904 * also be a character in NONAM.
1906 * The implementation uses the notinname() macro, which recognizes the
1907 * characters stored in the string `nonam'.
1908 * etags.el needs to use the same characters that are in NONAM.
 */
/* Decide whether a tag can be left unnamed, then record it via pfnote.
   NAME and NAMELEN give the tag name (NAME may be NULL); IS_FUNC,
   LINESTART, LINELEN, LNO and CNO are forwarded to pfnote unchanged.
   A tag starts out "named" when NAME is non-NULL and NAMELEN is
   positive.  When not CTAGS, named is cleared if no character of NAME
   satisfies notinname and NAME matches, via strneq, the end of the
   line as located by the checks tagged rule #1 to rule #4 below, so
   that etags.el can recover the name from the line text.  The name
   handed on is a copy made with savenstr.
   NOTE(review): the condition around the stderr trace and the code
   that chooses what to pass to pfnote for unnamed tags are absent from
   this copy.  */
1911 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1912 int namelen
, /* tag length */
1913 bool is_func
, /* tag is a function */
1914 char *linestart
, /* start of the line where tag is */
1915 int linelen
, /* length of the line where tag is */
1916 int lno
, /* line number */
1917 long int cno
) /* character number */
1919 bool named
= (name
!= NULL
&& namelen
> 0);
1923 fprintf (stderr
, "%s on %s:%d: %s\n",
1924 named
? name
: "(unnamed)", curfdp
->taggedfname
, lno
, linestart
);
1926 if (!CTAGS
&& named
) /* maybe set named to false */
1927 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1928 such that etags.el can guess a name from it. */
1931 register const char *cp
= name
;
1933 for (i
= 0; i
< namelen
; i
++)
1934 if (notinname (*cp
++))
1936 if (i
== namelen
) /* rule #1 */
/* Point cp at where NAME would start if it ended the line, stepping
   back one more when the line's final character satisfies notinname.  */
1938 cp
= linestart
+ linelen
- namelen
;
1939 if (notinname (linestart
[linelen
-1]))
1940 cp
-= 1; /* rule #4 */
1941 if (cp
>= linestart
/* rule #2 */
1943 || notinname (cp
[-1])) /* rule #3 */
1944 && strneq (name
, cp
, namelen
)) /* rule #2 */
1945 named
= false; /* use implicit tag name */
1950 nname
= savenstr (name
, namelen
);
1952 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
/* Allocate a node for one tag and insert it with add_node.
   NAME is the tag name or NULL if unnamed, and is asserted to be
   non-empty when non-NULL.  The remaining parameters are described by
   the comments that follow the signature.
   Visible behaviour: in ctags mode without cxref style, a tag named
   "main" gets the name "M" followed by the part of the tagged file
   name after its last '/', and that name is then tested for a
   one-character extension.  cno is stored only when the node's file
   description has usecharno set; otherwise invalidcharno is stored.
   In ctags mode without cxref style the stored pattern is LINESTART
   plus "$" when LINESTART is shorter than 50 characters, else its
   first 50 characters; otherwise it is the first LINELEN characters of
   LINESTART.
   NOTE(review): the final parameter line, the early return for unnamed
   ctags tags, and the assignments of several node fields are absent
   from this copy.  */
1957 pfnote (char *name
, bool is_func
, char *linestart
, int linelen
, int lno
,
1959 /* tag name, or NULL if unnamed */
1960 /* tag is a function */
1961 /* start of the line where tag is */
1962 /* length of the line where tag is */
1964 /* character number */
1968 assert (name
== NULL
|| name
[0] != '\0');
1969 if (CTAGS
&& name
== NULL
)
1972 np
= xnew (1, node
);
1974 /* If ctags mode, change name "main" to M<thisfilename>. */
1975 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1977 char *fp
= strrchr (curfdp
->taggedfname
, '/');
1978 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1979 fp
= strrchr (np
->name
, '.');
1980 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1986 np
->been_warned
= false;
1988 np
->is_func
= is_func
;
1990 if (np
->fdp
->usecharno
)
1991 /* Our char numbers are 0-base, because of C language tradition?
1992 ctags compatibility? old versions compatibility? I don't know.
1993 Anyway, since emacs's are 1-base we expect etags.el to take care
1994 of the difference. If we wanted to have 1-based numbers, we would
1995 uncomment the +1 below. */
1996 np
->cno
= cno
/* + 1 */ ;
1998 np
->cno
= invalidcharno
;
1999 np
->left
= np
->right
= NULL
;
2000 if (CTAGS
&& !cxref_style
)
/* Short lines get an end-of-line anchor appended; longer ones are cut
   at 50 characters.  */
2002 if (strlen (linestart
) < 50)
2003 np
->regex
= concat (linestart
, "$", "");
2005 np
->regex
= savenstr (linestart
, 50);
2008 np
->regex
= savenstr (linestart
, linelen
);
2010 add_node (np
, &nodehead
);
/*
2014 * Utility functions and data to avoid recursion.
 */
/* One cell of the explicit stack used by push_node and pop_node; next
   links to the cell that was on top before this one was pushed.
   NOTE(review): the member holding the node pointer (read as np by
   pop_node) and the end of the typedef are absent from this copy.  */
2017 typedef struct stack_entry
{
2019 struct stack_entry
*next
;
/* Push NP onto the stack whose top pointer is *STACK_TOP.
   A new stkentry is allocated with xnew and linked in front of the
   current top.
   NOTE(review): the lines that store NP in the entry and make it the
   new top, and any guard on NP, are absent from this copy.  */
2023 push_node (node
*np
, stkentry
**stack_top
)
2027 stkentry
*new = xnew (1, stkentry
);
2030 new->next
= *stack_top
;
/* Pop the top entry of the stack whose top pointer is *STACK_TOP.
   The node stored in the top entry is read into ret and *STACK_TOP is
   advanced to the next entry; the old top is remembered in old_start.
   Callers in this file treat a NULL result as "stack empty".
   NOTE(review): the empty-stack test, the release of old_start and the
   return statement are absent from this copy.  */
2036 pop_node (stkentry
**stack_top
)
2042 stkentry
*old_start
= *stack_top
;
2044 ret
= (*stack_top
)->np
;
2045 *stack_top
= (*stack_top
)->next
;
/*
2053 * emulate recursion on left children, iterate on right children.
 */
/* Free the tree rooted at NP without recursing.
   An explicit stack (push_node and pop_node) stands in for recursion
   on left children: nodes are pushed while descending left, a node
   with no left child is freed after its right link is saved, and the
   stack is unwound until a node with a right child turns up, which is
   processed next.
   NOTE(review): the loop headers and the free calls themselves are
   absent from this copy.  */
2056 free_tree (register node
*np
)
2058 stkentry
*stack
= NULL
;
2062 /* Descent on left children. */
2065 push_node (np
, &stack
);
2068 /* Free node without left children. */
2069 node
*node_right
= np
->right
;
2075 /* Backtrack to find a node with right children, while freeing nodes
2076 that don't have right children. */
2077 while (node_right
== NULL
&& (np
= pop_node (&stack
)) != NULL
)
2079 node_right
= np
->right
;
2085 /* Free right children. */
/*
2092 * delete a file description
 */
/* Release the strings owned by the file description FDP: infname,
   infabsname, infabsdir and taggedfname.
   NOTE(review): any further members freed, and the release of FDP
   itself, are on lines absent from this copy.  */
2095 free_fdesc (register fdesc
*fdp
)
2097 free (fdp
->infname
);
2098 free (fdp
->infabsname
);
2099 free (fdp
->infabsdir
);
2100 free (fdp
->taggedfname
);
/*
2107 * Adds a node to the tree of nodes. In etags mode, sort by file
2108 * name. In ctags mode, sort by tag name. Make no attempt at
 * balancing.
 *
2111 * add_node is the only function allowed to add nodes, so it can
 * maintain state.
 */
/* Insert NP into the node tree whose root pointer is *CUR_NODE_P.
   Etags layout: the tags of one file form a chain on the right links,
   and the first tags of different files form a chain on the left
   links.  When last_node belongs to the same file as NP, NP is
   appended directly after it; otherwise the left chain is searched for
   that file's sublist and NP is appended to its end, or a new sublist
   is started.
   Ctags layout: a binary tree ordered by strcmp on the tag names.  An
   equal name with no_duplicates set produces a warning instead of an
   insertion: "Second entry ignored" when both entries come from the
   same file, or a one-time "Warning only" message when they come from
   different files and no_warnings is clear.
   NOTE(review): the first-node case, the test selecting etags versus
   ctags layout, and the final stores are absent from this copy.  Two
   comments below have lost their terminators, so a C lexer sees the
   statements that follow them as comment text.  */
2115 add_node (node
*np
, node
**cur_node_p
)
2117 node
*cur_node
= *cur_node_p
;
2119 /* Make the first node. */
2120 if (cur_node
== NULL
)
2130 /* For each file name, tags are in a linked sublist on the right
2131 pointer. The first tags of different files are a linked list
2132 on the left pointer. last_node points to the end of the last
2134 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2136 /* Let's use the same sublist as the last added node. */
2137 assert (last_node
->right
== NULL
);
2138 last_node
->right
= np
;
2143 while (cur_node
->fdp
!= np
->fdp
)
2145 if (cur_node
->left
== NULL
)
2147 /* The head of this sublist is not good for us. Let's try the
2149 cur_node
= cur_node
->left
;
2153 /* Scanning the list we found the head of a sublist which is
2154 good for us. Let's scan this sublist. */
2155 if (cur_node
->right
)
2157 cur_node
= cur_node
->right
;
2158 while (cur_node
->right
)
2159 cur_node
= cur_node
->right
;
2161 /* Make a new node in this sublist. */
2162 cur_node
->right
= np
;
2166 /* Make a new sublist. */
2167 cur_node
->left
= np
;
2171 } /* if ETAGS mode */
2175 node
**next_node
= &cur_node
;
/* Descend by name order: smaller names go left, others go right.  */
2177 while ((cur_node
= *next_node
) != NULL
)
2179 int dif
= strcmp (np
->name
, cur_node
->name
);
2181 * If this tag name matches an existing one, then
2182 * do not add the node, but maybe print a warning.
2184 if (!dif
&& no_duplicates
)
2186 if (np
->fdp
== cur_node
->fdp
)
2191 "Duplicate entry in file %s, line %d: %s\n",
2192 np
->fdp
->infname
, lineno
, np
->name
);
2193 fprintf (stderr
, "Second entry ignored\n");
2196 else if (!cur_node
->been_warned
&& !no_warnings
)
2200 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2201 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2202 cur_node
->been_warned
= true;
2207 next_node
= dif
< 0 ? &cur_node
->left
: &cur_node
->right
;
2211 } /* if CTAGS mode */
/*
2215 * invalidate_nodes ()
2216 * Scan the node tree and invalidate all nodes pointing to the
2217 * given file description (CTAGS case) or free them (ETAGS case).
 */
/* Remove the effect of every node that refers to BADFDP from the tree
   whose root pointer is *NPP.
   First visible strategy: a non-recursive traversal with an explicit
   stack that tests each node's fdp against BADFDP and invalidates the
   matches in place.
   Second visible strategy: walk the left chain under a temporary
   super_root; each sublist whose fdp is BADFDP is detached from the
   chain, isolated and released with free_tree, and *NPP is finally
   reloaded from super_root.left.
   NOTE(review): the test that chooses between the two strategies and
   the statements that perform the invalidation are absent from this
   copy.  */
2220 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2223 stkentry
*stack
= NULL
;
2229 /* Push all the left children on the stack. */
2230 while (np
->left
!= NULL
)
2232 push_node (np
, &stack
);
2235 /* Invalidate this node. */
2236 if (np
->fdp
== badfdp
)
2240 /* Pop nodes from stack, invalidating them, until we find one
2241 with a right child. */
2242 while ((np
= pop_node (&stack
)) != NULL
)
2244 if (np
->fdp
== badfdp
)
2246 if (np
->right
!= NULL
)
2250 /* Process the right child, if any. */
/* super_root acts as a parent for the real root, so the first sublist
   can be unlinked like any other.  Its fdp is set to (fdesc *) -1,
   presumably a value no real description can have.  */
2257 node super_root
, *np_parent
= NULL
;
2259 super_root
.left
= np
;
2260 super_root
.fdp
= (fdesc
*) -1;
2265 /* Descent on left children until node with BADFP. */
2266 while (np
&& np
->fdp
!= badfdp
)
2268 assert (np
->fdp
!= NULL
);
2274 np_parent
->left
= np
->left
; /* detach subtree from the tree */
2275 np
->left
= NULL
; /* isolate it */
2276 free_tree (np
); /* free it */
2278 /* Continue with rest of tree. */
2279 np
= np_parent
->left
;
2282 *npp
= super_root
.left
;
/* Forward declarations for the two size helpers used when writing
   etags file headers.  */
2287 static int total_size_of_entries (node
*);
2288 static int number_len (long) ATTRIBUTE_CONST
;
2290 /* Length of a non-negative number's decimal representation. */
2292 number_len (long int num
)
/* NUM is divided by 10 repeatedly for as long as the quotient stays
   positive.  NOTE(review): the counter updated by this loop and the
   return statement are absent from this copy.  */
2295 while ((num
/= 10) > 0)
/*
2301 * Return total number of characters that put_entries will output for
2302 * the nodes in the linked list at the right of the specified node.
2303 * This count is irrelevant with etags.el since emacs 19.34 at least,
2304 * but is still supplied for backward compatibility.
 */
/* Add up the number of characters needed to write the chain of nodes
   reached from NP through the right links.
   Per node the sum is: the pattern length plus one separator; the
   name length plus one separator when a name is present; the digits
   of lno plus the comma; the digits of cno when it is not
   invalidcharno; and one newline.
   NOTE(review): a per-node condition and the return statement appear
   to be on lines absent from this copy.  */
2307 total_size_of_entries (register node
*np
)
2309 register int total
= 0;
2311 for (; np
!= NULL
; np
= np
->right
)
2314 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2315 if (np
->name
!= NULL
)
2316 total
+= strlen (np
->name
) + 1; /* name\001 */
2317 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2318 if (np
->cno
!= invalidcharno
) /* cno */
2319 total
+= number_len (np
->cno
);
2320 total
+= 1; /* newline */
/* Write the single tag NP in the output format currently selected.
   Etags format: a file header (form feed, newline, tagged file name, a
   comma and the value of total_size_of_entries) is written to tagf and
   the description is marked written; the entry itself is the pattern,
   the byte \177, the name and the byte \001 when a name is present,
   the line number, a comma, and the character number when it is not
   invalidcharno.
   Ctags formats: a NULL name is reported as an internal error.  One
   stdout form prints name, file and (lno + 63) / 64; another prints
   name, line number, file and pattern in fixed-width columns.  The
   tags-file form prints name, tab, file, tab, then either the pattern
   delimited by searchar, with backslash and searchar tested for
   escaping, or the bare line number.
   NOTE(review): the conditions that select between these forms, and
   the one deciding when the header is written, are absent from this
   copy.  The static fdp presumably remembers the file of the previous
   entry; confirm.  */
2327 put_entry (node
*np
)
2330 static fdesc
*fdp
= NULL
;
2332 /* Output this entry */
2341 fprintf (tagf
, "\f\n%s,%d\n",
2342 fdp
->taggedfname
, total_size_of_entries (np
));
2343 fdp
->written
= true;
2345 fputs (np
->regex
, tagf
);
2346 fputc ('\177', tagf
);
2347 if (np
->name
!= NULL
)
2349 fputs (np
->name
, tagf
);
2350 fputc ('\001', tagf
);
2352 fprintf (tagf
, "%d,", np
->lno
);
2353 if (np
->cno
!= invalidcharno
)
2354 fprintf (tagf
, "%ld", np
->cno
);
2360 if (np
->name
== NULL
)
2361 error ("internal error: NULL name in ctags mode.");
2366 fprintf (stdout
, "%s %s %d\n",
2367 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2369 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2370 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2374 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2377 { /* function or #define macro with args */
2378 putc (searchar
, tagf
);
2381 for (sp
= np
->regex
; *sp
; sp
++)
2383 if (*sp
== '\\' || *sp
== searchar
)
2387 putc (searchar
, tagf
);
2390 { /* anything else; text pattern inadequate */
2391 fprintf (tagf
, "%d", np
->lno
);
2396 } /* if this node contains a valid tag */
2400 put_entries (node
*np
)
2402 stkentry
*stack
= NULL
;
2411 /* Stack subentries that precede this one. */
2414 push_node (np
, &stack
);
2417 /* Output this subentry. */
2419 /* Stack subentries that follow this one. */
2422 /* Output subentries that precede the next one. */
2423 np
= pop_node (&stack
);
2434 push_node (np
, &stack
);
2435 while ((np
= pop_node (&stack
)) != NULL
)
2437 /* Output this subentry. */
2441 /* Output subentries that follow this one. */
2442 put_entry (np
->right
);
2443 /* Stack subentries from the following files. */
2444 push_node (np
->left
, &stack
);
2447 push_node (np
->left
, &stack
);
/* Language selector values passed to the C-family parser.  The bits
   under C_EXT name the dialect; C_AUTO and YACC lie outside that mask.
   Note that the C_STAR and C_JAVA values both contain the C_PLPL bit,
   which is why the keyword table masks it out with (C_JAVA & ~C_PLPL). */
2454 #define C_EXT 0x00fff /* mask covering the C extension bits */
2455 #define C_PLAIN 0x00000 /* C */
2456 #define C_PLPL 0x00001 /* C++ */
2457 #define C_STAR 0x00003 /* C* (value includes the C++ bit) */
2458 #define C_JAVA 0x00005 /* Java (value includes the C++ bit) */
2459 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' or `::' is met */
2460 #define YACC 0x10000 /* yacc file */
2463 * The C symbol tables.
2468 st_C_objprot
, st_C_objimpl
, st_C_objend
,
2470 st_C_ignore
, st_C_attribute
, st_C_enum_bf
,
2473 st_C_class
, st_C_template
,
2474 st_C_struct
, st_C_extern
, st_C_enum
, st_C_define
, st_C_typedef
2477 /* Feed stuff between (but not including) %[ and %] lines to:
2483 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2487 while, 0, st_C_ignore
2488 switch, 0, st_C_ignore
2489 return, 0, st_C_ignore
2490 __attribute__, 0, st_C_attribute
2491 GTY, 0, st_C_attribute
2492 @interface, 0, st_C_objprot
2493 @protocol, 0, st_C_objprot
2494 @implementation,0, st_C_objimpl
2495 @end, 0, st_C_objend
2496 import, (C_JAVA & ~C_PLPL), st_C_ignore
2497 package, (C_JAVA & ~C_PLPL), st_C_ignore
2498 friend, C_PLPL, st_C_ignore
2499 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2500 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2501 interface, (C_JAVA & ~C_PLPL), st_C_struct
2502 class, 0, st_C_class
2503 namespace, C_PLPL, st_C_struct
2504 domain, C_STAR, st_C_struct
2505 union, 0, st_C_struct
2506 struct, 0, st_C_struct
2507 extern, 0, st_C_extern
2509 typedef, 0, st_C_typedef
2510 define, 0, st_C_define
2511 undef, 0, st_C_define
2512 operator, C_PLPL, st_C_operator
2513 template, 0, st_C_template
2514 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2515 DEFUN, 0, st_C_gnumacro
2516 SYSCALL, 0, st_C_gnumacro
2517 ENTRY, 0, st_C_gnumacro
2518 PSEUDO, 0, st_C_gnumacro
2519 ENUM_BF, 0, st_C_enum_bf
2520 # These are defined inside C functions, so currently they are not met.
2521 # EXFUN used in glibc, DEFVAR_* in emacs.
2522 #EXFUN, 0, st_C_gnumacro
2523 #DEFVAR_, 0, st_C_gnumacro
2525 and replace lines between %< and %> with its output, then:
2526 - remove the #if characterset check
2527 - remove any #line directives
2528 - make in_word_set static and not inline
2529 - remove any 'register' qualifications from variable decls. */
2531 /* C code produced by gperf version 3.0.1 */
2532 /* Command-line: gperf -m 5 */
2533 /* Computed positions: -k'2-3' */
2535 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
2536 /* maximum key range = 34, duplicates = 0 */
2539 hash (const char *str
, int len
)
2541 static char const asso_values
[] =
2543 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2544 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2545 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2546 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2547 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2548 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2549 36, 36, 36, 36, 36, 36, 36, 36, 36, 3,
2550 27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
2551 36, 36, 36, 25, 0, 0, 36, 36, 36, 0,
2552 36, 36, 36, 36, 36, 1, 36, 16, 36, 6,
2553 23, 0, 0, 36, 22, 0, 36, 36, 5, 0,
2554 0, 15, 1, 36, 6, 36, 8, 19, 36, 16,
2555 4, 5, 36, 36, 36, 36, 36, 36, 36, 36,
2556 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2557 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2558 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2559 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2560 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2561 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2562 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2563 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2564 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2565 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2566 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2567 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2568 36, 36, 36, 36, 36, 36
2575 hval
+= asso_values
[(unsigned char) str
[2]];
2578 hval
+= asso_values
[(unsigned char) str
[1]];
2584 static struct C_stab_entry
*
2585 in_word_set (register const char *str
, register unsigned int len
)
2589 TOTAL_KEYWORDS
= 34,
2590 MIN_WORD_LENGTH
= 2,
2591 MAX_WORD_LENGTH
= 15,
2596 static struct C_stab_entry wordlist
[] =
2599 {"if", 0, st_C_ignore
},
2600 {"GTY", 0, st_C_attribute
},
2601 {"@end", 0, st_C_objend
},
2602 {"union", 0, st_C_struct
},
2603 {"define", 0, st_C_define
},
2604 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2605 {"template", 0, st_C_template
},
2606 {"operator", C_PLPL
, st_C_operator
},
2607 {"@interface", 0, st_C_objprot
},
2608 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2609 {"friend", C_PLPL
, st_C_ignore
},
2610 {"typedef", 0, st_C_typedef
},
2611 {"return", 0, st_C_ignore
},
2612 {"@implementation",0, st_C_objimpl
},
2613 {"@protocol", 0, st_C_objprot
},
2614 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2615 {"extern", 0, st_C_extern
},
2616 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2617 {"struct", 0, st_C_struct
},
2618 {"domain", C_STAR
, st_C_struct
},
2619 {"switch", 0, st_C_ignore
},
2620 {"enum", 0, st_C_enum
},
2621 {"for", 0, st_C_ignore
},
2622 {"namespace", C_PLPL
, st_C_struct
},
2623 {"class", 0, st_C_class
},
2624 {"while", 0, st_C_ignore
},
2625 {"undef", 0, st_C_define
},
2626 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2627 {"__attribute__", 0, st_C_attribute
},
2628 {"ENTRY", 0, st_C_gnumacro
},
2629 {"SYSCALL", 0, st_C_gnumacro
},
2630 {"ENUM_BF", 0, st_C_enum_bf
},
2631 {"PSEUDO", 0, st_C_gnumacro
},
2632 {"DEFUN", 0, st_C_gnumacro
}
2635 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2637 int key
= hash (str
, len
);
2639 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2641 const char *s
= wordlist
[key
].name
;
2643 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2644 return &wordlist
[key
];
2651 static enum sym_type
2652 C_symtype (char *str
, int len
, int c_ext
)
2654 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2656 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
2663 * Ignoring __attribute__ ((list))
2665 static bool inattribute
; /* looking at an __attribute__ construct */
2667 /* Ignoring ENUM_BF (type)
2670 static bool in_enum_bf
; /* inside parentheses following ENUM_BF */
2673 * C functions and variables are recognized using a simple
2674 * finite automaton. fvdef is its state variable.
2678 fvnone
, /* nothing seen */
2679 fdefunkey
, /* Emacs DEFUN keyword seen */
2680 fdefunname
, /* Emacs DEFUN name seen */
2681 foperator
, /* func: operator keyword seen (cplpl) */
2682 fvnameseen
, /* function or variable name seen */
2683 fstartlist
, /* func: just after open parenthesis */
2684 finlist
, /* func: in parameter list */
2685 flistseen
, /* func: after parameter list */
2686 fignore
, /* func: before open brace */
2687 vignore
/* var-like: ignore until ';' */
2690 static bool fvextern
; /* func or var: extern keyword seen; */
2693 * typedefs are recognized using a simple finite automaton.
2694 * typdef is its state variable.
2698 tnone
, /* nothing seen */
2699 tkeyseen
, /* typedef keyword seen */
2700 ttypeseen
, /* defined type seen */
2701 tinbody
, /* inside typedef body */
2702 tend
, /* just before typedef tag */
2703 tignore
/* junk after typedef tag */
2707 * struct-like structures (enum, struct and union) are recognized
2708 * using another simple finite automaton. `structdef' is its state
2713 snone
, /* nothing seen yet,
2714 or in struct body if bracelev > 0 */
2715 skeyseen
, /* struct-like keyword seen */
2716 stagseen
, /* struct-like tag seen */
2717 scolonseen
/* colon seen after struct-like tag */
2721 * When objdef is different from onone, objtag is the name of the class.
2723 static const char *objtag
= "<uninited>";
2726 * Yet another little state machine to deal with preprocessor lines.
2730 dnone
, /* nothing seen */
2731 dsharpseen
, /* '#' seen as first char on line */
2732 ddefineseen
, /* '#' and 'define' seen */
2733 dignorerest
/* ignore rest of line */
2737 * State machine for Objective C protocols and implementations.
2738 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2742 onone
, /* nothing seen */
2743 oprotocol
, /* @interface or @protocol seen */
2744 oimplementation
, /* @implementation seen */
2745 otagseen
, /* class name seen */
2746 oparenseen
, /* parenthesis before category seen */
2747 ocatseen
, /* category name seen */
2748 oinbody
, /* in @implementation body */
2749 omethodsign
, /* in @implementation body, after +/- */
2750 omethodtag
, /* after method name */
2751 omethodcolon
, /* after method colon */
2752 omethodparm
, /* after method parameter */
2753 oignore
/* wait for @end */
2758 * Use this structure to keep info about the token read, and how it
2759 * should be tagged. Used by the make_C_tag function to build a tag.
2763 char *line
; /* string containing the token */
2764 int offset
; /* where the token starts in LINE */
2765 int length
; /* token length */
2767 The previous members can be used to pass strings around for generic
2768 purposes. The following ones specifically refer to creating tags. In this
2769 case the token contained here is the pattern that will be used to create a
2772 bool valid
; /* do not create a tag; the token should be
2773 invalidated whenever a state machine is
2774 reset prematurely */
2775 bool named
; /* create a named tag */
2776 int lineno
; /* source line number of tag */
2777 long linepos
; /* source char number of tag */
2778 } token
; /* latest token read */
2781 * Variables and functions for dealing with nested structures.
2782 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2784 static void pushclass_above (int, char *, int);
2785 static void popclass_above (int);
2786 static void write_classname (linebuffer
*, const char *qualifier
);
2789 char **cname
; /* nested class names */
2790 int *bracelev
; /* nested class brace level */
2791 int nl
; /* class nesting level (elements used) */
2792 int size
; /* length of the array */
2793 } cstack
; /* stack for nested declaration tags */
2794 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2795 #define nestlev (cstack.nl)
2796 /* After struct keyword or in struct body, not inside a nested function. */
2797 #define instruct (structdef == snone && nestlev > 0 \
2798 && bracelev == cstack.bracelev[nestlev-1] + 1)
2801 pushclass_above (int bracelev
, char *str
, int len
)
2805 popclass_above (bracelev
);
2807 if (nl
>= cstack
.size
)
2809 int size
= cstack
.size
*= 2;
2810 xrnew (cstack
.cname
, size
, char *);
2811 xrnew (cstack
.bracelev
, size
, int);
2813 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2814 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2815 cstack
.bracelev
[nl
] = bracelev
;
2820 popclass_above (int bracelev
)
2824 for (nl
= cstack
.nl
- 1;
2825 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2828 free (cstack
.cname
[nl
]);
2834 write_classname (linebuffer
*cn
, const char *qualifier
)
2837 int qlen
= strlen (qualifier
);
2839 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2843 cn
->buffer
[0] = '\0';
2847 len
= strlen (cstack
.cname
[0]);
2848 linebuffer_setlen (cn
, len
);
2849 strcpy (cn
->buffer
, cstack
.cname
[0]);
2851 for (i
= 1; i
< cstack
.nl
; i
++)
2853 char *s
= cstack
.cname
[i
];
2856 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2857 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2862 static bool consider_token (char *, int, int, int *, int, int, bool *);
2863 static void make_C_tag (bool);
2867 * checks to see if the current token is at the start of a
2868 * function or variable, or corresponds to a typedef, or
2869 * is a struct/union/enum tag, or #define, or an enum constant.
2871 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2872 * with args. C_EXTP points to which language we are looking at.
2883 consider_token (char *str
, int len
, int c
, int *c_extp
,
2884 int bracelev
, int parlev
, bool *is_func_or_var
)
2885 /* IN: token pointer */
2886 /* IN: token length */
2887 /* IN: first char after the token */
2888 /* IN, OUT: C extensions mask */
2889 /* IN: brace level */
2890 /* IN: parenthesis level */
2891 /* OUT: function or variable found */
2893 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2894 structtype is the type of the preceding struct-like keyword, and
2895 structbracelev is the brace level where it has been seen. */
2896 static enum sym_type structtype
;
2897 static int structbracelev
;
2898 static enum sym_type toktype
;
2901 toktype
= C_symtype (str
, len
, *c_extp
);
2904 * Skip __attribute__
2906 if (toktype
== st_C_attribute
)
2915 if (toktype
== st_C_enum_bf
&& definedef
== dnone
)
2922 * Advance the definedef state machine.
2927 /* We're not on a preprocessor line. */
2928 if (toktype
== st_C_gnumacro
)
2935 if (toktype
== st_C_define
)
2937 definedef
= ddefineseen
;
2941 definedef
= dignorerest
;
2946 * Make a tag for any macro, unless it is a constant
2947 * and constantypedefs is false.
2949 definedef
= dignorerest
;
2950 *is_func_or_var
= (c
== '(');
2951 if (!*is_func_or_var
&& !constantypedefs
)
2958 error ("internal error: definedef value.");
2967 if (toktype
== st_C_typedef
)
2990 if (structdef
== snone
&& fvdef
== fvnone
)
3012 case st_C_javastruct
:
3013 if (structdef
== stagseen
)
3014 structdef
= scolonseen
;
3018 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
3020 && definedef
== dnone
&& structdef
== snone
3021 && typdef
== tnone
&& fvdef
== fvnone
)
3022 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
3023 if (toktype
== st_C_template
)
3030 && (typdef
== tkeyseen
3031 || (typedefs_or_cplusplus
&& structdef
== snone
)))
3033 structdef
= skeyseen
;
3034 structtype
= toktype
;
3035 structbracelev
= bracelev
;
3036 if (fvdef
== fvnameseen
)
3044 if (structdef
== skeyseen
)
3046 structdef
= stagseen
;
3050 if (typdef
!= tnone
)
3053 /* Detect Objective C constructs. */
3063 objdef
= oimplementation
;
3069 case oimplementation
:
3070 /* Save the class tag for functions or variables defined inside. */
3071 objtag
= savenstr (str
, len
);
3075 /* Save the class tag for categories. */
3076 objtag
= savenstr (str
, len
);
3078 *is_func_or_var
= true;
3082 *is_func_or_var
= true;
3090 objdef
= omethodtag
;
3091 linebuffer_setlen (&token_name
, len
);
3092 memcpy (token_name
.buffer
, str
, len
);
3093 token_name
.buffer
[len
] = '\0';
3099 objdef
= omethodparm
;
3104 objdef
= omethodtag
;
3107 int oldlen
= token_name
.len
;
3109 linebuffer_setlen (&token_name
, oldlen
+ len
);
3110 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
3111 token_name
.buffer
[oldlen
+ len
] = '\0';
3117 if (toktype
== st_C_objend
)
3119 /* Memory leakage here: the string pointed by objtag is
3120 never released, because many tests would be needed to
3121 avoid breaking on incorrect input code. The amount of
3122 memory leaked here is the sum of the lengths of the
3132 /* A function, variable or enum constant? */
3154 *is_func_or_var
= true;
3158 && structdef
== snone
3159 && structtype
== st_C_enum
&& bracelev
> structbracelev
3160 /* Don't tag tokens in expressions that assign values to enum
3162 && fvdef
!= vignore
)
3163 return true; /* enum constant */
3169 fvdef
= fdefunname
; /* GNU macro */
3170 *is_func_or_var
= true;
3178 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
3179 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
3190 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
3192 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
3193 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
3195 *is_func_or_var
= true;
3198 if (bracelev
> 0 && !instruct
)
3200 fvdef
= fvnameseen
; /* function or variable */
3201 *is_func_or_var
= true;
3216 * C_entries often keeps pointers to tokens or lines which are older than
3217 * the line currently read. By keeping two line buffers, and switching
3218 * them at end of line, it is possible to use those pointers.
/* Helpers for the two-entry `lbs' array.  `curndx' and `newndx' are
   local variables of the function using these macros. */
/* True when both indices designate the same entry of `lbs'. */
3226 #define current_lb_is_new (newndx == curndx)
/* Flip `curndx' between 0 and 1. */
3227 #define switch_line_buffers() (curndx = 1 - curndx)
/* Line buffer and starting char position selected by each index. */
3229 #define curlb (lbs[curndx].lb)
3230 #define newlb (lbs[newndx].lb)
3231 #define curlinepos (lbs[curndx].linepos)
3232 #define newlinepos (lbs[newndx].linepos)
/* Dialect tests on the local `c_ext' mask.  `plainc' holds when no
   extension bit is set; `cplpl' is non-zero whenever the C_PLPL bit is
   set, which is also the case for the C_STAR and C_JAVA values; `cjava'
   requires every bit of C_JAVA. */
3234 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3235 #define cplpl (c_ext & C_PLPL)
3236 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3238 #define CNL_SAVE_DEFINEDEF() \
3240 curlinepos = charno; \
3241 readline (&curlb, inf); \
3242 lp = curlb.buffer; \
3249 CNL_SAVE_DEFINEDEF (); \
3250 if (savetoken.valid) \
3252 token = savetoken; \
3253 savetoken.valid = false; \
3255 definedef = dnone; \
3260 make_C_tag (bool isfun
)
3262 /* This function is never called when token.valid is false, but
3263 we must protect against invalid input or internal errors. */
3265 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
3266 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3268 { /* this branch is optimized away if !DEBUG */
3269 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
3270 token_name
.len
+ 17, isfun
, token
.line
,
3271 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3272 error ("INVALID TOKEN");
3275 token
.valid
= false;
/* Return true as long as stream INF has neither its end-of-file
   indicator nor its error indicator set, i.e. while another read
   attempt on it may still deliver data. */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;

  return ferror (inf) == 0;
}
3287 * This routine finds functions, variables, typedefs,
3288 * #define's, enum constants and struct/union/enum definitions in
3289 * C syntax and adds them to the list.
3292 C_entries (int c_ext
, FILE *inf
)
3293 /* extension of C */
3296 register char c
; /* latest char read; '\0' for end of line */
3297 register char *lp
; /* pointer one beyond the character `c' */
3298 int curndx
, newndx
; /* indices for current and new lb */
3299 register int tokoff
; /* offset in line of start of current token */
3300 register int toklen
; /* length of current token */
3301 const char *qualifier
; /* string used to qualify names */
3302 int qlen
; /* length of qualifier */
3303 int bracelev
; /* current brace level */
3304 int bracketlev
; /* current bracket level */
3305 int parlev
; /* current parenthesis level */
3306 int attrparlev
; /* __attribute__ parenthesis level */
3307 int templatelev
; /* current template level */
3308 int typdefbracelev
; /* bracelev where a typedef struct body begun */
3309 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3310 bool yacc_rules
; /* in the rules part of a yacc file */
3311 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
3314 linebuffer_init (&lbs
[0].lb
);
3315 linebuffer_init (&lbs
[1].lb
);
3316 if (cstack
.size
== 0)
3318 cstack
.size
= (DEBUG
) ? 1 : 4;
3320 cstack
.cname
= xnew (cstack
.size
, char *);
3321 cstack
.bracelev
= xnew (cstack
.size
, int);
3324 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3325 curndx
= newndx
= 0;
3329 fvdef
= fvnone
; fvextern
= false; typdef
= tnone
;
3330 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3332 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3333 token
.valid
= savetoken
.valid
= false;
3334 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3336 { qualifier
= "."; qlen
= 1; }
3338 { qualifier
= "::"; qlen
= 2; }
3341 while (perhaps_more_input (inf
))
3346 /* If we are at the end of the line, the next character is a
3347 '\0'; do not skip it, because it is what tells us
3348 to read the next line. */
3369 /* Newlines inside comments do not end macro definitions in
3371 CNL_SAVE_DEFINEDEF ();
3384 /* Newlines inside strings do not end macro definitions
3385 in traditional cpp, even though compilers don't
3386 usually accept them. */
3387 CNL_SAVE_DEFINEDEF ();
3397 /* Hmmm, something went wrong. */
3433 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3448 else if (/* cplpl && */ *lp
== '/')
3454 if ((c_ext
& YACC
) && *lp
== '%')
3456 /* Entering or exiting rules section in yacc file. */
3458 definedef
= dnone
; fvdef
= fvnone
; fvextern
= false;
3459 typdef
= tnone
; structdef
= snone
;
3460 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3462 yacc_rules
= !yacc_rules
;
3468 if (definedef
== dnone
)
3471 bool cpptoken
= true;
3473 /* Look back on this line. If all blanks, or nonblanks
3474 followed by an end of comment, this is a preprocessor
3476 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3477 if (!c_isspace (*cp
))
3479 if (*cp
== '*' && cp
[1] == '/')
3489 definedef
= dsharpseen
;
3490 /* This is needed for tagging enum values: when there are
3491 preprocessor conditionals inside the enum, we need to
3492 reset the value of fvdef so that the next enum value is
3493 tagged even though the one before it did not end in a
3495 if (fvdef
== vignore
&& instruct
&& parlev
== 0)
3497 if (strneq (cp
, "#if", 3) || strneq (cp
, "#el", 3))
3501 } /* if (definedef == dnone) */
3512 CNL_SAVE_DEFINEDEF ();
3519 /* Consider token only if some involved conditions are satisfied. */
3520 if (typdef
!= tignore
3521 && definedef
!= dignorerest
3524 && (definedef
!= dnone
3525 || structdef
!= scolonseen
)
3533 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3534 /* This handles :: in the middle,
3535 but not at the beginning of an identifier.
3536 Also, space-separated :: is not recognized. */
3538 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3539 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3543 goto still_in_token
;
3547 bool funorvar
= false;
3550 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3551 &c_ext
, bracelev
, parlev
,
3554 if (fvdef
== foperator
)
3557 lp
= skip_spaces (lp
-1);
3561 && !c_isspace (*lp
) && *lp
!= '(')
3564 toklen
+= lp
- oldlp
;
3566 token
.named
= false;
3568 && nestlev
> 0 && definedef
== dnone
)
3569 /* in struct body */
3574 write_classname (&token_name
, qualifier
);
3575 len
= token_name
.len
;
3576 linebuffer_setlen (&token_name
,
3577 len
+ qlen
+ toklen
);
3578 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3580 newlb
.buffer
+ tokoff
);
3584 linebuffer_setlen (&token_name
, toklen
);
3585 sprintf (token_name
.buffer
, "%.*s",
3586 toklen
, newlb
.buffer
+ tokoff
);
3590 else if (objdef
== ocatseen
)
3591 /* Objective C category */
3595 int len
= strlen (objtag
) + 2 + toklen
;
3596 linebuffer_setlen (&token_name
, len
);
3597 sprintf (token_name
.buffer
, "%s(%.*s)",
3599 newlb
.buffer
+ tokoff
);
3603 linebuffer_setlen (&token_name
, toklen
);
3604 sprintf (token_name
.buffer
, "%.*s",
3605 toklen
, newlb
.buffer
+ tokoff
);
3609 else if (objdef
== omethodtag
3610 || objdef
== omethodparm
)
3611 /* Objective C method */
3615 else if (fvdef
== fdefunname
)
3616 /* GNU DEFUN and similar macros */
3618 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3627 /* First, tag it as its C name */
3628 linebuffer_setlen (&token_name
, toklen
);
3629 memcpy (token_name
.buffer
,
3630 newlb
.buffer
+ tokoff
, toklen
);
3631 token_name
.buffer
[toklen
] = '\0';
3633 token
.lineno
= lineno
;
3634 token
.offset
= tokoff
;
3635 token
.length
= toklen
;
3636 token
.line
= newlb
.buffer
;
3637 token
.linepos
= newlinepos
;
3639 make_C_tag (funorvar
);
3641 /* Rewrite the tag so that emacs lisp DEFUNs
3642 can be found also by their elisp name */
3643 linebuffer_setlen (&token_name
, len
);
3644 memcpy (token_name
.buffer
,
3645 newlb
.buffer
+ off
, len
);
3646 token_name
.buffer
[len
] = '\0';
3649 if (token_name
.buffer
[len
] == '_')
3650 token_name
.buffer
[len
] = '-';
3651 token
.named
= defun
;
3655 linebuffer_setlen (&token_name
, toklen
);
3656 memcpy (token_name
.buffer
,
3657 newlb
.buffer
+ tokoff
, toklen
);
3658 token_name
.buffer
[toklen
] = '\0';
3659 /* Name macros and members. */
3660 token
.named
= (structdef
== stagseen
3661 || typdef
== ttypeseen
3664 && definedef
== dignorerest
)
3666 && definedef
== dnone
3667 && structdef
== snone
3670 token
.lineno
= lineno
;
3671 token
.offset
= tokoff
;
3672 token
.length
= toklen
;
3673 token
.line
= newlb
.buffer
;
3674 token
.linepos
= newlinepos
;
3677 if (definedef
== dnone
3678 && (fvdef
== fvnameseen
3679 || fvdef
== foperator
3680 || structdef
== stagseen
3682 || typdef
== ttypeseen
3683 || objdef
!= onone
))
3685 if (current_lb_is_new
)
3686 switch_line_buffers ();
3688 else if (definedef
!= dnone
3689 || fvdef
== fdefunname
3691 make_C_tag (funorvar
);
3693 else /* not yacc and consider_token failed */
3695 if (inattribute
&& fvdef
== fignore
)
3697 /* We have just met __attribute__ after a
3698 function parameter list: do not tag the
3705 } /* if (endtoken (c)) */
3706 else if (intoken (c
))
3712 } /* if (midtoken) */
3713 else if (begtoken (c
))
3721 /* This prevents tagging fb in
3722 void (__attribute__((noreturn)) *fb) (void);
3723 Fixing this is not easy and not very important. */
3727 if (plainc
|| declarations
)
3729 make_C_tag (true); /* a function */
3736 if (structdef
== stagseen
&& !cjava
)
3738 popclass_above (bracelev
);
3748 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3750 tokoff
= lp
- 1 - newlb
.buffer
;
3755 } /* if (begtoken) */
3756 } /* if must look at token */
3759 /* Detect end of line, colon, comma, semicolon and various braces
3760 after having handled a token.*/
3766 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3768 make_C_tag (false); /* a yacc function */
3771 if (definedef
!= dnone
)
3777 make_C_tag (true); /* an Objective C class */
3781 objdef
= omethodcolon
;
3784 int toklen
= token_name
.len
;
3785 linebuffer_setlen (&token_name
, toklen
+ 1);
3786 strcpy (token_name
.buffer
+ toklen
, ":");
3792 if (structdef
== stagseen
)
3794 structdef
= scolonseen
;
3797 /* Should be useless, but may work as a safety net. */
3798 if (cplpl
&& fvdef
== flistseen
)
3800 make_C_tag (true); /* a function */
3806 if (definedef
!= dnone
|| inattribute
)
3812 make_C_tag (false); /* a typedef */
3822 if (typdef
== tignore
|| cplpl
)
3826 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3827 || (members
&& instruct
))
3828 make_C_tag (false); /* a variable */
3831 token
.valid
= false;
3835 && (cplpl
|| !instruct
)
3836 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3838 && plainc
&& instruct
))
3839 make_C_tag (true); /* a function */
3845 && cplpl
&& structdef
== stagseen
)
3846 make_C_tag (false); /* forward declaration */
3848 token
.valid
= false;
3849 } /* switch (fvdef) */
3855 if (structdef
== stagseen
)
3859 if (definedef
!= dnone
|| inattribute
)
3865 make_C_tag (true); /* an Objective C method */
3880 if (instruct
&& parlev
== 0)
3891 && (!fvextern
|| declarations
))
3892 || (members
&& instruct
)))
3893 make_C_tag (false); /* a variable */
3896 if ((declarations
&& typdef
== tnone
&& !instruct
)
3897 || (members
&& typdef
!= tignore
&& instruct
))
3899 make_C_tag (true); /* a function */
3902 else if (!declarations
)
3904 token
.valid
= false;
3909 if (structdef
== stagseen
)
3913 if (definedef
!= dnone
|| inattribute
)
3915 if (structdef
== stagseen
)
3922 make_C_tag (false); /* a typedef */
3934 if ((members
&& bracelev
== 1)
3935 || (globals
&& bracelev
== 0
3936 && (!fvextern
|| declarations
)))
3937 make_C_tag (false); /* a variable */
3953 if (definedef
!= dnone
)
3955 if (objdef
== otagseen
&& parlev
== 0)
3956 objdef
= oparenseen
;
3960 if (typdef
== ttypeseen
3964 /* This handles constructs like:
3965 typedef void OperatorFun (int fun); */
3986 if (--attrparlev
== 0)
3987 inattribute
= false;
3996 if (definedef
!= dnone
)
3998 if (objdef
== ocatseen
&& parlev
== 1)
4000 make_C_tag (true); /* an Objective C category */
4016 || typdef
== ttypeseen
))
4019 make_C_tag (false); /* a typedef */
4022 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
4026 if (definedef
!= dnone
)
4028 if (typdef
== ttypeseen
)
4030 /* Whenever typdef is set to tinbody (currently only
4031 here), typdefbracelev should be set to bracelev. */
4033 typdefbracelev
= bracelev
;
4038 if (cplpl
&& !class_qualify
)
4040 /* Remove class and namespace qualifiers from the token,
4041 leaving only the method/member name. */
4042 char *cc
, *uqname
= token_name
.buffer
;
4043 char *tok_end
= token_name
.buffer
+ token_name
.len
;
4045 for (cc
= token_name
.buffer
; cc
< tok_end
; cc
++)
4047 if (*cc
== ':' && cc
[1] == ':')
4053 if (uqname
> token_name
.buffer
)
4055 int uqlen
= strlen (uqname
);
4056 linebuffer_setlen (&token_name
, uqlen
);
4057 memmove (token_name
.buffer
, uqname
, uqlen
+ 1);
4060 make_C_tag (true); /* a function */
4069 make_C_tag (true); /* an Objective C class */
4074 make_C_tag (true); /* an Objective C method */
4078 /* Neutralize `extern "C" {' grot. */
4079 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
4089 case skeyseen
: /* unnamed struct */
4090 pushclass_above (bracelev
, NULL
, 0);
4093 case stagseen
: /* named struct or enum */
4094 case scolonseen
: /* a class */
4095 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
4097 make_C_tag (false); /* a struct or enum */
4105 if (definedef
!= dnone
)
4107 if (fvdef
== fstartlist
)
4109 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
4110 token
.valid
= false;
4114 if (definedef
!= dnone
)
4117 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
4120 token
.valid
= false; /* unexpected value, token unreliable */
4121 bracelev
= 0; /* reset brace level if first column */
4122 parlev
= 0; /* also reset paren level, just in case... */
4124 else if (bracelev
< 0)
4126 token
.valid
= false; /* something gone amiss, token unreliable */
4129 if (bracelev
== 0 && fvdef
== vignore
)
4130 fvdef
= fvnone
; /* end of function */
4131 popclass_above (bracelev
);
4133 /* Only if typdef == tinbody is typdefbracelev significant. */
4134 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
4136 assert (bracelev
== typdefbracelev
);
4141 if (definedef
!= dnone
)
4151 if ((members
&& bracelev
== 1)
4152 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
4153 make_C_tag (false); /* a variable */
4161 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
4168 if (templatelev
> 0)
4176 if (objdef
== oinbody
&& bracelev
== 0)
4178 objdef
= omethodsign
;
4183 case '#': case '~': case '&': case '%': case '/':
4184 case '|': case '^': case '!': case '.': case '?':
4185 if (definedef
!= dnone
)
4187 /* These surely cannot follow a function tag in C. */
4200 if (objdef
== otagseen
)
4202 make_C_tag (true); /* an Objective C class */
4205 /* If a macro spans multiple lines don't reset its state. */
4207 CNL_SAVE_DEFINEDEF ();
4213 } /* while not eof */
4215 free (lbs
[0].lb
.buffer
);
4216 free (lbs
[1].lb
.buffer
);
4220 * Process either a C++ file or a C file depending on the setting
4224 default_C_entries (FILE *inf
)
4226 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
4229 /* Always do plain C. */
4231 plain_C_entries (FILE *inf
)
4236 /* Always do C++. */
4238 Cplusplus_entries (FILE *inf
)
4240 C_entries (C_PLPL
, inf
);
4243 /* Always do Java. */
4245 Cjava_entries (FILE *inf
)
4247 C_entries (C_JAVA
, inf
);
4252 Cstar_entries (FILE *inf
)
4254 C_entries (C_STAR
, inf
);
4257 /* Always do Yacc. */
4259 Yacc_entries (FILE *inf
)
4261 C_entries (YACC
, inf
);
4265 /* Useful macros. */
4266 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4267 while (perhaps_more_input (file_pointer) \
4268 && (readline (&(line_buffer), file_pointer), \
4269 (char_pointer) = (line_buffer).buffer, \
/* LOOKING_AT (cp, kw): keyword test used by the line-oriented parsers.
   The expression is a chain of && operands: strneq is applied to CP, KW
   and the length of KW (presumably a bounded equality test), then
   notinname is applied to the character right after the keyword.  Only
   when both succeed is CP reassigned to skip_spaces (CP + length of KW),
   and the whole expression is then true.  On a mismatch the chain stops
   early and CP is left untouched.  CP must be a modifiable lvalue; it
   is expanded several times, so it should have no side effects.  */
4272 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4273 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4274 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4275 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4276 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* step past kw and spaces */
4278 /* Similar to LOOKING_AT, but compares with strncaseeq instead of strneq
   (presumably a case-insensitive comparison), does not use notinname to
   check the character after the keyword, and does not skip spaces: on a
   match CP is advanced to just past KW, otherwise CP is left untouched.
   CP must be a modifiable lvalue and is expanded more than once.  */
4279 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4280 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4281 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4282 && ((cp) += sizeof (kw) - 1, true)) /* advance past kw; spaces are not skipped */
4285 * Read a file, but do no processing. This is used to do regexp
4286 * matching on files that have no language defined.
4289 just_read_file (FILE *inf
)
4291 while (perhaps_more_input (inf
))
4292 readline (&lb
, inf
);
4296 /* Fortran parsing */
4298 static void F_takeprec (void);
4299 static void F_getit (FILE *);
4304 dbp
= skip_spaces (dbp
);
4308 dbp
= skip_spaces (dbp
);
4309 if (strneq (dbp
, "(*)", 3))
4314 if (!c_isdigit (*dbp
))
4316 --dbp
; /* force failure */
4321 while (c_isdigit (*dbp
));
4329 dbp
= skip_spaces (dbp
);
4332 readline (&lb
, inf
);
4337 dbp
= skip_spaces (dbp
);
4339 if (!c_isalpha (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
4341 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
4343 make_tag (dbp
, cp
-dbp
, true,
4344 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4349 Fortran_functions (FILE *inf
)
4351 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4354 dbp
++; /* Ratfor escape to fortran */
4355 dbp
= skip_spaces (dbp
);
4359 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
4360 dbp
= skip_spaces (dbp
);
4362 if (LOOKING_AT_NOCASE (dbp
, "pure"))
4363 dbp
= skip_spaces (dbp
);
4365 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
4366 dbp
= skip_spaces (dbp
);
4368 switch (c_tolower (*dbp
))
4371 if (nocase_tail ("integer"))
4375 if (nocase_tail ("real"))
4379 if (nocase_tail ("logical"))
4383 if (nocase_tail ("complex") || nocase_tail ("character"))
4387 if (nocase_tail ("double"))
4389 dbp
= skip_spaces (dbp
);
4392 if (nocase_tail ("precision"))
4398 dbp
= skip_spaces (dbp
);
4401 switch (c_tolower (*dbp
))
4404 if (nocase_tail ("function"))
4408 if (nocase_tail ("subroutine"))
4412 if (nocase_tail ("entry"))
4416 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4418 dbp
= skip_spaces (dbp
);
4419 if (*dbp
== '\0') /* assume un-named */
4420 make_tag ("blockdata", 9, true,
4421 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4423 F_getit (inf
); /* look for name */
4432 * Go language support
4433 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4436 Go_functions(FILE *inf
)
4440 LOOP_ON_INPUT_LINES(inf
, lb
, cp
)
4442 cp
= skip_spaces (cp
);
4444 if (LOOKING_AT (cp
, "package"))
4447 while (!notinname (*cp
) && *cp
!= '\0')
4449 make_tag (name
, cp
- name
, false, lb
.buffer
,
4450 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4452 else if (LOOKING_AT (cp
, "func"))
4454 /* Go implementation of interface, such as:
4455 func (n *Integer) Add(m Integer) ...
4456 skip `(n *Integer)` part.
4462 cp
= skip_spaces (cp
+1);
4469 while (!notinname (*cp
))
4472 make_tag (name
, cp
- name
, true, lb
.buffer
,
4473 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4476 else if (members
&& LOOKING_AT (cp
, "type"))
4480 /* Ignore the likes of the following:
4488 while (!notinname (*cp
) && *cp
!= '\0')
4491 make_tag (name
, cp
- name
, false, lb
.buffer
,
4492 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4501 * Philippe Waroquiers (1998)
4504 /* Once we are positioned after an "interesting" keyword, let's get
4505 the real tag value necessary. */
4507 Ada_getit (FILE *inf
, const char *name_qualifier
)
4513 while (perhaps_more_input (inf
))
4515 dbp
= skip_spaces (dbp
);
4517 || (dbp
[0] == '-' && dbp
[1] == '-'))
4519 readline (&lb
, inf
);
4522 switch (c_tolower (*dbp
))
4525 if (nocase_tail ("body"))
4527 /* Skipping body of procedure body or package body or ....
4528 resetting qualifier to body instead of spec. */
4529 name_qualifier
= "/b";
4534 /* Skipping type of task type or protected type ... */
4535 if (nocase_tail ("type"))
4542 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4547 dbp
= skip_spaces (dbp
);
4549 c_isalnum (*cp
) || *cp
== '_' || *cp
== '.';
4557 name
= concat (dbp
, name_qualifier
, "");
4559 make_tag (name
, strlen (name
), true,
4560 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4569 Ada_funcs (FILE *inf
)
4571 bool inquote
= false;
4572 bool skip_till_semicolumn
= false;
4574 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4576 while (*dbp
!= '\0')
4578 /* Skip a string i.e. "abcd". */
4579 if (inquote
|| (*dbp
== '"'))
4581 dbp
= strchr (dbp
+ !inquote
, '"');
4586 continue; /* advance char */
4591 break; /* advance line */
4595 /* Skip comments. */
4596 if (dbp
[0] == '-' && dbp
[1] == '-')
4597 break; /* advance line */
4599 /* Skip character enclosed in single quote i.e. 'a'
4600 and skip single quote starting an attribute i.e. 'Image. */
4609 if (skip_till_semicolumn
)
4612 skip_till_semicolumn
= false;
4614 continue; /* advance char */
4617 /* Search for beginning of a token. */
4618 if (!begtoken (*dbp
))
4621 continue; /* advance char */
4624 /* We are at the beginning of a token. */
4625 switch (c_tolower (*dbp
))
4628 if (!packages_only
&& nocase_tail ("function"))
4629 Ada_getit (inf
, "/f");
4631 break; /* from switch */
4632 continue; /* advance char */
4634 if (!packages_only
&& nocase_tail ("procedure"))
4635 Ada_getit (inf
, "/p");
4636 else if (nocase_tail ("package"))
4637 Ada_getit (inf
, "/s");
4638 else if (nocase_tail ("protected")) /* protected type */
4639 Ada_getit (inf
, "/t");
4641 break; /* from switch */
4642 continue; /* advance char */
4645 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4647 /* when tagging types, avoid tagging use type Pack.Typename;
4648 for this, we will skip everything till a ; */
4649 skip_till_semicolumn
= true;
4650 continue; /* advance char */
4654 if (!packages_only
&& nocase_tail ("task"))
4655 Ada_getit (inf
, "/k");
4656 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4658 Ada_getit (inf
, "/t");
4659 while (*dbp
!= '\0')
4663 break; /* from switch */
4664 continue; /* advance char */
4667 /* Look for the end of the token. */
4668 while (!endtoken (*dbp
))
4671 } /* advance char */
4672 } /* advance line */
4677 * Unix and microcontroller assembly tag handling
4678 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4679 * Idea by Bob Weiner, Motorola Inc. (1994)
4682 Asm_labels (FILE *inf
)
4686 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4688 /* If first char is alphabetic or one of [_.$], test for colon
4689 following identifier. */
4690 if (c_isalpha (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4692 /* Read past label. */
4694 while (c_isalnum (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4696 if (*cp
== ':' || c_isspace (*cp
))
4697 /* Found end of label, so copy it and add it to the table. */
4698 make_tag (lb
.buffer
, cp
- lb
.buffer
, true,
4699 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4707 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4708 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4709 * Perl variable names: /^(my|local).../
4710 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4711 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4712 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4715 Perl_functions (FILE *inf
)
4717 char *package
= savestr ("main"); /* current package name */
4720 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4722 cp
= skip_spaces (cp
);
4724 if (LOOKING_AT (cp
, "package"))
4727 get_tag (cp
, &package
);
4729 else if (LOOKING_AT (cp
, "sub"))
4735 while (!notinname (*cp
))
4738 continue; /* nothing found */
4739 pos
= strchr (sp
, ':');
4740 if (pos
&& pos
< cp
&& pos
[1] == ':')
4742 /* The name is already qualified. */
4745 char *q
= pos
+ 2, *qpos
;
4746 while ((qpos
= strchr (q
, ':')) != NULL
4752 make_tag (sp
, cp
- sp
, true,
4753 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4755 else if (class_qualify
)
4758 char savechar
, *name
;
4762 name
= concat (package
, "::", sp
);
4764 make_tag (name
, strlen (name
), true,
4765 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4769 make_tag (sp
, cp
- sp
, true,
4770 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4772 else if (LOOKING_AT (cp
, "use constant")
4773 || LOOKING_AT (cp
, "use constant::defer"))
4775 /* For hash style multi-constant like
4776 use constant { FOO => 123,
4778 only the first FOO is picked up. Parsing across the value
4779 expressions would be difficult in general, due to possible nested
4780 hashes, here-documents, etc. */
4782 cp
= skip_spaces (cp
+1);
4785 else if (globals
) /* only if we are tagging global vars */
4787 /* Skip a qualifier, if any. */
4788 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4789 /* After "my" or "local", but before any following paren or space. */
4790 char *varstart
= cp
;
4792 if (qual
/* should this be removed? If yes, how? */
4793 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4798 while (c_isalnum (*cp
) || *cp
== '_');
4802 /* Should be examining a variable list at this point;
4803 could insist on seeing an open parenthesis. */
4804 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4810 make_tag (varstart
, cp
- varstart
, false,
4811 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4820 * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4821 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4822 * More ideas by seb bacon <seb@jamkit.com> (2002)
4825 Python_functions (FILE *inf
)
4829 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4831 cp
= skip_spaces (cp
);
4832 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4835 while (!notinname (*cp
) && *cp
!= ':')
4837 make_tag (name
, cp
- name
, true,
4838 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4845 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4848 Ruby_functions (FILE *inf
)
4851 bool reader
= false, writer
= false, alias
= false, continuation
= false;
4853 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4855 bool is_class
= false;
4856 bool is_method
= false;
4859 cp
= skip_spaces (cp
);
4862 && c_isalpha (*cp
) && c_isupper (*cp
))
4864 char *bp
, *colon
= NULL
;
4868 for (cp
++; c_isalnum (*cp
) || *cp
== '_' || *cp
== ':'; cp
++)
4875 bp
= skip_spaces (cp
);
4876 if (*bp
== '=' && !(bp
[1] == '=' || bp
[1] == '>'))
4878 if (colon
&& !c_isspace (colon
[1]))
4880 make_tag (name
, cp
- name
, false,
4881 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4885 else if (!continuation
4886 /* Modules, classes, methods. */
4887 && ((is_method
= LOOKING_AT (cp
, "def"))
4888 || (is_class
= LOOKING_AT (cp
, "class"))
4889 || LOOKING_AT (cp
, "module")))
4891 const char self_name
[] = "self.";
4892 const size_t self_size1
= sizeof (self_name
) - 1;
4896 /* Ruby method names can end in a '='. Also, operator overloading can
4897 define operators whose names include '='. */
4898 while (!notinname (*cp
) || *cp
== '=')
4901 /* Remove "self." from the method name. */
4902 if (cp
- name
> self_size1
4903 && strneq (name
, self_name
, self_size1
))
4906 /* Remove the class/module qualifiers from method names. */
4911 for (q
= name
; q
< cp
&& *q
!= '.'; q
++)
4913 if (q
< cp
- 1) /* punt if we see just "FOO." */
4917 /* Don't tag singleton classes. */
4918 if (is_class
&& strneq (name
, "<<", 2) && cp
== name
+ 2)
4921 make_tag (name
, cp
- name
, true,
4922 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4926 /* Tag accessors and aliases. */
4929 reader
= writer
= alias
= false;
4931 while (*cp
&& *cp
!= '#')
4935 reader
= writer
= alias
= false;
4936 if (LOOKING_AT (cp
, "attr_reader"))
4938 else if (LOOKING_AT (cp
, "attr_writer"))
4940 else if (LOOKING_AT (cp
, "attr_accessor"))
4945 else if (LOOKING_AT (cp
, "alias_method"))
4948 if (reader
|| writer
|| alias
)
4953 cp
= skip_spaces (cp
);
4955 cp
= skip_spaces (cp
+ 1);
4957 cp
= skip_name (cp
);
4963 make_tag (np
, cp
- np
, true,
4964 lb
.buffer
, cp
- lb
.buffer
+ 1,
4965 lineno
, linecharno
);
4966 continuation
= false;
4970 size_t name_len
= cp
- np
+ 1;
4971 char *wr_name
= xnew (name_len
+ 1, char);
4973 memcpy (wr_name
, np
, name_len
- 1);
4974 memcpy (wr_name
+ name_len
- 1, "=", 2);
4975 pfnote (wr_name
, true, lb
.buffer
, cp
- lb
.buffer
+ 1,
4976 lineno
, linecharno
);
4978 fprintf (stderr
, "%s on %s:%d: %s\n", wr_name
,
4979 curfdp
->taggedfname
, lineno
, lb
.buffer
);
4980 continuation
= false;
4985 make_tag (np
, cp
- np
, true,
4986 lb
.buffer
, cp
- lb
.buffer
+ 1,
4987 lineno
, linecharno
);
4988 continuation
= false;
4989 while (*cp
&& *cp
!= '#' && *cp
!= ';')
4992 continuation
= true;
4993 else if (!c_isspace (*cp
))
4994 continuation
= false;
4998 continuation
= false;
5000 cp
= skip_spaces (cp
);
5003 : (continuation
= (*cp
== ',')))
5004 && (cp
= skip_spaces (cp
+ 1), *cp
&& *cp
!= '#'));
5007 cp
= skip_name (cp
);
5008 while (*cp
&& *cp
!= '#' && notinname (*cp
))
5019 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5020 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5021 * - /^[ \t]*define\(\"[^\"]+/
5022 * Only with --members:
5023 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5024 * Idea by Diez B. Roggisch (2001)
5027 PHP_functions (FILE *inf
)
5030 bool search_identifier
= false;
5032 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5034 cp
= skip_spaces (cp
);
5036 if (search_identifier
5039 while (!notinname (*cp
))
5041 make_tag (name
, cp
- name
, true,
5042 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5043 search_identifier
= false;
5045 else if (LOOKING_AT (cp
, "function"))
5048 cp
= skip_spaces (cp
+1);
5052 while (!notinname (*cp
))
5054 make_tag (name
, cp
- name
, true,
5055 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5058 search_identifier
= true;
5060 else if (LOOKING_AT (cp
, "class"))
5065 while (*cp
!= '\0' && !c_isspace (*cp
))
5067 make_tag (name
, cp
- name
, false,
5068 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5071 search_identifier
= true;
5073 else if (strneq (cp
, "define", 6)
5074 && (cp
= skip_spaces (cp
+6))
5076 && (*cp
== '"' || *cp
== '\''))
5080 while (*cp
!= quote
&& *cp
!= '\0')
5082 make_tag (name
, cp
- name
, false,
5083 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5086 && LOOKING_AT (cp
, "var")
5090 while (!notinname (*cp
))
5092 make_tag (name
, cp
- name
, false,
5093 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5100 * Cobol tag functions
5101 * We could look for anything that could be a paragraph name.
5102 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5103 * Idea by Corny de Souza (1993)
5106 Cobol_paragraphs (FILE *inf
)
5108 register char *bp
, *ep
;
5110 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5116 /* If eoln, compiler option or comment ignore whole line. */
5117 if (bp
[-1] != ' ' || !c_isalnum (bp
[0]))
5120 for (ep
= bp
; c_isalnum (*ep
) || *ep
== '-'; ep
++)
5123 make_tag (bp
, ep
- bp
, true,
5124 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
5131 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5134 Makefile_targets (FILE *inf
)
5138 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5140 if (*bp
== '\t' || *bp
== '#')
5142 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
5144 if (*bp
== ':' || (globals
&& *bp
== '='))
5146 /* We should detect if there is more than one tag, but we do not.
5147 We just skip initial and final spaces. */
5148 char * namestart
= skip_spaces (lb
.buffer
);
5149 while (--bp
> namestart
)
5150 if (!notinname (*bp
))
5152 make_tag (namestart
, bp
- namestart
+ 1, true,
5153 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
5161 * Original code by Mosur K. Mohan (1989)
5163 * Locates tags for procedures & functions. Doesn't do any type- or
5164 * var-definitions. It does look for the keyword "extern" or
5165 * "forward" immediately following the procedure statement; if found,
5166 * the tag is skipped.
5169 Pascal_functions (FILE *inf
)
5171 linebuffer tline
; /* mostly copied from C_entries */
5173 int save_lineno
, namelen
, taglen
;
5176 bool /* each of these flags is true if: */
5177 incomment
, /* point is inside a comment */
5178 inquote
, /* point is inside '..' string */
5179 get_tagname
, /* point is after PROCEDURE/FUNCTION
5180 keyword, so next item = potential tag */
5181 found_tag
, /* point is after a potential tag */
5182 inparms
, /* point is within parameter-list */
5183 verify_tag
; /* point has passed the parm-list, so the
5184 next token will determine whether this
5185 is a FORWARD/EXTERN to be ignored, or
5186 whether it is a real tag */
5188 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
5189 name
= NULL
; /* keep compiler quiet */
5192 linebuffer_init (&tline
);
5194 incomment
= inquote
= false;
5195 found_tag
= false; /* have a proc name; check if extern */
5196 get_tagname
= false; /* found "procedure" keyword */
5197 inparms
= false; /* found '(' after "proc" */
5198 verify_tag
= false; /* check if "extern" is ahead */
5201 while (perhaps_more_input (inf
)) /* long main loop to get next char */
5204 if (c
== '\0') /* if end of line */
5206 readline (&lb
, inf
);
5210 if (!((found_tag
&& verify_tag
)
5212 c
= *dbp
++; /* only if don't need *dbp pointing
5213 to the beginning of the name of
5214 the procedure or function */
5218 if (c
== '}') /* within { } comments */
5220 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
5237 inquote
= true; /* found first quote */
5239 case '{': /* found open { comment */
5243 if (*dbp
== '*') /* found open (* comment */
5248 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
5251 case ')': /* end of parms list */
5256 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
5263 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
5265 /* Check if this is an "extern" declaration. */
5268 if (c_tolower (*dbp
) == 'e')
5270 if (nocase_tail ("extern")) /* superfluous, really! */
5276 else if (c_tolower (*dbp
) == 'f')
5278 if (nocase_tail ("forward")) /* check for forward reference */
5284 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
5288 make_tag (name
, namelen
, true,
5289 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
5293 if (get_tagname
) /* grab name of proc or fn */
5300 /* Find block name. */
5301 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
5304 /* Save all values for later tagging. */
5305 linebuffer_setlen (&tline
, lb
.len
);
5306 strcpy (tline
.buffer
, lb
.buffer
);
5307 save_lineno
= lineno
;
5308 save_lcno
= linecharno
;
5309 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
5311 taglen
= cp
- lb
.buffer
+ 1;
5313 dbp
= cp
; /* set dbp to e-o-token */
5314 get_tagname
= false;
5318 /* And proceed to check for "extern". */
5320 else if (!incomment
&& !inquote
&& !found_tag
)
5322 /* Check for proc/fn keywords. */
5323 switch (c_tolower (c
))
5326 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5330 if (nocase_tail ("unction"))
5335 } /* while not eof */
5337 free (tline
.buffer
);
5342 * Lisp tag functions
5343 * look for (def or (DEF, quote or QUOTE
5346 static void L_getit (void);
5351 if (*dbp
== '\'') /* Skip prefix quote */
5353 else if (*dbp
== '(')
5356 /* Try to skip "(quote " */
5357 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
5358 /* Ok, then skip "(" before name in (defstruct (foo)) */
5359 dbp
= skip_spaces (dbp
);
5361 get_lispy_tag (dbp
);
5365 Lisp_functions (FILE *inf
)
5367 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5372 /* "(defvar foo)" is a declaration rather than a definition. */
5376 if (LOOKING_AT (p
, "defvar"))
5378 p
= skip_name (p
); /* past var name */
5379 p
= skip_spaces (p
);
5385 if (strneq (dbp
+ 1, "cl-", 3) || strneq (dbp
+ 1, "CL-", 3))
5388 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
5390 dbp
= skip_non_spaces (dbp
);
5391 dbp
= skip_spaces (dbp
);
5396 /* Check for (foo::defmumble name-defined ... */
5399 while (!notinname (*dbp
) && *dbp
!= ':');
5404 while (*dbp
== ':');
5406 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
5408 dbp
= skip_non_spaces (dbp
);
5409 dbp
= skip_spaces (dbp
);
5419 * Lua script language parsing
5420 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5422 * "function" and "local function" are tags if they are the first
 * non-blank text on a line.
5425 Lua_functions (FILE *inf
)
5429 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5431 bp
= skip_spaces (bp
);
5432 if (bp
[0] != 'f' && bp
[0] != 'l')
5435 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
5437 if (LOOKING_AT (bp
, "function"))
5439 char *tag_name
, *tp_dot
, *tp_colon
;
5441 get_tag (bp
, &tag_name
);
5442 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5444 tp_dot
= strrchr (tag_name
, '.');
5445 tp_colon
= strrchr (tag_name
, ':');
5446 if (tp_dot
|| tp_colon
)
5448 char *p
= tp_dot
> tp_colon
? tp_dot
: tp_colon
;
5449 int len_add
= p
- tag_name
+ 1;
5451 get_tag (bp
+ len_add
, NULL
);
5460 * Just look for lines where the first character is '/'
5461 * Also look at "defineps" for PSWrap
5463 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5464 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5467 PS_functions (FILE *inf
)
5469 register char *bp
, *ep
;
5471 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5476 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
5479 make_tag (bp
, ep
- bp
, true,
5480 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
5482 else if (LOOKING_AT (bp
, "defineps"))
5490 * Ignore anything after \ followed by space or in ( )
5491 * Look for words defined by :
5492 * Look for constant, code, create, defer, value, and variable
5493 * OBP extensions: Look for buffer:, field,
5494 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5497 Forth_words (FILE *inf
)
5501 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5502 while ((bp
= skip_spaces (bp
))[0] != '\0')
5503 if (bp
[0] == '\\' && c_isspace (bp
[1]))
5504 break; /* read next line */
5505 else if (bp
[0] == '(' && c_isspace (bp
[1]))
5506 do /* skip to ) or eol */
5508 while (*bp
!= ')' && *bp
!= '\0');
5509 else if (((bp
[0] == ':' && c_isspace (bp
[1]) && bp
++)
5510 || LOOKING_AT_NOCASE (bp
, "constant")
5511 || LOOKING_AT_NOCASE (bp
, "2constant")
5512 || LOOKING_AT_NOCASE (bp
, "fconstant")
5513 || LOOKING_AT_NOCASE (bp
, "code")
5514 || LOOKING_AT_NOCASE (bp
, "create")
5515 || LOOKING_AT_NOCASE (bp
, "defer")
5516 || LOOKING_AT_NOCASE (bp
, "value")
5517 || LOOKING_AT_NOCASE (bp
, "2value")
5518 || LOOKING_AT_NOCASE (bp
, "fvalue")
5519 || LOOKING_AT_NOCASE (bp
, "variable")
5520 || LOOKING_AT_NOCASE (bp
, "2variable")
5521 || LOOKING_AT_NOCASE (bp
, "fvariable")
5522 || LOOKING_AT_NOCASE (bp
, "buffer:")
5523 || LOOKING_AT_NOCASE (bp
, "field:")
5524 || LOOKING_AT_NOCASE (bp
, "+field")
5525 || LOOKING_AT_NOCASE (bp
, "field") /* not standard? */
5526 || LOOKING_AT_NOCASE (bp
, "begin-structure")
5527 || LOOKING_AT_NOCASE (bp
, "synonym")
5529 && c_isspace (bp
[0]))
5531 /* Yay! A definition! */
5532 char* name_start
= skip_spaces (bp
);
5533 char* name_end
= skip_non_spaces (name_start
);
5534 if (name_start
< name_end
)
5535 make_tag (name_start
, name_end
- name_start
,
5536 true, lb
.buffer
, name_end
- lb
.buffer
,
5537 lineno
, linecharno
);
5541 bp
= skip_non_spaces (bp
);
5546 * Scheme tag functions
5547 * look for (def... xyzzy
5549 * (def ... ((...(xyzzy ....
5551 * Original code by Ken Haase (1985?)
5554 Scheme_functions (FILE *inf
)
5558 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5560 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
5562 bp
= skip_non_spaces (bp
+4);
5563 /* Skip over open parens and white space.
5564 Don't continue past '\0' or '='. */
5565 while (*bp
&& notinname (*bp
) && *bp
!= '=')
5569 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
5575 /* Find tags in TeX and LaTeX input files. */
5577 /* TEX_toktab is a table of TeX control sequences that define tags.
5578 * Each entry records one such control sequence.
5580 * Original code from who knows whom.
5582 * Stefan Monnier (2002)
5585 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
5587 /* Default set of control sequences to put into TEX_toktab.
5588 The value of environment var TEXTAGS is prepended to this. */
5589 static const char *TEX_defenv
= "\
5590 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5591 :part:appendix:entry:index:def\
5592 :newcommand:renewcommand:newenvironment:renewenvironment";
5594 static void TEX_decode_env (const char *, const char *);
5597 * TeX/LaTeX scanning loop.
5600 TeX_commands (FILE *inf
)
5605 char TEX_esc
= '\0';
5606 char TEX_opgrp UNINIT
, TEX_clgrp UNINIT
;
5608 /* Initialize token table once from environment. */
5609 if (TEX_toktab
== NULL
)
5610 TEX_decode_env ("TEXTAGS", TEX_defenv
);
5612 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5614 /* Look at each TEX keyword in line. */
5617 /* Look for a TEX escape. */
5621 if (c
== '\0' || c
== '%')
5624 /* Select either \ or ! as escape character, whichever comes
5625 first outside a comment. */
5646 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
5647 if (strneq (cp
, key
->buffer
, key
->len
))
5650 int namelen
, linelen
;
5653 cp
= skip_spaces (cp
+ key
->len
);
5654 if (*cp
== TEX_opgrp
)
5660 (!c_isspace (*p
) && *p
!= '#' &&
5661 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
5666 if (!opgrp
|| *p
== TEX_clgrp
)
5668 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
5670 linelen
= p
- lb
.buffer
+ 1;
5672 make_tag (cp
, namelen
, true,
5673 lb
.buffer
, linelen
, lineno
, linecharno
);
5674 goto tex_next_line
; /* We only tag a line once */
5682 /* Read environment and prepend it to the default string.
5683 Build token table. */
5685 TEX_decode_env (const char *evarname
, const char *defenv
)
5687 register const char *env
, *p
;
5690 /* Append default string to environment. */
5691 env
= getenv (evarname
);
5695 env
= concat (env
, defenv
, "");
5697 /* Allocate a token table */
5698 for (len
= 1, p
= env
; (p
= strchr (p
, ':')); )
5701 TEX_toktab
= xnew (len
, linebuffer
);
5703 /* Unpack environment string into token table. Be careful about */
5704 /* zero-length strings (leading ':', "::" and trailing ':') */
5705 for (i
= 0; *env
!= '\0';)
5707 p
= strchr (env
, ':');
5708 if (!p
) /* End of environment string. */
5709 p
= env
+ strlen (env
);
5711 { /* Only non-zero strings. */
5712 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5713 TEX_toktab
[i
].len
= p
- env
;
5720 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5721 TEX_toktab
[i
].len
= 0;
5728 /* Texinfo support. Dave Love, Mar. 2000. */
5730 Texinfo_nodes (FILE *inf
)
5733 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5734 if (LOOKING_AT (cp
, "@node"))
5737 while (*cp
!= '\0' && *cp
!= ',')
5739 make_tag (start
, cp
- start
, true,
5740 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5747 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5748 * Contents of <a name=xxx> are tags with name xxx.
5750 * Francesco Potortì, 2002.
5753 HTML_labels (FILE *inf
)
5755 bool getnext
= false; /* next text outside of HTML tags is a tag */
5756 bool skiptag
= false; /* skip to the end of the current HTML tag */
5757 bool intag
= false; /* inside an html tag, looking for ID= */
5758 bool inanchor
= false; /* when INTAG, is an anchor, look for NAME= */
5762 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5764 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5765 for (;;) /* loop on the same line */
5767 if (skiptag
) /* skip HTML tag */
5769 while (*dbp
!= '\0' && *dbp
!= '>')
5775 continue; /* look on the same line */
5777 break; /* go to next line */
5780 else if (intag
) /* look for "name=" or "id=" */
5782 while (*dbp
!= '\0' && *dbp
!= '>'
5783 && c_tolower (*dbp
) != 'n' && c_tolower (*dbp
) != 'i')
5786 break; /* go to next line */
5791 continue; /* look on the same line */
5793 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5794 || LOOKING_AT_NOCASE (dbp
, "id="))
5796 bool quoted
= (dbp
[0] == '"');
5799 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5802 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5804 linebuffer_setlen (&token_name
, end
- dbp
);
5805 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5806 token_name
.buffer
[end
- dbp
] = '\0';
5809 intag
= false; /* we found what we looked for */
5810 skiptag
= true; /* skip to the end of the tag */
5811 getnext
= true; /* then grab the text */
5812 continue; /* look on the same line */
5817 else if (getnext
) /* grab next tokens and tag them */
5819 dbp
= skip_spaces (dbp
);
5821 break; /* go to next line */
5825 inanchor
= (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5826 continue; /* look on the same line */
5829 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5831 make_tag (token_name
.buffer
, token_name
.len
, true,
5832 dbp
, end
- dbp
, lineno
, linecharno
);
5833 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5835 break; /* go to next line */
5838 else /* look for an interesting HTML tag */
5840 while (*dbp
!= '\0' && *dbp
!= '<')
5843 break; /* go to next line */
5845 if (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5848 continue; /* look on the same line */
5850 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5851 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5852 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5853 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5857 continue; /* look on the same line */
5868 * Assumes that the predicate or rule starts at column 0.
5869 * Only the first clause of a predicate or rule is added.
5870 * Original code by Sunichirou Sugou (1989)
5871 * Rewritten by Anders Lindgren (1996)
5873 static size_t prolog_pr (char *, char *);
5874 static void prolog_skip_comment (linebuffer
*, FILE *);
5875 static size_t prolog_atom (char *, size_t);
5878 Prolog_functions (FILE *inf
)
5888 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5890 if (cp
[0] == '\0') /* Empty line */
5892 else if (c_isspace (cp
[0])) /* Not a predicate */
5894 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5895 prolog_skip_comment (&lb
, inf
);
5896 else if ((len
= prolog_pr (cp
, last
)) > 0)
5898 /* Predicate or rule. Store the function name so that we
5899 only generate a tag for the first clause. */
5901 last
= xnew (len
+ 1, char);
5902 else if (len
+ 1 > allocated
)
5903 xrnew (last
, len
+ 1, char);
5904 allocated
= len
+ 1;
5905 memcpy (last
, cp
, len
);
5914 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5920 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5921 if (cp
[0] == '*' && cp
[1] == '/')
5923 readline (plb
, inf
);
5925 while (perhaps_more_input (inf
));
5929 * A predicate or rule definition is added if it matches:
5930 * <beginning of line><Prolog Atom><whitespace>(
5931 * or <beginning of line><Prolog Atom><whitespace>:-
5933 * It is added to the tags database if it doesn't match the
5934 * name of the previous clause header.
5936 * Return the size of the name of the predicate or rule, or 0 if no
5940 prolog_pr (char *s
, char *last
)
5942 /* Name of last clause. */
5947 pos
= prolog_atom (s
, 0);
5952 pos
= skip_spaces (s
+ pos
) - s
;
5955 || (s
[pos
] == '(' && (pos
+= 1))
5956 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5957 && (last
== NULL
/* save only the first clause */
5958 || len
!= strlen (last
)
5959 || !strneq (s
, last
, len
)))
5961 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (an underscore is accepted too).
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the line, POS the offset in S where the atom should start.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  size_t origpos = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        pos++;
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      for (;;)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;          /* closing quote */
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return 0;
          else if (s[pos] == '\\')
            {
              if (s[pos + 1] == '\0')
                return 0;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return 0;
}
6027 * Support for Erlang
6029 * Generates tags for functions, defines, and records.
6030 * Assumes that Erlang functions start at column 0.
6031 * Original code by Anders Lindgren (1996)
6033 static int erlang_func (char *, char *);
6034 static void erlang_attribute (char *);
6035 static int erlang_atom (char *);
6038 Erlang_functions (FILE *inf
)
6048 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
6050 if (cp
[0] == '\0') /* Empty line */
6052 else if (c_isspace (cp
[0])) /* Not function nor attribute */
6054 else if (cp
[0] == '%') /* comment */
6056 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
6058 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
6060 erlang_attribute (cp
);
6067 else if ((len
= erlang_func (cp
, last
)) > 0)
6070 * Function. Store the function name so that we only
6071 * generates a tag for the first clause.
6074 last
= xnew (len
+ 1, char);
6075 else if (len
+ 1 > allocated
)
6076 xrnew (last
, len
+ 1, char);
6077 allocated
= len
+ 1;
6078 memcpy (last
, cp
, len
);
6087 * A function definition is added if it matches:
6088 * <beginning of line><Erlang Atom><whitespace>(
6090 * It is added to the tags database if it doesn't match the
6091 * name of the previous clause header.
6093 * Return the size of the name of the function, or 0 if no function
6097 erlang_func (char *s
, char *last
)
6099 /* Name of last clause. */
6104 pos
= erlang_atom (s
);
6109 pos
= skip_spaces (s
+ pos
) - s
;
6111 /* Save only the first clause. */
6114 || len
!= (int)strlen (last
)
6115 || !strneq (s
, last
, len
)))
6117 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
6126 * Handle attributes. Currently, tags are generated for defines
6129 * They are on the form:
6130 * -define(foo, bar).
6131 * -define(Foo(M, N), M+N).
6132 * -record(graph, {vtab = notable, cyclic = true}).
6135 erlang_attribute (char *s
)
6139 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
6142 int len
= erlang_atom (skip_spaces (cp
));
6144 make_tag (cp
, len
, true, s
, cp
+ len
- s
, lineno
, linecharno
);
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the quoted atom is not terminated on this line.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (c_isalpha (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
    }
  else if (s[pos] == '\'')
    {
      for (pos++; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      pos++;                    /* include the closing quote */
    }

  return pos;
}
6179 static char *scan_separators (char *);
6180 static void add_regex (char *, language
*);
6181 static char *substitute (char *, char *, struct re_registers
*);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];           /* the separator is the first character */
  char *copyto = name;          /* where the next decoded char is stored */
  bool quoted = false;          /* previous character was a backslash */

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space) */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete) */
            case 'e': *copyto++ = 033; break;    /* ESC (escape) */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed) */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line) */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab) */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
6242 /* Look at the argument of --regex or --no-regex and do the right
6243 thing. Same for each line of a regexp file. */
6245 analyze_regex (char *regex_arg
)
6247 if (regex_arg
== NULL
)
6249 free_regexps (); /* --no-regex: remove existing regexps */
6253 /* A real --regexp option or a line in a regexp file. */
6254 switch (regex_arg
[0])
6256 /* Comments in regexp file or null arg to --regex. */
6262 /* Read a regex file. This is recursive and may result in a
6263 loop, which will stop when the file descriptors are exhausted. */
6267 linebuffer regexbuf
;
6268 char *regexfile
= regex_arg
+ 1;
6270 /* regexfile is a file containing regexps, one per line. */
6271 regexfp
= fopen (regexfile
, "r" FOPEN_BINARY
);
6272 if (regexfp
== NULL
)
6274 linebuffer_init (®exbuf
);
6275 while (readline_internal (®exbuf
, regexfp
, regexfile
) > 0)
6276 analyze_regex (regexbuf
.buffer
);
6277 free (regexbuf
.buffer
);
6278 if (fclose (regexfp
) != 0)
6283 /* Regexp to be used for a specific language only. */
6287 char *lang_name
= regex_arg
+ 1;
6290 for (cp
= lang_name
; *cp
!= '}'; cp
++)
6293 error ("unterminated language name in regex: %s", regex_arg
);
6297 lang
= get_language_from_langname (lang_name
);
6300 add_regex (cp
, lang
);
6304 /* Regexp to be used for any language. */
6306 add_regex (regex_arg
, NULL
);
6311 /* Separate the regexp pattern, compile it,
6312 and care for optional name and modifiers. */
6314 add_regex (char *regexp_pattern
, language
*lang
)
6316 static struct re_pattern_buffer zeropattern
;
6317 char sep
, *pat
, *name
, *modifiers
;
6320 struct re_pattern_buffer
*patbuf
;
6323 force_explicit_name
= true, /* do not use implicit tag names */
6324 ignore_case
= false, /* case is significant */
6325 multi_line
= false, /* matches are done one line at a time */
6326 single_line
= false; /* dot does not match newline */
6329 if (strlen (regexp_pattern
) < 3)
6331 error ("null regexp");
6334 sep
= regexp_pattern
[0];
6335 name
= scan_separators (regexp_pattern
);
6338 error ("%s: unterminated regexp", regexp_pattern
);
6343 error ("null name for regexp \"%s\"", regexp_pattern
);
6346 modifiers
= scan_separators (name
);
6347 if (modifiers
== NULL
) /* no terminating separator --> no name */
6353 modifiers
+= 1; /* skip separator */
6355 /* Parse regex modifiers. */
6356 for (; modifiers
[0] != '\0'; modifiers
++)
6357 switch (modifiers
[0])
6360 if (modifiers
== name
)
6361 error ("forcing explicit tag name but no name, ignoring");
6362 force_explicit_name
= true;
6372 need_filebuf
= true;
6375 error ("invalid regexp modifier '%c', ignoring", modifiers
[0]);
6379 patbuf
= xnew (1, struct re_pattern_buffer
);
6380 *patbuf
= zeropattern
;
6383 static char lc_trans
[UCHAR_MAX
+ 1];
6385 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++)
6386 lc_trans
[i
] = c_tolower (i
);
6387 patbuf
->translate
= lc_trans
; /* translation table to fold case */
6391 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
6393 pat
= regexp_pattern
;
6396 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
6398 re_set_syntax (RE_SYNTAX_EMACS
);
6400 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
6405 error ("%s while compiling pattern", err
);
6410 p_head
= xnew (1, regexp
);
6411 p_head
->pattern
= savestr (regexp_pattern
);
6412 p_head
->p_next
= rp
;
6413 p_head
->lang
= lang
;
6414 p_head
->pat
= patbuf
;
6415 p_head
->name
= savestr (name
);
6416 p_head
->error_signaled
= false;
6417 p_head
->force_explicit_name
= force_explicit_name
;
6418 p_head
->ignore_case
= ignore_case
;
6419 p_head
->multi_line
= multi_line
;
6423 * Do the substitutions indicated by the regular expression and
6427 substitute (char *in
, char *out
, struct re_registers
*regs
)
6430 int size
, dig
, diglen
;
6433 size
= strlen (out
);
6435 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6436 if (out
[size
- 1] == '\\')
6437 fatal ("pattern error in \"%s\"", out
);
6438 for (t
= strchr (out
, '\\');
6440 t
= strchr (t
+ 2, '\\'))
6441 if (c_isdigit (t
[1]))
6444 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
6450 /* Allocate space and do the substitutions. */
6452 result
= xnew (size
+ 1, char);
6454 for (t
= result
; *out
!= '\0'; out
++)
6455 if (*out
== '\\' && c_isdigit (*++out
))
6458 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
6459 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
6466 assert (t
<= result
+ size
);
6467 assert (t
- result
== (int)strlen (result
));
6472 /* Deallocate all regexps. */
6477 while (p_head
!= NULL
)
6479 rp
= p_head
->p_next
;
6480 free (p_head
->pattern
);
6481 free (p_head
->name
);
6489 * Reads the whole file as a single string from `filebuf' and looks for
6490 * multi-line regular expressions, creating tags on matches.
6491 * readline already dealt with normal regexps.
6493 * Idea by Ben Wing <ben@666.com> (2002).
6496 regex_tag_multiline (void)
6498 char *buffer
= filebuf
.buffer
;
6502 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6506 if (!rp
->multi_line
)
6507 continue; /* skip normal regexps */
6509 /* Generic initializations before parsing file from memory. */
6510 lineno
= 1; /* reset global line number */
6511 charno
= 0; /* reset global char number */
6512 linecharno
= 0; /* reset global char number of line start */
6514 /* Only use generic regexps or those for the current language. */
6515 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
6518 while (match
>= 0 && match
< filebuf
.len
)
6520 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
6521 filebuf
.len
- match
, &rp
->regs
);
6526 if (!rp
->error_signaled
)
6528 error ("regexp stack overflow while matching \"%s\"",
6530 rp
->error_signaled
= true;
6537 if (match
== rp
->regs
.end
[0])
6539 if (!rp
->error_signaled
)
6541 error ("regexp matches the empty string: \"%s\"",
6543 rp
->error_signaled
= true;
6545 match
= -3; /* exit from while loop */
6549 /* Match occurred. Construct a tag. */
6550 while (charno
< rp
->regs
.end
[0])
6551 if (buffer
[charno
++] == '\n')
6552 lineno
++, linecharno
= charno
;
6554 if (name
[0] == '\0')
6556 else /* make a named tag */
6557 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
6558 if (rp
->force_explicit_name
)
6560 /* Force explicit tag name, if a name is there. */
6561 pfnote (name
, true, buffer
+ linecharno
,
6562 charno
- linecharno
+ 1, lineno
, linecharno
);
6565 fprintf (stderr
, "%s on %s:%d: %s\n",
6566 name
? name
: "(unnamed)", curfdp
->taggedfname
,
6567 lineno
, buffer
+ linecharno
);
6570 make_tag (name
, strlen (name
), true, buffer
+ linecharno
,
6571 charno
- linecharno
+ 1, lineno
, linecharno
);
6580 nocase_tail (const char *cp
)
6584 while (*cp
!= '\0' && c_tolower (*cp
) == c_tolower (dbp
[len
]))
6586 if (*cp
== '\0' && !intoken (dbp
[len
]))
6595 get_tag (register char *bp
, char **namepp
)
6597 register char *cp
= bp
;
6601 /* Go till you get to white space or a syntactic break */
6602 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
6604 make_tag (bp
, cp
- bp
, true,
6605 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6609 *namepp
= savenstr (bp
, cp
- bp
);
6612 /* Similar to get_tag, but include '=' as part of the tag. */
6614 get_lispy_tag (register char *bp
)
6616 register char *cp
= bp
;
6620 /* Go till you get to white space or a syntactic break */
6621 for (cp
= bp
+ 1; !notinname (*cp
) || *cp
== '='; cp
++)
6623 make_tag (bp
, cp
- bp
, true,
6624 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6629 * Read a line of text from `stream' into `lbp', excluding the
6630 * newline or CR-NL, if any. Return the number of characters read from
6631 * `stream', which is the length of the line including the newline.
6633 * On DOS or Windows we do not count the CR character, if any before the
6634 * NL, in the returned length; this mirrors the behavior of Emacs on those
6635 * platforms (for text files, it translates CR-NL to NL as it reads in the
6638 * If multi-line regular expressions are requested, each line read is
6639 * appended to `filebuf'.
6642 readline_internal (linebuffer
*lbp
, FILE *stream
, char const *filename
)
6644 char *buffer
= lbp
->buffer
;
6645 char *p
= lbp
->buffer
;
6649 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
6653 register int c
= getc (stream
);
6656 /* We're at the end of linebuffer: expand it. */
6658 xrnew (buffer
, lbp
->size
, char);
6659 p
+= buffer
- lbp
->buffer
;
6660 pend
= buffer
+ lbp
->size
;
6661 lbp
->buffer
= buffer
;
6665 if (ferror (stream
))
6673 if (p
> buffer
&& p
[-1] == '\r')
6687 lbp
->len
= p
- buffer
;
6689 if (need_filebuf
/* we need filebuf for multi-line regexps */
6690 && chars_deleted
> 0) /* not at EOF */
6692 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6694 /* Expand filebuf. */
6696 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6698 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6699 filebuf
.len
+= lbp
->len
;
6700 filebuf
.buffer
[filebuf
.len
++] = '\n';
6701 filebuf
.buffer
[filebuf
.len
] = '\0';
6704 return lbp
->len
+ chars_deleted
;
6708 * Like readline_internal, above, but in addition try to match the
6709 * input line against relevant regular expressions and manage #line
6713 readline (linebuffer
*lbp
, FILE *stream
)
6717 linecharno
= charno
; /* update global char number of line start */
6718 result
= readline_internal (lbp
, stream
, infilename
); /* read line */
6719 lineno
+= 1; /* increment global line number */
6720 charno
+= result
; /* increment global char number */
6722 /* Honor #line directives. */
6723 if (!no_line_directive
)
6725 static bool discard_until_line_directive
;
6727 /* Check whether this is a #line directive. */
6728 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6733 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6734 && start
> 0) /* double quote character found */
6736 char *endp
= lbp
->buffer
+ start
;
6738 while ((endp
= strchr (endp
, '"')) != NULL
6739 && endp
[-1] == '\\')
6742 /* Ok, this is a real #line directive. Let's deal with it. */
6744 char *taggedabsname
; /* absolute name of original file */
6745 char *taggedfname
; /* name of original file as given */
6746 char *name
; /* temp var */
6748 discard_until_line_directive
= false; /* found it */
6749 name
= lbp
->buffer
+ start
;
6751 canonicalize_filename (name
);
6752 taggedabsname
= absolute_filename (name
, tagfiledir
);
6753 if (filename_is_absolute (name
)
6754 || filename_is_absolute (curfdp
->infname
))
6755 taggedfname
= savestr (taggedabsname
);
6757 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6759 if (streq (curfdp
->taggedfname
, taggedfname
))
6760 /* The #line directive is only a line number change. We
6761 deal with this afterwards. */
6764 /* The tags following this #line directive should be
6765 attributed to taggedfname. In order to do this, set
6766 curfdp accordingly. */
6768 fdesc
*fdp
; /* file description pointer */
6770 /* Go look for a file description already set up for the
6771 file indicated in the #line directive. If there is
6772 one, use it from now until the next #line
6774 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6775 if (streq (fdp
->infname
, curfdp
->infname
)
6776 && streq (fdp
->taggedfname
, taggedfname
))
6777 /* If we remove the second test above (after the &&)
6778 then all entries pertaining to the same file are
6779 coalesced in the tags file. If we use it, then
6780 entries pertaining to the same file but generated
6781 from different files (via #line directives) will
6782 go into separate sections in the tags file. These
6783 alternatives look equivalent. The first one
6784 destroys some apparently useless information. */
6790 /* Else, if we already tagged the real file, skip all
6791 input lines until the next #line directive. */
6792 if (fdp
== NULL
) /* not found */
6793 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6794 if (streq (fdp
->infabsname
, taggedabsname
))
6796 discard_until_line_directive
= true;
6800 /* Else create a new file description and use that from
6801 now on, until the next #line directive. */
6802 if (fdp
== NULL
) /* not found */
6805 fdhead
= xnew (1, fdesc
);
6806 *fdhead
= *curfdp
; /* copy curr. file description */
6808 fdhead
->infname
= savestr (curfdp
->infname
);
6809 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6810 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6811 fdhead
->taggedfname
= taggedfname
;
6812 fdhead
->usecharno
= false;
6813 fdhead
->prop
= NULL
;
6814 fdhead
->written
= false;
6818 free (taggedabsname
);
6820 readline (lbp
, stream
);
6822 } /* if a real #line directive */
6823 } /* if #line is followed by a number */
6824 } /* if line begins with "#line " */
6826 /* If we are here, no #line directive was found. */
6827 if (discard_until_line_directive
)
6831 /* Do a tail recursion on ourselves, thus discarding the contents
6832 of the line buffer. */
6833 readline (lbp
, stream
);
6837 discard_until_line_directive
= false;
6840 } /* if #line directives should be considered */
6847 /* Match against relevant regexps. */
6849 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6851 /* Only use generic regexps or those for the current language.
6852 Also do not use multiline regexps, which is the job of
6853 regex_tag_multiline. */
6854 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6858 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6863 if (!rp
->error_signaled
)
6865 error ("regexp stack overflow while matching \"%s\"",
6867 rp
->error_signaled
= true;
6874 /* Empty string matched. */
6875 if (!rp
->error_signaled
)
6877 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6878 rp
->error_signaled
= true;
6882 /* Match occurred. Construct a tag. */
6884 if (name
[0] == '\0')
6886 else /* make a named tag */
6887 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6888 if (rp
->force_explicit_name
)
6890 /* Force explicit tag name, if a name is there. */
6891 pfnote (name
, true, lbp
->buffer
, match
, lineno
, linecharno
);
6893 fprintf (stderr
, "%s on %s:%d: %s\n",
6894 name
? name
: "(unnamed)", curfdp
->taggedfname
,
6895 lineno
, lbp
->buffer
);
6898 make_tag (name
, strlen (name
), true,
6899 lbp
->buffer
, match
, lineno
, linecharno
);
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6918 * Return a pointer to a space of size LEN+1 allocated with xnew where
6919 * the string CP has been copied for at most the first LEN characters.
6922 savenstr (const char *cp
, int len
)
6924 char *dp
= xnew (len
+ 1, char);
6926 return memcpy (dp
, cp
, len
);
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (char *cp)
{
  while (c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  while (! notinname (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  FORMAT and the following arguments
   are printf-style; the message is emitted through verror. */
static void
fatal (char const *format, ...)
{
  va_list ap;

  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
  exit (EXIT_FAILURE);
}
/* Report the current errno with perror, prefixed by S1, and exit
   with a failure status. */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6976 suggest_asking_for_help (void)
6978 fprintf (stderr
, "\tTry '%s --help' for a complete list of options.\n",
6980 exit (EXIT_FAILURE
);
/* Output a diagnostic with printf-style FORMAT and args. */
static void
error (const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
}
6994 verror (char const *format
, va_list ap
)
6996 fprintf (stderr
, "%s: ", progname
);
6997 vfprintf (stderr
, format
, ap
);
6998 fprintf (stderr
, "\n");
7001 /* Return a newly-allocated string whose contents
7002 concatenate those of s1, s2, s3. */
7004 concat (const char *s1
, const char *s2
, const char *s3
)
7006 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
7007 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
7009 strcpy (result
, s1
);
7010 strcpy (result
+ len1
, s2
);
7011 strcpy (result
+ len1
+ len2
, s3
);
7017 /* Does the same work as the system V getcwd, but does not need to
7018 guess the buffer size in advance. */
7023 char *path
= xnew (bufsize
, char);
7025 while (getcwd (path
, bufsize
) == NULL
)
7027 if (errno
!= ERANGE
)
7031 path
= xnew (bufsize
, char);
7034 canonicalize_filename (path
);
7038 /* Return a newly allocated string containing a name of a temporary file. */
7042 const char *tmpdir
= getenv ("TMPDIR");
7043 const char *slash
= "/";
7045 #if MSDOS || defined (DOS_NT)
7047 tmpdir
= getenv ("TEMP");
7049 tmpdir
= getenv ("TMP");
7052 if (tmpdir
[strlen (tmpdir
) - 1] == '/'
7053 || tmpdir
[strlen (tmpdir
) - 1] == '\\')
7058 if (tmpdir
[strlen (tmpdir
) - 1] == '/')
7062 char *templt
= concat (tmpdir
, slash
, "etXXXXXX");
7063 int fd
= mkostemp (templt
, O_CLOEXEC
);
7064 if (fd
< 0 || close (fd
) != 0)
7066 int temp_errno
= errno
;
7071 #if defined (DOS_NT)
7074 /* The file name will be used in shell redirection, so it needs to have
7075 DOS-style backslashes, or else the Windows shell will barf. */
7077 for (p
= templt
; *p
; p
++)
7086 /* Return a newly allocated string containing the file name of FILE
7087 relative to the absolute directory DIR (which should end with a slash). */
7089 relative_filename (char *file
, char *dir
)
7091 char *fp
, *dp
, *afn
, *res
;
7094 /* Find the common root of file and dir (with a trailing slash). */
7095 afn
= absolute_filename (file
, cwd
);
7098 while (*fp
++ == *dp
++)
7100 fp
--, dp
--; /* back to the first differing char */
7102 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
7105 do /* look at the equal chars until '/' */
7109 /* Build a sequence of "../" strings for the resulting relative file name. */
7111 while ((dp
= strchr (dp
+ 1, '/')) != NULL
)
7113 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
7116 z
= stpcpy (z
, "../");
7118 /* Add the file name relative to the common root of file and dir. */
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) counts the tail after "/.." plus
                 one, i.e. the tail and its terminating NUL.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE contains no slash, a copy of DIR is
   returned. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  /* Temporarily cut FILE after its last slash, then restore it. */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place. */
static void
canonicalize_filename (register char *fn)
{
  register char *cp;            /* read position; FN is the write position */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == '\\')
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == '\\')
          cp++;
      }
    else
      *fn = *cp;

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/')
      {
        *fn = '/';
        while (cp[1] == '/')
          cp++;
      }
    else
      *fn = *cp;

#endif /* !DOS_NT */

  *fn = '\0';
}
7263 /* Initialize a linebuffer for use. */
7265 linebuffer_init (linebuffer
*lbp
)
7267 lbp
->size
= (DEBUG
) ? 3 : 200;
7268 lbp
->buffer
= xnew (lbp
->size
, char);
7269 lbp
->buffer
[0] = '\0';
7273 /* Set the minimum size of a string contained in a linebuffer. */
7275 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
7277 while (lbp
->size
<= toksize
)
7280 xrnew (lbp
->buffer
, lbp
->size
, char);
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);

  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *result = realloc (ptr, size);

  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
7306 * indent-tabs-mode: t
7309 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7310 * c-file-style: "gnu"
7314 /* etags.c ends here */