1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2017 Free Software
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or (at
39 your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 * Francesco Potortì maintained and improved it for many years
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version
[] = "@(#) pot revision number is 17.38.1.4";
88 # define NDEBUG /* disable assert */
93 /* WIN32_NATIVE is for XEmacs.
94 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
99 #endif /* WIN32_NATIVE */
104 # include <sys/param.h>
114 /* The WINDOWSNT build doesn't use Gnulib's fcntl.h. */
115 # define O_CLOEXEC O_NOINHERIT
116 #endif /* WINDOWSNT */
123 #include <sysstdio.h>
126 #include <binary-io.h>
127 #include <unlocked-io.h>
129 #include <c-strcase.h>
133 # undef assert /* some systems have a buggy assert.h */
134 # define assert(x) ((void) 0)
140 /* Define CTAGS to make the program "ctags" compatible with the usual one.
141 Leave it undefined to make the program "etags", which makes emacs-style
142 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* Return true if the NUL-terminated strings S and T have identical
   contents.  Both arguments must point to valid strings.  */
static bool
streq (char const *s, char const *t)
{
  return strcmp (s, t) == 0;
}
/* Return true if the NUL-terminated strings S and T are equal
   according to c_strcasecmp.  Both arguments must point to valid
   strings.  */
static bool
strcaseeq (char const *s, char const *t)
{
  return c_strcasecmp (s, t) == 0;
}
/* Return true if the first N characters of S and T compare equal
   under strncmp; comparison stops early at a NUL in either string.
   With N == 0 the result is always true.  */
static bool
strneq (char const *s, char const *t, size_t n)
{
  return strncmp (s, t, n) == 0;
}
/* Return true if the first N characters of S and T are equal
   according to c_strncasecmp.  */
static bool
strncaseeq (char const *s, char const *t, size_t n)
{
  return c_strncasecmp (s, t, n) == 0;
}
/* Return true if C is not in a name: NUL, tab, newline, form feed,
   carriage return, space, or one of the characters ( ) , ; =.
   The answer comes from a 256-entry lookup table indexed by C, so
   any unsigned char value is a valid argument.  */
static bool
notinname (unsigned char c)
{
  /* Look at make_tag before modifying!  */
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
    ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
  };
  return table[c];
}
186 /* C can start a token. */
188 begtoken (unsigned char c
)
190 static bool const table
[UCHAR_MAX
+ 1] = {
192 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
193 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
194 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
197 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
198 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
199 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
206 /* C can be in the middle of a token. */
208 intoken (unsigned char c
)
210 static bool const table
[UCHAR_MAX
+ 1] = {
212 ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
213 ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
214 ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
215 ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
216 ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
219 ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
220 ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
221 ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
/* Return true if C can end a token: NUL, tab, newline, carriage
   return, space, or one of the punctuation characters listed in the
   table below.  The answer comes from a 256-entry lookup table
   indexed by C, so any unsigned char value is a valid argument.  */
static bool
endtoken (unsigned char c)
{
  static bool const table[UCHAR_MAX + 1] = {
    ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
    ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
    ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
    ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
    ['{']=1, ['|']=1, ['}']=1, ['~']=1
  };
  return table[c];
}
242 * xnew, xrnew -- allocate, reallocate storage
244 * SYNOPSIS: Type *xnew (int n, Type);
245 * void xrnew (OldPointer, int n, Type);
247 #define xnew(n, Type) ((Type *) xmalloc ((n) * sizeof (Type)))
248 #define xrnew(op, n, Type) ((op) = (Type *) xrealloc (op, (n) * sizeof (Type)))
250 typedef void Lang_function (FILE *);
254 const char *suffix
; /* file name suffix for this compressor */
255 const char *command
; /* takes one arg and decompresses to stdout */
260 const char *name
; /* language name */
261 const char *help
; /* detailed help for the language */
262 Lang_function
*function
; /* parse function */
263 const char **suffixes
; /* name suffixes of this language's files */
264 const char **filenames
; /* names of this language's files */
265 const char **interpreters
; /* interpreters for this language */
266 bool metasource
; /* source used to generate other sources */
271 struct fdesc
*next
; /* for the linked list */
272 char *infname
; /* uncompressed input file name */
273 char *infabsname
; /* absolute uncompressed input file name */
274 char *infabsdir
; /* absolute dir of input file */
275 char *taggedfname
; /* file name to write in tagfile */
276 language
*lang
; /* language of file */
277 char *prop
; /* file properties to write in tagfile */
278 bool usecharno
; /* etags tags shall contain char number */
279 bool written
; /* entry written in the tags file */
282 typedef struct node_st
283 { /* sorting structure */
284 struct node_st
*left
, *right
; /* left and right sons */
285 fdesc
*fdp
; /* description of file to whom tag belongs */
286 char *name
; /* tag name */
287 char *regex
; /* search regexp */
288 bool valid
; /* write this tag on the tag file */
289 bool is_func
; /* function tag: use regexp in CTAGS mode */
290 bool been_warned
; /* warning already given for duplicated tag */
291 int lno
; /* line number tag is on */
292 long cno
; /* character number line starts on */
296 * A `linebuffer' is a structure which holds a line of text.
297 * `readline_internal' reads a line from a stream into a linebuffer
298 * and works regardless of the length of the line.
299 * SIZE is the size of BUFFER, LEN is the length of the string in
300 * BUFFER after readline reads it.
309 /* Used to support mixing of --lang and file names. */
313 at_language
, /* a language specification */
314 at_regexp
, /* a regular expression */
315 at_filename
, /* a file name */
316 at_stdin
, /* read from stdin here */
317 at_end
/* stop parsing the list */
318 } arg_type
; /* argument type */
319 language
*lang
; /* language associated with the argument */
320 char *what
; /* the argument itself */
323 /* Structure defining a regular expression. */
324 typedef struct regexp
326 struct regexp
*p_next
; /* pointer to next in list */
327 language
*lang
; /* if set, use only for this language */
328 char *pattern
; /* the regexp pattern */
329 char *name
; /* tag name */
330 struct re_pattern_buffer
*pat
; /* the compiled pattern */
331 struct re_registers regs
; /* re registers */
332 bool error_signaled
; /* already signaled for this regexp */
333 bool force_explicit_name
; /* do not allow implicit tag name */
334 bool ignore_case
; /* ignore case when matching */
335 bool multi_line
; /* do a multi-line match on the whole file */
339 /* Many compilers barf on this:
340 Lang_function Ada_funcs;
341 so let's write it this way */
342 static void Ada_funcs (FILE *);
343 static void Asm_labels (FILE *);
344 static void C_entries (int c_ext
, FILE *);
345 static void default_C_entries (FILE *);
346 static void plain_C_entries (FILE *);
347 static void Cjava_entries (FILE *);
348 static void Cobol_paragraphs (FILE *);
349 static void Cplusplus_entries (FILE *);
350 static void Cstar_entries (FILE *);
351 static void Erlang_functions (FILE *);
352 static void Forth_words (FILE *);
353 static void Fortran_functions (FILE *);
354 static void Go_functions (FILE *);
355 static void HTML_labels (FILE *);
356 static void Lisp_functions (FILE *);
357 static void Lua_functions (FILE *);
358 static void Makefile_targets (FILE *);
359 static void Pascal_functions (FILE *);
360 static void Perl_functions (FILE *);
361 static void PHP_functions (FILE *);
362 static void PS_functions (FILE *);
363 static void Prolog_functions (FILE *);
364 static void Python_functions (FILE *);
365 static void Ruby_functions (FILE *);
366 static void Scheme_functions (FILE *);
367 static void TeX_commands (FILE *);
368 static void Texinfo_nodes (FILE *);
369 static void Yacc_entries (FILE *);
370 static void just_read_file (FILE *);
372 static language
*get_language_from_langname (const char *);
373 static void readline (linebuffer
*, FILE *);
374 static long readline_internal (linebuffer
*, FILE *, char const *);
375 static bool nocase_tail (const char *);
376 static void get_tag (char *, char **);
377 static void get_lispy_tag (char *);
379 static void analyze_regex (char *);
380 static void free_regexps (void);
381 static void regex_tag_multiline (void);
382 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
383 static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
384 static _Noreturn
void suggest_asking_for_help (void);
385 static _Noreturn
void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
386 static _Noreturn
void pfatal (const char *);
387 static void add_node (node
*, node
**);
389 static void process_file_name (char *, language
*);
390 static void process_file (FILE *, char *, language
*);
391 static void find_entries (FILE *);
392 static void free_tree (node
*);
393 static void free_fdesc (fdesc
*);
394 static void pfnote (char *, bool, char *, int, int, long);
395 static void invalidate_nodes (fdesc
*, node
**);
396 static void put_entries (node
*);
398 static char *concat (const char *, const char *, const char *);
399 static char *skip_spaces (char *);
400 static char *skip_non_spaces (char *);
401 static char *skip_name (char *);
402 static char *savenstr (const char *, int);
403 static char *savestr (const char *);
404 static char *etags_getcwd (void);
405 static char *relative_filename (char *, char *);
406 static char *absolute_filename (char *, char *);
407 static char *absolute_dirname (char *, char *);
408 static bool filename_is_absolute (char *f
);
409 static void canonicalize_filename (char *);
410 static char *etags_mktmp (void);
411 static void linebuffer_init (linebuffer
*);
412 static void linebuffer_setlen (linebuffer
*, int);
413 static void *xmalloc (size_t);
414 static void *xrealloc (void *, size_t);
417 static char searchar
= '/'; /* use /.../ searches */
419 static char *tagfile
; /* output file */
420 static char *progname
; /* name this program was invoked with */
421 static char *cwd
; /* current working directory */
422 static char *tagfiledir
; /* directory of tagfile */
423 static FILE *tagf
; /* ioptr for tags file */
424 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
426 static fdesc
*fdhead
; /* head of file description list */
427 static fdesc
*curfdp
; /* current file description */
428 static char *infilename
; /* current input file name */
429 static int lineno
; /* line number of current line */
430 static long charno
; /* current character number */
431 static long linecharno
; /* charno of start of current line */
432 static char *dbp
; /* pointer to start of current tag */
434 static const int invalidcharno
= -1;
436 static node
*nodehead
; /* the head of the binary tree of tags */
437 static node
*last_node
; /* the last node created */
439 static linebuffer lb
; /* the current line */
440 static linebuffer filebuf
; /* a buffer containing the whole file */
441 static linebuffer token_name
; /* a buffer containing a tag name */
443 static bool append_to_tagfile
; /* -a: append to tags */
444 /* The next five default to true in C and derived languages. */
445 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
446 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
447 /* 0 struct/enum/union decls, and C++ */
448 /* member functions. */
449 static bool constantypedefs
; /* -d: create tags for C #define, enum */
450 /* constants and variables. */
451 /* -D: opposite of -d. Default under ctags. */
452 static int globals
; /* create tags for global variables */
453 static int members
; /* create tags for C member variables */
454 static int declarations
; /* --declarations: tag them and extern in C&Co*/
455 static int no_line_directive
; /* ignore #line directives (undocumented) */
456 static int no_duplicates
; /* no duplicate tags for ctags (undocumented) */
457 static bool update
; /* -u: update tags */
458 static bool vgrind_style
; /* -v: create vgrind style index output */
459 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
460 static bool cxref_style
; /* -x: create cxref style output */
461 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
462 static bool ignoreindent
; /* -I: ignore indentation in C */
463 static int packages_only
; /* --packages-only: in Ada, only tag packages*/
464 static int class_qualify
; /* -Q: produce class-qualified tags in C++/Java */
466 /* STDIN is defined in LynxOS system headers */
471 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
472 static bool parsing_stdin
; /* --parse-stdin used */
474 static regexp
*p_head
; /* list of all regexps */
475 static bool need_filebuf
; /* some regexes are multi-line */
477 static struct option longopts
[] =
479 { "append", no_argument
, NULL
, 'a' },
480 { "packages-only", no_argument
, &packages_only
, 1 },
481 { "c++", no_argument
, NULL
, 'C' },
482 { "declarations", no_argument
, &declarations
, 1 },
483 { "no-line-directive", no_argument
, &no_line_directive
, 1 },
484 { "no-duplicates", no_argument
, &no_duplicates
, 1 },
485 { "help", no_argument
, NULL
, 'h' },
486 { "help", no_argument
, NULL
, 'H' },
487 { "ignore-indentation", no_argument
, NULL
, 'I' },
488 { "language", required_argument
, NULL
, 'l' },
489 { "members", no_argument
, &members
, 1 },
490 { "no-members", no_argument
, &members
, 0 },
491 { "output", required_argument
, NULL
, 'o' },
492 { "class-qualify", no_argument
, &class_qualify
, 'Q' },
493 { "regex", required_argument
, NULL
, 'r' },
494 { "no-regex", no_argument
, NULL
, 'R' },
495 { "ignore-case-regex", required_argument
, NULL
, 'c' },
496 { "parse-stdin", required_argument
, NULL
, STDIN
},
497 { "version", no_argument
, NULL
, 'V' },
499 #if CTAGS /* Ctags options */
500 { "backward-search", no_argument
, NULL
, 'B' },
501 { "cxref", no_argument
, NULL
, 'x' },
502 { "defines", no_argument
, NULL
, 'd' },
503 { "globals", no_argument
, &globals
, 1 },
504 { "typedefs", no_argument
, NULL
, 't' },
505 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
506 { "update", no_argument
, NULL
, 'u' },
507 { "vgrind", no_argument
, NULL
, 'v' },
508 { "no-warn", no_argument
, NULL
, 'w' },
510 #else /* Etags options */
511 { "no-defines", no_argument
, NULL
, 'D' },
512 { "no-globals", no_argument
, &globals
, 0 },
513 { "include", required_argument
, NULL
, 'i' },
518 static compressor compressors
[] =
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { "xz", "xz -d -c" },
534 static const char *Ada_suffixes
[] =
535 { "ads", "adb", "ada", NULL
};
536 static const char Ada_help
[] =
537 "In Ada code, functions, procedures, packages, tasks and types are\n\
538 tags. Use the '--packages-only' option to create tags for\n\
540 Ada tag names have suffixes indicating the type of entity:\n\
541 Entity type: Qualifier:\n\
542 ------------ ----------\n\
549 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
550 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
551 will just search for any tag 'bidule'.";
554 static const char *Asm_suffixes
[] =
555 { "a", /* Unix assembler */
556 "asm", /* Microcontroller assembly */
557 "def", /* BSO/Tasking definition includes */
558 "inc", /* Microcontroller include files */
559 "ins", /* Microcontroller include files */
560 "s", "sa", /* Unix assembler */
561 "S", /* cpp-processed Unix assembler */
562 "src", /* BSO/Tasking C compiler output */
565 static const char Asm_help
[] =
566 "In assembler code, labels appearing at the beginning of a line,\n\
567 followed by a colon, are tags.";
570 /* Note that .c and .h can be considered C++, if the --c++ flag was
571 given, or if the `class' or `template' keywords are met inside the file.
572 That is why default_C_entries is called for these. */
573 static const char *default_C_suffixes
[] =
575 #if CTAGS /* C help for Ctags */
576 static const char default_C_help
[] =
577 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
578 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
579 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
580 Use --globals to tag global variables.\n\
581 You can tag function declarations and external variables by\n\
582 using '--declarations', and struct members by using '--members'.";
583 #else /* C help for Etags */
584 static const char default_C_help
[] =
585 "In C code, any C function or typedef is a tag, and so are\n\
586 definitions of 'struct', 'union' and 'enum'. '#define' macro\n\
587 definitions and 'enum' constants are tags unless you specify\n\
588 '--no-defines'. Global variables are tags unless you specify\n\
589 '--no-globals' and so are struct members unless you specify\n\
590 '--no-members'. Use of '--no-globals', '--no-defines' and\n\
591 '--no-members' can make the tags table file much smaller.\n\
592 You can tag function declarations and external variables by\n\
593 using '--declarations'.";
594 #endif /* C help for Ctags and Etags */
596 static const char *Cplusplus_suffixes
[] =
597 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
598 "M", /* Objective C++ */
599 "pdb", /* PostScript with C syntax */
601 static const char Cplusplus_help
[] =
602 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
603 --help --lang=c --lang=c++ for full help.)\n\
604 In addition to C tags, member functions are also recognized. Member\n\
605 variables are recognized unless you use the '--no-members' option.\n\
606 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
607 and 'CLASS::FUNCTION'. 'operator' definitions have tag names like\n\
610 static const char *Cjava_suffixes
[] =
/* Detailed help text for the "java" language entry.  Declared const
   like the other *_help strings: it is only ever read through the
   `const char *help' member of a language description.  */
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
617 static const char *Cobol_suffixes
[] =
618 { "COB", "cob", NULL
};
/* Detailed help text for the "cobol" language entry.  Declared const
   like the other *_help strings: it is only ever read through the
   `const char *help' member of a language description.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
623 static const char *Cstar_suffixes
[] =
624 { "cs", "hs", NULL
};
626 static const char *Erlang_suffixes
[] =
627 { "erl", "hrl", NULL
};
628 static const char Erlang_help
[] =
629 "In Erlang code, the tags are the functions, records and macros\n\
630 defined in the file.";
/* NULL-terminated list of file name suffixes for the "forth" language
   entry.  Given internal linkage like every other *_suffixes table in
   this file, so it does not export a global symbol.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
634 static const char Forth_help
[] =
635 "In Forth code, tags are words defined by ':',\n\
636 constant, code, create, defer, value, variable, buffer:, field.";
638 static const char *Fortran_suffixes
[] =
639 { "F", "f", "f90", "for", NULL
};
640 static const char Fortran_help
[] =
641 "In Fortran code, functions, subroutines and block data are tags.";
643 static const char *Go_suffixes
[] = {"go", NULL
};
644 static const char Go_help
[] =
645 "In Go code, functions, interfaces and packages are tags.";
647 static const char *HTML_suffixes
[] =
648 { "htm", "html", "shtml", NULL
};
649 static const char HTML_help
[] =
650 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
651 'h3' headers. Also, tags are 'name=' in anchors and all\n\
652 occurrences of 'id='.";
654 static const char *Lisp_suffixes
[] =
655 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
656 static const char Lisp_help
[] =
657 "In Lisp code, any function defined with 'defun', any variable\n\
658 defined with 'defvar' or 'defconst', and in general the first\n\
659 argument of any expression that starts with '(def' in column zero\n\
661 The '--declarations' option tags \"(defvar foo)\" constructs too.";
663 static const char *Lua_suffixes
[] =
664 { "lua", "LUA", NULL
};
665 static const char Lua_help
[] =
666 "In Lua scripts, all functions are tags.";
668 static const char *Makefile_filenames
[] =
669 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
670 static const char Makefile_help
[] =
671 "In makefiles, targets are tags; additionally, variables are tags\n\
672 unless you specify '--no-globals'.";
674 static const char *Objc_suffixes
[] =
675 { "lm", /* Objective lex file */
676 "m", /* Objective C file */
678 static const char Objc_help
[] =
679 "In Objective C code, tags include Objective C definitions for classes,\n\
680 class categories, methods and protocols. Tags for variables and\n\
681 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
682 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
684 static const char *Pascal_suffixes
[] =
685 { "p", "pas", NULL
};
686 static const char Pascal_help
[] =
687 "In Pascal code, the tags are the functions and procedures defined\n\
689 /* " // this is for working around an Emacs highlighting bug... */
691 static const char *Perl_suffixes
[] =
692 { "pl", "pm", NULL
};
693 static const char *Perl_interpreters
[] =
694 { "perl", "@PERL@", NULL
};
695 static const char Perl_help
[] =
696 "In Perl code, the tags are the packages, subroutines and variables\n\
697 defined by the 'package', 'sub', 'my' and 'local' keywords. Use\n\
698 '--globals' if you want to tag global variables. Tags for\n\
699 subroutines are named 'PACKAGE::SUB'. The name for subroutines\n\
700 defined in the default package is 'main::SUB'.";
702 static const char *PHP_suffixes
[] =
703 { "php", "php3", "php4", NULL
};
704 static const char PHP_help
[] =
705 "In PHP code, tags are functions, classes and defines. Unless you use\n\
706 the '--no-members' option, vars are tags too.";
708 static const char *plain_C_suffixes
[] =
709 { "pc", /* Pro*C file */
712 static const char *PS_suffixes
[] =
713 { "ps", "psw", NULL
}; /* .psw is for PSWrap */
714 static const char PS_help
[] =
715 "In PostScript code, the tags are the functions.";
717 static const char *Prolog_suffixes
[] =
719 static const char Prolog_help
[] =
720 "In Prolog code, tags are predicates and rules at the beginning of\n\
723 static const char *Python_suffixes
[] =
725 static const char Python_help
[] =
726 "In Python code, 'def' or 'class' at the beginning of a line\n\
729 static const char *Ruby_suffixes
[] =
730 { "rb", "ru", "rbw", NULL
};
731 static const char *Ruby_filenames
[] =
732 { "Rakefile", "Thorfile", NULL
};
733 static const char Ruby_help
[] =
734 "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
735 a line generate a tag. Constants also generate a tag.";
737 /* Can't do the `SCM' or `scm' prefix with a version number. */
738 static const char *Scheme_suffixes
[] =
739 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
740 static const char Scheme_help
[] =
741 "In Scheme code, tags include anything defined with 'def' or with a\n\
742 construct whose name starts with 'def'. They also include\n\
743 variables set with 'set!' at top level in the file.";
745 static const char *TeX_suffixes
[] =
746 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
747 static const char TeX_help
[] =
748 "In LaTeX text, the argument of any of the commands '\\chapter',\n\
749 '\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
750 '\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
751 '\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
752 '\\newenvironment' or '\\renewenvironment' is a tag.\n\
754 Other commands can be specified by setting the environment variable\n\
755 'TEXTAGS' to a colon-separated list like, for example,\n\
756 TEXTAGS=\"mycommand:myothercommand\".";
759 static const char *Texinfo_suffixes
[] =
760 { "texi", "texinfo", "txi", NULL
};
761 static const char Texinfo_help
[] =
762 "for texinfo files, lines starting with @node are tagged.";
764 static const char *Yacc_suffixes
[] =
765 { "y", "y++", "ym", "yxx", "yy", NULL
}; /* .ym is Objective yacc file */
766 static const char Yacc_help
[] =
767 "In Bison or Yacc input files, each rule defines as a tag the\n\
768 nonterminal it constructs. The portions of the file that contain\n\
769 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
/* Detailed help text for the "auto" pseudo-language entry.  */
static const char auto_help [] =
"'auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
776 static const char none_help
[] =
777 "'none' is not a real language, it indicates to only do\n\
778 regexp processing on files.";
780 static const char no_lang_help
[] =
781 "No detailed help available for this language.";
785 * Table of languages.
787 * It is ok for a given function to be listed under more than one
788 * name. I just didn't.
791 static language lang_names
[] =
793 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
794 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
795 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
796 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
797 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
798 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
799 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
800 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
801 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
802 { "go", Go_help
, Go_functions
, Go_suffixes
},
803 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
804 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
805 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
806 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
807 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
808 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
809 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
810 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
811 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
812 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
813 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
814 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
815 { "python", Python_help
, Python_functions
, Python_suffixes
},
816 { "ruby", Ruby_help
,Ruby_functions
,Ruby_suffixes
,Ruby_filenames
},
817 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
818 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
819 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
820 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,true},
821 { "auto", auto_help
}, /* default guessing scheme */
822 { "none", none_help
, just_read_file
}, /* regexp matching only */
823 { NULL
} /* end of list */
828 print_language_names (void)
831 const char **name
, **ext
;
833 puts ("\nThese are the currently supported languages, along with the\n\
834 default file names and dot suffixes:");
835 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
837 printf (" %-*s", 10, lang
->name
);
838 if (lang
->filenames
!= NULL
)
839 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
840 printf (" %s", *name
);
841 if (lang
->suffixes
!= NULL
)
842 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
843 printf (" .%s", *ext
);
846 puts ("where 'auto' means use default language for files based on file\n\
847 name suffix, and 'none' means only do regexp processing on files.\n\
848 If no language is specified and no matching suffix is found,\n\
849 the first line of the file is read for a sharp-bang (#!) sequence\n\
850 followed by the name of an interpreter. If no such sequence is found,\n\
851 Fortran is tried first; if no tags are found, C is tried next.\n\
852 When parsing any C file, a \"class\" or \"template\" keyword\n\
854 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
856 For detailed help on a given language use, for example,\n\
857 etags --help --lang=ada.");
861 # define EMACS_NAME "standalone"
864 # define VERSION "17.38.1.4"
866 static _Noreturn
void
869 char emacs_copyright
[] = COPYRIGHT
;
871 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
872 puts (emacs_copyright
);
873 puts ("This program is distributed under the terms in ETAGS.README");
878 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
879 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
/* Print help and exit successfully; never returns.
   ARGBUFFER is the parsed argument list, terminated by an entry whose
   arg_type is at_end.  For every at_language entry the help text of that
   language is printed.  The remainder prints the usage line, the option
   descriptions, the language table (print_language_names) and the bug
   report address.
   NOTE(review): this text comes from a lossy dump; the `if (CTAGS)'
   guards, some puts lines and the whitespace inside the literals are
   missing, so consult the pristine source before relying on details.  */
882 static _Noreturn
void
883 print_help (argument
*argbuffer
)
885 bool help_for_lang
= false;
/* Language-specific help: one block per --language option given.  */
887 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
888 if (argbuffer
->arg_type
== at_language
)
892 puts (argbuffer
->lang
->help
);
893 help_for_lang
= true;
899 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
901 These are the options accepted by %s.\n", progname
, progname
);
902 puts ("You may use unambiguous abbreviations for the long option names.");
903 puts (" A - as file name means read names from stdin (one per line).\n\
904 Absolute names are stored in the output file as they are.\n\
905 Relative ones are stored relative to the output file's directory.\n");
907 puts ("-a, --append\n\
908 Append tag entries to existing tags file.");
910 puts ("--packages-only\n\
911 For Ada files, only generate tags for packages.");
914 puts ("-B, --backward-search\n\
915 Write the search commands for the tag entries using '?', the\n\
916 backward-search command instead of '/', the forward-search command.");
918 /* This option is mostly obsolete, because etags can now automatically
919 detect C++. Retained for backward compatibility and for debugging and
920 experimentation. In principle, we could want to tag as C++ even
921 before any "class" or "template" keyword.
923 Treat files whose name suffix defaults to C language as C++ files.");
926 puts ("--declarations\n\
927 In C and derived languages, create tags for function declarations,");
929 puts ("\tand create tags for extern variables if --globals is used.");
932 ("\tand create tags for extern variables unless --no-globals is used.");
935 puts ("-d, --defines\n\
936 Create tag entries for C #define constants and enum constants, too.");
938 puts ("-D, --no-defines\n\
939 Don't create tag entries for C #define constants and enum constants.\n\
940 This makes the tags file smaller.");
943 puts ("-i FILE, --include=FILE\n\
944 Include a note in tag file indicating that, when searching for\n\
945 a tag, one should also consult the tags file FILE after\n\
946 checking the current file.");
948 puts ("-l LANG, --language=LANG\n\
949 Force the following files to be considered as written in the\n\
950 named language up to the next --language=LANG option.");
954 Create tag entries for global variables in some languages.");
956 puts ("--no-globals\n\
957 Do not create tag entries for global variables in some\n\
958 languages. This makes the tags file smaller.");
960 puts ("--no-line-directive\n\
961 Ignore #line preprocessor directives in C and derived languages.");
965 Create tag entries for members of structures in some languages.");
967 puts ("--no-members\n\
968 Do not create tag entries for members of structures\n\
969 in some languages.");
971 puts ("-Q, --class-qualify\n\
972 Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
973 This produces tag names of the form \"class::member\" for C++,\n\
974 \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
975 For Objective C, this also produces class methods qualified with\n\
976 their arguments, as in \"foo:bar:baz:more\".\n\
977 For Perl, this produces \"package::member\".");
978 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
979 Make a tag for each line matching a regular expression pattern\n\
980 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
981 files only. REGEXFILE is a file containing one REGEXP per line.\n\
982 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
983 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
984 puts (" If TAGNAME/ is present, the tags created are named.\n\
985 For example Tcl named tags can be created with:\n\
986 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
987 MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
988 'm' means to allow multi-line matches, 's' implies 'm' and\n\
989 causes dot to match any character, including newline.");
991 puts ("-R, --no-regex\n\
992 Don't create tags from regexps for the following files.");
994 puts ("-I, --ignore-indentation\n\
995 In C and C++ do not assume that a closing brace in the first\n\
996 column is the final brace of a function or structure definition.");
998 puts ("-o FILE, --output=FILE\n\
999 Write the tags to FILE.");
1001 puts ("--parse-stdin=NAME\n\
1002 Read from standard input and record tags as belonging to file NAME.");
1006 puts ("-t, --typedefs\n\
1007 Generate tag entries for C and Ada typedefs.");
1008 puts ("-T, --typedefs-and-c++\n\
1009 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1010 and C++ member functions.");
1014 puts ("-u, --update\n\
1015 Update the tag entries for the given files, leaving tag\n\
1016 entries for other files in place. Currently, this is\n\
1017 implemented by deleting the existing entries for the given\n\
1018 files and then rewriting the new entries at the end of the\n\
1019 tags file. It is often faster to simply rebuild the entire\n\
1020 tag file than to use this.");
1024 puts ("-v, --vgrind\n\
1025 Print on the standard output an index of items intended for\n\
1026 human consumption, similar to the output of vgrind. The index\n\
1027 is sorted, and gives the page number of each item.");
1029 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1030 puts ("-w, --no-duplicates\n\
1031 Do not create duplicate tag entries, for compatibility with\n\
1032 traditional ctags.");
1034 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
1035 puts ("-w, --no-warn\n\
1036 Suppress warning messages about duplicate tag entries.");
1038 puts ("-x, --cxref\n\
1039 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040 The output uses line numbers instead of page numbers, but\n\
1041 beyond that the differences are cosmetic; try both to see\n\
1045 puts ("-V, --version\n\
1046 Print the version of the program.\n\
1048 Print this help message.\n\
1049 Followed by one or more '--language' options prints detailed\n\
1050 help about tag generation for the specified languages.");
1052 print_language_names ();
1055 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1057 exit (EXIT_SUCCESS
);
1062 main (int argc
, char **argv
)
1065 unsigned int nincluded_files
;
1066 char **included_files
;
1067 argument
*argbuffer
;
1068 int current_arg
, file_count
;
1069 linebuffer filename_lb
;
1070 bool help_asked
= false;
1076 nincluded_files
= 0;
1077 included_files
= xnew (argc
, char *);
1081 /* Allocate enough no matter what happens. Overkill, but each one
1083 argbuffer
= xnew (argc
, argument
);
1086 * Always find typedefs and structure tags.
1087 * Also default to find macro constants, enum constants, struct
1088 * members and global variables. Do it for both etags and ctags.
1090 typedefs
= typedefs_or_cplusplus
= constantypedefs
= true;
1091 globals
= members
= true;
1093 /* When the optstring begins with a '-' getopt_long does not rearrange the
1094 non-options arguments to be at the end, but leaves them alone. */
1095 optstring
= concat ("-ac:Cf:Il:o:Qr:RSVhH",
1096 (CTAGS
) ? "BxdtTuvw" : "Di:",
1099 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1103 /* If getopt returns 0, then it has already processed a
1104 long-named option. We should do nothing. */
1108 /* This means that a file name has been seen. Record it. */
1109 argbuffer
[current_arg
].arg_type
= at_filename
;
1110 argbuffer
[current_arg
].what
= optarg
;
1111 len
= strlen (optarg
);
1112 if (whatlen_max
< len
)
1119 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1120 argbuffer
[current_arg
].arg_type
= at_stdin
;
1121 argbuffer
[current_arg
].what
= optarg
;
1122 len
= strlen (optarg
);
1123 if (whatlen_max
< len
)
1128 fatal ("cannot parse standard input more than once");
1129 parsing_stdin
= true;
1132 /* Common options. */
1133 case 'a': append_to_tagfile
= true; break;
1134 case 'C': cplusplus
= true; break;
1135 case 'f': /* for compatibility with old makefiles */
1139 error ("-o option may only be given once.");
1140 suggest_asking_for_help ();
1146 case 'S': /* for backward compatibility */
1147 ignoreindent
= true;
1151 language
*lang
= get_language_from_langname (optarg
);
1154 argbuffer
[current_arg
].lang
= lang
;
1155 argbuffer
[current_arg
].arg_type
= at_language
;
1161 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1162 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1165 argbuffer
[current_arg
].arg_type
= at_regexp
;
1166 argbuffer
[current_arg
].what
= optarg
;
1167 len
= strlen (optarg
);
1168 if (whatlen_max
< len
)
1173 argbuffer
[current_arg
].arg_type
= at_regexp
;
1174 argbuffer
[current_arg
].what
= NULL
;
1189 case 'D': constantypedefs
= false; break;
1190 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1192 /* Ctags options. */
1193 case 'B': searchar
= '?'; break;
1194 case 'd': constantypedefs
= true; break;
1195 case 't': typedefs
= true; break;
1196 case 'T': typedefs
= typedefs_or_cplusplus
= true; break;
1197 case 'u': update
= true; break;
1198 case 'v': vgrind_style
= true; FALLTHROUGH
;
1199 case 'x': cxref_style
= true; break;
1200 case 'w': no_warnings
= true; break;
1202 suggest_asking_for_help ();
1206 /* No more options. Store the rest of arguments. */
1207 for (; optind
< argc
; optind
++)
1209 argbuffer
[current_arg
].arg_type
= at_filename
;
1210 argbuffer
[current_arg
].what
= argv
[optind
];
1211 len
= strlen (argv
[optind
]);
1212 if (whatlen_max
< len
)
1218 argbuffer
[current_arg
].arg_type
= at_end
;
1221 print_help (argbuffer
);
1224 if (nincluded_files
== 0 && file_count
== 0)
1226 error ("no input files specified.");
1227 suggest_asking_for_help ();
1231 if (tagfile
== NULL
)
1232 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1233 cwd
= etags_getcwd (); /* the current working directory */
1234 if (cwd
[strlen (cwd
) - 1] != '/')
1237 cwd
= concat (oldcwd
, "/", "");
1241 /* Compute base directory for relative file names. */
1242 if (streq (tagfile
, "-")
1243 || strneq (tagfile
, "/dev/", 5))
1244 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1247 canonicalize_filename (tagfile
);
1248 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1251 linebuffer_init (&lb
);
1252 linebuffer_init (&filename_lb
);
1253 linebuffer_init (&filebuf
);
1254 linebuffer_init (&token_name
);
1258 if (streq (tagfile
, "-"))
1261 set_binary_mode (STDOUT_FILENO
, O_BINARY
);
1264 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1270 * Loop through files finding functions.
1272 for (i
= 0; i
< current_arg
; i
++)
1274 static language
*lang
; /* non-NULL if language is forced */
1277 switch (argbuffer
[i
].arg_type
)
1280 lang
= argbuffer
[i
].lang
;
1283 analyze_regex (argbuffer
[i
].what
);
1286 this_file
= argbuffer
[i
].what
;
1287 /* Input file named "-" means read file names from stdin
1288 (one per line) and use them. */
1289 if (streq (this_file
, "-"))
1292 fatal ("cannot parse standard input "
1293 "AND read file names from it");
1294 while (readline_internal (&filename_lb
, stdin
, "-") > 0)
1295 process_file_name (filename_lb
.buffer
, lang
);
1298 process_file_name (this_file
, lang
);
1301 this_file
= argbuffer
[i
].what
;
1302 process_file (stdin
, this_file
, lang
);
1305 error ("internal error: arg_type");
1311 free (filebuf
.buffer
);
1312 free (token_name
.buffer
);
1314 if (!CTAGS
|| cxref_style
)
1316 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1317 put_entries (nodehead
);
1318 free_tree (nodehead
);
1324 /* Output file entries that have no tags. */
1325 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1327 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1329 while (nincluded_files
-- > 0)
1330 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1332 if (fclose (tagf
) == EOF
)
1336 return EXIT_SUCCESS
;
1339 /* From here on, we are in (CTAGS && !cxref_style) */
1343 xmalloc (strlen (tagfile
) + whatlen_max
+
1344 sizeof "mv..OTAGS;grep -Fv '\t\t' OTAGS >;rm OTAGS");
1345 for (i
= 0; i
< current_arg
; ++i
)
1347 switch (argbuffer
[i
].arg_type
)
1353 continue; /* the for loop */
1355 char *z
= stpcpy (cmd
, "mv ");
1356 z
= stpcpy (z
, tagfile
);
1357 z
= stpcpy (z
, " OTAGS;grep -Fv '\t");
1358 z
= stpcpy (z
, argbuffer
[i
].what
);
1359 z
= stpcpy (z
, "\t' OTAGS >");
1360 z
= stpcpy (z
, tagfile
);
1361 strcpy (z
, ";rm OTAGS");
1362 if (system (cmd
) != EXIT_SUCCESS
)
1363 fatal ("failed to execute shell command");
1366 append_to_tagfile
= true;
1369 tagf
= fopen (tagfile
, append_to_tagfile
? "ab" : "wb");
1372 put_entries (nodehead
); /* write all the tags (CTAGS) */
1373 free_tree (nodehead
);
1375 if (fclose (tagf
) == EOF
)
1379 if (append_to_tagfile
|| update
)
1381 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1382 /* Maybe these should be used:
1383 setenv ("LC_COLLATE", "C", 1);
1384 setenv ("LC_ALL", "C", 1); */
1385 char *z
= stpcpy (cmd
, "sort -u -o ");
1386 z
= stpcpy (z
, tagfile
);
1388 strcpy (z
, tagfile
);
1389 return system (cmd
);
1391 return EXIT_SUCCESS
;
1396 * Return a compressor given the file name. If EXTPTR is non-zero,
1397 * return a pointer into FILE where the compressor-specific
1398 * extension begins. If no compressor is found, NULL is returned
1399 * and EXTPTR is not significant.
1400 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1403 get_compressor_from_suffix (char *file
, char **extptr
)
1406 char *slash
, *suffix
;
1408 /* File has been processed by canonicalize_filename,
1409 so we don't need to consider backslashes on DOS_NT. */
1410 slash
= strrchr (file
, '/');
1411 suffix
= strrchr (file
, '.');
1412 if (suffix
== NULL
|| suffix
< slash
)
1417 /* Let those poor souls who live with DOS 8+3 file name limits get
1418 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419 Only the first do loop is run if not MSDOS */
1422 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1423 if (streq (compr
->suffix
, suffix
))
1426 break; /* do it only once: not really a loop */
1429 } while (*suffix
!= '\0');
1436 * Return a language given the name.
1439 get_language_from_langname (const char *name
)
1444 error ("empty language name");
1447 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1448 if (streq (name
, lang
->name
))
1450 error ("unknown language \"%s\"", name
);
1458 * Return a language given the interpreter name.
1461 get_language_from_interpreter (char *interpreter
)
1466 if (interpreter
== NULL
)
1468 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1469 if (lang
->interpreters
!= NULL
)
1470 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1471 if (streq (*iname
, interpreter
))
1480 * Return a language given the file name.
1483 get_language_from_filename (char *file
, int case_sensitive
)
1486 const char **name
, **ext
, *suffix
;
1489 /* Try whole file name first. */
1490 slash
= strrchr (file
, '/');
1494 else if (file
[0] && file
[1] == ':')
1497 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1498 if (lang
->filenames
!= NULL
)
1499 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1500 if ((case_sensitive
)
1501 ? streq (*name
, file
)
1502 : strcaseeq (*name
, file
))
1505 /* If not found, try suffix after last dot. */
1506 suffix
= strrchr (file
, '.');
1510 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1511 if (lang
->suffixes
!= NULL
)
1512 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1513 if ((case_sensitive
)
1514 ? streq (*ext
, suffix
)
1515 : strcaseeq (*ext
, suffix
))
1522 * This routine is called on each file argument.
1525 process_file_name (char *file
, language
*lang
)
1530 char *compressed_name
, *uncompressed_name
;
1531 char *ext
, *real_name
, *tmp_name
;
1534 canonicalize_filename (file
);
1535 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1537 error ("skipping inclusion of %s in self.", file
);
1540 compr
= get_compressor_from_suffix (file
, &ext
);
1543 compressed_name
= file
;
1544 uncompressed_name
= savenstr (file
, ext
- file
);
1548 compressed_name
= NULL
;
1549 uncompressed_name
= file
;
1552 /* If the canonicalized uncompressed name
1553 has already been dealt with, skip it silently. */
1554 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1556 assert (fdp
->infname
!= NULL
);
1557 if (streq (uncompressed_name
, fdp
->infname
))
1561 inf
= fopen (file
, "r" FOPEN_BINARY
);
1566 int file_errno
= errno
;
1567 if (compressed_name
)
1569 /* Try with the given suffix. */
1570 inf
= fopen (uncompressed_name
, "r" FOPEN_BINARY
);
1572 real_name
= uncompressed_name
;
1576 /* Try all possible suffixes. */
1577 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1579 compressed_name
= concat (file
, ".", compr
->suffix
);
1580 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1583 real_name
= compressed_name
;
1588 char *suf
= compressed_name
+ strlen (file
);
1589 size_t suflen
= strlen (compr
->suffix
) + 1;
1590 for ( ; suf
[1]; suf
++, suflen
--)
1592 memmove (suf
, suf
+ 1, suflen
);
1593 inf
= fopen (compressed_name
, "r" FOPEN_BINARY
);
1596 real_name
= compressed_name
;
1603 free (compressed_name
);
1604 compressed_name
= NULL
;
1615 if (real_name
== compressed_name
)
1618 tmp_name
= etags_mktmp ();
1623 #if MSDOS || defined (DOS_NT)
1624 char *cmd1
= concat (compr
->command
, " \"", real_name
);
1625 char *cmd
= concat (cmd1
, "\" > ", tmp_name
);
1627 char *cmd1
= concat (compr
->command
, " '", real_name
);
1628 char *cmd
= concat (cmd1
, "' > ", tmp_name
);
1632 if (system (cmd
) == -1)
1639 inf
= fopen (tmp_name
, "r" FOPEN_BINARY
);
1653 process_file (inf
, uncompressed_name
, lang
);
1655 retval
= fclose (inf
);
1656 if (real_name
== compressed_name
)
1665 if (compressed_name
!= file
)
1666 free (compressed_name
);
1667 if (uncompressed_name
!= file
)
1668 free (uncompressed_name
);
1675 process_file (FILE *fh
, char *fn
, language
*lang
)
1677 static const fdesc emptyfdesc
;
1681 /* Create a new input file description entry. */
1682 fdp
= xnew (1, fdesc
);
1685 fdp
->infname
= savestr (fn
);
1687 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1688 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1689 if (filename_is_absolute (fn
))
1691 /* An absolute file name. Canonicalize it. */
1692 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1696 /* A file name relative to cwd. Make it relative
1697 to the directory of the tags file. */
1698 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1700 fdp
->usecharno
= true; /* use char position when making tags */
1702 fdp
->written
= false; /* not written on tags file yet */
1705 curfdp
= fdhead
; /* the current file description */
1709 /* If not Ctags, and if this is not metasource and if it contained no #line
1710 directives, we can write the tags and free all nodes pointing to
1713 && curfdp
->usecharno
/* no #line directives in this file */
1714 && !curfdp
->lang
->metasource
)
1718 /* Look for the head of the sublist relative to this file. See add_node
1719 for the structure of the node tree. */
1721 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1722 if (np
->fdp
== curfdp
)
1725 /* If we generated tags for this file, write and delete them. */
1728 /* This is the head of the last sublist, if any. The following
1729 instructions depend on this being true. */
1730 assert (np
->left
== NULL
);
1732 assert (fdhead
== curfdp
);
1733 assert (last_node
->fdp
== curfdp
);
1734 put_entries (np
); /* write tags for file curfdp->taggedfname */
1735 free_tree (np
); /* remove the written nodes */
1737 nodehead
= NULL
; /* no nodes left */
1739 prev
->left
= NULL
; /* delete the pointer to the sublist */
1745 reset_input (FILE *inf
)
1747 if (fseek (inf
, 0, SEEK_SET
) != 0)
1748 perror (infilename
);
1752 * This routine opens the specified file and calls the function
1753 * which finds the function and type definitions.
1756 find_entries (FILE *inf
)
1759 language
*lang
= curfdp
->lang
;
1760 Lang_function
*parser
= NULL
;
1762 /* If user specified a language, use it. */
1763 if (lang
!= NULL
&& lang
->function
!= NULL
)
1765 parser
= lang
->function
;
1768 /* Else try to guess the language given the file name. */
1771 lang
= get_language_from_filename (curfdp
->infname
, true);
1772 if (lang
!= NULL
&& lang
->function
!= NULL
)
1774 curfdp
->lang
= lang
;
1775 parser
= lang
->function
;
1779 /* Else look for sharp-bang as the first two characters. */
1781 && readline_internal (&lb
, inf
, infilename
) > 0
1783 && lb
.buffer
[0] == '#'
1784 && lb
.buffer
[1] == '!')
1788 /* Set lp to point at the first char after the last slash in the
1789 line or, if no slashes, at the first nonblank. Then set cp to
1790 the first successive blank and terminate the string. */
1791 lp
= strrchr (lb
.buffer
+2, '/');
1795 lp
= skip_spaces (lb
.buffer
+ 2);
1796 cp
= skip_non_spaces (lp
);
1799 if (strlen (lp
) > 0)
1801 lang
= get_language_from_interpreter (lp
);
1802 if (lang
!= NULL
&& lang
->function
!= NULL
)
1804 curfdp
->lang
= lang
;
1805 parser
= lang
->function
;
1812 /* Else try to guess the language given the case insensitive file name. */
1815 lang
= get_language_from_filename (curfdp
->infname
, false);
1816 if (lang
!= NULL
&& lang
->function
!= NULL
)
1818 curfdp
->lang
= lang
;
1819 parser
= lang
->function
;
1823 /* Else try Fortran or C. */
1826 node
*old_last_node
= last_node
;
1828 curfdp
->lang
= get_language_from_langname ("fortran");
1831 if (old_last_node
== last_node
)
1832 /* No Fortran entries found. Try C. */
1835 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1841 if (!no_line_directive
1842 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1843 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1844 file, or anyway we parsed a file that is automatically generated from
1845 this one. If this is the case, the bingo.c file contained #line
1846 directives that generated tags pointing to this file. Let's delete
1847 them all before parsing this file, which is the real source. */
1849 fdesc
**fdpp
= &fdhead
;
1850 while (*fdpp
!= NULL
)
1852 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1853 /* We found one of those! We must delete both the file description
1854 and all tags referring to it. */
1856 fdesc
*badfdp
= *fdpp
;
1858 /* Delete the tags referring to badfdp->taggedfname
1859 that were obtained from badfdp->infname. */
1860 invalidate_nodes (badfdp
, &nodehead
);
1862 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1863 free_fdesc (badfdp
);
1866 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1869 assert (parser
!= NULL
);
1871 /* Generic initializations before reading from file. */
1872 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1874 /* Generic initializations before parsing file with readline. */
1875 lineno
= 0; /* reset global line number */
1876 charno
= 0; /* reset global char number */
1877 linecharno
= 0; /* reset global char number of line start */
1881 regex_tag_multiline ();
1886 * Check whether an implicitly named tag should be created,
1887 * then call `pfnote'.
1888 * NAME is a string that is internally copied by this function.
1890 * TAGS format specification
1891 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1892 * The following is explained in some more detail in etc/ETAGS.EBNF.
1894 * make_tag creates tags with "implicit tag names" (unnamed tags)
1895 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1896 * 1. NAME does not contain any of the characters in NONAM;
1897 * 2. LINESTART contains name as either a rightmost, or rightmost but
1898 * one character, substring;
1899 * 3. the character, if any, immediately before NAME in LINESTART must
1900 * be a character in NONAM;
1901 * 4. the character, if any, immediately after NAME in LINESTART must
1902 * also be a character in NONAM.
1904 * The implementation uses the notinname() macro, which recognizes the
1905 * characters stored in the string `nonam'.
1906 * etags.el needs to use the same characters that are in NONAM.
1909 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1910 int namelen
, /* tag length */
1911 bool is_func
, /* tag is a function */
1912 char *linestart
, /* start of the line where tag is */
1913 int linelen
, /* length of the line where tag is */
1914 int lno
, /* line number */
1915 long int cno
) /* character number */
1917 bool named
= (name
!= NULL
&& namelen
> 0);
1920 if (!CTAGS
&& named
) /* maybe set named to false */
1921 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1922 such that etags.el can guess a name from it. */
1925 register const char *cp
= name
;
1927 for (i
= 0; i
< namelen
; i
++)
1928 if (notinname (*cp
++))
1930 if (i
== namelen
) /* rule #1 */
1932 cp
= linestart
+ linelen
- namelen
;
1933 if (notinname (linestart
[linelen
-1]))
1934 cp
-= 1; /* rule #4 */
1935 if (cp
>= linestart
/* rule #2 */
1937 || notinname (cp
[-1])) /* rule #3 */
1938 && strneq (name
, cp
, namelen
)) /* rule #2 */
1939 named
= false; /* use implicit tag name */
1944 nname
= savenstr (name
, namelen
);
1946 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
1951 pfnote (char *name
, bool is_func
, char *linestart
, int linelen
, int lno
,
1953 /* tag name, or NULL if unnamed */
1954 /* tag is a function */
1955 /* start of the line where tag is */
1956 /* length of the line where tag is */
1958 /* character number */
1962 assert (name
== NULL
|| name
[0] != '\0');
1963 if (CTAGS
&& name
== NULL
)
1966 np
= xnew (1, node
);
1968 /* If ctags mode, change name "main" to M<thisfilename>. */
1969 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1971 char *fp
= strrchr (curfdp
->taggedfname
, '/');
1972 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1973 fp
= strrchr (np
->name
, '.');
1974 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1980 np
->been_warned
= false;
1982 np
->is_func
= is_func
;
1984 if (np
->fdp
->usecharno
)
1985 /* Our char numbers are 0-base, because of C language tradition?
1986 ctags compatibility? old versions compatibility? I don't know.
1987 Anyway, since emacs's are 1-base we expect etags.el to take care
1988 of the difference. If we wanted to have 1-based numbers, we would
1989 uncomment the +1 below. */
1990 np
->cno
= cno
/* + 1 */ ;
1992 np
->cno
= invalidcharno
;
1993 np
->left
= np
->right
= NULL
;
1994 if (CTAGS
&& !cxref_style
)
1996 if (strlen (linestart
) < 50)
1997 np
->regex
= concat (linestart
, "$", "");
1999 np
->regex
= savenstr (linestart
, 50);
2002 np
->regex
= savenstr (linestart
, linelen
);
2004 add_node (np
, &nodehead
);
2008 * Utility functions and data to avoid recursion.
2011 typedef struct stack_entry
{
2013 struct stack_entry
*next
;
2017 push_node (node
*np
, stkentry
**stack_top
)
2021 stkentry
*new = xnew (1, stkentry
);
2024 new->next
= *stack_top
;
2030 pop_node (stkentry
**stack_top
)
2036 stkentry
*old_start
= *stack_top
;
2038 ret
= (*stack_top
)->np
;
2039 *stack_top
= (*stack_top
)->next
;
2047 * emulate recursion on left children, iterate on right children.
2050 free_tree (register node
*np
)
2052 stkentry
*stack
= NULL
;
2056 /* Descent on left children. */
2059 push_node (np
, &stack
);
2062 /* Free node without left children. */
2063 node
*node_right
= np
->right
;
2069 /* Backtrack to find a node with right children, while freeing nodes
2070 that don't have right children. */
2071 while (node_right
== NULL
&& (np
= pop_node (&stack
)) != NULL
)
2073 node_right
= np
->right
;
2079 /* Free right children. */
2086 * delete a file description
2089 free_fdesc (register fdesc
*fdp
)
2091 free (fdp
->infname
);
2092 free (fdp
->infabsname
);
2093 free (fdp
->infabsdir
);
2094 free (fdp
->taggedfname
);
2101 * Adds a node to the tree of nodes. In etags mode, sort by file
2102 * name. In ctags mode, sort by tag name. Make no attempt at
2105 * add_node is the only function allowed to add nodes, so it can
2109 add_node (node
*np
, node
**cur_node_p
)
2111 node
*cur_node
= *cur_node_p
;
2113 /* Make the first node. */
2114 if (cur_node
== NULL
)
2124 /* For each file name, tags are in a linked sublist on the right
2125 pointer. The first tags of different files are a linked list
2126 on the left pointer. last_node points to the end of the last
2128 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2130 /* Let's use the same sublist as the last added node. */
2131 assert (last_node
->right
== NULL
);
2132 last_node
->right
= np
;
2137 while (cur_node
->fdp
!= np
->fdp
)
2139 if (cur_node
->left
== NULL
)
2141 /* The head of this sublist is not good for us. Let's try the
2143 cur_node
= cur_node
->left
;
2147 /* Scanning the list we found the head of a sublist which is
2148 good for us. Let's scan this sublist. */
2149 if (cur_node
->right
)
2151 cur_node
= cur_node
->right
;
2152 while (cur_node
->right
)
2153 cur_node
= cur_node
->right
;
2155 /* Make a new node in this sublist. */
2156 cur_node
->right
= np
;
2160 /* Make a new sublist. */
2161 cur_node
->left
= np
;
2165 } /* if ETAGS mode */
2169 node
**next_node
= &cur_node
;
2171 while ((cur_node
= *next_node
) != NULL
)
2173 int dif
= strcmp (np
->name
, cur_node
->name
);
2175 * If this tag name matches an existing one, then
2176 * do not add the node, but maybe print a warning.
2178 if (!dif
&& no_duplicates
)
2180 if (np
->fdp
== cur_node
->fdp
)
2185 "Duplicate entry in file %s, line %d: %s\n",
2186 np
->fdp
->infname
, lineno
, np
->name
);
2187 fprintf (stderr
, "Second entry ignored\n");
2190 else if (!cur_node
->been_warned
&& !no_warnings
)
2194 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2195 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2196 cur_node
->been_warned
= true;
2201 next_node
= dif
< 0 ? &cur_node
->left
: &cur_node
->right
;
2205 } /* if CTAGS mode */
2209 * invalidate_nodes ()
2210 * Scan the node tree and invalidate all nodes pointing to the
2211 * given file description (CTAGS case) or free them (ETAGS case).
2214 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2217 stkentry
*stack
= NULL
;
2223 /* Push all the left children on the stack. */
2224 while (np
->left
!= NULL
)
2226 push_node (np
, &stack
);
2229 /* Invalidate this node. */
2230 if (np
->fdp
== badfdp
)
2234 /* Pop nodes from stack, invalidating them, until we find one
2235 with a right child. */
2236 while ((np
= pop_node (&stack
)) != NULL
)
2238 if (np
->fdp
== badfdp
)
2240 if (np
->right
!= NULL
)
2244 /* Process the right child, if any. */
2251 node super_root
, *np_parent
= NULL
;
2253 super_root
.left
= np
;
2254 super_root
.fdp
= (fdesc
*) -1;
2259 /* Descent on left children until node with BADFP. */
2260 while (np
&& np
->fdp
!= badfdp
)
2262 assert (np
->fdp
!= NULL
);
2268 np_parent
->left
= np
->left
; /* detach subtree from the tree */
2269 np
->left
= NULL
; /* isolate it */
2270 free_tree (np
); /* free it */
2272 /* Continue with rest of tree. */
2273 np
= np_parent
->left
;
2276 *npp
= super_root
.left
;
2281 static int total_size_of_entries (node
*);
2282 static int number_len (long) ATTRIBUTE_CONST
;
/* Length of a non-negative number's decimal representation.
   Returns 1 for zero.  */
static int
number_len (long int num)
{
  int len = 1;
  while ((num /= 10) > 0)
    len += 1;
  return len;
}
2295 * Return total number of characters that put_entries will output for
2296 * the nodes in the linked list at the right of the specified node.
2297 * This count is irrelevant with etags.el since emacs 19.34 at least,
2298 * but is still supplied for backward compatibility.
2301 total_size_of_entries (register node
*np
)
2303 register int total
= 0;
2305 for (; np
!= NULL
; np
= np
->right
)
2308 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2309 if (np
->name
!= NULL
)
2310 total
+= strlen (np
->name
) + 1; /* name\001 */
2311 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2312 if (np
->cno
!= invalidcharno
) /* cno */
2313 total
+= number_len (np
->cno
);
2314 total
+= 1; /* newline */
2321 put_entry (node
*np
)
2324 static fdesc
*fdp
= NULL
;
2326 /* Output this entry */
2335 fprintf (tagf
, "\f\n%s,%d\n",
2336 fdp
->taggedfname
, total_size_of_entries (np
));
2337 fdp
->written
= true;
2339 fputs (np
->regex
, tagf
);
2340 fputc ('\177', tagf
);
2341 if (np
->name
!= NULL
)
2343 fputs (np
->name
, tagf
);
2344 fputc ('\001', tagf
);
2346 fprintf (tagf
, "%d,", np
->lno
);
2347 if (np
->cno
!= invalidcharno
)
2348 fprintf (tagf
, "%ld", np
->cno
);
2354 if (np
->name
== NULL
)
2355 error ("internal error: NULL name in ctags mode.");
2360 fprintf (stdout
, "%s %s %d\n",
2361 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2363 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2364 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2368 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2371 { /* function or #define macro with args */
2372 putc (searchar
, tagf
);
2375 for (sp
= np
->regex
; *sp
; sp
++)
2377 if (*sp
== '\\' || *sp
== searchar
)
2381 putc (searchar
, tagf
);
2384 { /* anything else; text pattern inadequate */
2385 fprintf (tagf
, "%d", np
->lno
);
2390 } /* if this node contains a valid tag */
2394 put_entries (node
*np
)
2396 stkentry
*stack
= NULL
;
2405 /* Stack subentries that precede this one. */
2408 push_node (np
, &stack
);
2411 /* Output this subentry. */
2413 /* Stack subentries that follow this one. */
2416 /* Output subentries that precede the next one. */
2417 np
= pop_node (&stack
);
2428 push_node (np
, &stack
);
2429 while ((np
= pop_node (&stack
)) != NULL
)
2431 /* Output this subentry. */
2435 /* Output subentries that follow this one. */
2436 put_entry (np
->right
);
2437 /* Stack subentries from the following files. */
2438 push_node (np
->left
, &stack
);
2441 push_node (np
->left
, &stack
);
/* Language-variant flags combined into the c_ext bitmask given to C_entries.
   C_EXT masks the dialect bits; C_AUTO and YACC lie outside that mask.
   C_STAR (0x3) and C_JAVA (0x5) both include the C_PLPL bit (0x1), so a
   keyword entry meant for Java but not plain C++ is tagged with
   (C_JAVA & ~C_PLPL) in the keyword table.  */
2448 #define C_EXT 0x00fff /* mask selecting the C dialect (extension) bits */
2449 #define C_PLAIN 0x00000 /* plain C: no dialect bit set */
2450 #define C_PLPL 0x00001 /* C++ */
2451 #define C_STAR 0x00003 /* C* (includes the C_PLPL bit) */
2452 #define C_JAVA 0x00005 /* Java (includes the C_PLPL bit) */
2453 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2454 #define YACC 0x10000 /* yacc file */
2457 * The C symbol tables.
2462 st_C_objprot
, st_C_objimpl
, st_C_objend
,
2464 st_C_ignore
, st_C_attribute
, st_C_enum_bf
,
2467 st_C_class
, st_C_template
,
2468 st_C_struct
, st_C_extern
, st_C_enum
, st_C_define
, st_C_typedef
2471 /* Feed stuff between (but not including) %[ and %] lines to:
2477 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2481 while, 0, st_C_ignore
2482 switch, 0, st_C_ignore
2483 return, 0, st_C_ignore
2484 __attribute__, 0, st_C_attribute
2485 GTY, 0, st_C_attribute
2486 @interface, 0, st_C_objprot
2487 @protocol, 0, st_C_objprot
2488 @implementation,0, st_C_objimpl
2489 @end, 0, st_C_objend
2490 import, (C_JAVA & ~C_PLPL), st_C_ignore
2491 package, (C_JAVA & ~C_PLPL), st_C_ignore
2492 friend, C_PLPL, st_C_ignore
2493 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2494 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2495 interface, (C_JAVA & ~C_PLPL), st_C_struct
2496 class, 0, st_C_class
2497 namespace, C_PLPL, st_C_struct
2498 domain, C_STAR, st_C_struct
2499 union, 0, st_C_struct
2500 struct, 0, st_C_struct
2501 extern, 0, st_C_extern
2503 typedef, 0, st_C_typedef
2504 define, 0, st_C_define
2505 undef, 0, st_C_define
2506 operator, C_PLPL, st_C_operator
2507 template, 0, st_C_template
2508 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2509 DEFUN, 0, st_C_gnumacro
2510 SYSCALL, 0, st_C_gnumacro
2511 ENTRY, 0, st_C_gnumacro
2512 PSEUDO, 0, st_C_gnumacro
2513 ENUM_BF, 0, st_C_enum_bf
2514 # These are defined inside C functions, so currently they are not met.
2515 # EXFUN used in glibc, DEFVAR_* in emacs.
2516 #EXFUN, 0, st_C_gnumacro
2517 #DEFVAR_, 0, st_C_gnumacro
2519 and replace lines between %< and %> with its output, then:
2520 - remove the #if characterset check
2521 - remove any #line directives
2522 - make in_word_set static and not inline
2523 - remove any 'register' qualifications from variable decls. */
2525 /* C code produced by gperf version 3.0.1 */
2526 /* Command-line: gperf -m 5 */
2527 /* Computed positions: -k'2-3' */
2529 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
2530 /* maximum key range = 34, duplicates = 0 */
2533 hash (const char *str
, int len
)
2535 static char const asso_values
[] =
2537 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2538 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2539 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2540 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2541 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2542 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2543 36, 36, 36, 36, 36, 36, 36, 36, 36, 3,
2544 27, 36, 36, 36, 36, 36, 36, 36, 26, 36,
2545 36, 36, 36, 25, 0, 0, 36, 36, 36, 0,
2546 36, 36, 36, 36, 36, 1, 36, 16, 36, 6,
2547 23, 0, 0, 36, 22, 0, 36, 36, 5, 0,
2548 0, 15, 1, 36, 6, 36, 8, 19, 36, 16,
2549 4, 5, 36, 36, 36, 36, 36, 36, 36, 36,
2550 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2551 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2552 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2553 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2554 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2555 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2556 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2557 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2558 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2559 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2560 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2561 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
2562 36, 36, 36, 36, 36, 36
2569 hval
+= asso_values
[(unsigned char) str
[2]];
2572 hval
+= asso_values
[(unsigned char) str
[1]];
2578 static struct C_stab_entry
*
2579 in_word_set (register const char *str
, register unsigned int len
)
2583 TOTAL_KEYWORDS
= 34,
2584 MIN_WORD_LENGTH
= 2,
2585 MAX_WORD_LENGTH
= 15,
2590 static struct C_stab_entry wordlist
[] =
2593 {"if", 0, st_C_ignore
},
2594 {"GTY", 0, st_C_attribute
},
2595 {"@end", 0, st_C_objend
},
2596 {"union", 0, st_C_struct
},
2597 {"define", 0, st_C_define
},
2598 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2599 {"template", 0, st_C_template
},
2600 {"operator", C_PLPL
, st_C_operator
},
2601 {"@interface", 0, st_C_objprot
},
2602 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2603 {"friend", C_PLPL
, st_C_ignore
},
2604 {"typedef", 0, st_C_typedef
},
2605 {"return", 0, st_C_ignore
},
2606 {"@implementation",0, st_C_objimpl
},
2607 {"@protocol", 0, st_C_objprot
},
2608 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2609 {"extern", 0, st_C_extern
},
2610 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2611 {"struct", 0, st_C_struct
},
2612 {"domain", C_STAR
, st_C_struct
},
2613 {"switch", 0, st_C_ignore
},
2614 {"enum", 0, st_C_enum
},
2615 {"for", 0, st_C_ignore
},
2616 {"namespace", C_PLPL
, st_C_struct
},
2617 {"class", 0, st_C_class
},
2618 {"while", 0, st_C_ignore
},
2619 {"undef", 0, st_C_define
},
2620 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2621 {"__attribute__", 0, st_C_attribute
},
2622 {"ENTRY", 0, st_C_gnumacro
},
2623 {"SYSCALL", 0, st_C_gnumacro
},
2624 {"ENUM_BF", 0, st_C_enum_bf
},
2625 {"PSEUDO", 0, st_C_gnumacro
},
2626 {"DEFUN", 0, st_C_gnumacro
}
2629 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2631 int key
= hash (str
, len
);
2633 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2635 const char *s
= wordlist
[key
].name
;
2637 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2638 return &wordlist
[key
];
2645 static enum sym_type
2646 C_symtype (char *str
, int len
, int c_ext
)
2648 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2650 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
2657 * Ignoring __attribute__ ((list))
2659 static bool inattribute
; /* looking at an __attribute__ construct */
2661 /* Ignoring ENUM_BF (type)
2664 static bool in_enum_bf
; /* inside parentheses following ENUM_BF */
2667 * C functions and variables are recognized using a simple
2668 * finite automaton. fvdef is its state variable.
2672 fvnone
, /* nothing seen */
2673 fdefunkey
, /* Emacs DEFUN keyword seen */
2674 fdefunname
, /* Emacs DEFUN name seen */
2675 foperator
, /* func: operator keyword seen (cplpl) */
2676 fvnameseen
, /* function or variable name seen */
2677 fstartlist
, /* func: just after open parenthesis */
2678 finlist
, /* func: in parameter list */
2679 flistseen
, /* func: after parameter list */
2680 fignore
, /* func: before open brace */
2681 vignore
/* var-like: ignore until ';' */
2684 static bool fvextern
; /* func or var: extern keyword seen; */
2687 * typedefs are recognized using a simple finite automaton.
2688 * typdef is its state variable.
2692 tnone
, /* nothing seen */
2693 tkeyseen
, /* typedef keyword seen */
2694 ttypeseen
, /* defined type seen */
2695 tinbody
, /* inside typedef body */
2696 tend
, /* just before typedef tag */
2697 tignore
/* junk after typedef tag */
2701 * struct-like structures (enum, struct and union) are recognized
2702 * using another simple finite automaton. `structdef' is its state
2707 snone
, /* nothing seen yet,
2708 or in struct body if bracelev > 0 */
2709 skeyseen
, /* struct-like keyword seen */
2710 stagseen
, /* struct-like tag seen */
2711 scolonseen
/* colon seen after struct-like tag */
2715 * When objdef is different from onone, objtag is the name of the class.
2717 static const char *objtag
= "<uninited>";
2720 * Yet another little state machine to deal with preprocessor lines.
2724 dnone
, /* nothing seen */
2725 dsharpseen
, /* '#' seen as first char on line */
2726 ddefineseen
, /* '#' and 'define' seen */
2727 dignorerest
/* ignore rest of line */
2731 * State machine for Objective C protocols and implementations.
2732 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2736 onone
, /* nothing seen */
2737 oprotocol
, /* @interface or @protocol seen */
2738 oimplementation
, /* @implementation seen */
2739 otagseen
, /* class name seen */
2740 oparenseen
, /* parenthesis before category seen */
2741 ocatseen
, /* category name seen */
2742 oinbody
, /* in @implementation body */
2743 omethodsign
, /* in @implementation body, after +/- */
2744 omethodtag
, /* after method name */
2745 omethodcolon
, /* after method colon */
2746 omethodparm
, /* after method parameter */
2747 oignore
/* wait for @end */
2752 * Use this structure to keep info about the token read, and how it
2753 * should be tagged. Used by the make_C_tag function to build a tag.
2757 char *line
; /* string containing the token */
2758 int offset
; /* where the token starts in LINE */
2759 int length
; /* token length */
2761 The previous members can be used to pass strings around for generic
2762 purposes. The following ones specifically refer to creating tags. In this
2763 case the token contained here is the pattern that will be used to create a
2766 bool valid
; /* do not create a tag; the token should be
2767 invalidated whenever a state machine is
2768 reset prematurely */
2769 bool named
; /* create a named tag */
2770 int lineno
; /* source line number of tag */
2771 long linepos
; /* source char number of tag */
2772 } token
; /* latest token read */
2775 * Variables and functions for dealing with nested structures.
2776 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2778 static void pushclass_above (int, char *, int);
2779 static void popclass_above (int);
2780 static void write_classname (linebuffer
*, const char *qualifier
);
2783 char **cname
; /* nested class names */
2784 int *bracelev
; /* nested class brace level */
2785 int nl
; /* class nesting level (elements used) */
2786 int size
; /* length of the array */
2787 } cstack
; /* stack for nested declaration tags */
2788 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2789 #define nestlev (cstack.nl)
2790 /* After struct keyword or in struct body, not inside a nested function. */
2791 #define instruct (structdef == snone && nestlev > 0 \
2792 && bracelev == cstack.bracelev[nestlev-1] + 1)
2795 pushclass_above (int bracelev
, char *str
, int len
)
2799 popclass_above (bracelev
);
2801 if (nl
>= cstack
.size
)
2803 int size
= cstack
.size
*= 2;
2804 xrnew (cstack
.cname
, size
, char *);
2805 xrnew (cstack
.bracelev
, size
, int);
2807 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2808 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2809 cstack
.bracelev
[nl
] = bracelev
;
2814 popclass_above (int bracelev
)
2818 for (nl
= cstack
.nl
- 1;
2819 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2822 free (cstack
.cname
[nl
]);
2828 write_classname (linebuffer
*cn
, const char *qualifier
)
2831 int qlen
= strlen (qualifier
);
2833 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2837 cn
->buffer
[0] = '\0';
2841 len
= strlen (cstack
.cname
[0]);
2842 linebuffer_setlen (cn
, len
);
2843 strcpy (cn
->buffer
, cstack
.cname
[0]);
2845 for (i
= 1; i
< cstack
.nl
; i
++)
2847 char *s
= cstack
.cname
[i
];
2850 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2851 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2856 static bool consider_token (char *, int, int, int *, int, int, bool *);
2857 static void make_C_tag (bool);
2861 * checks to see if the current token is at the start of a
2862 * function or variable, or corresponds to a typedef, or
2863 * is a struct/union/enum tag, or #define, or an enum constant.
2865 * *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2866 * with args. C_EXTP points to which language we are looking at.
2877 consider_token (char *str
, int len
, int c
, int *c_extp
,
2878 int bracelev
, int parlev
, bool *is_func_or_var
)
2879 /* IN: token pointer */
2880 /* IN: token length */
2881 /* IN: first char after the token */
2882 /* IN, OUT: C extensions mask */
2883 /* IN: brace level */
2884 /* IN: parenthesis level */
2885 /* OUT: function or variable found */
2887 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2888 structtype is the type of the preceding struct-like keyword, and
2889 structbracelev is the brace level where it has been seen. */
2890 static enum sym_type structtype
;
2891 static int structbracelev
;
2892 static enum sym_type toktype
;
2895 toktype
= C_symtype (str
, len
, *c_extp
);
2898 * Skip __attribute__
2900 if (toktype
== st_C_attribute
)
2909 if (toktype
== st_C_enum_bf
&& definedef
== dnone
)
2916 * Advance the definedef state machine.
2921 /* We're not on a preprocessor line. */
2922 if (toktype
== st_C_gnumacro
)
2929 if (toktype
== st_C_define
)
2931 definedef
= ddefineseen
;
2935 definedef
= dignorerest
;
2940 * Make a tag for any macro, unless it is a constant
2941 * and constantypedefs is false.
2943 definedef
= dignorerest
;
2944 *is_func_or_var
= (c
== '(');
2945 if (!*is_func_or_var
&& !constantypedefs
)
2952 error ("internal error: definedef value.");
2961 if (toktype
== st_C_typedef
)
2984 if (structdef
== snone
&& fvdef
== fvnone
)
3006 case st_C_javastruct
:
3007 if (structdef
== stagseen
)
3008 structdef
= scolonseen
;
3012 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
3014 && definedef
== dnone
&& structdef
== snone
3015 && typdef
== tnone
&& fvdef
== fvnone
)
3016 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
3017 if (toktype
== st_C_template
)
3024 && (typdef
== tkeyseen
3025 || (typedefs_or_cplusplus
&& structdef
== snone
)))
3027 structdef
= skeyseen
;
3028 structtype
= toktype
;
3029 structbracelev
= bracelev
;
3030 if (fvdef
== fvnameseen
)
3038 if (structdef
== skeyseen
)
3040 structdef
= stagseen
;
3044 if (typdef
!= tnone
)
3047 /* Detect Objective C constructs. */
3057 objdef
= oimplementation
;
3063 case oimplementation
:
3064 /* Save the class tag for functions or variables defined inside. */
3065 objtag
= savenstr (str
, len
);
3069 /* Save the class tag for categories. */
3070 objtag
= savenstr (str
, len
);
3072 *is_func_or_var
= true;
3076 *is_func_or_var
= true;
3084 objdef
= omethodtag
;
3085 linebuffer_setlen (&token_name
, len
);
3086 memcpy (token_name
.buffer
, str
, len
);
3087 token_name
.buffer
[len
] = '\0';
3093 objdef
= omethodparm
;
3098 objdef
= omethodtag
;
3101 int oldlen
= token_name
.len
;
3103 linebuffer_setlen (&token_name
, oldlen
+ len
);
3104 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
3105 token_name
.buffer
[oldlen
+ len
] = '\0';
3111 if (toktype
== st_C_objend
)
3113 /* Memory leakage here: the string pointed by objtag is
3114 never released, because many tests would be needed to
3115 avoid breaking on incorrect input code. The amount of
3116 memory leaked here is the sum of the lengths of the
3126 /* A function, variable or enum constant? */
3148 *is_func_or_var
= true;
3152 && structdef
== snone
3153 && structtype
== st_C_enum
&& bracelev
> structbracelev
3154 /* Don't tag tokens in expressions that assign values to enum
3156 && fvdef
!= vignore
)
3157 return true; /* enum constant */
3163 fvdef
= fdefunname
; /* GNU macro */
3164 *is_func_or_var
= true;
3172 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
3173 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
3184 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
3186 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
3187 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
3189 *is_func_or_var
= true;
3192 if (bracelev
> 0 && !instruct
)
3194 fvdef
= fvnameseen
; /* function or variable */
3195 *is_func_or_var
= true;
3210 * C_entries often keeps pointers to tokens or lines which are older than
3211 * the line currently read. By keeping two line buffers, and switching
3212 * them at end of line, it is possible to use those pointers.
3220 #define current_lb_is_new (newndx == curndx) /* true while both indices name the same buffer */
3221 #define switch_line_buffers() (curndx = 1 - curndx) /* flip curndx between 0 and 1 */
3223 #define curlb (lbs[curndx].lb) /* line buffer selected by curndx */
3224 #define newlb (lbs[newndx].lb) /* line buffer selected by newndx */
3225 #define curlinepos (lbs[curndx].linepos) /* linepos stored alongside curlb */
3226 #define newlinepos (lbs[newndx].linepos) /* linepos stored alongside newlb */
3228 #define plainc ((c_ext & C_EXT) == C_PLAIN) /* no dialect bit is set in c_ext */
3229 #define cplpl (c_ext & C_PLPL) /* nonzero when the C_PLPL bit is set; C_STAR and C_JAVA contain that bit too */
3230 #define cjava ((c_ext & C_JAVA) == C_JAVA) /* every bit of C_JAVA is set in c_ext */
3232 #define CNL_SAVE_DEFINEDEF() \
3234 curlinepos = charno; \
3235 readline (&curlb, inf); \
3236 lp = curlb.buffer; \
3243 CNL_SAVE_DEFINEDEF (); \
3244 if (savetoken.valid) \
3246 token = savetoken; \
3247 savetoken.valid = false; \
3249 definedef = dnone; \
3254 make_C_tag (bool isfun
)
3256 /* This function is never called when token.valid is false, but
3257 we must protect against invalid input or internal errors. */
3259 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
3260 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3262 { /* this branch is optimized away if !DEBUG */
3263 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
3264 token_name
.len
+ 17, isfun
, token
.line
,
3265 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
3266 error ("INVALID TOKEN");
3269 token
.valid
= false;
/* Return true while stream INF has neither reached end-of-file nor
   recorded an I/O error, i.e. while another read attempt may still
   deliver data.  Only the stream's status indicators are inspected;
   nothing is read.  */
static bool
perhaps_more_input (FILE *inf)
{
  return !feof (inf) && !ferror (inf);
}
3281 * This routine finds functions, variables, typedefs,
3282 * #define's, enum constants and struct/union/enum definitions in
3283 * C syntax and adds them to the list.
3286 C_entries (int c_ext
, FILE *inf
)
3287 /* extension of C */
3290 register char c
; /* latest char read; '\0' for end of line */
3291 register char *lp
; /* pointer one beyond the character `c' */
3292 int curndx
, newndx
; /* indices for current and new lb */
3293 register int tokoff
; /* offset in line of start of current token */
3294 register int toklen
; /* length of current token */
3295 const char *qualifier
; /* string used to qualify names */
3296 int qlen
; /* length of qualifier */
3297 int bracelev
; /* current brace level */
3298 int bracketlev
; /* current bracket level */
3299 int parlev
; /* current parenthesis level */
3300 int attrparlev
; /* __attribute__ parenthesis level */
3301 int templatelev
; /* current template level */
3302 int typdefbracelev
; /* bracelev where a typedef struct body began */
3303 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3304 bool yacc_rules
; /* in the rules part of a yacc file */
3305 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
3308 linebuffer_init (&lbs
[0].lb
);
3309 linebuffer_init (&lbs
[1].lb
);
3310 if (cstack
.size
== 0)
3312 cstack
.size
= (DEBUG
) ? 1 : 4;
3314 cstack
.cname
= xnew (cstack
.size
, char *);
3315 cstack
.bracelev
= xnew (cstack
.size
, int);
3318 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3319 curndx
= newndx
= 0;
3323 fvdef
= fvnone
; fvextern
= false; typdef
= tnone
;
3324 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3326 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3327 token
.valid
= savetoken
.valid
= false;
3328 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3330 { qualifier
= "."; qlen
= 1; }
3332 { qualifier
= "::"; qlen
= 2; }
3335 while (perhaps_more_input (inf
))
3340 /* If we are at the end of the line, the next character is a
3341 '\0'; do not skip it, because it is what tells us
3342 to read the next line. */
3363 /* Newlines inside comments do not end macro definitions in
3365 CNL_SAVE_DEFINEDEF ();
3378 /* Newlines inside strings do not end macro definitions
3379 in traditional cpp, even though compilers don't
3380 usually accept them. */
3381 CNL_SAVE_DEFINEDEF ();
3391 /* Hmmm, something went wrong. */
3427 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3442 else if (/* cplpl && */ *lp
== '/')
3448 if ((c_ext
& YACC
) && *lp
== '%')
3450 /* Entering or exiting rules section in yacc file. */
3452 definedef
= dnone
; fvdef
= fvnone
; fvextern
= false;
3453 typdef
= tnone
; structdef
= snone
;
3454 midtoken
= inquote
= inchar
= incomm
= quotednl
= false;
3456 yacc_rules
= !yacc_rules
;
3462 if (definedef
== dnone
)
3465 bool cpptoken
= true;
3467 /* Look back on this line. If all blanks, or nonblanks
3468 followed by an end of comment, this is a preprocessor
3470 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3471 if (!c_isspace (*cp
))
3473 if (*cp
== '*' && cp
[1] == '/')
3483 definedef
= dsharpseen
;
3484 /* This is needed for tagging enum values: when there are
3485 preprocessor conditionals inside the enum, we need to
3486 reset the value of fvdef so that the next enum value is
3487 tagged even though the one before it did not end in a
3489 if (fvdef
== vignore
&& instruct
&& parlev
== 0)
3491 if (strneq (cp
, "#if", 3) || strneq (cp
, "#el", 3))
3495 } /* if (definedef == dnone) */
3506 CNL_SAVE_DEFINEDEF ();
3513 /* Consider token only if some involved conditions are satisfied. */
3514 if (typdef
!= tignore
3515 && definedef
!= dignorerest
3518 && (definedef
!= dnone
3519 || structdef
!= scolonseen
)
3527 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3528 /* This handles :: in the middle,
3529 but not at the beginning of an identifier.
3530 Also, space-separated :: is not recognized. */
3532 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3533 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3537 goto still_in_token
;
3541 bool funorvar
= false;
3544 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3545 &c_ext
, bracelev
, parlev
,
3548 if (fvdef
== foperator
)
3551 lp
= skip_spaces (lp
-1);
3555 && !c_isspace (*lp
) && *lp
!= '(')
3558 toklen
+= lp
- oldlp
;
3560 token
.named
= false;
3562 && nestlev
> 0 && definedef
== dnone
)
3563 /* in struct body */
3568 write_classname (&token_name
, qualifier
);
3569 len
= token_name
.len
;
3570 linebuffer_setlen (&token_name
,
3571 len
+ qlen
+ toklen
);
3572 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3574 newlb
.buffer
+ tokoff
);
3578 linebuffer_setlen (&token_name
, toklen
);
3579 sprintf (token_name
.buffer
, "%.*s",
3580 toklen
, newlb
.buffer
+ tokoff
);
3584 else if (objdef
== ocatseen
)
3585 /* Objective C category */
3589 int len
= strlen (objtag
) + 2 + toklen
;
3590 linebuffer_setlen (&token_name
, len
);
3591 sprintf (token_name
.buffer
, "%s(%.*s)",
3593 newlb
.buffer
+ tokoff
);
3597 linebuffer_setlen (&token_name
, toklen
);
3598 sprintf (token_name
.buffer
, "%.*s",
3599 toklen
, newlb
.buffer
+ tokoff
);
3603 else if (objdef
== omethodtag
3604 || objdef
== omethodparm
)
3605 /* Objective C method */
3609 else if (fvdef
== fdefunname
)
3610 /* GNU DEFUN and similar macros */
3612 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3621 /* First, tag it as its C name */
3622 linebuffer_setlen (&token_name
, toklen
);
3623 memcpy (token_name
.buffer
,
3624 newlb
.buffer
+ tokoff
, toklen
);
3625 token_name
.buffer
[toklen
] = '\0';
3627 token
.lineno
= lineno
;
3628 token
.offset
= tokoff
;
3629 token
.length
= toklen
;
3630 token
.line
= newlb
.buffer
;
3631 token
.linepos
= newlinepos
;
3633 make_C_tag (funorvar
);
3635 /* Rewrite the tag so that emacs lisp DEFUNs
3636 can be found also by their elisp name */
3637 linebuffer_setlen (&token_name
, len
);
3638 memcpy (token_name
.buffer
,
3639 newlb
.buffer
+ off
, len
);
3640 token_name
.buffer
[len
] = '\0';
3643 if (token_name
.buffer
[len
] == '_')
3644 token_name
.buffer
[len
] = '-';
3645 token
.named
= defun
;
3649 linebuffer_setlen (&token_name
, toklen
);
3650 memcpy (token_name
.buffer
,
3651 newlb
.buffer
+ tokoff
, toklen
);
3652 token_name
.buffer
[toklen
] = '\0';
3653 /* Name macros and members. */
3654 token
.named
= (structdef
== stagseen
3655 || typdef
== ttypeseen
3658 && definedef
== dignorerest
)
3660 && definedef
== dnone
3661 && structdef
== snone
3664 token
.lineno
= lineno
;
3665 token
.offset
= tokoff
;
3666 token
.length
= toklen
;
3667 token
.line
= newlb
.buffer
;
3668 token
.linepos
= newlinepos
;
3671 if (definedef
== dnone
3672 && (fvdef
== fvnameseen
3673 || fvdef
== foperator
3674 || structdef
== stagseen
3676 || typdef
== ttypeseen
3677 || objdef
!= onone
))
3679 if (current_lb_is_new
)
3680 switch_line_buffers ();
3682 else if (definedef
!= dnone
3683 || fvdef
== fdefunname
3685 make_C_tag (funorvar
);
3687 else /* not yacc and consider_token failed */
3689 if (inattribute
&& fvdef
== fignore
)
3691 /* We have just met __attribute__ after a
3692 function parameter list: do not tag the
3699 } /* if (endtoken (c)) */
3700 else if (intoken (c
))
3706 } /* if (midtoken) */
3707 else if (begtoken (c
))
3715 /* This prevents tagging fb in
3716 void (__attribute__((noreturn)) *fb) (void);
3717 Fixing this is not easy and not very important. */
3721 if (plainc
|| declarations
)
3723 make_C_tag (true); /* a function */
3730 if (structdef
== stagseen
&& !cjava
)
3732 popclass_above (bracelev
);
3742 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3744 tokoff
= lp
- 1 - newlb
.buffer
;
3749 } /* if (begtoken) */
3750 } /* if must look at token */
3753 /* Detect end of line, colon, comma, semicolon and various braces
3754 after having handled a token.*/
3760 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3762 make_C_tag (false); /* a yacc function */
3765 if (definedef
!= dnone
)
3771 make_C_tag (true); /* an Objective C class */
3775 objdef
= omethodcolon
;
3778 int toklen
= token_name
.len
;
3779 linebuffer_setlen (&token_name
, toklen
+ 1);
3780 strcpy (token_name
.buffer
+ toklen
, ":");
3786 if (structdef
== stagseen
)
3788 structdef
= scolonseen
;
3791 /* Should be useless, but may work as a safety net. */
3792 if (cplpl
&& fvdef
== flistseen
)
3794 make_C_tag (true); /* a function */
3800 if (definedef
!= dnone
|| inattribute
)
3806 make_C_tag (false); /* a typedef */
3816 if (typdef
== tignore
|| cplpl
)
3820 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3821 || (members
&& instruct
))
3822 make_C_tag (false); /* a variable */
3825 token
.valid
= false;
3829 && (cplpl
|| !instruct
)
3830 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3832 && plainc
&& instruct
))
3833 make_C_tag (true); /* a function */
3839 && cplpl
&& structdef
== stagseen
)
3840 make_C_tag (false); /* forward declaration */
3842 token
.valid
= false;
3843 } /* switch (fvdef) */
3849 if (structdef
== stagseen
)
3853 if (definedef
!= dnone
|| inattribute
)
3859 make_C_tag (true); /* an Objective C method */
3874 if (instruct
&& parlev
== 0)
3885 && (!fvextern
|| declarations
))
3886 || (members
&& instruct
)))
3887 make_C_tag (false); /* a variable */
3890 if ((declarations
&& typdef
== tnone
&& !instruct
)
3891 || (members
&& typdef
!= tignore
&& instruct
))
3893 make_C_tag (true); /* a function */
3896 else if (!declarations
)
3898 token
.valid
= false;
3903 if (structdef
== stagseen
)
3907 if (definedef
!= dnone
|| inattribute
)
3909 if (structdef
== stagseen
)
3916 make_C_tag (false); /* a typedef */
3928 if ((members
&& bracelev
== 1)
3929 || (globals
&& bracelev
== 0
3930 && (!fvextern
|| declarations
)))
3931 make_C_tag (false); /* a variable */
3947 if (definedef
!= dnone
)
3949 if (objdef
== otagseen
&& parlev
== 0)
3950 objdef
= oparenseen
;
3954 if (typdef
== ttypeseen
3958 /* This handles constructs like:
3959 typedef void OperatorFun (int fun); */
3980 if (--attrparlev
== 0)
3981 inattribute
= false;
3990 if (definedef
!= dnone
)
3992 if (objdef
== ocatseen
&& parlev
== 1)
3994 make_C_tag (true); /* an Objective C category */
4010 || typdef
== ttypeseen
))
4013 make_C_tag (false); /* a typedef */
4016 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
4020 if (definedef
!= dnone
)
4022 if (typdef
== ttypeseen
)
4024 /* Whenever typdef is set to tinbody (currently only
4025 here), typdefbracelev should be set to bracelev. */
4027 typdefbracelev
= bracelev
;
4032 if (cplpl
&& !class_qualify
)
4034 /* Remove class and namespace qualifiers from the token,
4035 leaving only the method/member name. */
4036 char *cc
, *uqname
= token_name
.buffer
;
4037 char *tok_end
= token_name
.buffer
+ token_name
.len
;
4039 for (cc
= token_name
.buffer
; cc
< tok_end
; cc
++)
4041 if (*cc
== ':' && cc
[1] == ':')
4047 if (uqname
> token_name
.buffer
)
4049 int uqlen
= strlen (uqname
);
4050 linebuffer_setlen (&token_name
, uqlen
);
4051 memmove (token_name
.buffer
, uqname
, uqlen
+ 1);
4054 make_C_tag (true); /* a function */
4063 make_C_tag (true); /* an Objective C class */
4068 make_C_tag (true); /* an Objective C method */
4072 /* Neutralize `extern "C" {' grot. */
4073 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
4083 case skeyseen
: /* unnamed struct */
4084 pushclass_above (bracelev
, NULL
, 0);
4087 case stagseen
: /* named struct or enum */
4088 case scolonseen
: /* a class */
4089 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
4091 make_C_tag (false); /* a struct or enum */
4099 if (definedef
!= dnone
)
4101 if (fvdef
== fstartlist
)
4103 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
4104 token
.valid
= false;
4108 if (definedef
!= dnone
)
4111 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
4114 token
.valid
= false; /* unexpected value, token unreliable */
4115 bracelev
= 0; /* reset brace level if first column */
4116 parlev
= 0; /* also reset paren level, just in case... */
4118 else if (bracelev
< 0)
4120 token
.valid
= false; /* something gone amiss, token unreliable */
4123 if (bracelev
== 0 && fvdef
== vignore
)
4124 fvdef
= fvnone
; /* end of function */
4125 popclass_above (bracelev
);
4127 /* Only if typdef == tinbody is typdefbracelev significant. */
4128 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
4130 assert (bracelev
== typdefbracelev
);
4135 if (definedef
!= dnone
)
4145 if ((members
&& bracelev
== 1)
4146 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
4147 make_C_tag (false); /* a variable */
4155 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
4162 if (templatelev
> 0)
4170 if (objdef
== oinbody
&& bracelev
== 0)
4172 objdef
= omethodsign
;
4177 case '#': case '~': case '&': case '%': case '/':
4178 case '|': case '^': case '!': case '.': case '?':
4179 if (definedef
!= dnone
)
4181 /* These surely cannot follow a function tag in C. */
4194 if (objdef
== otagseen
)
4196 make_C_tag (true); /* an Objective C class */
4199 /* If a macro spans multiple lines don't reset its state. */
4201 CNL_SAVE_DEFINEDEF ();
4207 } /* while not eof */
4209 free (lbs
[0].lb
.buffer
);
4210 free (lbs
[1].lb
.buffer
);
4214 * Process either a C++ file or a C file depending on the setting
4218 default_C_entries (FILE *inf
)
4220 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
4223 /* Always do plain C. */
4225 plain_C_entries (FILE *inf
)
4230 /* Always do C++. */
4232 Cplusplus_entries (FILE *inf
)
4234 C_entries (C_PLPL
, inf
);
4237 /* Always do Java. */
4239 Cjava_entries (FILE *inf
)
4241 C_entries (C_JAVA
, inf
);
4246 Cstar_entries (FILE *inf
)
4248 C_entries (C_STAR
, inf
);
4251 /* Always do Yacc. */
4253 Yacc_entries (FILE *inf
)
4255 C_entries (YACC
, inf
);
4259 /* Useful macros. */
4260 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4261 while (perhaps_more_input (file_pointer) \
4262 && (readline (&(line_buffer), file_pointer), \
4263 (char_pointer) = (line_buffer).buffer, \
4266 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4267 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4268 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4269 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
4270 && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1), true)) /* skip spaces */
4272 /* Similar to LOOKING_AT but does not use notinname, does not skip */
4273 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4274 ((assert ("" kw), true) /* syntax error if not a literal string */ \
4275 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
4276 && ((cp) += sizeof (kw) - 1, true)) /* skip spaces */
4279 * Read a file, but do no processing. This is used to do regexp
4280 * matching on files that have no language defined.
4283 just_read_file (FILE *inf
)
4285 while (perhaps_more_input (inf
))
4286 readline (&lb
, inf
);
4290 /* Fortran parsing */
4292 static void F_takeprec (void);
4293 static void F_getit (FILE *);
4298 dbp
= skip_spaces (dbp
);
4302 dbp
= skip_spaces (dbp
);
4303 if (strneq (dbp
, "(*)", 3))
4308 if (!c_isdigit (*dbp
))
4310 --dbp
; /* force failure */
4315 while (c_isdigit (*dbp
));
4323 dbp
= skip_spaces (dbp
);
4326 readline (&lb
, inf
);
4331 dbp
= skip_spaces (dbp
);
4333 if (!c_isalpha (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
4335 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
4337 make_tag (dbp
, cp
-dbp
, true,
4338 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4343 Fortran_functions (FILE *inf
)
4345 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4348 dbp
++; /* Ratfor escape to fortran */
4349 dbp
= skip_spaces (dbp
);
4353 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
4354 dbp
= skip_spaces (dbp
);
4356 if (LOOKING_AT_NOCASE (dbp
, "pure"))
4357 dbp
= skip_spaces (dbp
);
4359 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
4360 dbp
= skip_spaces (dbp
);
4362 switch (c_tolower (*dbp
))
4365 if (nocase_tail ("integer"))
4369 if (nocase_tail ("real"))
4373 if (nocase_tail ("logical"))
4377 if (nocase_tail ("complex") || nocase_tail ("character"))
4381 if (nocase_tail ("double"))
4383 dbp
= skip_spaces (dbp
);
4386 if (nocase_tail ("precision"))
4392 dbp
= skip_spaces (dbp
);
4395 switch (c_tolower (*dbp
))
4398 if (nocase_tail ("function"))
4402 if (nocase_tail ("subroutine"))
4406 if (nocase_tail ("entry"))
4410 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4412 dbp
= skip_spaces (dbp
);
4413 if (*dbp
== '\0') /* assume un-named */
4414 make_tag ("blockdata", 9, true,
4415 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4417 F_getit (inf
); /* look for name */
4426 * Go language support
4427 * Original code by Xi Lu <lx@shellcodes.org> (2016)
4430 Go_functions(FILE *inf
)
4434 LOOP_ON_INPUT_LINES(inf
, lb
, cp
)
4436 cp
= skip_spaces (cp
);
4438 if (LOOKING_AT (cp
, "package"))
4441 while (!notinname (*cp
) && *cp
!= '\0')
4443 make_tag (name
, cp
- name
, false, lb
.buffer
,
4444 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4446 else if (LOOKING_AT (cp
, "func"))
4448 /* Go implementation of interface, such as:
4449 func (n *Integer) Add(m Integer) ...
4450 skip `(n *Integer)` part.
4456 cp
= skip_spaces (cp
+1);
4463 while (!notinname (*cp
))
4466 make_tag (name
, cp
- name
, true, lb
.buffer
,
4467 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4470 else if (members
&& LOOKING_AT (cp
, "type"))
4474 /* Ignore the likes of the following:
4482 while (!notinname (*cp
) && *cp
!= '\0')
4485 make_tag (name
, cp
- name
, false, lb
.buffer
,
4486 cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4495 * Philippe Waroquiers (1998)
4498 /* Once we are positioned after an "interesting" keyword, let's get
4499 the real tag value necessary. */
4501 Ada_getit (FILE *inf
, const char *name_qualifier
)
4507 while (perhaps_more_input (inf
))
4509 dbp
= skip_spaces (dbp
);
4511 || (dbp
[0] == '-' && dbp
[1] == '-'))
4513 readline (&lb
, inf
);
4516 switch (c_tolower (*dbp
))
4519 if (nocase_tail ("body"))
4521 /* Skipping body of procedure body or package body or ....
4522 resetting qualifier to body instead of spec. */
4523 name_qualifier
= "/b";
4528 /* Skipping type of task type or protected type ... */
4529 if (nocase_tail ("type"))
4536 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4541 dbp
= skip_spaces (dbp
);
4543 c_isalnum (*cp
) || *cp
== '_' || *cp
== '.';
4551 name
= concat (dbp
, name_qualifier
, "");
4553 make_tag (name
, strlen (name
), true,
4554 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4563 Ada_funcs (FILE *inf
)
4565 bool inquote
= false;
4566 bool skip_till_semicolumn
= false;
4568 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4570 while (*dbp
!= '\0')
4572 /* Skip a string i.e. "abcd". */
4573 if (inquote
|| (*dbp
== '"'))
4575 dbp
= strchr (dbp
+ !inquote
, '"');
4580 continue; /* advance char */
4585 break; /* advance line */
4589 /* Skip comments. */
4590 if (dbp
[0] == '-' && dbp
[1] == '-')
4591 break; /* advance line */
4593 /* Skip character enclosed in single quote i.e. 'a'
4594 and skip single quote starting an attribute i.e. 'Image. */
4603 if (skip_till_semicolumn
)
4606 skip_till_semicolumn
= false;
4608 continue; /* advance char */
4611 /* Search for beginning of a token. */
4612 if (!begtoken (*dbp
))
4615 continue; /* advance char */
4618 /* We are at the beginning of a token. */
4619 switch (c_tolower (*dbp
))
4622 if (!packages_only
&& nocase_tail ("function"))
4623 Ada_getit (inf
, "/f");
4625 break; /* from switch */
4626 continue; /* advance char */
4628 if (!packages_only
&& nocase_tail ("procedure"))
4629 Ada_getit (inf
, "/p");
4630 else if (nocase_tail ("package"))
4631 Ada_getit (inf
, "/s");
4632 else if (nocase_tail ("protected")) /* protected type */
4633 Ada_getit (inf
, "/t");
4635 break; /* from switch */
4636 continue; /* advance char */
4639 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4641 /* when tagging types, avoid tagging use type Pack.Typename;
4642 for this, we will skip everything till a ; */
4643 skip_till_semicolumn
= true;
4644 continue; /* advance char */
4648 if (!packages_only
&& nocase_tail ("task"))
4649 Ada_getit (inf
, "/k");
4650 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4652 Ada_getit (inf
, "/t");
4653 while (*dbp
!= '\0')
4657 break; /* from switch */
4658 continue; /* advance char */
4661 /* Look for the end of the token. */
4662 while (!endtoken (*dbp
))
4665 } /* advance char */
4666 } /* advance line */
4671 * Unix and microcontroller assembly tag handling
4672 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4673 * Idea by Bob Weiner, Motorola Inc. (1994)
4676 Asm_labels (FILE *inf
)
4680 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4682 /* If first char is alphabetic or one of [_.$], test for colon
4683 following identifier. */
4684 if (c_isalpha (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4686 /* Read past label. */
4688 while (c_isalnum (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4690 if (*cp
== ':' || c_isspace (*cp
))
4691 /* Found end of label, so copy it and add it to the table. */
4692 make_tag (lb
.buffer
, cp
- lb
.buffer
, true,
4693 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4701 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4702 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4703 * Perl variable names: /^(my|local).../
4704 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4705 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4706 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4709 Perl_functions (FILE *inf
)
4711 char *package
= savestr ("main"); /* current package name */
4714 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4716 cp
= skip_spaces (cp
);
4718 if (LOOKING_AT (cp
, "package"))
4721 get_tag (cp
, &package
);
4723 else if (LOOKING_AT (cp
, "sub"))
4729 while (!notinname (*cp
))
4732 continue; /* nothing found */
4733 pos
= strchr (sp
, ':');
4734 if (pos
&& pos
< cp
&& pos
[1] == ':')
4736 /* The name is already qualified. */
4739 char *q
= pos
+ 2, *qpos
;
4740 while ((qpos
= strchr (q
, ':')) != NULL
4746 make_tag (sp
, cp
- sp
, true,
4747 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4749 else if (class_qualify
)
4752 char savechar
, *name
;
4756 name
= concat (package
, "::", sp
);
4758 make_tag (name
, strlen (name
), true,
4759 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4763 make_tag (sp
, cp
- sp
, true,
4764 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4766 else if (LOOKING_AT (cp
, "use constant")
4767 || LOOKING_AT (cp
, "use constant::defer"))
4769 /* For hash style multi-constant like
4770 use constant { FOO => 123,
4772 only the first FOO is picked up. Parsing across the value
4773 expressions would be difficult in general, due to possible nested
4774 hashes, here-documents, etc. */
4776 cp
= skip_spaces (cp
+1);
4779 else if (globals
) /* only if we are tagging global vars */
4781 /* Skip a qualifier, if any. */
4782 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4783 /* After "my" or "local", but before any following paren or space. */
4784 char *varstart
= cp
;
4786 if (qual
/* should this be removed? If yes, how? */
4787 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4792 while (c_isalnum (*cp
) || *cp
== '_');
4796 /* Should be examining a variable list at this point;
4797 could insist on seeing an open parenthesis. */
4798 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4804 make_tag (varstart
, cp
- varstart
, false,
4805 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4814 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4815 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4816 * More ideas by seb bacon <seb@jamkit.com> (2002)
4819 Python_functions (FILE *inf
)
4823 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4825 cp
= skip_spaces (cp
);
4826 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4829 while (!notinname (*cp
) && *cp
!= ':')
4831 make_tag (name
, cp
- name
, true,
4832 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4839 * Original code by Xi Lu <lx@shellcodes.org> (2015)
4842 Ruby_functions (FILE *inf
)
4845 bool reader
= false, writer
= false, alias
= false, continuation
= false;
4847 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4849 bool is_class
= false;
4850 bool is_method
= false;
4853 cp
= skip_spaces (cp
);
4856 && c_isalpha (*cp
) && c_isupper (*cp
))
4858 char *bp
, *colon
= NULL
;
4862 for (cp
++; c_isalnum (*cp
) || *cp
== '_' || *cp
== ':'; cp
++)
4869 bp
= skip_spaces (cp
);
4870 if (*bp
== '=' && !(bp
[1] == '=' || bp
[1] == '>'))
4872 if (colon
&& !c_isspace (colon
[1]))
4874 make_tag (name
, cp
- name
, false,
4875 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4879 else if (!continuation
4880 /* Modules, classes, methods. */
4881 && ((is_method
= LOOKING_AT (cp
, "def"))
4882 || (is_class
= LOOKING_AT (cp
, "class"))
4883 || LOOKING_AT (cp
, "module")))
4885 const char self_name
[] = "self.";
4886 const size_t self_size1
= sizeof (self_name
) - 1;
4890 /* Ruby method names can end in a '='. Also, operator overloading can
4891 define operators whose names include '='. */
4892 while (!notinname (*cp
) || *cp
== '=')
4895 /* Remove "self." from the method name. */
4896 if (cp
- name
> self_size1
4897 && strneq (name
, self_name
, self_size1
))
4900 /* Remove the class/module qualifiers from method names. */
4905 for (q
= name
; q
< cp
&& *q
!= '.'; q
++)
4907 if (q
< cp
- 1) /* punt if we see just "FOO." */
4911 /* Don't tag singleton classes. */
4912 if (is_class
&& strneq (name
, "<<", 2) && cp
== name
+ 2)
4915 make_tag (name
, cp
- name
, true,
4916 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4920 /* Tag accessors and aliases. */
4923 reader
= writer
= alias
= false;
4925 while (*cp
&& *cp
!= '#')
4929 reader
= writer
= alias
= false;
4930 if (LOOKING_AT (cp
, "attr_reader"))
4932 else if (LOOKING_AT (cp
, "attr_writer"))
4934 else if (LOOKING_AT (cp
, "attr_accessor"))
4939 else if (LOOKING_AT (cp
, "alias_method"))
4942 if (reader
|| writer
|| alias
)
4947 cp
= skip_spaces (cp
);
4949 cp
= skip_spaces (cp
+ 1);
4951 cp
= skip_name (cp
);
4957 make_tag (np
, cp
- np
, true,
4958 lb
.buffer
, cp
- lb
.buffer
+ 1,
4959 lineno
, linecharno
);
4960 continuation
= false;
4964 size_t name_len
= cp
- np
+ 1;
4965 char *wr_name
= xnew (name_len
+ 1, char);
4967 memcpy (wr_name
, np
, name_len
- 1);
4968 memcpy (wr_name
+ name_len
- 1, "=", 2);
4969 pfnote (wr_name
, true, lb
.buffer
, cp
- lb
.buffer
+ 1,
4970 lineno
, linecharno
);
4971 continuation
= false;
4976 make_tag (np
, cp
- np
, true,
4977 lb
.buffer
, cp
- lb
.buffer
+ 1,
4978 lineno
, linecharno
);
4979 continuation
= false;
4980 while (*cp
&& *cp
!= '#' && *cp
!= ';')
4983 continuation
= true;
4984 else if (!c_isspace (*cp
))
4985 continuation
= false;
4989 continuation
= false;
4991 cp
= skip_spaces (cp
);
4994 : (continuation
= (*cp
== ',')))
4995 && (cp
= skip_spaces (cp
+ 1), *cp
&& *cp
!= '#'));
4998 cp
= skip_name (cp
);
4999 while (*cp
&& *cp
!= '#' && notinname (*cp
))
5010 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5011 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5012 * - /^[ \t]*define\(\"[^\"]+/
5013 * Only with --members:
5014 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5015 * Idea by Diez B. Roggisch (2001)
5018 PHP_functions (FILE *inf
)
5021 bool search_identifier
= false;
5023 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5025 cp
= skip_spaces (cp
);
5027 if (search_identifier
5030 while (!notinname (*cp
))
5032 make_tag (name
, cp
- name
, true,
5033 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5034 search_identifier
= false;
5036 else if (LOOKING_AT (cp
, "function"))
5039 cp
= skip_spaces (cp
+1);
5043 while (!notinname (*cp
))
5045 make_tag (name
, cp
- name
, true,
5046 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5049 search_identifier
= true;
5051 else if (LOOKING_AT (cp
, "class"))
5056 while (*cp
!= '\0' && !c_isspace (*cp
))
5058 make_tag (name
, cp
- name
, false,
5059 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5062 search_identifier
= true;
5064 else if (strneq (cp
, "define", 6)
5065 && (cp
= skip_spaces (cp
+6))
5067 && (*cp
== '"' || *cp
== '\''))
5071 while (*cp
!= quote
&& *cp
!= '\0')
5073 make_tag (name
, cp
- name
, false,
5074 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5077 && LOOKING_AT (cp
, "var")
5081 while (!notinname (*cp
))
5083 make_tag (name
, cp
- name
, false,
5084 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5091 * Cobol tag functions
5092 * We could look for anything that could be a paragraph name.
5093 * i.e. anything that starts in column 8 is one word and ends in a full stop.
5094 * Idea by Corny de Souza (1993)
5097 Cobol_paragraphs (FILE *inf
)
5099 register char *bp
, *ep
;
5101 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5107 /* If eoln, compiler option or comment ignore whole line. */
5108 if (bp
[-1] != ' ' || !c_isalnum (bp
[0]))
5111 for (ep
= bp
; c_isalnum (*ep
) || *ep
== '-'; ep
++)
5114 make_tag (bp
, ep
- bp
, true,
5115 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
5122 * Ideas by Assar Westerlund <assar@sics.se> (2001)
5125 Makefile_targets (FILE *inf
)
5129 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5131 if (*bp
== '\t' || *bp
== '#')
5133 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
5135 if (*bp
== ':' || (globals
&& *bp
== '='))
5137 /* We should detect if there is more than one tag, but we do not.
5138 We just skip initial and final spaces. */
5139 char * namestart
= skip_spaces (lb
.buffer
);
5140 while (--bp
> namestart
)
5141 if (!notinname (*bp
))
5143 make_tag (namestart
, bp
- namestart
+ 1, true,
5144 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
5152 * Original code by Mosur K. Mohan (1989)
5154 * Locates tags for procedures & functions. Doesn't do any type- or
5155 * var-definitions. It does look for the keyword "extern" or
5156 * "forward" immediately following the procedure statement; if found,
5157 * the tag is skipped.
5160 Pascal_functions (FILE *inf
)
5162 linebuffer tline
; /* mostly copied from C_entries */
5164 int save_lineno
, namelen
, taglen
;
5167 bool /* each of these flags is true if: */
5168 incomment
, /* point is inside a comment */
5169 inquote
, /* point is inside '..' string */
5170 get_tagname
, /* point is after PROCEDURE/FUNCTION
5171 keyword, so next item = potential tag */
5172 found_tag
, /* point is after a potential tag */
5173 inparms
, /* point is within parameter-list */
5174 verify_tag
; /* point has passed the parm-list, so the
5175 next token will determine whether this
5176 is a FORWARD/EXTERN to be ignored, or
5177 whether it is a real tag */
5179 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
5180 name
= NULL
; /* keep compiler quiet */
5183 linebuffer_init (&tline
);
5185 incomment
= inquote
= false;
5186 found_tag
= false; /* have a proc name; check if extern */
5187 get_tagname
= false; /* found "procedure" keyword */
5188 inparms
= false; /* found '(' after "proc" */
5189 verify_tag
= false; /* check if "extern" is ahead */
5192 while (perhaps_more_input (inf
)) /* long main loop to get next char */
5195 if (c
== '\0') /* if end of line */
5197 readline (&lb
, inf
);
5201 if (!((found_tag
&& verify_tag
)
5203 c
= *dbp
++; /* only if don't need *dbp pointing
5204 to the beginning of the name of
5205 the procedure or function */
5209 if (c
== '}') /* within { } comments */
5211 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
5228 inquote
= true; /* found first quote */
5230 case '{': /* found open { comment */
5234 if (*dbp
== '*') /* found open (* comment */
5239 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
5242 case ')': /* end of parms list */
5247 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
5254 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
5256 /* Check if this is an "extern" declaration. */
5259 if (c_tolower (*dbp
) == 'e')
5261 if (nocase_tail ("extern")) /* superfluous, really! */
5267 else if (c_tolower (*dbp
) == 'f')
5269 if (nocase_tail ("forward")) /* check for forward reference */
5275 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
5279 make_tag (name
, namelen
, true,
5280 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
5284 if (get_tagname
) /* grab name of proc or fn */
5291 /* Find block name. */
5292 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
5295 /* Save all values for later tagging. */
5296 linebuffer_setlen (&tline
, lb
.len
);
5297 strcpy (tline
.buffer
, lb
.buffer
);
5298 save_lineno
= lineno
;
5299 save_lcno
= linecharno
;
5300 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
5302 taglen
= cp
- lb
.buffer
+ 1;
5304 dbp
= cp
; /* set dbp to e-o-token */
5305 get_tagname
= false;
5309 /* And proceed to check for "extern". */
5311 else if (!incomment
&& !inquote
&& !found_tag
)
5313 /* Check for proc/fn keywords. */
5314 switch (c_tolower (c
))
5317 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
5321 if (nocase_tail ("unction"))
5326 } /* while not eof */
5328 free (tline
.buffer
);
5333 * Lisp tag functions
5334 * look for (def or (DEF, quote or QUOTE
5337 static void L_getit (void);
5342 if (*dbp
== '\'') /* Skip prefix quote */
5344 else if (*dbp
== '(')
5347 /* Try to skip "(quote " */
5348 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
5349 /* Ok, then skip "(" before name in (defstruct (foo)) */
5350 dbp
= skip_spaces (dbp
);
5352 get_lispy_tag (dbp
);
5356 Lisp_functions (FILE *inf
)
5358 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5363 /* "(defvar foo)" is a declaration rather than a definition. */
5367 if (LOOKING_AT (p
, "defvar"))
5369 p
= skip_name (p
); /* past var name */
5370 p
= skip_spaces (p
);
5376 if (strneq (dbp
+ 1, "cl-", 3) || strneq (dbp
+ 1, "CL-", 3))
5379 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
5381 dbp
= skip_non_spaces (dbp
);
5382 dbp
= skip_spaces (dbp
);
5387 /* Check for (foo::defmumble name-defined ... */
5390 while (!notinname (*dbp
) && *dbp
!= ':');
5395 while (*dbp
== ':');
5397 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
5399 dbp
= skip_non_spaces (dbp
);
5400 dbp
= skip_spaces (dbp
);
5410 * Lua script language parsing
5411 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5413 * "function" and "local function" are tags if they start at column 1.
5416 Lua_functions (FILE *inf
)
5420 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5422 bp
= skip_spaces (bp
);
5423 if (bp
[0] != 'f' && bp
[0] != 'l')
5426 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
5428 if (LOOKING_AT (bp
, "function"))
5430 char *tag_name
, *tp_dot
, *tp_colon
;
5432 get_tag (bp
, &tag_name
);
5433 /* If the tag ends with ".foo" or ":foo", make an additional tag for
5435 tp_dot
= strrchr (tag_name
, '.');
5436 tp_colon
= strrchr (tag_name
, ':');
5437 if (tp_dot
|| tp_colon
)
5439 char *p
= tp_dot
> tp_colon
? tp_dot
: tp_colon
;
5440 int len_add
= p
- tag_name
+ 1;
5442 get_tag (bp
+ len_add
, NULL
);
5451 * Just look for lines where the first character is '/'
5452 * Also look at "defineps" for PSWrap
5454 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5455 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5458 PS_functions (FILE *inf
)
5460 register char *bp
, *ep
;
5462 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5467 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
5470 make_tag (bp
, ep
- bp
, true,
5471 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
5473 else if (LOOKING_AT (bp
, "defineps"))
5481 * Ignore anything after \ followed by space or in ( )
5482 * Look for words defined by :
5483 * Look for constant, code, create, defer, value, and variable
5484 * OBP extensions: Look for buffer:, field,
5485 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5488 Forth_words (FILE *inf
)
5492 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5493 while ((bp
= skip_spaces (bp
))[0] != '\0')
5494 if (bp
[0] == '\\' && c_isspace (bp
[1]))
5495 break; /* read next line */
5496 else if (bp
[0] == '(' && c_isspace (bp
[1]))
5497 do /* skip to ) or eol */
5499 while (*bp
!= ')' && *bp
!= '\0');
5500 else if (((bp
[0] == ':' && c_isspace (bp
[1]) && bp
++)
5501 || LOOKING_AT_NOCASE (bp
, "constant")
5502 || LOOKING_AT_NOCASE (bp
, "2constant")
5503 || LOOKING_AT_NOCASE (bp
, "fconstant")
5504 || LOOKING_AT_NOCASE (bp
, "code")
5505 || LOOKING_AT_NOCASE (bp
, "create")
5506 || LOOKING_AT_NOCASE (bp
, "defer")
5507 || LOOKING_AT_NOCASE (bp
, "value")
5508 || LOOKING_AT_NOCASE (bp
, "2value")
5509 || LOOKING_AT_NOCASE (bp
, "fvalue")
5510 || LOOKING_AT_NOCASE (bp
, "variable")
5511 || LOOKING_AT_NOCASE (bp
, "2variable")
5512 || LOOKING_AT_NOCASE (bp
, "fvariable")
5513 || LOOKING_AT_NOCASE (bp
, "buffer:")
5514 || LOOKING_AT_NOCASE (bp
, "field:")
5515 || LOOKING_AT_NOCASE (bp
, "+field")
5516 || LOOKING_AT_NOCASE (bp
, "field") /* not standard? */
5517 || LOOKING_AT_NOCASE (bp
, "begin-structure")
5518 || LOOKING_AT_NOCASE (bp
, "synonym")
5520 && c_isspace (bp
[0]))
5522 /* Yay! A definition! */
5523 char* name_start
= skip_spaces (bp
);
5524 char* name_end
= skip_non_spaces (name_start
);
5525 if (name_start
< name_end
)
5526 make_tag (name_start
, name_end
- name_start
,
5527 true, lb
.buffer
, name_end
- lb
.buffer
,
5528 lineno
, linecharno
);
5532 bp
= skip_non_spaces (bp
);
5537 * Scheme tag functions
5538 * look for (def... xyzzy
5540 * (def ... ((...(xyzzy ....
5542 * Original code by Ken Haase (1985?)
5545 Scheme_functions (FILE *inf
)
5549 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
5551 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
5553 bp
= skip_non_spaces (bp
+4);
5554 /* Skip over open parens and white space.
5555 Don't continue past '\0' or '='. */
5556 while (*bp
&& notinname (*bp
) && *bp
!= '=')
5560 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
5566 /* Find tags in TeX and LaTeX input files. */
5568 /* TEX_toktab is a table of TeX control sequences that define tags.
5569 * Each entry records one such control sequence.
5571 * Original code from who knows whom.
5573 * Stefan Monnier (2002)
5576 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
5578 /* Default set of control sequences to put into TEX_toktab.
5579 The value of environment var TEXTAGS is prepended to this. */
5580 static const char *TEX_defenv
= "\
5581 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5582 :part:appendix:entry:index:def\
5583 :newcommand:renewcommand:newenvironment:renewenvironment";
5585 static void TEX_decode_env (const char *, const char *);
5588 * TeX/LaTeX scanning loop.
5591 TeX_commands (FILE *inf
)
5596 char TEX_esc
= '\0';
5597 char TEX_opgrp
, TEX_clgrp
;
5599 /* Initialize token table once from environment. */
5600 if (TEX_toktab
== NULL
)
5601 TEX_decode_env ("TEXTAGS", TEX_defenv
);
5603 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5605 /* Look at each TEX keyword in line. */
5608 /* Look for a TEX escape. */
5612 if (c
== '\0' || c
== '%')
5615 /* Select either \ or ! as escape character, whichever comes
5616 first outside a comment. */
5637 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
5638 if (strneq (cp
, key
->buffer
, key
->len
))
5641 int namelen
, linelen
;
5644 cp
= skip_spaces (cp
+ key
->len
);
5645 if (*cp
== TEX_opgrp
)
5651 (!c_isspace (*p
) && *p
!= '#' &&
5652 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
5657 if (!opgrp
|| *p
== TEX_clgrp
)
5659 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
5661 linelen
= p
- lb
.buffer
+ 1;
5663 make_tag (cp
, namelen
, true,
5664 lb
.buffer
, linelen
, lineno
, linecharno
);
5665 goto tex_next_line
; /* We only tag a line once */
5673 /* Read environment and prepend it to the default string.
5674 Build token table. */
5676 TEX_decode_env (const char *evarname
, const char *defenv
)
5678 register const char *env
, *p
;
5681 /* Append default string to environment. */
5682 env
= getenv (evarname
);
5686 env
= concat (env
, defenv
, "");
5688 /* Allocate a token table */
5689 for (len
= 1, p
= env
; (p
= strchr (p
, ':')); )
5692 TEX_toktab
= xnew (len
, linebuffer
);
5694 /* Unpack environment string into token table. Be careful about */
5695 /* zero-length strings (leading ':', "::" and trailing ':') */
5696 for (i
= 0; *env
!= '\0';)
5698 p
= strchr (env
, ':');
5699 if (!p
) /* End of environment string. */
5700 p
= env
+ strlen (env
);
5702 { /* Only non-zero strings. */
5703 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5704 TEX_toktab
[i
].len
= p
- env
;
5711 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5712 TEX_toktab
[i
].len
= 0;
5719 /* Texinfo support. Dave Love, Mar. 2000. */
5721 Texinfo_nodes (FILE *inf
)
5724 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5725 if (LOOKING_AT (cp
, "@node"))
5728 while (*cp
!= '\0' && *cp
!= ',')
5730 make_tag (start
, cp
- start
, true,
5731 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5738 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5739 * Contents of <a name=xxx> are tags with name xxx.
5741 * Francesco Potortì, 2002.
5744 HTML_labels (FILE *inf
)
5746 bool getnext
= false; /* next text outside of HTML tags is a tag */
5747 bool skiptag
= false; /* skip to the end of the current HTML tag */
5748 bool intag
= false; /* inside an html tag, looking for ID= */
5749 bool inanchor
= false; /* when INTAG, is an anchor, look for NAME= */
5753 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5755 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5756 for (;;) /* loop on the same line */
5758 if (skiptag
) /* skip HTML tag */
5760 while (*dbp
!= '\0' && *dbp
!= '>')
5766 continue; /* look on the same line */
5768 break; /* go to next line */
5771 else if (intag
) /* look for "name=" or "id=" */
5773 while (*dbp
!= '\0' && *dbp
!= '>'
5774 && c_tolower (*dbp
) != 'n' && c_tolower (*dbp
) != 'i')
5777 break; /* go to next line */
5782 continue; /* look on the same line */
5784 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5785 || LOOKING_AT_NOCASE (dbp
, "id="))
5787 bool quoted
= (dbp
[0] == '"');
5790 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5793 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5795 linebuffer_setlen (&token_name
, end
- dbp
);
5796 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5797 token_name
.buffer
[end
- dbp
] = '\0';
5800 intag
= false; /* we found what we looked for */
5801 skiptag
= true; /* skip to the end of the tag */
5802 getnext
= true; /* then grab the text */
5803 continue; /* look on the same line */
5808 else if (getnext
) /* grab next tokens and tag them */
5810 dbp
= skip_spaces (dbp
);
5812 break; /* go to next line */
5816 inanchor
= (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5817 continue; /* look on the same line */
5820 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5822 make_tag (token_name
.buffer
, token_name
.len
, true,
5823 dbp
, end
- dbp
, lineno
, linecharno
);
5824 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5826 break; /* go to next line */
5829 else /* look for an interesting HTML tag */
5831 while (*dbp
!= '\0' && *dbp
!= '<')
5834 break; /* go to next line */
5836 if (c_tolower (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5839 continue; /* look on the same line */
5841 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5842 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5843 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5844 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5848 continue; /* look on the same line */
5859 * Assumes that the predicate or rule starts at column 0.
5860 * Only the first clause of a predicate or rule is added.
5861 * Original code by Sunichirou Sugou (1989)
5862 * Rewritten by Anders Lindgren (1996)
5864 static size_t prolog_pr (char *, char *);
5865 static void prolog_skip_comment (linebuffer
*, FILE *);
5866 static size_t prolog_atom (char *, size_t);
5869 Prolog_functions (FILE *inf
)
5879 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5881 if (cp
[0] == '\0') /* Empty line */
5883 else if (c_isspace (cp
[0])) /* Not a predicate */
5885 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5886 prolog_skip_comment (&lb
, inf
);
5887 else if ((len
= prolog_pr (cp
, last
)) > 0)
5889 /* Predicate or rule. Store the function name so that we
5890 only generate a tag for the first clause. */
5892 last
= xnew (len
+ 1, char);
5893 else if (len
+ 1 > allocated
)
5894 xrnew (last
, len
+ 1, char);
5895 allocated
= len
+ 1;
5896 memcpy (last
, cp
, len
);
5905 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5911 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5912 if (cp
[0] == '*' && cp
[1] == '/')
5914 readline (plb
, inf
);
5916 while (perhaps_more_input (inf
));
5920 * A predicate or rule definition is added if it matches:
5921 * <beginning of line><Prolog Atom><whitespace>(
5922 * or <beginning of line><Prolog Atom><whitespace>:-
5924 * It is added to the tags database if it doesn't match the
5925 * name of the previous clause header.
5927 * Return the size of the name of the predicate or rule, or 0 if no
5931 prolog_pr (char *s
, char *last
)
5933 /* Name of last clause. */
5938 pos
= prolog_atom (s
, 0);
5943 pos
= skip_spaces (s
+ pos
) - s
;
5946 || (s
[pos
] == '(' && (pos
+= 1))
5947 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5948 && (last
== NULL
/* save only the first clause */
5949 || len
!= strlen (last
)
5950 || !strneq (s
, last
, len
)))
5952 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (a leading underscore is accepted as well).
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  size_t origpos;

  origpos = pos;

  if (c_islower (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_')
        {
          pos++;
        }
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      for (;;)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return 0;
          else if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return 0;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return 0;
}
6018 * Support for Erlang
6020 * Generates tags for functions, defines, and records.
6021 * Assumes that Erlang functions start at column 0.
6022 * Original code by Anders Lindgren (1996)
6024 static int erlang_func (char *, char *);
6025 static void erlang_attribute (char *);
6026 static int erlang_atom (char *);
6029 Erlang_functions (FILE *inf
)
6039 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
6041 if (cp
[0] == '\0') /* Empty line */
6043 else if (c_isspace (cp
[0])) /* Not function nor attribute */
6045 else if (cp
[0] == '%') /* comment */
6047 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
6049 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
6051 erlang_attribute (cp
);
6058 else if ((len
= erlang_func (cp
, last
)) > 0)
6061 * Function. Store the function name so that we only
6062 * generates a tag for the first clause.
6065 last
= xnew (len
+ 1, char);
6066 else if (len
+ 1 > allocated
)
6067 xrnew (last
, len
+ 1, char);
6068 allocated
= len
+ 1;
6069 memcpy (last
, cp
, len
);
6078 * A function definition is added if it matches:
6079 * <beginning of line><Erlang Atom><whitespace>(
6081 * It is added to the tags database if it doesn't match the
6082 * name of the previous clause header.
6084 * Return the size of the name of the function, or 0 if no function
6088 erlang_func (char *s
, char *last
)
6090 /* Name of last clause. */
6095 pos
= erlang_atom (s
);
6100 pos
= skip_spaces (s
+ pos
) - s
;
6102 /* Save only the first clause. */
6105 || len
!= (int)strlen (last
)
6106 || !strneq (s
, last
, len
)))
6108 make_tag (s
, len
, true, s
, pos
, lineno
, linecharno
);
6117 * Handle attributes. Currently, tags are generated for defines
6120 * They are on the form:
6121 * -define(foo, bar).
6122 * -define(Foo(M, N), M+N).
6123 * -record(graph, {vtab = notable, cyclic = true}).
6126 erlang_attribute (char *s
)
6130 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
6133 int len
= erlang_atom (skip_spaces (cp
));
6135 make_tag (cp
, len
, true, s
, cp
+ len
- s
, lineno
, linecharno
);
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, and also when a quoted atom is not closed
 * on this line (multiline quoted atoms are ignored).
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (c_isalpha (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (c_isalnum (s[pos]) || s[pos] == '_');
    }
  else if (s[pos] == '\'')
    {
      /* Quoted atom: a backslash protects the character after it.  */
      for (pos++; s[pos] != '\''; pos++)
        if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
            || (s[pos] == '\\' && s[++pos] == '\0'))
          return 0;
      pos++;
    }

  return pos;
}
6170 static char *scan_separators (char *);
6171 static void add_regex (char *, language
*);
6172 static char *substitute (char *, char *, struct re_registers
*);
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The separator is whatever character NAME starts with.  The result is
 * written starting at NAME[0], i.e. over the leading separator.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;
  bool quoted = false;

  for (++name; *name != '\0'; ++name)
    {
      if (quoted)
        {
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)            */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)       */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)          */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)          */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)         */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          quoted = false;
        }
      else if (*name == '\\')
        quoted = true;
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
6233 /* Look at the argument of --regex or --no-regex and do the right
6234 thing. Same for each line of a regexp file. */
6236 analyze_regex (char *regex_arg
)
6238 if (regex_arg
== NULL
)
6240 free_regexps (); /* --no-regex: remove existing regexps */
6244 /* A real --regexp option or a line in a regexp file. */
6245 switch (regex_arg
[0])
6247 /* Comments in regexp file or null arg to --regex. */
6253 /* Read a regex file. This is recursive and may result in a
6254 loop, which will stop when the file descriptors are exhausted. */
6258 linebuffer regexbuf
;
6259 char *regexfile
= regex_arg
+ 1;
6261 /* regexfile is a file containing regexps, one per line. */
6262 regexfp
= fopen (regexfile
, "r" FOPEN_BINARY
);
6263 if (regexfp
== NULL
)
6265 linebuffer_init (®exbuf
);
6266 while (readline_internal (®exbuf
, regexfp
, regexfile
) > 0)
6267 analyze_regex (regexbuf
.buffer
);
6268 free (regexbuf
.buffer
);
6269 if (fclose (regexfp
) != 0)
6274 /* Regexp to be used for a specific language only. */
6278 char *lang_name
= regex_arg
+ 1;
6281 for (cp
= lang_name
; *cp
!= '}'; cp
++)
6284 error ("unterminated language name in regex: %s", regex_arg
);
6288 lang
= get_language_from_langname (lang_name
);
6291 add_regex (cp
, lang
);
6295 /* Regexp to be used for any language. */
6297 add_regex (regex_arg
, NULL
);
6302 /* Separate the regexp pattern, compile it,
6303 and care for optional name and modifiers. */
6305 add_regex (char *regexp_pattern
, language
*lang
)
6307 static struct re_pattern_buffer zeropattern
;
6308 char sep
, *pat
, *name
, *modifiers
;
6311 struct re_pattern_buffer
*patbuf
;
6314 force_explicit_name
= true, /* do not use implicit tag names */
6315 ignore_case
= false, /* case is significant */
6316 multi_line
= false, /* matches are done one line at a time */
6317 single_line
= false; /* dot does not match newline */
6320 if (strlen (regexp_pattern
) < 3)
6322 error ("null regexp");
6325 sep
= regexp_pattern
[0];
6326 name
= scan_separators (regexp_pattern
);
6329 error ("%s: unterminated regexp", regexp_pattern
);
6334 error ("null name for regexp \"%s\"", regexp_pattern
);
6337 modifiers
= scan_separators (name
);
6338 if (modifiers
== NULL
) /* no terminating separator --> no name */
6344 modifiers
+= 1; /* skip separator */
6346 /* Parse regex modifiers. */
6347 for (; modifiers
[0] != '\0'; modifiers
++)
6348 switch (modifiers
[0])
6351 if (modifiers
== name
)
6352 error ("forcing explicit tag name but no name, ignoring");
6353 force_explicit_name
= true;
6363 need_filebuf
= true;
6366 error ("invalid regexp modifier '%c', ignoring", modifiers
[0]);
6370 patbuf
= xnew (1, struct re_pattern_buffer
);
6371 *patbuf
= zeropattern
;
6374 static char lc_trans
[UCHAR_MAX
+ 1];
6376 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++)
6377 lc_trans
[i
] = c_tolower (i
);
6378 patbuf
->translate
= lc_trans
; /* translation table to fold case */
6382 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
6384 pat
= regexp_pattern
;
6387 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
6389 re_set_syntax (RE_SYNTAX_EMACS
);
6391 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
6396 error ("%s while compiling pattern", err
);
6401 p_head
= xnew (1, regexp
);
6402 p_head
->pattern
= savestr (regexp_pattern
);
6403 p_head
->p_next
= rp
;
6404 p_head
->lang
= lang
;
6405 p_head
->pat
= patbuf
;
6406 p_head
->name
= savestr (name
);
6407 p_head
->error_signaled
= false;
6408 p_head
->force_explicit_name
= force_explicit_name
;
6409 p_head
->ignore_case
= ignore_case
;
6410 p_head
->multi_line
= multi_line
;
6414 * Do the substitutions indicated by the regular expression and
6418 substitute (char *in
, char *out
, struct re_registers
*regs
)
6421 int size
, dig
, diglen
;
6424 size
= strlen (out
);
6426 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6427 if (out
[size
- 1] == '\\')
6428 fatal ("pattern error in \"%s\"", out
);
6429 for (t
= strchr (out
, '\\');
6431 t
= strchr (t
+ 2, '\\'))
6432 if (c_isdigit (t
[1]))
6435 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
6441 /* Allocate space and do the substitutions. */
6443 result
= xnew (size
+ 1, char);
6445 for (t
= result
; *out
!= '\0'; out
++)
6446 if (*out
== '\\' && c_isdigit (*++out
))
6449 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
6450 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
6457 assert (t
<= result
+ size
);
6458 assert (t
- result
== (int)strlen (result
));
6463 /* Deallocate all regexps. */
6468 while (p_head
!= NULL
)
6470 rp
= p_head
->p_next
;
6471 free (p_head
->pattern
);
6472 free (p_head
->name
);
6480 * Reads the whole file as a single string from `filebuf' and looks for
6481 * multi-line regular expressions, creating tags on matches.
6482 * readline already dealt with normal regexps.
6484 * Idea by Ben Wing <ben@666.com> (2002).
6487 regex_tag_multiline (void)
6489 char *buffer
= filebuf
.buffer
;
6493 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6497 if (!rp
->multi_line
)
6498 continue; /* skip normal regexps */
6500 /* Generic initializations before parsing file from memory. */
6501 lineno
= 1; /* reset global line number */
6502 charno
= 0; /* reset global char number */
6503 linecharno
= 0; /* reset global char number of line start */
6505 /* Only use generic regexps or those for the current language. */
6506 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
6509 while (match
>= 0 && match
< filebuf
.len
)
6511 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
6512 filebuf
.len
- match
, &rp
->regs
);
6517 if (!rp
->error_signaled
)
6519 error ("regexp stack overflow while matching \"%s\"",
6521 rp
->error_signaled
= true;
6528 if (match
== rp
->regs
.end
[0])
6530 if (!rp
->error_signaled
)
6532 error ("regexp matches the empty string: \"%s\"",
6534 rp
->error_signaled
= true;
6536 match
= -3; /* exit from while loop */
6540 /* Match occurred. Construct a tag. */
6541 while (charno
< rp
->regs
.end
[0])
6542 if (buffer
[charno
++] == '\n')
6543 lineno
++, linecharno
= charno
;
6545 if (name
[0] == '\0')
6547 else /* make a named tag */
6548 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
6549 if (rp
->force_explicit_name
)
6550 /* Force explicit tag name, if a name is there. */
6551 pfnote (name
, true, buffer
+ linecharno
,
6552 charno
- linecharno
+ 1, lineno
, linecharno
);
6554 make_tag (name
, strlen (name
), true, buffer
+ linecharno
,
6555 charno
- linecharno
+ 1, lineno
, linecharno
);
6564 nocase_tail (const char *cp
)
6568 while (*cp
!= '\0' && c_tolower (*cp
) == c_tolower (dbp
[len
]))
6570 if (*cp
== '\0' && !intoken (dbp
[len
]))
6579 get_tag (register char *bp
, char **namepp
)
6581 register char *cp
= bp
;
6585 /* Go till you get to white space or a syntactic break */
6586 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
6588 make_tag (bp
, cp
- bp
, true,
6589 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6593 *namepp
= savenstr (bp
, cp
- bp
);
6596 /* Similar to get_tag, but include '=' as part of the tag. */
6598 get_lispy_tag (register char *bp
)
6600 register char *cp
= bp
;
6604 /* Go till you get to white space or a syntactic break */
6605 for (cp
= bp
+ 1; !notinname (*cp
) || *cp
== '='; cp
++)
6607 make_tag (bp
, cp
- bp
, true,
6608 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
6613 * Read a line of text from `stream' into `lbp', excluding the
6614 * newline or CR-NL, if any. Return the number of characters read from
6615 * `stream', which is the length of the line including the newline.
6617 * On DOS or Windows we do not count the CR character, if any before the
6618 * NL, in the returned length; this mirrors the behavior of Emacs on those
6619 * platforms (for text files, it translates CR-NL to NL as it reads in the
6622 * If multi-line regular expressions are requested, each line read is
6623 * appended to `filebuf'.
6626 readline_internal (linebuffer
*lbp
, FILE *stream
, char const *filename
)
6628 char *buffer
= lbp
->buffer
;
6629 char *p
= lbp
->buffer
;
6633 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
6637 register int c
= getc (stream
);
6640 /* We're at the end of linebuffer: expand it. */
6642 xrnew (buffer
, lbp
->size
, char);
6643 p
+= buffer
- lbp
->buffer
;
6644 pend
= buffer
+ lbp
->size
;
6645 lbp
->buffer
= buffer
;
6649 if (ferror (stream
))
6657 if (p
> buffer
&& p
[-1] == '\r')
6671 lbp
->len
= p
- buffer
;
6673 if (need_filebuf
/* we need filebuf for multi-line regexps */
6674 && chars_deleted
> 0) /* not at EOF */
6676 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6678 /* Expand filebuf. */
6680 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6682 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6683 filebuf
.len
+= lbp
->len
;
6684 filebuf
.buffer
[filebuf
.len
++] = '\n';
6685 filebuf
.buffer
[filebuf
.len
] = '\0';
6688 return lbp
->len
+ chars_deleted
;
6692 * Like readline_internal, above, but in addition try to match the
6693 * input line against relevant regular expressions and manage #line
6697 readline (linebuffer
*lbp
, FILE *stream
)
6701 linecharno
= charno
; /* update global char number of line start */
6702 result
= readline_internal (lbp
, stream
, infilename
); /* read line */
6703 lineno
+= 1; /* increment global line number */
6704 charno
+= result
; /* increment global char number */
6706 /* Honor #line directives. */
6707 if (!no_line_directive
)
6709 static bool discard_until_line_directive
;
6711 /* Check whether this is a #line directive. */
6712 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6717 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6718 && start
> 0) /* double quote character found */
6720 char *endp
= lbp
->buffer
+ start
;
6722 while ((endp
= strchr (endp
, '"')) != NULL
6723 && endp
[-1] == '\\')
6726 /* Ok, this is a real #line directive. Let's deal with it. */
6728 char *taggedabsname
; /* absolute name of original file */
6729 char *taggedfname
; /* name of original file as given */
6730 char *name
; /* temp var */
6732 discard_until_line_directive
= false; /* found it */
6733 name
= lbp
->buffer
+ start
;
6735 canonicalize_filename (name
);
6736 taggedabsname
= absolute_filename (name
, tagfiledir
);
6737 if (filename_is_absolute (name
)
6738 || filename_is_absolute (curfdp
->infname
))
6739 taggedfname
= savestr (taggedabsname
);
6741 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6743 if (streq (curfdp
->taggedfname
, taggedfname
))
6744 /* The #line directive is only a line number change. We
6745 deal with this afterwards. */
6748 /* The tags following this #line directive should be
6749 attributed to taggedfname. In order to do this, set
6750 curfdp accordingly. */
6752 fdesc
*fdp
; /* file description pointer */
6754 /* Go look for a file description already set up for the
6755 file indicated in the #line directive. If there is
6756 one, use it from now until the next #line
6758 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6759 if (streq (fdp
->infname
, curfdp
->infname
)
6760 && streq (fdp
->taggedfname
, taggedfname
))
6761 /* If we remove the second test above (after the &&)
6762 then all entries pertaining to the same file are
6763 coalesced in the tags file. If we use it, then
6764 entries pertaining to the same file but generated
6765 from different files (via #line directives) will
6766 go into separate sections in the tags file. These
6767 alternatives look equivalent. The first one
6768 destroys some apparently useless information. */
6774 /* Else, if we already tagged the real file, skip all
6775 input lines until the next #line directive. */
6776 if (fdp
== NULL
) /* not found */
6777 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6778 if (streq (fdp
->infabsname
, taggedabsname
))
6780 discard_until_line_directive
= true;
6784 /* Else create a new file description and use that from
6785 now on, until the next #line directive. */
6786 if (fdp
== NULL
) /* not found */
6789 fdhead
= xnew (1, fdesc
);
6790 *fdhead
= *curfdp
; /* copy curr. file description */
6792 fdhead
->infname
= savestr (curfdp
->infname
);
6793 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6794 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6795 fdhead
->taggedfname
= taggedfname
;
6796 fdhead
->usecharno
= false;
6797 fdhead
->prop
= NULL
;
6798 fdhead
->written
= false;
6802 free (taggedabsname
);
6804 readline (lbp
, stream
);
6806 } /* if a real #line directive */
6807 } /* if #line is followed by a number */
6808 } /* if line begins with "#line " */
6810 /* If we are here, no #line directive was found. */
6811 if (discard_until_line_directive
)
6815 /* Do a tail recursion on ourselves, thus discarding the contents
6816 of the line buffer. */
6817 readline (lbp
, stream
);
6821 discard_until_line_directive
= false;
6824 } /* if #line directives should be considered */
6831 /* Match against relevant regexps. */
6833 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6835 /* Only use generic regexps or those for the current language.
6836 Also do not use multiline regexps, which is the job of
6837 regex_tag_multiline. */
6838 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6842 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6847 if (!rp
->error_signaled
)
6849 error ("regexp stack overflow while matching \"%s\"",
6851 rp
->error_signaled
= true;
6858 /* Empty string matched. */
6859 if (!rp
->error_signaled
)
6861 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6862 rp
->error_signaled
= true;
6866 /* Match occurred. Construct a tag. */
6868 if (name
[0] == '\0')
6870 else /* make a named tag */
6871 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6872 if (rp
->force_explicit_name
)
6873 /* Force explicit tag name, if a name is there. */
6874 pfnote (name
, true, lbp
->buffer
, match
, lineno
, linecharno
);
6876 make_tag (name
, strlen (name
), true,
6877 lbp
->buffer
, match
, lineno
, linecharno
);
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6896 * Return a pointer to a space of size LEN+1 allocated with xnew where
6897 * the string CP has been copied for at most the first LEN characters.
6900 savenstr (const char *cp
, int len
)
6902 char *dp
= xnew (len
+ 1, char);
6904 return memcpy (dp
, cp
, len
);
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (char *cp)
{
  while (c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !c_isspace (*cp))
    cp++;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  while (! notinname (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  FORMAT and the arguments after it
   are as for printf; the message is written by verror and the process
   then exits with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list ap;
  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
  exit (EXIT_FAILURE);
}
/* Report the error described by errno, prefixed with S1, using
   perror, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6954 suggest_asking_for_help (void)
6956 fprintf (stderr
, "\tTry '%s --help' for a complete list of options.\n",
6958 exit (EXIT_FAILURE
);
/* Output a diagnostic with printf-style FORMAT and args.  The message
   is written by verror; unlike fatal, this function returns.  */
static void
error (const char *format, ...)
{
  va_list ap;
  va_start (ap, format);
  verror (format, ap);
  va_end (ap);
}
6972 verror (char const *format
, va_list ap
)
6974 fprintf (stderr
, "%s: ", progname
);
6975 vfprintf (stderr
, format
, ap
);
6976 fprintf (stderr
, "\n");
6979 /* Return a newly-allocated string whose contents
6980 concatenate those of s1, s2, s3. */
6982 concat (const char *s1
, const char *s2
, const char *s3
)
6984 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
6985 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
6987 strcpy (result
, s1
);
6988 strcpy (result
+ len1
, s2
);
6989 strcpy (result
+ len1
+ len2
, s3
);
6995 /* Does the same work as the system V getcwd, but does not need to
6996 guess the buffer size in advance. */
7001 char *path
= xnew (bufsize
, char);
7003 while (getcwd (path
, bufsize
) == NULL
)
7005 if (errno
!= ERANGE
)
7009 path
= xnew (bufsize
, char);
7012 canonicalize_filename (path
);
7016 /* Return a newly allocated string containing a name of a temporary file. */
7020 const char *tmpdir
= getenv ("TMPDIR");
7021 const char *slash
= "/";
7023 #if MSDOS || defined (DOS_NT)
7025 tmpdir
= getenv ("TEMP");
7027 tmpdir
= getenv ("TMP");
7030 if (tmpdir
[strlen (tmpdir
) - 1] == '/'
7031 || tmpdir
[strlen (tmpdir
) - 1] == '\\')
7036 if (tmpdir
[strlen (tmpdir
) - 1] == '/')
7040 char *templt
= concat (tmpdir
, slash
, "etXXXXXX");
7041 int fd
= mkostemp (templt
, O_CLOEXEC
);
7042 if (fd
< 0 || close (fd
) != 0)
7044 int temp_errno
= errno
;
7050 #if defined (DOS_NT)
7051 /* The file name will be used in shell redirection, so it needs to have
7052 DOS-style backslashes, or else the Windows shell will barf. */
7054 for (p
= templt
; *p
; p
++)
7062 /* Return a newly allocated string containing the file name of FILE
7063 relative to the absolute directory DIR (which should end with a slash). */
7065 relative_filename (char *file
, char *dir
)
7067 char *fp
, *dp
, *afn
, *res
;
7070 /* Find the common root of file and dir (with a trailing slash). */
7071 afn
= absolute_filename (file
, cwd
);
7074 while (*fp
++ == *dp
++)
7076 fp
--, dp
--; /* back to the first differing char */
7078 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
7081 do /* look at the equal chars until '/' */
7085 /* Build a sequence of "../" strings for the resulting relative file name. */
7087 while ((dp
= strchr (dp
+ 1, '/')) != NULL
)
7089 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
7092 z
= stpcpy (z
, "../");
7094 /* Add the file name relative to the common root of file and dir. */
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* The length covers the terminating NUL as well.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  If FILE contains no slash, the result is a copy
   of DIR.  FILE is modified temporarily and restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  /* Cut FILE right after its last slash while computing the name.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute if it starts with a slash or, when DOS_NT is
   defined, with a drive letter followed by ":/".  */
static bool
filename_is_absolute (char *fn)
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place. */
static void
canonicalize_filename (register char *fn)
{
  register char* cp;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == '\\')
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == '\\')
          cp++;
      }
    else
      *fn = *cp;

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/')
      {
        *fn = '/';
        while (cp[1] == '/')
          cp++;
      }
    else
      *fn = *cp;

#endif  /* !DOS_NT */

  /* FN is the write position, which never gets ahead of the read
     position CP; terminate the shortened string.  */
  *fn = '\0';
}
7239 /* Initialize a linebuffer for use. */
7241 linebuffer_init (linebuffer
*lbp
)
7243 lbp
->size
= (DEBUG
) ? 3 : 200;
7244 lbp
->buffer
= xnew (lbp
->size
, char);
7245 lbp
->buffer
[0] = '\0';
7249 /* Set the minimum size of a string contained in a linebuffer. */
7251 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
7253 while (lbp
->size
<= toksize
)
7256 xrnew (lbp
->buffer
, lbp
->size
, char);
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);
  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (void *ptr, size_t size)
{
  void *result = realloc (ptr, size);
  if (result == NULL)
    fatal ("virtual memory exhausted");
  return result;
}
7282 * indent-tabs-mode: t
7285 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
7286 * c-file-style: "gnu"
7290 /* etags.c ends here */