1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
3 Copyright (C) 1984 The Regents of the University of California
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
34 This file is not considered part of GNU Emacs.
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
81 char pot_etags_version
[] = "@(#) pot revision number is 17.38.1.4";
91 # define NDEBUG /* disable assert */
97 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
100 /* WIN32_NATIVE is for XEmacs.
101 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
106 #endif /* WIN32_NATIVE */
112 # include <sys/param.h>
122 # define MAXPATHLEN _MAX_PATH
126 #endif /* WINDOWSNT */
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <c-strcase.h>
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
148 /* Define CTAGS to make the program "ctags" compatible with the usual one.
149 Leave it undefined to make the program "etags", which makes emacs-style
150 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/*
 * String equality helpers.  Each macro evaluates to non-zero when the two
 * strings compare equal and to zero otherwise.
 *
 *   streq (s, t)          whole strings, via strcmp
 *   strcaseeq (s, t)      whole strings, via c_strcasecmp
 *   strneq (s, t, n)      at most N characters, via strncmp
 *   strncaseeq (s, t, n)  at most N characters, via c_strncasecmp
 *
 * The comparison dereferences both arguments, so the assertion requires
 * that BOTH are non-NULL.  When assertions are enabled the arguments are
 * evaluated more than once; do not pass expressions with side effects.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp ((s), (t)))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp ((s), (t)))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp ((s), (t), (n)))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp ((s), (t), (n)))
/*
 * Character classification.
 *
 * CHARS is the number of slots in each lookup table below: one per value
 * of an 8-bit char (2^8 = 256).  CHAR (x) turns any char value, including
 * a negative one coming from a signed char, into an index in the range
 * 0 .. CHARS-1 by masking off everything above the low eight bits.
 */
#define CHARS 256
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define iswhite(c)	(_wht[CHAR (c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR (c)]) /* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR (c)]) /* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR (c)]) /* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR (c)]) /* c ends tokens (see endtk) */

/*
 * <ctype.h> wrappers.  The argument goes through CHAR first, so the
 * library function always receives a value in 0 .. CHARS-1, never a
 * negative char.  Each expansion is parenthesized so that it behaves as
 * a single operand wherever it is used.
 */
#define ISALNUM(c)	(isalnum (CHAR (c)))
#define ISALPHA(c)	(isalpha (CHAR (c)))
#define ISDIGIT(c)	(isdigit (CHAR (c)))
#define ISLOWER(c)	(islower (CHAR (c)))

#define lowcase(c)	(tolower (CHAR (c)))
180 * xnew, xrnew -- allocate, reallocate storage
182 * SYNOPSIS: Type *xnew (int n, Type);
183 * void xrnew (OldPointer, int n, Type);
186 # include "chkmalloc.h"
187 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
188 (n) * sizeof (Type)))
189 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
190 (char *) (op), (n) * sizeof (Type)))
192 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
193 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
194 (char *) (op), (n) * sizeof (Type)))
199 typedef void Lang_function (FILE *);
203 const char *suffix
; /* file name suffix for this compressor */
204 const char *command
; /* takes one arg and decompresses to stdout */
209 const char *name
; /* language name */
210 const char *help
; /* detailed help for the language */
211 Lang_function
*function
; /* parse function */
212 const char **suffixes
; /* name suffixes of this language's files */
213 const char **filenames
; /* names of this language's files */
214 const char **interpreters
; /* interpreters for this language */
215 bool metasource
; /* source used to generate other sources */
220 struct fdesc
*next
; /* for the linked list */
221 char *infname
; /* uncompressed input file name */
222 char *infabsname
; /* absolute uncompressed input file name */
223 char *infabsdir
; /* absolute dir of input file */
224 char *taggedfname
; /* file name to write in tagfile */
225 language
*lang
; /* language of file */
226 char *prop
; /* file properties to write in tagfile */
227 bool usecharno
; /* etags tags shall contain char number */
228 bool written
; /* entry written in the tags file */
231 typedef struct node_st
232 { /* sorting structure */
233 struct node_st
*left
, *right
; /* left and right sons */
234 fdesc
*fdp
; /* description of file to whom tag belongs */
235 char *name
; /* tag name */
236 char *regex
; /* search regexp */
237 bool valid
; /* write this tag on the tag file */
238 bool is_func
; /* function tag: use regexp in CTAGS mode */
239 bool been_warned
; /* warning already given for duplicated tag */
240 int lno
; /* line number tag is on */
241 long cno
; /* character number line starts on */
245 * A `linebuffer' is a structure which holds a line of text.
246 * `readline_internal' reads a line from a stream into a linebuffer
247 * and works regardless of the length of the line.
248 * SIZE is the size of BUFFER, LEN is the length of the string in
249 * BUFFER after readline reads it.
258 /* Used to support mixing of --lang and file names. */
262 at_language
, /* a language specification */
263 at_regexp
, /* a regular expression */
264 at_filename
, /* a file name */
265 at_stdin
, /* read from stdin here */
266 at_end
/* stop parsing the list */
267 } arg_type
; /* argument type */
268 language
*lang
; /* language associated with the argument */
269 char *what
; /* the argument itself */
272 /* Structure defining a regular expression. */
273 typedef struct regexp
275 struct regexp
*p_next
; /* pointer to next in list */
276 language
*lang
; /* if set, use only for this language */
277 char *pattern
; /* the regexp pattern */
278 char *name
; /* tag name */
279 struct re_pattern_buffer
*pat
; /* the compiled pattern */
280 struct re_registers regs
; /* re registers */
281 bool error_signaled
; /* already signaled for this regexp */
282 bool force_explicit_name
; /* do not allow implicit tag name */
283 bool ignore_case
; /* ignore case when matching */
284 bool multi_line
; /* do a multi-line match on the whole file */
288 /* Many compilers barf on this:
289 Lang_function Ada_funcs;
290 so let's write it this way */
/* Per-language entry points.  Every one except C_entries has the shape
   of Lang_function, i.e. it takes a single FILE * and returns nothing;
   C_entries takes an extra leading int.  */
static void Ada_funcs (FILE *fp);
static void Asm_labels (FILE *fp);
static void C_entries (int c_ext, FILE *fp);
static void default_C_entries (FILE *fp);
static void plain_C_entries (FILE *fp);
static void Cjava_entries (FILE *fp);
static void Cobol_paragraphs (FILE *fp);
static void Cplusplus_entries (FILE *fp);
static void Cstar_entries (FILE *fp);
static void Erlang_functions (FILE *fp);
static void Forth_words (FILE *fp);
static void Fortran_functions (FILE *fp);
static void HTML_labels (FILE *fp);
static void Lisp_functions (FILE *fp);
static void Lua_functions (FILE *fp);
static void Makefile_targets (FILE *fp);
static void Pascal_functions (FILE *fp);
static void Perl_functions (FILE *fp);
static void PHP_functions (FILE *fp);
static void PS_functions (FILE *fp);
static void Prolog_functions (FILE *fp);
static void Python_functions (FILE *fp);
static void Scheme_functions (FILE *fp);
static void TeX_commands (FILE *fp);
static void Texinfo_nodes (FILE *fp);
static void Yacc_entries (FILE *fp);
static void just_read_file (FILE *fp);
319 static language
*get_language_from_langname (const char *);
320 static void readline (linebuffer
*, FILE *);
321 static long readline_internal (linebuffer
*, FILE *);
322 static bool nocase_tail (const char *);
323 static void get_tag (char *, char **);
325 static void analyse_regex (char *);
326 static void free_regexps (void);
327 static void regex_tag_multiline (void);
328 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
329 static _Noreturn
void suggest_asking_for_help (void);
330 _Noreturn
void fatal (const char *, const char *);
331 static _Noreturn
void pfatal (const char *);
332 static void add_node (node
*, node
**);
334 static void init (void);
335 static void process_file_name (char *, language
*);
336 static void process_file (FILE *, char *, language
*);
337 static void find_entries (FILE *);
338 static void free_tree (node
*);
339 static void free_fdesc (fdesc
*);
340 static void pfnote (char *, bool, char *, int, int, long);
341 static void invalidate_nodes (fdesc
*, node
**);
342 static void put_entries (node
*);
344 static char *concat (const char *, const char *, const char *);
345 static char *skip_spaces (char *);
346 static char *skip_non_spaces (char *);
347 static char *skip_name (char *);
348 static char *savenstr (const char *, int);
349 static char *savestr (const char *);
350 static char *etags_strchr (const char *, int);
351 static char *etags_strrchr (const char *, int);
352 static char *etags_getcwd (void);
353 static char *relative_filename (char *, char *);
354 static char *absolute_filename (char *, char *);
355 static char *absolute_dirname (char *, char *);
356 static bool filename_is_absolute (char *f
);
357 static void canonicalize_filename (char *);
358 static void linebuffer_init (linebuffer
*);
359 static void linebuffer_setlen (linebuffer
*, int);
360 static void *xmalloc (size_t);
361 static void *xrealloc (char *, size_t);
364 static char searchar
= '/'; /* use /.../ searches */
366 static char *tagfile
; /* output file */
367 static char *progname
; /* name this program was invoked with */
368 static char *cwd
; /* current working directory */
369 static char *tagfiledir
; /* directory of tagfile */
370 static FILE *tagf
; /* ioptr for tags file */
371 static ptrdiff_t whatlen_max
; /* maximum length of any 'what' member */
373 static fdesc
*fdhead
; /* head of file description list */
374 static fdesc
*curfdp
; /* current file description */
375 static int lineno
; /* line number of current line */
376 static long charno
; /* current character number */
377 static long linecharno
; /* charno of start of current line */
378 static char *dbp
; /* pointer to start of current tag */
380 static const int invalidcharno
= -1;
382 static node
*nodehead
; /* the head of the binary tree of tags */
383 static node
*last_node
; /* the last node created */
385 static linebuffer lb
; /* the current line */
386 static linebuffer filebuf
; /* a buffer containing the whole file */
387 static linebuffer token_name
; /* a buffer containing a tag name */
389 /* boolean "functions" (see init) */
390 static bool _wht
[CHARS
], _nin
[CHARS
], _itk
[CHARS
], _btk
[CHARS
], _etk
[CHARS
];
393 *white
= " \f\t\n\r\v",
395 *nonam
= " \f\t\n\r()=,;", /* look at make_tag before modifying! */
396 /* token ending chars */
397 *endtk
= " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
398 /* token starting chars */
399 *begtk
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
400 /* valid in-token chars */
401 *midtk
= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
403 static bool append_to_tagfile
; /* -a: append to tags */
404 /* The next five default to TRUE in C and derived languages. */
405 static bool typedefs
; /* -t: create tags for C and Ada typedefs */
406 static bool typedefs_or_cplusplus
; /* -T: create tags for C typedefs, level */
407 /* 0 struct/enum/union decls, and C++ */
408 /* member functions. */
409 static bool constantypedefs
; /* -d: create tags for C #define, enum */
410 /* constants and variables. */
411 /* -D: opposite of -d. Default under ctags. */
412 static bool globals
; /* create tags for global variables */
413 static bool members
; /* create tags for C member variables */
414 static bool declarations
; /* --declarations: tag them and extern in C&Co*/
415 static bool no_line_directive
; /* ignore #line directives (undocumented) */
416 static bool no_duplicates
; /* no duplicate tags for ctags (undocumented) */
417 static bool update
; /* -u: update tags */
418 static bool vgrind_style
; /* -v: create vgrind style index output */
419 static bool no_warnings
; /* -w: suppress warnings (undocumented) */
420 static bool cxref_style
; /* -x: create cxref style output */
421 static bool cplusplus
; /* .[hc] means C++, not C (undocumented) */
422 static bool ignoreindent
; /* -I: ignore indentation in C */
423 static bool packages_only
; /* --packages-only: in Ada, only tag packages*/
425 /* STDIN is defined in LynxOS system headers */
430 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
431 static bool parsing_stdin
; /* --parse-stdin used */
433 static regexp
*p_head
; /* list of all regexps */
434 static bool need_filebuf
; /* some regexes are multi-line */
436 static struct option longopts
[] =
438 { "append", no_argument
, NULL
, 'a' },
439 { "packages-only", no_argument
, &packages_only
, TRUE
},
440 { "c++", no_argument
, NULL
, 'C' },
441 { "declarations", no_argument
, &declarations
, TRUE
},
442 { "no-line-directive", no_argument
, &no_line_directive
, TRUE
},
443 { "no-duplicates", no_argument
, &no_duplicates
, TRUE
},
444 { "help", no_argument
, NULL
, 'h' },
445 { "help", no_argument
, NULL
, 'H' },
446 { "ignore-indentation", no_argument
, NULL
, 'I' },
447 { "language", required_argument
, NULL
, 'l' },
448 { "members", no_argument
, &members
, TRUE
},
449 { "no-members", no_argument
, &members
, FALSE
},
450 { "output", required_argument
, NULL
, 'o' },
451 { "regex", required_argument
, NULL
, 'r' },
452 { "no-regex", no_argument
, NULL
, 'R' },
453 { "ignore-case-regex", required_argument
, NULL
, 'c' },
454 { "parse-stdin", required_argument
, NULL
, STDIN
},
455 { "version", no_argument
, NULL
, 'V' },
457 #if CTAGS /* Ctags options */
458 { "backward-search", no_argument
, NULL
, 'B' },
459 { "cxref", no_argument
, NULL
, 'x' },
460 { "defines", no_argument
, NULL
, 'd' },
461 { "globals", no_argument
, &globals
, TRUE
},
462 { "typedefs", no_argument
, NULL
, 't' },
463 { "typedefs-and-c++", no_argument
, NULL
, 'T' },
464 { "update", no_argument
, NULL
, 'u' },
465 { "vgrind", no_argument
, NULL
, 'v' },
466 { "no-warn", no_argument
, NULL
, 'w' },
468 #else /* Etags options */
469 { "no-defines", no_argument
, NULL
, 'D' },
470 { "no-globals", no_argument
, &globals
, FALSE
},
471 { "include", required_argument
, NULL
, 'i' },
476 static compressor compressors
[] =
478 { "z", "gzip -d -c"},
479 { "Z", "gzip -d -c"},
480 { "gz", "gzip -d -c"},
481 { "GZ", "gzip -d -c"},
482 { "bz2", "bzip2 -d -c" },
483 { "xz", "xz -d -c" },
492 static const char *Ada_suffixes
[] =
493 { "ads", "adb", "ada", NULL
};
494 static const char Ada_help
[] =
495 "In Ada code, functions, procedures, packages, tasks and types are\n\
496 tags. Use the `--packages-only' option to create tags for\n\
498 Ada tag names have suffixes indicating the type of entity:\n\
499 Entity type: Qualifier:\n\
500 ------------ ----------\n\
507 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
508 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
509 will just search for any tag `bidule'.";
512 static const char *Asm_suffixes
[] =
513 { "a", /* Unix assembler */
514 "asm", /* Microcontroller assembly */
515 "def", /* BSO/Tasking definition includes */
516 "inc", /* Microcontroller include files */
517 "ins", /* Microcontroller include files */
518 "s", "sa", /* Unix assembler */
519 "S", /* cpp-processed Unix assembler */
520 "src", /* BSO/Tasking C compiler output */
523 static const char Asm_help
[] =
524 "In assembler code, labels appearing at the beginning of a line,\n\
525 followed by a colon, are tags.";
528 /* Note that .c and .h can be considered C++, if the --c++ flag was
529 given, or if the `class' or `template' keywords are met inside the file.
530 That is why default_C_entries is called for these. */
531 static const char *default_C_suffixes
[] =
533 #if CTAGS /* C help for Ctags */
534 static const char default_C_help
[] =
535 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
536 Use -T to tag definitions of `struct', `union' and `enum'.\n\
537 Use -d to tag `#define' macro definitions and `enum' constants.\n\
538 Use --globals to tag global variables.\n\
539 You can tag function declarations and external variables by\n\
540 using `--declarations', and struct members by using `--members'.";
541 #else /* C help for Etags */
542 static const char default_C_help
[] =
543 "In C code, any C function or typedef is a tag, and so are\n\
544 definitions of `struct', `union' and `enum'. `#define' macro\n\
545 definitions and `enum' constants are tags unless you specify\n\
546 `--no-defines'. Global variables are tags unless you specify\n\
547 `--no-globals' and so are struct members unless you specify\n\
548 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
549 `--no-members' can make the tags table file much smaller.\n\
550 You can tag function declarations and external variables by\n\
551 using `--declarations'.";
552 #endif /* C help for Ctags and Etags */
554 static const char *Cplusplus_suffixes
[] =
555 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
556 "M", /* Objective C++ */
557 "pdb", /* PostScript with C syntax */
559 static const char Cplusplus_help
[] =
560 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
561 --help --lang=c --lang=c++ for full help.)\n\
562 In addition to C tags, member functions are also recognized. Member\n\
563 variables are recognized unless you use the `--no-members' option.\n\
564 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
565 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
568 static const char *Cjava_suffixes
[] =
570 static char Cjava_help
[] =
571 "In Java code, all the tags constructs of C and C++ code are\n\
572 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
575 static const char *Cobol_suffixes
[] =
576 { "COB", "cob", NULL
};
/* Detailed help for the "cobol" entry of the language table.  The text is
   never modified, so it is const like the other *_help strings.  */
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
581 static const char *Cstar_suffixes
[] =
582 { "cs", "hs", NULL
};
584 static const char *Erlang_suffixes
[] =
585 { "erl", "hrl", NULL
};
586 static const char Erlang_help
[] =
587 "In Erlang code, the tags are the functions, records and macros\n\
588 defined in the file.";
/* File name suffixes recognized as Forth, NULL-terminated.  Declared
   static like every other *_suffixes table in this file, so the name
   does not leak into the global namespace.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
592 static const char Forth_help
[] =
593 "In Forth code, tags are words defined by `:',\n\
594 constant, code, create, defer, value, variable, buffer:, field.";
596 static const char *Fortran_suffixes
[] =
597 { "F", "f", "f90", "for", NULL
};
598 static const char Fortran_help
[] =
599 "In Fortran code, functions, subroutines and block data are tags.";
601 static const char *HTML_suffixes
[] =
602 { "htm", "html", "shtml", NULL
};
603 static const char HTML_help
[] =
604 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
605 `h3' headers. Also, tags are `name=' in anchors and all\n\
606 occurrences of `id='.";
608 static const char *Lisp_suffixes
[] =
609 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
610 static const char Lisp_help
[] =
611 "In Lisp code, any function defined with `defun', any variable\n\
612 defined with `defvar' or `defconst', and in general the first\n\
613 argument of any expression that starts with `(def' in column zero\n\
615 The `--declarations' option tags \"(defvar foo)\" constructs too.";
617 static const char *Lua_suffixes
[] =
618 { "lua", "LUA", NULL
};
619 static const char Lua_help
[] =
620 "In Lua scripts, all functions are tags.";
622 static const char *Makefile_filenames
[] =
623 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
624 static const char Makefile_help
[] =
625 "In makefiles, targets are tags; additionally, variables are tags\n\
626 unless you specify `--no-globals'.";
628 static const char *Objc_suffixes
[] =
629 { "lm", /* Objective lex file */
630 "m", /* Objective C file */
632 static const char Objc_help
[] =
633 "In Objective C code, tags include Objective C definitions for classes,\n\
634 class categories, methods and protocols. Tags for variables and\n\
635 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
636 (Use --help --lang=c --lang=objc --lang=java for full help.)";
638 static const char *Pascal_suffixes
[] =
639 { "p", "pas", NULL
};
640 static const char Pascal_help
[] =
641 "In Pascal code, the tags are the functions and procedures defined\n\
643 /* " // this is for working around an Emacs highlighting bug... */
645 static const char *Perl_suffixes
[] =
646 { "pl", "pm", NULL
};
647 static const char *Perl_interpreters
[] =
648 { "perl", "@PERL@", NULL
};
649 static const char Perl_help
[] =
650 "In Perl code, the tags are the packages, subroutines and variables\n\
651 defined by the `package', `sub', `my' and `local' keywords. Use\n\
652 `--globals' if you want to tag global variables. Tags for\n\
653 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
654 defined in the default package is `main::SUB'.";
656 static const char *PHP_suffixes
[] =
657 { "php", "php3", "php4", NULL
};
658 static const char PHP_help
[] =
659 "In PHP code, tags are functions, classes and defines. Unless you use\n\
660 the `--no-members' option, vars are tags too.";
662 static const char *plain_C_suffixes
[] =
663 { "pc", /* Pro*C file */
666 static const char *PS_suffixes
[] =
667 { "ps", "psw", NULL
}; /* .psw is for PSWrap */
668 static const char PS_help
[] =
669 "In PostScript code, the tags are the functions.";
671 static const char *Prolog_suffixes
[] =
673 static const char Prolog_help
[] =
674 "In Prolog code, tags are predicates and rules at the beginning of\n\
677 static const char *Python_suffixes
[] =
679 static const char Python_help
[] =
680 "In Python code, `def' or `class' at the beginning of a line\n\
683 /* Can't do the `SCM' or `scm' prefix with a version number. */
684 static const char *Scheme_suffixes
[] =
685 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
686 static const char Scheme_help
[] =
687 "In Scheme code, tags include anything defined with `def' or with a\n\
688 construct whose name starts with `def'. They also include\n\
689 variables set with `set!' at top level in the file.";
691 static const char *TeX_suffixes
[] =
692 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
693 static const char TeX_help
[] =
694 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
695 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
696 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
697 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
698 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
700 Other commands can be specified by setting the environment variable\n\
701 `TEXTAGS' to a colon-separated list like, for example,\n\
702 TEXTAGS=\"mycommand:myothercommand\".";
705 static const char *Texinfo_suffixes
[] =
706 { "texi", "texinfo", "txi", NULL
};
707 static const char Texinfo_help
[] =
708 "for texinfo files, lines starting with @node are tagged.";
710 static const char *Yacc_suffixes
[] =
711 { "y", "y++", "ym", "yxx", "yy", NULL
}; /* .ym is Objective yacc file */
712 static const char Yacc_help
[] =
713 "In Bison or Yacc input files, each rule defines as a tag the\n\
714 nonterminal it constructs. The portions of the file that contain\n\
715 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
/* Help text for the pseudo-language "auto" in the language table.
   Wording matches the explanation printed by print_language_names
   ("based on file name suffix").  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
722 static const char none_help
[] =
723 "`none' is not a real language, it indicates to only do\n\
724 regexp processing on files.";
726 static const char no_lang_help
[] =
727 "No detailed help available for this language.";
731 * Table of languages.
733 * It is ok for a given function to be listed under more than one
734 * name. I just didn't.
737 static language lang_names
[] =
739 { "ada", Ada_help
, Ada_funcs
, Ada_suffixes
},
740 { "asm", Asm_help
, Asm_labels
, Asm_suffixes
},
741 { "c", default_C_help
, default_C_entries
, default_C_suffixes
},
742 { "c++", Cplusplus_help
, Cplusplus_entries
, Cplusplus_suffixes
},
743 { "c*", no_lang_help
, Cstar_entries
, Cstar_suffixes
},
744 { "cobol", Cobol_help
, Cobol_paragraphs
, Cobol_suffixes
},
745 { "erlang", Erlang_help
, Erlang_functions
, Erlang_suffixes
},
746 { "forth", Forth_help
, Forth_words
, Forth_suffixes
},
747 { "fortran", Fortran_help
, Fortran_functions
, Fortran_suffixes
},
748 { "html", HTML_help
, HTML_labels
, HTML_suffixes
},
749 { "java", Cjava_help
, Cjava_entries
, Cjava_suffixes
},
750 { "lisp", Lisp_help
, Lisp_functions
, Lisp_suffixes
},
751 { "lua", Lua_help
, Lua_functions
, Lua_suffixes
},
752 { "makefile", Makefile_help
,Makefile_targets
,NULL
,Makefile_filenames
},
753 { "objc", Objc_help
, plain_C_entries
, Objc_suffixes
},
754 { "pascal", Pascal_help
, Pascal_functions
, Pascal_suffixes
},
755 { "perl",Perl_help
,Perl_functions
,Perl_suffixes
,NULL
,Perl_interpreters
},
756 { "php", PHP_help
, PHP_functions
, PHP_suffixes
},
757 { "postscript",PS_help
, PS_functions
, PS_suffixes
},
758 { "proc", no_lang_help
, plain_C_entries
, plain_C_suffixes
},
759 { "prolog", Prolog_help
, Prolog_functions
, Prolog_suffixes
},
760 { "python", Python_help
, Python_functions
, Python_suffixes
},
761 { "scheme", Scheme_help
, Scheme_functions
, Scheme_suffixes
},
762 { "tex", TeX_help
, TeX_commands
, TeX_suffixes
},
763 { "texinfo", Texinfo_help
, Texinfo_nodes
, Texinfo_suffixes
},
764 { "yacc", Yacc_help
,Yacc_entries
,Yacc_suffixes
,NULL
,NULL
,TRUE
},
765 { "auto", auto_help
}, /* default guessing scheme */
766 { "none", none_help
, just_read_file
}, /* regexp matching only */
767 { NULL
} /* end of list */
772 print_language_names (void)
775 const char **name
, **ext
;
777 puts ("\nThese are the currently supported languages, along with the\n\
778 default file names and dot suffixes:");
779 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
781 printf (" %-*s", 10, lang
->name
);
782 if (lang
->filenames
!= NULL
)
783 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
784 printf (" %s", *name
);
785 if (lang
->suffixes
!= NULL
)
786 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
787 printf (" .%s", *ext
);
790 puts ("where `auto' means use default language for files based on file\n\
791 name suffix, and `none' means only do regexp processing on files.\n\
792 If no language is specified and no matching suffix is found,\n\
793 the first line of the file is read for a sharp-bang (#!) sequence\n\
794 followed by the name of an interpreter. If no such sequence is found,\n\
795 Fortran is tried first; if no tags are found, C is tried next.\n\
796 When parsing any C file, a \"class\" or \"template\" keyword\n\
798 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
800 For detailed help on a given language use, for example,\n\
801 etags --help --lang=ada.");
805 # define EMACS_NAME "standalone"
808 # define VERSION "17.38.1.4"
810 static _Noreturn
void
813 char emacs_copyright
[] = COPYRIGHT
;
815 printf ("%s (%s %s)\n", (CTAGS
) ? "ctags" : "etags", EMACS_NAME
, VERSION
);
816 puts (emacs_copyright
);
817 puts ("This program is distributed under the terms in ETAGS.README");
822 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
823 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
/* Print the help text; declared _Noreturn, so it does not come back to
   the caller.

   ARGBUFFER is walked up to its at_end entry.  For every at_language
   entry the help string of that language is printed with puts and
   help_for_lang is set to TRUE.

   The rest prints the usage line (built from progname), one description
   per command line option, the list of language names via
   print_language_names, and the bug report address.  The descriptions
   of --no-line-directive, --no-duplicates and --no-warn are only printed
   when PRINT_UNDOCUMENTED_OPTIONS_HELP is true.

   NOTE(review): how help_for_lang is consumed and how the function
   terminates is not visible in this excerpt -- confirm against the
   complete file.  */
826 static _Noreturn
void
827 print_help (argument
*argbuffer
)
829 bool help_for_lang
= FALSE
;
831 for (; argbuffer
->arg_type
!= at_end
; argbuffer
++)
832 if (argbuffer
->arg_type
== at_language
)
836 puts (argbuffer
->lang
->help
);
837 help_for_lang
= TRUE
;
843 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
845 These are the options accepted by %s.\n", progname
, progname
);
846 puts ("You may use unambiguous abbreviations for the long option names.");
847 puts (" A - as file name means read names from stdin (one per line).\n\
848 Absolute names are stored in the output file as they are.\n\
849 Relative ones are stored relative to the output file's directory.\n");
851 puts ("-a, --append\n\
852 Append tag entries to existing tags file.");
854 puts ("--packages-only\n\
855 For Ada files, only generate tags for packages.");
858 puts ("-B, --backward-search\n\
859 Write the search commands for the tag entries using '?', the\n\
860 backward-search command instead of '/', the forward-search command.");
862 /* This option is mostly obsolete, because etags can now automatically
863 detect C++. Retained for backward compatibility and for debugging and
864 experimentation. In principle, we could want to tag as C++ even
865 before any "class" or "template" keyword.
867 Treat files whose name suffix defaults to C language as C++ files.");
870 puts ("--declarations\n\
871 In C and derived languages, create tags for function declarations,");
873 puts ("\tand create tags for extern variables if --globals is used.");
876 ("\tand create tags for extern variables unless --no-globals is used.");
879 puts ("-d, --defines\n\
880 Create tag entries for C #define constants and enum constants, too.");
882 puts ("-D, --no-defines\n\
883 Don't create tag entries for C #define constants and enum constants.\n\
884 This makes the tags file smaller.");
887 puts ("-i FILE, --include=FILE\n\
888 Include a note in tag file indicating that, when searching for\n\
889 a tag, one should also consult the tags file FILE after\n\
890 checking the current file.");
892 puts ("-l LANG, --language=LANG\n\
893 Force the following files to be considered as written in the\n\
894 named language up to the next --language=LANG option.");
898 Create tag entries for global variables in some languages.");
900 puts ("--no-globals\n\
901 Do not create tag entries for global variables in some\n\
902 languages. This makes the tags file smaller.");
904 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
905 puts ("--no-line-directive\n\
906 Ignore #line preprocessor directives in C and derived languages.");
910 Create tag entries for members of structures in some languages.");
912 puts ("--no-members\n\
913 Do not create tag entries for members of structures\n\
914 in some languages.");
916 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
917 Make a tag for each line matching a regular expression pattern\n\
918 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
919 files only. REGEXFILE is a file containing one REGEXP per line.\n\
920 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
921 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
922 puts (" If TAGNAME/ is present, the tags created are named.\n\
923 For example Tcl named tags can be created with:\n\
924 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
925 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
926 `m' means to allow multi-line matches, `s' implies `m' and\n\
927 causes dot to match any character, including newline.");
929 puts ("-R, --no-regex\n\
930 Don't create tags from regexps for the following files.");
932 puts ("-I, --ignore-indentation\n\
933 In C and C++ do not assume that a closing brace in the first\n\
934 column is the final brace of a function or structure definition.");
936 puts ("-o FILE, --output=FILE\n\
937 Write the tags to FILE.");
939 puts ("--parse-stdin=NAME\n\
940 Read from standard input and record tags as belonging to file NAME.");
944 puts ("-t, --typedefs\n\
945 Generate tag entries for C and Ada typedefs.");
946 puts ("-T, --typedefs-and-c++\n\
947 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
948 and C++ member functions.");
952 puts ("-u, --update\n\
953 Update the tag entries for the given files, leaving tag\n\
954 entries for other files in place. Currently, this is\n\
955 implemented by deleting the existing entries for the given\n\
956 files and then rewriting the new entries at the end of the\n\
957 tags file. It is often faster to simply rebuild the entire\n\
958 tag file than to use this.");
962 puts ("-v, --vgrind\n\
963 Print on the standard output an index of items intended for\n\
964 human consumption, similar to the output of vgrind. The index\n\
965 is sorted, and gives the page number of each item.");
967 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
968 puts ("-w, --no-duplicates\n\
969 Do not create duplicate tag entries, for compatibility with\n\
970 traditional ctags.");
972 if (PRINT_UNDOCUMENTED_OPTIONS_HELP
)
973 puts ("-w, --no-warn\n\
974 Suppress warning messages about duplicate tag entries.");
976 puts ("-x, --cxref\n\
977 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
978 The output uses line numbers instead of page numbers, but\n\
979 beyond that the differences are cosmetic; try both to see\n\
983 puts ("-V, --version\n\
984 Print the version of the program.\n\
986 Print this help message.\n\
987 Followed by one or more `--language' options prints detailed\n\
988 help about tag generation for the specified languages.");
990 print_language_names ();
993 puts ("Report bugs to bug-gnu-emacs@gnu.org");
/* Program entry point.

   Setup: included_files and argbuffer are each allocated with ARGC
   slots.  typedefs, typedefs_or_cplusplus, constantypedefs, globals and
   members all default to TRUE.  The getopt option string starts with
   '-' so that non-option arguments are left in place; its tail is
   "BxdtTuvw" when CTAGS is true and "Di:" otherwise.

   Option loop: every getopt_long result either sets a flag or records
   an entry in argbuffer (at_filename, at_stdin, at_language or
   at_regexp), keeping whatlen_max in step with the longest recorded
   string.  Arguments remaining after the loop are appended as
   at_filename entries and the list is closed with an at_end entry.

   Processing: the tags file name defaults to "tags" (CTAGS) or "TAGS",
   cwd gets a trailing '/', and tagfiledir is cwd when the tags file is
   "-" or lies under "/dev/", otherwise the absolute directory of the
   tags file.  argbuffer is then replayed: languages are remembered,
   regexps go to analyse_regex, file names go to process_file_name (the
   name "-" means read names from stdin, one per line), and at_stdin
   entries go to process_file on stdin.

   Output: when !CTAGS || cxref_style the remaining tags are written with
   put_entries, followed by files that have no tags and the included
   files, and the program exits with EXIT_SUCCESS.  Otherwise
   (CTAGS && !cxref_style) shell commands are assembled with
   strcpy/strcat and run through system (): an
   "mv ... OTAGS;fgrep -v ... ;rm OTAGS" line for argbuffer entries, and
   "sort -u -o" on the tags file when append_to_tagfile or update is
   set.

   NOTE(review): tagfile and the recorded argument strings are spliced
   into those command lines without quoting -- confirm that names
   containing shell metacharacters cannot reach this path.
   NOTE(review): several case labels, braces and conditions are not
   visible in this excerpt.  */
1000 main (int argc
, char **argv
)
1003 unsigned int nincluded_files
;
1004 char **included_files
;
1005 argument
*argbuffer
;
1006 int current_arg
, file_count
;
1007 linebuffer filename_lb
;
1008 bool help_asked
= FALSE
;
1015 _fmode
= O_BINARY
; /* all of files are treated as binary files */
1019 nincluded_files
= 0;
1020 included_files
= xnew (argc
, char *);
1024 /* Allocate enough no matter what happens. Overkill, but each one
1026 argbuffer
= xnew (argc
, argument
);
1029 * Always find typedefs and structure tags.
1030 * Also default to find macro constants, enum constants, struct
1031 * members and global variables. Do it for both etags and ctags.
1033 typedefs
= typedefs_or_cplusplus
= constantypedefs
= TRUE
;
1034 globals
= members
= TRUE
;
1036 /* When the optstring begins with a '-' getopt_long does not rearrange the
1037 non-options arguments to be at the end, but leaves them alone. */
1038 optstring
= concat ("-ac:Cf:Il:o:r:RSVhH",
1039 (CTAGS
) ? "BxdtTuvw" : "Di:",
1042 while ((opt
= getopt_long (argc
, argv
, optstring
, longopts
, NULL
)) != EOF
)
1046 /* If getopt returns 0, then it has already processed a
1047 long-named option. We should do nothing. */
1051 /* This means that a file name has been seen. Record it. */
1052 argbuffer
[current_arg
].arg_type
= at_filename
;
1053 argbuffer
[current_arg
].what
= optarg
;
1054 len
= strlen (optarg
);
1055 if (whatlen_max
< len
)
1062 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1063 argbuffer
[current_arg
].arg_type
= at_stdin
;
1064 argbuffer
[current_arg
].what
= optarg
;
1065 len
= strlen (optarg
);
1066 if (whatlen_max
< len
)
1071 fatal ("cannot parse standard input more than once", (char *)NULL
);
1072 parsing_stdin
= TRUE
;
1075 /* Common options. */
1076 case 'a': append_to_tagfile
= TRUE
; break;
1077 case 'C': cplusplus
= TRUE
; break;
1078 case 'f': /* for compatibility with old makefiles */
1082 error ("-o option may only be given once.");
1083 suggest_asking_for_help ();
1089 case 'S': /* for backward compatibility */
1090 ignoreindent
= TRUE
;
1094 language
*lang
= get_language_from_langname (optarg
);
1097 argbuffer
[current_arg
].lang
= lang
;
1098 argbuffer
[current_arg
].arg_type
= at_language
;
1104 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105 optarg
= concat (optarg
, "i", ""); /* memory leak here */
1108 argbuffer
[current_arg
].arg_type
= at_regexp
;
1109 argbuffer
[current_arg
].what
= optarg
;
1110 len
= strlen (optarg
);
1111 if (whatlen_max
< len
)
1116 argbuffer
[current_arg
].arg_type
= at_regexp
;
1117 argbuffer
[current_arg
].what
= NULL
;
1129 case 'D': constantypedefs
= FALSE
; break;
1130 case 'i': included_files
[nincluded_files
++] = optarg
; break;
1132 /* Ctags options. */
1133 case 'B': searchar
= '?'; break;
1134 case 'd': constantypedefs
= TRUE
; break;
1135 case 't': typedefs
= TRUE
; break;
1136 case 'T': typedefs
= typedefs_or_cplusplus
= TRUE
; break;
1137 case 'u': update
= TRUE
; break;
1138 case 'v': vgrind_style
= TRUE
; /*FALLTHRU*/
1139 case 'x': cxref_style
= TRUE
; break;
1140 case 'w': no_warnings
= TRUE
; break;
1142 suggest_asking_for_help ();
1146 /* No more options. Store the rest of arguments. */
1147 for (; optind
< argc
; optind
++)
1149 argbuffer
[current_arg
].arg_type
= at_filename
;
1150 argbuffer
[current_arg
].what
= argv
[optind
];
1151 len
= strlen (argv
[optind
]);
1152 if (whatlen_max
< len
)
1158 argbuffer
[current_arg
].arg_type
= at_end
;
1161 print_help (argbuffer
);
1164 if (nincluded_files
== 0 && file_count
== 0)
1166 error ("no input files specified.");
1167 suggest_asking_for_help ();
1171 if (tagfile
== NULL
)
1172 tagfile
= savestr (CTAGS
? "tags" : "TAGS");
1173 cwd
= etags_getcwd (); /* the current working directory */
1174 if (cwd
[strlen (cwd
) - 1] != '/')
1177 cwd
= concat (oldcwd
, "/", "");
1181 /* Compute base directory for relative file names. */
1182 if (streq (tagfile
, "-")
1183 || strneq (tagfile
, "/dev/", 5))
1184 tagfiledir
= cwd
; /* relative file names are relative to cwd */
1187 canonicalize_filename (tagfile
);
1188 tagfiledir
= absolute_dirname (tagfile
, cwd
);
1191 init (); /* set up boolean "functions" */
1193 linebuffer_init (&lb
);
1194 linebuffer_init (&filename_lb
);
1195 linebuffer_init (&filebuf
);
1196 linebuffer_init (&token_name
);
1200 if (streq (tagfile
, "-"))
1204 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205 doesn't take effect until after `stdout' is already open). */
1206 if (!isatty (fileno (stdout
)))
1207 setmode (fileno (stdout
), O_BINARY
);
1211 tagf
= fopen (tagfile
, append_to_tagfile
? "a" : "w");
1217 * Loop through files finding functions.
1219 for (i
= 0; i
< current_arg
; i
++)
1221 static language
*lang
; /* non-NULL if language is forced */
1224 switch (argbuffer
[i
].arg_type
)
1227 lang
= argbuffer
[i
].lang
;
1230 analyse_regex (argbuffer
[i
].what
);
1233 this_file
= argbuffer
[i
].what
;
1234 /* Input file named "-" means read file names from stdin
1235 (one per line) and use them. */
1236 if (streq (this_file
, "-"))
1239 fatal ("cannot parse standard input AND read file names from it",
1241 while (readline_internal (&filename_lb
, stdin
) > 0)
1242 process_file_name (filename_lb
.buffer
, lang
);
1245 process_file_name (this_file
, lang
);
1248 this_file
= argbuffer
[i
].what
;
1249 process_file (stdin
, this_file
, lang
);
1256 free (filebuf
.buffer
);
1257 free (token_name
.buffer
);
1259 if (!CTAGS
|| cxref_style
)
1261 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262 put_entries (nodehead
);
1263 free_tree (nodehead
);
1269 /* Output file entries that have no tags. */
1270 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1272 fprintf (tagf
, "\f\n%s,0\n", fdp
->taggedfname
);
1274 while (nincluded_files
-- > 0)
1275 fprintf (tagf
, "\f\n%s,include\n", *included_files
++);
1277 if (fclose (tagf
) == EOF
)
1281 exit (EXIT_SUCCESS
);
1284 /* From here on, we are in (CTAGS && !cxref_style) */
1288 xmalloc (strlen (tagfile
) + whatlen_max
+
1289 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290 for (i
= 0; i
< current_arg
; ++i
)
1292 switch (argbuffer
[i
].arg_type
)
1298 continue; /* the for loop */
1300 strcpy (cmd
, "mv ");
1301 strcat (cmd
, tagfile
);
1302 strcat (cmd
, " OTAGS;fgrep -v '\t");
1303 strcat (cmd
, argbuffer
[i
].what
);
1304 strcat (cmd
, "\t' OTAGS >");
1305 strcat (cmd
, tagfile
);
1306 strcat (cmd
, ";rm OTAGS");
1307 if (system (cmd
) != EXIT_SUCCESS
)
1308 fatal ("failed to execute shell command", (char *)NULL
);
1311 append_to_tagfile
= TRUE
;
1314 tagf
= fopen (tagfile
, append_to_tagfile
? "a" : "w");
1317 put_entries (nodehead
); /* write all the tags (CTAGS) */
1318 free_tree (nodehead
);
1320 if (fclose (tagf
) == EOF
)
1324 if (append_to_tagfile
|| update
)
1326 char *cmd
= xmalloc (2 * strlen (tagfile
) + sizeof "sort -u -o..");
1327 /* Maybe these should be used:
1328 setenv ("LC_COLLATE", "C", 1);
1329 setenv ("LC_ALL", "C", 1); */
1330 strcpy (cmd
, "sort -u -o ");
1331 strcat (cmd
, tagfile
);
1333 strcat (cmd
, tagfile
);
1334 exit (system (cmd
));
1336 return EXIT_SUCCESS
;
1341 * Return a compressor given the file name. If EXTPTR is non-zero,
1342 * return a pointer into FILE where the compressor-specific
1343 * extension begins. If no compressor is found, NULL is returned
1344 * and EXTPTR is not significant.
1345 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
/* Match the extension of FILE against the compressors table.

   The last '/' and the last '.' of FILE are located with
   etags_strrchr; the first test covers a missing '.' and a '.' that
   lies before the last '/'.  Table entries are compared with streq and
   the table ends at the entry whose suffix is NULL.  The surrounding
   do/while runs until the suffix string is exhausted, but per the
   comments only its first pass is taken outside MSDOS.

   NOTE(review): the return statements and the stores through EXTPTR
   are not visible in this excerpt.  */
1348 get_compressor_from_suffix (char *file
, char **extptr
)
1351 char *slash
, *suffix
;
1353 /* File has been processed by canonicalize_filename,
1354 so we don't need to consider backslashes on DOS_NT. */
1355 slash
= etags_strrchr (file
, '/');
1356 suffix
= etags_strrchr (file
, '.');
1357 if (suffix
== NULL
|| suffix
< slash
)
1362 /* Let those poor souls who live with DOS 8+3 file name limits get
1363 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364 Only the first do loop is run if not MSDOS */
1367 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1368 if (streq (compr
->suffix
, suffix
))
1371 break; /* do it only once: not really a loop */
1374 } while (*suffix
!= '\0');
1381 * Return a language given the name.
/* Look NAME up in lang_names, whose last entry has a NULL name; names
   are compared with streq.  Two failures are reported through error ():
   "empty language name" and "unknown language \"%s\"".
   NOTE(review): the guards around the error calls and the returned
   values are not visible in this excerpt.  */
1384 get_language_from_langname (const char *name
)
1389 error ("empty language name");
1392 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1393 if (streq (name
, lang
->name
))
1395 error ("unknown language \"%s\"", name
);
1403 * Return a language given the interpreter name.
/* Search lang_names for a language that lists INTERPRETER among its
   interpreters.  A NULL INTERPRETER is tested before the search.
   Languages whose interpreters pointer is NULL are skipped; each
   interpreters array is scanned up to its NULL entry and compared with
   streq.
   NOTE(review): the returned values are not visible in this excerpt.  */
1406 get_language_from_interpreter (char *interpreter
)
1411 if (interpreter
== NULL
)
1413 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1414 if (lang
->interpreters
!= NULL
)
1415 for (iname
= lang
->interpreters
; *iname
!= NULL
; iname
++)
1416 if (streq (*iname
, interpreter
))
1425 * Return a language given the file name.
/* Choose a language for FILE from lang_names in two passes.

   Pass 1 compares the whole of FILE with every entry of each language's
   NULL-terminated filenames array.  Pass 2 locates the last '.' in FILE
   with etags_strrchr and compares the suffix with every entry of each
   language's suffixes array.  Languages whose array pointer is NULL are
   skipped in the corresponding pass.

   A nonzero CASE_SENSITIVE selects streq for the comparisons, zero
   selects strcaseeq.

   NOTE(review): the returned values and the handling of a FILE without
   a '.' are not visible in this excerpt.  */
1428 get_language_from_filename (char *file
, int case_sensitive
)
1431 const char **name
, **ext
, *suffix
;
1433 /* Try whole file name first. */
1434 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1435 if (lang
->filenames
!= NULL
)
1436 for (name
= lang
->filenames
; *name
!= NULL
; name
++)
1437 if ((case_sensitive
)
1438 ? streq (*name
, file
)
1439 : strcaseeq (*name
, file
))
1442 /* If not found, try suffix after last dot. */
1443 suffix
= etags_strrchr (file
, '.');
1447 for (lang
= lang_names
; lang
->name
!= NULL
; lang
++)
1448 if (lang
->suffixes
!= NULL
)
1449 for (ext
= lang
->suffixes
; *ext
!= NULL
; ext
++)
1450 if ((case_sensitive
)
1451 ? streq (*ext
, suffix
)
1452 : strcaseeq (*ext
, suffix
))
1459 * This routine is called on each file argument.
/* Handle one file argument FILE; LANG is handed on to process_file.

   Steps visible here:
   - FILE is canonicalized in place.  A FILE equal to tagfile (unless
     tagfile is "-") is reported with "skipping inclusion of %s in self.".
   - If get_compressor_from_suffix finds no compressor, real_name and
     uncompressed_name are a copy of FILE; otherwise real_name and
     compressed_name are a copy of FILE and uncompressed_name is FILE cut
     at the extension.
   - fdhead is searched for an entry whose infname equals
     uncompressed_name (a file that was dealt with already).
   - When stat fails on real_name other names are tried: the
     uncompressed name if a compressed one was given, else FILE followed
     by "." and each compressor suffix, including variants produced by
     shifting the suffix left with memmove.
   - Anything that is not a regular file is reported and skipped.
   - The input is opened with popen on "<compr->command> <real_name>"
     when real_name is the compressed name, and with fopen "r"
     otherwise; it is closed with pclose or fclose accordingly.
   - compressed_name and uncompressed_name are freed at the end.

   NOTE(review): error reporting for failed opens and the jump targets
   used for the skips are not visible in this excerpt.  */
1462 process_file_name (char *file
, language
*lang
)
1464 struct stat stat_buf
;
1468 char *compressed_name
, *uncompressed_name
;
1469 char *ext
, *real_name
;
1472 canonicalize_filename (file
);
1473 if (streq (file
, tagfile
) && !streq (tagfile
, "-"))
1475 error ("skipping inclusion of %s in self.", file
);
1478 if ((compr
= get_compressor_from_suffix (file
, &ext
)) == NULL
)
1480 compressed_name
= NULL
;
1481 real_name
= uncompressed_name
= savestr (file
);
1485 real_name
= compressed_name
= savestr (file
);
1486 uncompressed_name
= savenstr (file
, ext
- file
);
1489 /* If the canonicalized uncompressed name
1490 has already been dealt with, skip it silently. */
1491 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
1493 assert (fdp
->infname
!= NULL
);
1494 if (streq (uncompressed_name
, fdp
->infname
))
1498 if (stat (real_name
, &stat_buf
) != 0)
1500 /* Reset real_name and try with a different name. */
1502 if (compressed_name
!= NULL
) /* try with the given suffix */
1504 if (stat (uncompressed_name
, &stat_buf
) == 0)
1505 real_name
= uncompressed_name
;
1507 else /* try all possible suffixes */
1509 for (compr
= compressors
; compr
->suffix
!= NULL
; compr
++)
1511 compressed_name
= concat (file
, ".", compr
->suffix
);
1512 if (stat (compressed_name
, &stat_buf
) != 0)
1516 char *suf
= compressed_name
+ strlen (file
);
1517 size_t suflen
= strlen (compr
->suffix
) + 1;
1518 for ( ; suf
[1]; suf
++, suflen
--)
1520 memmove (suf
, suf
+ 1, suflen
);
1521 if (stat (compressed_name
, &stat_buf
) == 0)
1523 real_name
= compressed_name
;
1527 if (real_name
!= NULL
)
1530 free (compressed_name
);
1531 compressed_name
= NULL
;
1535 real_name
= compressed_name
;
1540 if (real_name
== NULL
)
1545 } /* try with a different name */
1547 if (!S_ISREG (stat_buf
.st_mode
))
1549 error ("skipping %s: it is not a regular file.", real_name
);
1552 if (real_name
== compressed_name
)
1554 char *cmd
= concat (compr
->command
, " ", real_name
);
1555 inf
= (FILE *) popen (cmd
, "r");
1559 inf
= fopen (real_name
, "r");
1566 process_file (inf
, uncompressed_name
, lang
);
1568 if (real_name
== compressed_name
)
1569 retval
= pclose (inf
);
1571 retval
= fclose (inf
);
1576 free (compressed_name
);
1577 free (uncompressed_name
);
/* Register the input stream FH under the name FN and flush its tags
   when possible.

   A new fdesc is allocated and filled in: infname is a copy of FN,
   infabsname and infabsdir are computed relative to cwd, and
   taggedfname is the canonical absolute name when FN is absolute,
   otherwise FN made relative to tagfiledir.  usecharno starts TRUE and
   written starts FALSE.  curfdp is then set to fdhead.

   Per the comment below, outside Ctags mode and for files that are not
   metasource and contained no #line directives, the tags of this file
   are written and freed at once: the head of this file's sublist is
   found by following left pointers from nodehead, passed to put_entries
   and free_tree, and unlinked either by clearing nodehead or by
   clearing the left pointer of the previous node.

   NOTE(review): the use of LANG, the linking of the new fdesc into
   fdhead and the call that parses FH are not visible in this excerpt.  */
1584 process_file (FILE *fh
, char *fn
, language
*lang
)
1586 static const fdesc emptyfdesc
;
1589 /* Create a new input file description entry. */
1590 fdp
= xnew (1, fdesc
);
1593 fdp
->infname
= savestr (fn
);
1595 fdp
->infabsname
= absolute_filename (fn
, cwd
);
1596 fdp
->infabsdir
= absolute_dirname (fn
, cwd
);
1597 if (filename_is_absolute (fn
))
1599 /* An absolute file name. Canonicalize it. */
1600 fdp
->taggedfname
= absolute_filename (fn
, NULL
);
1604 /* A file name relative to cwd. Make it relative
1605 to the directory of the tags file. */
1606 fdp
->taggedfname
= relative_filename (fn
, tagfiledir
);
1608 fdp
->usecharno
= TRUE
; /* use char position when making tags */
1610 fdp
->written
= FALSE
; /* not written on tags file yet */
1613 curfdp
= fdhead
; /* the current file description */
1617 /* If not Ctags, and if this is not metasource and if it contained no #line
1618 directives, we can write the tags and free all nodes pointing to
1621 && curfdp
->usecharno
/* no #line directives in this file */
1622 && !curfdp
->lang
->metasource
)
1626 /* Look for the head of the sublist relative to this file. See add_node
1627 for the structure of the node tree. */
1629 for (np
= nodehead
; np
!= NULL
; prev
= np
, np
= np
->left
)
1630 if (np
->fdp
== curfdp
)
1633 /* If we generated tags for this file, write and delete them. */
1636 /* This is the head of the last sublist, if any. The following
1637 instructions depend on this being true. */
1638 assert (np
->left
== NULL
);
1640 assert (fdhead
== curfdp
);
1641 assert (last_node
->fdp
== curfdp
);
1642 put_entries (np
); /* write tags for file curfdp->taggedfname */
1643 free_tree (np
); /* remove the written nodes */
1645 nodehead
= NULL
; /* no nodes left */
1647 prev
->left
= NULL
; /* delete the pointer to the sublist */
1653 * This routine sets up the boolean pseudo-functions which work
1654 * by setting boolean flags dependent upon the corresponding character.
1655 * Every char which is NOT in that string is not a white char. Therefore,
1656 * all of the array "_wht" is set to FALSE, and then the elements
1657 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1658 * of a char is TRUE if it is in the string "white", else FALSE.
/* Body of the table set-up routine (main calls it as init; the
   signature line is not visible in this excerpt).

   First loop: iswhite, notinname, begtoken, intoken and endtoken are
   set to FALSE for every index below CHARS.
   Following loops: the entries for the characters of the strings white,
   nonam, begtk, midtk and endtk are set to TRUE in the matching table.
   After each of the last four loops the '\0' entry of that table is
   given the value of its '\n' entry.  */
1663 register const char *sp
;
1666 for (i
= 0; i
< CHARS
; i
++)
1667 iswhite (i
) = notinname (i
) = begtoken (i
) = intoken (i
) = endtoken (i
) = FALSE
;
1668 for (sp
= white
; *sp
!= '\0'; sp
++) iswhite (*sp
) = TRUE
;
1669 for (sp
= nonam
; *sp
!= '\0'; sp
++) notinname (*sp
) = TRUE
;
1670 notinname ('\0') = notinname ('\n');
1671 for (sp
= begtk
; *sp
!= '\0'; sp
++) begtoken (*sp
) = TRUE
;
1672 begtoken ('\0') = begtoken ('\n');
1673 for (sp
= midtk
; *sp
!= '\0'; sp
++) intoken (*sp
) = TRUE
;
1674 intoken ('\0') = intoken ('\n');
1675 for (sp
= endtk
; *sp
!= '\0'; sp
++) endtoken (*sp
) = TRUE
;
1676 endtoken ('\0') = endtoken ('\n');
1680 * This routine opens the specified file and calls the function
1681 * which finds the function and type definitions.
/* Select a parser for the current file (curfdp) read from INF and set
   up the global counters for it.

   The parser is chosen by the first rule that yields a language with a
   non-NULL function:
   1. the language already stored in curfdp->lang;
   2. get_language_from_filename on curfdp->infname, case sensitive;
   3. a first line starting with "#!": the text after the last '/' (or,
      without a slash, the first non-blank after "#!") up to the next
      blank is given to get_language_from_interpreter;
   4. get_language_from_filename again, case insensitive;
   5. Fortran first, then C or C++ (depending on cplusplus) if last_node
      shows that Fortran produced no entries.

   Unless no_line_directive is set, for a metasource language the fdesc
   list is scanned for descriptions whose taggedfname equals that of
   curfdp; each match has its nodes invalidated, is unlinked from the
   list and is released with free_fdesc.

   Finally filebuf is reset, lineno, charno and linecharno are zeroed,
   and regex_tag_multiline is called.

   NOTE(review): the rewind calls, the parser invocation and parts of
   the conditions are not visible in this excerpt.  */
1684 find_entries (FILE *inf
)
1687 language
*lang
= curfdp
->lang
;
1688 Lang_function
*parser
= NULL
;
1690 /* If user specified a language, use it. */
1691 if (lang
!= NULL
&& lang
->function
!= NULL
)
1693 parser
= lang
->function
;
1696 /* Else try to guess the language given the file name. */
1699 lang
= get_language_from_filename (curfdp
->infname
, TRUE
);
1700 if (lang
!= NULL
&& lang
->function
!= NULL
)
1702 curfdp
->lang
= lang
;
1703 parser
= lang
->function
;
1707 /* Else look for sharp-bang as the first two characters. */
1709 && readline_internal (&lb
, inf
) > 0
1711 && lb
.buffer
[0] == '#'
1712 && lb
.buffer
[1] == '!')
1716 /* Set lp to point at the first char after the last slash in the
1717 line or, if no slashes, at the first nonblank. Then set cp to
1718 the first successive blank and terminate the string. */
1719 lp
= etags_strrchr (lb
.buffer
+2, '/');
1723 lp
= skip_spaces (lb
.buffer
+ 2);
1724 cp
= skip_non_spaces (lp
);
1727 if (strlen (lp
) > 0)
1729 lang
= get_language_from_interpreter (lp
);
1730 if (lang
!= NULL
&& lang
->function
!= NULL
)
1732 curfdp
->lang
= lang
;
1733 parser
= lang
->function
;
1738 /* We rewind here, even if inf may be a pipe. We fail if the
1739 length of the first line is longer than the pipe block size,
1740 which is unlikely. */
1743 /* Else try to guess the language given the case insensitive file name. */
1746 lang
= get_language_from_filename (curfdp
->infname
, FALSE
);
1747 if (lang
!= NULL
&& lang
->function
!= NULL
)
1749 curfdp
->lang
= lang
;
1750 parser
= lang
->function
;
1754 /* Else try Fortran or C. */
1757 node
*old_last_node
= last_node
;
1759 curfdp
->lang
= get_language_from_langname ("fortran");
1762 if (old_last_node
== last_node
)
1763 /* No Fortran entries found. Try C. */
1765 /* We do not tag if rewind fails.
1766 Only the file name will be recorded in the tags file. */
1768 curfdp
->lang
= get_language_from_langname (cplusplus
? "c++" : "c");
1774 if (!no_line_directive
1775 && curfdp
->lang
!= NULL
&& curfdp
->lang
->metasource
)
1776 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777 file, or anyway we parsed a file that is automatically generated from
1778 this one. If this is the case, the bingo.c file contained #line
1779 directives that generated tags pointing to this file. Let's delete
1780 them all before parsing this file, which is the real source. */
1782 fdesc
**fdpp
= &fdhead
;
1783 while (*fdpp
!= NULL
)
1785 && streq ((*fdpp
)->taggedfname
, curfdp
->taggedfname
))
1786 /* We found one of those! We must delete both the file description
1787 and all tags referring to it. */
1789 fdesc
*badfdp
= *fdpp
;
1791 /* Delete the tags referring to badfdp->taggedfname
1792 that were obtained from badfdp->infname. */
1793 invalidate_nodes (badfdp
, &nodehead
);
1795 *fdpp
= badfdp
->next
; /* remove the bad description from the list */
1796 free_fdesc (badfdp
);
1799 fdpp
= &(*fdpp
)->next
; /* advance the list pointer */
1802 assert (parser
!= NULL
);
1804 /* Generic initializations before reading from file. */
1805 linebuffer_setlen (&filebuf
, 0); /* reset the file buffer */
1807 /* Generic initializations before parsing file with readline. */
1808 lineno
= 0; /* reset global line number */
1809 charno
= 0; /* reset global char number */
1810 linecharno
= 0; /* reset global char number of line start */
1814 regex_tag_multiline ();
1819 * Check whether an implicitly named tag should be created,
1820 * then call `pfnote'.
1821 * NAME is a string that is internally copied by this function.
1823 * TAGS format specification
1824 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825 * The following is explained in some more detail in etc/ETAGS.EBNF.
1827 * make_tag creates tags with "implicit tag names" (unnamed tags)
1828 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829 * 1. NAME does not contain any of the characters in NONAM;
1830 * 2. LINESTART contains name as either a rightmost, or rightmost but
1831 * one character, substring;
1832 * 3. the character, if any, immediately before NAME in LINESTART must
1833 * be a character in NONAM;
1834 * 4. the character, if any, immediately after NAME in LINESTART must
1835 * also be a character in NONAM.
1837 * The implementation uses the notinname() macro, which recognizes the
1838 * characters stored in the string `nonam'.
1839 * etags.el needs to use the same characters that are in NONAM.
/* Decide whether the tag NAME (NAMELEN characters) may be left
   implicit, then hand the tag to pfnote.

   A tag counts as named when NAME is non-NULL and NAMELEN is positive.
   Outside CTAGS mode a named tag is downgraded to unnamed when the
   numbered rules in the comments hold: no character of NAME satisfies
   notinname (rule #1), and NAME sits at the end of LINESTART -- one
   character earlier when the last character of the line satisfies
   notinname (rule #4) -- bounded on the left by the start of the line or
   by a notinname character (rules #2 and #3).

   IS_FUNC, LINESTART, LINELEN, LNO and CNO are passed to pfnote
   unchanged; nname is a copy of NAME made with savenstr.

   NOTE(review): linestart[linelen-1] is read without a visible check
   that LINELEN is positive -- confirm callers never pass 0.
   NOTE(review): the condition under which nname is copied or left NULL
   is not visible in this excerpt.  */
1842 make_tag (const char *name
, /* tag name, or NULL if unnamed */
1843 int namelen
, /* tag length */
1844 int is_func
, /* tag is a function */
1845 char *linestart
, /* start of the line where tag is */
1846 int linelen
, /* length of the line where tag is */
1847 int lno
, /* line number */
1848 long int cno
) /* character number */
1850 bool named
= (name
!= NULL
&& namelen
> 0);
1853 if (!CTAGS
&& named
) /* maybe set named to false */
1854 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855 such that etags.el can guess a name from it. */
1858 register const char *cp
= name
;
1860 for (i
= 0; i
< namelen
; i
++)
1861 if (notinname (*cp
++))
1863 if (i
== namelen
) /* rule #1 */
1865 cp
= linestart
+ linelen
- namelen
;
1866 if (notinname (linestart
[linelen
-1]))
1867 cp
-= 1; /* rule #4 */
1868 if (cp
>= linestart
/* rule #2 */
1870 || notinname (cp
[-1])) /* rule #3 */
1871 && strneq (name
, cp
, namelen
)) /* rule #2 */
1872 named
= FALSE
; /* use implicit tag name */
1877 nname
= savenstr (name
, namelen
);
1879 pfnote (nname
, is_func
, linestart
, linelen
, lno
, cno
);
/* Build a node for one tag and insert it with add_node.

   NAME must be NULL or non-empty (asserted).  A NULL NAME in CTAGS mode
   is tested before the node is allocated.  In ctags mode without
   cxref_style a tag called "main" is renamed to "M" followed by the
   base name of curfdp->taggedfname; a one-character extension at the
   end of that name is tested for afterwards.

   The node records IS_FUNC, clears been_warned, and stores CNO as its
   character number when the file description has usecharno set,
   otherwise invalidcharno.  Both children start as NULL.

   The stored pattern depends on the mode: in ctags mode without
   cxref_style it is LINESTART followed by "$" when LINESTART is shorter
   than 50 characters, else the first 50 characters of LINESTART; in the
   other modes it is the first LINELEN characters of LINESTART.

   NOTE(review): the assignments of name, fdp, lno and valid, and what
   happens to the one-character extension, are not visible in this
   excerpt.  */
1884 pfnote (char *name
, int is_func
, char *linestart
, int linelen
, int lno
, long int cno
)
1885 /* tag name, or NULL if unnamed */
1886 /* tag is a function */
1887 /* start of the line where tag is */
1888 /* length of the line where tag is */
1890 /* character number */
1894 assert (name
== NULL
|| name
[0] != '\0');
1895 if (CTAGS
&& name
== NULL
)
1898 np
= xnew (1, node
);
1900 /* If ctags mode, change name "main" to M<thisfilename>. */
1901 if (CTAGS
&& !cxref_style
&& streq (name
, "main"))
1903 register char *fp
= etags_strrchr (curfdp
->taggedfname
, '/');
1904 np
->name
= concat ("M", fp
== NULL
? curfdp
->taggedfname
: fp
+ 1, "");
1905 fp
= etags_strrchr (np
->name
, '.');
1906 if (fp
!= NULL
&& fp
[1] != '\0' && fp
[2] == '\0')
1912 np
->been_warned
= FALSE
;
1914 np
->is_func
= is_func
;
1916 if (np
->fdp
->usecharno
)
1917 /* Our char numbers are 0-base, because of C language tradition?
1918 ctags compatibility? old versions compatibility? I don't know.
1919 Anyway, since emacs's are 1-base we expect etags.el to take care
1920 of the difference. If we wanted to have 1-based numbers, we would
1921 uncomment the +1 below. */
1922 np
->cno
= cno
/* + 1 */ ;
1924 np
->cno
= invalidcharno
;
1925 np
->left
= np
->right
= NULL
;
1926 if (CTAGS
&& !cxref_style
)
1928 if (strlen (linestart
) < 50)
1929 np
->regex
= concat (linestart
, "$", "");
1931 np
->regex
= savenstr (linestart
, 50);
1934 np
->regex
= savenstr (linestart
, linelen
);
1936 add_node (np
, &nodehead
);
1941 * recurse on left children, iterate on right children.
/* Release the tree rooted at NP.  The right child is saved in
   node_right before the left subtree is handed to a recursive call,
   which matches the note above about recursing on left children and
   iterating on right children.
   NOTE(review): the loop and the free calls for the node itself are
   not visible in this excerpt.  */
1944 free_tree (register node
*np
)
1948 register node
*node_right
= np
->right
;
1949 free_tree (np
->left
);
1959 * delete a file description
/* Release the strings owned by the file description FDP: infname,
   infabsname, infabsdir and taggedfname are each passed to free.
   NOTE(review): whether further members and FDP itself are freed is not
   visible in this excerpt.  */
1962 free_fdesc (register fdesc
*fdp
)
1964 free (fdp
->infname
);
1965 free (fdp
->infabsname
);
1966 free (fdp
->infabsdir
);
1967 free (fdp
->taggedfname
);
1974 * Adds a node to the tree of nodes. In etags mode, sort by file
1975 * name. In ctags mode, sort by tag name. Make no attempt at
1978 * add_node is the only function allowed to add nodes, so it can
/* Insert NP into the tree whose root pointer is *CUR_NODE_P.

   ETAGS branch (see the closing "if ETAGS mode" marker): tags of one
   file form a sublist linked through right pointers, and the heads of
   the sublists of different files are linked through left pointers.
   If last_node belongs to the same file as NP, NP is appended directly
   after last_node.  Otherwise the function recurses into the right
   pointer when the current node belongs to NP's file, and into the left
   pointer when it does not.

   CTAGS branch: nodes are ordered by strcmp on the tag name.  When
   no_duplicates is set and the names are equal, a "Duplicate entry"
   message is printed to stderr -- for two entries of the same file the
   second one is reported as ignored; for different files the message is
   a warning that is printed once per existing node (been_warned) and
   only when no_warnings is off.  Otherwise the function recurses left
   for a negative comparison result and right for anything else.

   NOTE(review): the handling of an empty slot (cur_node == NULL) and
   the returns after the duplicate messages are not visible in this
   excerpt.  */
1982 add_node (node
*np
, node
**cur_node_p
)
1985 register node
*cur_node
= *cur_node_p
;
1987 if (cur_node
== NULL
)
1997 /* For each file name, tags are in a linked sublist on the right
1998 pointer. The first tags of different files are a linked list
1999 on the left pointer. last_node points to the end of the last
2001 if (last_node
!= NULL
&& last_node
->fdp
== np
->fdp
)
2003 /* Let's use the same sublist as the last added node. */
2004 assert (last_node
->right
== NULL
);
2005 last_node
->right
= np
;
2008 else if (cur_node
->fdp
== np
->fdp
)
2010 /* Scanning the list we found the head of a sublist which is
2011 good for us. Let's scan this sublist. */
2012 add_node (np
, &cur_node
->right
);
2015 /* The head of this sublist is not good for us. Let's try the
2017 add_node (np
, &cur_node
->left
);
2018 } /* if ETAGS mode */
2023 dif
= strcmp (np
->name
, cur_node
->name
);
2026 * If this tag name matches an existing one, then
2027 * do not add the node, but maybe print a warning.
2029 if (no_duplicates
&& !dif
)
2031 if (np
->fdp
== cur_node
->fdp
)
2035 fprintf (stderr
, "Duplicate entry in file %s, line %d: %s\n",
2036 np
->fdp
->infname
, lineno
, np
->name
);
2037 fprintf (stderr
, "Second entry ignored\n");
2040 else if (!cur_node
->been_warned
&& !no_warnings
)
2044 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045 np
->fdp
->infname
, cur_node
->fdp
->infname
, np
->name
);
2046 cur_node
->been_warned
= TRUE
;
2051 /* Actually add the node */
2052 add_node (np
, dif
< 0 ? &cur_node
->left
: &cur_node
->right
);
2053 } /* if CTAGS mode */
2057 * invalidate_nodes ()
2058 * Scan the node tree and invalidate all nodes pointing to the
2059 * given file description (CTAGS case) or free them (ETAGS case).
/* Remove from the tree at *NPP every trace of the file description
   BADFDP.

   First variant (the CTAGS case, going by the description above):
   non-NULL left and right children are visited recursively, and between
   the two visits the node's own fdp is compared with BADFDP.

   Second variant: the node's fdp must be non-NULL (asserted).  A node
   that belongs to BADFDP heads a sublist, which is detached by storing
   its left pointer into *NPP, isolated by clearing that left pointer,
   freed with free_tree, and followed by another pass over *NPP.  Any
   other node just forwards the call to its left pointer.

   NOTE(review): the NULL check on the incoming node, the mode test and
   the action taken on a matching node in the first variant are not
   visible in this excerpt.  */
2062 invalidate_nodes (fdesc
*badfdp
, node
**npp
)
2071 if (np
->left
!= NULL
)
2072 invalidate_nodes (badfdp
, &np
->left
);
2073 if (np
->fdp
== badfdp
)
2075 if (np
->right
!= NULL
)
2076 invalidate_nodes (badfdp
, &np
->right
);
2080 assert (np
->fdp
!= NULL
);
2081 if (np
->fdp
== badfdp
)
2083 *npp
= np
->left
; /* detach the sublist from the list */
2084 np
->left
= NULL
; /* isolate it */
2085 free_tree (np
); /* free it */
2086 invalidate_nodes (badfdp
, npp
);
2089 invalidate_nodes (badfdp
, &np
->left
);
/* Forward declarations of the two size helpers defined below;
   number_len is additionally tagged with ATTRIBUTE_CONST.  */
2094 static int total_size_of_entries (node
*);
2095 static int number_len (long) ATTRIBUTE_CONST
;
2097 /* Length of a non-negative number's decimal representation. */
/* Count decimal digits of NUM by dividing it by 10 for as long as the
   quotient stays positive.
   NOTE(review): the counter variable and the return statement are not
   visible in this excerpt.  */
2099 number_len (long int num
)
2102 while ((num
/= 10) > 0)
2108 * Return total number of characters that put_entries will output for
2109 * the nodes in the linked list at the right of the specified node.
2110 * This count is irrelevant with etags.el since emacs 19.34 at least,
2111 * but is still supplied for backward compatibility.
/* Add up the output size of the nodes reached from NP through right
   pointers.  Each node contributes:
     strlen (regex) + 1          for the pattern and the \177 byte,
     strlen (name) + 1           when the node is named (name and \001),
     number_len (lno) + 1        for the line number and the comma,
     number_len (cno)            unless cno is invalidcharno,
     1                           for the newline.
   The sum is accumulated in an int.
   NOTE(review): any per-node filter and the return statement are not
   visible in this excerpt.  */
2114 total_size_of_entries (register node
*np
)
2116 register int total
= 0;
2118 for (; np
!= NULL
; np
= np
->right
)
2121 total
+= strlen (np
->regex
) + 1; /* pat\177 */
2122 if (np
->name
!= NULL
)
2123 total
+= strlen (np
->name
) + 1; /* name\001 */
2124 total
+= number_len ((long) np
->lno
) + 1; /* lno, */
2125 if (np
->cno
!= invalidcharno
) /* cno */
2126 total
+= number_len (np
->cno
);
2127 total
+= 1; /* newline */
/* Write the tags stored in the tree at NP.

   Entries reached through np->left are written before the node itself
   in one branch; np->right is always written after it, and a second
   np->left call follows in another branch.

   Formats visible here:
   - etags: a file header "\f\n<taggedfname>,<size>\n", with the size
     taken from total_size_of_entries, marks the file description as
     written.  Each tag is the pattern, a \177 byte, the name and a \001
     byte for named tags, the line number, a comma, and the character
     number unless it is invalidcharno.
   - ctags family: a NULL name is reported as an internal error.  Two
     listings go to stdout: "name file page", where the page is
     (lno + 63) / 64, and "name lno file pattern".  The tags-file form is
     "name\tfile\t" followed either by the pattern enclosed in searchar
     (each backslash and searchar in the pattern is tested for in the
     copy loop) or by the bare line number.

   fdp is a static variable that persists between calls.

   NOTE(review): the conditions selecting between these formats and the
   update of fdp are not visible in this excerpt.  */
2134 put_entries (register node
*np
)
2137 static fdesc
*fdp
= NULL
;
2142 /* Output subentries that precede this one */
2144 put_entries (np
->left
);
2146 /* Output this entry */
2155 fprintf (tagf
, "\f\n%s,%d\n",
2156 fdp
->taggedfname
, total_size_of_entries (np
));
2157 fdp
->written
= TRUE
;
2159 fputs (np
->regex
, tagf
);
2160 fputc ('\177', tagf
);
2161 if (np
->name
!= NULL
)
2163 fputs (np
->name
, tagf
);
2164 fputc ('\001', tagf
);
2166 fprintf (tagf
, "%d,", np
->lno
);
2167 if (np
->cno
!= invalidcharno
)
2168 fprintf (tagf
, "%ld", np
->cno
);
2174 if (np
->name
== NULL
)
2175 error ("internal error: NULL name in ctags mode.");
2180 fprintf (stdout
, "%s %s %d\n",
2181 np
->name
, np
->fdp
->taggedfname
, (np
->lno
+ 63) / 64);
2183 fprintf (stdout
, "%-16s %3d %-16s %s\n",
2184 np
->name
, np
->lno
, np
->fdp
->taggedfname
, np
->regex
);
2188 fprintf (tagf
, "%s\t%s\t", np
->name
, np
->fdp
->taggedfname
);
2191 { /* function or #define macro with args */
2192 putc (searchar
, tagf
);
2195 for (sp
= np
->regex
; *sp
; sp
++)
2197 if (*sp
== '\\' || *sp
== searchar
)
2201 putc (searchar
, tagf
);
2204 { /* anything else; text pattern inadequate */
2205 fprintf (tagf
, "%d", np
->lno
);
2210 } /* if this node contains a valid tag */
2212 /* Output subentries that follow this one */
2213 put_entries (np
->right
);
2215 put_entries (np
->left
);
/* Bit flags naming the C dialects.  C_EXT (0x00fff) is the mask of the
   extension bits.  C_PLPL, C_STAR and C_JAVA all have bit 0 set, which
   is why keyword table rows meant for Java alone are written as
   (C_JAVA & ~C_PLPL).  C_AUTO and YACC use bits above the C_EXT mask.  */
2220 #define C_EXT 0x00fff /* C extensions */
2221 #define C_PLAIN 0x00000 /* C */
2222 #define C_PLPL 0x00001 /* C++ */
2223 #define C_STAR 0x00003 /* C* */
2224 #define C_JAVA 0x00005 /* JAVA */
2225 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2226 #define YACC 0x10000 /* yacc file */
2229 * The C symbol tables.
/* Enumerators that classify keywords; they appear as the third column
   of the keyword table rows listed further down.
   NOTE(review): the enum header and several enumerators used by that
   table (for example st_C_javastruct, st_C_operator, st_C_gnumacro) are
   not visible in this excerpt.  */
2234 st_C_objprot
, st_C_objimpl
, st_C_objend
,
2236 st_C_ignore
, st_C_attribute
,
2239 st_C_class
, st_C_template
,
2240 st_C_struct
, st_C_extern
, st_C_enum
, st_C_define
, st_C_typedef
/* Forward declarations for the keyword lookup: hash and in_word_set
   both take a string and an unsigned length (in_word_set returns a
   pointer to a struct C_stab_entry), and C_symtype returns an
   enum sym_type.  */
2243 static unsigned int hash (const char *, unsigned int);
2244 static struct C_stab_entry
* in_word_set (const char *, unsigned int);
2245 static enum sym_type
C_symtype (char *, int, int);
2247 /* Feed stuff between (but not including) %[ and %] lines to:
2253 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2257 while, 0, st_C_ignore
2258 switch, 0, st_C_ignore
2259 return, 0, st_C_ignore
2260 __attribute__, 0, st_C_attribute
2261 GTY, 0, st_C_attribute
2262 @interface, 0, st_C_objprot
2263 @protocol, 0, st_C_objprot
2264 @implementation,0, st_C_objimpl
2265 @end, 0, st_C_objend
2266 import, (C_JAVA & ~C_PLPL), st_C_ignore
2267 package, (C_JAVA & ~C_PLPL), st_C_ignore
2268 friend, C_PLPL, st_C_ignore
2269 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2270 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2271 interface, (C_JAVA & ~C_PLPL), st_C_struct
2272 class, 0, st_C_class
2273 namespace, C_PLPL, st_C_struct
2274 domain, C_STAR, st_C_struct
2275 union, 0, st_C_struct
2276 struct, 0, st_C_struct
2277 extern, 0, st_C_extern
2279 typedef, 0, st_C_typedef
2280 define, 0, st_C_define
2281 undef, 0, st_C_define
2282 operator, C_PLPL, st_C_operator
2283 template, 0, st_C_template
2284 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2285 DEFUN, 0, st_C_gnumacro
2286 SYSCALL, 0, st_C_gnumacro
2287 ENTRY, 0, st_C_gnumacro
2288 PSEUDO, 0, st_C_gnumacro
2289 # These are defined inside C functions, so currently they are not met.
2290 # EXFUN used in glibc, DEFVAR_* in emacs.
2291 #EXFUN, 0, st_C_gnumacro
2292 #DEFVAR_, 0, st_C_gnumacro
2294 and replace lines between %< and %> with its output, then:
2295 - remove the #if characterset check
2296 - make in_word_set static and not inline. */
2298 /* C code produced by gperf version 3.0.1 */
2299 /* Command-line: gperf -m 5 */
2300 /* Computed positions: -k'2-3' */
2302 struct C_stab_entry
{ const char *name
; int c_ext
; enum sym_type type
; };
2303 /* maximum key range = 33, duplicates = 0 */
2305 static inline unsigned int
2306 hash (register const char *str
, register unsigned int len
)
2308 static unsigned char asso_values
[] =
2310 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2311 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2312 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2313 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2314 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2315 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2316 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2317 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2318 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2319 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2320 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2321 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2322 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2323 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2324 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2325 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2326 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2327 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2328 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2329 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2330 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2331 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2332 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2333 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2334 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2335 35, 35, 35, 35, 35, 35
2337 register int hval
= len
;
2342 hval
+= asso_values
[(unsigned char)str
[2]];
2345 hval
+= asso_values
[(unsigned char)str
[1]];
2351 static struct C_stab_entry
*
2352 in_word_set (register const char *str
, register unsigned int len
)
2356 TOTAL_KEYWORDS
= 33,
2357 MIN_WORD_LENGTH
= 2,
2358 MAX_WORD_LENGTH
= 15,
2363 static struct C_stab_entry wordlist
[] =
2366 {"if", 0, st_C_ignore
},
2367 {"GTY", 0, st_C_attribute
},
2368 {"@end", 0, st_C_objend
},
2369 {"union", 0, st_C_struct
},
2370 {"define", 0, st_C_define
},
2371 {"import", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2372 {"template", 0, st_C_template
},
2373 {"operator", C_PLPL
, st_C_operator
},
2374 {"@interface", 0, st_C_objprot
},
2375 {"implements", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2376 {"friend", C_PLPL
, st_C_ignore
},
2377 {"typedef", 0, st_C_typedef
},
2378 {"return", 0, st_C_ignore
},
2379 {"@implementation",0, st_C_objimpl
},
2380 {"@protocol", 0, st_C_objprot
},
2381 {"interface", (C_JAVA
& ~C_PLPL
), st_C_struct
},
2382 {"extern", 0, st_C_extern
},
2383 {"extends", (C_JAVA
& ~C_PLPL
), st_C_javastruct
},
2384 {"struct", 0, st_C_struct
},
2385 {"domain", C_STAR
, st_C_struct
},
2386 {"switch", 0, st_C_ignore
},
2387 {"enum", 0, st_C_enum
},
2388 {"for", 0, st_C_ignore
},
2389 {"namespace", C_PLPL
, st_C_struct
},
2390 {"class", 0, st_C_class
},
2391 {"while", 0, st_C_ignore
},
2392 {"undef", 0, st_C_define
},
2393 {"package", (C_JAVA
& ~C_PLPL
), st_C_ignore
},
2394 {"__attribute__", 0, st_C_attribute
},
2395 {"SYSCALL", 0, st_C_gnumacro
},
2396 {"ENTRY", 0, st_C_gnumacro
},
2397 {"PSEUDO", 0, st_C_gnumacro
},
2398 {"DEFUN", 0, st_C_gnumacro
}
2401 if (len
<= MAX_WORD_LENGTH
&& len
>= MIN_WORD_LENGTH
)
2403 register int key
= hash (str
, len
);
2405 if (key
<= MAX_HASH_VALUE
&& key
>= 0)
2407 register const char *s
= wordlist
[key
].name
;
2409 if (*str
== *s
&& !strncmp (str
+ 1, s
+ 1, len
- 1) && s
[len
] == '\0')
2410 return &wordlist
[key
];
2417 static enum sym_type
2418 C_symtype (char *str
, int len
, int c_ext
)
2420 register struct C_stab_entry
*se
= in_word_set (str
, len
);
2422 if (se
== NULL
|| (se
->c_ext
&& !(c_ext
& se
->c_ext
)))
2429 * Ignoring __attribute__ ((list))
2431 static bool inattribute
; /* looking at an __attribute__ construct */
2434 * C functions and variables are recognized using a simple
2435 * finite automaton. fvdef is its state variable.
2439 fvnone
, /* nothing seen */
2440 fdefunkey
, /* Emacs DEFUN keyword seen */
2441 fdefunname
, /* Emacs DEFUN name seen */
2442 foperator
, /* func: operator keyword seen (cplpl) */
2443 fvnameseen
, /* function or variable name seen */
2444 fstartlist
, /* func: just after open parenthesis */
2445 finlist
, /* func: in parameter list */
2446 flistseen
, /* func: after parameter list */
2447 fignore
, /* func: before open brace */
2448 vignore
/* var-like: ignore until ';' */
2451 static bool fvextern
; /* func or var: extern keyword seen; */
2454 * typedefs are recognized using a simple finite automaton.
2455 * typdef is its state variable.
2459 tnone
, /* nothing seen */
2460 tkeyseen
, /* typedef keyword seen */
2461 ttypeseen
, /* defined type seen */
2462 tinbody
, /* inside typedef body */
2463 tend
, /* just before typedef tag */
2464 tignore
/* junk after typedef tag */
2468 * struct-like structures (enum, struct and union) are recognized
2469 * using another simple finite automaton. `structdef' is its state
2474 snone
, /* nothing seen yet,
2475 or in struct body if bracelev > 0 */
2476 skeyseen
, /* struct-like keyword seen */
2477 stagseen
, /* struct-like tag seen */
2478 scolonseen
/* colon seen after struct-like tag */
2482 * When objdef is different from onone, objtag is the name of the class.
2484 static const char *objtag
= "<uninited>";
2487 * Yet another little state machine to deal with preprocessor lines.
2491 dnone
, /* nothing seen */
2492 dsharpseen
, /* '#' seen as first char on line */
2493 ddefineseen
, /* '#' and 'define' seen */
2494 dignorerest
/* ignore rest of line */
2498 * State machine for Objective C protocols and implementations.
2499 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2503 onone
, /* nothing seen */
2504 oprotocol
, /* @interface or @protocol seen */
2505 oimplementation
, /* @implementation seen */
2506 otagseen
, /* class name seen */
2507 oparenseen
, /* parenthesis before category seen */
2508 ocatseen
, /* category name seen */
2509 oinbody
, /* in @implementation body */
2510 omethodsign
, /* in @implementation body, after +/- */
2511 omethodtag
, /* after method name */
2512 omethodcolon
, /* after method colon */
2513 omethodparm
, /* after method parameter */
2514 oignore
/* wait for @end */
2519 * Use this structure to keep info about the token read, and how it
2520 * should be tagged. Used by the make_C_tag function to build a tag.
2524 char *line
; /* string containing the token */
2525 int offset
; /* where the token starts in LINE */
2526 int length
; /* token length */
2528 The previous members can be used to pass strings around for generic
2529 purposes. The following ones specifically refer to creating tags. In this
2530 case the token contained here is the pattern that will be used to create a
2533 bool valid
; /* if FALSE, do not create a tag; the token should be
2534 invalidated whenever a state machine is
2535 reset prematurely */
2536 bool named
; /* create a named tag */
2537 int lineno
; /* source line number of tag */
2538 long linepos
; /* source char number of tag */
2539 } token
; /* latest token read */
2542 * Variables and functions for dealing with nested structures.
2543 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2545 static void pushclass_above (int, char *, int);
2546 static void popclass_above (int);
2547 static void write_classname (linebuffer
*, const char *qualifier
);
2550 char **cname
; /* nested class names */
2551 int *bracelev
; /* nested class brace level */
2552 int nl
; /* class nesting level (elements used) */
2553 int size
; /* length of the array */
2554 } cstack
; /* stack for nested declaration tags */
2555 /* Current struct nesting depth (namespace, class, struct, union, enum):
   the number of elements in use on cstack. */
2556 #define nestlev (cstack.nl)
2557 /* After struct keyword or in struct body, not inside a nested function.
   Holds when structdef is snone, at least one entry is on cstack, and
   the brace level is exactly one deeper than the level recorded for the
   topmost cstack entry.  The expansion reads a variable named bracelev,
   which must be visible wherever the macro is used. */
2558 #define instruct (structdef == snone && nestlev > 0 \
2559 && bracelev == cstack.bracelev[nestlev-1] + 1)
2562 pushclass_above (int bracelev
, char *str
, int len
)
2566 popclass_above (bracelev
);
2568 if (nl
>= cstack
.size
)
2570 int size
= cstack
.size
*= 2;
2571 xrnew (cstack
.cname
, size
, char *);
2572 xrnew (cstack
.bracelev
, size
, int);
2574 assert (nl
== 0 || cstack
.bracelev
[nl
-1] < bracelev
);
2575 cstack
.cname
[nl
] = (str
== NULL
) ? NULL
: savenstr (str
, len
);
2576 cstack
.bracelev
[nl
] = bracelev
;
2581 popclass_above (int bracelev
)
2585 for (nl
= cstack
.nl
- 1;
2586 nl
>= 0 && cstack
.bracelev
[nl
] >= bracelev
;
2589 free (cstack
.cname
[nl
]);
2595 write_classname (linebuffer
*cn
, const char *qualifier
)
2598 int qlen
= strlen (qualifier
);
2600 if (cstack
.nl
== 0 || cstack
.cname
[0] == NULL
)
2604 cn
->buffer
[0] = '\0';
2608 len
= strlen (cstack
.cname
[0]);
2609 linebuffer_setlen (cn
, len
);
2610 strcpy (cn
->buffer
, cstack
.cname
[0]);
2612 for (i
= 1; i
< cstack
.nl
; i
++)
2614 char *s
= cstack
.cname
[i
];
2617 linebuffer_setlen (cn
, len
+ qlen
+ strlen (s
));
2618 len
+= sprintf (cn
->buffer
+ len
, "%s%s", qualifier
, s
);
2623 static bool consider_token (char *, int, int, int *, int, int, bool *);
2624 static void make_C_tag (bool);
2628 * checks to see if the current token is at the start of a
2629 * function or variable, or corresponds to a typedef, or
2630 * is a struct/union/enum tag, or #define, or an enum constant.
2632 * *IS_FUNC_OR_VAR gets TRUE if the token is a function, a variable or a
2633 * #define macro with args. C_EXTP points to which language we are looking at.
2644 consider_token (register char *str
, register int len
, register int c
, int *c_extp
, int bracelev
, int parlev
, int *is_func_or_var
)
2645 /* IN: token pointer */
2646 /* IN: token length */
2647 /* IN: first char after the token */
2648 /* IN, OUT: C extensions mask */
2649 /* IN: brace level */
2650 /* IN: parenthesis level */
2651 /* OUT: function or variable found */
2653 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2654 structtype is the type of the preceding struct-like keyword, and
2655 structbracelev is the brace level where it has been seen. */
2656 static enum sym_type structtype
;
2657 static int structbracelev
;
2658 static enum sym_type toktype
;
2661 toktype
= C_symtype (str
, len
, *c_extp
);
2664 * Skip __attribute__
2666 if (toktype
== st_C_attribute
)
2673 * Advance the definedef state machine.
2678 /* We're not on a preprocessor line. */
2679 if (toktype
== st_C_gnumacro
)
2686 if (toktype
== st_C_define
)
2688 definedef
= ddefineseen
;
2692 definedef
= dignorerest
;
2697 * Make a tag for any macro, unless it is a constant
2698 * and constantypedefs is FALSE.
2700 definedef
= dignorerest
;
2701 *is_func_or_var
= (c
== '(');
2702 if (!*is_func_or_var
&& !constantypedefs
)
2709 error ("internal error: definedef value.");
2718 if (toktype
== st_C_typedef
)
2738 if (structdef
== snone
&& fvdef
== fvnone
)
2757 case st_C_javastruct
:
2758 if (structdef
== stagseen
)
2759 structdef
= scolonseen
;
2763 if ((*c_extp
& C_AUTO
) /* automatic detection of C++ language */
2765 && definedef
== dnone
&& structdef
== snone
2766 && typdef
== tnone
&& fvdef
== fvnone
)
2767 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2768 if (toktype
== st_C_template
)
2775 && (typdef
== tkeyseen
2776 || (typedefs_or_cplusplus
&& structdef
== snone
)))
2778 structdef
= skeyseen
;
2779 structtype
= toktype
;
2780 structbracelev
= bracelev
;
2781 if (fvdef
== fvnameseen
)
2787 if (structdef
== skeyseen
)
2789 structdef
= stagseen
;
2793 if (typdef
!= tnone
)
2796 /* Detect Objective C constructs. */
2806 objdef
= oimplementation
;
2810 case oimplementation
:
2811 /* Save the class tag for functions or variables defined inside. */
2812 objtag
= savenstr (str
, len
);
2816 /* Save the class tag for categories. */
2817 objtag
= savenstr (str
, len
);
2819 *is_func_or_var
= TRUE
;
2823 *is_func_or_var
= TRUE
;
2831 objdef
= omethodtag
;
2832 linebuffer_setlen (&token_name
, len
);
2833 memcpy (token_name
.buffer
, str
, len
);
2834 token_name
.buffer
[len
] = '\0';
2840 objdef
= omethodparm
;
2845 int oldlen
= token_name
.len
;
2847 objdef
= omethodtag
;
2848 linebuffer_setlen (&token_name
, oldlen
+ len
);
2849 memcpy (token_name
.buffer
+ oldlen
, str
, len
);
2850 token_name
.buffer
[oldlen
+ len
] = '\0';
2855 if (toktype
== st_C_objend
)
2857 /* Memory leakage here: the string pointed by objtag is
2858 never released, because many tests would be needed to
2859 avoid breaking on incorrect input code. The amount of
2860 memory leaked here is the sum of the lengths of the
2868 /* A function, variable or enum constant? */
2890 *is_func_or_var
= TRUE
;
2894 && structdef
== snone
2895 && structtype
== st_C_enum
&& bracelev
> structbracelev
)
2896 return TRUE
; /* enum constant */
2902 fvdef
= fdefunname
; /* GNU macro */
2903 *is_func_or_var
= TRUE
;
2911 if ((strneq (str
, "asm", 3) && endtoken (str
[3]))
2912 || (strneq (str
, "__asm__", 7) && endtoken (str
[7])))
2921 if (len
>= 10 && strneq (str
+len
-10, "::operator", 10))
2923 if (*c_extp
& C_AUTO
) /* automatic detection of C++ */
2924 *c_extp
= (*c_extp
| C_PLPL
) & ~C_AUTO
;
2926 *is_func_or_var
= TRUE
;
2929 if (bracelev
> 0 && !instruct
)
2931 fvdef
= fvnameseen
; /* function or variable */
2932 *is_func_or_var
= TRUE
;
2943 * C_entries often keeps pointers to tokens or lines which are older than
2944 * the line currently read. By keeping two line buffers, and switching
2945 * them at end of line, it is possible to use those pointers.
/* The macros below expand to expressions over variables named curndx,
   newndx, lbs and c_ext, so they can only be used where those names are
   visible (C_entries declares curndx and newndx and takes c_ext). */
/* True when the "new" and "current" indices select the same buffer. */
2953 #define current_lb_is_new (newndx == curndx)
/* Flip curndx between 0 and 1; newndx is left alone. */
2954 #define switch_line_buffers() (curndx = 1 - curndx)
/* Line buffer and starting char position for each of the two roles. */
2956 #define curlb (lbs[curndx].lb)
2957 #define newlb (lbs[newndx].lb)
2958 #define curlinepos (lbs[curndx].linepos)
2959 #define newlinepos (lbs[newndx].linepos)
/* Dialect tests on c_ext.  plainc: no extension bit set.  cplpl: the
   C_PLPL bit is set, which is also the case for C_STAR and C_JAVA.
   cjava: every bit of C_JAVA is set. */
2961 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2962 #define cplpl (c_ext & C_PLPL)
2963 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2965 #define CNL_SAVE_DEFINEDEF() \
2967 curlinepos = charno; \
2968 readline (&curlb, inf); \
2969 lp = curlb.buffer; \
2976 CNL_SAVE_DEFINEDEF(); \
2977 if (savetoken.valid) \
2979 token = savetoken; \
2980 savetoken.valid = FALSE; \
2982 definedef = dnone; \
2987 make_C_tag (int isfun
)
2989 /* This function is never called when token.valid is FALSE, but
2990 we must protect against invalid input or internal errors. */
2992 make_tag (token_name
.buffer
, token_name
.len
, isfun
, token
.line
,
2993 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
2995 { /* this branch is optimized away if !DEBUG */
2996 make_tag (concat ("INVALID TOKEN:-->", token_name
.buffer
, ""),
2997 token_name
.len
+ 17, isfun
, token
.line
,
2998 token
.offset
+token
.length
+1, token
.lineno
, token
.linepos
);
2999 error ("INVALID TOKEN");
3002 token
.valid
= FALSE
;
3008 * This routine finds functions, variables, typedefs,
3009 * #define's, enum constants and struct/union/enum definitions in
3010 * C syntax and adds them to the list.
3013 C_entries (int c_ext
, FILE *inf
)
3014 /* extension of C */
3017 register char c
; /* latest char read; '\0' for end of line */
3018 register char *lp
; /* pointer one beyond the character `c' */
3019 int curndx
, newndx
; /* indices for current and new lb */
3020 register int tokoff
; /* offset in line of start of current token */
3021 register int toklen
; /* length of current token */
3022 const char *qualifier
; /* string used to qualify names */
3023 int qlen
; /* length of qualifier */
3024 int bracelev
; /* current brace level */
3025 int bracketlev
; /* current bracket level */
3026 int parlev
; /* current parenthesis level */
3027 int attrparlev
; /* __attribute__ parenthesis level */
3028 int templatelev
; /* current template level */
3029 int typdefbracelev
; /* bracelev where a typedef struct body begun */
3030 bool incomm
, inquote
, inchar
, quotednl
, midtoken
;
3031 bool yacc_rules
; /* in the rules part of a yacc file */
3032 struct tok savetoken
= {0}; /* token saved during preprocessor handling */
3035 linebuffer_init (&lbs
[0].lb
);
3036 linebuffer_init (&lbs
[1].lb
);
3037 if (cstack
.size
== 0)
3039 cstack
.size
= (DEBUG
) ? 1 : 4;
3041 cstack
.cname
= xnew (cstack
.size
, char *);
3042 cstack
.bracelev
= xnew (cstack
.size
, int);
3045 tokoff
= toklen
= typdefbracelev
= 0; /* keep compiler quiet */
3046 curndx
= newndx
= 0;
3050 fvdef
= fvnone
; fvextern
= FALSE
; typdef
= tnone
;
3051 structdef
= snone
; definedef
= dnone
; objdef
= onone
;
3053 midtoken
= inquote
= inchar
= incomm
= quotednl
= FALSE
;
3054 token
.valid
= savetoken
.valid
= FALSE
;
3055 bracelev
= bracketlev
= parlev
= attrparlev
= templatelev
= 0;
3057 { qualifier
= "."; qlen
= 1; }
3059 { qualifier
= "::"; qlen
= 2; }
3067 /* If we are at the end of the line, the next character is a
3068 '\0'; do not skip it, because it is what tells us
3069 to read the next line. */
3090 /* Newlines inside comments do not end macro definitions in
3092 CNL_SAVE_DEFINEDEF ();
3105 /* Newlines inside strings do not end macro definitions
3106 in traditional cpp, even though compilers don't
3107 usually accept them. */
3108 CNL_SAVE_DEFINEDEF ();
3118 /* Hmmm, something went wrong. */
3154 if (fvdef
!= finlist
&& fvdef
!= fignore
&& fvdef
!= vignore
)
3169 else if (/* cplpl && */ *lp
== '/')
3175 if ((c_ext
& YACC
) && *lp
== '%')
3177 /* Entering or exiting rules section in yacc file. */
3179 definedef
= dnone
; fvdef
= fvnone
; fvextern
= FALSE
;
3180 typdef
= tnone
; structdef
= snone
;
3181 midtoken
= inquote
= inchar
= incomm
= quotednl
= FALSE
;
3183 yacc_rules
= !yacc_rules
;
3189 if (definedef
== dnone
)
3192 bool cpptoken
= TRUE
;
3194 /* Look back on this line. If all blanks, or nonblanks
3195 followed by an end of comment, this is a preprocessor
3197 for (cp
= newlb
.buffer
; cp
< lp
-1; cp
++)
3200 if (*cp
== '*' && cp
[1] == '/')
3209 definedef
= dsharpseen
;
3210 } /* if (definedef == dnone) */
3221 CNL_SAVE_DEFINEDEF ();
3228 /* Consider token only if some involved conditions are satisfied. */
3229 if (typdef
!= tignore
3230 && definedef
!= dignorerest
3233 && (definedef
!= dnone
3234 || structdef
!= scolonseen
)
3241 if (c
== ':' && *lp
== ':' && begtoken (lp
[1]))
3242 /* This handles :: in the middle,
3243 but not at the beginning of an identifier.
3244 Also, space-separated :: is not recognized. */
3246 if (c_ext
& C_AUTO
) /* automatic detection of C++ */
3247 c_ext
= (c_ext
| C_PLPL
) & ~C_AUTO
;
3251 goto still_in_token
;
3255 bool funorvar
= FALSE
;
3258 || consider_token (newlb
.buffer
+ tokoff
, toklen
, c
,
3259 &c_ext
, bracelev
, parlev
,
3262 if (fvdef
== foperator
)
3265 lp
= skip_spaces (lp
-1);
3269 && !iswhite (*lp
) && *lp
!= '(')
3272 toklen
+= lp
- oldlp
;
3274 token
.named
= FALSE
;
3276 && nestlev
> 0 && definedef
== dnone
)
3277 /* in struct body */
3280 write_classname (&token_name
, qualifier
);
3281 len
= token_name
.len
;
3282 linebuffer_setlen (&token_name
, len
+qlen
+toklen
);
3283 sprintf (token_name
.buffer
+ len
, "%s%.*s",
3284 qualifier
, toklen
, newlb
.buffer
+ tokoff
);
3287 else if (objdef
== ocatseen
)
3288 /* Objective C category */
3290 int len
= strlen (objtag
) + 2 + toklen
;
3291 linebuffer_setlen (&token_name
, len
);
3292 sprintf (token_name
.buffer
, "%s(%.*s)",
3293 objtag
, toklen
, newlb
.buffer
+ tokoff
);
3296 else if (objdef
== omethodtag
3297 || objdef
== omethodparm
)
3298 /* Objective C method */
3302 else if (fvdef
== fdefunname
)
3303 /* GNU DEFUN and similar macros */
3305 bool defun
= (newlb
.buffer
[tokoff
] == 'F');
3309 /* Rewrite the tag so that emacs lisp DEFUNs
3310 can be found by their elisp name */
3316 linebuffer_setlen (&token_name
, len
);
3317 memcpy (token_name
.buffer
,
3318 newlb
.buffer
+ off
, len
);
3319 token_name
.buffer
[len
] = '\0';
3322 if (token_name
.buffer
[len
] == '_')
3323 token_name
.buffer
[len
] = '-';
3324 token
.named
= defun
;
3328 linebuffer_setlen (&token_name
, toklen
);
3329 memcpy (token_name
.buffer
,
3330 newlb
.buffer
+ tokoff
, toklen
);
3331 token_name
.buffer
[toklen
] = '\0';
3332 /* Name macros and members. */
3333 token
.named
= (structdef
== stagseen
3334 || typdef
== ttypeseen
3337 && definedef
== dignorerest
)
3339 && definedef
== dnone
3340 && structdef
== snone
3343 token
.lineno
= lineno
;
3344 token
.offset
= tokoff
;
3345 token
.length
= toklen
;
3346 token
.line
= newlb
.buffer
;
3347 token
.linepos
= newlinepos
;
3350 if (definedef
== dnone
3351 && (fvdef
== fvnameseen
3352 || fvdef
== foperator
3353 || structdef
== stagseen
3355 || typdef
== ttypeseen
3356 || objdef
!= onone
))
3358 if (current_lb_is_new
)
3359 switch_line_buffers ();
3361 else if (definedef
!= dnone
3362 || fvdef
== fdefunname
3364 make_C_tag (funorvar
);
3366 else /* not yacc and consider_token failed */
3368 if (inattribute
&& fvdef
== fignore
)
3370 /* We have just met __attribute__ after a
3371 function parameter list: do not tag the
3378 } /* if (endtoken (c)) */
3379 else if (intoken (c
))
3385 } /* if (midtoken) */
3386 else if (begtoken (c
))
3394 /* This prevents tagging fb in
3395 void (__attribute__((noreturn)) *fb) (void);
3396 Fixing this is not easy and not very important. */
3400 if (plainc
|| declarations
)
3402 make_C_tag (TRUE
); /* a function */
3407 if (structdef
== stagseen
&& !cjava
)
3409 popclass_above (bracelev
);
3417 if (!yacc_rules
|| lp
== newlb
.buffer
+ 1)
3419 tokoff
= lp
- 1 - newlb
.buffer
;
3424 } /* if (begtoken) */
3425 } /* if must look at token */
3428 /* Detect end of line, colon, comma, semicolon and various braces
3429 after having handled a token.*/
3435 if (yacc_rules
&& token
.offset
== 0 && token
.valid
)
3437 make_C_tag (FALSE
); /* a yacc function */
3440 if (definedef
!= dnone
)
3446 make_C_tag (TRUE
); /* an Objective C class */
3450 objdef
= omethodcolon
;
3451 linebuffer_setlen (&token_name
, token_name
.len
+ 1);
3452 strcat (token_name
.buffer
, ":");
3455 if (structdef
== stagseen
)
3457 structdef
= scolonseen
;
3460 /* Should be useless, but may work as a safety net. */
3461 if (cplpl
&& fvdef
== flistseen
)
3463 make_C_tag (TRUE
); /* a function */
3469 if (definedef
!= dnone
|| inattribute
)
3475 make_C_tag (FALSE
); /* a typedef */
3485 if (typdef
== tignore
|| cplpl
)
3489 if ((globals
&& bracelev
== 0 && (!fvextern
|| declarations
))
3490 || (members
&& instruct
))
3491 make_C_tag (FALSE
); /* a variable */
3494 token
.valid
= FALSE
;
3498 && (cplpl
|| !instruct
)
3499 && (typdef
== tnone
|| (typdef
!= tignore
&& instruct
)))
3501 && plainc
&& instruct
))
3502 make_C_tag (TRUE
); /* a function */
3508 && cplpl
&& structdef
== stagseen
)
3509 make_C_tag (FALSE
); /* forward declaration */
3511 token
.valid
= FALSE
;
3512 } /* switch (fvdef) */
3518 if (structdef
== stagseen
)
3522 if (definedef
!= dnone
|| inattribute
)
3528 make_C_tag (TRUE
); /* an Objective C method */
3549 && (!fvextern
|| declarations
))
3550 || (members
&& instruct
)))
3551 make_C_tag (FALSE
); /* a variable */
3554 if ((declarations
&& typdef
== tnone
&& !instruct
)
3555 || (members
&& typdef
!= tignore
&& instruct
))
3557 make_C_tag (TRUE
); /* a function */
3560 else if (!declarations
)
3562 token
.valid
= FALSE
;
3567 if (structdef
== stagseen
)
3571 if (definedef
!= dnone
|| inattribute
)
3573 if (structdef
== stagseen
)
3580 make_C_tag (FALSE
); /* a typedef */
3592 if ((members
&& bracelev
== 1)
3593 || (globals
&& bracelev
== 0
3594 && (!fvextern
|| declarations
)))
3595 make_C_tag (FALSE
); /* a variable */
3609 if (definedef
!= dnone
)
3611 if (objdef
== otagseen
&& parlev
== 0)
3612 objdef
= oparenseen
;
3616 if (typdef
== ttypeseen
3620 /* This handles constructs like:
3621 typedef void OperatorFun (int fun); */
3640 if (--attrparlev
== 0)
3641 inattribute
= FALSE
;
3644 if (definedef
!= dnone
)
3646 if (objdef
== ocatseen
&& parlev
== 1)
3648 make_C_tag (TRUE
); /* an Objective C category */
3662 || typdef
== ttypeseen
))
3665 make_C_tag (FALSE
); /* a typedef */
3668 else if (parlev
< 0) /* can happen due to ill-conceived #if's. */
3672 if (definedef
!= dnone
)
3674 if (typdef
== ttypeseen
)
3676 /* Whenever typdef is set to tinbody (currently only
3677 here), typdefbracelev should be set to bracelev. */
3679 typdefbracelev
= bracelev
;
3684 make_C_tag (TRUE
); /* a function */
3693 make_C_tag (TRUE
); /* an Objective C class */
3698 make_C_tag (TRUE
); /* an Objective C method */
3702 /* Neutralize `extern "C" {' grot. */
3703 if (bracelev
== 0 && structdef
== snone
&& nestlev
== 0
3711 case skeyseen
: /* unnamed struct */
3712 pushclass_above (bracelev
, NULL
, 0);
3715 case stagseen
: /* named struct or enum */
3716 case scolonseen
: /* a class */
3717 pushclass_above (bracelev
,token
.line
+token
.offset
, token
.length
);
3719 make_C_tag (FALSE
); /* a struct or enum */
3725 if (definedef
!= dnone
)
3727 if (fvdef
== fstartlist
)
3729 fvdef
= fvnone
; /* avoid tagging `foo' in `foo (*bar()) ()' */
3730 token
.valid
= FALSE
;
3734 if (definedef
!= dnone
)
3737 if (!ignoreindent
&& lp
== newlb
.buffer
+ 1)
3740 token
.valid
= FALSE
; /* unexpected value, token unreliable */
3741 bracelev
= 0; /* reset brace level if first column */
3742 parlev
= 0; /* also reset paren level, just in case... */
3744 else if (bracelev
< 0)
3746 token
.valid
= FALSE
; /* something gone amiss, token unreliable */
3749 if (bracelev
== 0 && fvdef
== vignore
)
3750 fvdef
= fvnone
; /* end of function */
3751 popclass_above (bracelev
);
3753 /* Only if typdef == tinbody is typdefbracelev significant. */
3754 if (typdef
== tinbody
&& bracelev
<= typdefbracelev
)
3756 assert (bracelev
== typdefbracelev
);
3761 if (definedef
!= dnone
)
3771 if ((members
&& bracelev
== 1)
3772 || (globals
&& bracelev
== 0 && (!fvextern
|| declarations
)))
3773 make_C_tag (FALSE
); /* a variable */
3781 && (structdef
== stagseen
|| fvdef
== fvnameseen
))
3788 if (templatelev
> 0)
3796 if (objdef
== oinbody
&& bracelev
== 0)
3798 objdef
= omethodsign
;
3803 case '#': case '~': case '&': case '%': case '/':
3804 case '|': case '^': case '!': case '.': case '?':
3805 if (definedef
!= dnone
)
3807 /* These surely cannot follow a function tag in C. */
3820 if (objdef
== otagseen
)
3822 make_C_tag (TRUE
); /* an Objective C class */
3825 /* If a macro spans multiple lines don't reset its state. */
3827 CNL_SAVE_DEFINEDEF ();
3833 } /* while not eof */
3835 free (lbs
[0].lb
.buffer
);
3836 free (lbs
[1].lb
.buffer
);
3840 * Process either a C++ file or a C file depending on the setting
3844 default_C_entries (FILE *inf
)
3846 C_entries (cplusplus
? C_PLPL
: C_AUTO
, inf
);
3849 /* Always do plain C. */
3851 plain_C_entries (FILE *inf
)
3856 /* Always do C++. */
3858 Cplusplus_entries (FILE *inf
)
3860 C_entries (C_PLPL
, inf
);
3863 /* Always do Java. */
3865 Cjava_entries (FILE *inf
)
3867 C_entries (C_JAVA
, inf
);
3872 Cstar_entries (FILE *inf
)
3874 C_entries (C_STAR
, inf
);
3877 /* Always do Yacc. */
3879 Yacc_entries (FILE *inf
)
3881 C_entries (YACC
, inf
);
3885 /* Useful macros. */
3886 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
3887 for (; /* loop initialization */ \
3888 !feof (file_pointer) /* loop test */ \
3889 && /* instructions at start of loop */ \
3890 (readline (&line_buffer, file_pointer), \
3891 char_pointer = line_buffer.buffer, \
/* True when CP points at the literal keyword KW and the character right
   after it satisfies notinname.  Only in that case is CP reassigned, to
   the result of skip_spaces applied just past KW; because of the &&
   chain, CP is left untouched when an earlier test fails.  CP is
   evaluated several times and assigned to, so it must be a plain
   lvalue. */
3895 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
3896 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3897 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3898 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
3899 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3901 /* Similar to LOOKING_AT, but compares with strncaseeq instead of strneq,
   does not use notinname, and does not skip trailing spaces: on a match
   CP is only advanced past the keyword itself. */
3902 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3903 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3904 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3905 && ((cp) += sizeof (kw)-1)) /* advance cp past kw (spaces are not skipped) */
3908 * Read a file, but do no processing. This is used to do regexp
3909 * matching on files that have no language defined.
3912 just_read_file (FILE *inf
)
3915 readline (&lb
, inf
);
3919 /* Fortran parsing */
3921 static void F_takeprec (void);
3922 static void F_getit (FILE *);
3927 dbp
= skip_spaces (dbp
);
3931 dbp
= skip_spaces (dbp
);
3932 if (strneq (dbp
, "(*)", 3))
3937 if (!ISDIGIT (*dbp
))
3939 --dbp
; /* force failure */
3944 while (ISDIGIT (*dbp
));
3952 dbp
= skip_spaces (dbp
);
3955 readline (&lb
, inf
);
3960 dbp
= skip_spaces (dbp
);
3962 if (!ISALPHA (*dbp
) && *dbp
!= '_' && *dbp
!= '$')
3964 for (cp
= dbp
+ 1; *cp
!= '\0' && intoken (*cp
); cp
++)
3966 make_tag (dbp
, cp
-dbp
, TRUE
,
3967 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
3972 Fortran_functions (FILE *inf
)
3974 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
3977 dbp
++; /* Ratfor escape to fortran */
3978 dbp
= skip_spaces (dbp
);
3982 if (LOOKING_AT_NOCASE (dbp
, "recursive"))
3983 dbp
= skip_spaces (dbp
);
3985 if (LOOKING_AT_NOCASE (dbp
, "pure"))
3986 dbp
= skip_spaces (dbp
);
3988 if (LOOKING_AT_NOCASE (dbp
, "elemental"))
3989 dbp
= skip_spaces (dbp
);
3991 switch (lowcase (*dbp
))
3994 if (nocase_tail ("integer"))
3998 if (nocase_tail ("real"))
4002 if (nocase_tail ("logical"))
4006 if (nocase_tail ("complex") || nocase_tail ("character"))
4010 if (nocase_tail ("double"))
4012 dbp
= skip_spaces (dbp
);
4015 if (nocase_tail ("precision"))
4021 dbp
= skip_spaces (dbp
);
4024 switch (lowcase (*dbp
))
4027 if (nocase_tail ("function"))
4031 if (nocase_tail ("subroutine"))
4035 if (nocase_tail ("entry"))
4039 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4041 dbp
= skip_spaces (dbp
);
4042 if (*dbp
== '\0') /* assume un-named */
4043 make_tag ("blockdata", 9, TRUE
,
4044 lb
.buffer
, dbp
- lb
.buffer
, lineno
, linecharno
);
4046 F_getit (inf
); /* look for name */
4057 * Philippe Waroquiers (1998)
4060 /* Once we are positioned after an "interesting" keyword, let's get
4061 the real tag value necessary. */
4063 Ada_getit (FILE *inf
, const char *name_qualifier
)
4071 dbp
= skip_spaces (dbp
);
4073 || (dbp
[0] == '-' && dbp
[1] == '-'))
4075 readline (&lb
, inf
);
4078 switch (lowcase (*dbp
))
4081 if (nocase_tail ("body"))
4083 /* Skipping body of procedure body or package body or ....
4084 resetting qualifier to body instead of spec. */
4085 name_qualifier
= "/b";
4090 /* Skipping type of task type or protected type ... */
4091 if (nocase_tail ("type"))
4098 for (cp
= dbp
; *cp
!= '\0' && *cp
!= '"'; cp
++)
4103 dbp
= skip_spaces (dbp
);
4106 && (ISALPHA (*cp
) || ISDIGIT (*cp
) || *cp
== '_' || *cp
== '.'));
4114 name
= concat (dbp
, name_qualifier
, "");
4116 make_tag (name
, strlen (name
), TRUE
,
4117 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4126 Ada_funcs (FILE *inf
)
4128 bool inquote
= FALSE
;
4129 bool skip_till_semicolumn
= FALSE
;
4131 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4133 while (*dbp
!= '\0')
4135 /* Skip a string i.e. "abcd". */
4136 if (inquote
|| (*dbp
== '"'))
4138 dbp
= etags_strchr (dbp
+ !inquote
, '"');
4143 continue; /* advance char */
4148 break; /* advance line */
4152 /* Skip comments. */
4153 if (dbp
[0] == '-' && dbp
[1] == '-')
4154 break; /* advance line */
4156 /* Skip character enclosed in single quote i.e. 'a'
4157 and skip single quote starting an attribute i.e. 'Image. */
4166 if (skip_till_semicolumn
)
4169 skip_till_semicolumn
= FALSE
;
4171 continue; /* advance char */
4174 /* Search for beginning of a token. */
4175 if (!begtoken (*dbp
))
4178 continue; /* advance char */
4181 /* We are at the beginning of a token. */
4182 switch (lowcase (*dbp
))
4185 if (!packages_only
&& nocase_tail ("function"))
4186 Ada_getit (inf
, "/f");
4188 break; /* from switch */
4189 continue; /* advance char */
4191 if (!packages_only
&& nocase_tail ("procedure"))
4192 Ada_getit (inf
, "/p");
4193 else if (nocase_tail ("package"))
4194 Ada_getit (inf
, "/s");
4195 else if (nocase_tail ("protected")) /* protected type */
4196 Ada_getit (inf
, "/t");
4198 break; /* from switch */
4199 continue; /* advance char */
4202 if (typedefs
&& !packages_only
&& nocase_tail ("use"))
4204 /* when tagging types, avoid tagging use type Pack.Typename;
4205 for this, we will skip everything till a ; */
4206 skip_till_semicolumn
= TRUE
;
4207 continue; /* advance char */
4211 if (!packages_only
&& nocase_tail ("task"))
4212 Ada_getit (inf
, "/k");
4213 else if (typedefs
&& !packages_only
&& nocase_tail ("type"))
4215 Ada_getit (inf
, "/t");
4216 while (*dbp
!= '\0')
4220 break; /* from switch */
4221 continue; /* advance char */
4224 /* Look for the end of the token. */
4225 while (!endtoken (*dbp
))
4228 } /* advance char */
4229 } /* advance line */
4234 * Unix and microcontroller assembly tag handling
4235 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4236 * Idea by Bob Weiner, Motorola Inc. (1994)
4239 Asm_labels (FILE *inf
)
4243 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4245 /* If first char is alphabetic or one of [_.$], test for colon
4246 following identifier. */
4247 if (ISALPHA (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4249 /* Read past label. */
4251 while (ISALNUM (*cp
) || *cp
== '_' || *cp
== '.' || *cp
== '$')
4253 if (*cp
== ':' || iswhite (*cp
))
4254 /* Found end of label, so copy it and add it to the table. */
4255 make_tag (lb
.buffer
, cp
- lb
.buffer
, TRUE
,
4256 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4264 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4265 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4266 * Perl variable names: /^(my|local).../
4267 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4268 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4269 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4272 Perl_functions (FILE *inf
)
4274 char *package
= savestr ("main"); /* current package name */
4277 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4279 cp
= skip_spaces (cp
);
4281 if (LOOKING_AT (cp
, "package"))
4284 get_tag (cp
, &package
);
4286 else if (LOOKING_AT (cp
, "sub"))
4292 while (!notinname (*cp
))
4295 continue; /* nothing found */
4296 if ((pos
= etags_strchr (sp
, ':')) != NULL
4297 && pos
< cp
&& pos
[1] == ':')
4298 /* The name is already qualified. */
4299 make_tag (sp
, cp
- sp
, TRUE
,
4300 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4304 char savechar
, *name
;
4308 name
= concat (package
, "::", sp
);
4310 make_tag (name
, strlen (name
), TRUE
,
4311 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4315 else if (LOOKING_AT (cp
, "use constant")
4316 || LOOKING_AT (cp
, "use constant::defer"))
4318 /* For hash style multi-constant like
4319 use constant { FOO => 123,
4321 only the first FOO is picked up. Parsing across the value
4322 expressions would be difficult in general, due to possible nested
4323 hashes, here-documents, etc. */
4325 cp
= skip_spaces (cp
+1);
4328 else if (globals
) /* only if we are tagging global vars */
4330 /* Skip a qualifier, if any. */
4331 bool qual
= LOOKING_AT (cp
, "my") || LOOKING_AT (cp
, "local");
4332 /* After "my" or "local", but before any following paren or space. */
4333 char *varstart
= cp
;
4335 if (qual
/* should this be removed? If yes, how? */
4336 && (*cp
== '$' || *cp
== '@' || *cp
== '%'))
4341 while (ISALNUM (*cp
) || *cp
== '_');
4345 /* Should be examining a variable list at this point;
4346 could insist on seeing an open parenthesis. */
4347 while (*cp
!= '\0' && *cp
!= ';' && *cp
!= '=' && *cp
!= ')')
4353 make_tag (varstart
, cp
- varstart
, FALSE
,
4354 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4363 * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4364 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4365 * More ideas by seb bacon <seb@jamkit.com> (2002)
4368 Python_functions (FILE *inf
)
4372 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4374 cp
= skip_spaces (cp
);
4375 if (LOOKING_AT (cp
, "def") || LOOKING_AT (cp
, "class"))
4378 while (!notinname (*cp
) && *cp
!= ':')
4380 make_tag (name
, cp
- name
, TRUE
,
4381 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4390 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4391 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4392 * - /^[ \t]*define\(\"[^\"]+/
4393 * Only with --members:
4394 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]+/
4395 * Idea by Diez B. Roggisch (2001)
4398 PHP_functions (FILE *inf
)
4400 register char *cp
, *name
;
4401 bool search_identifier
= FALSE
;
4403 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4405 cp
= skip_spaces (cp
);
4407 if (search_identifier
4410 while (!notinname (*cp
))
4412 make_tag (name
, cp
- name
, TRUE
,
4413 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4414 search_identifier
= FALSE
;
4416 else if (LOOKING_AT (cp
, "function"))
4419 cp
= skip_spaces (cp
+1);
4423 while (!notinname (*cp
))
4425 make_tag (name
, cp
- name
, TRUE
,
4426 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4429 search_identifier
= TRUE
;
4431 else if (LOOKING_AT (cp
, "class"))
4436 while (*cp
!= '\0' && !iswhite (*cp
))
4438 make_tag (name
, cp
- name
, FALSE
,
4439 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4442 search_identifier
= TRUE
;
4444 else if (strneq (cp
, "define", 6)
4445 && (cp
= skip_spaces (cp
+6))
4447 && (*cp
== '"' || *cp
== '\''))
4451 while (*cp
!= quote
&& *cp
!= '\0')
4453 make_tag (name
, cp
- name
, FALSE
,
4454 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4457 && LOOKING_AT (cp
, "var")
4461 while (!notinname (*cp
))
4463 make_tag (name
, cp
- name
, FALSE
,
4464 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
4471 * Cobol tag functions
4472 * We could look for anything that could be a paragraph name.
4473 * i.e. anything that starts in column 8, is one word, and ends in a full stop.
4474 * Idea by Corny de Souza (1993)
4477 Cobol_paragraphs (FILE *inf
)
4479 register char *bp
, *ep
;
4481 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4487 /* If eoln, compiler option or comment ignore whole line. */
4488 if (bp
[-1] != ' ' || !ISALNUM (bp
[0]))
4491 for (ep
= bp
; ISALNUM (*ep
) || *ep
== '-'; ep
++)
4494 make_tag (bp
, ep
- bp
, TRUE
,
4495 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4502 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4505 Makefile_targets (FILE *inf
)
4509 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4511 if (*bp
== '\t' || *bp
== '#')
4513 while (*bp
!= '\0' && *bp
!= '=' && *bp
!= ':')
4515 if (*bp
== ':' || (globals
&& *bp
== '='))
4517 /* We should detect if there is more than one tag, but we do not.
4518 We just skip initial and final spaces. */
4519 char * namestart
= skip_spaces (lb
.buffer
);
4520 while (--bp
> namestart
)
4521 if (!notinname (*bp
))
4523 make_tag (namestart
, bp
- namestart
+ 1, TRUE
,
4524 lb
.buffer
, bp
- lb
.buffer
+ 2, lineno
, linecharno
);
4532 * Original code by Mosur K. Mohan (1989)
4534 * Locates tags for procedures & functions. Doesn't do any type- or
4535 * var-definitions. It does look for the keyword "extern" or
4536 * "forward" immediately following the procedure statement; if found,
4537 * the tag is skipped.
4540 Pascal_functions (FILE *inf
)
4542 linebuffer tline
; /* mostly copied from C_entries */
4544 int save_lineno
, namelen
, taglen
;
4547 bool /* each of these flags is TRUE if: */
4548 incomment
, /* point is inside a comment */
4549 inquote
, /* point is inside '..' string */
4550 get_tagname
, /* point is after PROCEDURE/FUNCTION
4551 keyword, so next item = potential tag */
4552 found_tag
, /* point is after a potential tag */
4553 inparms
, /* point is within parameter-list */
4554 verify_tag
; /* point has passed the parm-list, so the
4555 next token will determine whether this
4556 is a FORWARD/EXTERN to be ignored, or
4557 whether it is a real tag */
4559 save_lcno
= save_lineno
= namelen
= taglen
= 0; /* keep compiler quiet */
4560 name
= NULL
; /* keep compiler quiet */
4563 linebuffer_init (&tline
);
4565 incomment
= inquote
= FALSE
;
4566 found_tag
= FALSE
; /* have a proc name; check if extern */
4567 get_tagname
= FALSE
; /* found "procedure" keyword */
4568 inparms
= FALSE
; /* found '(' after "proc" */
4569 verify_tag
= FALSE
; /* check if "extern" is ahead */
4572 while (!feof (inf
)) /* long main loop to get next char */
4575 if (c
== '\0') /* if end of line */
4577 readline (&lb
, inf
);
4581 if (!((found_tag
&& verify_tag
)
4583 c
= *dbp
++; /* only if don't need *dbp pointing
4584 to the beginning of the name of
4585 the procedure or function */
4589 if (c
== '}') /* within { } comments */
4591 else if (c
== '*' && *dbp
== ')') /* within (* *) comments */
4608 inquote
= TRUE
; /* found first quote */
4610 case '{': /* found open { comment */
4614 if (*dbp
== '*') /* found open (* comment */
4619 else if (found_tag
) /* found '(' after tag, i.e., parm-list */
4622 case ')': /* end of parms list */
4627 if (found_tag
&& !inparms
) /* end of proc or fn stmt */
4634 if (found_tag
&& verify_tag
&& (*dbp
!= ' '))
4636 /* Check if this is an "extern" declaration. */
4639 if (lowcase (*dbp
) == 'e')
4641 if (nocase_tail ("extern")) /* superfluous, really! */
4647 else if (lowcase (*dbp
) == 'f')
4649 if (nocase_tail ("forward")) /* check for forward reference */
4655 if (found_tag
&& verify_tag
) /* not external proc, so make tag */
4659 make_tag (name
, namelen
, TRUE
,
4660 tline
.buffer
, taglen
, save_lineno
, save_lcno
);
4664 if (get_tagname
) /* grab name of proc or fn */
4671 /* Find block name. */
4672 for (cp
= dbp
+ 1; *cp
!= '\0' && !endtoken (*cp
); cp
++)
4675 /* Save all values for later tagging. */
4676 linebuffer_setlen (&tline
, lb
.len
);
4677 strcpy (tline
.buffer
, lb
.buffer
);
4678 save_lineno
= lineno
;
4679 save_lcno
= linecharno
;
4680 name
= tline
.buffer
+ (dbp
- lb
.buffer
);
4682 taglen
= cp
- lb
.buffer
+ 1;
4684 dbp
= cp
; /* set dbp to e-o-token */
4685 get_tagname
= FALSE
;
4689 /* And proceed to check for "extern". */
4691 else if (!incomment
&& !inquote
&& !found_tag
)
4693 /* Check for proc/fn keywords. */
4694 switch (lowcase (c
))
4697 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4701 if (nocase_tail ("unction"))
4706 } /* while not eof */
4708 free (tline
.buffer
);
4713 * Lisp tag functions
4714 * look for (def or (DEF, quote or QUOTE
4717 static void L_getit (void);
4722 if (*dbp
== '\'') /* Skip prefix quote */
4724 else if (*dbp
== '(')
4727 /* Try to skip "(quote " */
4728 if (!LOOKING_AT (dbp
, "quote") && !LOOKING_AT (dbp
, "QUOTE"))
4729 /* Ok, then skip "(" before name in (defstruct (foo)) */
4730 dbp
= skip_spaces (dbp
);
4732 get_tag (dbp
, NULL
);
4736 Lisp_functions (FILE *inf
)
4738 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
4743 /* "(defvar foo)" is a declaration rather than a definition. */
4747 if (LOOKING_AT (p
, "defvar"))
4749 p
= skip_name (p
); /* past var name */
4750 p
= skip_spaces (p
);
4756 if (strneq (dbp
+1, "def", 3) || strneq (dbp
+1, "DEF", 3))
4758 dbp
= skip_non_spaces (dbp
);
4759 dbp
= skip_spaces (dbp
);
4764 /* Check for (foo::defmumble name-defined ... */
4767 while (!notinname (*dbp
) && *dbp
!= ':');
4772 while (*dbp
== ':');
4774 if (strneq (dbp
, "def", 3) || strneq (dbp
, "DEF", 3))
4776 dbp
= skip_non_spaces (dbp
);
4777 dbp
= skip_spaces (dbp
);
4787 * Lua script language parsing
4788 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4790 * "function" and "local function" are tags if they start at column 1.
4793 Lua_functions (FILE *inf
)
4797 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4799 if (bp
[0] != 'f' && bp
[0] != 'l')
4802 (void)LOOKING_AT (bp
, "local"); /* skip possible "local" */
4804 if (LOOKING_AT (bp
, "function"))
4812 * Just look for lines where the first character is '/'
4813 * Also look at "defineps" for PSWrap
4815 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4816 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4819 PS_functions (FILE *inf
)
4821 register char *bp
, *ep
;
4823 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4828 *ep
!= '\0' && *ep
!= ' ' && *ep
!= '{';
4831 make_tag (bp
, ep
- bp
, TRUE
,
4832 lb
.buffer
, ep
- lb
.buffer
+ 1, lineno
, linecharno
);
4834 else if (LOOKING_AT (bp
, "defineps"))
4842 * Ignore anything after \ followed by space or in ( )
4843 * Look for words defined by :
4844 * Look for constant, code, create, defer, value, and variable
4845 * OBP extensions: Look for buffer:, field,
4846 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4849 Forth_words (FILE *inf
)
4853 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4854 while ((bp
= skip_spaces (bp
))[0] != '\0')
4855 if (bp
[0] == '\\' && iswhite (bp
[1]))
4856 break; /* read next line */
4857 else if (bp
[0] == '(' && iswhite (bp
[1]))
4858 do /* skip to ) or eol */
4860 while (*bp
!= ')' && *bp
!= '\0');
4861 else if ((bp
[0] == ':' && iswhite (bp
[1]) && bp
++)
4862 || LOOKING_AT_NOCASE (bp
, "constant")
4863 || LOOKING_AT_NOCASE (bp
, "code")
4864 || LOOKING_AT_NOCASE (bp
, "create")
4865 || LOOKING_AT_NOCASE (bp
, "defer")
4866 || LOOKING_AT_NOCASE (bp
, "value")
4867 || LOOKING_AT_NOCASE (bp
, "variable")
4868 || LOOKING_AT_NOCASE (bp
, "buffer:")
4869 || LOOKING_AT_NOCASE (bp
, "field"))
4870 get_tag (skip_spaces (bp
), NULL
); /* Yay! A definition! */
4872 bp
= skip_non_spaces (bp
);
4877 * Scheme tag functions
4878 * look for (def... xyzzy
4880 * (def ... ((...(xyzzy ....
4882 * Original code by Ken Haase (1985?)
4885 Scheme_functions (FILE *inf
)
4889 LOOP_ON_INPUT_LINES (inf
, lb
, bp
)
4891 if (strneq (bp
, "(def", 4) || strneq (bp
, "(DEF", 4))
4893 bp
= skip_non_spaces (bp
+4);
4894 /* Skip over open parens and white space. Don't continue past
4896 while (*bp
&& notinname (*bp
))
4900 if (LOOKING_AT (bp
, "(SET!") || LOOKING_AT (bp
, "(set!"))
4906 /* Find tags in TeX and LaTeX input files. */
4908 /* TEX_toktab is a table of TeX control sequences that define tags.
4909 * Each entry records one such control sequence.
4911 * Original code from who knows whom.
4913 * Stefan Monnier (2002)
4916 static linebuffer
*TEX_toktab
= NULL
; /* Table with tag tokens */
4918 /* Default set of control sequences to put into TEX_toktab.
4919 The value of environment var TEXTAGS is prepended to this. */
4920 static const char *TEX_defenv
= "\
4921 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4922 :part:appendix:entry:index:def\
4923 :newcommand:renewcommand:newenvironment:renewenvironment";
4925 static void TEX_mode (FILE *);
4926 static void TEX_decode_env (const char *, const char *);
4928 static char TEX_esc
= '\\';
4929 static char TEX_opgrp
= '{';
4930 static char TEX_clgrp
= '}';
4933 * TeX/LaTeX scanning loop.
4936 TeX_commands (FILE *inf
)
4941 /* Select either \ or ! as escape character. */
4944 /* Initialize token table once from environment. */
4945 if (TEX_toktab
== NULL
)
4946 TEX_decode_env ("TEXTAGS", TEX_defenv
);
4948 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
4950 /* Look at each TEX keyword in line. */
4953 /* Look for a TEX escape. */
4954 while (*cp
++ != TEX_esc
)
4955 if (cp
[-1] == '\0' || cp
[-1] == '%')
4958 for (key
= TEX_toktab
; key
->buffer
!= NULL
; key
++)
4959 if (strneq (cp
, key
->buffer
, key
->len
))
4962 int namelen
, linelen
;
4965 cp
= skip_spaces (cp
+ key
->len
);
4966 if (*cp
== TEX_opgrp
)
4972 (!iswhite (*p
) && *p
!= '#' &&
4973 *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
);
4978 if (!opgrp
|| *p
== TEX_clgrp
)
4980 while (*p
!= '\0' && *p
!= TEX_opgrp
&& *p
!= TEX_clgrp
)
4982 linelen
= p
- lb
.buffer
+ 1;
4984 make_tag (cp
, namelen
, TRUE
,
4985 lb
.buffer
, linelen
, lineno
, linecharno
);
4986 goto tex_next_line
; /* We only tag a line once */
4994 #define TEX_LESC '\\'
4995 #define TEX_SESC '!'
4997 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4998 chars accordingly. */
5000 TEX_mode (FILE *inf
)
5004 while ((c
= getc (inf
)) != EOF
)
5006 /* Skip to next line if we hit the TeX comment char. */
5008 while (c
!= '\n' && c
!= EOF
)
5010 else if (c
== TEX_LESC
|| c
== TEX_SESC
)
5026 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5027 No attempt is made to correct the situation. */
5031 /* Read environment and prepend it to the default string.
5032 Build token table. */
5034 TEX_decode_env (const char *evarname
, const char *defenv
)
5036 register const char *env
, *p
;
5039 /* Append default string to environment. */
5040 env
= getenv (evarname
);
5044 env
= concat (env
, defenv
, "");
5046 /* Allocate a token table */
5047 for (len
= 1, p
= env
; p
;)
5048 if ((p
= etags_strchr (p
, ':')) && *++p
!= '\0')
5050 TEX_toktab
= xnew (len
, linebuffer
);
5052 /* Unpack environment string into token table. Be careful about */
5053 /* zero-length strings (leading ':', "::" and trailing ':') */
5054 for (i
= 0; *env
!= '\0';)
5056 p
= etags_strchr (env
, ':');
5057 if (!p
) /* End of environment string. */
5058 p
= env
+ strlen (env
);
5060 { /* Only non-zero strings. */
5061 TEX_toktab
[i
].buffer
= savenstr (env
, p
- env
);
5062 TEX_toktab
[i
].len
= p
- env
;
5069 TEX_toktab
[i
].buffer
= NULL
; /* Mark end of table. */
5070 TEX_toktab
[i
].len
= 0;
5077 /* Texinfo support. Dave Love, Mar. 2000. */
5079 Texinfo_nodes (FILE *inf
)
5082 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5083 if (LOOKING_AT (cp
, "@node"))
5086 while (*cp
!= '\0' && *cp
!= ',')
5088 make_tag (start
, cp
- start
, TRUE
,
5089 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5096 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5097 * Contents of <a name=xxx> are tags with name xxx.
5099 * Francesco Potortì, 2002.
5102 HTML_labels (FILE *inf
)
5104 bool getnext
= FALSE
; /* next text outside of HTML tags is a tag */
5105 bool skiptag
= FALSE
; /* skip to the end of the current HTML tag */
5106 bool intag
= FALSE
; /* inside an html tag, looking for ID= */
5107 bool inanchor
= FALSE
; /* when INTAG, is an anchor, look for NAME= */
5111 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5113 LOOP_ON_INPUT_LINES (inf
, lb
, dbp
)
5114 for (;;) /* loop on the same line */
5116 if (skiptag
) /* skip HTML tag */
5118 while (*dbp
!= '\0' && *dbp
!= '>')
5124 continue; /* look on the same line */
5126 break; /* go to next line */
5129 else if (intag
) /* look for "name=" or "id=" */
5131 while (*dbp
!= '\0' && *dbp
!= '>'
5132 && lowcase (*dbp
) != 'n' && lowcase (*dbp
) != 'i')
5135 break; /* go to next line */
5140 continue; /* look on the same line */
5142 if ((inanchor
&& LOOKING_AT_NOCASE (dbp
, "name="))
5143 || LOOKING_AT_NOCASE (dbp
, "id="))
5145 bool quoted
= (dbp
[0] == '"');
5148 for (end
= ++dbp
; *end
!= '\0' && *end
!= '"'; end
++)
5151 for (end
= dbp
; *end
!= '\0' && intoken (*end
); end
++)
5153 linebuffer_setlen (&token_name
, end
- dbp
);
5154 memcpy (token_name
.buffer
, dbp
, end
- dbp
);
5155 token_name
.buffer
[end
- dbp
] = '\0';
5158 intag
= FALSE
; /* we found what we looked for */
5159 skiptag
= TRUE
; /* skip to the end of the tag */
5160 getnext
= TRUE
; /* then grab the text */
5161 continue; /* look on the same line */
5166 else if (getnext
) /* grab next tokens and tag them */
5168 dbp
= skip_spaces (dbp
);
5170 break; /* go to next line */
5174 inanchor
= (lowcase (dbp
[1]) == 'a' && !intoken (dbp
[2]));
5175 continue; /* look on the same line */
5178 for (end
= dbp
+ 1; *end
!= '\0' && *end
!= '<'; end
++)
5180 make_tag (token_name
.buffer
, token_name
.len
, TRUE
,
5181 dbp
, end
- dbp
, lineno
, linecharno
);
5182 linebuffer_setlen (&token_name
, 0); /* no name in buffer */
5184 break; /* go to next line */
5187 else /* look for an interesting HTML tag */
5189 while (*dbp
!= '\0' && *dbp
!= '<')
5192 break; /* go to next line */
5194 if (lowcase (dbp
[1]) == 'a' && !intoken (dbp
[2]))
5197 continue; /* look on the same line */
5199 else if (LOOKING_AT_NOCASE (dbp
, "<title>")
5200 || LOOKING_AT_NOCASE (dbp
, "<h1>")
5201 || LOOKING_AT_NOCASE (dbp
, "<h2>")
5202 || LOOKING_AT_NOCASE (dbp
, "<h3>"))
5206 continue; /* look on the same line */
5217 * Assumes that the predicate or rule starts at column 0.
5218 * Only the first clause of a predicate or rule is added.
5219 * Original code by Sunichirou Sugou (1989)
5220 * Rewritten by Anders Lindgren (1996)
5222 static size_t prolog_pr (char *, char *);
5223 static void prolog_skip_comment (linebuffer
*, FILE *);
5224 static size_t prolog_atom (char *, size_t);
5227 Prolog_functions (FILE *inf
)
5237 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5239 if (cp
[0] == '\0') /* Empty line */
5241 else if (iswhite (cp
[0])) /* Not a predicate */
5243 else if (cp
[0] == '/' && cp
[1] == '*') /* comment. */
5244 prolog_skip_comment (&lb
, inf
);
5245 else if ((len
= prolog_pr (cp
, last
)) > 0)
5247 /* Predicate or rule. Store the function name so that we
5248 only generate a tag for the first clause. */
5250 last
= xnew (len
+ 1, char);
5251 else if (len
+ 1 > allocated
)
5252 xrnew (last
, len
+ 1, char);
5253 allocated
= len
+ 1;
5254 memcpy (last
, cp
, len
);
5263 prolog_skip_comment (linebuffer
*plb
, FILE *inf
)
5269 for (cp
= plb
->buffer
; *cp
!= '\0'; cp
++)
5270 if (cp
[0] == '*' && cp
[1] == '/')
5272 readline (plb
, inf
);
5274 while (!feof (inf
));
5278 * A predicate or rule definition is added if it matches:
5279 * <beginning of line><Prolog Atom><whitespace>(
5280 * or <beginning of line><Prolog Atom><whitespace>:-
5282 * It is added to the tags database if it doesn't match the
5283 * name of the previous clause header.
5285 * Return the size of the name of the predicate or rule, or 0 if no
5289 prolog_pr (char *s
, char *last
)
5291 /* Name of last clause. */
5296 pos
= prolog_atom (s
, 0);
5301 pos
= skip_spaces (s
+ pos
) - s
;
5304 || (s
[pos
] == '(' && (pos
+= 1))
5305 || (s
[pos
] == ':' && s
[pos
+ 1] == '-' && (pos
+= 2)))
5306 && (last
== NULL
/* save only the first clause */
5307 || len
!= strlen (last
)
5308 || !strneq (s
, last
, len
)))
5310 make_tag (s
, len
, TRUE
, s
, pos
, lineno
, linecharno
);
5318 * Consume a Prolog atom.
5319 * Return the number of bytes consumed, or 0 if there was an error.
5321 * A prolog atom, in this context, could be one of:
5322 * - An alphanumeric sequence, starting with a lower case letter.
5323 * - A quoted arbitrary string. Single quotes can escape themselves.
5324 * Backslash quotes everything.
5327 prolog_atom (char *s
, size_t pos
)
5333 if (ISLOWER (s
[pos
]) || (s
[pos
] == '_'))
5335 /* The atom is unquoted. */
5337 while (ISALNUM (s
[pos
]) || (s
[pos
] == '_'))
5341 return pos
- origpos
;
5343 else if (s
[pos
] == '\'')
5354 pos
++; /* A double quote */
5356 else if (s
[pos
] == '\0')
5357 /* Multiline quoted atoms are ignored. */
5359 else if (s
[pos
] == '\\')
5361 if (s
[pos
+1] == '\0')
5368 return pos
- origpos
;
5376 * Support for Erlang
5378 * Generates tags for functions, defines, and records.
5379 * Assumes that Erlang functions start at column 0.
5380 * Original code by Anders Lindgren (1996)
5382 static int erlang_func (char *, char *);
5383 static void erlang_attribute (char *);
5384 static int erlang_atom (char *);
5387 Erlang_functions (FILE *inf
)
5397 LOOP_ON_INPUT_LINES (inf
, lb
, cp
)
5399 if (cp
[0] == '\0') /* Empty line */
5401 else if (iswhite (cp
[0])) /* Not function nor attribute */
5403 else if (cp
[0] == '%') /* comment */
5405 else if (cp
[0] == '"') /* Sometimes, strings start in column one */
5407 else if (cp
[0] == '-') /* attribute, e.g. "-define" */
5409 erlang_attribute (cp
);
5416 else if ((len
= erlang_func (cp
, last
)) > 0)
5419 * Function. Store the function name so that we only
5420 * generate a tag for the first clause.
5423 last
= xnew (len
+ 1, char);
5424 else if (len
+ 1 > allocated
)
5425 xrnew (last
, len
+ 1, char);
5426 allocated
= len
+ 1;
5427 memcpy (last
, cp
, len
);
5436 * A function definition is added if it matches:
5437 * <beginning of line><Erlang Atom><whitespace>(
5439 * It is added to the tags database if it doesn't match the
5440 * name of the previous clause header.
5442 * Return the size of the name of the function, or 0 if no function
5446 erlang_func (char *s
, char *last
)
5448 /* Name of last clause. */
5453 pos
= erlang_atom (s
);
5458 pos
= skip_spaces (s
+ pos
) - s
;
5460 /* Save only the first clause. */
5463 || len
!= (int)strlen (last
)
5464 || !strneq (s
, last
, len
)))
5466 make_tag (s
, len
, TRUE
, s
, pos
, lineno
, linecharno
);
5475 * Handle attributes. Currently, tags are generated for defines
5478 * They are of the form:
5479 * -define(foo, bar).
5480 * -define(Foo(M, N), M+N).
5481 * -record(graph, {vtab = notable, cyclic = true}).
5484 erlang_attribute (char *s
)
5488 if ((LOOKING_AT (cp
, "-define") || LOOKING_AT (cp
, "-record"))
5491 int len
= erlang_atom (skip_spaces (cp
));
5493 make_tag (cp
, len
, TRUE
, s
, cp
+ len
- s
, lineno
, linecharno
);
5500 * Consume an Erlang atom (or variable).
5501 * Return the number of bytes consumed, or -1 if there was an error.
5504 erlang_atom (char *s
)
5508 if (ISALPHA (s
[pos
]) || s
[pos
] == '_')
5510 /* The atom is unquoted. */
5513 while (ISALNUM (s
[pos
]) || s
[pos
] == '_');
5515 else if (s
[pos
] == '\'')
5517 for (pos
++; s
[pos
] != '\''; pos
++)
5518 if (s
[pos
] == '\0' /* multiline quoted atoms are ignored */
5519 || (s
[pos
] == '\\' && s
[++pos
] == '\0'))
5528 static char *scan_separators (char *);
5529 static void add_regex (char *, language
*);
5530 static char *substitute (char *, char *, struct re_registers
*);
5533 * Take a string like "/blah/" and turn it into "blah", verifying
5534 * that the first and last characters are the same, and handling
5535 * quoted separator characters. Actually, stops on the occurrence of
5536 * an unquoted separator. Also process \t, \n, etc. and turn into
5537 * appropriate characters. Works in place. Null terminates name string.
5538 * Returns pointer to terminating separator, or NULL for
5539 * unterminated regexps.
5542 scan_separators (char *name
)
5545 char *copyto
= name
;
5546 bool quoted
= FALSE
;
5548 for (++name
; *name
!= '\0'; ++name
)
5554 case 'a': *copyto
++ = '\007'; break; /* BEL (bell) */
5555 case 'b': *copyto
++ = '\b'; break; /* BS (back space) */
5556 case 'd': *copyto
++ = 0177; break; /* DEL (delete) */
5557 case 'e': *copyto
++ = 033; break; /* ESC (escape) */
5558 case 'f': *copyto
++ = '\f'; break; /* FF (form feed) */
5559 case 'n': *copyto
++ = '\n'; break; /* NL (new line) */
5560 case 'r': *copyto
++ = '\r'; break; /* CR (carriage return) */
5561 case 't': *copyto
++ = '\t'; break; /* TAB (horizontal tab) */
5562 case 'v': *copyto
++ = '\v'; break; /* VT (vertical tab) */
5568 /* Something else is quoted, so preserve the quote. */
5576 else if (*name
== '\\')
5578 else if (*name
== sep
)
5584 name
= NULL
; /* signal unterminated regexp */
5586 /* Terminate copied string. */
5591 /* Look at the argument of --regex or --no-regex and do the right
5592 thing. Same for each line of a regexp file. */
5594 analyse_regex (char *regex_arg
)
5596 if (regex_arg
== NULL
)
5598 free_regexps (); /* --no-regex: remove existing regexps */
5602 /* A real --regexp option or a line in a regexp file. */
5603 switch (regex_arg
[0])
5605 /* Comments in regexp file or null arg to --regex. */
5611 /* Read a regex file. This is recursive and may result in a
5612 loop, which will stop when the file descriptors are exhausted. */
5616 linebuffer regexbuf
;
5617 char *regexfile
= regex_arg
+ 1;
5619 /* regexfile is a file containing regexps, one per line. */
5620 regexfp
= fopen (regexfile
, "r");
5621 if (regexfp
== NULL
)
5623 linebuffer_init (&regexbuf
);
5624 while (readline_internal (&regexbuf
, regexfp
) > 0)
5625 analyse_regex (regexbuf
.buffer
);
5626 free (regexbuf
.buffer
);
5631 /* Regexp to be used for a specific language only. */
5635 char *lang_name
= regex_arg
+ 1;
5638 for (cp
= lang_name
; *cp
!= '}'; cp
++)
5641 error ("unterminated language name in regex: %s", regex_arg
);
5645 lang
= get_language_from_langname (lang_name
);
5648 add_regex (cp
, lang
);
5652 /* Regexp to be used for any language. */
5654 add_regex (regex_arg
, NULL
);
5659 /* Separate the regexp pattern, compile it,
5660 and care for optional name and modifiers. */
5662 add_regex (char *regexp_pattern
, language
*lang
)
5664 static struct re_pattern_buffer zeropattern
;
5665 char sep
, *pat
, *name
, *modifiers
;
5668 struct re_pattern_buffer
*patbuf
;
5671 force_explicit_name
= TRUE
, /* do not use implicit tag names */
5672 ignore_case
= FALSE
, /* case is significant */
5673 multi_line
= FALSE
, /* matches are done one line at a time */
5674 single_line
= FALSE
; /* dot does not match newline */
5677 if (strlen (regexp_pattern
) < 3)
5679 error ("null regexp");
5682 sep
= regexp_pattern
[0];
5683 name
= scan_separators (regexp_pattern
);
5686 error ("%s: unterminated regexp", regexp_pattern
);
5691 error ("null name for regexp \"%s\"", regexp_pattern
);
5694 modifiers
= scan_separators (name
);
5695 if (modifiers
== NULL
) /* no terminating separator --> no name */
5701 modifiers
+= 1; /* skip separator */
5703 /* Parse regex modifiers. */
5704 for (; modifiers
[0] != '\0'; modifiers
++)
5705 switch (modifiers
[0])
5708 if (modifiers
== name
)
5709 error ("forcing explicit tag name but no name, ignoring");
5710 force_explicit_name
= TRUE
;
5720 need_filebuf
= TRUE
;
5723 error ("invalid regexp modifier `%c', ignoring", modifiers
[0]);
5727 patbuf
= xnew (1, struct re_pattern_buffer
);
5728 *patbuf
= zeropattern
;
5731 static char lc_trans
[CHARS
];
5733 for (i
= 0; i
< CHARS
; i
++)
5734 lc_trans
[i
] = lowcase (i
);
5735 patbuf
->translate
= lc_trans
; /* translation table to fold case */
5739 pat
= concat ("^", regexp_pattern
, ""); /* anchor to beginning of line */
5741 pat
= regexp_pattern
;
5744 re_set_syntax (RE_SYNTAX_EMACS
| RE_DOT_NEWLINE
);
5746 re_set_syntax (RE_SYNTAX_EMACS
);
5748 err
= re_compile_pattern (pat
, strlen (pat
), patbuf
);
5753 error ("%s while compiling pattern", err
);
5758 p_head
= xnew (1, regexp
);
5759 p_head
->pattern
= savestr (regexp_pattern
);
5760 p_head
->p_next
= rp
;
5761 p_head
->lang
= lang
;
5762 p_head
->pat
= patbuf
;
5763 p_head
->name
= savestr (name
);
5764 p_head
->error_signaled
= FALSE
;
5765 p_head
->force_explicit_name
= force_explicit_name
;
5766 p_head
->ignore_case
= ignore_case
;
5767 p_head
->multi_line
= multi_line
;
5771 * Do the substitutions indicated by the regular expression and
5775 substitute (char *in
, char *out
, struct re_registers
*regs
)
5778 int size
, dig
, diglen
;
5781 size
= strlen (out
);
5783 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5784 if (out
[size
- 1] == '\\')
5785 fatal ("pattern error in \"%s\"", out
);
5786 for (t
= etags_strchr (out
, '\\');
5788 t
= etags_strchr (t
+ 2, '\\'))
5792 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5798 /* Allocate space and do the substitutions. */
5800 result
= xnew (size
+ 1, char);
5802 for (t
= result
; *out
!= '\0'; out
++)
5803 if (*out
== '\\' && ISDIGIT (*++out
))
5806 diglen
= regs
->end
[dig
] - regs
->start
[dig
];
5807 memcpy (t
, in
+ regs
->start
[dig
], diglen
);
5814 assert (t
<= result
+ size
);
5815 assert (t
- result
== (int)strlen (result
));
5820 /* Deallocate all regexps. */
5825 while (p_head
!= NULL
)
5827 rp
= p_head
->p_next
;
5828 free (p_head
->pattern
);
5829 free (p_head
->name
);
5837 * Reads the whole file as a single string from `filebuf' and looks for
5838 * multi-line regular expressions, creating tags on matches.
5839 * readline already dealt with normal regexps.
5841 * Idea by Ben Wing <ben@666.com> (2002).
5844 regex_tag_multiline (void)
5846 char *buffer
= filebuf
.buffer
;
5850 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
5854 if (!rp
->multi_line
)
5855 continue; /* skip normal regexps */
5857 /* Generic initializations before parsing file from memory. */
5858 lineno
= 1; /* reset global line number */
5859 charno
= 0; /* reset global char number */
5860 linecharno
= 0; /* reset global char number of line start */
5862 /* Only use generic regexps or those for the current language. */
5863 if (rp
->lang
!= NULL
&& rp
->lang
!= curfdp
->lang
)
5866 while (match
>= 0 && match
< filebuf
.len
)
5868 match
= re_search (rp
->pat
, buffer
, filebuf
.len
, charno
,
5869 filebuf
.len
- match
, &rp
->regs
);
5874 if (!rp
->error_signaled
)
5876 error ("regexp stack overflow while matching \"%s\"",
5878 rp
->error_signaled
= TRUE
;
5885 if (match
== rp
->regs
.end
[0])
5887 if (!rp
->error_signaled
)
5889 error ("regexp matches the empty string: \"%s\"",
5891 rp
->error_signaled
= TRUE
;
5893 match
= -3; /* exit from while loop */
5897 /* Match occurred. Construct a tag. */
5898 while (charno
< rp
->regs
.end
[0])
5899 if (buffer
[charno
++] == '\n')
5900 lineno
++, linecharno
= charno
;
5902 if (name
[0] == '\0')
5904 else /* make a named tag */
5905 name
= substitute (buffer
, rp
->name
, &rp
->regs
);
5906 if (rp
->force_explicit_name
)
5907 /* Force explicit tag name, if a name is there. */
5908 pfnote (name
, TRUE
, buffer
+ linecharno
,
5909 charno
- linecharno
+ 1, lineno
, linecharno
);
5911 make_tag (name
, strlen (name
), TRUE
, buffer
+ linecharno
,
5912 charno
- linecharno
+ 1, lineno
, linecharno
);
5921 nocase_tail (const char *cp
)
5923 register int len
= 0;
5925 while (*cp
!= '\0' && lowcase (*cp
) == lowcase (dbp
[len
]))
5927 if (*cp
== '\0' && !intoken (dbp
[len
]))
5936 get_tag (register char *bp
, char **namepp
)
5938 register char *cp
= bp
;
5942 /* Go till you get to white space or a syntactic break */
5943 for (cp
= bp
+ 1; !notinname (*cp
); cp
++)
5945 make_tag (bp
, cp
- bp
, TRUE
,
5946 lb
.buffer
, cp
- lb
.buffer
+ 1, lineno
, linecharno
);
5950 *namepp
= savenstr (bp
, cp
- bp
);
5954 * Read a line of text from `stream' into `lbp', excluding the
5955 * newline or CR-NL, if any. Return the number of characters read from
5956 * `stream', which is the length of the line including the newline.
5958 * On DOS or Windows we do not count the CR character, if any before the
5959 * NL, in the returned length; this mirrors the behavior of Emacs on those
5960 * platforms (for text files, it translates CR-NL to NL as it reads in the
5963 * If multi-line regular expressions are requested, each line read is
5964 * appended to `filebuf'.
5967 readline_internal (linebuffer
*lbp
, register FILE *stream
)
5969 char *buffer
= lbp
->buffer
;
5970 register char *p
= lbp
->buffer
;
5971 register char *pend
;
5974 pend
= p
+ lbp
->size
; /* Separate to avoid 386/IX compiler bug. */
5978 register int c
= getc (stream
);
5981 /* We're at the end of linebuffer: expand it. */
5983 xrnew (buffer
, lbp
->size
, char);
5984 p
+= buffer
- lbp
->buffer
;
5985 pend
= buffer
+ lbp
->size
;
5986 lbp
->buffer
= buffer
;
5996 if (p
> buffer
&& p
[-1] == '\r')
6000 /* Assume CRLF->LF translation will be performed by Emacs
6001 when loading this file, so CRs won't appear in the buffer.
6002 It would be cleaner to compensate within Emacs;
6003 however, Emacs does not know how many CRs were deleted
6004 before any given point in the file. */
6019 lbp
->len
= p
- buffer
;
6021 if (need_filebuf
/* we need filebuf for multi-line regexps */
6022 && chars_deleted
> 0) /* not at EOF */
6024 while (filebuf
.size
<= filebuf
.len
+ lbp
->len
+ 1) /* +1 for \n */
6026 /* Expand filebuf. */
6028 xrnew (filebuf
.buffer
, filebuf
.size
, char);
6030 memcpy (filebuf
.buffer
+ filebuf
.len
, lbp
->buffer
, lbp
->len
);
6031 filebuf
.len
+= lbp
->len
;
6032 filebuf
.buffer
[filebuf
.len
++] = '\n';
6033 filebuf
.buffer
[filebuf
.len
] = '\0';
6036 return lbp
->len
+ chars_deleted
;
6040 * Like readline_internal, above, but in addition try to match the
6041 * input line against relevant regular expressions and manage #line
6045 readline (linebuffer
*lbp
, FILE *stream
)
6049 linecharno
= charno
; /* update global char number of line start */
6050 result
= readline_internal (lbp
, stream
); /* read line */
6051 lineno
+= 1; /* increment global line number */
6052 charno
+= result
; /* increment global char number */
6054 /* Honor #line directives. */
6055 if (!no_line_directive
)
6057 static bool discard_until_line_directive
;
6059 /* Check whether this is a #line directive. */
6060 if (result
> 12 && strneq (lbp
->buffer
, "#line ", 6))
6065 if (sscanf (lbp
->buffer
, "#line %u \"%n", &lno
, &start
) >= 1
6066 && start
> 0) /* double quote character found */
6068 char *endp
= lbp
->buffer
+ start
;
6070 while ((endp
= etags_strchr (endp
, '"')) != NULL
6071 && endp
[-1] == '\\')
6074 /* Ok, this is a real #line directive. Let's deal with it. */
6076 char *taggedabsname
; /* absolute name of original file */
6077 char *taggedfname
; /* name of original file as given */
6078 char *name
; /* temp var */
6080 discard_until_line_directive
= FALSE
; /* found it */
6081 name
= lbp
->buffer
+ start
;
6083 canonicalize_filename (name
);
6084 taggedabsname
= absolute_filename (name
, tagfiledir
);
6085 if (filename_is_absolute (name
)
6086 || filename_is_absolute (curfdp
->infname
))
6087 taggedfname
= savestr (taggedabsname
);
6089 taggedfname
= relative_filename (taggedabsname
,tagfiledir
);
6091 if (streq (curfdp
->taggedfname
, taggedfname
))
6092 /* The #line directive is only a line number change. We
6093 deal with this afterwards. */
6096 /* The tags following this #line directive should be
6097 attributed to taggedfname. In order to do this, set
6098 curfdp accordingly. */
6100 fdesc
*fdp
; /* file description pointer */
6102 /* Go look for a file description already set up for the
6103 file indicated in the #line directive. If there is
6104 one, use it from now until the next #line
6106 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6107 if (streq (fdp
->infname
, curfdp
->infname
)
6108 && streq (fdp
->taggedfname
, taggedfname
))
6109 /* If we remove the second test above (after the &&)
6110 then all entries pertaining to the same file are
6111 coalesced in the tags file. If we use it, then
6112 entries pertaining to the same file but generated
6113 from different files (via #line directives) will
6114 go into separate sections in the tags file. These
6115 alternatives look equivalent. The first one
6116 destroys some apparently useless information. */
6122 /* Else, if we already tagged the real file, skip all
6123 input lines until the next #line directive. */
6124 if (fdp
== NULL
) /* not found */
6125 for (fdp
= fdhead
; fdp
!= NULL
; fdp
= fdp
->next
)
6126 if (streq (fdp
->infabsname
, taggedabsname
))
6128 discard_until_line_directive
= TRUE
;
6132 /* Else create a new file description and use that from
6133 now on, until the next #line directive. */
6134 if (fdp
== NULL
) /* not found */
6137 fdhead
= xnew (1, fdesc
);
6138 *fdhead
= *curfdp
; /* copy curr. file description */
6140 fdhead
->infname
= savestr (curfdp
->infname
);
6141 fdhead
->infabsname
= savestr (curfdp
->infabsname
);
6142 fdhead
->infabsdir
= savestr (curfdp
->infabsdir
);
6143 fdhead
->taggedfname
= taggedfname
;
6144 fdhead
->usecharno
= FALSE
;
6145 fdhead
->prop
= NULL
;
6146 fdhead
->written
= FALSE
;
6150 free (taggedabsname
);
6152 readline (lbp
, stream
);
6154 } /* if a real #line directive */
6155 } /* if #line is followed by a number */
6156 } /* if line begins with "#line " */
6158 /* If we are here, no #line directive was found. */
6159 if (discard_until_line_directive
)
6163 /* Do a tail recursion on ourselves, thus discarding the contents
6164 of the line buffer. */
6165 readline (lbp
, stream
);
6169 discard_until_line_directive
= FALSE
;
6172 } /* if #line directives should be considered */
6179 /* Match against relevant regexps. */
6181 for (rp
= p_head
; rp
!= NULL
; rp
= rp
->p_next
)
6183 /* Only use generic regexps or those for the current language.
6184 Also do not use multiline regexps, which is the job of
6185 regex_tag_multiline. */
6186 if ((rp
->lang
!= NULL
&& rp
->lang
!= fdhead
->lang
)
6190 match
= re_match (rp
->pat
, lbp
->buffer
, lbp
->len
, 0, &rp
->regs
);
6195 if (!rp
->error_signaled
)
6197 error ("regexp stack overflow while matching \"%s\"",
6199 rp
->error_signaled
= TRUE
;
6206 /* Empty string matched. */
6207 if (!rp
->error_signaled
)
6209 error ("regexp matches the empty string: \"%s\"", rp
->pattern
);
6210 rp
->error_signaled
= TRUE
;
6214 /* Match occurred. Construct a tag. */
6216 if (name
[0] == '\0')
6218 else /* make a named tag */
6219 name
= substitute (lbp
->buffer
, rp
->name
, &rp
->regs
);
6220 if (rp
->force_explicit_name
)
6221 /* Force explicit tag name, if a name is there. */
6222 pfnote (name
, TRUE
, lbp
->buffer
, match
, lineno
, linecharno
);
6224 make_tag (name
, strlen (name
), TRUE
,
6225 lbp
->buffer
, match
, lineno
, linecharno
);
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
static char *
savestr (const char *cp)
{
  return savenstr (cp, strlen (cp));
}
6244 * Return a pointer to a space of size LEN+1 allocated with xnew where
6245 * the string CP has been copied for at most the first LEN characters.
6248 savenstr (const char *cp
, int len
)
6252 dp
= xnew (len
+ 1, char);
6253 memcpy (dp
, cp
, len
);
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As in strrchr, C is converted to char before comparing and the
 * terminating null character counts as part of the string.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *r;
  const char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;                 /* keep the latest hit and carry on */
    } while (*sp++);
  return (char *) r;
}
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As in strchr, C is converted to char before comparing and the
 * terminating null character counts as part of the string.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *) sp;
    } while (*sp++);
  return NULL;
}
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (char *cp)
{
  while (iswhite (*cp))
    cp++;
  return cp;
}
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (char *cp)
{
  while (*cp != '\0' && !iswhite (*cp))
    cp++;
  return cp;
}
/* Skip any chars in the "name" class, return new pointer. */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  while (! notinname (*cp))
    cp++;
  return cp;
}
/* Print error message and exit.  S1 is used as the format passed to
   `error' and S2 as its single argument, so S1 may contain at most one
   "%s" conversion.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Report the current errno condition through perror, prefixed by S1,
   and exit with a failure status.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6339 suggest_asking_for_help (void)
6341 fprintf (stderr
, "\tTry `%s --help' for a complete list of options.\n",
6343 exit (EXIT_FAILURE
);
6346 /* Output a diagnostic with printf-style FORMAT and args. */
6348 error (const char *format
, ...)
6351 va_start (ap
, format
);
6352 fprintf (stderr
, "%s: ", progname
);
6353 vfprintf (stderr
, format
, ap
);
6354 fprintf (stderr
, "\n");
6358 /* Return a newly-allocated string whose contents
6359 concatenate those of s1, s2, s3. */
6361 concat (const char *s1
, const char *s2
, const char *s3
)
6363 int len1
= strlen (s1
), len2
= strlen (s2
), len3
= strlen (s3
);
6364 char *result
= xnew (len1
+ len2
+ len3
+ 1, char);
6366 strcpy (result
, s1
);
6367 strcpy (result
+ len1
, s2
);
6368 strcpy (result
+ len1
+ len2
, s3
);
6369 result
[len1
+ len2
+ len3
] = '\0';
6375 /* Does the same work as the system V getcwd, but does not need to
6376 guess the buffer size in advance. */
6381 char *path
= xnew (bufsize
, char);
6383 while (getcwd (path
, bufsize
) == NULL
)
6385 if (errno
!= ERANGE
)
6389 path
= xnew (bufsize
, char);
6392 canonicalize_filename (path
);
6396 /* Return a newly allocated string containing the file name of FILE
6397 relative to the absolute directory DIR (which should end with a slash). */
6399 relative_filename (char *file
, char *dir
)
6401 char *fp
, *dp
, *afn
, *res
;
6404 /* Find the common root of file and dir (with a trailing slash). */
6405 afn
= absolute_filename (file
, cwd
);
6408 while (*fp
++ == *dp
++)
6410 fp
--, dp
--; /* back to the first differing char */
6412 if (fp
== afn
&& afn
[0] != '/') /* cannot build a relative name */
6415 do /* look at the equal chars until '/' */
6419 /* Build a sequence of "../" strings for the resulting relative file name. */
6421 while ((dp
= etags_strchr (dp
+ 1, '/')) != NULL
)
6423 res
= xnew (3*i
+ strlen (fp
+ 1) + 1, char);
6426 strcat (res
, "../");
6428 /* Add the file name relative to the common root of file and dir. */
6429 strcat (res
, fp
+ 1);
6435 /* Return a newly allocated string containing the absolute file name
6436 of FILE given DIR (which should end with a slash). */
6438 absolute_filename (char *file
, char *dir
)
6440 char *slashp
, *cp
, *res
;
6442 if (filename_is_absolute (file
))
6443 res
= savestr (file
);
6445 /* We don't support non-absolute file names with a drive
6446 letter, like `d:NAME' (it's too much hassle). */
6447 else if (file
[1] == ':')
6448 fatal ("%s: relative file names with drive letters not supported", file
);
6451 res
= concat (dir
, file
, "");
6453 /* Delete the "/dirname/.." and "/." substrings. */
6454 slashp
= etags_strchr (res
, '/');
6455 while (slashp
!= NULL
&& slashp
[0] != '\0')
6457 if (slashp
[1] == '.')
6459 if (slashp
[2] == '.'
6460 && (slashp
[3] == '/' || slashp
[3] == '\0'))
6465 while (cp
>= res
&& !filename_is_absolute (cp
));
6467 cp
= slashp
; /* the absolute name begins with "/.." */
6469 /* Under MSDOS and NT we get `d:/NAME' as absolute
6470 file name, so the luser could say `d:/../NAME'.
6471 We silently treat this as `d:/NAME'. */
6472 else if (cp
[0] != '/')
6475 memmove (cp
, slashp
+ 3, strlen (slashp
+ 2));
6479 else if (slashp
[2] == '/' || slashp
[2] == '\0')
6481 memmove (slashp
, slashp
+ 2, strlen (slashp
+ 1));
6486 slashp
= etags_strchr (slashp
+ 1, '/');
6489 if (res
[0] == '\0') /* just a safety net: should never happen */
6492 return savestr ("/");
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash a copy of DIR is
   returned.  FILE is cut after its last slash while the name is built
   and is restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    return savestr (dir);
  save = slashp[1];
  slashp[1] = '\0';             /* keep the directory part only */
  res = absolute_filename (file, dir);
  slashp[1] = save;

  return res;
}
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it begins with a slash or, under DOS_NT,
   with a drive letter, a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
  return (fn[0] == '/'
#ifdef DOS_NT
          || (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
#endif
          );
}
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is '/', or '\\' under DOS_NT; each
   run of separators is rewritten as a single '/'.  */
static void
canonicalize_filename (register char *fn)
{
  register char* cp;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash.  CP reads ahead
     while FN writes; FN never overtakes CP.  */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == sep)
      {
        *fn = '/';
        while (cp[1] == sep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6561 /* Initialize a linebuffer for use. */
6563 linebuffer_init (linebuffer
*lbp
)
6565 lbp
->size
= (DEBUG
) ? 3 : 200;
6566 lbp
->buffer
= xnew (lbp
->size
, char);
6567 lbp
->buffer
[0] = '\0';
6571 /* Set the minimum size of a string contained in a linebuffer. */
6573 linebuffer_setlen (linebuffer
*lbp
, int toksize
)
6575 while (lbp
->size
<= toksize
)
6578 xrnew (lbp
->buffer
, lbp
->size
, char);
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *result = realloc (ptr, size);
  if (result == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
6604 * indent-tabs-mode: t
6607 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6608 * c-file-style: "gnu"
6612 /* etags.c ends here */