lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2024 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or (at
  39 your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  * Francesco Potortì maintained and improved it for many years
  72    starting in 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
/* Normalize DEBUG so that it is always defined: true if the build
   defined it (whatever its value), false otherwise.  */
#ifdef DEBUG
#  undef DEBUG
#  define DEBUG true
#else
#  define DEBUG false
#endif
  87
  88 #include <config.h>
  89
/* Normalize MSDOS so that it is always defined: true on builds that
   defined it (those builds also include <sys/param.h>), false
   otherwise.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS true
# include <sys/param.h>
#else
# define MSDOS false
#endif /* MSDOS */
  97
/* On WINDOWSNT builds: include <direct.h>, drop HAVE_NTGUI, and make
   sure DOS_NT is defined exactly once (it is tested with `defined'
   further down in this file).  */
#ifdef WINDOWSNT
# include <direct.h>
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
/* The WINDOWSNT build doesn't use Gnulib's fcntl.h.  */
# define O_CLOEXEC O_NOINHERIT
#endif /* WINDOWSNT */
 106
 107 #include <attribute.h>
 108 #include <inttypes.h>
 109 #include <limits.h>
 110 #include <unistd.h>
 111 #include <stdarg.h>
 112 #include <stdckdint.h>
 113 #include <stdlib.h>
 114 #include <string.h>
 115 #include <sysstdio.h>
 116 #include <errno.h>
 117 #include <fcntl.h>
 118 #include <binary-io.h>
 119 #include <intprops.h>
 120 #include <unlocked-io.h>
 121 #include <verify.h>
 122 #include <c-ctype.h>
 123 #include <c-strcase.h>
 124
 125 #include <assert.h>
 126 #include <getopt.h>
 127 #include <regex.h>
 128
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default.
   Either way, CTAGS ends up defined as true or false below.  */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS true
#else
# define CTAGS false
#endif
 138
/* Define MERCURY_HEURISTICS_RATIO, which is needed to disambiguate
   Mercury from Objective C, since both use the file extension .m.
   See the comments before the function test_objc_is_mercury for
   details.  The default of 0.5 applies only when the build has not
   already defined the macro.  */
#ifndef  MERCURY_HEURISTICS_RATIO
# define MERCURY_HEURISTICS_RATIO 0.5
#endif
 145
/* Work around GCC bug 114882
   <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114882>.
   When GNUC_PREREQ (14, 0, 0) holds, the analyzer's
   use-of-uninitialized-value diagnostic is ignored from here on.  */
#if GNUC_PREREQ (14, 0, 0)
# pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
#endif
 151
 152 /* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte.  */
 153 static void
 154 memcpyz (void *dest, void const *src, ptrdiff_t len)
 155 {
 156   char *e = mempcpy (dest, src, len);
 157   *e = '\0';
 158 }
 159
 160 static bool
 161 streq (char const *s, char const *t)
 162 {
 163   return strcmp (s, t) == 0;
 164 }
 165
 166 static bool
 167 strcaseeq (char const *s, char const *t)
 168 {
 169   return c_strcasecmp (s, t) == 0;
 170 }
 171
 172 static bool
 173 strneq (char const *s, char const *t, size_t n)
 174 {
 175   return strncmp (s, t, n) == 0;
 176 }
 177
 178 static bool
 179 strncaseeq (char const *s, char const *t, size_t n)
 180 {
 181   return c_strncasecmp (s, t, n) == 0;
 182 }
 183
 184 /* C is not in a name.  */
 185 static bool
 186 notinname (unsigned char c)
 187 {
 188   /* Look at make_tag before modifying!  */
 189   static bool const table[UCHAR_MAX + 1] = {
 190     ['\0']=1, ['\t']=1, ['\n']=1, ['\f']=1, ['\r']=1, [' ']=1,
 191     ['(']=1, [')']=1, [',']=1, [';']=1, ['=']=1
 192   };
 193   return table[c];
 194 }
 195
 196 /* C can start a token.  */
 197 static bool
 198 begtoken (unsigned char c)
 199 {
 200   static bool const table[UCHAR_MAX + 1] = {
 201     ['$']=1, ['@']=1,
 202     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 203     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 204     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 205     ['Y']=1, ['Z']=1,
 206     ['_']=1,
 207     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 208     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 209     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 210     ['y']=1, ['z']=1,
 211     ['~']=1
 212   };
 213   return table[c];
 214 }
 215
 216 /* C can be in the middle of a token.  */
 217 static bool
 218 intoken (unsigned char c)
 219 {
 220   static bool const table[UCHAR_MAX + 1] = {
 221     ['$']=1,
 222     ['0']=1, ['1']=1, ['2']=1, ['3']=1, ['4']=1,
 223     ['5']=1, ['6']=1, ['7']=1, ['8']=1, ['9']=1,
 224     ['A']=1, ['B']=1, ['C']=1, ['D']=1, ['E']=1, ['F']=1, ['G']=1, ['H']=1,
 225     ['I']=1, ['J']=1, ['K']=1, ['L']=1, ['M']=1, ['N']=1, ['O']=1, ['P']=1,
 226     ['Q']=1, ['R']=1, ['S']=1, ['T']=1, ['U']=1, ['V']=1, ['W']=1, ['X']=1,
 227     ['Y']=1, ['Z']=1,
 228     ['_']=1,
 229     ['a']=1, ['b']=1, ['c']=1, ['d']=1, ['e']=1, ['f']=1, ['g']=1, ['h']=1,
 230     ['i']=1, ['j']=1, ['k']=1, ['l']=1, ['m']=1, ['n']=1, ['o']=1, ['p']=1,
 231     ['q']=1, ['r']=1, ['s']=1, ['t']=1, ['u']=1, ['v']=1, ['w']=1, ['x']=1,
 232     ['y']=1, ['z']=1
 233   };
 234   return table[c];
 235 }
 236
 237 /* C can end a token.  */
 238 static bool
 239 endtoken (unsigned char c)
 240 {
 241   static bool const table[UCHAR_MAX + 1] = {
 242     ['\0']=1, ['\t']=1, ['\n']=1, ['\r']=1, [' ']=1,
 243     ['!']=1, ['"']=1, ['#']=1, ['%']=1, ['&']=1, ['\'']=1, ['(']=1, [')']=1,
 244     ['*']=1, ['+']=1, [',']=1, ['-']=1, ['.']=1, ['/']=1, [':']=1, [';']=1,
 245     ['<']=1, ['=']=1, ['>']=1, ['?']=1, ['[']=1, [']']=1, ['^']=1,
 246     ['{']=1, ['|']=1, ['}']=1, ['~']=1
 247   };
 248   return table[c];
 249 }
 250
 251 /*
 252  *      xrnew -- reallocate storage
 253  *
 254  * SYNOPSIS:    void xrnew (OldPointer, ptrdiff_t n, int multiplier);
 255  */
 256 #define xrnew(op, n, m) ((op) = xnrealloc (op, n, (m) * sizeof *(op)))
 257
/* Type of a language parse function (see the `function' member of
   `language'): it takes a FILE * and returns nothing.  */
typedef void Lang_function (FILE *);
 259
 260 typedef struct
 261 {
 262   const char *suffix;           /* file name suffix for this compressor */
 263   const char *command;          /* takes one arg and decompresses to stdout */
 264 } compressor;
 265
 266 typedef struct
 267 {
 268   const char *name;             /* language name */
 269   const char *help;             /* detailed help for the language */
 270   Lang_function *function;      /* parse function */
 271   const char **suffixes;        /* name suffixes of this language's files */
 272   const char **filenames;       /* names of this language's files */
 273   const char **interpreters;    /* interpreters for this language */
 274   bool metasource;              /* source used to generate other sources */
 275 } language;
 276
 277 typedef struct fdesc
 278 {
 279   struct fdesc *next;           /* for the linked list */
 280   char *infname;                /* uncompressed input file name */
 281   char *infabsname;             /* absolute uncompressed input file name */
 282   char *infabsdir;              /* absolute dir of input file */
 283   char *taggedfname;            /* file name to write in tagfile */
 284   language *lang;               /* language of file */
 285   char *prop;                   /* file properties to write in tagfile */
 286   bool usecharno;               /* etags tags shall contain char number */
 287   bool written;                 /* entry written in the tags file */
 288 } fdesc;
 289
 290 typedef struct node_st
 291 {                               /* sorting structure */
 292   struct node_st *left, *right; /* left and right sons */
 293   fdesc *fdp;                   /* description of file to whom tag belongs */
 294   char *name;                   /* tag name */
 295   char *regex;                  /* search regexp */
 296   bool valid;                   /* write this tag on the tag file */
 297   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 298   bool been_warned;             /* warning already given for duplicated tag */
 299   intmax_t lno;                 /* line number tag is on */
 300   intmax_t cno;                 /* character number line starts on */
 301 } node;
 302
/*
 * A 'linebuffer' is a structure which holds a line of text.
 * 'readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  ptrdiff_t size;               /* size of BUFFER */
  ptrdiff_t len;                /* length of the string in BUFFER */
  char *buffer;                 /* the text of the line */
} linebuffer;
 316
/* Used to support mixing of --lang and file names.
   Each command-line item is recorded as one `argument': ARG_TYPE says
   what kind of item it is, WHAT holds the item and LANG the language
   that goes with it.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 330
 331 /* Structure defining a regular expression. */
 332 typedef struct regexp
 333 {
 334   struct regexp *p_next;        /* pointer to next in list */
 335   language *lang;               /* if set, use only for this language */
 336   char *pattern;                /* the regexp pattern */
 337   char *name;                   /* tag name */
 338   struct re_pattern_buffer *pat; /* the compiled pattern */
 339   struct re_registers regs;     /* re registers */
 340   bool error_signaled;          /* already signaled for this regexp */
 341   bool ignore_case;             /* ignore case when matching */
 342   bool multi_line;              /* do a multi-line match on the whole file */
 343 } regexp;
 344
 345
/* Forward declarations of the per-language parse functions.  All of
   them have the Lang_function signature except C_entries, which takes
   an additional int argument first.

   Many compilers barf on this:
        Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs (FILE *);
static void Asm_labels (FILE *);
static void C_entries (int c_ext, FILE *);
static void default_C_entries (FILE *);
static void plain_C_entries (FILE *);
static void Cjava_entries (FILE *);
static void Cobol_paragraphs (FILE *);
static void Cplusplus_entries (FILE *);
static void Cstar_entries (FILE *);
static void Erlang_functions (FILE *);
static void Forth_words (FILE *);
static void Fortran_functions (FILE *);
static void Go_functions (FILE *);
static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
static void Mercury_functions (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
static void PS_functions (FILE *);
static void Prolog_functions (FILE *);
static void Python_functions (FILE *);
static void Ruby_functions (FILE *);
static void Rust_entries (FILE *);
static void Scheme_functions (FILE *);
static void TeX_commands (FILE *);
static void Texinfo_nodes (FILE *);
static void Yacc_entries (FILE *);
static void just_read_file (FILE *);
 380
/* Forward declarations of file-local helpers: language lookup, line
   reading and tag extraction.  */
static language *get_language_from_langname (const char *);
static void readline (linebuffer *, FILE *);
static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *, const bool);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);
static void get_lispy_tag (char *);
static void test_objc_is_mercury (char *, language **);

/* Regexp handling, diagnostics and tag-tree insertion.  The
   ATTRIBUTE_FORMAT_PRINTF annotations give the position of the format
   string and of the first variadic argument (0 for a va_list); the
   _Noreturn functions do not return to their caller.  */
static void analyze_regex (char *);
static void free_regexps (void);
static void regex_tag_multiline (void);
static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static void verror (char const *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0);
static _Noreturn void suggest_asking_for_help (void);
static _Noreturn void fatal (char const *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
static _Noreturn void pfatal (const char *);
static void add_node (node *, node **);

/* File processing and output of the collected tags.  */
static void process_file_name (char *, language *);
static void process_file (FILE *, char *, language *);
static void find_entries (FILE *);
static void free_tree (node *);
static void free_fdesc (fdesc *);
static void pfnote (char *, bool, char *, ptrdiff_t, intmax_t, intmax_t);
static void invalidate_nodes (fdesc *, node **);
static void put_entries (node *);
static void cleanup_tags_file (char const * const, char const * const);
 408
/* Forward declarations of file-local utilities: strings, file names,
   line buffers and memory allocation.  escape_shell_arg_string is
   declared only when neither MSDOS nor DOS_NT applies.  */
#if !MSDOS && !defined (DOS_NT)
static char *escape_shell_arg_string (char *);
#endif
static void do_move_file (const char *, const char *);
static char *concat (const char *, const char *, const char *);
static char *skip_spaces (char *);
static char *skip_non_spaces (char *);
static char *skip_name (char *);
static char *savenstr (const char *, ptrdiff_t);
static char *savestr (const char *);
static char *etags_getcwd (void);
static char *relative_filename (char *, char *);
static char *absolute_filename (char *, char *);
static char *absolute_dirname (char *, char *);
static bool filename_is_absolute (char *f);
static void canonicalize_filename (char *);
static char *etags_mktmp (void);
static void linebuffer_init (linebuffer *);
static void linebuffer_setlen (linebuffer *, ptrdiff_t);
/* The allocation wrappers take their sizes as ptrdiff_t; the
   attributes name the argument(s) that give the allocation size.  */
static void *xmalloc (ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1));
static void *xnmalloc (ptrdiff_t, ptrdiff_t) ATTRIBUTE_MALLOC_SIZE ((1,2));
static void *xnrealloc (void *, ptrdiff_t, ptrdiff_t)
  ATTRIBUTE_ALLOC_SIZE ((2,3));
 432
 433 \f
/* Program-wide state: search delimiter, output file and directories.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */
static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 442
/* State describing the input file and line currently being handled.  */
static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static char *infilename;        /* current input file name */
static intmax_t lineno;         /* line number of current line */
static intmax_t charno;         /* current character number */
static intmax_t linecharno;     /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* A negative value, so it cannot be mistaken for a character count.
   NOTE(review): presumably used to mark a missing character number —
   confirm against the code that uses it.  */
static intmax_t const invalidcharno = -1;
 452
/* The tags collected so far.  */
static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

/* Shared line buffers (see the `linebuffer' type).  */
static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 459
/* Command-line option flags.  Those declared `int' rather than `bool'
   have their address stored in the `flag' member of a longopts entry,
   and that member is an `int *'.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to true in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static int globals;             /* create tags for global variables */
static int members;             /* create tags for C member variables */
static int declarations;        /* --declarations: tag them and extern in C&Co */
static int no_line_directive;   /* ignore #line directives (undocumented) */
static int no_duplicates;       /* no duplicate tags for ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static int packages_only;       /* --packages-only: in Ada, only tag packages */
static int class_qualify;       /* -Q: produce class-qualified tags in C++/Java */
static int debug;               /* --debug */
 483
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* 0x1001 lies outside the range of the single-character option codes
   used in longopts, so it cannot collide with any of them.  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 494
/* Long options, in the format expected by getopt_long.  In each entry
   the fields are: option name, whether it takes an argument, a flag
   pointer, and a value.  With a NULL flag pointer getopt_long returns
   the value (a short-option character, or STDIN); otherwise it stores
   the value in the int that the pointer designates.  The options in
   the second part of the table depend on whether this is a ctags or
   an etags build.
   NOTE(review): "help" is listed twice, with 'h' and with 'H';
   presumably only the first entry can ever be matched -- confirm.  */
static struct option longopts[] =
{
  { "append",             no_argument,       NULL,               'a'   },
  { "packages-only",      no_argument,       &packages_only,     1     },
  { "c++",                no_argument,       NULL,               'C'   },
  { "debug",              no_argument,       &debug,             1     },
  { "declarations",       no_argument,       &declarations,      1     },
  { "no-line-directive",  no_argument,       &no_line_directive, 1     },
  { "no-duplicates",      no_argument,       &no_duplicates,     1     },
  { "help",               no_argument,       NULL,               'h'   },
  { "help",               no_argument,       NULL,               'H'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           1     },
  { "no-members",         no_argument,       &members,           0     },
  { "output",             required_argument, NULL,               'o'   },
  { "class-qualify",      no_argument,       &class_qualify,     'Q'   },
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           1     },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           0     },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
 536
 537 static compressor compressors[] =
 538 {
 539   { "z", "gzip -d -c"},
 540   { "Z", "gzip -d -c"},
 541   { "gz", "gzip -d -c"},
 542   { "GZ", "gzip -d -c"},
 543   { "bz2", "bzip2 -d -c" },
 544   { "xz", "xz -d -c" },
 545   { "zst", "zstd -d -c" },
 546   { NULL }
 547 };
 548
 549 /*
 550  * Language stuff.
 551  */
 552
 553 /* Ada code */
 554 static const char *Ada_suffixes [] =
 555   { "ads", "adb", "ada", NULL };
 556 static const char Ada_help [] =
 557 "In Ada code, functions, procedures, packages, tasks and types are\n\
 558 tags.  Use the '--packages-only' option to create tags for\n\
 559 packages only.\n\
 560 Ada tag names have suffixes indicating the type of entity:\n\
 561         Entity type:    Qualifier:\n\
 562         ------------    ----------\n\
 563         function        /f\n\
 564         procedure       /p\n\
 565         package spec    /s\n\
 566         package body    /b\n\
 567         type            /t\n\
 568         task            /k\n\
 569 Thus, 'M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 570 body of the package 'bidule', while 'M-x find-tag <RET> bidule <RET>'\n\
 571 will just search for any tag 'bidule'.";
 572
 573 /* Assembly code */
 574 static const char *Asm_suffixes [] =
 575   { "a",        /* Unix assembler */
 576     "asm", /* Microcontroller assembly */
 577     "def", /* BSO/Tasking definition includes  */
 578     "inc", /* Microcontroller include files */
 579     "ins", /* Microcontroller include files */
 580     "s", "sa", /* Unix assembler */
 581     "S",   /* cpp-processed Unix assembler */
 582     "src", /* BSO/Tasking C compiler output */
 583     NULL
 584   };
 585 static const char Asm_help [] =
 586 "In assembler code, labels appearing at the beginning of a line,\n\
 587 followed by a colon, are tags.";
 588
 589
 590 /* Note that .c and .h can be considered C++, if the --c++ flag was
 591    given, or if the `class' or `template' keywords are met inside the file.
 592    That is why default_C_entries is called for these. */
 593 static const char *default_C_suffixes [] =
 594   { "c", "h", NULL };
 595 #if CTAGS                               /* C help for Ctags */
 596 static const char default_C_help [] =
 597 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 598 Use -T to tag definitions of 'struct', 'union' and 'enum'.\n\
 599 Use -d to tag '#define' macro definitions and 'enum' constants.\n\
 600 Use --globals to tag global variables.\n\
 601 You can tag function declarations and external variables by\n\
 602 using '--declarations', and struct members by using '--members'.";
 603 #else                                   /* C help for Etags */
 604 static const char default_C_help [] =
 605 "In C code, any C function or typedef is a tag, and so are\n\
 606 definitions of 'struct', 'union' and 'enum'.  '#define' macro\n\
 607 definitions and 'enum' constants are tags unless you specify\n\
 608 '--no-defines'.  Global variables are tags unless you specify\n\
 609 '--no-globals' and so are struct members unless you specify\n\
 610 '--no-members'.  Use of '--no-globals', '--no-defines' and\n\
 611 '--no-members' can make the tags table file much smaller.\n\
 612 You can tag function declarations and external variables by\n\
 613 using '--declarations'.";
 614 #endif  /* C help for Ctags and Etags */
 615
 616 static const char *Cplusplus_suffixes [] =
 617   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 618     "M",                        /* Objective C++ */
 619     "pdb",                      /* PostScript with C syntax */
 620     NULL };
 621 static const char Cplusplus_help [] =
 622 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 623 --help --lang=c --lang=c++ for full help.)\n\
 624 In addition to C tags, member functions are also recognized.  Member\n\
 625 variables are recognized unless you use the '--no-members' option.\n\
 626 Tags for variables and functions in classes are named 'CLASS::VARIABLE'\n\
 627 and 'CLASS::FUNCTION'.  'operator' definitions have tag names like\n\
 628 'operator+'.";
 629
 630 static const char *Cjava_suffixes [] =
 631   { "java", NULL };
 632 static char Cjava_help [] =
 633 "In Java code, all the tags constructs of C and C++ code are\n\
 634 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 635
 636
 637 static const char *Cobol_suffixes [] =
 638   { "COB", "cob", NULL };
 639 static char Cobol_help [] =
 640 "In Cobol code, tags are paragraph names; that is, any word\n\
 641 starting in column 8 and followed by a period.";
 642
 643 static const char *Cstar_suffixes [] =
 644   { "cs", "hs", NULL };
 645
 646 static const char *Erlang_suffixes [] =
 647   { "erl", "hrl", NULL };
 648 static const char Erlang_help [] =
 649 "In Erlang code, the tags are the functions, records and macros\n\
 650 defined in the file.";
 651 static const char *Erlang_interpreters [] =
 652   { "escript", NULL };
 653
 654 static const char *Forth_suffixes [] =
 655   { "fth", "tok", NULL };
 656 static const char Forth_help [] =
 657 "In Forth code, tags are words defined by ':',\n\
 658 constant, code, create, defer, value, variable, buffer:, field.";
 659
 660 static const char *Fortran_suffixes [] =
 661   { "F", "f", "f90", "for", NULL };
 662 static const char Fortran_help [] =
 663 "In Fortran code, functions, subroutines and block data are tags.";
 664
 665 static const char *Go_suffixes [] = {"go", NULL};
 666 static const char Go_help [] =
 667   "In Go code, functions, interfaces and packages are tags.";
 668
 669 static const char *HTML_suffixes [] =
 670   { "htm", "html", "shtml", NULL };
 671 static const char HTML_help [] =
 672 "In HTML input files, the tags are the 'title' and the 'h1', 'h2',\n\
 673 'h3' headers.  Also, tags are 'name=' in anchors and all\n\
 674 occurrences of 'id='.";
 675
 676 static const char *Lisp_suffixes [] =
 677   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 678 static const char Lisp_help [] =
 679 "In Lisp code, any function defined with 'defun', any variable\n\
 680 defined with 'defvar' or 'defconst', and in general the first\n\
 681 argument of any expression that starts with '(def' in column zero\n\
 682 is a tag.\n\
 683 The '--declarations' option tags \"(defvar foo)\" constructs too.";
 684
 685 static const char *Lua_suffixes [] =
 686   { "lua", "LUA", NULL };
 687 static const char Lua_help [] =
 688 "In Lua scripts, all functions are tags.";
 689 static const char *Lua_interpreters [] =
 690   { "lua", NULL };
 691
 692 static const char *Makefile_filenames [] =
 693   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 694 static const char Makefile_help [] =
 695 "In makefiles, targets are tags; additionally, variables are tags\n\
 696 unless you specify '--no-globals'.";
 697
 698 /* Mercury and Objective C share the same .m file extensions.  */
 699 static const char *Mercury_suffixes [] =
 700   {"m",
 701    NULL};
 702 static const char Mercury_help [] =
 703   "In Mercury code, tags are all declarations beginning a line with ':-'\n\
 704 and optionally Prolog-like definitions (first rule for a predicate or \
 705 function).\n\
 706 To enable this behavior, run etags using --declarations.";
/* Starts out false.  NOTE(review): presumably switched on by the
   --declarations option mentioned in Mercury_help -- confirm.  */
static bool with_mercury_definitions = false;
/* Starts out as the build-time default MERCURY_HEURISTICS_RATIO.
   NOTE(review): unlike the other file-scope variables around it, this
   one is not declared static -- confirm whether external linkage is
   intended.  */
float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
 709
 710 static const char *Objc_suffixes [] =
 711   { "lm",                       /* Objective lex file  */
 712     "m",                        /* By default, Objective C file will be assumed.  */
 713      NULL};
 714 static const char Objc_help [] =
 715 "In Objective C code, tags include Objective C definitions for classes,\n\
 716 class categories, methods and protocols.  Tags for variables and\n\
 717 functions in classes are named 'CLASS::VARIABLE' and 'CLASS::FUNCTION'.\
 718 \n(Use --help --lang=c --lang=objc --lang=java for full help.)";
 719
 720 static const char *Pascal_suffixes [] =
 721   { "p", "pas", NULL };
 722 static const char Pascal_help [] =
 723 "In Pascal code, the tags are the functions and procedures defined\n\
 724 in the file.";
 725 /* " // this is for working around an Emacs highlighting bug... */
 726
/* Perl: selected by file suffix or by interpreter name.
   NOTE(review): "@PERL@" looks like a configure-style placeholder that
   is compared literally here -- confirm that this is intended.  */
static const char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static const char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static const char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the 'package', 'sub', 'my' and 'local' keywords.  Use\n\
'--globals' if you want to tag global variables.  Tags for\n\
subroutines are named 'PACKAGE::SUB'.  The name for subroutines\n\
defined in the default package is 'main::SUB'.";

/* PHP: selected by file suffix only.  */
static const char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static const char PHP_help [] =
"In PHP code, tags are functions, classes and defines.  Unless you use\n\
the '--no-members' option, vars are tags too.";

/* Suffixes for the "proc" entry of the language table, which has no
   help text of its own.  */
static const char *plain_C_suffixes [] =
  { "pc",                       /* Pro*C file */
     NULL };

/* PostScript: selected by file suffix only.  */
static const char *PS_suffixes [] =
  { "ps", "psw", NULL };        /* .psw is for PSWrap */
static const char PS_help [] =
"In PostScript code, the tags are the functions.";

/* Prolog: selected by file suffix or by interpreter name.  */
static const char *Prolog_suffixes [] =
  { "prolog", NULL };
static const char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";
static const char *Prolog_interpreters [] =
  { "gprolog", "pl", "yap", "swipl", "prolog", NULL };

/* Python: selected by file suffix or by interpreter name.  */
static const char *Python_suffixes [] =
  { "py", NULL };
static const char Python_help [] =
"In Python code, 'def' or 'class' at the beginning of a line\n\
generate a tag.";
static const char *Python_interpreters [] =
  { "python", NULL };

/* Ruby: selected by file suffix, by whole file name, or by
   interpreter name.  */
static const char *Ruby_suffixes [] =
  { "rb", "ru", "rbw", NULL };
static const char *Ruby_filenames [] =
  { "Rakefile", "Thorfile", NULL };
static const char Ruby_help [] =
  "In Ruby code, 'def' or 'class' or 'module' at the beginning of\n\
a line generate a tag.  Constants also generate a tag.";
static const char *Ruby_interpreters [] =
  { "ruby", NULL };

/* Rust: selected by file suffix only.  */
static const char *Rust_suffixes [] =
  { "rs", NULL };
static const char Rust_help [] =
  "In Rust code, tags anything defined with 'fn', 'enum', \n\
'struct' or 'macro_rules!'.";
 784
/* Scheme: selected by file suffix only.
   Can't do the `SCM' or `scm' prefix with a version number. */
static const char *Scheme_suffixes [] =
  { "oak", "rkt", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static const char Scheme_help [] =
"In Scheme code, tags include anything defined with 'def' or with a\n\
construct whose name starts with 'def'.  They also include\n\
variables set with 'set!' at top level in the file.";

/* TeX and LaTeX: selected by file suffix only.  */
static const char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static const char TeX_help [] =
"In LaTeX text, the argument of any of the commands '\\chapter',\n\
'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\
'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\
'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\
'\\newenvironment' or '\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
'TEXTAGS' to a colon-separated list like, for example,\n\
     TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo: selected by file suffix only.  */
static const char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static const char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

/* Yacc and Bison grammars: selected by file suffix only.  */
static const char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static const char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs.  The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
 819
 820 static const char auto_help [] =
 821 "'auto' is not a real language, it indicates to use\n\
 822 a default language for files base on file name suffix and file contents.";
 823
 824 static const char none_help [] =
 825 "'none' is not a real language, it indicates to only do\n\
 826 regexp processing on files.";
 827
 828 static const char no_lang_help [] =
 829 "No detailed help available for this language.";
 830
 831
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Each row is initialized positionally: language name, help text,
 * tagging function, suffix list, whole-file-name list, interpreter
 * list.  Trailing members that are left out are zero-initialized.
 * The lookup functions in this file scan the table from the top and
 * return the first match, so the order of rows decides which language
 * wins when two of them claim the same suffix.  The table ends with a
 * row whose name is NULL.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes,
                 NULL,           Erlang_interpreters },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "go",        Go_help,        Go_functions,      Go_suffixes        },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  /* objc listed before mercury as it is a better default for .m extensions.  */
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "mercury",   Mercury_help,   Mercury_functions, Mercury_suffixes   },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes,
                 NULL,           Prolog_interpreters },
  { "python",    Python_help,    Python_functions,  Python_suffixes,
                 NULL,           Python_interpreters },
  { "ruby",      Ruby_help,      Ruby_functions,    Ruby_suffixes,
                 Ruby_filenames, Ruby_interpreters },
  { "rust",      Rust_help,      Rust_entries,      Rust_suffixes      },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* Only this row gives a seventh initializer.  NOTE(review): the
     member it sets is declared outside this part of the file.  */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,true},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 880
 881 \f
 882 static void
 883 print_language_names (void)
 884 {
 885   language *lang;
 886   const char **name, **ext;
 887
 888   puts ("\nThese are the currently supported languages, along with the\n\
 889 default file names and dot suffixes:");
 890   for (lang = lang_names; lang->name != NULL; lang++)
 891     {
 892       printf ("  %-*s", 10, lang->name);
 893       if (lang->filenames != NULL)
 894         for (name = lang->filenames; *name != NULL; name++)
 895           printf (" %s", *name);
 896       if (lang->suffixes != NULL)
 897         for (ext = lang->suffixes; *ext != NULL; ext++)
 898           printf (" .%s", *ext);
 899       puts ("");
 900     }
 901   puts ("where 'auto' means use default language for files based on file\n\
 902 name suffix, and 'none' means only do regexp processing on files.\n\
 903 If no language is specified and no matching suffix is found,\n\
 904 the first line of the file is read for a sharp-bang (#!) sequence\n\
 905 followed by the name of an interpreter.  If no such sequence is found,\n\
 906 Fortran is tried first; if no tags are found, C is tried next.\n\
 907 When parsing any C file, a \"class\" or \"template\" keyword\n\
 908 switches to C++.");
 909   puts ("Compressed files are supported using gzip, bzip2, xz, and zstd.\n\
 910 \n\
 911 For detailed help on a given language use, for example,\n\
 912 etags --help --lang=ada.");
 913 }
 914
 915 #if CTAGS
 916 # define PROGRAM_NAME "ctags"
 917 #else
 918 # define PROGRAM_NAME "etags"
 919 #endif
 920 static _Noreturn void
 921 print_version (void)
 922 {
 923   fputs ((PROGRAM_NAME " (" PACKAGE_NAME " " PACKAGE_VERSION ")\n"
 924           COPYRIGHT "\n"
 925           "This program is distributed under the terms in ETAGS.README\n"),
 926          stdout);
 927   exit (EXIT_SUCCESS);
 928 }
 929
 930 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 931 # define PRINT_UNDOCUMENTED_OPTIONS_HELP false
 932 #endif
 933
/* Print help on standard output and exit successfully.

   ARGBUFFER is the parsed argument list, terminated by an entry of
   type at_end.  If it holds any at_language entries, only the detailed
   help of those languages is printed (separated by blank lines).
   Otherwise the general usage message is printed; which options it
   describes depends on whether the program was built as ctags
   (CTAGS nonzero) or as etags.  */
static _Noreturn void
print_help (argument *argbuffer)
{
  bool help_for_lang = false;

  /* Per-language help, one block for each language that was named.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = true;
      }

  if (help_for_lang)
    exit (EXIT_SUCCESS);

  /* General help.  */
  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  puts ("\tIn Mercury, tag both declarations starting a line with ':-' and\n\
        first predicates or functions in clauses.");

  /* ctags and etags document opposite switches here.  */
  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");

  puts ("--no-line-directive\n\
        Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
        Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
        Do not create tag entries for members of structures\n\
        in some languages.");

  puts ("-Q, --class-qualify\n\
        Qualify tag names with their class name in C++, ObjC, Java, and Perl.\n\
        This produces tag names of the form \"class::member\" for C++,\n\
        \"class(category)\" for Objective C, and \"class.member\" for Java.\n\
        For Objective C, this also produces class methods qualified with\n\
        their arguments, as in \"foo:bar:baz:more\".\n\
        For Perl, this produces \"package::member\".");
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: 'i' means to ignore case,\n\
        'm' means to allow multi-line matches, 's' implies 'm' and\n\
        causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to --version, are printed
     for ctags only.  */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");

      /* Both descriptions of -w are printed only when
         PRINT_UNDOCUMENTED_OPTIONS_HELP is true.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more '--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1114
1115 \f
1116 int
1117 main (int argc, char **argv)
1118 {
1119   int i;
1120   int nincluded_files;
1121   char **included_files;
1122   argument *argbuffer;
1123   int current_arg, file_count;
1124   linebuffer filename_lb;
1125   bool help_asked = false;
1126   ptrdiff_t len;
1127   char *optstring;
1128   int opt;
1129
1130   progname = argv[0];
1131   nincluded_files = 0;
1132   included_files = xnmalloc (argc, sizeof *included_files);
1133   current_arg = 0;
1134   file_count = 0;
1135
1136   /* Allocate enough no matter what happens.  Overkill, but each one
1137      is small. */
1138   argbuffer = xnmalloc (argc, sizeof *argbuffer);
1139
1140   /*
1141    * Always find typedefs and structure tags.
1142    * Also default to find macro constants, enum constants, struct
1143    * members and global variables.  Do it for both etags and ctags.
1144    */
1145   typedefs = typedefs_or_cplusplus = constantypedefs = true;
1146   globals = members = true;
1147
1148   /* When the optstring begins with a '-' getopt_long does not rearrange the
1149      non-options arguments to be at the end, but leaves them alone. */
1150   optstring = concat ("-ac:Cf:Il:o:Qr:RSVhH",
1151                       (CTAGS) ? "BxdtTuvw" : "Di:",
1152                       "");
1153
1154   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1155     switch (opt)
1156       {
1157       case 0:
1158         /* If getopt returns 0, then it has already processed a
1159            long-named option.  We should do nothing.  */
1160         break;
1161
1162       case 1:
1163         /* This means that a file name has been seen.  Record it. */
1164         argbuffer[current_arg].arg_type = at_filename;
1165         argbuffer[current_arg].what     = optarg;
1166         len = strlen (optarg);
1167         if (whatlen_max < len)
1168           whatlen_max = len;
1169         ++current_arg;
1170         ++file_count;
1171         break;
1172
1173       case STDIN:
1174         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1175         argbuffer[current_arg].arg_type = at_stdin;
1176         argbuffer[current_arg].what     = optarg;
1177         len = strlen (optarg);
1178         if (whatlen_max < len)
1179           whatlen_max = len;
1180         ++current_arg;
1181         ++file_count;
1182         if (parsing_stdin)
1183           fatal ("cannot parse standard input more than once");
1184         parsing_stdin = true;
1185         break;
1186
1187         /* Common options. */
1188       case 'a': append_to_tagfile = true;       break;
1189       case 'C': cplusplus = true;               break;
1190       case 'f':         /* for compatibility with old makefiles */
1191       case 'o':
1192         if (tagfile)
1193           {
1194             error ("-o option may only be given once.");
1195             suggest_asking_for_help ();
1196           }
1197         tagfile = optarg;
1198         break;
1199       case 'I':
1200       case 'S':         /* for backward compatibility */
1201         ignoreindent = true;
1202         break;
1203       case 'l':
1204         {
1205           language *lang = get_language_from_langname (optarg);
1206           if (lang != NULL)
1207             {
1208               argbuffer[current_arg].lang = lang;
1209               argbuffer[current_arg].arg_type = at_language;
1210               ++current_arg;
1211             }
1212         }
1213         break;
1214       case 'c':
1215         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1216         optarg = concat (optarg, "i", ""); /* memory leak here */
1217         FALLTHROUGH;
1218       case 'r':
1219         argbuffer[current_arg].arg_type = at_regexp;
1220         argbuffer[current_arg].what = optarg;
1221         len = strlen (optarg);
1222         if (whatlen_max < len)
1223           whatlen_max = len;
1224         ++current_arg;
1225         break;
1226       case 'R':
1227         argbuffer[current_arg].arg_type = at_regexp;
1228         argbuffer[current_arg].what = NULL;
1229         ++current_arg;
1230         break;
1231       case 'V':
1232         print_version ();
1233         break;
1234       case 'h':
1235       case 'H':
1236         help_asked = true;
1237         break;
1238       case 'Q':
1239         class_qualify = 1;
1240         break;
1241
1242         /* Etags options */
1243       case 'D': constantypedefs = false;                        break;
1244       case 'i': included_files[nincluded_files++] = optarg;     break;
1245
1246         /* Ctags options. */
1247       case 'B': searchar = '?';                                 break;
1248       case 'd': constantypedefs = true;                         break;
1249       case 't': typedefs = true;                                break;
1250       case 'T': typedefs = typedefs_or_cplusplus = true;        break;
1251       case 'u': update = true;                                  break;
1252       case 'v': vgrind_style = true;                            FALLTHROUGH;
1253       case 'x': cxref_style = true;                             break;
1254       case 'w': no_warnings = true;                             break;
1255       default:
1256         suggest_asking_for_help ();
1257       }
1258
1259   /* No more options.  Store the rest of arguments. */
1260   for (; optind < argc; optind++)
1261     {
1262       argbuffer[current_arg].arg_type = at_filename;
1263       argbuffer[current_arg].what = argv[optind];
1264       len = strlen (argv[optind]);
1265       if (whatlen_max < len)
1266         whatlen_max = len;
1267       ++current_arg;
1268       ++file_count;
1269     }
1270
1271   argbuffer[current_arg].arg_type = at_end;
1272
1273   if (help_asked)
1274     print_help (argbuffer);
1275
1276   if (nincluded_files == 0 && file_count == 0)
1277     {
1278       error ("no input files specified.");
1279       suggest_asking_for_help ();
1280     }
1281
1282   if (tagfile == NULL)
1283     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1284   cwd = etags_getcwd ();        /* the current working directory */
1285   if (cwd[strlen (cwd) - 1] != '/')
1286     {
1287       char *oldcwd = cwd;
1288       cwd = concat (oldcwd, "/", "");
1289       free (oldcwd);
1290     }
1291
1292   /* Compute base directory for relative file names. */
1293   if (streq (tagfile, "-")
1294       || strneq (tagfile, "/dev/", 5))
1295     tagfiledir = cwd;            /* relative file names are relative to cwd */
1296   else
1297     {
1298       canonicalize_filename (tagfile);
1299       tagfiledir = absolute_dirname (tagfile, cwd);
1300     }
1301
1302   linebuffer_init (&lb);
1303   linebuffer_init (&filename_lb);
1304   linebuffer_init (&filebuf);
1305   linebuffer_init (&token_name);
1306
1307   if (!CTAGS)
1308     {
1309       if (streq (tagfile, "-"))
1310         {
1311           tagf = stdout;
1312           set_binary_mode (STDOUT_FILENO, O_BINARY);
1313         }
1314       else
1315         tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1316       if (tagf == NULL)
1317         pfatal (tagfile);
1318     }
1319
1320   /*
1321    * Loop through files finding functions.
1322    */
1323   for (i = 0; i < current_arg; i++)
1324     {
1325       static language *lang;    /* non-NULL if language is forced */
1326       char *this_file;
1327
1328       switch (argbuffer[i].arg_type)
1329         {
1330         case at_language:
1331           lang = argbuffer[i].lang;
1332           break;
1333         case at_regexp:
1334           analyze_regex (argbuffer[i].what);
1335           break;
1336         case at_filename:
1337               this_file = argbuffer[i].what;
1338               /* Input file named "-" means read file names from stdin
1339                  (one per line) and use them. */
1340               if (streq (this_file, "-"))
1341                 {
1342                   if (parsing_stdin)
1343                     fatal ("cannot parse standard input "
1344                            "AND read file names from it");
1345                   while (readline_internal (&filename_lb, stdin, "-", false) > 0)
1346                     process_file_name (filename_lb.buffer, lang);
1347                 }
1348               else
1349                 process_file_name (this_file, lang);
1350           break;
1351         case at_stdin:
1352           this_file = argbuffer[i].what;
1353           process_file (stdin, this_file, lang);
1354           break;
1355         default:
1356           error ("internal error: arg_type");
1357         }
1358     }
1359
1360   free_regexps ();
1361   free (lb.buffer);
1362   free (filebuf.buffer);
1363   free (token_name.buffer);
1364
1365   if (!CTAGS || cxref_style)
1366     {
1367       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1368       put_entries (nodehead);
1369       free_tree (nodehead);
1370       nodehead = NULL;
1371       if (!CTAGS)
1372         {
1373           fdesc *fdp;
1374
1375           /* Output file entries that have no tags. */
1376           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1377             if (!fdp->written)
1378               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1379
1380           while (nincluded_files-- > 0)
1381             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1382
1383           if (fclose (tagf) == EOF)
1384             pfatal (tagfile);
1385         }
1386
1387       return EXIT_SUCCESS;
1388     }
1389
1390   /* From here on, we are in (CTAGS && !cxref_style) */
1391   if (update)
1392     {
1393       for (i = 0; i < current_arg; ++i)
1394         {
1395           switch (argbuffer[i].arg_type)
1396             {
1397             case at_filename:
1398             case at_stdin:
1399               break;
1400             default:
1401               continue;         /* the for loop */
1402             }
1403           cleanup_tags_file (tagfile, argbuffer[i].what);
1404         }
1405       append_to_tagfile = true;
1406     }
1407
1408   tagf = fopen (tagfile, append_to_tagfile ? "ab" : "wb");
1409   if (tagf == NULL)
1410     pfatal (tagfile);
1411   put_entries (nodehead);       /* write all the tags (CTAGS) */
1412   free_tree (nodehead);
1413   nodehead = NULL;
1414   if (fclose (tagf) == EOF)
1415     pfatal (tagfile);
1416
1417   if (CTAGS)
1418     if (append_to_tagfile || update)
1419       {
1420         /* Maybe these should be used:
1421            setenv ("LC_COLLATE", "C", 1);
1422            setenv ("LC_ALL", "C", 1); */
1423         char *cmd = xmalloc (8 * strlen (tagfile) + sizeof "sort -u -o '' ''");
1424 #if defined WINDOWSNT || MSDOS
1425         /* Quote "like this".  No need to escape the quotes in the file name,
1426            since it is not allowed in file names on these systems.  */
1427         char *z = stpcpy (cmd, "sort -u -o \"");
1428         z = stpcpy (z, tagfile);
1429         z = stpcpy (z, "\" \"");
1430         z = stpcpy (z, tagfile);
1431         stpcpy (z, "\"");
1432 #else
1433         /* Quote 'like this', and escape the apostrophe in the file name.  */
1434         char *z = stpcpy (cmd, "sort -u -o '");
1435         char *escaped_tagfile = z;
1436         for (; *tagfile; *z++ = *tagfile++)
1437           if (*tagfile == '\'')
1438             z = stpcpy (z, "'\\'");
1439         ptrdiff_t escaped_tagfile_len = z - escaped_tagfile;
1440         z = stpcpy (z, "' '");
1441         z = mempcpy (z, escaped_tagfile, escaped_tagfile_len);
1442         strcpy (z, "'");
1443 #endif
1444         return system (cmd);
1445       }
1446   return EXIT_SUCCESS;
1447 }
1448
1449 /*
1450  * Equivalent to: mv tags OTAGS;grep -Fv ' filename ' OTAGS >tags;rm OTAGS
1451  */
1452 static void
1453 cleanup_tags_file (const char* tagfile, const char* match_file_name)
1454 {
1455   FILE *otags_f = fopen ("OTAGS", "wb");
1456   FILE *tag_f = fopen (tagfile, "rb");
1457
1458   if (otags_f == NULL)
1459     pfatal ("OTAGS");
1460
1461   if (tag_f == NULL)
1462     pfatal (tagfile);
1463
1464   int buf_len = strlen (match_file_name) + sizeof ("\t\t ") + 1;
1465   char *buf = xmalloc (buf_len);
1466   snprintf (buf, buf_len, "\t%s\t", match_file_name);
1467
1468   linebuffer line;
1469   linebuffer_init (&line);
1470   while (readline_internal (&line, tag_f, tagfile, true) > 0)
1471     {
1472       if (ferror (tag_f))
1473         pfatal (tagfile);
1474
1475       if (strstr (line.buffer, buf) == NULL)
1476         {
1477           fprintf (otags_f, "%s\n", line.buffer);
1478           if (ferror (tag_f))
1479             pfatal (tagfile);
1480         }
1481     }
1482   free (buf);
1483   free (line.buffer);
1484
1485   if (fclose (otags_f) == EOF)
1486     pfatal ("OTAGS");
1487
1488   if (fclose (tag_f) == EOF)
1489     pfatal (tagfile);
1490
1491   do_move_file ("OTAGS", tagfile);
1492   return;
1493 }
1494
1495 /*
1496  * Return a compressor given the file name.  If EXTPTR is non-zero,
1497  * return a pointer into FILE where the compressor-specific
1498  * extension begins.  If no compressor is found, NULL is returned
1499  * and EXTPTR is not significant.
1500  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1501  */
1502 static compressor *
1503 get_compressor_from_suffix (char *file, char **extptr)
1504 {
1505   compressor *compr;
1506   char *slash, *suffix;
1507
1508   /* File has been processed by canonicalize_filename,
1509      so we don't need to consider backslashes on DOS_NT.  */
1510   slash = strrchr (file, '/');
1511   suffix = strrchr (file, '.');
1512   if (suffix == NULL || suffix < slash)
1513     return NULL;
1514   if (extptr != NULL)
1515     *extptr = suffix;
1516   suffix += 1;
1517   /* Let those poor souls who live with DOS 8+3 file name limits get
1518      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1519      Only the first do loop is run if not MSDOS */
1520   do
1521     {
1522       for (compr = compressors; compr->suffix != NULL; compr++)
1523         if (streq (compr->suffix, suffix))
1524           return compr;
1525       if (!MSDOS)
1526         break;                  /* do it only once: not really a loop */
1527       if (extptr != NULL)
1528         *extptr = ++suffix;
1529     } while (*suffix != '\0');
1530   return NULL;
1531 }
1532
1533
1534
1535 /*
1536  * Return a language given the name.
1537  */
1538 static language *
1539 get_language_from_langname (const char *name)
1540 {
1541   language *lang;
1542
1543   if (name == NULL)
1544     error ("empty language name");
1545   else
1546     {
1547       for (lang = lang_names; lang->name != NULL; lang++)
1548         if (streq (name, lang->name))
1549           return lang;
1550       error ("unknown language \"%s\"", name);
1551     }
1552
1553   return NULL;
1554 }
1555
1556
1557 /*
1558  * Return a language given the interpreter name.
1559  */
1560 static language *
1561 get_language_from_interpreter (char *interpreter)
1562 {
1563   language *lang;
1564   const char **iname;
1565
1566   if (interpreter == NULL)
1567     return NULL;
1568   for (lang = lang_names; lang->name != NULL; lang++)
1569     if (lang->interpreters != NULL)
1570       for (iname = lang->interpreters; *iname != NULL; iname++)
1571         if (streq (*iname, interpreter))
1572             return lang;
1573
1574   return NULL;
1575 }
1576
1577
1578
1579 /*
1580  * Return a language given the file name.
1581  */
1582 static language *
1583 get_language_from_filename (char *file, bool case_sensitive)
1584 {
1585   language *lang;
1586   const char **name, **ext, *suffix;
1587   char *slash;
1588
1589   /* Try whole file name first. */
1590   slash = strrchr (file, '/');
1591   if (slash != NULL)
1592     file = slash + 1;
1593 #ifdef DOS_NT
1594   else if (file[0] && file[1] == ':')
1595     file += 2;
1596 #endif
1597   for (lang = lang_names; lang->name != NULL; lang++)
1598     if (lang->filenames != NULL)
1599       for (name = lang->filenames; *name != NULL; name++)
1600         if ((case_sensitive)
1601             ? streq (*name, file)
1602             : strcaseeq (*name, file))
1603           return lang;
1604
1605   /* If not found, try suffix after last dot. */
1606   suffix = strrchr (file, '.');
1607   if (suffix == NULL)
1608     return NULL;
1609   suffix += 1;
1610   for (lang = lang_names; lang->name != NULL; lang++)
1611     if (lang->suffixes != NULL)
1612       for (ext = lang->suffixes; *ext != NULL; ext++)
1613         if ((case_sensitive)
1614             ? streq (*ext, suffix)
1615             : strcaseeq (*ext, suffix))
1616           return lang;
1617   return NULL;
1618 }
1619
1620 \f
1621 /*
1622  * This routine is called on each file argument.
1623  */
1624 static void
1625 process_file_name (char *file, language *lang)
1626 {
1627   FILE *inf;
1628   fdesc *fdp;
1629   compressor *compr;
1630   char *compressed_name, *uncompressed_name;
1631   char *ext, *real_name UNINIT, *tmp_name UNINIT;
1632   int retval;
1633
1634   canonicalize_filename (file);
1635   if (streq (file, tagfile) && !streq (tagfile, "-"))
1636     {
1637       error ("skipping inclusion of %s in self.", file);
1638       return;
1639     }
1640   compr = get_compressor_from_suffix (file, &ext);
1641   if (compr)
1642     {
1643       compressed_name = file;
1644       uncompressed_name = savenstr (file, ext - file);
1645     }
1646   else
1647     {
1648       compressed_name = NULL;
1649       uncompressed_name = file;
1650     }
1651
1652   /* If the canonicalized uncompressed name
1653      has already been dealt with, skip it silently. */
1654   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1655     {
1656       assert (fdp->infname != NULL);
1657       if (streq (uncompressed_name, fdp->infname))
1658         goto cleanup;
1659     }
1660
1661   inf = fopen (file, "r" FOPEN_BINARY);
1662   if (inf)
1663     real_name = file;
1664   else
1665     {
1666       int file_errno = errno;
1667       if (compressed_name)
1668         {
1669           /* Try with the given suffix.  */
1670           inf = fopen (uncompressed_name, "r" FOPEN_BINARY);
1671           if (inf)
1672             real_name = uncompressed_name;
1673         }
1674       else
1675         {
1676           /* Try all possible suffixes.  */
1677           for (compr = compressors; compr->suffix != NULL; compr++)
1678             {
1679               compressed_name = concat (file, ".", compr->suffix);
1680               inf = fopen (compressed_name, "r" FOPEN_BINARY);
1681               if (inf)
1682                 {
1683                   real_name = compressed_name;
1684                   break;
1685                 }
1686               if (MSDOS)
1687                 {
1688                   char *suf = compressed_name + strlen (file);
1689                   size_t suflen = strlen (compr->suffix) + 1;
1690                   for ( ; suf[1]; suf++, suflen--)
1691                     {
1692                       memmove (suf, suf + 1, suflen);
1693                       inf = fopen (compressed_name, "r" FOPEN_BINARY);
1694                       if (inf)
1695                         {
1696                           real_name = compressed_name;
1697                           break;
1698                         }
1699                     }
1700                   if (inf)
1701                     break;
1702                 }
1703               free (compressed_name);
1704               compressed_name = NULL;
1705             }
1706         }
1707       if (! inf)
1708         {
1709           errno = file_errno;
1710           perror (file);
1711           goto cleanup;
1712         }
1713     }
1714
1715   if (real_name == compressed_name)
1716     {
1717       fclose (inf);
1718       tmp_name = etags_mktmp ();
1719       if (!tmp_name)
1720         inf = NULL;
1721       else
1722         {
1723 #if MSDOS || defined (DOS_NT)
1724           int buf_len =
1725             strlen (compr->command)
1726             + strlen (" \"\" > \"\"") + strlen (real_name)
1727             + strlen (tmp_name) + 1;
1728           char *cmd = xmalloc (buf_len);
1729           snprintf (cmd, buf_len, "%s \"%s\" > \"%s\"",
1730                     compr->command, real_name, tmp_name);
1731 #else
1732           char *new_real_name = escape_shell_arg_string (real_name);
1733           char *new_tmp_name = escape_shell_arg_string (tmp_name);
1734           int buf_len =
1735             strlen (compr->command) + strlen ("  > ") + strlen (new_real_name)
1736             + strlen (new_tmp_name) + 1;
1737           char *cmd = xmalloc (buf_len);
1738           snprintf (cmd, buf_len, "%s %s > %s",
1739                     compr->command, new_real_name, new_tmp_name);
1740           free (new_real_name);
1741           free (new_tmp_name);
1742 #endif
1743           inf = (system (cmd) == -1
1744                  ? NULL
1745                  : fopen (tmp_name, "r" FOPEN_BINARY));
1746           free (cmd);
1747         }
1748
1749       if (!inf)
1750         {
1751           perror (real_name);
1752           goto cleanup;
1753         }
1754     }
1755
1756   process_file (inf, uncompressed_name, lang);
1757
1758   retval = fclose (inf);
1759   if (real_name == compressed_name)
1760     {
1761       remove (tmp_name);
1762       free (tmp_name);
1763     }
1764   if (retval < 0)
1765     pfatal (file);
1766
1767  cleanup:
1768   if (compressed_name != file)
1769     free (compressed_name);
1770   if (uncompressed_name != file)
1771     free (uncompressed_name);
1772   last_node = NULL;
1773   curfdp = NULL;
1774   return;
1775 }
1776
1777 static void
1778 process_file (FILE *fh, char *fn, language *lang)
1779 {
1780   static const fdesc emptyfdesc;
1781   fdesc *fdp;
1782
1783   infilename = fn;
1784   /* Create a new input file description entry. */
1785   fdp = xmalloc (sizeof *fdp);
1786   *fdp = emptyfdesc;
1787   fdp->next = fdhead;
1788   fdp->infname = savestr (fn);
1789   fdp->lang = lang;
1790   fdp->infabsname = absolute_filename (fn, cwd);
1791   fdp->infabsdir = absolute_dirname (fn, cwd);
1792   if (filename_is_absolute (fn))
1793     {
1794       /* An absolute file name.  Canonicalize it. */
1795       fdp->taggedfname = absolute_filename (fn, NULL);
1796     }
1797   else
1798     {
1799       /* A file name relative to cwd.  Make it relative
1800          to the directory of the tags file. */
1801       fdp->taggedfname = relative_filename (fn, tagfiledir);
1802     }
1803   fdp->usecharno = true;        /* use char position when making tags */
1804   fdp->prop = NULL;
1805   fdp->written = false;         /* not written on tags file yet */
1806
1807   fdhead = fdp;
1808   curfdp = fdhead;              /* the current file description */
1809
1810   find_entries (fh);
1811
1812   /* If not Ctags, and if this is not metasource and if it contained no #line
1813      directives, we can write the tags and free all nodes pointing to
1814      curfdp. */
1815   if (!CTAGS
1816       && curfdp->usecharno      /* no #line directives in this file */
1817       && !curfdp->lang->metasource)
1818     {
1819       node *np, *prev;
1820
1821       /* Look for the head of the sublist relative to this file.  See add_node
1822          for the structure of the node tree. */
1823       prev = NULL;
1824       for (np = nodehead; np != NULL; prev = np, np = np->left)
1825         if (np->fdp == curfdp)
1826           break;
1827
1828       /* If we generated tags for this file, write and delete them. */
1829       if (np != NULL)
1830         {
1831           /* This is the head of the last sublist, if any.  The following
1832              instructions depend on this being true. */
1833           assert (np->left == NULL);
1834
1835           assert (fdhead == curfdp);
1836           assert (last_node->fdp == curfdp);
1837           put_entries (np);     /* write tags for file curfdp->taggedfname */
1838           free_tree (np);       /* remove the written nodes */
1839           if (prev == NULL)
1840             nodehead = NULL;    /* no nodes left */
1841           else
1842             prev->left = NULL;  /* delete the pointer to the sublist */
1843         }
1844     }
1845 }
1846
1847 static void
1848 reset_input (FILE *inf)
1849 {
1850   if (fseek (inf, 0, SEEK_SET) != 0)
1851     perror (infilename);
1852 }
1853
1854 /*
1855  * This routine opens the specified file and calls the function
1856  * which finds the function and type definitions.
1857  */
1858 static void
1859 find_entries (FILE *inf)
1860 {
1861   char *cp;
1862   language *lang = curfdp->lang;
1863   Lang_function *parser = NULL;
1864
1865   /* If user specified a language, use it. */
1866   if (lang != NULL && lang->function != NULL)
1867     {
1868       parser = lang->function;
1869     }
1870
1871   /* Else try to guess the language given the file name. */
1872   if (parser == NULL)
1873     {
1874       lang = get_language_from_filename (curfdp->infname, true);
1875
1876       /* Disambiguate file names between Objc and Mercury. */
1877       if (lang != NULL && strcmp (lang->name, "objc") == 0)
1878         test_objc_is_mercury (curfdp->infname, &lang);
1879
1880       if (lang != NULL && lang->function != NULL)
1881         {
1882           curfdp->lang = lang;
1883           parser = lang->function;
1884         }
1885     }
1886
1887   /* Else look for sharp-bang as the first two characters. */
1888   if (parser == NULL
1889       && readline_internal (&lb, inf, infilename, false) > 0
1890       && lb.len >= 2
1891       && lb.buffer[0] == '#'
1892       && lb.buffer[1] == '!')
1893     {
1894       char *lp;
1895
1896       /* Set lp to point at the first char after the last slash in the
1897          line or, if no slashes, at the first nonblank.  Then set cp to
1898          the first successive blank and terminate the string. */
1899       lp = strrchr (lb.buffer+2, '/');
1900       if (lp != NULL)
1901         lp += 1;
1902       else
1903         lp = skip_spaces (lb.buffer + 2);
1904       cp = skip_non_spaces (lp);
1905       /* If the "interpreter" turns out to be "env", the real interpreter is
1906          the next word.  */
1907       if (cp > lp && strneq (lp, "env", cp - lp))
1908         {
1909           lp = skip_spaces (cp);
1910           cp = skip_non_spaces (lp);
1911         }
1912       *cp = '\0';
1913
1914       if (*lp)
1915         {
1916           lang = get_language_from_interpreter (lp);
1917           if (lang != NULL && lang->function != NULL)
1918             {
1919               curfdp->lang = lang;
1920               parser = lang->function;
1921             }
1922         }
1923     }
1924
1925   reset_input (inf);
1926
1927   /* Else try to guess the language given the case insensitive file name. */
1928   if (parser == NULL)
1929     {
1930       lang = get_language_from_filename (curfdp->infname, false);
1931       if (lang != NULL && lang->function != NULL)
1932         {
1933           curfdp->lang = lang;
1934           parser = lang->function;
1935         }
1936     }
1937
1938   /* Else try Fortran or C. */
1939   if (parser == NULL)
1940     {
1941       node *old_last_node = last_node;
1942
1943       curfdp->lang = get_language_from_langname ("fortran");
1944       find_entries (inf);
1945
1946       if (old_last_node == last_node)
1947         /* No Fortran entries found.  Try C. */
1948         {
1949           reset_input (inf);
1950           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1951           find_entries (inf);
1952         }
1953       return;
1954     }
1955
1956   if (!no_line_directive
1957       && curfdp->lang != NULL && curfdp->lang->metasource)
1958     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1959        file, or anyway we parsed a file that is automatically generated from
1960        this one.  If this is the case, the bingo.c file contained #line
1961        directives that generated tags pointing to this file.  Let's delete
1962        them all before parsing this file, which is the real source. */
1963     {
1964       fdesc **fdpp = &fdhead;
1965       while (*fdpp != NULL)
1966         if (*fdpp != curfdp
1967             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1968           /* We found one of those!  We must delete both the file description
1969              and all tags referring to it. */
1970           {
1971             fdesc *badfdp = *fdpp;
1972
1973             /* Delete the tags referring to badfdp->taggedfname
1974                that were obtained from badfdp->infname. */
1975             invalidate_nodes (badfdp, &nodehead);
1976
1977             *fdpp = badfdp->next; /* remove the bad description from the list */
1978             free_fdesc (badfdp);
1979           }
1980         else
1981           fdpp = &(*fdpp)->next; /* advance the list pointer */
1982     }
1983
1984   assert (parser != NULL);
1985
1986   /* Generic initializations before reading from file. */
1987   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1988
1989   /* Generic initializations before parsing file with readline. */
1990   lineno = 0;                  /* reset global line number */
1991   charno = 0;                  /* reset global char number */
1992   linecharno = 0;              /* reset global char number of line start */
1993
1994   parser (inf);
1995
1996   regex_tag_multiline ();
1997 }
1998
1999 \f
2000 /*
2001  * Check whether an implicitly named tag should be created,
2002  * then call `pfnote'.
2003  * NAME is a string that is internally copied by this function.
2004  *
2005  * TAGS format specification
2006  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2007  * The following is explained in some more detail in etc/ETAGS.EBNF.
2008  *
2009  * make_tag creates tags with "implicit tag names" (unnamed tags)
2010  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2011  *  1. NAME does not contain any of the characters in NONAM;
2012  *  2. LINESTART contains name as either a rightmost, or rightmost but
2013  *     one character, substring;
2014  *  3. the character, if any, immediately before NAME in LINESTART must
2015  *     be a character in NONAM;
2016  *  4. the character, if any, immediately after NAME in LINESTART must
2017  *     also be a character in NONAM.
2018  *
2019  * The implementation uses the notinname() macro, which recognizes the
2020  * characters stored in the string `nonam'.
2021  * etags.el needs to use the same characters that are in NONAM.
2022  */
2023 static void
2024 make_tag (const char *name,     /* tag name, or NULL if unnamed */
2025           ptrdiff_t namelen,    /* tag length */
2026           bool is_func,         /* tag is a function */
2027           char *linestart,      /* start of the line where tag is */
2028           ptrdiff_t linelen,    /* length of the line where tag is */
2029           intmax_t lno,         /* line number */
2030           intmax_t cno)         /* character number */
2031 {
2032   bool named = (name != NULL && namelen > 0);
2033   char *nname = NULL;
2034
2035   if (debug)
2036     fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
2037              named ? name : "(unnamed)", curfdp->taggedfname, lno, linestart);
2038
2039   if (!CTAGS && named)          /* maybe set named to false */
2040     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2041        such that etags.el can guess a name from it. */
2042     {
2043       ptrdiff_t i;
2044       const char *cp = name;
2045
2046       for (i = 0; i < namelen; i++)
2047         if (notinname (*cp++))
2048           break;
2049       if (i == namelen)                         /* rule #1 */
2050         {
2051           cp = linestart + linelen - namelen;
2052           if (notinname (linestart[linelen-1]))
2053             cp -= 1;                            /* rule #4 */
2054           if (cp >= linestart                   /* rule #2 */
2055               && (cp == linestart
2056                   || notinname (cp[-1]))        /* rule #3 */
2057               && strneq (name, cp, namelen))    /* rule #2 */
2058             named = false;      /* use implicit tag name */
2059         }
2060     }
2061
2062   if (named)
2063     nname = savenstr (name, namelen);
2064
2065   pfnote (nname, is_func, linestart, linelen, lno, cno);
2066 }
2067
2068 /* Record a tag. */
2069 static void
2070 pfnote (char *name,             /* tag name, or NULL if unnamed */
2071         bool is_func,           /* tag is a function */
2072         char *linestart,        /* start of the line where tag is */
2073         ptrdiff_t linelen,      /* length of the line where tag is */
2074         intmax_t lno,           /* line number */
2075         intmax_t cno)           /* character number */
2076
2077 {
2078   register node *np;
2079
2080   if ((CTAGS && name == NULL)
2081       /* We used to have an assertion here for the case below, but if we hit
2082          that case, it just means our parser got confused, and there's nothing
2083          to do about such empty "tags".  */
2084       || (!CTAGS && name && name[0] == '\0'))
2085     return;
2086
2087   np = xmalloc (sizeof *np);
2088
2089   /* If ctags mode, change name "main" to M<thisfilename>. */
2090   if (CTAGS && !cxref_style && streq (name, "main"))
2091     {
2092       char *fp = strrchr (curfdp->taggedfname, '/');
2093       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2094       fp = strrchr (np->name, '.');
2095       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2096         fp[0] = '\0';
2097     }
2098   else
2099     np->name = name;
2100   np->valid = true;
2101   np->been_warned = false;
2102   np->fdp = curfdp;
2103   np->is_func = is_func;
2104   np->lno = lno;
2105   if (np->fdp->usecharno)
2106     /* Our char numbers are 0-base, because of C language tradition?
2107        ctags compatibility?  old versions compatibility?   I don't know.
2108        Anyway, since emacs's are 1-base we expect etags.el to take care
2109        of the difference.  If we wanted to have 1-based numbers, we would
2110        uncomment the +1 below. */
2111     np->cno = cno /* + 1 */ ;
2112   else
2113     np->cno = invalidcharno;
2114   np->left = np->right = NULL;
2115   if (CTAGS && !cxref_style)
2116     {
2117       if (strnlen (linestart, 50) < 50)
2118         np->regex = concat (linestart, "$", "");
2119       else
2120         np->regex = savenstr (linestart, 50);
2121     }
2122   else
2123     np->regex = savenstr (linestart, linelen);
2124
2125   add_node (np, &nodehead);
2126 }
2127
2128 /*
2129  * Utility functions and data to avoid recursion.
2130  */
2131
2132 typedef struct stack_entry {
2133   node *np;
2134   struct stack_entry *next;
2135 } stkentry;
2136
2137 static void
2138 push_node (node *np, stkentry **stack_top)
2139 {
2140   if (np)
2141     {
2142       stkentry *new = xmalloc (sizeof *new);
2143
2144       new->np = np;
2145       new->next = *stack_top;
2146       *stack_top = new;
2147     }
2148 }
2149
2150 static node *
2151 pop_node (stkentry **stack_top)
2152 {
2153   node *ret = NULL;
2154
2155   if (*stack_top)
2156     {
2157       stkentry *old_start = *stack_top;
2158
2159       ret = (*stack_top)->np;
2160       *stack_top = (*stack_top)->next;
2161       free (old_start);
2162     }
2163   return ret;
2164 }
2165
2166 /*
2167  * free_tree ()
2168  *      emulate recursion on left children, iterate on right children.
2169  */
2170 static void
2171 free_tree (register node *np)
2172 {
2173   stkentry *stack = NULL;
2174
2175   while (np)
2176     {
2177       /* Descent on left children.  */
2178       while (np->left)
2179         {
2180           push_node (np, &stack);
2181           np = np->left;
2182         }
2183       /* Free node without left children.  */
2184       node *node_right = np->right;
2185       free (np->name);
2186       free (np->regex);
2187       free (np);
2188       if (!node_right)
2189         {
2190           /* Backtrack to find a node with right children, while freeing nodes
2191              that don't have right children.  */
2192           while (node_right == NULL && (np = pop_node (&stack)) != NULL)
2193             {
2194               node_right = np->right;
2195               free (np->name);
2196               free (np->regex);
2197               free (np);
2198             }
2199         }
2200       /* Free right children.  */
2201       np = node_right;
2202     }
2203 }
2204
2205 /*
2206  * free_fdesc ()
2207  *      delete a file description
2208  */
2209 static void
2210 free_fdesc (register fdesc *fdp)
2211 {
2212   free (fdp->infname);
2213   free (fdp->infabsname);
2214   free (fdp->infabsdir);
2215   free (fdp->taggedfname);
2216   free (fdp->prop);
2217   free (fdp);
2218 }
2219
2220 /*
2221  * add_node ()
2222  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2223  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2224  *      balancing.
2225  *
2226  *      add_node is the only function allowed to add nodes, so it can
2227  *      maintain state.
2228  */
2229 static void
2230 add_node (node *np, node **cur_node_p)
2231 {
2232   node *cur_node = *cur_node_p;
2233
2234   /* Make the first node.  */
2235   if (cur_node == NULL)
2236     {
2237       *cur_node_p = np;
2238       last_node = np;
2239       return;
2240     }
2241
2242   if (!CTAGS)
2243     /* Etags Mode */
2244     {
2245       /* For each file name, tags are in a linked sublist on the right
2246          pointer.  The first tags of different files are a linked list
2247          on the left pointer.  last_node points to the end of the last
2248          used sublist. */
2249       if (last_node != NULL && last_node->fdp == np->fdp)
2250         {
2251           /* Let's use the same sublist as the last added node. */
2252           assert (last_node->right == NULL);
2253           last_node->right = np;
2254           last_node = np;
2255         }
2256       else
2257         {
2258            while (cur_node->fdp != np->fdp)
2259              {
2260                if (cur_node->left == NULL)
2261                  break;
2262                /* The head of this sublist is not good for us.  Let's try the
2263                   next one. */
2264                cur_node = cur_node->left;
2265              }
2266            if (cur_node->left)
2267              {
2268                /* Scanning the list we found the head of a sublist which is
2269                   good for us.  Let's scan this sublist. */
2270                if (cur_node->right)
2271                  {
2272                    cur_node = cur_node->right;
2273                    while (cur_node->right)
2274                      cur_node = cur_node->right;
2275                  }
2276                /* Make a new node in this sublist.  */
2277                cur_node->right = np;
2278              }
2279            else
2280              {
2281                /* Make a new sublist.  */
2282                cur_node->left = np;
2283              }
2284            last_node = np;
2285         }
2286     } /* if ETAGS mode */
2287   else
2288     {
2289       /* Ctags Mode */
2290       node **next_node = &cur_node;
2291
2292       while ((cur_node = *next_node) != NULL)
2293         {
2294           int dif = strcmp (np->name, cur_node->name);
2295           /*
2296            * If this tag name matches an existing one, then
2297            * do not add the node, but maybe print a warning.
2298            */
2299           if (!dif && no_duplicates)
2300             {
2301               if (np->fdp == cur_node->fdp)
2302                 {
2303                   if (!no_warnings)
2304                     {
2305                       fprintf (stderr,
2306                                ("Duplicate entry in file %s, "
2307                                 "line %"PRIdMAX": %s\n"),
2308                                np->fdp->infname, lineno, np->name);
2309                       fprintf (stderr, "Second entry ignored\n");
2310                     }
2311                 }
2312               else if (!cur_node->been_warned && !no_warnings)
2313                 {
2314                   fprintf
2315                     (stderr,
2316                      "Duplicate entry in files %s and %s: %s (Warning only)\n",
2317                      np->fdp->infname, cur_node->fdp->infname, np->name);
2318                   cur_node->been_warned = true;
2319                 }
2320               return;
2321             }
2322           else
2323             next_node = dif < 0 ? &cur_node->left : &cur_node->right;
2324         }
2325       *next_node = np;
2326       last_node = np;
2327     } /* if CTAGS mode */
2328 }
2329
2330 /*
2331  * invalidate_nodes ()
2332  *      Scan the node tree and invalidate all nodes pointing to the
2333  *      given file description (CTAGS case) or free them (ETAGS case).
2334  */
2335 static void
2336 invalidate_nodes (fdesc *badfdp, node **npp)
2337 {
2338   node *np = *npp;
2339   stkentry *stack = NULL;
2340
2341   if (CTAGS)
2342     {
2343       while (np)
2344         {
2345           /* Push all the left children on the stack.  */
2346           while (np->left != NULL)
2347             {
2348               push_node (np, &stack);
2349               np = np->left;
2350             }
2351           /* Invalidate this node.  */
2352           if (np->fdp == badfdp)
2353             np->valid = false;
2354           if (!np->right)
2355             {
2356               /* Pop nodes from stack, invalidating them, until we find one
2357                  with a right child.  */
2358               while ((np = pop_node (&stack)) != NULL)
2359                 {
2360                   if (np->fdp == badfdp)
2361                     np->valid = false;
2362                   if (np->right != NULL)
2363                     break;
2364                 }
2365             }
2366           /* Process the right child, if any.  */
2367           if (np)
2368             np = np->right;
2369         }
2370     }
2371   else
2372     {
2373       node super_root, *np_parent = NULL;
2374
2375       super_root.left = np;
2376       super_root.fdp = (fdesc *) -1;
2377       np = &super_root;
2378
2379       while (np)
2380         {
2381           /* Descent on left children until node with BADFP.  */
2382           while (np && np->fdp != badfdp)
2383             {
2384               assert (np->fdp != NULL);
2385               np_parent = np;
2386               np = np->left;
2387             }
2388           if (np)
2389             {
2390               np_parent->left = np->left; /* detach subtree from the tree */
2391               np->left = NULL;            /* isolate it */
2392               free_tree (np);             /* free it */
2393
2394               /* Continue with rest of tree.  */
2395               np = np_parent->left;
2396             }
2397         }
2398       *npp = super_root.left;
2399     }
2400 }
2401
2402 \f
2403 static ptrdiff_t total_size_of_entries (node *);
2404 static int number_len (intmax_t) ATTRIBUTE_CONST;
2405
/* Number of decimal digits needed to print NUM, which is expected to
   be non-negative.  Zero takes one digit.  */
static int
number_len (intmax_t num)
{
  int digits;

  /* One digit to begin with, plus one for every division by ten
     that still leaves a leading digit.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2415
2416 /*
2417  * Return total number of characters that put_entries will output for
2418  * the nodes in the linked list at the right of the specified node.
2419  * This count is irrelevant with etags.el since emacs 19.34 at least,
2420  * but is still supplied for backward compatibility.
2421  */
2422 static ptrdiff_t
2423 total_size_of_entries (node *np)
2424 {
2425   ptrdiff_t total = 0;
2426
2427   for (; np != NULL; np = np->right)
2428     if (np->valid)
2429       {
2430         total += strlen (np->regex) + 1;                /* pat\177 */
2431         if (np->name != NULL)
2432           total += strlen (np->name) + 1;               /* name\001 */
2433         total += number_len (np->lno) + 1;              /* lno, */
2434         if (np->cno != invalidcharno)                   /* cno */
2435           total += number_len (np->cno);
2436         total += 1;                                     /* newline */
2437       }
2438
2439   return total;
2440 }
2441
2442 static void
2443 put_entry (node *np)
2444 {
2445   register char *sp;
2446   static fdesc *fdp = NULL;
2447
2448   /* Output this entry */
2449   if (np->valid)
2450     {
2451       if (!CTAGS)
2452         {
2453           /* Etags mode */
2454           if (fdp != np->fdp)
2455             {
2456               fdp = np->fdp;
2457               fprintf (tagf, "\f\n%s,%"PRIdPTR"\n",
2458                        fdp->taggedfname, total_size_of_entries (np));
2459               fdp->written = true;
2460             }
2461           fputs (np->regex, tagf);
2462           fputc ('\177', tagf);
2463           if (np->name != NULL)
2464             {
2465               fputs (np->name, tagf);
2466               fputc ('\001', tagf);
2467             }
2468           fprintf (tagf, "%"PRIdMAX",", np->lno);
2469           if (np->cno != invalidcharno)
2470             fprintf (tagf, "%"PRIdMAX, np->cno);
2471           fputs ("\n", tagf);
2472         }
2473       else
2474         {
2475           /* Ctags mode */
2476           if (np->name == NULL)
2477             error ("internal error: NULL name in ctags mode.");
2478
2479           if (cxref_style)
2480             {
2481               if (vgrind_style)
2482                 fprintf (stdout, "%s %s %"PRIdMAX"\n",
2483                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2484               else
2485                 fprintf (stdout, "%-16s %3"PRIdMAX" %-16s %s\n",
2486                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2487             }
2488           else
2489             {
2490               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2491
2492               if (np->is_func)
2493                 {               /* function or #define macro with args */
2494                   putc (searchar, tagf);
2495                   putc ('^', tagf);
2496
2497                   for (sp = np->regex; *sp; sp++)
2498                     {
2499                       if (*sp == '\\' || *sp == searchar)
2500                         putc ('\\', tagf);
2501                       putc (*sp, tagf);
2502                     }
2503                   putc (searchar, tagf);
2504                 }
2505               else
2506                 {               /* anything else; text pattern inadequate */
2507                   fprintf (tagf, "%"PRIdMAX, np->lno);
2508                 }
2509               putc ('\n', tagf);
2510             }
2511         }
2512     } /* if this node contains a valid tag */
2513 }
2514
2515 static void
2516 put_entries (node *np)
2517 {
2518   stkentry *stack = NULL;
2519
2520   if (np == NULL)
2521     return;
2522
2523   if (CTAGS)
2524     {
2525       while (np)
2526         {
2527           /* Stack subentries that precede this one.  */
2528           while (np->left)
2529             {
2530               push_node (np, &stack);
2531               np = np->left;
2532             }
2533           /* Output this subentry.  */
2534           put_entry (np);
2535           /* Stack subentries that follow this one.  */
2536           while (!np->right)
2537             {
2538               /* Output subentries that precede the next one.  */
2539               np = pop_node (&stack);
2540               if (!np)
2541                 break;
2542               put_entry (np);
2543             }
2544           if (np)
2545             np = np->right;
2546         }
2547     }
2548   else
2549     {
2550       push_node (np, &stack);
2551       while ((np = pop_node (&stack)) != NULL)
2552         {
2553           /* Output this subentry.  */
2554           put_entry (np);
2555           while (np->right)
2556             {
2557               /* Output subentries that follow this one.  */
2558               put_entry (np->right);
2559               /* Stack subentries from the following files.  */
2560               push_node (np->left, &stack);
2561               np = np->right;
2562             }
2563           push_node (np->left, &stack);
2564         }
2565     }
2566 }
2567
2568 \f
/* Flags describing which C-like dialect is being parsed.  The dialect
   itself lives in the bits covered by C_EXT; C_AUTO and YACC lie
   outside that mask.  Note that the C* and Java values include the
   C_PLPL bit.  */
#define C_EXT   0x00fff         /* mask of the dialect bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ when a top-level
                                   `class' or `template' is met */
#define YACC    0x10000         /* yacc file */
2577
/*
 * Token classes used by the C symbol table.  Each keyword listed in
 * the gperf input below maps to one of these; st_none is what
 * C_symtype returns for anything else.
 */
enum sym_type
{
  st_none,                      /* not a keyword for this language */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return,
                                   import, package, friend */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_enum_bf,                 /* ENUM_BF */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, domain,
                                   interface */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2592
2593 /* Feed stuff between (but not including) %[ and %] lines to:
2594      gperf -m 5
2595 %[
2596 %compare-strncmp
2597 %enum
2598 %struct-type
2599 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2600 %%
2601 if,             0,                      st_C_ignore
2602 for,            0,                      st_C_ignore
2603 while,          0,                      st_C_ignore
2604 switch,         0,                      st_C_ignore
2605 return,         0,                      st_C_ignore
2606 __attribute__,  0,                      st_C_attribute
2607 GTY,            0,                      st_C_attribute
2608 @interface,     0,                      st_C_objprot
2609 @protocol,      0,                      st_C_objprot
2610 @implementation,0,                      st_C_objimpl
2611 @end,           0,                      st_C_objend
2612 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2613 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2614 friend,         C_PLPL,                 st_C_ignore
2615 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2616 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2617 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2618 class,          0,                      st_C_class
2619 namespace,      C_PLPL,                 st_C_struct
2620 domain,         C_STAR,                 st_C_struct
2621 union,          0,                      st_C_struct
2622 struct,         0,                      st_C_struct
2623 extern,         0,                      st_C_extern
2624 enum,           0,                      st_C_enum
2625 typedef,        0,                      st_C_typedef
2626 define,         0,                      st_C_define
2627 undef,          0,                      st_C_define
2628 operator,       C_PLPL,                 st_C_operator
2629 template,       0,                      st_C_template
2630 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2631 DEFUN,          0,                      st_C_gnumacro
2632 SYSCALL,        0,                      st_C_gnumacro
2633 ENTRY,          0,                      st_C_gnumacro
2634 PSEUDO,         0,                      st_C_gnumacro
2635 ENUM_BF,        0,                      st_C_enum_bf
2636 # These are defined inside C functions, so currently they are not met.
2637 # EXFUN used in glibc, DEFVAR_* in emacs.
2638 #EXFUN,         0,                      st_C_gnumacro
2639 #DEFVAR_,       0,                      st_C_gnumacro
2640 %]
2641 and replace lines between %< and %> with its output, then:
2642  - remove the #if characterset check
2643  - remove any #line directives
2644  - make in_word_set static and not inline
2645  - remove any 'register' qualifications from variable decls. */
2646 /*%<*/
2647 /* C code produced by gperf version 3.0.1 */
2648 /* Command-line: gperf -m 5 */
2649 /* Computed positions: -k'2-3' */
2650
2651 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2652 /* maximum key range = 34, duplicates = 0 */
2653
/* Hash function of the keyword table: LEN plus the association value
   of STR[1], plus that of STR[2] unless LEN is exactly 2.  The caller
   must make sure those characters can be read.  */
static int
hash (const char *str, int len)
{
  static char const asso_values[] =
    {
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*   0 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*  10 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*  20 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*  30 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*  40 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /*  50 */
      36, 36, 36, 36, 36, 36, 36, 36, 36,  3,   /*  60 */
      27, 36, 36, 36, 36, 36, 36, 36, 26, 36,   /*  70 */
      36, 36, 36, 25,  0,  0, 36, 36, 36,  0,   /*  80 */
      36, 36, 36, 36, 36,  1, 36, 16, 36,  6,   /*  90 */
      23,  0,  0, 36, 22,  0, 36, 36,  5,  0,   /* 100 */
       0, 15,  1, 36,  6, 36,  8, 19, 36, 16,   /* 110 */
       4,  5, 36, 36, 36, 36, 36, 36, 36, 36,   /* 120 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 130 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 140 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 150 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 160 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 170 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 180 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 190 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 200 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 210 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 220 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 230 */
      36, 36, 36, 36, 36, 36, 36, 36, 36, 36,   /* 240 */
      36, 36, 36, 36, 36, 36                    /* 250 */
    };
  int key = len;

  /* Only a two-character word skips the third character.  */
  if (len != 2)
    key += asso_values[(unsigned char) str[2]];
  key += asso_values[(unsigned char) str[1]];

  return key;
}
2699
2700 static struct C_stab_entry *
2701 in_word_set (const char *str, ptrdiff_t len)
2702 {
2703   enum
2704     {
2705       TOTAL_KEYWORDS = 34,
2706       MIN_WORD_LENGTH = 2,
2707       MAX_WORD_LENGTH = 15,
2708       MIN_HASH_VALUE = 2,
2709       MAX_HASH_VALUE = 35
2710     };
2711
2712   static struct C_stab_entry wordlist[] =
2713     {
2714       {""}, {""},
2715       {"if",            0,                      st_C_ignore},
2716       {"GTY",           0,                      st_C_attribute},
2717       {"@end",          0,                      st_C_objend},
2718       {"union",         0,                      st_C_struct},
2719       {"define",                0,                      st_C_define},
2720       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2721       {"template",      0,                      st_C_template},
2722       {"operator",      C_PLPL,                 st_C_operator},
2723       {"@interface",    0,                      st_C_objprot},
2724       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2725       {"friend",                C_PLPL,                 st_C_ignore},
2726       {"typedef",       0,                      st_C_typedef},
2727       {"return",                0,                      st_C_ignore},
2728       {"@implementation",0,                     st_C_objimpl},
2729       {"@protocol",     0,                      st_C_objprot},
2730       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2731       {"extern",                0,                      st_C_extern},
2732       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2733       {"struct",                0,                      st_C_struct},
2734       {"domain",                C_STAR,                 st_C_struct},
2735       {"switch",                0,                      st_C_ignore},
2736       {"enum",          0,                      st_C_enum},
2737       {"for",           0,                      st_C_ignore},
2738       {"namespace",     C_PLPL,                 st_C_struct},
2739       {"class",         0,                      st_C_class},
2740       {"while",         0,                      st_C_ignore},
2741       {"undef",         0,                      st_C_define},
2742       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2743       {"__attribute__", 0,                      st_C_attribute},
2744       {"ENTRY",         0,                      st_C_gnumacro},
2745       {"SYSCALL",       0,                      st_C_gnumacro},
2746       {"ENUM_BF",       0,                      st_C_enum_bf},
2747       {"PSEUDO",                0,                      st_C_gnumacro},
2748       {"DEFUN",         0,                      st_C_gnumacro}
2749     };
2750
2751   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2752     {
2753       int key = hash (str, len);
2754
2755       if (key <= MAX_HASH_VALUE && key >= 0)
2756         {
2757           const char *s = wordlist[key].name;
2758
2759           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2760             return &wordlist[key];
2761         }
2762     }
2763   return 0;
2764 }
2765 /*%>*/
2766
2767 static enum sym_type
2768 C_symtype (char *str, ptrdiff_t len, int c_ext)
2769 {
2770   struct C_stab_entry *se = in_word_set (str, len);
2771
2772   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2773     return st_none;
2774   return se->type;
2775 }
2776
2777 \f
/* Ignoring __attribute__ ((list)): true while looking at an
   __attribute__ construct.  consider_token sets it when it meets a
   keyword of class st_C_attribute.  */
static bool inattribute;

/* Ignoring ENUM_BF (type): true while inside the parentheses
   following ENUM_BF.  consider_token sets it when it meets ENUM_BF
   outside a preprocessor line.  */
static bool in_enum_bf;
2787
/* State of the simple finite automaton that recognizes C functions
   and variables.  */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen.  */
static bool fvextern;
2807
/* State of the simple finite automaton that recognizes typedefs.  */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2821
/* State of the simple finite automaton that recognizes struct-like
   constructs (enum, struct and union).  */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2835
/* Name of the Objective C class being parsed; meaningful only while
   objdef is different from onone.  */
static const char *objtag = "<uninited>";
2840
/* State of the little state machine that deals with preprocessor
   lines.  */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2851
/* State of the state machine for Objective C protocols and
   implementations.
   Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)  */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2871
2872
/*
 * Information about the token read and about how it should be tagged.
 * make_C_tag builds a tag from it.
 */
static struct tok
{
  /* These three members can be used to pass strings around for
     generic purposes.  */
  char *line;                   /* string containing the token */
  ptrdiff_t offset;             /* where the token starts in LINE */
  ptrdiff_t length;             /* token length */

  /* The remaining members specifically refer to creating tags.  In
     this case the token contained here is the pattern that will be
     used to create a tag.  */
  bool valid;                   /* a tag is created only while this is
                                   true; the token should be
                                   invalidated whenever a state machine
                                   is reset prematurely */
  bool named;                   /* create a named tag */
  intmax_t lineno;              /* source line number of tag */
  intmax_t linepos;             /* source char number of tag */
} token;                        /* latest token read */
2895
2896 /*
2897  * Variables and functions for dealing with nested structures.
2898  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2899  */
2900 static void pushclass_above (ptrdiff_t, char *, ptrdiff_t);
2901 static void popclass_above (ptrdiff_t);
2902 static void write_classname (linebuffer *, const char *qualifier);
2903
/* Stack for nested declaration tags.  cname and bracelev are parallel
   arrays: entry I of each describes the same nesting level.  */
static struct {
  char **cname;                 /* nested class names; an entry may be
                                   a null pointer */
  ptrdiff_t *bracelev;          /* nested class brace level */
  ptrdiff_t nl;                 /* class nesting level (elements used) */
  ptrdiff_t size;               /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.
   This expands to a use of a variable named bracelev, which must be
   in scope wherever the macro is used.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2915
2916 static void
2917 pushclass_above (ptrdiff_t bracelev, char *str, ptrdiff_t len)
2918 {
2919   ptrdiff_t nl;
2920
2921   popclass_above (bracelev);
2922   nl = cstack.nl;
2923   if (nl >= cstack.size)
2924     {
2925       xrnew (cstack.cname, cstack.size, 2);
2926       xrnew (cstack.bracelev, cstack.size, 2);
2927       cstack.size *= 2;
2928     }
2929   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2930   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2931   cstack.bracelev[nl] = bracelev;
2932   cstack.nl = nl + 1;
2933 }
2934
2935 static void
2936 popclass_above (ptrdiff_t bracelev)
2937 {
2938   for (ptrdiff_t nl = cstack.nl - 1;
2939        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2940        nl--)
2941     {
2942       free (cstack.cname[nl]);
2943       cstack.nl = nl;
2944     }
2945 }
2946
2947 static void
2948 write_classname (linebuffer *cn, const char *qualifier)
2949 {
2950   ptrdiff_t len;
2951
2952   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2953     {
2954       len = 0;
2955       cn->len = 0;
2956       cn->buffer[0] = '\0';
2957     }
2958   else
2959     {
2960       len = strlen (cstack.cname[0]);
2961       linebuffer_setlen (cn, len);
2962       strcpy (cn->buffer, cstack.cname[0]);
2963     }
2964   for (ptrdiff_t i = 1; i < cstack.nl; i++)
2965     {
2966       char *s = cstack.cname[i];
2967       if (s == NULL)
2968         continue;
2969       int qlen = strlen (qualifier);
2970       ptrdiff_t slen = strlen (s);
2971       linebuffer_setlen (cn, len + qlen + slen);
2972       memcpyz (stpcpy (cn->buffer + len, qualifier), s, slen);
2973       len += qlen + slen;
2974     }
2975 }
2976
2977 \f
2978 static bool consider_token (char *, ptrdiff_t, int, int *,
2979                             ptrdiff_t, ptrdiff_t, bool *);
2980 static void make_C_tag (bool);
2981
2982 /*
2983  * consider_token ()
2984  *      checks to see if the current token is at the start of a
2985  *      function or variable, or corresponds to a typedef, or
2986  *      is a struct/union/enum tag, or #define, or an enum constant.
2987  *
2988  *      *IS_FUNC_OR_VAR gets true if the token is a function or #define macro
2989  *      with args.  C_EXTP points to which language we are looking at.
2990  *
2991  * Globals
2992  *      fvdef                   IN OUT
2993  *      structdef               IN OUT
2994  *      definedef               IN OUT
2995  *      typdef                  IN OUT
2996  *      objdef                  IN OUT
2997  */
2998
2999 static bool
3000 consider_token (char *str,            /* IN: token pointer */
3001                 ptrdiff_t len,        /* IN: token length */
3002                 int c,                /* IN: first char after the token */
3003                 int *c_extp,          /* IN, OUT: C extensions mask */
3004                 ptrdiff_t bracelev,   /* IN: brace level */
3005                 ptrdiff_t parlev,     /* IN: parenthesis level */
3006                 bool *is_func_or_var) /* OUT: function or variable found */
3007 {
3008   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
3009      structtype is the type of the preceding struct-like keyword, and
3010      structbracelev is the brace level where it has been seen. */
3011   static enum sym_type structtype;
3012   static ptrdiff_t structbracelev;
3013   static enum sym_type toktype;
3014
3015
3016   toktype = C_symtype (str, len, *c_extp);
3017
3018   /*
3019    * Skip __attribute__
3020    */
3021   if (toktype == st_C_attribute)
3022     {
3023       inattribute = true;
3024       return false;
3025      }
3026
3027   /*
3028    * Skip ENUM_BF
3029    */
3030   if (toktype == st_C_enum_bf && definedef == dnone)
3031     {
3032       in_enum_bf = true;
3033       return false;
3034     }
3035
3036    /*
3037     * Advance the definedef state machine.
3038     */
3039    switch (definedef)
3040      {
3041      case dnone:
3042        /* We're not on a preprocessor line. */
3043        if (toktype == st_C_gnumacro)
3044          {
3045            fvdef = fdefunkey;
3046            return false;
3047          }
3048        break;
3049      case dsharpseen:
3050        if (toktype == st_C_define)
3051          {
3052            definedef = ddefineseen;
3053          }
3054        else
3055          {
3056            definedef = dignorerest;
3057          }
3058        return false;
3059      case ddefineseen:
3060        /*
3061         * Make a tag for any macro, unless it is a constant
3062         * and constantypedefs is false.
3063         */
3064        definedef = dignorerest;
3065        *is_func_or_var = (c == '(');
3066        if (!*is_func_or_var && !constantypedefs)
3067          return false;
3068        else
3069          return true;
3070      case dignorerest:
3071        return false;
3072      default:
3073        error ("internal error: definedef value.");
3074      }
3075
3076    /*
3077     * Now typedefs
3078     */
3079    switch (typdef)
3080      {
3081      case tnone:
3082        if (toktype == st_C_typedef)
3083          {
3084            if (typedefs)
3085              typdef = tkeyseen;
3086            fvextern = false;
3087            fvdef = fvnone;
3088            return false;
3089          }
3090        break;
3091      case tkeyseen:
3092        switch (toktype)
3093          {
3094          case st_none:
3095          case st_C_class:
3096          case st_C_struct:
3097          case st_C_enum:
3098            typdef = ttypeseen;
3099            break;
3100          default:
3101            break;
3102          }
3103        break;
3104      case ttypeseen:
3105        if (structdef == snone && fvdef == fvnone)
3106          {
3107            fvdef = fvnameseen;
3108            return true;
3109          }
3110        break;
3111      case tend:
3112        switch (toktype)
3113          {
3114          case st_C_class:
3115          case st_C_struct:
3116          case st_C_enum:
3117            return false;
3118          default:
3119            return true;
3120          }
3121      default:
3122        break;
3123      }
3124
3125    switch (toktype)
3126      {
3127      case st_C_javastruct:
3128        if (structdef == stagseen)
3129          structdef = scolonseen;
3130        return false;
3131      case st_C_template:
3132      case st_C_class:
3133        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
3134            && bracelev == 0
3135            && definedef == dnone && structdef == snone
3136            && typdef == tnone && fvdef == fvnone)
3137          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3138        if (toktype == st_C_template)
3139          break;
3140        FALLTHROUGH;
3141      case st_C_struct:
3142      case st_C_enum:
3143        if (parlev == 0
3144            && fvdef != vignore
3145            && (typdef == tkeyseen
3146                || (typedefs_or_cplusplus && structdef == snone)))
3147          {
3148            structdef = skeyseen;
3149            structtype = toktype;
3150            structbracelev = bracelev;
3151            if (fvdef == fvnameseen)
3152              fvdef = fvnone;
3153          }
3154        return false;
3155      default:
3156        break;
3157      }
3158
3159    if (structdef == skeyseen)
3160      {
3161        structdef = stagseen;
3162        return true;
3163      }
3164
3165    if (typdef != tnone)
3166      definedef = dnone;
3167
3168    /* Detect Objective C constructs. */
3169    switch (objdef)
3170      {
3171      case onone:
3172        switch (toktype)
3173          {
3174          case st_C_objprot:
3175            objdef = oprotocol;
3176            return false;
3177          case st_C_objimpl:
3178            objdef = oimplementation;
3179            return false;
3180          default:
3181            break;
3182          }
3183        break;
3184      case oimplementation:
3185        /* Save the class tag for functions or variables defined inside. */
3186        objtag = savenstr (str, len);
3187        objdef = oinbody;
3188        return false;
3189      case oprotocol:
3190        /* Save the class tag for categories. */
3191        objtag = savenstr (str, len);
3192        objdef = otagseen;
3193        *is_func_or_var = true;
3194        return true;
3195      case oparenseen:
3196        objdef = ocatseen;
3197        *is_func_or_var = true;
3198        return true;
3199      case oinbody:
3200        break;
3201      case omethodsign:
3202        if (parlev == 0)
3203          {
3204            fvdef = fvnone;
3205            objdef = omethodtag;
3206            linebuffer_setlen (&token_name, len);
3207            memcpyz (token_name.buffer, str, len);
3208            return true;
3209          }
3210        return false;
3211      case omethodcolon:
3212        if (parlev == 0)
3213          objdef = omethodparm;
3214        return false;
3215      case omethodparm:
3216        if (parlev == 0)
3217          {
3218            objdef = omethodtag;
3219            if (class_qualify)
3220              {
3221                ptrdiff_t oldlen = token_name.len;
3222                fvdef = fvnone;
3223                linebuffer_setlen (&token_name, oldlen + len);
3224                memcpyz (token_name.buffer + oldlen, str, len);
3225              }
3226            return true;
3227          }
3228        return false;
3229      case oignore:
3230        if (toktype == st_C_objend)
3231          {
3232            /* Memory leakage here: the string pointed by objtag is
3233               never released, because many tests would be needed to
3234               avoid breaking on incorrect input code.  The amount of
3235               memory leaked here is the sum of the lengths of the
3236               class tags.
3237            free (objtag); */
3238            objdef = onone;
3239          }
3240        return false;
3241      default:
3242        break;
3243      }
3244
3245    /* A function, variable or enum constant? */
3246    switch (toktype)
3247      {
3248      case st_C_extern:
3249        fvextern = true;
3250        switch  (fvdef)
3251          {
3252          case finlist:
3253          case flistseen:
3254          case fignore:
3255          case vignore:
3256            break;
3257          default:
3258            fvdef = fvnone;
3259          }
3260        return false;
3261      case st_C_ignore:
3262        fvextern = false;
3263        fvdef = vignore;
3264        return false;
3265      case st_C_operator:
3266        fvdef = foperator;
3267        *is_func_or_var = true;
3268        return true;
3269      case st_none:
3270        if (constantypedefs
3271            && structdef == snone
3272            && structtype == st_C_enum && bracelev > structbracelev
3273            /* Don't tag tokens in expressions that assign values to enum
3274               constants.  */
3275            && fvdef != vignore)
3276          return true;           /* enum constant */
3277        switch (fvdef)
3278          {
3279          case fdefunkey:
3280            if (bracelev > 0)
3281              break;
3282            fvdef = fdefunname;  /* GNU macro */
3283            *is_func_or_var = true;
3284            return true;
3285          case fvnone:
3286            switch (typdef)
3287              {
3288              case ttypeseen:
3289                return false;
3290              case tnone:
3291                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3292                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3293                  {
3294                    fvdef = vignore;
3295                    return false;
3296                  }
3297                break;
3298              default:
3299                break;
3300              }
3301            FALLTHROUGH;
3302           case fvnameseen:
3303           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3304             {
3305               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3306                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3307               fvdef = foperator;
3308               *is_func_or_var = true;
3309               return true;
3310             }
3311           if (bracelev > 0 && !instruct)
3312             break;
3313           fvdef = fvnameseen;   /* function or variable */
3314           *is_func_or_var = true;
3315           return true;
3316          default:
3317            break;
3318         }
3319       break;
3320      default:
3321        break;
3322     }
3323
3324   return false;
3325 }
3326
3327 \f
3328 /*
3329  * C_entries often keeps pointers to tokens or lines which are older than
3330  * the line currently read.  By keeping two line buffers, and switching
3331  * them at end of line, it is possible to use those pointers.
3332  */
3333 static struct
3334 {
3335   intmax_t linepos;
3336   linebuffer lb;
3337 } lbs[2];
3338
/* The macros below expand to uses of variables of the function they
   are used in: curndx, newndx, c_ext, charno, inf, lp, quotednl and
   savetoken.  */

/* True when the line read last is the one in the current buffer.  */
#define current_lb_is_new (newndx == curndx)
/* Make the other line buffer the current one.  */
#define switch_line_buffers() (curndx = 1 - curndx)

/* The current and the newest line buffer, and their positions.  */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Which dialect c_ext describes.  */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read a new line into the current buffer and point lp at its start,
   leaving definedef and the saved token alone.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = false;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore a valid saved token and
   reset the preprocessor state machine.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF ();                                                \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = false;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3370
3371
3372 static void
3373 make_C_tag (bool isfun)
3374 {
3375   /* This function is never called when token.valid is false, but
3376      we must protect against invalid input or internal errors. */
3377   if (token.valid)
3378     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3379               token.offset+token.length+1, token.lineno, token.linepos);
3380   else if (DEBUG)
3381     {                             /* this branch is optimized away if !DEBUG */
3382       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3383                 token_name.len + 17, isfun, token.line,
3384                 token.offset+token.length+1, token.lineno, token.linepos);
3385       error ("INVALID TOKEN");
3386     }
3387
3388   token.valid = false;
3389 }
3390
/* Return true as long as neither the end-of-file indicator nor the
   error indicator of INF is set, i.e. while a further read attempt
   on INF may still deliver data.  */
static bool
perhaps_more_input (FILE *inf)
{
  if (feof (inf))
    return false;
  return ferror (inf) == 0;
}
3396
3397
3398 /*
3399  * C_entries ()
3400  *      This routine finds functions, variables, typedefs,
3401  *      #define's, enum constants and struct/union/enum definitions in
3402  *      C syntax and adds them to the list.
3403  */
3404 static void
3405 C_entries (int c_ext,           /* extension of C */
3406            FILE *inf)           /* input file */
3407 {
3408   char c;                       /* latest char read; '\0' for end of line */
3409   char *lp;                     /* pointer one beyond the character `c' */
3410   bool curndx, newndx;          /* indices for current and new lb */
3411   ptrdiff_t tokoff;             /* offset in line of start of current token */
3412   ptrdiff_t toklen;             /* length of current token */
3413   const char *qualifier;        /* string used to qualify names */
3414   int qlen;                     /* length of qualifier */
3415   ptrdiff_t bracelev;           /* current brace level */
3416   ptrdiff_t bracketlev;         /* current bracket level */
3417   ptrdiff_t parlev;             /* current parenthesis level */
3418   ptrdiff_t attrparlev;         /* __attribute__ parenthesis level */
3419   ptrdiff_t templatelev;        /* current template level */
3420   ptrdiff_t typdefbracelev;     /* bracelev where a typedef struct body begun */
3421   bool incomm, inquote, inchar, quotednl, midtoken;
3422   bool yacc_rules;              /* in the rules part of a yacc file */
3423   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3424
3425
3426   linebuffer_init (&lbs[0].lb);
3427   linebuffer_init (&lbs[1].lb);
3428   if (cstack.size == 0)
3429     {
3430       cstack.size = (DEBUG) ? 1 : 4;
3431       cstack.nl = 0;
3432       cstack.cname = xnmalloc (cstack.size, sizeof *cstack.cname);
3433       cstack.bracelev = xnmalloc (cstack.size, sizeof *cstack.bracelev);
3434     }
3435
3436   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3437   curndx = newndx = 0;
3438   lp = curlb.buffer;
3439   *lp = 0;
3440
3441   fvdef = fvnone; fvextern = false; typdef = tnone;
3442   structdef = snone; definedef = dnone; objdef = onone;
3443   yacc_rules = false;
3444   midtoken = inquote = inchar = incomm = quotednl = false;
3445   token.valid = savetoken.valid = false;
3446   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3447   if (cjava)
3448     { qualifier = "."; qlen = 1; }
3449   else
3450     { qualifier = "::"; qlen = 2; }
3451
3452
3453   while (perhaps_more_input (inf))
3454     {
3455       c = *lp++;
3456       if (c == '\\')
3457         {
3458           /* If we are at the end of the line, the next character is a
3459              '\0'; do not skip it, because it is what tells us
3460              to read the next line.  */
3461           if (*lp == '\0')
3462             {
3463               quotednl = true;
3464               continue;
3465             }
3466           lp++;
3467           c = ' ';
3468         }
3469       else if (incomm)
3470         {
3471           switch (c)
3472             {
3473             case '*':
3474               if (*lp == '/')
3475                 {
3476                   c = *lp++;
3477                   incomm = false;
3478                 }
3479               break;
3480             case '\0':
3481               /* Newlines inside comments do not end macro definitions in
3482                  traditional cpp. */
3483               CNL_SAVE_DEFINEDEF ();
3484               break;
3485             }
3486           continue;
3487         }
3488       else if (inquote)
3489         {
3490           switch (c)
3491             {
3492             case '"':
3493               inquote = false;
3494               break;
3495             case '\0':
3496               /* Newlines inside strings do not end macro definitions
3497                  in traditional cpp, even though compilers don't
3498                  usually accept them. */
3499               CNL_SAVE_DEFINEDEF ();
3500               break;
3501             }
3502           continue;
3503         }
3504       else if (inchar)
3505         {
3506           switch (c)
3507             {
3508             case '\0':
3509               /* Hmmm, something went wrong. */
3510               CNL ();
3511               FALLTHROUGH;
3512             case '\'':
3513               inchar = false;
3514               break;
3515             }
3516           continue;
3517         }
3518       else switch (c)
3519         {
3520         case '"':
3521           inquote = true;
3522           if (bracketlev > 0)
3523             continue;
3524           if (inattribute)
3525             break;
3526           switch (fvdef)
3527             {
3528             case fdefunkey:
3529             case fstartlist:
3530             case finlist:
3531             case fignore:
3532             case vignore:
3533               break;
3534             default:
3535               fvextern = false;
3536               fvdef = fvnone;
3537             }
3538           continue;
3539         case '\'':
3540           inchar = true;
3541           if (bracketlev > 0)
3542             continue;
3543           if (inattribute)
3544             break;
3545           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3546             {
3547               fvextern = false;
3548               fvdef = fvnone;
3549             }
3550           continue;
3551         case '/':
3552           if (*lp == '*')
3553             {
3554               incomm = true;
3555               lp++;
3556               c = ' ';
3557               if (bracketlev > 0)
3558                 continue;
3559             }
3560           else if (/* cplpl && */ *lp == '/')
3561             {
3562               c = '\0';
3563             }
3564           break;
3565         case '%':
3566           if ((c_ext & YACC) && *lp == '%')
3567             {
3568               /* Entering or exiting rules section in yacc file. */
3569               lp++;
3570               definedef = dnone; fvdef = fvnone; fvextern = false;
3571               typdef = tnone; structdef = snone;
3572               midtoken = inquote = inchar = incomm = quotednl = false;
3573               bracelev = 0;
3574               yacc_rules = !yacc_rules;
3575               continue;
3576             }
3577           else
3578             break;
3579         case '#':
3580           if (definedef == dnone)
3581             {
3582               char *cp;
3583               bool cpptoken = true;
3584
3585               /* Look back on this line.  If all blanks, or nonblanks
3586                  followed by an end of comment, this is a preprocessor
3587                  token. */
3588               for (cp = newlb.buffer; cp < lp-1; cp++)
3589                 if (!c_isspace (*cp))
3590                   {
3591                     if (*cp == '*' && cp[1] == '/')
3592                       {
3593                         cp++;
3594                         cpptoken = true;
3595                       }
3596                     else
3597                       cpptoken = false;
3598                   }
3599               if (cpptoken)
3600                 {
3601                   definedef = dsharpseen;
3602                   /* This is needed for tagging enum values: when there are
3603                      preprocessor conditionals inside the enum, we need to
3604                      reset the value of fvdef so that the next enum value is
3605                      tagged even though the one before it did not end in a
3606                      comma.  */
3607                   if (fvdef == vignore && instruct && parlev == 0)
3608                     {
3609                       if (strneq (cp, "#if", 3) || strneq (cp, "#el", 3))
3610                         fvdef = fvnone;
3611                     }
3612                 }
3613             } /* if (definedef == dnone) */
3614           continue;
3615         case '[':
3616           bracketlev++;
3617           continue;
3618         default:
3619           if (bracketlev > 0)
3620             {
3621               if (c == ']')
3622                 --bracketlev;
3623               else if (c == '\0')
3624                 CNL_SAVE_DEFINEDEF ();
3625               continue;
3626             }
3627           break;
3628         } /* switch (c) */
3629
3630
3631       /* Consider token only if some involved conditions are satisfied. */
3632       if (typdef != tignore
3633           && definedef != dignorerest
3634           && fvdef != finlist
3635           && templatelev == 0
3636           && (definedef != dnone
3637               || structdef != scolonseen)
3638           && !inattribute
3639           && !in_enum_bf)
3640         {
3641           if (midtoken)
3642             {
3643               if (endtoken (c))
3644                 {
3645                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3646                     /* This handles :: in the middle,
3647                        but not at the beginning of an identifier.
3648                        Also, space-separated :: is not recognized. */
3649                     {
3650                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3651                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3652                       lp += 2;
3653                       toklen += 2;
3654                       c = lp[-1];
3655                       goto still_in_token;
3656                     }
3657                   else
3658                     {
3659                       bool funorvar = false;
3660
3661                       if (yacc_rules
3662                           || consider_token (newlb.buffer + tokoff, toklen, c,
3663                                              &c_ext, bracelev, parlev,
3664                                              &funorvar))
3665                         {
3666                           if (fvdef == foperator)
3667                             {
3668                               char *oldlp = lp;
3669                               lp = skip_spaces (lp-1);
3670                               if (*lp != '\0')
3671                                 lp += 1;
3672                               while (*lp != '\0'
3673                                      && !c_isspace (*lp) && *lp != '(')
3674                                 lp += 1;
3675                               c = *lp++;
3676                               toklen += lp - oldlp;
3677                             }
3678                           token.named = false;
3679                           if (!plainc
3680                               && nestlev > 0 && definedef == dnone)
3681                             /* in struct body */
3682                             {
3683                               if (class_qualify)
3684                                 {
3685                                   write_classname (&token_name, qualifier);
3686                                   ptrdiff_t len = token_name.len;
3687                                   linebuffer_setlen (&token_name,
3688                                                      len + qlen + toklen);
3689                                   memcpyz (stpcpy (token_name.buffer + len,
3690                                                    qualifier),
3691                                            newlb.buffer + tokoff, toklen);
3692                                 }
3693                               else
3694                                 {
3695                                   linebuffer_setlen (&token_name, toklen);
3696                                   memcpyz (token_name.buffer,
3697                                            newlb.buffer + tokoff, toklen);
3698                                 }
3699                               token.named = true;
3700                             }
3701                           else if (objdef == ocatseen)
3702                             /* Objective C category */
3703                             {
3704                               if (class_qualify)
3705                                 {
3706                                   ptrdiff_t len = strlen (objtag) + 2 + toklen;
3707                                   linebuffer_setlen (&token_name, len);
3708                                   char *p1 = stpcpy (token_name.buffer, objtag);
3709                                   char *p2 = stpcpy (p1, "(");
3710                                   char *p3 = mempcpy (p2, newlb.buffer + tokoff,
3711                                                       toklen);
3712                                   strcpy (p3, ")");
3713                                 }
3714                               else
3715                                 {
3716                                   linebuffer_setlen (&token_name, toklen);
3717                                   memcpyz (token_name.buffer,
3718                                            newlb.buffer + tokoff, toklen);
3719                                 }
3720                               token.named = true;
3721                             }
3722                           else if (objdef == omethodtag
3723                                    || objdef == omethodparm)
3724                             /* Objective C method */
3725                             {
3726                               token.named = true;
3727                             }
3728                           else if (fvdef == fdefunname)
3729                             /* GNU DEFUN and similar macros */
3730                             {
3731                               bool defun = (newlb.buffer[tokoff] == 'F');
3732                               ptrdiff_t off = tokoff;
3733                               ptrdiff_t len = toklen;
3734
3735                               if (defun)
3736                                 {
3737                                   off += 1;
3738                                   len -= 1;
3739
3740                                   /* First, tag it as its C name */
3741                                   linebuffer_setlen (&token_name, toklen);
3742                                   memcpyz (token_name.buffer,
3743                                            newlb.buffer + tokoff, toklen);
3744                                   token.named = true;
3745                                   token.lineno = lineno;
3746                                   token.offset = tokoff;
3747                                   token.length = toklen;
3748                                   token.line = newlb.buffer;
3749                                   token.linepos = newlinepos;
3750                                   token.valid = true;
3751                                   make_C_tag (funorvar);
3752                                 }
3753                               /* Rewrite the tag so that emacs lisp DEFUNs
3754                                  can be found also by their elisp name */
3755                               linebuffer_setlen (&token_name, len);
3756                               memcpyz (token_name.buffer,
3757                                        newlb.buffer + off, len);
3758                               if (defun)
3759                                 while (--len >= 0)
3760                                   if (token_name.buffer[len] == '_')
3761                                     token_name.buffer[len] = '-';
3762                               token.named = defun;
3763                             }
3764                           else
3765                             {
3766                               linebuffer_setlen (&token_name, toklen);
3767                               memcpyz (token_name.buffer,
3768                                        newlb.buffer + tokoff, toklen);
3769                               /* Name macros and members. */
3770                               token.named = (structdef == stagseen
3771                                              || typdef == ttypeseen
3772                                              || typdef == tend
3773                                              || (funorvar
3774                                                  && definedef == dignorerest)
3775                                              || (funorvar
3776                                                  && definedef == dnone
3777                                                  && structdef == snone
3778                                                  && bracelev > 0));
3779                             }
3780                           token.lineno = lineno;
3781                           token.offset = tokoff;
3782                           token.length = toklen;
3783                           token.line = newlb.buffer;
3784                           token.linepos = newlinepos;
3785                           token.valid = true;
3786
3787                           if (definedef == dnone
3788                               && (fvdef == fvnameseen
3789                                   || fvdef == foperator
3790                                   || structdef == stagseen
3791                                   || typdef == tend
3792                                   || typdef == ttypeseen
3793                                   || objdef != onone))
3794                             {
3795                               if (current_lb_is_new)
3796                                 switch_line_buffers ();
3797                             }
3798                           else if (definedef != dnone
3799                                    || fvdef == fdefunname
3800                                    || instruct)
3801                             make_C_tag (funorvar);
3802                         }
3803                       else /* not yacc and consider_token failed */
3804                         {
3805                           if (inattribute && fvdef == fignore)
3806                             {
3807                               /* We have just met __attribute__ after a
3808                                  function parameter list: do not tag the
3809                                  function again. */
3810                               fvdef = fvnone;
3811                             }
3812                         }
3813                       midtoken = false;
3814                     }
3815                 } /* if (endtoken (c)) */
3816               else if (intoken (c))
3817                 still_in_token:
3818                 {
3819                   toklen++;
3820                   continue;
3821                 }
3822             } /* if (midtoken) */
3823           else if (begtoken (c))
3824             {
3825               switch (definedef)
3826                 {
3827                 case dnone:
3828                   switch (fvdef)
3829                     {
3830                     case fstartlist:
3831                       /* This prevents tagging fb in
3832                          void (__attribute__ ((noreturn)) *fb) (void);
3833                          Fixing this is not easy and not very important. */
3834                       fvdef = finlist;
3835                       continue;
3836                     case flistseen:
3837                       if (plainc || declarations)
3838                         {
3839                           make_C_tag (true); /* a function */
3840                           fvdef = fignore;
3841                         }
3842                       break;
3843                     default:
3844                       break;
3845                     }
3846                   if (structdef == stagseen && !cjava)
3847                     {
3848                       popclass_above (bracelev);
3849                       structdef = snone;
3850                     }
3851                   break;
3852                 case dsharpseen:
3853                   savetoken = token;
3854                   break;
3855                 default:
3856                   break;
3857                 }
3858               if (!yacc_rules || lp == newlb.buffer + 1)
3859                 {
3860                   tokoff = lp - 1 - newlb.buffer;
3861                   toklen = 1;
3862                   midtoken = true;
3863                 }
3864               continue;
3865             } /* if (begtoken) */
3866         } /* if must look at token */
3867
3868
3869       /* Detect end of line, colon, comma, semicolon and various braces
3870          after having handled a token.*/
3871       switch (c)
3872         {
3873         case ':':
3874           if (inattribute)
3875             break;
3876           if (yacc_rules && token.offset == 0 && token.valid)
3877             {
3878               make_C_tag (false); /* a yacc function */
3879               break;
3880             }
3881           if (definedef != dnone)
3882             break;
3883           switch (objdef)
3884             {
3885             case otagseen:
3886               objdef = oignore;
3887               make_C_tag (true); /* an Objective C class */
3888               break;
3889             case omethodtag:
3890             case omethodparm:
3891               objdef = omethodcolon;
3892               if (class_qualify)
3893                 {
3894                   ptrdiff_t toklen = token_name.len;
3895                   linebuffer_setlen (&token_name, toklen + 1);
3896                   strcpy (token_name.buffer + toklen, ":");
3897                 }
3898               break;
3899             default:
3900               break;
3901             }
3902           if (structdef == stagseen)
3903             {
3904               structdef = scolonseen;
3905               break;
3906             }
3907           /* Should be useless, but may work as a safety net. */
3908           if (cplpl && fvdef == flistseen)
3909             {
3910               make_C_tag (true); /* a function */
3911               fvdef = fignore;
3912               break;
3913             }
3914           break;
3915         case ';':
3916           if (definedef != dnone || inattribute)
3917             break;
3918           switch (typdef)
3919             {
3920             case tend:
3921             case ttypeseen:
3922               make_C_tag (false); /* a typedef */
3923               typdef = tnone;
3924               fvdef = fvnone;
3925               break;
3926             case tnone:
3927             case tinbody:
3928             case tignore:
3929               switch (fvdef)
3930                 {
3931                 case fignore:
3932                   if (typdef == tignore || cplpl)
3933                     fvdef = fvnone;
3934                   break;
3935                 case fvnameseen:
3936                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3937                       || (members && instruct))
3938                     make_C_tag (false); /* a variable */
3939                   fvextern = false;
3940                   fvdef = fvnone;
3941                   token.valid = false;
3942                   break;
3943                 case flistseen:
3944                   if ((declarations
3945                        && (cplpl || !instruct)
3946                        && (typdef == tnone || (typdef != tignore && instruct)))
3947                       || (members
3948                           && plainc && instruct))
3949                     make_C_tag (true);  /* a function */
3950                   FALLTHROUGH;
3951                 default:
3952                   fvextern = false;
3953                   fvdef = fvnone;
3954                   if (declarations
3955                        && cplpl && structdef == stagseen)
3956                     make_C_tag (false); /* forward declaration */
3957                   else
3958                     token.valid = false;
3959                 } /* switch (fvdef) */
3960               FALLTHROUGH;
3961             default:
3962               if (!instruct)
3963                 typdef = tnone;
3964             }
3965           if (structdef == stagseen)
3966             structdef = snone;
3967           break;
3968         case ',':
3969           if (definedef != dnone || inattribute)
3970             break;
3971           switch (objdef)
3972             {
3973             case omethodtag:
3974             case omethodparm:
3975               make_C_tag (true); /* an Objective C method */
3976               objdef = oinbody;
3977               break;
3978             default:
3979               break;
3980             }
3981           switch (fvdef)
3982             {
3983             case fdefunkey:
3984             case foperator:
3985             case fstartlist:
3986             case finlist:
3987             case fignore:
3988               break;
3989             case vignore:
3990               if (instruct && parlev == 0)
3991                 fvdef = fvnone;
3992               break;
3993             case fdefunname:
3994               fvdef = fignore;
3995               break;
3996             case fvnameseen:
3997               if (parlev == 0
3998                   && ((globals
3999                        && bracelev == 0
4000                        && templatelev == 0
4001                        && (!fvextern || declarations))
4002                       || (members && instruct)))
4003                   make_C_tag (false); /* a variable */
4004               break;
4005             case flistseen:
4006               if ((declarations && typdef == tnone && !instruct)
4007                   || (members && typdef != tignore && instruct))
4008                 {
4009                   make_C_tag (true); /* a function */
4010                   fvdef = fvnameseen;
4011                 }
4012               else if (!declarations)
4013                 fvdef = fvnone;
4014               token.valid = false;
4015               break;
4016             default:
4017               fvdef = fvnone;
4018             }
4019           if (structdef == stagseen)
4020             structdef = snone;
4021           break;
4022         case ']':
4023           if (definedef != dnone || inattribute)
4024             break;
4025           if (structdef == stagseen)
4026             structdef = snone;
4027           switch (typdef)
4028             {
4029             case ttypeseen:
4030             case tend:
4031               typdef = tignore;
4032               make_C_tag (false);       /* a typedef */
4033               break;
4034             case tnone:
4035             case tinbody:
4036               switch (fvdef)
4037                 {
4038                 case foperator:
4039                 case finlist:
4040                 case fignore:
4041                 case vignore:
4042                   break;
4043                 case fvnameseen:
4044                   if ((members && bracelev == 1)
4045                       || (globals && bracelev == 0
4046                           && (!fvextern || declarations)))
4047                     make_C_tag (false); /* a variable */
4048                   FALLTHROUGH;
4049                 default:
4050                   fvdef = fvnone;
4051                 }
4052               break;
4053             default:
4054               break;
4055             }
4056           break;
4057         case '(':
4058           if (inattribute)
4059             {
4060               attrparlev++;
4061               break;
4062             }
4063           if (definedef != dnone)
4064             break;
4065           if (objdef == otagseen && parlev == 0)
4066             objdef = oparenseen;
4067           switch (fvdef)
4068             {
4069             case fvnameseen:
4070               if (typdef == ttypeseen
4071                   && *lp != '*'
4072                   && !instruct)
4073                 {
4074                   /* This handles constructs like:
4075                      typedef void OperatorFun (int fun); */
4076                   make_C_tag (false);
4077                   typdef = tignore;
4078                   fvdef = fignore;
4079                   break;
4080                 }
4081               FALLTHROUGH;
4082             case foperator:
4083               fvdef = fstartlist;
4084               break;
4085             case flistseen:
4086               fvdef = finlist;
4087               break;
4088             default:
4089               break;
4090             }
4091           parlev++;
4092           break;
4093         case ')':
4094           if (inattribute)
4095             {
4096               if (--attrparlev == 0)
4097                 inattribute = false;
4098               break;
4099             }
4100           if (in_enum_bf)
4101             {
4102               if (--parlev == 0)
4103                 in_enum_bf = false;
4104               break;
4105             }
4106           if (definedef != dnone)
4107             break;
4108           if (objdef == ocatseen && parlev == 1)
4109             {
4110               make_C_tag (true); /* an Objective C category */
4111               objdef = oignore;
4112             }
4113           if (--parlev == 0)
4114             {
4115               switch (fvdef)
4116                 {
4117                 case fstartlist:
4118                 case finlist:
4119                   fvdef = flistseen;
4120                   break;
4121                 default:
4122                   break;
4123                 }
4124               if (!instruct
4125                   && (typdef == tend
4126                       || typdef == ttypeseen))
4127                 {
4128                   typdef = tignore;
4129                   make_C_tag (false); /* a typedef */
4130                 }
4131             }
4132           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
4133             parlev = 0;
4134           break;
4135         case '{':
4136           if (definedef != dnone)
4137             break;
4138           if (typdef == ttypeseen)
4139             {
4140               /* Whenever typdef is set to tinbody (currently only
4141                  here), typdefbracelev should be set to bracelev. */
4142               typdef = tinbody;
4143               typdefbracelev = bracelev;
4144             }
4145           switch (fvdef)
4146             {
4147             case flistseen:
4148               if (cplpl && !class_qualify)
4149                 {
4150                   /* Remove class and namespace qualifiers from the token,
4151                      leaving only the method/member name.  */
4152                   char *cc, *uqname = token_name.buffer;
4153                   char *tok_end = token_name.buffer + token_name.len;
4154
4155                   for (cc = token_name.buffer; cc < tok_end; cc++)
4156                     {
4157                       if (*cc == ':' && cc[1] == ':')
4158                         {
4159                           uqname = cc + 2;
4160                           cc++;
4161                         }
4162                     }
4163                   if (uqname > token_name.buffer)
4164                     {
4165                       ptrdiff_t uqlen = strlen (uqname);
4166                       linebuffer_setlen (&token_name, uqlen);
4167                       memmove (token_name.buffer, uqname, uqlen + 1);
4168                     }
4169                 }
4170               make_C_tag (true);    /* a function */
4171               FALLTHROUGH;
4172             case fignore:
4173               fvdef = fvnone;
4174               break;
4175             case fvnone:
4176               switch (objdef)
4177                 {
4178                 case otagseen:
4179                   make_C_tag (true); /* an Objective C class */
4180                   objdef = oignore;
4181                   break;
4182                 case omethodtag:
4183                 case omethodparm:
4184                   make_C_tag (true); /* an Objective C method */
4185                   objdef = oinbody;
4186                   break;
4187                 default:
4188                   /* Neutralize `extern "C" {' grot. */
4189                   if (bracelev == 0 && structdef == snone && nestlev == 0
4190                       && typdef == tnone)
4191                     bracelev = -1;
4192                 }
4193               break;
4194             default:
4195               break;
4196             }
4197           switch (structdef)
4198             {
4199             case skeyseen:         /* unnamed struct */
4200               pushclass_above (bracelev, NULL, 0);
4201               structdef = snone;
4202               break;
4203             case stagseen:         /* named struct or enum */
4204             case scolonseen:       /* a class */
4205               pushclass_above (bracelev,token.line+token.offset, token.length);
4206               structdef = snone;
4207               make_C_tag (false);  /* a struct or enum */
4208               break;
4209             default:
4210               break;
4211             }
4212           bracelev += 1;
4213           break;
4214         case '*':
4215           if (definedef != dnone)
4216             break;
4217           if (fvdef == fstartlist)
4218             {
4219               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
4220               token.valid = false;
4221             }
4222           break;
4223         case '}':
4224           if (definedef != dnone)
4225             break;
4226           bracelev -= 1;
4227           /* If we see a closing brace in column zero, and we weren't told to
4228              ignore indentation, we assume this the final brace of a function
4229              or struct definition, and reset bracelev to zero.  */
4230           if (!ignoreindent && lp == newlb.buffer + 1)
4231             {
4232               if (bracelev != 0)
4233                 token.valid = false; /* unexpected value, token unreliable */
4234               bracelev = 0;     /* reset brace level if first column */
4235               parlev = 0;       /* also reset paren level, just in case... */
4236             }
4237           else if (bracelev < 0)
4238             {
4239               token.valid = false; /* something gone amiss, token unreliable */
4240               bracelev = 0;
4241             }
4242           if (bracelev == 0 && fvdef == vignore)
4243             fvdef = fvnone;             /* end of function */
4244           popclass_above (bracelev);
4245           structdef = snone;
4246           /* Only if typdef == tinbody is typdefbracelev significant. */
4247           if (typdef == tinbody && bracelev <= typdefbracelev)
4248             {
4249               assert (bracelev == typdefbracelev);
4250               typdef = tend;
4251             }
4252           break;
4253         case '=':
4254           if (definedef != dnone)
4255             break;
4256           switch (fvdef)
4257             {
4258             case foperator:
4259             case finlist:
4260             case fignore:
4261             case vignore:
4262               break;
4263             case fvnameseen:
4264               if ((members && bracelev == 1)
4265                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4266                 make_C_tag (false); /* a variable */
4267               FALLTHROUGH;
4268             default:
4269               fvdef = vignore;
4270             }
4271           break;
4272         case '<':
4273           if (cplpl
4274               && (structdef == stagseen || fvdef == fvnameseen))
4275             {
4276               templatelev++;
4277               break;
4278             }
4279           goto resetfvdef;
4280         case '>':
4281           if (templatelev > 0)
4282             {
4283               templatelev--;
4284               break;
4285             }
4286           goto resetfvdef;
4287         case '+':
4288         case '-':
4289           if (objdef == oinbody && bracelev == 0)
4290             {
4291               objdef = omethodsign;
4292               break;
4293             }
4294           FALLTHROUGH;
4295         case '#': case '~': case '&': case '%': case '/':
4296         case '|': case '^': case '!': case '.': case '?':
4297         resetfvdef:
4298           if (definedef != dnone)
4299             break;
4300           /* These surely cannot follow a function tag in C. */
4301           switch (fvdef)
4302             {
4303             case foperator:
4304             case finlist:
4305             case fignore:
4306             case vignore:
4307               break;
4308             default:
4309               fvdef = fvnone;
4310             }
4311           break;
4312         case '\0':
4313           if (objdef == otagseen)
4314             {
4315               make_C_tag (true); /* an Objective C class */
4316               objdef = oignore;
4317             }
4318           /* If a macro spans multiple lines don't reset its state. */
4319           if (quotednl)
4320             CNL_SAVE_DEFINEDEF ();
4321           else
4322             CNL ();
4323           break;
4324         } /* switch (c) */
4325
4326     } /* while not eof */
4327
4328   free (lbs[0].lb.buffer);
4329   free (lbs[1].lb.buffer);
4330 }
4331
4332 /*
4333  * Process either a C++ file or a C file depending on the setting
4334  * of a global flag.
4335  */
4336 static void
4337 default_C_entries (FILE *inf)
4338 {
4339   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4340 }
4341
/* Tag INF as plain C: C_entries is called with no language flags.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_flags = 0;

  C_entries (no_flags, inf);
}
4348
4349 /* Always do C++. */
4350 static void
4351 Cplusplus_entries (FILE *inf)
4352 {
4353   C_entries (C_PLPL, inf);
4354 }
4355
4356 /* Always do Java. */
4357 static void
4358 Cjava_entries (FILE *inf)
4359 {
4360   C_entries (C_JAVA, inf);
4361 }
4362
4363 /* Always do C*. */
4364 static void
4365 Cstar_entries (FILE *inf)
4366 {
4367   C_entries (C_STAR, inf);
4368 }
4369
4370 /* Always do Yacc. */
4371 static void
4372 Yacc_entries (FILE *inf)
4373 {
4374   C_entries (YACC, inf);
4375 }
4376
4377 \f
/* Useful macros. */

/* Loop header for line-by-line parsers: as long as STREAM may have
   more input, read the next line into LINEBUF and point CURSOR at the
   start of that line's text.  The statement that follows the macro
   invocation is the loop body.  */
#define LOOP_ON_INPUT_LINES(stream, linebuf, cursor)                    \
  while (perhaps_more_input (stream)                                    \
         && (readline (&(linebuf), stream),                             \
             (cursor) = (linebuf).buffer,                               \
             true))

/* True if the text at CURSOR begins with the keyword LIT and the
   character right after it satisfies `notinname'.  On success CURSOR
   is moved past the keyword and then through skip_spaces; on failure
   CURSOR is left unchanged.  LIT must be a string literal.  */
#define LOOKING_AT(cursor, lit)                                         \
  ((assert ("" lit), true)  /* syntax error if not a literal string */  \
   && strneq (cursor, lit, sizeof (lit) - 1)    /* keyword is here */   \
   && notinname ((cursor)[sizeof (lit) - 1])    /* and ends here */     \
   && ((cursor) = skip_spaces ((cursor) + sizeof (lit) - 1), true))

/* Like LOOKING_AT, but the comparison is done with strncaseeq, the
   character after the keyword is not checked with `notinname', and on
   success CURSOR is advanced over the keyword only: spaces after it
   are NOT skipped.  LIT must be a string literal.  */
#define LOOKING_AT_NOCASE(cursor, lit)                                  \
  ((assert ("" lit), true)  /* syntax error if not a literal string */  \
   && strncaseeq (cursor, lit, sizeof (lit) - 1) /* keyword is here */  \
   && ((cursor) += sizeof (lit) - 1, true))      /* step over it */
4396
4397 /*
4398  * Read a file, but do no processing.  This is used to do regexp
4399  * matching on files that have no language defined.
4400  */
4401 static void
4402 just_read_file (FILE *inf)
4403 {
4404   while (perhaps_more_input (inf))
4405     readline (&lb, inf);
4406 }
4407
4408 \f
4409 /* Fortran parsing */
4410
4411 static void F_takeprec (void);
4412 static void F_getit (FILE *);
4413
4414 static void
4415 F_takeprec (void)
4416 {
4417   dbp = skip_spaces (dbp);
4418   if (*dbp != '*')
4419     return;
4420   dbp++;
4421   dbp = skip_spaces (dbp);
4422   if (strneq (dbp, "(*)", 3))
4423     {
4424       dbp += 3;
4425       return;
4426     }
4427   if (!c_isdigit (*dbp))
4428     {
4429       --dbp;                    /* force failure */
4430       return;
4431     }
4432   do
4433     dbp++;
4434   while (c_isdigit (*dbp));
4435 }
4436
4437 static void
4438 F_getit (FILE *inf)
4439 {
4440   register char *cp;
4441
4442   dbp = skip_spaces (dbp);
4443   if (*dbp == '\0')
4444     {
4445       readline (&lb, inf);
4446       dbp = lb.buffer;
4447       if (dbp[5] != '&')
4448         return;
4449       dbp += 6;
4450       dbp = skip_spaces (dbp);
4451     }
4452   if (!c_isalpha (*dbp) && *dbp != '_' && *dbp != '$')
4453     return;
4454   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4455     continue;
4456   make_tag (dbp, cp-dbp, true,
4457             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4458 }
4459
4460
4461 static void
4462 Fortran_functions (FILE *inf)
4463 {
4464   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4465     {
4466       if (*dbp == '%')
4467         dbp++;                  /* Ratfor escape to fortran */
4468       dbp = skip_spaces (dbp);
4469       if (*dbp == '\0')
4470         continue;
4471
4472       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4473         dbp = skip_spaces (dbp);
4474
4475       if (LOOKING_AT_NOCASE (dbp, "pure"))
4476         dbp = skip_spaces (dbp);
4477
4478       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4479         dbp = skip_spaces (dbp);
4480
4481       switch (c_tolower (*dbp))
4482         {
4483         case 'i':
4484           if (nocase_tail ("integer"))
4485             F_takeprec ();
4486           break;
4487         case 'r':
4488           if (nocase_tail ("real"))
4489             F_takeprec ();
4490           break;
4491         case 'l':
4492           if (nocase_tail ("logical"))
4493             F_takeprec ();
4494           break;
4495         case 'c':
4496           if (nocase_tail ("complex") || nocase_tail ("character"))
4497             F_takeprec ();
4498           break;
4499         case 'd':
4500           if (nocase_tail ("double"))
4501             {
4502               dbp = skip_spaces (dbp);
4503               if (*dbp == '\0')
4504                 continue;
4505               if (nocase_tail ("precision"))
4506                 break;
4507               continue;
4508             }
4509           break;
4510         }
4511       dbp = skip_spaces (dbp);
4512       if (*dbp == '\0')
4513         continue;
4514       switch (c_tolower (*dbp))
4515         {
4516         case 'f':
4517           if (nocase_tail ("function"))
4518             F_getit (inf);
4519           continue;
4520         case 's':
4521           if (nocase_tail ("subroutine"))
4522             F_getit (inf);
4523           continue;
4524         case 'e':
4525           if (nocase_tail ("entry"))
4526             F_getit (inf);
4527           continue;
4528         case 'b':
4529           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4530             {
4531               dbp = skip_spaces (dbp);
4532               if (*dbp == '\0') /* assume un-named */
4533                 make_tag ("blockdata", 9, true,
4534                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4535               else
4536                 F_getit (inf);  /* look for name */
4537             }
4538           continue;
4539         }
4540     }
4541 }
4542
4543 \f
4544 /*
4545  * Go language support
4546  * Original code by Xi Lu <lx@shellcodes.org> (2016)
4547  */
4548 static void
4549 Go_functions(FILE *inf)
4550 {
4551   char *cp, *name;
4552
4553   LOOP_ON_INPUT_LINES(inf, lb, cp)
4554     {
4555       cp = skip_spaces (cp);
4556
4557       if (LOOKING_AT (cp, "package"))
4558         {
4559           name = cp;
4560           while (!notinname (*cp) && *cp != '\0')
4561             cp++;
4562           make_tag (name, cp - name, false, lb.buffer,
4563                     cp - lb.buffer + 1, lineno, linecharno);
4564         }
4565       else if (LOOKING_AT (cp, "func"))
4566         {
4567           /* Go implementation of interface, such as:
4568              func (n *Integer) Add(m Integer) ...
4569              skip `(n *Integer)` part.
4570           */
4571           if (*cp == '(')
4572             {
4573               while (*cp != ')')
4574                 cp++;
4575               cp = skip_spaces (cp+1);
4576             }
4577
4578           if (*cp)
4579             {
4580               name = cp;
4581
4582               while (!notinname (*cp))
4583                 cp++;
4584
4585               make_tag (name, cp - name, true, lb.buffer,
4586                         cp - lb.buffer + 1, lineno, linecharno);
4587             }
4588         }
4589       else if (members && LOOKING_AT (cp, "type"))
4590         {
4591           name = cp;
4592
4593           /* Ignore the likes of the following:
4594              type (
4595                     A
4596              )
4597            */
4598           if (*cp == '(')
4599             return;
4600
4601           while (!notinname (*cp) && *cp != '\0')
4602             cp++;
4603
4604           make_tag (name, cp - name, false, lb.buffer,
4605                     cp - lb.buffer + 1, lineno, linecharno);
4606         }
4607     }
4608 }
4609
4610 \f
4611 /*
4612  * Ada parsing
4613  * Original code by
4614  * Philippe Waroquiers (1998)
4615  */
4616
4617 /* Once we are positioned after an "interesting" keyword, let's get
4618    the real tag value necessary. */
4619 static void
4620 Ada_getit (FILE *inf, const char *name_qualifier)
4621 {
4622   register char *cp;
4623   char *name;
4624   char c;
4625
4626   while (perhaps_more_input (inf))
4627     {
4628       dbp = skip_spaces (dbp);
4629       if (*dbp == '\0'
4630           || (dbp[0] == '-' && dbp[1] == '-'))
4631         {
4632           readline (&lb, inf);
4633           dbp = lb.buffer;
4634         }
4635       switch (c_tolower (*dbp))
4636         {
4637         case 'b':
4638           if (nocase_tail ("body"))
4639             {
4640               /* Skipping body of   procedure body   or   package body or ....
4641                  resetting qualifier to body instead of spec. */
4642               name_qualifier = "/b";
4643               continue;
4644             }
4645           break;
4646         case 't':
4647           /* Skipping type of   task type   or   protected type ... */
4648           if (nocase_tail ("type"))
4649             continue;
4650           break;
4651         }
4652       if (*dbp == '"')
4653         {
4654           dbp += 1;
4655           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4656             continue;
4657         }
4658       else
4659         {
4660           dbp = skip_spaces (dbp);
4661           for (cp = dbp;
4662                c_isalnum (*cp) || *cp == '_' || *cp == '.';
4663                cp++)
4664             continue;
4665           if (cp == dbp)
4666             return;
4667         }
4668       c = *cp;
4669       *cp = '\0';
4670       name = concat (dbp, name_qualifier, "");
4671       *cp = c;
4672       make_tag (name, strlen (name), true,
4673                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4674       free (name);
4675       if (c == '"')
4676         dbp = cp + 1;
4677       return;
4678     }
4679 }
4680
4681 static void
4682 Ada_funcs (FILE *inf)
4683 {
4684   bool inquote = false;
4685   bool skip_till_semicolumn = false;
4686
4687   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4688     {
4689       while (*dbp != '\0')
4690         {
4691           /* Skip a string i.e. "abcd". */
4692           if (inquote || (*dbp == '"'))
4693             {
4694               dbp = strchr (dbp + !inquote, '"');
4695               if (dbp != NULL)
4696                 {
4697                   inquote = false;
4698                   dbp += 1;
4699                   continue;     /* advance char */
4700                 }
4701               else
4702                 {
4703                   inquote = true;
4704                   break;        /* advance line */
4705                 }
4706             }
4707
4708           /* Skip comments. */
4709           if (dbp[0] == '-' && dbp[1] == '-')
4710             break;              /* advance line */
4711
4712           /* Skip character enclosed in single quote i.e. 'a'
4713              and skip single quote starting an attribute i.e. 'Image. */
4714           if (*dbp == '\'')
4715             {
4716               dbp++ ;
4717               if (*dbp != '\0')
4718                 dbp++;
4719               continue;
4720             }
4721
4722           if (skip_till_semicolumn)
4723             {
4724               if (*dbp == ';')
4725                 skip_till_semicolumn = false;
4726               dbp++;
4727               continue;         /* advance char */
4728             }
4729
4730           /* Search for beginning of a token.  */
4731           if (!begtoken (*dbp))
4732             {
4733               dbp++;
4734               continue;         /* advance char */
4735             }
4736
4737           /* We are at the beginning of a token. */
4738           switch (c_tolower (*dbp))
4739             {
4740             case 'f':
4741               if (!packages_only && nocase_tail ("function"))
4742                 Ada_getit (inf, "/f");
4743               else
4744                 break;          /* from switch */
4745               continue;         /* advance char */
4746             case 'p':
4747               if (!packages_only && nocase_tail ("procedure"))
4748                 Ada_getit (inf, "/p");
4749               else if (nocase_tail ("package"))
4750                 Ada_getit (inf, "/s");
4751               else if (nocase_tail ("protected")) /* protected type */
4752                 Ada_getit (inf, "/t");
4753               else
4754                 break;          /* from switch */
4755               continue;         /* advance char */
4756
4757             case 'u':
4758               if (typedefs && !packages_only && nocase_tail ("use"))
4759                 {
4760                   /* when tagging types, avoid tagging  use type Pack.Typename;
4761                      for this, we will skip everything till a ; */
4762                   skip_till_semicolumn = true;
4763                   continue;     /* advance char */
4764                 }
4765
4766             case 't':
4767               if (!packages_only && nocase_tail ("task"))
4768                 Ada_getit (inf, "/k");
4769               else if (typedefs && !packages_only && nocase_tail ("type"))
4770                 {
4771                   Ada_getit (inf, "/t");
4772                   while (*dbp != '\0')
4773                     dbp += 1;
4774                 }
4775               else
4776                 break;          /* from switch */
4777               continue;         /* advance char */
4778             }
4779
4780           /* Look for the end of the token. */
4781           while (!endtoken (*dbp))
4782             dbp++;
4783
4784         } /* advance char */
4785     } /* advance line */
4786 }
4787
4788 \f
4789 /*
4790  * Unix and microcontroller assembly tag handling
4791  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4792  * Idea by Bob Weiner, Motorola Inc. (1994)
4793  */
4794 static void
4795 Asm_labels (FILE *inf)
4796 {
4797   register char *cp;
4798
4799   LOOP_ON_INPUT_LINES (inf, lb, cp)
4800     {
4801       /* If first char is alphabetic or one of [_.$], test for colon
4802          following identifier. */
4803       if (c_isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4804         {
4805           /* Read past label. */
4806           cp++;
4807           while (c_isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4808             cp++;
4809           if (*cp == ':' || c_isspace (*cp))
4810             /* Found end of label, so copy it and add it to the table. */
4811             make_tag (lb.buffer, cp - lb.buffer, true,
4812                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4813         }
4814     }
4815 }
4816
4817 \f
4818 /*
4819  * Perl support
4820  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4821  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4822  * Perl variable names: /^(my|local).../
4823  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4824  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4825  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4826  */
4827 static void
4828 Perl_functions (FILE *inf)
4829 {
4830   char *package = savestr ("main"); /* current package name */
4831   register char *cp;
4832
4833   LOOP_ON_INPUT_LINES (inf, lb, cp)
4834     {
4835       cp = skip_spaces (cp);
4836
4837       if (LOOKING_AT (cp, "package"))
4838         {
4839           free (package);
4840           get_tag (cp, &package);
4841         }
4842       else if (LOOKING_AT (cp, "sub"))
4843         {
4844           char *pos, *sp;
4845
4846         subr:
4847           sp = cp;
4848           while (!notinname (*cp))
4849             cp++;
4850           if (cp == sp)
4851             continue;           /* nothing found */
4852           pos = strchr (sp, ':');
4853           if (pos && pos < cp && pos[1] == ':')
4854             {
4855               /* The name is already qualified. */
4856               if (!class_qualify)
4857                 {
4858                   char *q = pos + 2, *qpos;
4859                   while ((qpos = strchr (q, ':')) != NULL
4860                          && qpos < cp
4861                          && qpos[1] == ':')
4862                     q = qpos + 2;
4863                   sp = q;
4864                 }
4865               make_tag (sp, cp - sp, true,
4866                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4867             }
4868           else if (class_qualify)
4869             /* Qualify it. */
4870             {
4871               char savechar, *name;
4872
4873               savechar = *cp;
4874               *cp = '\0';
4875               name = concat (package, "::", sp);
4876               *cp = savechar;
4877               make_tag (name, strlen (name), true,
4878                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4879               free (name);
4880             }
4881           else
4882             make_tag (sp, cp - sp, true,
4883                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4884         }
4885       else if (LOOKING_AT (cp, "use constant")
4886                || LOOKING_AT (cp, "use constant::defer"))
4887         {
4888           /* For hash style multi-constant like
4889                 use constant { FOO => 123,
4890                                BAR => 456 };
4891              only the first FOO is picked up.  Parsing across the value
4892              expressions would be difficult in general, due to possible nested
4893              hashes, here-documents, etc.  */
4894           if (*cp == '{')
4895             cp = skip_spaces (cp+1);
4896           goto subr;
4897         }
4898       else if (globals) /* only if we are tagging global vars */
4899         {
4900           /* Skip a qualifier, if any. */
4901           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4902           /* After "my" or "local", but before any following paren or space. */
4903           char *varstart = cp;
4904
4905           if (qual              /* should this be removed?  If yes, how? */
4906               && (*cp == '$' || *cp == '@' || *cp == '%'))
4907             {
4908               varstart += 1;
4909               do
4910                 cp++;
4911               while (c_isalnum (*cp) || *cp == '_');
4912             }
4913           else if (qual)
4914             {
4915               /* Should be examining a variable list at this point;
4916                  could insist on seeing an open parenthesis. */
4917               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4918                 cp++;
4919             }
4920           else
4921             continue;
4922
4923           make_tag (varstart, cp - varstart, false,
4924                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4925         }
4926     }
4927   free (package);
4928 }
4929
4930
4931 /*
4932  * Python support
4933  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4934  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4935  * More ideas by seb bacon <seb@jamkit.com> (2002)
4936  */
4937 static void
4938 Python_functions (FILE *inf)
4939 {
4940   register char *cp;
4941
4942   LOOP_ON_INPUT_LINES (inf, lb, cp)
4943     {
4944       cp = skip_spaces (cp);
4945       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4946         {
4947           char *name = cp;
4948           while (!notinname (*cp) && *cp != ':')
4949             cp++;
4950           make_tag (name, cp - name, true,
4951                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4952         }
4953     }
4954 }
4955
4956 /*
4957  * Ruby support
4958  * Original code by Xi Lu <lx@shellcodes.org> (2015)
4959  */
4960 static void
4961 Ruby_functions (FILE *inf)
4962 {
4963   char *cp = NULL;
4964   bool reader = false, writer = false, alias = false, continuation = false;
4965
4966   LOOP_ON_INPUT_LINES (inf, lb, cp)
4967     {
4968       bool is_class = false;
4969       bool is_method = false;
4970       char *name;
4971
4972       cp = skip_spaces (cp);
4973       if (!continuation
4974           /* Constants.  */
4975           && c_isalpha (*cp) && c_isupper (*cp))
4976         {
4977           char *bp, *colon = NULL;
4978
4979           name = cp;
4980
4981           for (cp++; c_isalnum (*cp) || *cp == '_' || *cp == ':'; cp++)
4982             {
4983               if (*cp == ':')
4984                 colon = cp;
4985             }
4986           if (cp > name + 1)
4987             {
4988               bp = skip_spaces (cp);
4989               if (*bp == '=' && !(bp[1] == '=' || bp[1] == '>'))
4990                 {
4991                   if (colon && !c_isspace (colon[1]))
4992                     name = colon + 1;
4993                   make_tag (name, cp - name, false,
4994                             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4995                 }
4996             }
4997         }
4998       else if (!continuation
4999                /* Modules, classes, methods.  */
5000                && ((is_method = LOOKING_AT (cp, "def"))
5001                    || (is_class = LOOKING_AT (cp, "class"))
5002                    || LOOKING_AT (cp, "module")))
5003         {
5004           const char self_name[] = "self.";
5005           const size_t self_size1 = sizeof (self_name) - 1;
5006
5007           name = cp;
5008
5009          /* Ruby method names can end in a '='.  Also, operator overloading can
5010             define operators whose names include '='.  */
5011           while (!notinname (*cp) || *cp == '=')
5012             cp++;
5013
5014           /* Remove "self." from the method name.  */
5015           if (cp - name > self_size1
5016               && strneq (name, self_name, self_size1))
5017             name += self_size1;
5018
5019           /* Remove the class/module qualifiers from method names.  */
5020           if (is_method)
5021             {
5022               char *q;
5023
5024               for (q = name; q < cp && *q != '.'; q++)
5025                 ;
5026               if (q < cp - 1)   /* punt if we see just "FOO." */
5027                 name = q + 1;
5028             }
5029
5030           /* Don't tag singleton classes.  */
5031           if (is_class && strneq (name, "<<", 2) && cp == name + 2)
5032             continue;
5033
5034           make_tag (name, cp - name, true,
5035                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5036         }
5037       else
5038         {
5039           /* Tag accessors and aliases.  */
5040
5041           if (!continuation)
5042             reader = writer = alias = false;
5043
5044           while (*cp && *cp != '#')
5045             {
5046               if (!continuation)
5047                 {
5048                   reader = writer = alias = false;
5049                   if (LOOKING_AT (cp, "attr_reader"))
5050                     reader = true;
5051                   else if (LOOKING_AT (cp, "attr_writer"))
5052                     writer = true;
5053                   else if (LOOKING_AT (cp, "attr_accessor"))
5054                     {
5055                       reader = true;
5056                       writer = true;
5057                     }
5058                   else if (LOOKING_AT (cp, "alias_method"))
5059                     alias = true;
5060                 }
5061               if (reader || writer || alias)
5062                 {
5063                   do {
5064                     char *np;
5065
5066                     cp = skip_spaces (cp);
5067                     if (*cp == '(')
5068                       cp = skip_spaces (cp + 1);
5069                     np = cp;
5070                     cp = skip_name (cp);
5071                     if (*np != ':')
5072                       continue;
5073                     np++;
5074                     if (reader)
5075                       {
5076                         make_tag (np, cp - np, true,
5077                                   lb.buffer, cp - lb.buffer + 1,
5078                                   lineno, linecharno);
5079                         continuation = false;
5080                       }
5081                     if (writer)
5082                       {
5083                         size_t name_len = cp - np + 1;
5084                         char *wr_name = xmalloc (name_len + 1);
5085
5086                         strcpy (mempcpy (wr_name, np, name_len - 1), "=");
5087                         pfnote (wr_name, true, lb.buffer, cp - lb.buffer + 1,
5088                                 lineno, linecharno);
5089                         if (debug)
5090                           fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n", wr_name,
5091                                    curfdp->taggedfname, lineno, lb.buffer);
5092                         continuation = false;
5093                       }
5094                     if (alias)
5095                       {
5096                         if (!continuation)
5097                           make_tag (np, cp - np, true,
5098                                     lb.buffer, cp - lb.buffer + 1,
5099                                     lineno, linecharno);
5100                         continuation = false;
5101                         while (*cp && *cp != '#' && *cp != ';')
5102                           {
5103                             if (*cp == ',')
5104                               continuation = true;
5105                             else if (!c_isspace (*cp))
5106                               continuation = false;
5107                             cp++;
5108                           }
5109                         if (*cp == ';')
5110                           continuation = false;
5111                       }
5112                     cp = skip_spaces (cp);
5113                   } while ((alias
5114                             ? (*cp == ',')
5115                             : (continuation = (*cp == ',')))
5116                            && (cp = skip_spaces (cp + 1), *cp && *cp != '#'));
5117                 }
5118               if (*cp != '#')
5119                 cp = skip_name (cp);
5120               while (*cp && *cp != '#' && notinname (*cp))
5121                 cp++;
5122             }
5123         }
5124     }
5125 }
5126
5127 \f
5128 /*
5129  * Rust support
5130  * Look for:
5131  *  - fn: Function
5132  *  - struct: Structure
5133  *  - enum: Enumeration
5134  *  - macro_rules!: Macro
5135  */
5136 static void
5137 Rust_entries (FILE *inf)
5138 {
5139   char *cp, *name;
5140   bool is_func = false;
5141
5142   LOOP_ON_INPUT_LINES(inf, lb, cp)
5143     {
5144       cp = skip_spaces(cp);
5145       name = cp;
5146
5147       // Skip 'pub' keyworld
5148       (void)LOOKING_AT (cp, "pub");
5149
5150       // Look for define
5151       if ((is_func = LOOKING_AT (cp, "fn"))
5152           || LOOKING_AT (cp, "enum")
5153           || LOOKING_AT (cp, "struct")
5154           || (is_func = LOOKING_AT (cp, "macro_rules!")))
5155         {
5156           cp = skip_spaces (cp);
5157           name = cp;
5158
5159           while (!notinname (*cp))
5160             cp++;
5161
5162           make_tag (name, cp - name, is_func,
5163                     lb.buffer, cp - lb.buffer + 1,
5164                     lineno, linecharno);
5165           is_func = false;
5166         }
5167     }
5168 }
5169
5170 \f
5171 /*
5172  * PHP support
5173  * Look for:
5174  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
5175  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
5176  *  - /^[ \t]*define\(\"[^\"]+/
5177  * Only with --members:
5178  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
5179  * Idea by Diez B. Roggisch (2001)
5180  */
5181 static void
5182 PHP_functions (FILE *inf)
5183 {
5184   char *cp, *name;
5185   bool search_identifier = false;
5186
5187   LOOP_ON_INPUT_LINES (inf, lb, cp)
5188     {
5189       cp = skip_spaces (cp);
5190       name = cp;
5191       if (search_identifier
5192           && *cp != '\0')
5193         {
5194           while (!notinname (*cp))
5195             cp++;
5196           make_tag (name, cp - name, true,
5197                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5198           search_identifier = false;
5199         }
5200       else if (LOOKING_AT (cp, "function"))
5201         {
5202           if (*cp == '&')
5203             cp = skip_spaces (cp+1);
5204           if (*cp != '\0')
5205             {
5206               name = cp;
5207               while (!notinname (*cp))
5208                 cp++;
5209               make_tag (name, cp - name, true,
5210                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5211             }
5212           else
5213             search_identifier = true;
5214         }
5215       else if (LOOKING_AT (cp, "class"))
5216         {
5217           if (*cp != '\0')
5218             {
5219               name = cp;
5220               while (*cp != '\0' && !c_isspace (*cp))
5221                 cp++;
5222               make_tag (name, cp - name, false,
5223                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5224             }
5225           else
5226             search_identifier = true;
5227         }
5228       else if (strneq (cp, "define", 6)
5229                && (cp = skip_spaces (cp+6))
5230                && *cp++ == '('
5231                && (*cp == '"' || *cp == '\''))
5232         {
5233           char quote = *cp++;
5234           name = cp;
5235           while (*cp != quote && *cp != '\0')
5236             cp++;
5237           make_tag (name, cp - name, false,
5238                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5239         }
5240       else if (members
5241                && LOOKING_AT (cp, "var")
5242                && *cp == '$')
5243         {
5244           name = cp;
5245           while (!notinname (*cp))
5246             cp++;
5247           make_tag (name, cp - name, false,
5248                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5249         }
5250     }
5251 }
5252
5253 \f
5254 /*
5255  * Cobol tag functions
5256  * We could look for anything that could be a paragraph name.
5257  * i.e. anything that starts in column 8 is one word and ends in a full stop.
5258  * Idea by Corny de Souza (1993)
5259  */
5260 static void
5261 Cobol_paragraphs (FILE *inf)
5262 {
5263   register char *bp, *ep;
5264
5265   LOOP_ON_INPUT_LINES (inf, lb, bp)
5266     {
5267       if (lb.len < 9)
5268         continue;
5269       bp += 8;
5270
5271       /* If eoln, compiler option or comment ignore whole line. */
5272       if (bp[-1] != ' ' || !c_isalnum (bp[0]))
5273         continue;
5274
5275       for (ep = bp; c_isalnum (*ep) || *ep == '-'; ep++)
5276         continue;
5277       if (*ep++ == '.')
5278         make_tag (bp, ep - bp, true,
5279                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5280     }
5281 }
5282
5283 \f
5284 /*
5285  * Makefile support
5286  * Ideas by Assar Westerlund <assar@sics.se> (2001)
5287  */
5288 static void
5289 Makefile_targets (FILE *inf)
5290 {
5291   register char *bp;
5292
5293   LOOP_ON_INPUT_LINES (inf, lb, bp)
5294     {
5295       if (*bp == '\t' || *bp == '#')
5296         continue;
5297       while (*bp != '\0' && *bp != '=' && *bp != ':')
5298         bp++;
5299       if (*bp == ':' || (globals && *bp == '='))
5300         {
5301           /* We should detect if there is more than one tag, but we do not.
5302              We just skip initial and final spaces. */
5303           char * namestart = skip_spaces (lb.buffer);
5304           while (--bp > namestart)
5305             if (!notinname (*bp))
5306               break;
5307           make_tag (namestart, bp - namestart + 1, true,
5308                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
5309         }
5310     }
5311 }
5312
5313 \f
/*
 * Pascal parsing
 * Original code by Mosur K. Mohan (1989)
 *
 *  Locates tags for procedures & functions.  Doesn't do any type- or
 *  var-definitions.  It does look for the keyword "extern" or
 *  "forward" immediately following the procedure statement; if found,
 *  the tag is skipped.
 *
 *  The input is scanned one character at a time through DBP, reading a
 *  new line into LB whenever the end of the current one is reached.
 *  A small set of flags tracks whether the scan is inside a comment,
 *  a quoted string or a parameter list, and how far the recognition of
 *  a procedure/function heading has progressed.  The line holding a
 *  candidate name is copied into a private buffer, because the tag can
 *  only be emitted after later input has been examined.
 */
static void
Pascal_functions (FILE *inf)
{
  linebuffer tline;             /* mostly copied from C_entries */
  intmax_t save_lcno, save_lineno;
  ptrdiff_t namelen, taglen;
  char c, *name;

  bool                          /* each of these flags is true if: */
    incomment,                  /* point is inside a comment */
    inquote,                    /* point is inside '..' string */
    get_tagname,                /* point is after PROCEDURE/FUNCTION
                                   keyword, so next item = potential tag */
    found_tag,                  /* point is after a potential tag */
    inparms,                    /* point is within parameter-list */
    verify_tag;                 /* point has passed the parm-list, so the
                                   next token will determine whether this
                                   is a FORWARD/EXTERN to be ignored, or
                                   whether it is a real tag */

  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  name = NULL;                  /* keep compiler quiet */
  /* Start with an empty line so that the first loop iteration reads
     the first line of input.  */
  dbp = lb.buffer;
  *dbp = '\0';
  linebuffer_init (&tline);

  incomment = inquote = false;
  found_tag = false;            /* have a proc name; check if extern */
  get_tagname = false;          /* found "procedure" keyword         */
  inparms = false;              /* found '(' after "proc"            */
  verify_tag = false;           /* check if "extern" is ahead        */


  while (perhaps_more_input (inf)) /* long main loop to get next char */
    {
      c = *dbp++;
      if (c == '\0')            /* if end of line */
        {
          readline (&lb, inf);
          dbp = lb.buffer;
          if (*dbp == '\0')
            continue;
          if (!((found_tag && verify_tag)
                || get_tagname))
            c = *dbp++;         /* only if don't need *dbp pointing
                                   to the beginning of the name of
                                   the procedure or function */
        }
      if (incomment)
        {
          if (c == '}')         /* within { } comments */
            incomment = false;
          else if (c == '*' && *dbp == ')') /* within (* *) comments */
            {
              dbp++;
              incomment = false;
            }
          continue;
        }
      else if (inquote)
        {
          if (c == '\'')
            inquote = false;
          continue;
        }
      else
        /* Outside comments and strings: handle the characters that
           change the scanning state.  Every case except the ';' that
           ends a heading restarts the main loop.  */
        switch (c)
          {
          case '\'':
            inquote = true;     /* found first quote */
            continue;
          case '{':             /* found open { comment */
            incomment = true;
            continue;
          case '(':
            if (*dbp == '*')    /* found open (* comment */
              {
                incomment = true;
                dbp++;
              }
            else if (found_tag) /* found '(' after tag, i.e., parm-list */
              inparms = true;
            continue;
          case ')':             /* end of parms list */
            if (inparms)
              inparms = false;
            continue;
          case ';':
            if (found_tag && !inparms) /* end of proc or fn stmt */
              {
                verify_tag = true;
                break;
              }
            continue;
          }
      /* A heading has been completed: look at what follows it, skipping
         blanks one main-loop iteration at a time.  */
      if (found_tag && verify_tag && (*dbp != ' '))
        {
          /* Check if this is an "extern" declaration. */
          if (*dbp == '\0')
            continue;
          if (c_tolower (*dbp) == 'e')
            {
              if (nocase_tail ("extern")) /* superfluous, really! */
                {
                  found_tag = false;
                  verify_tag = false;
                }
            }
          else if (c_tolower (*dbp) == 'f')
            {
              if (nocase_tail ("forward")) /* check for forward reference */
                {
                  found_tag = false;
                  verify_tag = false;
                }
            }
          if (found_tag && verify_tag) /* not external proc, so make tag */
            {
              found_tag = false;
              verify_tag = false;
              /* Emit the tag from the values saved when the name was
                 found; LB may hold a later line by now.  */
              make_tag (name, namelen, true,
                        tline.buffer, taglen, save_lineno, save_lcno);
              continue;
            }
        }
      if (get_tagname)          /* grab name of proc or fn */
        {
          char *cp;

          if (*dbp == '\0')
            continue;

          /* Find block name. */
          for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
            continue;

          /* Save all values for later tagging. */
          linebuffer_setlen (&tline, lb.len);
          strcpy (tline.buffer, lb.buffer);
          save_lineno = lineno;
          save_lcno = linecharno;
          /* NAME points into the saved copy, at the same offset the
             name has in LB.  */
          name = tline.buffer + (dbp - lb.buffer);
          namelen = cp - dbp;
          taglen = cp - lb.buffer + 1;

          dbp = cp;             /* set dbp to e-o-token */
          get_tagname = false;
          found_tag = true;
          continue;

          /* And proceed to check for "extern". */
        }
      else if (!incomment && !inquote && !found_tag)
        {
          /* Check for proc/fn keywords. */
          switch (c_tolower (c))
            {
            case 'p':
              if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
                get_tagname = true;
              continue;
            case 'f':
              if (nocase_tail ("unction"))
                get_tagname = true;
              continue;
            }
        }
    } /* while not eof */

  free (tline.buffer);
}
5494
5495 \f
5496 /*
5497  * Lisp tag functions
5498  *  look for (def or (DEF, quote or QUOTE
5499  */
5500
5501 static void L_getit (void);
5502
5503 static void
5504 L_getit (void)
5505 {
5506   if (*dbp == '\'')             /* Skip prefix quote */
5507     dbp++;
5508   else if (*dbp == '(')
5509   {
5510     dbp++;
5511     /* Try to skip "(quote " */
5512     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
5513       /* Ok, then skip "(" before name in (defstruct (foo)) */
5514       dbp = skip_spaces (dbp);
5515   }
5516   get_lispy_tag (dbp);
5517 }
5518
5519 static void
5520 Lisp_functions (FILE *inf)
5521 {
5522   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5523     {
5524       if (dbp[0] != '(')
5525         continue;
5526
5527       /* "(defvar foo)" is a declaration rather than a definition.  */
5528       if (! declarations)
5529         {
5530           char *p = dbp + 1;
5531           if (LOOKING_AT (p, "defvar"))
5532             {
5533               p = skip_name (p); /* past var name */
5534               p = skip_spaces (p);
5535               if (*p == ')')
5536                 continue;
5537             }
5538         }
5539
5540       if (strneq (dbp + 1, "cl-", 3) || strneq (dbp + 1, "CL-", 3))
5541         dbp += 3;
5542
5543       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5544         {
5545           dbp = skip_non_spaces (dbp);
5546           dbp = skip_spaces (dbp);
5547           L_getit ();
5548         }
5549       else
5550         {
5551           /* Check for (foo::defmumble name-defined ... */
5552           do
5553             dbp++;
5554           while (!notinname (*dbp) && *dbp != ':');
5555           if (*dbp == ':')
5556             {
5557               do
5558                 dbp++;
5559               while (*dbp == ':');
5560
5561               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5562                 {
5563                   dbp = skip_non_spaces (dbp);
5564                   dbp = skip_spaces (dbp);
5565                   L_getit ();
5566                 }
5567             }
5568         }
5569     }
5570 }
5571
5572 \f
5573 /*
5574  * Lua script language parsing
5575  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5576  *
5577  *  "function" and "local function" are tags if they start at column 1.
5578  */
5579 static void
5580 Lua_functions (FILE *inf)
5581 {
5582   register char *bp;
5583
5584   LOOP_ON_INPUT_LINES (inf, lb, bp)
5585     {
5586       bp = skip_spaces (bp);
5587       if (bp[0] != 'f' && bp[0] != 'l')
5588         continue;
5589
5590       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5591
5592       if (LOOKING_AT (bp, "function"))
5593         {
5594           char *tag_name, *tp_dot, *tp_colon;
5595
5596           get_tag (bp, &tag_name);
5597           /* If the tag ends with ".foo" or ":foo", make an additional tag for
5598              "foo".  */
5599           tp_dot = strrchr (tag_name, '.');
5600           tp_colon = strrchr (tag_name, ':');
5601           if (tp_dot || tp_colon)
5602             {
5603               char *p = tp_dot > tp_colon ? tp_dot : tp_colon;
5604               ptrdiff_t len_add = p - tag_name + 1;
5605
5606               get_tag (bp + len_add, NULL);
5607             }
5608         }
5609     }
5610 }
5611
5612 \f
5613 /*
5614  * PostScript tags
5615  * Just look for lines where the first character is '/'
5616  * Also look at "defineps" for PSWrap
5617  * Ideas by:
5618  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5619  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5620  */
5621 static void
5622 PS_functions (FILE *inf)
5623 {
5624   register char *bp, *ep;
5625
5626   LOOP_ON_INPUT_LINES (inf, lb, bp)
5627     {
5628       if (bp[0] == '/')
5629         {
5630           for (ep = bp+1;
5631                *ep != '\0' && *ep != ' ' && *ep != '{';
5632                ep++)
5633             continue;
5634           make_tag (bp, ep - bp, true,
5635                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5636         }
5637       else if (LOOKING_AT (bp, "defineps"))
5638         get_tag (bp, NULL);
5639     }
5640 }
5641
5642 \f
5643 /*
5644  * Forth tags
5645  * Ignore anything after \ followed by space or in ( )
5646  * Look for words defined by :
5647  * Look for constant, code, create, defer, value, and variable
5648  * OBP extensions:  Look for buffer:, field,
5649  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5650  */
5651 static void
5652 Forth_words (FILE *inf)
5653 {
5654   register char *bp;
5655
5656   LOOP_ON_INPUT_LINES (inf, lb, bp)
5657     while ((bp = skip_spaces (bp))[0] != '\0')
5658       if (bp[0] == '\\' && c_isspace (bp[1]))
5659         break;                  /* read next line */
5660       else if (bp[0] == '(' && c_isspace (bp[1]))
5661         do                      /* skip to ) or eol */
5662           bp++;
5663         while (*bp != ')' && *bp != '\0');
5664       else if (((bp[0] == ':' && c_isspace (bp[1]) && bp++)
5665                 || LOOKING_AT_NOCASE (bp, "constant")
5666                 || LOOKING_AT_NOCASE (bp, "2constant")
5667                 || LOOKING_AT_NOCASE (bp, "fconstant")
5668                 || LOOKING_AT_NOCASE (bp, "code")
5669                 || LOOKING_AT_NOCASE (bp, "create")
5670                 || LOOKING_AT_NOCASE (bp, "defer")
5671                 || LOOKING_AT_NOCASE (bp, "value")
5672                 || LOOKING_AT_NOCASE (bp, "2value")
5673                 || LOOKING_AT_NOCASE (bp, "fvalue")
5674                 || LOOKING_AT_NOCASE (bp, "variable")
5675                 || LOOKING_AT_NOCASE (bp, "2variable")
5676                 || LOOKING_AT_NOCASE (bp, "fvariable")
5677                 || LOOKING_AT_NOCASE (bp, "buffer:")
5678                 || LOOKING_AT_NOCASE (bp, "field:")
5679                 || LOOKING_AT_NOCASE (bp, "+field")
5680                 || LOOKING_AT_NOCASE (bp, "field") /* not standard? */
5681                 || LOOKING_AT_NOCASE (bp, "begin-structure")
5682                 || LOOKING_AT_NOCASE (bp, "synonym")
5683                 )
5684                && c_isspace (bp[0]))
5685         {
5686           /* Yay!  A definition! */
5687           char* name_start = skip_spaces (bp);
5688           char* name_end = skip_non_spaces (name_start);
5689           if (name_start < name_end)
5690             make_tag (name_start, name_end - name_start,
5691                       true, lb.buffer, name_end - lb.buffer,
5692                       lineno, linecharno);
5693           bp = name_end;
5694         }
5695       else
5696         bp = skip_non_spaces (bp);
5697 }
5698
5699 \f
5700 /*
5701  * Scheme tag functions
5702  * look for (def... xyzzy
5703  *          (def... (xyzzy
5704  *          (def ... ((...(xyzzy ....
5705  *          (set! xyzzy
5706  * Original code by Ken Haase (1985?)
5707  */
5708 static void
5709 Scheme_functions (FILE *inf)
5710 {
5711   register char *bp;
5712
5713   LOOP_ON_INPUT_LINES (inf, lb, bp)
5714     {
5715       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5716         {
5717           bp = skip_non_spaces (bp+4);
5718           /* Skip over open parens and white space.
5719              Don't continue past '\0' or '='. */
5720           while (*bp && notinname (*bp) && *bp != '=')
5721             bp++;
5722           get_lispy_tag (bp);
5723         }
5724       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5725         get_lispy_tag (bp);
5726     }
5727 }
5728
5729 \f
5730 /* Find tags in TeX and LaTeX input files.  */
5731
5732 /* TEX_toktab is a table of TeX control sequences that define tags.
5733  * Each entry records one such control sequence.
5734  *
5735  * Original code from who knows whom.
5736  * Ideas by:
5737  *   Stefan Monnier (2002)
5738  */
5739
/* Table with tag tokens.  It is built by TEX_decode_env the first time
   TeX_commands runs, and its end is marked by an entry whose buffer is
   NULL.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by colons.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

/* Defined after TeX_commands, which calls it.  */
static void TEX_decode_env (const char *, const char *);
5750
5751 /*
5752  * TeX/LaTeX scanning loop.
5753  */
5754 static void
5755 TeX_commands (FILE *inf)
5756 {
5757   char *cp;
5758   linebuffer *key;
5759
5760   char TEX_esc = '\0';
5761   char TEX_opgrp UNINIT, TEX_clgrp UNINIT;
5762
5763   /* Initialize token table once from environment. */
5764   if (TEX_toktab == NULL)
5765     TEX_decode_env ("TEXTAGS", TEX_defenv);
5766
5767   LOOP_ON_INPUT_LINES (inf, lb, cp)
5768     {
5769       /* Look at each TEX keyword in line. */
5770       for (;;)
5771         {
5772           /* Look for a TEX escape. */
5773           while (true)
5774             {
5775               char c = *cp++;
5776               if (c == '\0' || c == '%')
5777                 goto tex_next_line;
5778
5779               /* Select either \ or ! as escape character, whichever comes
5780                  first outside a comment.  */
5781               if (!TEX_esc)
5782                 switch (c)
5783                   {
5784                   case '\\':
5785                     TEX_esc = c;
5786                     TEX_opgrp = '{';
5787                     TEX_clgrp = '}';
5788                     break;
5789
5790                   case '!':
5791                     TEX_esc = c;
5792                     TEX_opgrp = '<';
5793                     TEX_clgrp = '>';
5794                     break;
5795                   }
5796
5797               if (c == TEX_esc)
5798                 break;
5799             }
5800
5801           for (key = TEX_toktab; key->buffer != NULL; key++)
5802             if (strneq (cp, key->buffer, key->len))
5803               {
5804                 char *p;
5805                 ptrdiff_t namelen, linelen;
5806                 bool opgrp = false;
5807
5808                 cp = skip_spaces (cp + key->len);
5809                 if (*cp == TEX_opgrp)
5810                   {
5811                     opgrp = true;
5812                     cp++;
5813                   }
5814                 for (p = cp;
5815                      (!c_isspace (*p) && *p != '#' &&
5816                       *p != TEX_opgrp && *p != TEX_clgrp);
5817                      p++)
5818                   continue;
5819                 namelen = p - cp;
5820                 linelen = lb.len;
5821                 if (!opgrp || *p == TEX_clgrp)
5822                   {
5823                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5824                       p++;
5825                     linelen = p - lb.buffer + 1;
5826                   }
5827                 make_tag (cp, namelen, true,
5828                           lb.buffer, linelen, lineno, linecharno);
5829                 goto tex_next_line; /* We only tag a line once */
5830               }
5831         }
5832     tex_next_line:
5833       ;
5834     }
5835 }
5836
5837 /* Read environment and prepend it to the default string.
5838    Build token table. */
5839 static void
5840 TEX_decode_env (const char *evarname, const char *defenv)
5841 {
5842   const char *env, *p;
5843   ptrdiff_t len = 1;
5844
5845   /* Append default string to environment. */
5846   env = getenv (evarname);
5847   if (!env)
5848     env = defenv;
5849   else
5850     env = concat (env, defenv, "");
5851
5852   /* If the environment variable doesn't start with a colon, increase
5853      the length of the token table.  */
5854   if (*env != ':')
5855     len++;
5856
5857   /* Allocate a token table */
5858   for (p = env; (p = strchr (p, ':')); )
5859     if (*++p)
5860       len++;
5861   TEX_toktab = xnmalloc (len, sizeof *TEX_toktab);
5862
5863   /* Unpack environment string into token table. Be careful about */
5864   /* zero-length strings (leading ':', "::" and trailing ':') */
5865   for (ptrdiff_t i = 0; *env != '\0'; )
5866     {
5867       p = strchr (env, ':');
5868       if (!p)                   /* End of environment string. */
5869         p = env + strlen (env);
5870       if (p - env > 0)
5871         {                       /* Only non-zero strings. */
5872           TEX_toktab[i].buffer = savenstr (env, p - env);
5873           TEX_toktab[i].len = p - env;
5874           i++;
5875         }
5876       if (*p)
5877         env = p + 1;
5878       else
5879         {
5880           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5881           TEX_toktab[i].len = 0;
5882           break;
5883         }
5884     }
5885 }
5886
5887 \f
5888 /* Texinfo support.  Dave Love, Mar. 2000.  */
5889 static void
5890 Texinfo_nodes (FILE *inf)
5891 {
5892   char *cp, *start;
5893   LOOP_ON_INPUT_LINES (inf, lb, cp)
5894     if (LOOKING_AT (cp, "@node"))
5895       {
5896         start = cp;
5897         while (*cp != '\0' && *cp != ',')
5898           cp++;
5899         make_tag (start, cp - start, true,
5900                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5901       }
5902 }
5903
5904 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 *
 * The scanner is a small state machine driven by four flags.  The
 * flags are kept from one line to the next, so an HTML tag may be
 * split over several input lines.  The name found in a NAME= or ID=
 * attribute is stored in TOKEN_NAME; for <title> and the headings
 * TOKEN_NAME is left empty.
 *
 * Francesco Potortì, 2002.
 */
static void
HTML_labels (FILE *inf)
{
  bool getnext = false;         /* next text outside of HTML tags is a tag */
  bool skiptag = false;         /* skip to the end of the current HTML tag */
  bool intag = false;           /* inside an html tag, looking for ID= */
  bool inanchor = false;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = false;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Stop at the end of the tag or at a letter that could
               start one of the two attribute names.  */
            while (*dbp != '\0' && *dbp != '>'
                   && c_tolower (*dbp) != 'n' && c_tolower (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = false;
                continue;       /* look on the same line */
              }
            /* NAME= counts only inside an anchor; ID= counts in any
               tag.  */
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                /* DBP is now at the first character of the attribute
                   value.  */
                bool quoted = (dbp[0] == '"');

                /* A quoted value runs to the closing quote, an unquoted
                   one as long as the characters belong to a token.  */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                linebuffer_setlen (&token_name, end - dbp);
                memcpyz (token_name.buffer, dbp, end - dbp);

                dbp = end;
                intag = false;  /* we found what we looked for */
                skiptag = true; /* skip to the end of the tag */
                getnext = true; /* then grab the text */
                continue;       /* look on the same line */
              }
            /* Just an 'n' or 'i' that starts nothing interesting.  */
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: examine it
                   too, while still waiting for the text.  */
                intag = true;
                inanchor = (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text to tag extends up to the next HTML tag or the
               end of the line.  */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, true,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = false;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = true;
            /* An anchor is "<a" followed by a character that cannot be
               part of a token.  */
            if (c_tolower (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = true;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = false;
                getnext = true;
                continue;       /* look on the same line */
              }
            /* Some other tag: step over the '<' and scan its
               attributes.  */
            dbp += 1;
          }
      }
}
6022
6023 \f
6024 /*
6025  * Prolog support
6026  *
6027  * Assumes that the predicate or rule starts at column 0.
6028  * Only the first clause of a predicate or rule is added.
6029  * Original code by Sunichirou Sugou (1989)
6030  * Rewritten by Anders Lindgren (1996)
6031  */
/* Helpers used by the Prolog support; they are defined further down.  */
static ptrdiff_t prolog_pr (char *, char *, ptrdiff_t);
static void prolog_skip_comment (linebuffer *, FILE *);
static size_t prolog_atom (char *, size_t);
6035
6036 static void
6037 Prolog_functions (FILE *inf)
6038 {
6039   char *cp, *last = NULL;
6040   ptrdiff_t lastlen = 0, allocated = 0;
6041
6042   LOOP_ON_INPUT_LINES (inf, lb, cp)
6043     {
6044       if (cp[0] == '\0')        /* Empty line */
6045         continue;
6046       else if (c_isspace (cp[0])) /* Not a predicate */
6047         continue;
6048       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
6049         prolog_skip_comment (&lb, inf);
6050       else
6051         {
6052           ptrdiff_t len = prolog_pr (cp, last, lastlen);
6053           if (0 < len)
6054             {
6055               /* Store the predicate name to avoid generating duplicate
6056                  tags later.  */
6057               if (allocated <= len)
6058                 {
6059                   xrnew (last, len + 1, 1);
6060                   allocated = len + 1;
6061                 }
6062               memcpyz (last, cp, len);
6063               lastlen = len;
6064             }
6065         }
6066     }
6067   free (last);
6068 }
6069
6070
6071 static void
6072 prolog_skip_comment (linebuffer *plb, FILE *inf)
6073 {
6074   char *cp;
6075
6076   do
6077     {
6078       for (cp = plb->buffer; *cp != '\0'; cp++)
6079         if (cp[0] == '*' && cp[1] == '/')
6080           return;
6081       readline (plb, inf);
6082     }
6083   while (perhaps_more_input (inf));
6084 }
6085
6086 /*
6087  * A predicate or rule definition is added if it matches:
6088  *     <beginning of line><Prolog Atom><whitespace>(
6089  * or  <beginning of line><Prolog Atom><whitespace>:-
6090  *
6091  * It is added to the tags database if it doesn't match the
6092  * name of the previous clause header.
6093  *
6094  * Return the size of the name of the predicate or rule, or 0 if no
6095  * header was found.
6096  */
6097 static ptrdiff_t
6098 prolog_pr (char *s, char *last, ptrdiff_t lastlen)
6099 {
6100   ptrdiff_t len = prolog_atom (s, 0);
6101   if (len == 0)
6102     return 0;
6103   ptrdiff_t pos = skip_spaces (s + len) - s;
6104
6105   /* Save only the first clause.  */
6106   if ((s[pos] == '.'
6107        || (s[pos] == '(' && (pos += 1))
6108        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
6109       && ! (lastlen == len && memcmp (s, last, len) == 0))
6110     {
6111       make_tag (s, len, true, s, pos, lineno, linecharno);
6112       return len;
6113     }
6114
6115   return 0;
6116 }
6117
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was found.
 *
 * A Prolog atom, in this context, is one of:
 * - An unquoted sequence of alphanumerics and underscores that starts
 *   with a lower case letter or an underscore.
 * - A single-quoted arbitrary string.  A doubled single quote stands
 *   for a literal quote, and a backslash quotes the next character.
 *   A quoted atom that is not closed on the same line is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (c_islower (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (c_isalnum (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return p + 1 - start;   /* Closing quote is part of the atom.  */
        p += 2;                   /* A doubled quote.  */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
6174
6175 \f
6176 /*
6177  * Support for Mercury
6178  *
6179  * Assumes that the declarations start at column 0.
6180  * Original code by Sunichirou Sugou (1989) for Prolog.
6181  * Rewritten by Anders Lindgren (1996) for Prolog.
6182  * Adapted by Fabrice Nicol (2021) for Mercury.
6183  * Note: Prolog-support behavior is preserved if
6184  * --declarations is used, corresponding to
6185  * with_mercury_definitions=true.
6186  */
6187
6188 static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
6189 static void mercury_skip_comment (linebuffer *, FILE *);
/* State shared by Mercury_functions, mercury_pr and mercury_decl.  */

/* Set by mercury_decl when the declaration keyword is "type"; reset at
   the start of every mercury_pr call.  When set, mercury_pr makes a tag
   even if its other checks fail.  */
static bool is_mercury_type = false;

/* Set by mercury_decl when the keyword is "some" or "all"; while set,
   the next keyword parsed by mercury_decl must be "pred" or "func".  */
static bool is_mercury_quantifier = false;

/* Set by Mercury_functions when the current line starts with ":-".  */
static bool is_mercury_declaration = false;

/* Result of parsing one Mercury item, as returned by mercury_decl.  */
typedef struct
{
  /* Position reached in parsing tag name.  */
  size_t pos;

  /* Length of the tag name plus one, as computed by mercury_decl;
     mercury_pr passes namelength - 1 to make_tag.  */
  size_t namelength;

  /* Total length of parsed tag: this field is currently reserved for
     control and debugging.  */
  size_t totlength;
} mercury_pos_t;
6200
6201 /*
6202  * Objective-C and Mercury have identical file extension .m.
6203  * To disambiguate between Objective C and Mercury, parse file
6204  * with the following heuristics hook:
6205  *   - if line starts with :-, choose Mercury unconditionally;
6206  *   - if line starts with #, @, choose Objective-C;
6207  *   - otherwise compute the following ratio:
6208  *
6209  *     r = (number of lines with :-
6210  *          or % in non-commented parts or . at trimmed EOL)
6211  *         / (number of lines - number of lines starting by any amount
6212  *                        of whitespace, optionally followed by comment(s))
6213  *
6214  * Note: strings are neglected in counts.
6215  *
6216  * If r > mercury_heuristics_ratio, choose Mercury.
6217  * Experimental tests show that a possibly optimal default value for
6218  * this floor value is around 0.5.  This is the default value for
6219  * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
6220  * The closer r is to 0.5, the closer the source code to pure Prolog.
6221  * Idiomatic Mercury is scored either with r = 1.0 or higher.
6222  * Objective-C is scored with r = 0.0.  When this fails, the r-score
6223  * never rose above 0.1 in Objective-C tests.
6224  */
6225
6226 static void
6227 test_objc_is_mercury (char *this_file, language **lang)
6228 {
6229   if (this_file == NULL) return;
6230   FILE* fp = fopen (this_file, "r");
6231   if (fp == NULL)
6232     pfatal (this_file);
6233
6234   bool blank_line = false; /* Line starting with any amount of white space
6235                               followed by optional comment(s).  */
6236   bool commented_line = false;
6237   bool found_dot = false;
6238   bool only_space_before = true;
6239   bool start_of_line = true;
6240   int c;
6241   intmax_t lines = 1;
6242   intmax_t mercury_dots = 0;
6243   intmax_t percentage_signs = 0;
6244   intmax_t rule_signs = 0;
6245   float ratio = 0;
6246
6247   while ((c = fgetc (fp)) != EOF)
6248     {
6249       switch (c)
6250         {
6251         case '\n':
6252           if (! blank_line) ++lines;
6253           blank_line = true;
6254           commented_line = false;
6255           start_of_line = true;
6256           if (found_dot) ++mercury_dots;
6257           found_dot = false;
6258           only_space_before = true;
6259           break;
6260         case '.':
6261           found_dot = ! commented_line;
6262           only_space_before = false;
6263           break;
6264         case  '%': /* More frequent in Mercury.  May be modulo in Obj.-C.  */
6265           if (! commented_line)
6266             {
6267               ++percentage_signs;
6268               /* Cannot tell if it is a comment or modulo yet for sure.
6269                  Yet works for heuristic purposes.  */
6270               commented_line = true;
6271             }
6272           found_dot = false;
6273           start_of_line = false;
6274           only_space_before = false;
6275           break;
6276         case  '/':
6277           {
6278             int d = fgetc (fp);
6279             found_dot = false;
6280             only_space_before = false;
6281             if (! commented_line)
6282               {
6283                 if (d == '*')
6284                   commented_line = true;
6285                 else
6286                   /* If d == '/', cannot tell if it is an Obj.-C comment:
6287                      may be Mercury integ. division.  */
6288                     blank_line = false;
6289               }
6290           }
6291           FALLTHROUGH;
6292         case  ' ':
6293         case '\t':
6294           start_of_line = false;
6295           break;
6296         case ':':
6297           c = fgetc (fp);
6298           if (start_of_line)
6299             {
6300               if (c == '-')
6301                 {
6302                   ratio = 1.0; /* Failsafe, not an operator in Obj.-C.  */
6303                   goto out;
6304                 }
6305               start_of_line = false;
6306             }
6307           else
6308             {
6309               /* p :- q.  Frequent in Mercury.
6310                  Rare or in quoted exprs in Obj.-C.  */
6311               if (c == '-' && ! commented_line)
6312                 ++rule_signs;
6313             }
6314           blank_line = false;
6315           found_dot = false;
6316           only_space_before = false;
6317           break;
6318         case '@':
6319         case '#':
6320           if (start_of_line || only_space_before)
6321             {
6322               ratio = 0.0;
6323               goto out;
6324             }
6325           FALLTHROUGH;
6326         default:
6327           start_of_line = false;
6328           blank_line = false;
6329           found_dot = false;
6330           only_space_before = false;
6331         }
6332     }
6333
6334   /* Fallback heuristic test.  Not failsafe but errless in practice.  */
6335   ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
6336
6337  out:
6338   if (fclose (fp) == EOF)
6339     pfatal (this_file);
6340
6341   if (ratio > mercury_heuristics_ratio)
6342     {
6343       /* Change the language from Objective-C to Mercury.  */
6344       static language lang0 = { "mercury", Mercury_help, Mercury_functions,
6345         Mercury_suffixes };
6346       *lang = &lang0;
6347     }
6348 }
6349
6350 static void
6351 Mercury_functions (FILE *inf)
6352 {
6353   char *cp, *last = NULL;
6354   ptrdiff_t lastlen = 0, allocated = 0;
6355   if (declarations) with_mercury_definitions = true;
6356
6357   LOOP_ON_INPUT_LINES (inf, lb, cp)
6358     {
6359       if (cp[0] == '\0')   /* Empty line.  */
6360         continue;
6361       else if (c_isspace (cp[0]) || cp[0] == '%')
6362         /*  A Prolog-type comment or anything other than a declaration.  */
6363         continue;
6364       else if (cp[0] == '/' && cp[1] == '*')  /* Mercury C-type comment.  */
6365         mercury_skip_comment (&lb, inf);
6366       else
6367         {
6368           is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
6369
6370           if (is_mercury_declaration
6371               || with_mercury_definitions)
6372             {
6373               ptrdiff_t len = mercury_pr (cp, last, lastlen);
6374               if (0 < len)
6375                 {
6376                   /* Store the declaration to avoid generating duplicate
6377                      tags later.  */
6378                   if (allocated <= len)
6379                     {
6380                       xrnew (last, len + 1, 1);
6381                       allocated = len + 1;
6382                     }
6383                   memcpyz (last, cp, len);
6384                   lastlen = len;
6385                 }
6386             }
6387         }
6388     }
6389   free (last);
6390 }
6391
6392 static void
6393 mercury_skip_comment (linebuffer *plb, FILE *inf)
6394 {
6395   char *cp;
6396
6397   do
6398     {
6399       for (cp = plb->buffer; *cp != '\0'; ++cp)
6400         if (cp[0] == '*' && cp[1] == '/')
6401           return;
6402       readline (plb, inf);
6403     }
6404   while (perhaps_more_input (inf));
6405 }
6406
6407 /*
6408  * A declaration is added if it matches:
6409  *     <beginning of line>:-<whitespace><Mercury Term><whitespace>(
6410  * If with_mercury_definitions == true, we also add:
6411  *     <beginning of line><Mercury item><whitespace>(
6412  * or  <beginning of line><Mercury item><whitespace>:-
6413  * As for Prolog support, different arities and types are not taken into
6414  * consideration.
6415  * Item is added to the tags database if it doesn't match the
6416  * name of the previous declaration.
6417  *
6418  * Consume a Mercury declaration.
6419  * Return the number of bytes consumed, or 0 if there was an error.
6420  *
6421  * A Mercury declaration must be one of:
6422  *  :- type
6423  *  :- solver type
6424  *  :- pred
6425  *  :- func
6426  *  :- inst
6427  *  :- mode
6428  *  :- typeclass
6429  *  :- instance
6430  *  :- pragma
6431  *  :- promise
6432  *  :- initialise
6433  *  :- finalise
6434  *  :- mutable
6435  *  :- module
6436  *  :- interface
6437  *  :- implementation
6438  *  :- import_module
6439  *  :- use_module
6440  *  :- include_module
6441  *  :- end_module
6442  * followed on the same line by an alphanumeric sequence, starting with a lower
6443  * case letter or by a single-quoted arbitrary string.
6444  * Single quotes can escape themselves.  Backslash quotes everything.
6445  *
6446  * Return the size of the name of the declaration or 0 if no header was found.
6447  * As quantifiers may precede functions or predicates, we must list them too.
6448  */
6449
/* Keywords that may follow ":-" in a Mercury declaration.  The last
   two entries are the quantifiers that may precede "pred" or "func".  */
static const char *Mercury_decl_tags[] = {
  /* Types, predicates, functions, insts and modes.  */
  "type", "solver type", "pred", "func", "inst", "mode",
  /* Type classes.  */
  "typeclass", "instance",
  /* Miscellaneous declarations.  */
  "pragma", "promise", "initialise", "finalise", "mutable",
  /* Module system.  */
  "module", "interface", "implementation",
  "import_module", "use_module", "include_module", "end_module",
  /* Quantifiers.  */
  "some", "all"
};
6454
6455 static mercury_pos_t
6456 mercury_decl (char *s, size_t pos)
6457 {
6458   mercury_pos_t null_pos = {0, 0, 0};
6459
6460   if (s == NULL) return null_pos;
6461
6462   size_t origpos;
6463   origpos = pos;
6464
6465   while (c_isalnum (s[pos]) || s[pos] == '_')
6466     pos++;
6467
6468   unsigned char decl_type_length = pos - origpos;
6469   char buf[decl_type_length + 1];
6470   memset (buf, 0, decl_type_length + 1);
6471
6472   /* Mercury declaration tags.  Consume them, then check the declaration item
6473      following :- is legitimate, then go on as in the prolog case.  */
6474
6475   memcpy (buf, &s[origpos], decl_type_length);
6476
6477   bool found_decl_tag = false;
6478
6479   if (is_mercury_quantifier)
6480     {
6481       if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax.  */
6482         return null_pos;
6483
6484       is_mercury_quantifier = false; /* Reset to base value.  */
6485       found_decl_tag = true;
6486     }
6487   else
6488     {
6489       for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
6490         {
6491           if (strcmp (buf, Mercury_decl_tags[j]) == 0)
6492             {
6493               found_decl_tag = true;
6494               if (strcmp (buf, "type") == 0)
6495                 is_mercury_type = true;
6496
6497               if (strcmp (buf, "some") == 0
6498                   || strcmp (buf, "all") == 0)
6499                 {
6500                   is_mercury_quantifier = true;
6501                 }
6502
6503               break;  /* Found declaration tag of rank j.  */
6504             }
6505           else
6506             /* 'solver type' has a blank in the middle,
6507                so this is the hard case.  */
6508             if (strcmp (buf, "solver") == 0)
6509               {
6510                 do
6511                   pos++;
6512                 while (c_isalnum (s[pos]) || s[pos] == '_');
6513
6514                 decl_type_length = pos - origpos;
6515                 char buf2[decl_type_length + 1];
6516                 memset (buf2, 0, decl_type_length + 1);
6517                 memcpy (buf2, &s[origpos], decl_type_length);
6518
6519                 if (strcmp (buf2, "solver type") == 0)
6520                   {
6521                     found_decl_tag = false;
6522                     break;  /* Found declaration tag of rank j.  */
6523                   }
6524               }
6525         }
6526     }
6527
6528   /* If with_mercury_definitions == false
6529    * this is a Mercury syntax error, ignoring... */
6530
6531   if (with_mercury_definitions)
6532     {
6533       if (found_decl_tag)
6534         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
6535       else
6536         /* Prolog-like behavior
6537          * we have parsed the predicate once, yet inappropriately
6538          * so restarting again the parsing step.  */
6539         pos = 0;
6540     }
6541   else
6542     {
6543       if (found_decl_tag)
6544         pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
6545       else
6546         return null_pos;
6547     }
6548
6549   /* From now on it is the same as for Prolog except for module dots.  */
6550
6551   size_t start_of_name = pos;
6552
6553   if (c_islower (s[pos]) || s[pos] == '_' )
6554     {
6555       /* The name is unquoted.
6556          Do not confuse module dots with end-of-declaration dots.  */
6557       int module_dot_pos = 0;
6558
6559       while (c_isalnum (s[pos])
6560              || s[pos] == '_'
6561              || (s[pos] == '.' /* A module dot.  */
6562                  && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')
6563                  && (module_dot_pos = pos)))  /* Record module dot position.
6564                                                  Erase module from name.  */
6565         ++pos;
6566
6567       if (module_dot_pos)
6568         {
6569           start_of_name = module_dot_pos + 2;
6570           ++pos;
6571         }
6572
6573       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
6574       return position;
6575     }
6576   else if (s[pos] == '\'')
6577     {
6578       ++pos;
6579       for (;;)
6580         {
6581           if (s[pos] == '\'')
6582             {
6583               ++pos;
6584               if (s[pos] != '\'')
6585                 break;
6586               ++pos; /* A double quote.  */
6587             }
6588           else if (s[pos] == '\0')  /* Multiline quoted atoms are ignored.  */
6589             return null_pos;
6590           else if (s[pos] == '\\')
6591             {
6592               if (s[pos+1] == '\0')
6593                 return null_pos;
6594               pos += 2;
6595             }
6596           else
6597             ++pos;
6598         }
6599
6600       mercury_pos_t position = {pos, pos - start_of_name + 1, pos - origpos};
6601       return position;
6602     }
6603   else if (is_mercury_quantifier && s[pos] == '[')   /* :- some [T] pred/func.  */
6604     {
6605       char *close_bracket = strchr (s + pos + 1, ']');
6606       if (!close_bracket)
6607         return null_pos;
6608       pos = skip_spaces (close_bracket + 1) - s;
6609       mercury_pos_t position = mercury_decl (s, pos);
6610       position.totlength += pos - origpos;
6611       return position;
6612     }
6613   else if (s[pos] == '.')  /* as in ':- interface.'  */
6614     {
6615       mercury_pos_t position = {pos, pos - origpos + 1, pos - origpos};
6616       return position;
6617     }
6618   else
6619     return null_pos;
6620 }
6621
6622 static ptrdiff_t
6623 mercury_pr (char *s, char *last, ptrdiff_t lastlen)
6624 {
6625   size_t len0 = 0;
6626   is_mercury_type = false;
6627   is_mercury_quantifier = false;
6628   bool stop_at_rule = false;
6629
6630   if (is_mercury_declaration)
6631     {
6632       /* Skip len0 blanks only for declarations.  */
6633       len0 = skip_spaces (s + 2) - s;
6634     }
6635
6636   mercury_pos_t position = mercury_decl (s, len0);
6637   size_t pos = position.pos;
6638   int offset = 0;  /* may be < 0  */
6639   if (pos == 0) return 0;
6640
6641   /* Skip white space for:
6642      a. rules in definitions before :-
6643      b. 0-arity predicates with inlined modes.
6644      c. possibly multiline type definitions  */
6645
6646   while (c_isspace (s[pos])) { ++pos; ++offset; }
6647
6648   if (( ((s[pos] == '.' && (pos += 1))     /* case 1
6649                                               This is a statement dot,
6650                                               not a module dot. */
6651          || c_isalnum(s[pos])              /* 0-arity procedures  */
6652          || (s[pos] == '(' && (pos += 1))  /* case 2: arity > 0   */
6653          || ((s[pos] == ':')               /* case 3: rules  */
6654              && s[pos + 1] == '-' && (stop_at_rule = true)))
6655      && (lastlen != pos || memcmp (s, last, pos) != 0)
6656         )
6657       /* Types are often declared on several lines so keeping just
6658          the first line.  */
6659
6660       || is_mercury_type)  /* When types are implemented.  */
6661     {
6662       size_t namelength = position.namelength;
6663       if (stop_at_rule && offset) --offset;
6664
6665       /* Left-trim type definitions.  */
6666
6667       while (pos > namelength + offset
6668              && c_isspace (s[pos - namelength - offset]))
6669         --offset;
6670
6671       make_tag (s + pos - namelength - offset, namelength - 1, true,
6672                                 s, pos - offset - 1, lineno, linecharno);
6673       return pos;
6674     }
6675
6676   return 0;
6677 }
6678
6679 \f
6680 /*
6681  * Support for Erlang
6682  *
6683  * Generates tags for functions, defines, and records.
6684  * Assumes that Erlang functions start at column 0.
6685  * Original code by Anders Lindgren (1996)
6686  */
6687 static ptrdiff_t erlang_func (char *, char *, ptrdiff_t, ptrdiff_t *);
6688 static void erlang_attribute (char *);
6689 static ptrdiff_t erlang_atom (char *);
6690
6691 static void
6692 Erlang_functions (FILE *inf)
6693 {
6694   char *cp, *last = NULL;
6695   ptrdiff_t lastlen = 0, allocated = 0;
6696
6697   LOOP_ON_INPUT_LINES (inf, lb, cp)
6698     {
6699       if (cp[0] == '\0')        /* Empty line */
6700         continue;
6701       else if (c_isspace (cp[0])) /* Not function nor attribute */
6702         continue;
6703       else if (cp[0] == '%')    /* comment */
6704         continue;
6705       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
6706         continue;
6707       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
6708         {
6709           erlang_attribute (cp);
6710           if (last != NULL)
6711             {
6712               free (last);
6713               last = NULL;
6714               allocated = lastlen = 0;
6715             }
6716         }
6717       else
6718         {
6719           ptrdiff_t name_offset;
6720           ptrdiff_t len = erlang_func (cp, last, lastlen, &name_offset);
6721           if (0 < len)
6722             {
6723               /* Store the function name to avoid generating duplicate
6724                  tags later.  */
6725               if (allocated <= len)
6726                 {
6727                   xrnew (last, len + 1, 1);
6728                   allocated = len + 1;
6729                 }
6730               memcpyz (last, cp + name_offset, len);
6731               lastlen = len;
6732             }
6733         }
6734     }
6735   free (last);
6736 }
6737
6738
6739 /*
6740  * A function definition is added if it matches:
6741  *     <beginning of line><Erlang Atom><whitespace>(
6742  *
6743  * It is added to the tags database if it doesn't match the
6744  * name of the previous clause header.
6745  *
6746  * Return the size of the name of the function, or 0 if no function
6747  * was found.
6748  */
6749 static ptrdiff_t
6750 erlang_func (char *s, char *last, ptrdiff_t lastlen, ptrdiff_t *name_offset)
6751 {
6752   char *name = s;
6753   ptrdiff_t len = erlang_atom (s);
6754   if (len == 0)
6755     return 0;
6756   ptrdiff_t pos = skip_spaces (s + len) - s;
6757
6758   /* If the name is quoted, the quotes are not part of the name. */
6759   bool quoted = 2 < len && name[0] == '\'' && name[len - 1] == '\'';
6760   name += quoted;
6761   len -= 2 * quoted;
6762
6763   /* Save only the first clause. */
6764   if (s[pos++] == '('
6765       && ! (lastlen == len && memcmp (name, last, len) == 0))
6766     {
6767       make_tag (s, len, true, s, pos, lineno, linecharno);
6768       *name_offset = quoted;
6769       return len;
6770     }
6771
6772   return 0;
6773 }
6774
6775
6776 /*
6777  * Handle attributes.  Currently, tags are generated for defines
6778  * and records.
6779  *
6780  * They are on the form:
6781  * -define(foo, bar).
6782  * -define(Foo(M, N), M+N).
6783  * -record(graph, {vtab = notable, cyclic = true}).
6784  */
6785 static void
6786 erlang_attribute (char *s)
6787 {
6788   char *cp = s;
6789
6790   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
6791       && *cp++ == '(')
6792     {
6793       cp = skip_spaces (cp);
6794       ptrdiff_t len = erlang_atom (cp);
6795       ptrdiff_t pos = cp + len - s;
6796       if (len > 0)
6797         {
6798           /* If the name is quoted, the quotes are not part of the name. */
6799           if (len > 2 && cp[0] == '\'' && cp[len - 1] == '\'')
6800             {
6801               cp++;
6802               len -= 2;
6803             }
6804           make_tag (cp, len, true, s, pos, lineno, linecharno);
6805         }
6806     }
6807   return;
6808 }
6809
6810
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted name starts with a letter or an underscore and goes on
 * with alphanumerics and underscores.  A quoted atom is delimited by
 * single quotes; a backslash quotes the next character.
 *
 * Return the number of bytes consumed (quotes included), or 0 if S
 * does not start with an atom or if a quoted atom is not closed on
 * this line.
 */
static ptrdiff_t
erlang_atom (char *s)
{
  const char *p = s;

  if (c_isalpha (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (c_isalnum (*p) || *p == '_')
        p++;
      return p - s;
    }

  if (*p != '\'')
    return 0;

  for (p++; *p != '\''; p++)
    {
      if (*p == '\\')
        p++;                    /* Look at the quoted character.  */
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
    }

  /* Count the closing quote too.  */
  return p + 1 - s;
}
6838
6839 \f
6840 static char *scan_separators (char *);
6841 static void add_regex (char *, language *);
6842 static char *substitute (char *, char *, struct re_registers *);
6843
/*
 * Take a string like "/blah/" and turn it into "blah", in place.
 * The first character of NAME is the separator; scanning stops at the
 * first unquoted occurrence of that separator.  A backslash quotes
 * the next character: \a, \b, \d, \e, \f, \n, \r, \t and \v are
 * replaced by the corresponding control characters, a quoted
 * separator is replaced by the separator itself, and any other quoted
 * character is kept together with its backslash.
 * The result is null terminated.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* Where the next output byte goes.  */
  char *src = name + 1;         /* Next input byte to look at.  */

  for (; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character; a trailing backslash ends
             the scan with nothing emitted for it.  */
          src++;
          if (*src == '\0')
            break;

          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* Something else is quoted: keep the quote unless it
                 protects the separator.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* A missing closing separator signals an unterminated regexp.  */
  char *end = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return end;
}
6902
6903 /* Look at the argument of --regex or --no-regex and do the right
6904    thing.  Same for each line of a regexp file. */
6905 static void
6906 analyze_regex (char *regex_arg)
6907 {
6908   if (regex_arg == NULL)
6909     {
6910       free_regexps ();          /* --no-regex: remove existing regexps */
6911       return;
6912     }
6913
6914   /* A real --regexp option or a line in a regexp file. */
6915   switch (regex_arg[0])
6916     {
6917       /* Comments in regexp file or null arg to --regex. */
6918     case '\0':
6919     case ' ':
6920     case '\t':
6921       break;
6922
6923       /* Read a regex file.  This is recursive and may result in a
6924          loop, which will stop when the file descriptors are exhausted. */
6925     case '@':
6926       {
6927         FILE *regexfp;
6928         linebuffer regexbuf;
6929         char *regexfile = regex_arg + 1;
6930
6931         /* regexfile is a file containing regexps, one per line. */
6932         regexfp = fopen (regexfile, "r" FOPEN_BINARY);
6933         if (regexfp == NULL)
6934           pfatal (regexfile);
6935         linebuffer_init (&regexbuf);
6936         while (readline_internal (&regexbuf, regexfp, regexfile, false) > 0)
6937           analyze_regex (regexbuf.buffer);
6938         free (regexbuf.buffer);
6939         if (fclose (regexfp) != 0)
6940           pfatal (regexfile);
6941       }
6942       break;
6943
6944       /* Regexp to be used for a specific language only. */
6945     case '{':
6946       {
6947         language *lang;
6948         char *lang_name = regex_arg + 1;
6949         char *cp;
6950
6951         for (cp = lang_name; *cp != '}'; cp++)
6952           if (*cp == '\0')
6953             {
6954               error ("unterminated language name in regex: %s", regex_arg);
6955               return;
6956             }
6957         *cp++ = '\0';
6958         lang = get_language_from_langname (lang_name);
6959         if (lang == NULL)
6960           return;
6961         add_regex (cp, lang);
6962       }
6963       break;
6964
6965       /* Regexp to be used for any language. */
6966     default:
6967       add_regex (regex_arg, NULL);
6968       break;
6969     }
6970 }
6971
6972 /* Separate the regexp pattern, compile it,
6973    and care for optional name and modifiers. */
6974 static void
6975 add_regex (char *regexp_pattern, language *lang)
6976 {
6977   static struct re_pattern_buffer zeropattern;
6978   char sep, *pat, *name, *modifiers;
6979   char empty = '\0';
6980   const char *err;
6981   struct re_pattern_buffer *patbuf;
6982   regexp *rp;
6983   bool
6984     ignore_case = false,        /* case is significant */
6985     multi_line = false,         /* matches are done one line at a time */
6986     single_line = false;        /* dot does not match newline */
6987
6988
6989   if (strnlen (regexp_pattern, 3) < 3)
6990     {
6991       error ("null regexp");
6992       return;
6993     }
6994   sep = regexp_pattern[0];
6995   name = scan_separators (regexp_pattern);
6996   if (name == NULL)
6997     {
6998       error ("%s: unterminated regexp", regexp_pattern);
6999       return;
7000     }
7001   if (name[1] == sep)
7002     {
7003       error ("null name for regexp \"%s\"", regexp_pattern);
7004       return;
7005     }
7006   modifiers = scan_separators (name);
7007   if (modifiers == NULL)        /* no terminating separator --> no name */
7008     {
7009       modifiers = name;
7010       name = &empty;
7011     }
7012   else
7013     modifiers += 1;             /* skip separator */
7014
7015   /* Parse regex modifiers. */
7016   for (; modifiers[0] != '\0'; modifiers++)
7017     switch (modifiers[0])
7018       {
7019       case 'N':
7020         if (modifiers == name)
7021           error ("forcing explicit tag name but no name, ignoring");
7022         /* This option has no effect and is present only for backward
7023            compatibility.  */
7024         break;
7025       case 'i':
7026         ignore_case = true;
7027         break;
7028       case 's':
7029         single_line = true;
7030         FALLTHROUGH;
7031       case 'm':
7032         multi_line = true;
7033         need_filebuf = true;
7034         break;
7035       default:
7036         error ("invalid regexp modifier '%c', ignoring", modifiers[0]);
7037         break;
7038       }
7039
7040   patbuf = xmalloc (sizeof *patbuf);
7041   *patbuf = zeropattern;
7042   if (ignore_case)
7043     {
7044       static unsigned char lc_trans[UCHAR_MAX + 1];
7045       int i;
7046       for (i = 0; i < UCHAR_MAX + 1; i++)
7047         lc_trans[i] = c_tolower (i);
7048       patbuf->translate = lc_trans;     /* translation table to fold case  */
7049     }
7050
7051   if (multi_line)
7052     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
7053   else
7054     pat = regexp_pattern;
7055
7056   if (single_line)
7057     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
7058   else
7059     re_set_syntax (RE_SYNTAX_EMACS);
7060
7061   err = re_compile_pattern (pat, strlen (pat), patbuf);
7062   if (multi_line)
7063     free (pat);
7064   if (err != NULL)
7065     {
7066       error ("%s while compiling pattern", err);
7067       return;
7068     }
7069
7070   rp = p_head;
7071   p_head = xmalloc (sizeof *p_head);
7072   p_head->pattern = savestr (regexp_pattern);
7073   p_head->p_next = rp;
7074   p_head->lang = lang;
7075   p_head->pat = patbuf;
7076   p_head->name = savestr (name);
7077   p_head->error_signaled = false;
7078   p_head->ignore_case = ignore_case;
7079   p_head->multi_line = multi_line;
7080 }
7081
7082 /*
7083  * Do the substitutions indicated by the regular expression and
7084  * arguments.
7085  */
7086 static char *
7087 substitute (char *in, char *out, struct re_registers *regs)
7088 {
7089   char *result, *t;
7090
7091   result = NULL;
7092   ptrdiff_t size = strlen (out);
7093
7094   /* Pass 1: figure out how much to allocate by finding all \N strings. */
7095   if (out[size - 1] == '\\')
7096     fatal ("pattern error in \"%s\"", out);
7097   for (t = strchr (out, '\\');
7098        t != NULL;
7099        t = strchr (t + 2, '\\'))
7100     if (c_isdigit (t[1]))
7101       {
7102         int dig = t[1] - '0';
7103         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
7104         size += diglen - 2;
7105       }
7106     else
7107       size -= 1;
7108
7109   /* Allocate space and do the substitutions. */
7110   assert (size >= 0);
7111   result = xmalloc (size + 1);
7112
7113   for (t = result; *out != '\0'; out++)
7114     if (*out == '\\' && c_isdigit (*++out))
7115       {
7116         int dig = *out - '0';
7117         ptrdiff_t diglen = regs->end[dig] - regs->start[dig];
7118         memcpy (t, in + regs->start[dig], diglen);
7119         t += diglen;
7120       }
7121     else
7122       *t++ = *out;
7123   *t = '\0';
7124
7125   assert (t <= result + size);
7126   assert (t == result + strlen (result));
7127
7128   return result;
7129 }
7130
7131 /* Deallocate all regexps. */
7132 static void
7133 free_regexps (void)
7134 {
7135   regexp *rp;
7136   while (p_head != NULL)
7137     {
7138       rp = p_head->p_next;
7139       free (p_head->pattern);
7140       free (p_head->name);
7141       free (p_head);
7142       p_head = rp;
7143     }
7144   return;
7145 }
7146
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * For each multi-line regexp the global counters `lineno', `charno' and
 * `linecharno' are reset, and the buffer is then searched repeatedly
 * until re_search reports no match (-1) or an error (-2), or until an
 * empty match is found.  Regexps tied to a language other than that of
 * `curfdp' are skipped.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline (void)
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      ptrdiff_t match = 0;

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initializations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* A negative MATCH (no match, error, or the -3 set below) ends
         the loop.  */
      while (match >= 0 && match < filebuf.len)
        {
          /* The search starts at charno, which the code below leaves
             at the end of the previous match.  */
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = true;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* Start and end of the match coincide: empty match.  */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = true;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Advance charno to the end of the match, counting the
                 newlines passed so that lineno and linecharno describe
                 the line on which the match ends.  */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);

              /* Force explicit tag name, if a name is there. */
              pfnote (name, true, buffer + linecharno,
                      charno - linecharno + 1, lineno, linecharno);

              if (debug)
                fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
                         name ? name : "(unnamed)", curfdp->taggedfname,
                         lineno, buffer + linecharno);
              break;
            }
        }
    }
}
7231
7232 \f
7233 static bool
7234 nocase_tail (const char *cp)
7235 {
7236   ptrdiff_t len = 0;
7237
7238   while (*cp != '\0' && c_tolower (*cp) == c_tolower (dbp[len]))
7239     cp++, len++;
7240   if (*cp == '\0' && !intoken (dbp[len]))
7241     {
7242       dbp += len;
7243       return true;
7244     }
7245   return false;
7246 }
7247
7248 static void
7249 get_tag (register char *bp, char **namepp)
7250 {
7251   register char *cp = bp;
7252
7253   if (*bp != '\0')
7254     {
7255       /* Go till you get to white space or a syntactic break */
7256       for (cp = bp + 1; !notinname (*cp); cp++)
7257         continue;
7258       make_tag (bp, cp - bp, true,
7259                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
7260     }
7261
7262   if (namepp != NULL)
7263     *namepp = savenstr (bp, cp - bp);
7264 }
7265
7266 /* Similar to get_tag, but include '=' as part of the tag. */
7267 static void
7268 get_lispy_tag (register char *bp)
7269 {
7270   register char *cp = bp;
7271
7272   if (*bp != '\0')
7273     {
7274       /* Go till you get to white space or a syntactic break */
7275       for (cp = bp + 1; !notinname (*cp) || *cp == '='; cp++)
7276         continue;
7277       make_tag (bp, cp - bp, true,
7278                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
7279     }
7280 }
7281
/*
 * Read a line of text from `stream' into `lbp', excluding the
 * newline or CR-NL (if `leave_cr` is false), if any.  Return the
 * number of characters read from `stream', which is the length
 * of the line including the newline.
 *
 * On DOS or Windows, if `leave_cr` is false, we do not count the
 * CR character, if any before the NL, in the returned length;
 * this mirrors the behavior of Emacs on those
 * platforms (for text files, it translates CR-NL to NL as it reads in the
 * file).
 *
 * If multi-line regular expressions are requested, each line read is
 * appended to `filebuf'.
 *
 * `filename' is only used as the prefix of the perror message printed
 * when a read error is detected at end of input.
 */
static ptrdiff_t
readline_internal (linebuffer *lbp, FILE *stream, char const *filename,
                   const bool leave_cr)
{
  char *buffer = lbp->buffer;
  char *p = lbp->buffer;
  char *pend;
  /* Number of line-terminator bytes dropped from the line: 0 at end of
     file, 1 for NL, 2 for CR-NL.  */
  int chars_deleted;

  pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */

  for (;;)
    {
      register int c = getc (stream);
      /* Grow before storing anything, so that there is always room for
         the byte written below, including the terminating NUL.  */
      if (p == pend)
        {
          /* We're at the end of linebuffer: expand it. */
          xrnew (buffer, lbp->size, 2);
          p = buffer + lbp->size;
          lbp->size *= 2;
          pend = buffer + lbp->size;
          lbp->buffer = buffer;
        }
      if (c == EOF)
        {
          if (ferror (stream))
            perror (filename);
          *p = '\0';
          chars_deleted = 0;
          break;
        }
      if (c == '\n')
        {
          if (!leave_cr && p > buffer && p[-1] == '\r')
            {
              /* Drop the CR that was already stored.  */
              p -= 1;
              chars_deleted = 2;
            }
          else
            {
              chars_deleted = 1;
            }
          *p = '\0';
          break;
        }
      *p++ = c;
    }
  lbp->len = p - buffer;

  /* Text that ends at EOF without a newline is not appended to
     filebuf, because chars_deleted is 0 in that case.  */
  if (need_filebuf              /* we need filebuf for multi-line regexps */
      && chars_deleted > 0)     /* not at EOF */
    {
      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
        {
          /* Expand filebuf. */
          xrnew (filebuf.buffer, filebuf.size, 2);
          filebuf.size *= 2;
        }
      /* Append the line followed by "\n" and a terminating NUL.  */
      strcpy (mempcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len),
              "\n");
      filebuf.len += lbp->len + 1;
    }

  return lbp->len + chars_deleted;
}
7362
/*
 * Like readline_internal, above, but in addition try to match the
 * input line against relevant regular expressions and manage #line
 * directives.
 *
 * The globals `linecharno', `lineno' and `charno' are updated for the
 * line just read.  A #line directive is consumed here: the function
 * updates `curfdp' and `lineno' as needed and calls itself to fetch the
 * following line, so the caller never sees the directive.
 */
static void
readline (linebuffer *lbp, FILE *stream)
{
  linecharno = charno;          /* update global char number of line start */
  ptrdiff_t result = readline_internal (lbp, stream, infilename, false);
  lineno += 1;                  /* increment global line number */
  charno += result;             /* increment global char number */

  /* Honor #line directives. */
  if (!no_line_directive)
    {
      /* Static, so the setting persists across calls until the next
         #line directive or end of file.  */
      static bool discard_until_line_directive;

      /* Check whether this is a #line directive. */
      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
        {
          char *lno_start = lbp->buffer + 6;
          char *lno_end;
          intmax_t lno = strtoimax (lno_start, &lno_end, 10);
          /* Null when no digits were converted.  */
          char *quoted_filename
            = lno_start < lno_end ? skip_spaces (lno_end) : NULL;

          if (quoted_filename && *quoted_filename == '"')
            {
              /* Find the closing quote; a backslash makes the scan skip
                 the character after it, unless that is the NUL.  */
              char *endp = quoted_filename;
              while (*++endp && *endp != '"')
                endp += *endp == '\\' && endp[1];

              if (*endp)
                /* Ok, this is a real #line directive.  Let's deal with it. */
                {
                  char *taggedabsname;  /* absolute name of original file */
                  char *taggedfname;    /* name of original file as given */
                  char *name = quoted_filename + 1;

                  discard_until_line_directive = false; /* found it */
                  *endp = '\0';
                  canonicalize_filename (name);
                  taggedabsname = absolute_filename (name, tagfiledir);
                  if (filename_is_absolute (name)
                      || filename_is_absolute (curfdp->infname))
                    taggedfname = savestr (taggedabsname);
                  else
                    taggedfname = relative_filename (taggedabsname,tagfiledir);

                  if (streq (curfdp->taggedfname, taggedfname))
                    /* The #line directive is only a line number change.  We
                       deal with this afterwards. */
                    free (taggedfname);
                  else
                    /* The tags following this #line directive should be
                       attributed to taggedfname.  In order to do this, set
                       curfdp accordingly. */
                    {
                      fdesc *fdp; /* file description pointer */

                      /* Go look for a file description already set up for the
                         file indicated in the #line directive.  If there is
                         one, use it from now until the next #line
                         directive. */
                      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                        if (streq (fdp->infname, curfdp->infname)
                            && streq (fdp->taggedfname, taggedfname))
                          /* If we remove the second test above (after the &&)
                             then all entries pertaining to the same file are
                             coalesced in the tags file.  If we use it, then
                             entries pertaining to the same file but generated
                             from different files (via #line directives) will
                             go into separate sections in the tags file.  These
                             alternatives look equivalent.  The first one
                             destroys some apparently useless information. */
                          {
                            curfdp = fdp;
                            free (taggedfname);
                            break;
                          }
                      /* Else, if we already tagged the real file, skip all
                         input lines until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                          if (streq (fdp->infabsname, taggedabsname))
                            {
                              discard_until_line_directive = true;
                              free (taggedfname);
                              break;
                            }
                      /* Else create a new file description and use that from
                         now on, until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        {
                          fdp = fdhead;
                          fdhead = xmalloc (sizeof *fdhead);
                          *fdhead = *curfdp; /* copy curr. file description */
                          fdhead->next = fdp;
                          fdhead->infname = savestr (curfdp->infname);
                          fdhead->infabsname = savestr (curfdp->infabsname);
                          fdhead->infabsdir = savestr (curfdp->infabsdir);
                          /* The new description takes over taggedfname.  */
                          fdhead->taggedfname = taggedfname;
                          fdhead->usecharno = false;
                          fdhead->prop = NULL;
                          fdhead->written = false;
                          curfdp = fdhead;
                        }
                    }
                  free (taggedabsname);
                  /* The recursive call adds one to lineno, so the line
                     after the directive is numbered lno.  */
                  lineno = lno - 1;
                  readline (lbp, stream);
                  return;
                } /* if a real #line directive */
            } /* if #line is followed by a number */
        } /* if line begins with "#line " */

      /* If we are here, no #line directive was found. */
      if (discard_until_line_directive)
        {
          if (result > 0)
            {
              /* Do a tail recursion on ourselves, thus discarding the contents
                 of the line buffer. */
              readline (lbp, stream);
              return;
            }
          /* End of file. */
          discard_until_line_directive = false;
          return;
        }
    } /* if #line directives should be considered */

  {
    ptrdiff_t match;
    regexp *rp;
    char *name;

    /* Match against relevant regexps. */
    if (lbp->len > 0)
      for (rp = p_head; rp != NULL; rp = rp->p_next)
        {
          /* Only use generic regexps or those for the current language.
             Also do not use multiline regexps, which is the job of
             regex_tag_multiline. */
          if ((rp->lang != NULL && rp->lang != fdhead->lang)
              || rp->multi_line)
            continue;

          /* The match is attempted at the start of the line only.  */
          match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = true;
                }
              break;
            case -1:
              /* No match. */
              break;
            case 0:
              /* Empty string matched. */
              if (!rp->error_signaled)
                {
                  error ("regexp matches the empty string: \"%s\"", rp->pattern);
                  rp->error_signaled = true;
                }
              break;
            default:
              /* Match occurred.  Construct a tag. */
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (lbp->buffer, rp->name, &rp->regs);

              /* Force explicit tag name, if a name is there. */
              pfnote (name, true, lbp->buffer, match, lineno, linecharno);

              if (debug)
                fprintf (stderr, "%s on %s:%"PRIdMAX": %s\n",
                         name ? name : "(unnamed)", curfdp->taggedfname,
                         lineno, lbp->buffer);
              break;
            }
        }
  }
}
7555
7556 \f
/*
 * Return a freshly allocated copy of the NUL-terminated string CP.
 * The storage comes from xmalloc (through savenstr) and holds
 * strlen (CP) + 1 bytes.
 */
static char *
savestr (const char *cp)
{
  ptrdiff_t len = strlen (cp);

  return savenstr (cp, len);
}
7566
/*
 * Return a freshly allocated buffer of LEN+1 bytes obtained from
 * xmalloc, holding the first LEN bytes of CP followed by a NUL byte.
 */
static char *
savenstr (const char *cp, ptrdiff_t len)
{
  char *copy = xmalloc (len + 1);

  memcpy (copy, cp, len);
  copy[len] = '\0';
  return copy;
}
7578
/* Return a pointer to the first character at or after CP that is not
   white space.  The terminating NUL is not white space, so the scan
   stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; c_isspace (*cp); cp++)
    continue;
  return cp;
}
7587
/* Return a pointer to the first white space character at or after CP,
   or to the terminating NUL if there is none.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || c_isspace (*cp))
        return cp;
      cp++;
    }
}
7596
/* Return a pointer to the first character at or after CP for which
   notinname holds, that is, skip the characters of a name.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so loop stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
7606
/* Output a diagnostic with printf-style FORMAT and args through verror,
   then exit with EXIT_FAILURE.  */
static void
fatal (char const *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);

  exit (EXIT_FAILURE);
}
7617
/* Report the most recent system error with perror, using S1 as the
   message prefix, then exit with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
7624
/* Print to stderr a hint to run the program (named by the global
   `progname') with --help, then exit with EXIT_FAILURE.  */
static void
suggest_asking_for_help (void)
{
  fprintf (stderr, "\tTry '%s --help' for a complete list of options.\n",
           progname);
  exit (EXIT_FAILURE);
}
7632
/* Output a diagnostic with printf-style FORMAT and args through verror.
   Unlike fatal, this returns to the caller.  */
static void
error (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  verror (format, args);
  va_end (args);
}
7642
7643 static void
7644 verror (char const *format, va_list ap)
7645 {
7646   fprintf (stderr, "%s: ", progname);
7647   vfprintf (stderr, format, ap);
7648   fprintf (stderr, "\n");
7649 }
7650
/* Return a newly allocated string made of the contents of S1, S2 and
   S3, in that order.  The storage comes from xmalloc.  */
static char *
concat (const char *s1, const char *s2, const char *s3)
{
  ptrdiff_t len1 = strlen (s1);
  ptrdiff_t len2 = strlen (s2);
  ptrdiff_t len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);
  char *end = result;

  memcpy (end, s1, len1);
  end += len1;
  memcpy (end, s2, len2);
  end += len2;
  memcpy (end, s3, len3);
  end += len3;
  *end = '\0';

  return result;
}
7661
7662 \f
/* Return the current working directory as a newly allocated string,
   passed through canonicalize_filename.  The buffer starts at 200
   bytes and is doubled for as long as getcwd fails with ERANGE; any
   other getcwd failure is fatal.  */
static char *
etags_getcwd (void)
{
  ptrdiff_t bufsize = 200;
  char *path = xmalloc (bufsize);

  for (;;)
    {
      if (getcwd (path, bufsize) != NULL)
        break;
      if (errno != ERANGE)
        pfatal ("getcwd");

      /* The buffer was too small: replace it with one twice as big.  */
      free (path);
      path = xnmalloc (bufsize, 2 * sizeof *path);
      bufsize *= 2;
    }

  canonicalize_filename (path);
  return path;
}
7683
7684 /* Return a newly allocated string containing a name of a temporary file.  */
7685 static char *
7686 etags_mktmp (void)
7687 {
7688   const char *tmpdir = getenv ("TMPDIR");
7689   const char *slash = "/";
7690
7691 #if MSDOS || defined (DOS_NT)
7692   if (!tmpdir)
7693     tmpdir = getenv ("TEMP");
7694   if (!tmpdir)
7695     tmpdir = getenv ("TMP");
7696   if (!tmpdir)
7697     tmpdir = ".";
7698   if (tmpdir[strlen (tmpdir) - 1] == '/'
7699       || tmpdir[strlen (tmpdir) - 1] == '\\')
7700     slash = "";
7701 #else
7702   if (!tmpdir)
7703     tmpdir = "/tmp";
7704   if (tmpdir[strlen (tmpdir) - 1] == '/')
7705     slash = "";
7706 #endif
7707
7708   char *templt = concat (tmpdir, slash, "etXXXXXX");
7709   int fd = mkostemp (templt, O_CLOEXEC);
7710   if (fd < 0 || close (fd) != 0)
7711     {
7712       free (templt);
7713       templt = NULL;
7714     }
7715 #if defined (DOS_NT)
7716   else
7717     {
7718       /* The file name will be used in shell redirection, so it needs to have
7719          DOS-style backslashes, or else the Windows shell will barf.  */
7720       char *p;
7721       for (p = templt; *p; p++)
7722         if (*p == '/')
7723           *p = '\\';
7724     }
7725 #endif
7726
7727   return templt;
7728 }
7729
#if !MSDOS && !defined (DOS_NT)
/*
 * Return a newly allocated copy of STR wrapped in single quotes, in
 * which every single quote of STR is written as the four characters
 * '\'' .
 *
 * For example:
 * escape_shell_arg_string ("test.txt")  => "'test.txt'"
 * escape_shell_arg_string ("'test.txt") => "''\''test.txt'"
 */
static char *
escape_shell_arg_string (char *str)
{
  int need_space = 2;           /* ' at begin and end */

  /* First pass: compute the length of the quoted string.  */
  for (char *p = str; *p != '\0'; p++)
    need_space += (*p == '\'') ? 4 : 1; /* ' to '\'', length is 4 */

  char *new_str = xmalloc (need_space + 1);
  char *w = new_str;

  /* Second pass: copy, expanding each single quote.  */
  *w++ = '\'';
  for (char *p = str; *p != '\0'; p++)
    {
      *w++ = *p;
      if (*p == '\'')
        {
          *w++ = '\\';
          *w++ = '\'';
          *w++ = '\'';
        }
    }
  *w++ = '\'';
  *w = '\0';

  return new_str;
}
#endif
7780
/* Move SRC_FILE to DST_FILE.  First try rename; if that fails, copy the
   contents byte by byte and then unlink SRC_FILE.  Any failure during
   the copy is fatal (see pfatal).

   The source is opened and checked before the destination is opened for
   writing, so DST_FILE is not truncated when SRC_FILE cannot be read.
   A read error is checked for after the copy loop, so a short read is
   not mistaken for end of file before SRC_FILE is removed.  */
static void
do_move_file (const char *src_file, const char *dst_file)
{
  if (rename (src_file, dst_file) == 0)
    return;

  FILE *src_f = fopen (src_file, "rb");
  if (src_f == NULL)
    pfatal (src_file);

  FILE *dst_f = fopen (dst_file, "wb");
  if (dst_f == NULL)
    pfatal (dst_file);

  int c;
  while ((c = fgetc (src_f)) != EOF)
    if (fputc (c, dst_f) == EOF)
      pfatal ("cannot write");

  /* fgetc returns EOF both at end of file and on error; tell them
     apart here.  */
  if (ferror (src_f))
    pfatal (src_file);

  if (fclose (src_f) == EOF)
    pfatal (src_file);

  /* fclose flushes buffered output, so its result must be checked.  */
  if (fclose (dst_f) == EOF)
    pfatal (dst_file);

  if (unlink (src_file) == -1)
    pfatal ("unlink error");
}
7820
/* Return a newly allocated string containing the file name of FILE
   relative to the absolute directory DIR (which should end with a slash).
   FILE is first made absolute with respect to the global `cwd'.  The
   result is one "../" for each directory level of DIR below the common
   root, followed by the part of FILE below that root.  */
static char *
relative_filename (char *file, char *dir)
{
  char *fp, *dp, *afn, *res;
  ptrdiff_t i;
  char *dir_last_slash UNINIT;

  /* Find the common root of file and dir (with a trailing slash). */
  afn = absolute_filename (file, cwd);
  fp = afn;
  dp = dir;
  /* NOTE(review): this loop has no test for the terminating NUL, so it
     relies on AFN and DIR differing somewhere -- confirm callers never
     pass a FILE whose absolute name equals DIR.  */
  while (*fp++ == *dp++)
    if (dp[-1] == '/')
      dir_last_slash = dp - 1;
#ifdef DOS_NT
  if (fp - 1 == afn && afn[0] != '/')
    return afn; /* Cannot build a relative name.  */
#endif
  /* Move both pointers back to the last slash they have in common.  */
  fp -= dp - dir_last_slash;
  dp = dir_last_slash;

  /* Build a sequence of "../" strings for the resulting relative file name. */
  i = 0;
  /* Count the slashes of DIR after the common root.  */
  while ((dp = strchr (dp + 1, '/')) != NULL)
    i += 1;
  res = xmalloc (3*i + strlen (fp + 1) + 1);
  char *z = res;
  while (i-- > 0)
    z = stpcpy (z, "../");

  /* Add the file name relative to the common root of file and dir. */
  strcpy (z, fp + 1);
  free (afn);

  return res;
}
7859
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute, DIR is prepended to it.  The "/." and
   "/dirname/.." components of the result are then removed in place.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up CP until the text at CP looks like an absolute
                 name; without DOS_NT that is the previous slash.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) is the length of the text after
                 "/.." plus one, so the terminating NUL moves as well.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Look at the same position again.  */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." ; the count includes the terminating NUL.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
7922
/* Return a newly allocated string containing the absolute name of the
   directory in which FILE resides, given DIR (which should end with a
   slash).  If FILE contains no slash the result is a copy of DIR.
   FILE is cut after its last slash while the name is computed and is
   restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = strrchr (file, '/');

  if (last_slash == NULL)
    return savestr (dir);

  char *after = last_slash + 1;
  char saved = *after;

  *after = '\0';
  char *res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
7942
/* Return true if FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  if (fn[0] == '/')
    return true;
#ifdef DOS_NT
  if (c_isalpha (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return true;
#endif
  return false;
}
7954
/* Rewrite FN in place so that every run of separators becomes a single
   forward slash.  Under DOS_NT a backslash also counts as a separator
   and an upper-case drive letter is converted to lower case.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write; never past SRC */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (c_isupper (fn[0]) && fn[1] == ':')
    fn[0] = c_tolower (fn[0]);

  /* Collapse multiple forward- and back-slashes into a single forward
     slash. */
  while (*src != '\0')
    {
      if (*src == '/' || *src == '\\')
        {
          do
            src++;
          while (*src == '/' || *src == '\\');
          *dst++ = '/';
        }
      else
        *dst++ = *src++;
    }

#else  /* !DOS_NT */

  /* Collapse multiple slashes into a single slash. */
  while (*src != '\0')
    {
      if (*src == '/')
        {
          do
            src++;
          while (*src == '/');
          *dst++ = '/';
        }
      else
        *dst++ = *src++;
    }

#endif  /* !DOS_NT */

  *dst = '\0';
}
7996
7997 \f
7998 /* Initialize a linebuffer for use. */
7999 static void
8000 linebuffer_init (linebuffer *lbp)
8001 {
8002   lbp->size = (DEBUG) ? 3 : 200;
8003   lbp->buffer = xmalloc (lbp->size);
8004   lbp->buffer[0] = '\0';
8005   lbp->len = 0;
8006 }
8007
8008 /* Set the minimum size of a string contained in a linebuffer. */
8009 static void
8010 linebuffer_setlen (linebuffer *lbp, ptrdiff_t toksize)
8011 {
8012   if (lbp->size <= toksize)
8013     {
8014       ptrdiff_t multiplier = toksize / lbp->size + 1;
8015       xrnew (lbp->buffer, lbp->size, multiplier);
8016       lbp->size *= multiplier;
8017     }
8018   lbp->len = toksize;
8019 }
8020
/* Memory allocators with a fatal error if memory is exhausted.  */

/* Report that memory is exhausted by way of fatal, which prints the
   message and exits.  */
static void
memory_full (void)
{
  fatal ("virtual memory exhausted");
}
8028
/* Return SIZE bytes of storage obtained from malloc.  If SIZE exceeds
   SIZE_MAX or malloc returns a null pointer, call memory_full, which
   does not return normally.  */
static void *
xmalloc (ptrdiff_t size)
{
  void *result = NULL;

  if (size <= SIZE_MAX)
    result = malloc (size);
  if (!result)
    memory_full ();
  return result;
}
8039
/* Return storage for NITEMS items of ITEM_SIZE bytes each, obtained
   from xmalloc.  NITEMS must not be negative and ITEM_SIZE must be
   positive.  If the byte count overflows, call memory_full.  */
static void *
xnmalloc (ptrdiff_t nitems, ptrdiff_t item_size)
{
  ptrdiff_t total;

  assume (0 <= nitems);
  assume (0 < item_size);

  bool overflow = ckd_mul (&total, nitems, item_size);
  if (overflow)
    memory_full ();

  return xmalloc (total);
}
8050
/* Resize the block PA with realloc so that it holds NITEMS items of
   ITEM_SIZE bytes each, and return the resulting block.  NITEMS must
   not be negative and ITEM_SIZE must be positive.  If the byte count
   overflows or exceeds SIZE_MAX, or if realloc fails, call memory_full.  */
static void *
xnrealloc (void *pa, ptrdiff_t nitems, ptrdiff_t item_size)
{
  ptrdiff_t nbytes;

  assume (0 <= nitems);
  assume (0 < item_size);

  if (ckd_mul (&nbytes, nitems, item_size))
    memory_full ();
  if (SIZE_MAX < nbytes)
    memory_full ();

  void *grown = realloc (pa, nbytes);
  if (grown == NULL)
    memory_full ();
  return grown;
}
8064
8065 /*
8066  * Local Variables:
8067  * indent-tabs-mode: t
8068  * tab-width: 8
8069  * fill-column: 79
8070  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
8071  * c-file-style: "gnu"
8072  * End:
8073  */
8074
8075 /* etags.c ends here */