lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.5";
  45
  46 #define TRUE    1               /* truth values for `bool', which is int here */
  47 #define FALSE   0
  48
  49 #ifdef DEBUG                    /* normalise DEBUG so it can be tested with #if */
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H            /* configured build, not a standalone compilation */
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # define ETAGS_REGEXPS          /* use the regexp features */
  63 # define LONG_OPTIONS           /* accept long options */
  64 # ifndef PTR                    /* for Xemacs */
  65 #   define PTR void *
  66 # endif
  67 # ifndef __P                    /* for Xemacs */
  68 #   define __P(args) args
  69 # endif
  70 #else  /* no config.h */
  71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  72 #   define __P(args) args       /* use prototypes */
  73 #   define PTR void *           /* for generic pointers */
  74 # else /* not standard C */
  75 #   define __P(args) ()         /* no prototypes: declare with empty parens */
  76 #   define const                /* remove const for old compilers' sake */
  77 #   define PTR long *           /* don't use void* */
  78 # endif
  79 #endif /* !HAVE_CONFIG_H */
  80
  81 #ifndef _GNU_SOURCE
  82 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  83 #endif
  84
  85 #ifdef LONG_OPTIONS             /* normalise to TRUE/FALSE so `#if LONG_OPTIONS' works */
  86 #  undef LONG_OPTIONS
  87 #  define LONG_OPTIONS TRUE
  88 #else
  89 #  define LONG_OPTIONS  FALSE
  90 #endif
  91
  92 /* WIN32_NATIVE is for Xemacs.
  93    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  94 #ifdef WIN32_NATIVE
  95 # undef MSDOS
  96 # undef  WINDOWSNT
  97 # define WINDOWSNT
  98 #endif /* WIN32_NATIVE */
  99
 100 #ifdef MSDOS
 101 # undef MSDOS
 102 # define MSDOS TRUE
 103 # include <fcntl.h>
 104 # include <sys/param.h>
 105 # include <io.h>
 106 # ifndef HAVE_CONFIG_H
 107 #   define DOS_NT
 108 #   include <sys/config.h>
 109 # endif
 110 #else
 111 # define MSDOS FALSE
 112 #endif /* MSDOS */
 113
 114 #ifdef WINDOWSNT
 115 # include <stdlib.h>
 116 # include <fcntl.h>
 117 # include <string.h>
 118 # include <direct.h>
 119 # include <io.h>
 120 # define MAXPATHLEN _MAX_PATH
 121 # undef HAVE_NTGUI
 122 # undef  DOS_NT
 123 # define DOS_NT
 124 # ifndef HAVE_GETCWD
 125 #   define HAVE_GETCWD
 126 # endif /* undef HAVE_GETCWD */
 127 #else /* not WINDOWSNT */
 128 # ifdef STDC_HEADERS
 129 #  include <stdlib.h>
 130 #  include <string.h>
 131 # else /* no standard C headers */
 132     extern char *getenv ();
 133 #  ifdef VMS
 134 #   define EXIT_SUCCESS 1
 135 #   define EXIT_FAILURE 0
 136 #  else /* no VMS */
 137 #   define EXIT_SUCCESS 0
 138 #   define EXIT_FAILURE 1
 139 #  endif
 140 # endif
 141 #endif /* !WINDOWSNT */
 142
 143 #ifdef HAVE_UNISTD_H
 144 # include <unistd.h>
 145 #else
 146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 147     extern char *getcwd (char *buf, size_t size);
 148 # endif
 149 #endif /* HAVE_UNISTD_H */
 150
 151 #include <stdio.h>
 152 #include <ctype.h>
 153 #include <errno.h>
 154 #ifndef errno
 155   extern int errno;
 156 #endif
 157 #include <sys/types.h>
 158 #include <sys/stat.h>
 159
 160 #include <assert.h>
 161 #ifdef NDEBUG
 162 # undef  assert                 /* some systems have a buggy assert.h */
 163 # define assert(x) ((void) 0)
 164 #endif
 165
 166 #if !defined (S_ISREG) && defined (S_IFREG)
 167 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 168 #endif
 169
 170 #if LONG_OPTIONS
 171 # include <getopt.h>
 172 #else
 173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 174   extern char *optarg;
 175   extern int optind, opterr;
 176 #endif /* LONG_OPTIONS */
 177
 178 #ifdef ETAGS_REGEXPS
 179 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 180 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 181                              !!! NOTICE !!!
 182  the regex.h distributed with Cygwin is not compatible with etags, alas!
 183 If you want regular expression support, you should delete this notice and
 184               arrange to use the GNU regex.h and regex.c.
 185 #   endif
 186 # endif
 187 # include <regex.h>
 188 #endif /* ETAGS_REGEXPS */
 189
 190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 191  Leave it undefined to make the program "etags", which makes emacs-style
 192  tag tables and tags typedefs, #defines and struct/union/enum by default.
      Either way CTAGS is normalised below to TRUE or FALSE, so that it can
      be tested with `#if CTAGS'. */
 193 #ifdef CTAGS
 194 # undef  CTAGS
 195 # define CTAGS TRUE
 196 #else
 197 # define CTAGS FALSE
 198 #endif
 199
     /* String equality tests; each yields non-zero when the strings match.
        When assert is enabled (DEBUG builds) every macro checks that BOTH
        arguments are non-NULL, because the comparison functions must never
        be handed a null pointer.  With NDEBUG the assert expands to nothing.
        Note that S and T are evaluated more than once when assert is active. */
 200 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 201 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 202 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 203 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 204
 205 #define CHARS 256               /* number of distinct 8-bit char values (2^8) */
 206 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* x as an index in 0..CHARS-1 */
 207 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 208 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 209 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 210 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 211 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 212
     /* <ctype.h> tests and case conversions.  They are applied to CHAR(c)
        rather than to c itself, so the argument passed is never negative. */
 213 #define ISALNUM(c)      isalnum (CHAR(c))
 214 #define ISALPHA(c)      isalpha (CHAR(c))
 215 #define ISDIGIT(c)      isdigit (CHAR(c))
 216 #define ISLOWER(c)      islower (CHAR(c))
 217
 218 #define lowcase(c)      tolower (CHAR(c))
 219 #define upcase(c)       toupper (CHAR(c))
 220
 221
 222 /*
 223  *      xnew, xrnew -- allocate, reallocate storage
 224  *
 225  * SYNOPSIS:    Type *xnew (int n, Type);
 226  *              void xrnew (OldPointer, int n, Type);
      *
      * N counts objects of type Type, not bytes: both macros multiply it by
      * sizeof (Type).  xrnew assigns the reallocated pointer back to
      * OldPointer, which therefore has to be an lvalue.  When DEBUG is TRUE
      * the calls go to trace_malloc/trace_realloc together with __FILE__ and
      * __LINE__; otherwise they go to xmalloc/xrealloc.
 227  */
 228 #if DEBUG
 229 # include "chkmalloc.h"
 230 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 231                                                   (n) * sizeof (Type)))
 232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 233                                         (char *) (op), (n) * sizeof (Type)))
 234 #else
 235 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #endif
 239
 240 #define bool int                /* flags are ints: longopts stores the address of some of them */
 241
 242 typedef void Lang_function __P((FILE *)); /* type of the `function' member of `language' */
 243
 244 typedef struct
 245 {                               /* one entry of the compressors table */
 246   char *suffix;                 /* file name suffix for this compressor */
 247   char *command;                /* takes one arg and decompresses to stdout, */
                                     /* for example "gzip -d -c" */
 248 } compressor;
 249
 250 typedef struct
 251 {                               /* one entry of the lang_names table */
 252   char *name;                   /* language name */
 253   char *help;                   /* detailed help for the language */
 254   Lang_function *function;      /* parse function */
 255   char **suffixes;              /* NULL-terminated name suffixes of this language's files */
 256   char **filenames;             /* NULL-terminated names of this language's files */
 257   char **interpreters;          /* NULL-terminated interpreters for this language */
 258   bool metasource;              /* source used to generate other sources */
 259 } language;
 260
 261 typedef struct fdesc
 262 {                               /* description of one input file */
 263   struct fdesc *next;           /* for the linked list */
 264   char *infname;                /* uncompressed input file name */
 265   char *infabsname;             /* absolute uncompressed input file name */
 266   char *infabsdir;              /* absolute dir of input file */
 267   char *taggedfname;            /* file name to write in tagfile */
 268   language *lang;               /* language of file */
 269   char *prop;                   /* file properties to write in tagfile */
 270   bool usecharno;               /* etags tags shall contain char number */
 271   bool written;                 /* entry written in the tags file */
 272 } fdesc;
 273
 274 typedef struct node_st
 275 {                               /* sorting structure: one tag in the binary tree of tags */
 276   struct node_st *left, *right; /* left and right sons */
 277   fdesc *fdp;                   /* description of file to which the tag belongs */
 278   char *name;                   /* tag name */
 279   char *regex;                  /* search regexp */
 280   bool valid;                   /* write this tag on the tag file */
 281   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 282   bool been_warned;             /* warning already given for duplicated tag */
 283   int lno;                      /* line number tag is on */
 284   long cno;                     /* character number line starts on */
 285 } node;
 286
 287 /*
 288  * A `linebuffer' is a structure which holds a line of text.
 289  * `readline_internal' reads a line from a stream into a linebuffer
 290  * and works regardless of the length of the line.
 291  * SIZE is the size of BUFFER, LEN is the length of the string in
 292  * BUFFER after readline reads it.
 293  */
 294 typedef struct
 295 {
 296   long size;                    /* size of buffer */
 297   int len;                      /* length of the string currently in buffer */
 298   char *buffer;                 /* the text itself */
 299 } linebuffer;
 300
 301 /* One element of the argument list.  Used to support mixing of --lang and
        file names: every element records what kind of argument it is. */
 302 typedef struct
 303 {
 304   enum {
 305     at_language,                /* a language specification */
 306     at_regexp,                  /* a regular expression */
 307     at_filename,                /* a file name */
 308     at_stdin,                   /* read from stdin here */
 309     at_end                      /* stop parsing the list */
 310   } arg_type;                   /* argument type */
 311   language *lang;               /* language associated with the argument */
 312   char *what;                   /* the argument itself */
 313 } argument;
 314
 315 #ifdef ETAGS_REGEXPS
 316 /* Structure defining a regular expression; a node of the list headed by p_head. */
 317 typedef struct regexp
 318 {
 319   struct regexp *p_next;        /* pointer to next in list */
 320   language *lang;               /* if set, use only for this language */
 321   char *pattern;                /* the regexp pattern */
 322   char *name;                   /* tag name */
 323   struct re_pattern_buffer *pat; /* the compiled pattern */
 324   struct re_registers regs;     /* re registers */
 325   bool error_signaled;          /* already signaled for this regexp */
 326   bool force_explicit_name;     /* do not allow implicit tag name */
 327   bool ignore_case;             /* ignore case when matching */
 328   bool multi_line;              /* do a multi-line match on the whole file */
 329 } regexp;
 330 #endif /* ETAGS_REGEXPS */
 331
 332
 333 /* Per-language parse functions.  Many compilers barf on this:
 334         Lang_function Ada_funcs;
 335    so let's write it this way.  __P drops the parameter lists when the
        compiler has no prototypes. */
 336 static void Ada_funcs __P((FILE *));
 337 static void Asm_labels __P((FILE *));
 338 static void C_entries __P((int c_ext, FILE *)); /* extra c_ext argument: not a Lang_function */
 339 static void default_C_entries __P((FILE *));
 340 static void plain_C_entries __P((FILE *));
 341 static void Cjava_entries __P((FILE *));
 342 static void Cobol_paragraphs __P((FILE *));
 343 static void Cplusplus_entries __P((FILE *));
 344 static void Cstar_entries __P((FILE *));
 345 static void Erlang_functions __P((FILE *));
 346 static void Fortran_functions __P((FILE *));
 347 static void HTML_labels __P((FILE *));
 348 static void Lisp_functions __P((FILE *));
 349 static void Lua_functions __P((FILE *));
 350 static void Makefile_targets __P((FILE *));
 351 static void Pascal_functions __P((FILE *));
 352 static void Perl_functions __P((FILE *));
 353 static void PHP_functions __P((FILE *));
 354 static void PS_functions __P((FILE *));
 355 static void Prolog_functions __P((FILE *));
 356 static void Python_functions __P((FILE *));
 357 static void Scheme_functions __P((FILE *));
 358 static void TeX_commands __P((FILE *));
 359 static void Texinfo_nodes __P((FILE *));
 360 static void Yacc_entries __P((FILE *));
 361 static void just_read_file __P((FILE *));
 362
 363 static void print_language_names __P((void));
 364 static void print_version __P((void));
 365 static void print_help __P((argument *));
 366 int main __P((int, char **));    /* external linkage */
 367
 368 static compressor *get_compressor_from_suffix __P((char *, char **));
 369 static language *get_language_from_langname __P((const char *));
 370 static language *get_language_from_interpreter __P((char *));
 371 static language *get_language_from_filename __P((char *, bool));
 372 static void readline __P((linebuffer *, FILE *));
 373 static long readline_internal __P((linebuffer *, FILE *));
 374 static bool nocase_tail __P((char *));
 375 static void get_tag __P((char *, char **));
 376
 377 #ifdef ETAGS_REGEXPS            /* declared only when regexp support is compiled in */
 378 static void analyse_regex __P((char *));
 379 static void free_regexps __P((void));
 380 static void regex_tag_multiline __P((void));
 381 #endif /* ETAGS_REGEXPS */
 382 static void error __P((const char *, const char *));
 383 static void suggest_asking_for_help __P((void));
 384 void fatal __P((char *, char *)); /* not static, unlike its neighbours */
 385 static void pfatal __P((char *));
 386 static void add_node __P((node *, node **));
 387
 388 static void init __P((void));
 389 static void process_file_name __P((char *, language *));
 390 static void process_file __P((FILE *, char *, language *));
 391 static void find_entries __P((FILE *));
 392 static void free_tree __P((node *));
 393 static void free_fdesc __P((fdesc *));
 394 static void pfnote __P((char *, bool, char *, int, int, long));
 395 static void make_tag __P((char *, int, bool, char *, int, int, long));
 396 static void invalidate_nodes __P((fdesc *, node **));
 397 static void put_entries __P((node *));
 398
 399 static char *concat __P((char *, char *, char *));
 400 static char *skip_spaces __P((char *));
 401 static char *skip_non_spaces __P((char *));
 402 static char *savenstr __P((char *, int));
 403 static char *savestr __P((char *));
 404 static char *etags_strchr __P((const char *, int));
 405 static char *etags_strrchr __P((const char *, int));
 406 static int etags_strcasecmp __P((const char *, const char *));   /* used by strcaseeq */
 407 static int etags_strncasecmp __P((const char *, const char *, int)); /* used by strncaseeq */
 408 static char *etags_getcwd __P((void));
 409 static char *relative_filename __P((char *, char *));
 410 static char *absolute_filename __P((char *, char *));
 411 static char *absolute_dirname __P((char *, char *));
 412 static bool filename_is_absolute __P((char *f));
 413 static void canonicalize_filename __P((char *));
 414 static void linebuffer_init __P((linebuffer *));
 415 static void linebuffer_setlen __P((linebuffer *, int));
 416 static PTR xmalloc __P((unsigned int));          /* used by xnew unless DEBUG */
 417 static PTR xrealloc __P((char *, unsigned int)); /* used by xrnew unless DEBUG */
 418
 419 \f
 420 static char searchar = '/';     /* use /.../ searches */
 421
 422 static char *tagfile;           /* output file */
 423 static char *progname;          /* name this program was invoked with */
 424 static char *cwd;               /* current working directory */
 425 static char *tagfiledir;        /* directory of tagfile */
 426 static FILE *tagf;              /* ioptr for tags file */
 427
 428 static fdesc *fdhead;           /* head of file description list */
 429 static fdesc *curfdp;           /* current file description */
 430 static int lineno;              /* line number of current line */
 431 static long charno;             /* current character number */
 432 static long linecharno;         /* charno of start of current line */
 433 static char *dbp;               /* pointer to start of current tag */
 434
 435 static const int invalidcharno = -1; /* NOTE(review): presumably marks "no valid character number" -- confirm at its uses */
 436
 437 static node *nodehead;          /* the head of the binary tree of tags */
 438 static node *last_node;         /* the last node created */
 439
 440 static linebuffer lb;           /* the current line */
 441 static linebuffer filebuf;      /* a buffer containing the whole file */
 442 static linebuffer token_name;   /* a buffer containing a tag name */
 443
 444 /* boolean "functions" (see init): tables indexed by CHAR(c), read through
        the iswhite, notinname, intoken, begtoken and endtoken macros */
 445 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
     /* the character sets those macros refer to */
 446 static char
 447   /* white chars */
 448   *white = " \f\t\n\r\v",
 449   /* not in a name */
 450   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 451   /* token ending chars */
 452   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 453   /* token starting chars */
 454   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 455   /* valid in-token chars */
 456   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 457
 458 static bool append_to_tagfile;  /* -a: append to tags */
 459 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 460 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 461 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 462                                 /* 0 struct/enum/union decls, and C++ */
 463                                 /* member functions. */
 464 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 465                                 /* constants and variables. */
 466                                 /* -D: opposite of -d.  Default under ctags. */
 467 static bool globals;            /* --globals, --no-globals: create tags for global variables */
 468 static bool declarations;       /* --declarations: tag function declarations and extern variables in C&Co */
 469 static bool members;            /* --members, --no-members: create tags for C member variables */
 470 static bool no_line_directive;  /* --no-line-directive: ignore #line directives (undocumented) */
 471 static bool update;             /* -u: update tags */
 472 static bool vgrind_style;       /* -v: create vgrind style index output */
 473 static bool no_warnings;        /* -w: suppress warnings */
 474 static bool cxref_style;        /* -x: create cxref style output */
 475 static bool cplusplus;          /* .[hc] means C++, not C */
 476 static bool ignoreindent;       /* -I: ignore indentation in C */
 477 static bool packages_only;      /* --packages-only: in Ada, only tag packages */
 478
 479 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 480 static bool parsing_stdin;      /* --parse-stdin used */
 481
 482 #ifdef ETAGS_REGEXPS
 483 static regexp *p_head;          /* list of all regexps */
 484 static bool need_filebuf;       /* some regexes are multi-line */
 485 #else
 486 # define need_filebuf FALSE     /* without regexp support it is a constant */
 487 #endif /* ETAGS_REGEXPS */
 488
 489 #if LONG_OPTIONS                /* long option table, using <getopt.h>'s struct option */
 490 static struct option longopts[] =
 491 {                               /* fields: name, argument kind, flag address or NULL, value */
 492   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 493   { "c++",                no_argument,       NULL,               'C'   },
 494   { "declarations",       no_argument,       &declarations,      TRUE  },
 495   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 496   { "help",               no_argument,       NULL,               'h'   },
 497   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the entry above -- confirm intent */
 498   { "ignore-indentation", no_argument,       NULL,               'I'   },
 499   { "language",           required_argument, NULL,               'l'   },
 500   { "members",            no_argument,       &members,           TRUE  },
 501   { "no-members",         no_argument,       &members,           FALSE },
 502   { "output",             required_argument, NULL,               'o'   },
 503 #ifdef ETAGS_REGEXPS
 504   { "regex",              required_argument, NULL,               'r'   },
 505   { "no-regex",           no_argument,       NULL,               'R'   },
 506   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 507 #endif /* ETAGS_REGEXPS */
 508   { "parse-stdin",        required_argument, NULL,               STDIN },
 509   { "version",            no_argument,       NULL,               'V'   },
 510
 511 #if CTAGS /* Ctags options */
 512   { "backward-search",    no_argument,       NULL,               'B'   },
 513   { "cxref",              no_argument,       NULL,               'x'   },
 514   { "defines",            no_argument,       NULL,               'd'   },
 515   { "globals",            no_argument,       &globals,           TRUE  },
 516   { "typedefs",           no_argument,       NULL,               't'   },
 517   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 518   { "update",             no_argument,       NULL,               'u'   },
 519   { "vgrind",             no_argument,       NULL,               'v'   },
 520   { "no-warn",            no_argument,       NULL,               'w'   },
 521
 522 #else /* Etags options */
 523   { "append",             no_argument,       NULL,               'a'   },
 524   { "no-defines",         no_argument,       NULL,               'D'   },
 525   { "no-globals",         no_argument,       &globals,           FALSE },
 526   { "include",            required_argument, NULL,               'i'   },
 527 #endif
 528   { NULL }                      /* end of table */
 529 };
 530 #endif /* LONG_OPTIONS */
 531
 532 static compressor compressors[] = /* { suffix, command } pairs */
 533 {
 534   { "z", "gzip -d -c"},
 535   { "Z", "gzip -d -c"},
 536   { "gz", "gzip -d -c"},
 537   { "GZ", "gzip -d -c"},
 538   { "bz2", "bzip2 -d -c" },
 539   { NULL }                      /* end of table */
 540 };
 541
 542 /*
 543  * Language stuff.
 544  */
 545
 546 /* Ada code: suffixes and help text of the "ada" entry in lang_names */
 547 static char *Ada_suffixes [] =
 548   { "ads", "adb", "ada", NULL };
 549 static char Ada_help [] =
 550 "In Ada code, functions, procedures, packages, tasks and types are\n\
 551 tags.  Use the `--packages-only' option to create tags for\n\
 552 packages only.\n\
 553 Ada tag names have suffixes indicating the type of entity:\n\
 554         Entity type:    Qualifier:\n\
 555         ------------    ----------\n\
 556         function        /f\n\
 557         procedure       /p\n\
 558         package spec    /s\n\
 559         package body    /b\n\
 560         type            /t\n\
 561         task            /k\n\
 562 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 563 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 564 will just search for any tag `bidule'.";
 565
 566 /* Assembly code: suffixes and help text of the "asm" entry in lang_names */
 567 static char *Asm_suffixes [] =
 568   { "a",        /* Unix assembler */
 569     "asm", /* Microcontroller assembly */
 570     "def", /* BSO/Tasking definition includes  */
 571     "inc", /* Microcontroller include files */
 572     "ins", /* Microcontroller include files */
 573     "s", "sa", /* Unix assembler */
 574     "S",   /* cpp-processed Unix assembler */
 575     "src", /* BSO/Tasking C compiler output */
 576     NULL
 577   };
 578 static char Asm_help [] =
 579 "In assembler code, labels appearing at the beginning of a line,\n\
 580 followed by a colon, are tags.";
 581
 582
 583 /* Note that .c and .h can be considered C++, if the --c++ flag was
 584    given, or if the `class' or `template' keywords are met inside the file.
 585    That is why default_C_entries is called for these. */
 586 static char *default_C_suffixes [] =
 587   { "c", "h", NULL };
 588 static char default_C_help [] =
 589 "In C code, any C function or typedef is a tag, and so are\n\
 590 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 591 definitions and `enum' constants are tags unless you specify\n\
 592 `--no-defines'.  Global variables are tags unless you specify\n\
 593 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 594 can make the tags table file much smaller.\n\
 595 You can tag function declarations and external variables by\n\
 596 using `--declarations', and struct members by using `--members'.";
 597
 598 static char *Cplusplus_suffixes [] = /* C++: used by the "c++" entry in lang_names */
 599   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 600     "M",                        /* Objective C++ */
 601     "pdb",                      /* Postscript with C syntax */
 602     NULL };
 603 static char Cplusplus_help [] =
 604 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 605 --help --lang=c --lang=c++ for full help.)\n\
 606 In addition to C tags, member functions are also recognized, and\n\
 607 optionally member variables if you use the `--members' option.\n\
 608 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 609 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 610 `operator+'.";
 611
     /* Java: used by the "java" entry in lang_names */
 612 static char *Cjava_suffixes [] =
 613   { "java", NULL };
 614 static char Cjava_help [] =
 615 "In Java code, all the tags constructs of C and C++ code are\n\
 616 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 617
 618
 619 static char *Cobol_suffixes [] = /* Cobol: used by the "cobol" entry in lang_names */
 620   { "COB", "cob", NULL };
 621 static char Cobol_help [] =
 622 "In Cobol code, tags are paragraph names; that is, any word\n\
 623 starting in column 8 and followed by a period.";
 624
     /* C*: has no help text of its own, its lang_names entry uses no_lang_help */
 625 static char *Cstar_suffixes [] =
 626   { "cs", "hs", NULL };
 627
     /* Erlang: used by the "erlang" entry in lang_names */
 628 static char *Erlang_suffixes [] =
 629   { "erl", "hrl", NULL };
 630 static char Erlang_help [] =
 631 "In Erlang code, the tags are the functions, records and macros\n\
 632 defined in the file.";
 633
 634 static char *Fortran_suffixes [] = /* Fortran: used by the "fortran" entry in lang_names */
 635   { "F", "f", "f90", "for", NULL };
 636 static char Fortran_help [] =
 637 "In Fortran code, functions, subroutines and block data are tags.";
 638
     /* HTML: used by the "html" entry in lang_names */
 639 static char *HTML_suffixes [] =
 640   { "htm", "html", "shtml", NULL };
 641 static char HTML_help [] =
 642 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 643 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 644 occurrences of `id='.";
 645
 646 static char *Lisp_suffixes [] = /* Lisp: used by the "lisp" entry in lang_names */
 647   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 648 static char Lisp_help [] =
 649 "In Lisp code, any function defined with `defun', any variable\n\
 650 defined with `defvar' or `defconst', and in general the first\n\
 651 argument of any expression that starts with `(def' in column zero\n\
 652 is a tag.";
 653
     /* Lua: used by the "lua" entry in lang_names */
 654 static char *Lua_suffixes [] =
 655   { "lua", "LUA", NULL };
 656 static char Lua_help [] =
 657 "In Lua scripts, all functions are tags.";
 658
     /* Makefiles: listed by file name, not by suffix */
 659 static char *Makefile_filenames [] =
 660   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 661 static char Makefile_help [] =
 662 "In makefiles, targets are tags; additionally, variables are tags\n\
 663 unless you specify `--no-globals'.";
 664
 665 static char *Objc_suffixes [] = /* Objective C suffixes and help text */
 666   { "lm",                       /* Objective lex file */
 667     "m",                        /* Objective C file */
 668      NULL };
 669 static char Objc_help [] =
 670 "In Objective C code, tags include Objective C definitions for classes,\n\
 671 class categories, methods and protocols.  Tags for variables and\n\
 672 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 673
     /* Pascal suffixes and help text */
 674 static char *Pascal_suffixes [] =
 675   { "p", "pas", NULL };
 676 static char Pascal_help [] =
 677 "In Pascal code, the tags are the functions and procedures defined\n\
 678 in the file.";
 679
     /* Perl suffixes, interpreter names and help text */
 680 static char *Perl_suffixes [] =
 681   { "pl", "pm", NULL };
 682 static char *Perl_interpreters [] =
 683   { "perl", "@PERL@", NULL };
 684 static char Perl_help [] =
 685 "In Perl code, the tags are the packages, subroutines and variables\n\
 686 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 687 `--globals' if you want to tag global variables.  Tags for\n\
 688 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 689 defined in the default package is `main::SUB'.";
 690
     /* PHP suffixes and help text */
 691 static char *PHP_suffixes [] =
 692   { "php", "php3", "php4", NULL };
 693 static char PHP_help [] =
 694 "In PHP code, tags are functions, classes and defines.  When using\n\
 695 the `--members' option, vars are tags too.";
 696
 697 static char *plain_C_suffixes [] = /* plain C suffixes; no help text is defined for them here */
 698   { "pc",                       /* Pro*C file */
 699      NULL };
 700
     /* PostScript suffixes and help text */
 701 static char *PS_suffixes [] =
 702   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 703 static char PS_help [] =
 704 "In PostScript code, the tags are the functions.";
 705
     /* Prolog suffixes and help text */
 706 static char *Prolog_suffixes [] =
 707   { "prolog", NULL };
 708 static char Prolog_help [] =
 709 "In Prolog code, tags are predicates and rules at the beginning of\n\
 710 line.";
 711
     /* Python suffixes and help text */
 712 static char *Python_suffixes [] =
 713   { "py", NULL };
 714 static char Python_help [] =
 715 "In Python code, `def' or `class' at the beginning of a line\n\
 716 generate a tag.";
 717
 718 /* Scheme suffixes and help text.
        Can't do the `SCM' or `scm' prefix with a version number. */
 719 static char *Scheme_suffixes [] =
 720   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 721 static char Scheme_help [] =
 722 "In Scheme code, tags include anything defined with `def' or with a\n\
 723 construct whose name starts with `def'.  They also include\n\
 724 variables set with `set!' at top level in the file.";
 725
 726 static char *TeX_suffixes [] =  /* TeX and LaTeX suffixes and help text */
 727   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 728 static char TeX_help [] =
 729 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 730 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 731 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 732 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 733 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 734 \n\
 735 Other commands can be specified by setting the environment variable\n\
 736 `TEXTAGS' to a colon-separated list like, for example,\n\
 737      TEXTAGS=\"mycommand:myothercommand\".";
 738
 739
     /* Texinfo suffixes and help text */
 740 static char *Texinfo_suffixes [] =
 741   { "texi", "texinfo", "txi", NULL };
 742 static char Texinfo_help [] =
 743 "for texinfo files, lines starting with @node are tagged.";
 744
     /* Yacc and Bison suffixes and help text */
 745 static char *Yacc_suffixes [] =
 746   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 747 static char Yacc_help [] =
 748 "In Bison or Yacc input files, each rule defines as a tag the\n\
 749 nonterminal it constructs.  The portions of the file that contain\n\
 750 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 751 for full help).";
 752
 753 static char auto_help [] =
 754 "`auto' is not a real language, it indicates to use\n\
 755 a default language for files base on file name suffix and file contents.";
 756
 757 static char none_help [] =
 758 "`none' is not a real language, it indicates to only do\n\
 759 regexp processing on files.";
 760
 761 static char no_lang_help [] =
 762 "No detailed help available for this language.";
 763
 764
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Rows are positional.  Judging from the rows below and from the
 * members the lookup functions read (name, help, suffixes, filenames,
 * interpreters), the fields are, in order: language name, help text,
 * tagging function, NULL-terminated suffix list, NULL-terminated list
 * of whole file names (only "makefile" has one), NULL-terminated list
 * of interpreter names (only "perl" has one), and a trailing flag
 * that only "yacc" sets -- NOTE(review): the struct declaration is
 * not in view here, confirm the name and meaning of that last field.
 * Fields omitted at the end of a row are zero-initialized.
 *
 * The lookup functions walk the table from the top and return the
 * first matching row, so the row order is significant.  The row
 * whose name is NULL marks the end of the table.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 803
 804 \f
 805 static void
 806 print_language_names ()
 807 {
 808   language *lang;
 809   char **name, **ext;
 810
 811   puts ("\nThese are the currently supported languages, along with the\n\
 812 default file names and dot suffixes:");
 813   for (lang = lang_names; lang->name != NULL; lang++)
 814     {
 815       printf ("  %-*s", 10, lang->name);
 816       if (lang->filenames != NULL)
 817         for (name = lang->filenames; *name != NULL; name++)
 818           printf (" %s", *name);
 819       if (lang->suffixes != NULL)
 820         for (ext = lang->suffixes; *ext != NULL; ext++)
 821           printf (" .%s", *ext);
 822       puts ("");
 823     }
 824   puts ("where `auto' means use default language for files based on file\n\
 825 name suffix, and `none' means only do regexp processing on files.\n\
 826 If no language is specified and no matching suffix is found,\n\
 827 the first line of the file is read for a sharp-bang (#!) sequence\n\
 828 followed by the name of an interpreter.  If no such sequence is found,\n\
 829 Fortran is tried first; if no tags are found, C is tried next.\n\
 830 When parsing any C file, a \"class\" or \"template\" keyword\n\
 831 switches to C++.");
 832   puts ("Compressed files are supported using gzip and bzip2.\n\
 833 \n\
 834 For detailed help on a given language use, for example,\n\
 835 etags --help --lang=ada.");
 836 }
 837
 838 #ifndef EMACS_NAME
 839 # define EMACS_NAME "standalone"
 840 #endif
 841 #ifndef VERSION
 842 # define VERSION "version"
 843 #endif
 844 static void
 845 print_version ()
 846 {
 847   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 848   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 849   puts ("This program is distributed under the same terms as Emacs");
 850
 851   exit (EXIT_SUCCESS);
 852 }
 853
/*
 * Print help on standard output and exit successfully; never returns.
 *
 * ARGBUFFER is the argument list built by main, terminated by an
 * entry whose arg_type is at_end.  If it contains any at_language
 * entries, only the detailed help text of those languages is printed
 * (separated by blank lines).  Otherwise the general usage message is
 * printed: the option list, which differs between ctags and etags
 * (CTAGS), followed by the list of supported languages and the bug
 * report address.
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one text per --language option given. */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, that is all the user gets. */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (LONG_OPTIONS)
    puts ("You may use unambiguous abbreviations for the long option names.");
  else
    puts ("Long option names do not work with this executable, as it is not\n\
linked with GNU getopt.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  /* From here on, options are listed in roughly alphabetical order;
     those that exist for only one of ctags/etags are guarded by CTAGS. */
  if (!CTAGS)
    puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  /* The second half of the --declarations text depends on whether
     globals are opt-in (ctags) or opt-out (etags). */
  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");
  puts ("--members\n\
        Create tag entries for members of structures in some languages.");

  /* Regexp options are documented only when they are compiled in. */
#ifdef ETAGS_REGEXPS
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");
  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");
  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option groups are documented for ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Generates an index of items intended for human consumption,\n\
        similar to the output of vgrind.  The index is sorted, and\n\
        gives the page number of each item.");
      puts ("-w, --no-warn\n\
        Suppress warning messages about entries defined in multiple\n\
        files.");
      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  /* Finish with the table of languages and the bug report address. */
  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1012
1013 \f
1014 #ifdef VMS                      /* VMS specific functions */
1015
/* End-of-string marker.  */
#define EOS     '\0'

/* Longest file specification that fits in a vspec.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* Buffer for one file specification: CURLEN is the number of
   characters currently held in BODY, which has room for
   MAX_FILE_SPEC_LEN characters plus a terminating EOS.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1025
1026 /*
1027  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1028  returning in each successive call the next file name matching the input
1029  spec. The function expects that each in_spec passed
1030  to it will be processed to completion; in particular, up to and
1031  including the call following that in which the last matching name
1032  is returned, the function ignores the value of in_spec, and will
1033  only start processing a new spec with the following call.
1034  If an error occurs, on return out_spec contains the value
1035  of in_spec when the error occurred.
1036
1037  With each successive file name returned in out_spec, the
1038  function's return value is one. When there are no more matching
1039  names the function returns zero. If on the first call no file
1040  matches in_spec, or there is any other error, -1 is returned.
1041 */
1042
1043 #include        <rmsdef.h>
1044 #include        <descrip.h>
1045 #define         OUTSIZE MAX_FILE_SPEC_LEN
1046 static short
1047 fn_exp (out, in)
1048      vspec *out;
1049      char *in;
1050 {
1051   static long context = 0;
1052   static struct dsc$descriptor_s o;
1053   static struct dsc$descriptor_s i;
1054   static bool pass1 = TRUE;
1055   long status;
1056   short retval;
1057
1058   if (pass1)
1059     {
1060       pass1 = FALSE;
1061       o.dsc$a_pointer = (char *) out;
1062       o.dsc$w_length = (short)OUTSIZE;
1063       i.dsc$a_pointer = in;
1064       i.dsc$w_length = (short)strlen(in);
1065       i.dsc$b_dtype = DSC$K_DTYPE_T;
1066       i.dsc$b_class = DSC$K_CLASS_S;
1067       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1068       o.dsc$b_class = DSC$K_CLASS_VS;
1069     }
1070   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1071     {
1072       out->body[out->curlen] = EOS;
1073       return 1;
1074     }
1075   else if (status == RMS$_NMF)
1076     retval = 0;
1077   else
1078     {
1079       strcpy(out->body, in);
1080       retval = -1;
1081     }
1082   lib$find_file_end(&context);
1083   pass1 = TRUE;
1084   return retval;
1085 }
1086
1087 /*
1088   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1089   name of each file specified by the provided arg expanding wildcards.
1090 */
1091 static char *
1092 gfnames (arg, p_error)
1093      char *arg;
1094      bool *p_error;
1095 {
1096   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1097
1098   switch (fn_exp (&filename, arg))
1099     {
1100     case 1:
1101       *p_error = FALSE;
1102       return filename.body;
1103     case 0:
1104       *p_error = FALSE;
1105       return NULL;
1106     default:
1107       *p_error = TRUE;
1108       return filename.body;
1109     }
1110 }
1111
1112 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
1113 system (cmd)
1114      char *cmd;
1115 {
1116   error ("%s", "system() function not implemented under VMS");
1117 }
1118 #endif
1119
1120 #define VERSION_DELIM   ';'
1121 char *massage_name (s)
1122      char *s;
1123 {
1124   char *start = s;
1125
1126   for ( ; *s; s++)
1127     if (*s == VERSION_DELIM)
1128       {
1129         *s = EOS;
1130         break;
1131       }
1132     else
1133       *s = lowcase (*s);
1134   return start;
1135 }
1136 #endif /* VMS */
1137
1138 \f
1139 int
1140 main (argc, argv)
1141      int argc;
1142      char *argv[];
1143 {
1144   int i;
1145   unsigned int nincluded_files;
1146   char **included_files;
1147   argument *argbuffer;
1148   int current_arg, file_count;
1149   linebuffer filename_lb;
1150   bool help_asked = FALSE;
1151 #ifdef VMS
1152   bool got_err;
1153 #endif
1154  char *optstring;
1155  int opt;
1156
1157
1158 #ifdef DOS_NT
1159   _fmode = O_BINARY;   /* all of files are treated as binary files */
1160 #endif /* DOS_NT */
1161
1162   progname = argv[0];
1163   nincluded_files = 0;
1164   included_files = xnew (argc, char *);
1165   current_arg = 0;
1166   file_count = 0;
1167
1168   /* Allocate enough no matter what happens.  Overkill, but each one
1169      is small. */
1170   argbuffer = xnew (argc, argument);
1171
1172   /*
1173    * If etags, always find typedefs and structure tags.  Why not?
1174    * Also default to find macro constants, enum constants and
1175    * global variables.
1176    */
1177   if (!CTAGS)
1178     {
1179       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1180       globals = TRUE;
1181     }
1182
1183   optstring = "-";
1184 #ifdef ETAGS_REGEXPS
1185   optstring = "-r:Rc:";
1186 #endif /* ETAGS_REGEXPS */
1187   if (!LONG_OPTIONS)
1188     optstring += 1;
1189   optstring = concat (optstring,
1190                       "Cf:Il:o:SVhH",
1191                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1192
1193   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1194     switch (opt)
1195       {
1196       case 0:
1197         /* If getopt returns 0, then it has already processed a
1198            long-named option.  We should do nothing.  */
1199         break;
1200
1201       case 1:
1202         /* This means that a file name has been seen.  Record it. */
1203         argbuffer[current_arg].arg_type = at_filename;
1204         argbuffer[current_arg].what     = optarg;
1205         ++current_arg;
1206         ++file_count;
1207         break;
1208
1209       case STDIN:
1210         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1211         argbuffer[current_arg].arg_type = at_stdin;
1212         argbuffer[current_arg].what     = optarg;
1213         ++current_arg;
1214         ++file_count;
1215         if (parsing_stdin)
1216           fatal ("cannot parse standard input more than once", (char *)NULL);
1217         parsing_stdin = TRUE;
1218         break;
1219
1220         /* Common options. */
1221       case 'C': cplusplus = TRUE;               break;
1222       case 'f':         /* for compatibility with old makefiles */
1223       case 'o':
1224         if (tagfile)
1225           {
1226             error ("-o option may only be given once.", (char *)NULL);
1227             suggest_asking_for_help ();
1228             /* NOTREACHED */
1229           }
1230         tagfile = optarg;
1231         break;
1232       case 'I':
1233       case 'S':         /* for backward compatibility */
1234         ignoreindent = TRUE;
1235         break;
1236       case 'l':
1237         {
1238           language *lang = get_language_from_langname (optarg);
1239           if (lang != NULL)
1240             {
1241               argbuffer[current_arg].lang = lang;
1242               argbuffer[current_arg].arg_type = at_language;
1243               ++current_arg;
1244             }
1245         }
1246         break;
1247       case 'c':
1248         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1249         optarg = concat (optarg, "i", ""); /* memory leak here */
1250         /* FALLTHRU */
1251       case 'r':
1252         argbuffer[current_arg].arg_type = at_regexp;
1253         argbuffer[current_arg].what = optarg;
1254         ++current_arg;
1255         break;
1256       case 'R':
1257         argbuffer[current_arg].arg_type = at_regexp;
1258         argbuffer[current_arg].what = NULL;
1259         ++current_arg;
1260         break;
1261       case 'V':
1262         print_version ();
1263         break;
1264       case 'h':
1265       case 'H':
1266         help_asked = TRUE;
1267         break;
1268
1269         /* Etags options */
1270       case 'a': append_to_tagfile = TRUE;                       break;
1271       case 'D': constantypedefs = FALSE;                        break;
1272       case 'i': included_files[nincluded_files++] = optarg;     break;
1273
1274         /* Ctags options. */
1275       case 'B': searchar = '?';                                 break;
1276       case 'd': constantypedefs = TRUE;                         break;
1277       case 't': typedefs = TRUE;                                break;
1278       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1279       case 'u': update = TRUE;                                  break;
1280       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1281       case 'x': cxref_style = TRUE;                             break;
1282       case 'w': no_warnings = TRUE;                             break;
1283       default:
1284         suggest_asking_for_help ();
1285         /* NOTREACHED */
1286       }
1287
1288   for (; optind < argc; optind++)
1289     {
1290       argbuffer[current_arg].arg_type = at_filename;
1291       argbuffer[current_arg].what = argv[optind];
1292       ++current_arg;
1293       ++file_count;
1294     }
1295
1296   argbuffer[current_arg].arg_type = at_end;
1297
1298   if (help_asked)
1299     print_help (argbuffer);
1300     /* NOTREACHED */
1301
1302   if (nincluded_files == 0 && file_count == 0)
1303     {
1304       error ("no input files specified.", (char *)NULL);
1305       suggest_asking_for_help ();
1306       /* NOTREACHED */
1307     }
1308
1309   if (tagfile == NULL)
1310     tagfile = CTAGS ? "tags" : "TAGS";
1311   cwd = etags_getcwd ();        /* the current working directory */
1312   if (cwd[strlen (cwd) - 1] != '/')
1313     {
1314       char *oldcwd = cwd;
1315       cwd = concat (oldcwd, "/", "");
1316       free (oldcwd);
1317     }
1318   /* Relative file names are made relative to the current directory. */
1319   if (streq (tagfile, "-")
1320       || strneq (tagfile, "/dev/", 5))
1321     tagfiledir = cwd;
1322   else
1323     tagfiledir = absolute_dirname (tagfile, cwd);
1324
1325   init ();                      /* set up boolean "functions" */
1326
1327   linebuffer_init (&lb);
1328   linebuffer_init (&filename_lb);
1329   linebuffer_init (&filebuf);
1330   linebuffer_init (&token_name);
1331
1332   if (!CTAGS)
1333     {
1334       if (streq (tagfile, "-"))
1335         {
1336           tagf = stdout;
1337 #ifdef DOS_NT
1338           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1339              doesn't take effect until after `stdout' is already open). */
1340           if (!isatty (fileno (stdout)))
1341             setmode (fileno (stdout), O_BINARY);
1342 #endif /* DOS_NT */
1343         }
1344       else
1345         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1346       if (tagf == NULL)
1347         pfatal (tagfile);
1348     }
1349
1350   /*
1351    * Loop through files finding functions.
1352    */
1353   for (i = 0; i < current_arg; i++)
1354     {
1355       static language *lang;    /* non-NULL if language is forced */
1356       char *this_file;
1357
1358       switch (argbuffer[i].arg_type)
1359         {
1360         case at_language:
1361           lang = argbuffer[i].lang;
1362           break;
1363 #ifdef ETAGS_REGEXPS
1364         case at_regexp:
1365           analyse_regex (argbuffer[i].what);
1366           break;
1367 #endif
1368         case at_filename:
1369 #ifdef VMS
1370           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1371             {
1372               if (got_err)
1373                 {
1374                   error ("can't find file %s\n", this_file);
1375                   argc--, argv++;
1376                 }
1377               else
1378                 {
1379                   this_file = massage_name (this_file);
1380                 }
1381 #else
1382               this_file = argbuffer[i].what;
1383 #endif
1384               /* Input file named "-" means read file names from stdin
1385                  (one per line) and use them. */
1386               if (streq (this_file, "-"))
1387                 {
1388                   if (parsing_stdin)
1389                     fatal ("cannot parse standard input AND read file names from it",
1390                            (char *)NULL);
1391                   while (readline_internal (&filename_lb, stdin) > 0)
1392                     process_file_name (filename_lb.buffer, lang);
1393                 }
1394               else
1395                 process_file_name (this_file, lang);
1396 #ifdef VMS
1397             }
1398 #endif
1399           break;
1400         case at_stdin:
1401           this_file = argbuffer[i].what;
1402           process_file (stdin, this_file, lang);
1403           break;
1404         }
1405     }
1406
1407 #ifdef ETAGS_REGEXPS
1408   free_regexps ();
1409 #endif /* ETAGS_REGEXPS */
1410   free (lb.buffer);
1411   free (filebuf.buffer);
1412   free (token_name.buffer);
1413
1414   if (!CTAGS || cxref_style)
1415     {
1416       put_entries (nodehead);   /* write the remainig tags (ETAGS) */
1417       free_tree (nodehead);
1418       nodehead = NULL;
1419       if (!CTAGS)
1420         {
1421           fdesc *fdp;
1422
1423           /* Output file entries that have no tags. */
1424           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1425             if (!fdp->written)
1426               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1427
1428           while (nincluded_files-- > 0)
1429             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1430         }
1431
1432       if (fclose (tagf) == EOF)
1433         pfatal (tagfile);
1434       exit (EXIT_SUCCESS);
1435     }
1436
1437   if (update)
1438     {
1439       char cmd[BUFSIZ];
1440       for (i = 0; i < current_arg; ++i)
1441         {
1442           switch (argbuffer[i].arg_type)
1443             {
1444             case at_filename:
1445             case at_stdin:
1446               break;
1447             default:
1448               continue;         /* the for loop */
1449             }
1450           sprintf (cmd,
1451                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1452                    tagfile, argbuffer[i].what, tagfile);
1453           if (system (cmd) != EXIT_SUCCESS)
1454             fatal ("failed to execute shell command", (char *)NULL);
1455         }
1456       append_to_tagfile = TRUE;
1457     }
1458
1459   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1460   if (tagf == NULL)
1461     pfatal (tagfile);
1462   put_entries (nodehead);       /* write all the tags (CTAGS) */
1463   free_tree (nodehead);
1464   nodehead = NULL;
1465   if (fclose (tagf) == EOF)
1466     pfatal (tagfile);
1467
1468   if (update)
1469     {
1470       char cmd[2*BUFSIZ+10];
1471       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1472       exit (system (cmd));
1473     }
1474   return EXIT_SUCCESS;
1475 }
1476
1477
1478 /*
1479  * Return a compressor given the file name.  If EXTPTR is non-zero,
1480  * return a pointer into FILE where the compressor-specific
1481  * extension begins.  If no compressor is found, NULL is returned
1482  * and EXTPTR is not significant.
1483  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1484  */
1485 static compressor *
1486 get_compressor_from_suffix (file, extptr)
1487      char *file;
1488      char **extptr;
1489 {
1490   compressor *compr;
1491   char *slash, *suffix;
1492
1493   /* This relies on FN to be after canonicalize_filename,
1494      so we don't need to consider backslashes on DOS_NT.  */
1495   slash = etags_strrchr (file, '/');
1496   suffix = etags_strrchr (file, '.');
1497   if (suffix == NULL || suffix < slash)
1498     return NULL;
1499   if (extptr != NULL)
1500     *extptr = suffix;
1501   suffix += 1;
1502   /* Let those poor souls who live with DOS 8+3 file name limits get
1503      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1504      Only the first do loop is run if not MSDOS */
1505   do
1506     {
1507       for (compr = compressors; compr->suffix != NULL; compr++)
1508         if (streq (compr->suffix, suffix))
1509           return compr;
1510       if (!MSDOS)
1511         break;                  /* do it only once: not really a loop */
1512       if (extptr != NULL)
1513         *extptr = ++suffix;
1514     } while (*suffix != '\0');
1515   return NULL;
1516 }
1517
1518
1519
1520 /*
1521  * Return a language given the name.
1522  */
1523 static language *
1524 get_language_from_langname (name)
1525      const char *name;
1526 {
1527   language *lang;
1528
1529   if (name == NULL)
1530     error ("empty language name", (char *)NULL);
1531   else
1532     {
1533       for (lang = lang_names; lang->name != NULL; lang++)
1534         if (streq (name, lang->name))
1535           return lang;
1536       error ("unknown language \"%s\"", name);
1537     }
1538
1539   return NULL;
1540 }
1541
1542
1543 /*
1544  * Return a language given the interpreter name.
1545  */
1546 static language *
1547 get_language_from_interpreter (interpreter)
1548      char *interpreter;
1549 {
1550   language *lang;
1551   char **iname;
1552
1553   if (interpreter == NULL)
1554     return NULL;
1555   for (lang = lang_names; lang->name != NULL; lang++)
1556     if (lang->interpreters != NULL)
1557       for (iname = lang->interpreters; *iname != NULL; iname++)
1558         if (streq (*iname, interpreter))
1559             return lang;
1560
1561   return NULL;
1562 }
1563
1564
1565
1566 /*
1567  * Return a language given the file name.
1568  */
1569 static language *
1570 get_language_from_filename (file, case_sensitive)
1571      char *file;
1572      bool case_sensitive;
1573 {
1574   language *lang;
1575   char **name, **ext, *suffix;
1576
1577   /* Try whole file name first. */
1578   for (lang = lang_names; lang->name != NULL; lang++)
1579     if (lang->filenames != NULL)
1580       for (name = lang->filenames; *name != NULL; name++)
1581         if ((case_sensitive)
1582             ? streq (*name, file)
1583             : strcaseeq (*name, file))
1584           return lang;
1585
1586   /* If not found, try suffix after last dot. */
1587   suffix = etags_strrchr (file, '.');
1588   if (suffix == NULL)
1589     return NULL;
1590   suffix += 1;
1591   for (lang = lang_names; lang->name != NULL; lang++)
1592     if (lang->suffixes != NULL)
1593       for (ext = lang->suffixes; *ext != NULL; ext++)
1594         if ((case_sensitive)
1595             ? streq (*ext, suffix)
1596             : strcaseeq (*ext, suffix))
1597           return lang;
1598   return NULL;
1599 }
1600
1601 \f
1602 /*
1603  * This routine is called on each file argument.
1604  */
1605 static void
1606 process_file_name (file, lang)
1607      char *file;
1608      language *lang;
1609 {
1610   struct stat stat_buf;
1611   FILE *inf;
1612   fdesc *fdp;
1613   compressor *compr;
1614   char *compressed_name, *uncompressed_name;
1615   char *ext, *real_name;
1616   int retval;
1617
1618   canonicalize_filename (file);
1619   if (streq (file, tagfile) && !streq (tagfile, "-"))
1620     {
1621       error ("skipping inclusion of %s in self.", file);
1622       return;
1623     }
1624   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1625     {
1626       compressed_name = NULL;
1627       real_name = uncompressed_name = savestr (file);
1628     }
1629   else
1630     {
1631       real_name = compressed_name = savestr (file);
1632       uncompressed_name = savenstr (file, ext - file);
1633     }
1634
1635   /* If the canonicalized uncompressed name
1636      has already been dealt with, skip it silently. */
1637   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1638     {
1639       assert (fdp->infname != NULL);
1640       if (streq (uncompressed_name, fdp->infname))
1641         goto cleanup;
1642     }
1643
1644   if (stat (real_name, &stat_buf) != 0)
1645     {
1646       /* Reset real_name and try with a different name. */
1647       real_name = NULL;
1648       if (compressed_name != NULL) /* try with the given suffix */
1649         {
1650           if (stat (uncompressed_name, &stat_buf) == 0)
1651             real_name = uncompressed_name;
1652         }
1653       else                      /* try all possible suffixes */
1654         {
1655           for (compr = compressors; compr->suffix != NULL; compr++)
1656             {
1657               compressed_name = concat (file, ".", compr->suffix);
1658               if (stat (compressed_name, &stat_buf) != 0)
1659                 {
1660                   if (MSDOS)
1661                     {
1662                       char *suf = compressed_name + strlen (file);
1663                       size_t suflen = strlen (compr->suffix) + 1;
1664                       for ( ; suf[1]; suf++, suflen--)
1665                         {
1666                           memmove (suf, suf + 1, suflen);
1667                           if (stat (compressed_name, &stat_buf) == 0)
1668                             {
1669                               real_name = compressed_name;
1670                               break;
1671                             }
1672                         }
1673                       if (real_name != NULL)
1674                         break;
1675                     } /* MSDOS */
1676                   free (compressed_name);
1677                   compressed_name = NULL;
1678                 }
1679               else
1680                 {
1681                   real_name = compressed_name;
1682                   break;
1683                 }
1684             }
1685         }
1686       if (real_name == NULL)
1687         {
1688           perror (file);
1689           goto cleanup;
1690         }
1691     } /* try with a different name */
1692
1693   if (!S_ISREG (stat_buf.st_mode))
1694     {
1695       error ("skipping %s: it is not a regular file.", real_name);
1696       goto cleanup;
1697     }
1698   if (real_name == compressed_name)
1699     {
1700       char *cmd = concat (compr->command, " ", real_name);
1701       inf = (FILE *) popen (cmd, "r");
1702       free (cmd);
1703     }
1704   else
1705     inf = fopen (real_name, "r");
1706   if (inf == NULL)
1707     {
1708       perror (real_name);
1709       goto cleanup;
1710     }
1711
1712   process_file (inf, uncompressed_name, lang);
1713
1714   if (real_name == compressed_name)
1715     retval = pclose (inf);
1716   else
1717     retval = fclose (inf);
1718   if (retval < 0)
1719     pfatal (file);
1720
1721  cleanup:
1722   if (compressed_name) free (compressed_name);
1723   if (uncompressed_name) free (uncompressed_name);
1724   last_node = NULL;
1725   curfdp = NULL;
1726   return;
1727 }
1728
1729 static void
1730 process_file (fh, fn, lang)
1731      FILE *fh;
1732      char *fn;
1733      language *lang;
1734 {
1735   static const fdesc emptyfdesc;
1736   fdesc *fdp;
1737
1738   /* Create a new input file description entry. */
1739   fdp = xnew (1, fdesc);
1740   *fdp = emptyfdesc;
1741   fdp->next = fdhead;
1742   fdp->infname = savestr (fn);
1743   fdp->lang = lang;
1744   fdp->infabsname = absolute_filename (fn, cwd);
1745   fdp->infabsdir = absolute_dirname (fn, cwd);
1746   if (filename_is_absolute (fn))
1747     {
1748       /* An absolute file name.  Canonicalize it. */
1749       fdp->taggedfname = absolute_filename (fn, NULL);
1750     }
1751   else
1752     {
1753       /* A file name relative to cwd.  Make it relative
1754          to the directory of the tags file. */
1755       fdp->taggedfname = relative_filename (fn, tagfiledir);
1756     }
1757   fdp->usecharno = TRUE;        /* use char position when making tags */
1758   fdp->prop = NULL;
1759   fdp->written = FALSE;         /* not written on tags file yet */
1760
1761   fdhead = fdp;
1762   curfdp = fdhead;              /* the current file description */
1763
1764   find_entries (fh);
1765
1766   /* If not Ctags, and if this is not metasource and if it contained no #line
1767      directives, we can write the tags and free all nodes pointing to
1768      curfdp. */
1769   if (!CTAGS
1770       && curfdp->usecharno      /* no #line directives in this file */
1771       && !curfdp->lang->metasource)
1772     {
1773       node *np, *prev;
1774
1775       /* Look for the head of the sublist relative to this file.  See add_node
1776          for the structure of the node tree. */
1777       prev = NULL;
1778       for (np = nodehead; np != NULL; prev = np, np = np->left)
1779         if (np->fdp == curfdp)
1780           break;
1781
1782       /* If we generated tags for this file, write and delete them. */
1783       if (np != NULL)
1784         {
1785           /* This is the head of the last sublist, if any.  The following
1786              instructions depend on this being true. */
1787           assert (np->left == NULL);
1788
1789           assert (fdhead == curfdp);
1790           assert (last_node->fdp == curfdp);
1791           put_entries (np);     /* write tags for file curfdp->taggedfname */
1792           free_tree (np);       /* remove the written nodes */
1793           if (prev == NULL)
1794             nodehead = NULL;    /* no nodes left */
1795           else
1796             prev->left = NULL;  /* delete the pointer to the sublist */
1797         }
1798     }
1799 }
1800
1801 /*
1802  * This routine sets up the boolean pseudo-functions which work
1803  * by setting boolean flags dependent upon the corresponding character.
1804  * Every char which is NOT in that string is not a white char.  Therefore,
1805  * all of the array "_wht" is set to FALSE, and then the elements
1806  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1807  * of a char is TRUE if it is the string "white", else FALSE.
1808  */
1809 static void
1810 init ()
1811 {
1812   register char *sp;
1813   register int i;
1814
1815   for (i = 0; i < CHARS; i++)
1816     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1817   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1818   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1819   notinname('\0') = notinname('\n');
1820   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1821   begtoken('\0') = begtoken('\n');
1822   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1823   intoken('\0') = intoken('\n');
1824   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1825   endtoken('\0') = endtoken('\n');
1826 }
1827
1828 /*
1829  * This routine opens the specified file and calls the function
1830  * which finds the function and type definitions.
1831  */
1832 static void
1833 find_entries (inf)
1834      FILE *inf;
1835 {
1836   char *cp;
1837   language *lang = curfdp->lang;
1838   Lang_function *parser = NULL;
1839
1840   /* If user specified a language, use it. */
1841   if (lang != NULL && lang->function != NULL)
1842     {
1843       parser = lang->function;
1844     }
1845
1846   /* Else try to guess the language given the file name. */
1847   if (parser == NULL)
1848     {
1849       lang = get_language_from_filename (curfdp->infname, TRUE);
1850       if (lang != NULL && lang->function != NULL)
1851         {
1852           curfdp->lang = lang;
1853           parser = lang->function;
1854         }
1855     }
1856
1857   /* Else look for sharp-bang as the first two characters. */
1858   if (parser == NULL
1859       && readline_internal (&lb, inf) > 0
1860       && lb.len >= 2
1861       && lb.buffer[0] == '#'
1862       && lb.buffer[1] == '!')
1863     {
1864       char *lp;
1865
1866       /* Set lp to point at the first char after the last slash in the
1867          line or, if no slashes, at the first nonblank.  Then set cp to
1868          the first successive blank and terminate the string. */
1869       lp = etags_strrchr (lb.buffer+2, '/');
1870       if (lp != NULL)
1871         lp += 1;
1872       else
1873         lp = skip_spaces (lb.buffer + 2);
1874       cp = skip_non_spaces (lp);
1875       *cp = '\0';
1876
1877       if (strlen (lp) > 0)
1878         {
1879           lang = get_language_from_interpreter (lp);
1880           if (lang != NULL && lang->function != NULL)
1881             {
1882               curfdp->lang = lang;
1883               parser = lang->function;
1884             }
1885         }
1886     }
1887
1888   /* We rewind here, even if inf may be a pipe.  We fail if the
1889      length of the first line is longer than the pipe block size,
1890      which is unlikely. */
1891   rewind (inf);
1892
1893   /* Else try to guess the language given the case insensitive file name. */
1894   if (parser == NULL)
1895     {
1896       lang = get_language_from_filename (curfdp->infname, FALSE);
1897       if (lang != NULL && lang->function != NULL)
1898         {
1899           curfdp->lang = lang;
1900           parser = lang->function;
1901         }
1902     }
1903
1904   /* Else try Fortran or C. */
1905   if (parser == NULL)
1906     {
1907       node *old_last_node = last_node;
1908
1909       curfdp->lang = get_language_from_langname ("fortran");
1910       find_entries (inf);
1911
1912       if (old_last_node == last_node)
1913         /* No Fortran entries found.  Try C. */
1914         {
1915           /* We do not tag if rewind fails.
1916              Only the file name will be recorded in the tags file. */
1917           rewind (inf);
1918           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1919           find_entries (inf);
1920         }
1921       return;
1922     }
1923
1924   if (!no_line_directive
1925       && curfdp->lang != NULL && curfdp->lang->metasource)
1926     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1927        file, or anyway we parsed a file that is automatically generated from
1928        this one.  If this is the case, the bingo.c file contained #line
1929        directives that generated tags pointing to this file.  Let's delete
1930        them all before parsing this file, which is the real source. */
1931     {
1932       fdesc **fdpp = &fdhead;
1933       while (*fdpp != NULL)
1934         if (*fdpp != curfdp
1935             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1936           /* We found one of those!  We must delete both the file description
1937              and all tags referring to it. */
1938           {
1939             fdesc *badfdp = *fdpp;
1940
1941             /* Delete the tags referring to badfdp->taggedfname
1942                that were obtained from badfdp->infname. */
1943             invalidate_nodes (badfdp, &nodehead);
1944
1945             *fdpp = badfdp->next; /* remove the bad description from the list */
1946             free_fdesc (badfdp);
1947           }
1948         else
1949           fdpp = &(*fdpp)->next; /* advance the list pointer */
1950     }
1951
1952   assert (parser != NULL);
1953
1954   /* Generic initialisations before reading from file. */
1955   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1956
1957   /* Generic initialisations before parsing file with readline. */
1958   lineno = 0;                  /* reset global line number */
1959   charno = 0;                  /* reset global char number */
1960   linecharno = 0;              /* reset global char number of line start */
1961
1962   parser (inf);
1963
1964 #ifdef ETAGS_REGEXPS
1965   regex_tag_multiline ();
1966 #endif /* ETAGS_REGEXPS */
1967 }
1968
1969 \f
1970 /*
1971  * Check whether an implicitly named tag should be created,
1972  * then call `pfnote'.
1973  * NAME is a string that is internally copied by this function.
1974  *
1975  * TAGS format specification
1976  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1977  * The following is explained in some more detail in etc/ETAGS.EBNF.
1978  *
1979  * make_tag creates tags with "implicit tag names" (unnamed tags)
1980  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1981  *  1. NAME does not contain any of the characters in NONAM;
1982  *  2. LINESTART contains name as either a rightmost, or rightmost but
1983  *     one character, substring;
1984  *  3. the character, if any, immediately before NAME in LINESTART must
1985  *     be a character in NONAM;
1986  *  4. the character, if any, immediately after NAME in LINESTART must
1987  *     also be a character in NONAM.
1988  *
1989  * The implementation uses the notinname() macro, which recognises the
1990  * characters stored in the string `nonam'.
1991  * etags.el needs to use the same characters that are in NONAM.
1992  */
1993 static void
1994 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1995      char *name;                /* tag name, or NULL if unnamed */
1996      int namelen;               /* tag length */
1997      bool is_func;              /* tag is a function */
1998      char *linestart;           /* start of the line where tag is */
1999      int linelen;               /* length of the line where tag is */
2000      int lno;                   /* line number */
2001      long cno;                  /* character number */
2002 {
2003   bool named = (name != NULL && namelen > 0);
2004
2005   if (!CTAGS && named)          /* maybe set named to false */
2006     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2007        such that etags.el can guess a name from it. */
2008     {
2009       int i;
2010       register char *cp = name;
2011
2012       for (i = 0; i < namelen; i++)
2013         if (notinname (*cp++))
2014           break;
2015       if (i == namelen)                         /* rule #1 */
2016         {
2017           cp = linestart + linelen - namelen;
2018           if (notinname (linestart[linelen-1]))
2019             cp -= 1;                            /* rule #4 */
2020           if (cp >= linestart                   /* rule #2 */
2021               && (cp == linestart
2022                   || notinname (cp[-1]))        /* rule #3 */
2023               && strneq (name, cp, namelen))    /* rule #2 */
2024             named = FALSE;      /* use implicit tag name */
2025         }
2026     }
2027
2028   if (named)
2029     name = savenstr (name, namelen);
2030   else
2031     name = NULL;
2032   pfnote (name, is_func, linestart, linelen, lno, cno);
2033 }
2034
2035 /* Record a tag. */
2036 static void
2037 pfnote (name, is_func, linestart, linelen, lno, cno)
2038      char *name;                /* tag name, or NULL if unnamed */
2039      bool is_func;              /* tag is a function */
2040      char *linestart;           /* start of the line where tag is */
2041      int linelen;               /* length of the line where tag is */
2042      int lno;                   /* line number */
2043      long cno;                  /* character number */
2044 {
2045   register node *np;
2046
2047   assert (name == NULL || name[0] != '\0');
2048   if (CTAGS && name == NULL)
2049     return;
2050
2051   np = xnew (1, node);
2052
2053   /* If ctags mode, change name "main" to M<thisfilename>. */
2054   if (CTAGS && !cxref_style && streq (name, "main"))
2055     {
2056       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2057       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2058       fp = etags_strrchr (np->name, '.');
2059       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2060         fp[0] = '\0';
2061     }
2062   else
2063     np->name = name;
2064   np->valid = TRUE;
2065   np->been_warned = FALSE;
2066   np->fdp = curfdp;
2067   np->is_func = is_func;
2068   np->lno = lno;
2069   if (np->fdp->usecharno)
2070     /* Our char numbers are 0-base, because of C language tradition?
2071        ctags compatibility?  old versions compatibility?   I don't know.
2072        Anyway, since emacs's are 1-base we expect etags.el to take care
2073        of the difference.  If we wanted to have 1-based numbers, we would
2074        uncomment the +1 below. */
2075     np->cno = cno /* + 1 */ ;
2076   else
2077     np->cno = invalidcharno;
2078   np->left = np->right = NULL;
2079   if (CTAGS && !cxref_style)
2080     {
2081       if (strlen (linestart) < 50)
2082         np->regex = concat (linestart, "$", "");
2083       else
2084         np->regex = savenstr (linestart, 50);
2085     }
2086   else
2087     np->regex = savenstr (linestart, linelen);
2088
2089   add_node (np, &nodehead);
2090 }
2091
2092 /*
2093  * free_tree ()
2094  *      recurse on left children, iterate on right children.
2095  */
2096 static void
2097 free_tree (np)
2098      register node *np;
2099 {
2100   while (np)
2101     {
2102       register node *node_right = np->right;
2103       free_tree (np->left);
2104       if (np->name != NULL)
2105         free (np->name);
2106       free (np->regex);
2107       free (np);
2108       np = node_right;
2109     }
2110 }
2111
2112 /*
2113  * free_fdesc ()
2114  *      delete a file description
2115  */
2116 static void
2117 free_fdesc (fdp)
2118      register fdesc *fdp;
2119 {
2120   if (fdp->infname != NULL) free (fdp->infname);
2121   if (fdp->infabsname != NULL) free (fdp->infabsname);
2122   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2123   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2124   if (fdp->prop != NULL) free (fdp->prop);
2125   free (fdp);
2126 }
2127
2128 /*
2129  * add_node ()
2130  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2131  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2132  *      balancing.
2133  *
2134  *      add_node is the only function allowed to add nodes, so it can
2135  *      maintain state.
2136  */
2137 static void
2138 add_node (np, cur_node_p)
2139      node *np, **cur_node_p;
2140 {
2141   register int dif;
2142   register node *cur_node = *cur_node_p;
2143
2144   if (cur_node == NULL)
2145     {
2146       *cur_node_p = np;
2147       last_node = np;
2148       return;
2149     }
2150
2151   if (!CTAGS)
2152     /* Etags Mode */
2153     {
2154       /* For each file name, tags are in a linked sublist on the right
2155          pointer.  The first tags of different files are a linked list
2156          on the left pointer.  last_node points to the end of the last
2157          used sublist. */
2158       if (last_node != NULL && last_node->fdp == np->fdp)
2159         {
2160           /* Let's use the same sublist as the last added node. */
2161           assert (last_node->right == NULL);
2162           last_node->right = np;
2163           last_node = np;
2164         }
2165       else if (cur_node->fdp == np->fdp)
2166         {
2167           /* Scanning the list we found the head of a sublist which is
2168              good for us.  Let's scan this sublist. */
2169           add_node (np, &cur_node->right);
2170         }
2171       else
2172         /* The head of this sublist is not good for us.  Let's try the
2173            next one. */
2174         add_node (np, &cur_node->left);
2175     } /* if ETAGS mode */
2176
2177   else
2178     {
2179       /* Ctags Mode */
2180       dif = strcmp (np->name, cur_node->name);
2181
2182       /*
2183        * If this tag name matches an existing one, then
2184        * do not add the node, but maybe print a warning.
2185        */
2186       if (!dif)
2187         {
2188           if (np->fdp == cur_node->fdp)
2189             {
2190               if (!no_warnings)
2191                 {
2192                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2193                            np->fdp->infname, lineno, np->name);
2194                   fprintf (stderr, "Second entry ignored\n");
2195                 }
2196             }
2197           else if (!cur_node->been_warned && !no_warnings)
2198             {
2199               fprintf
2200                 (stderr,
2201                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2202                  np->fdp->infname, cur_node->fdp->infname, np->name);
2203               cur_node->been_warned = TRUE;
2204             }
2205           return;
2206         }
2207
2208       /* Actually add the node */
2209       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2210     } /* if CTAGS mode */
2211 }
2212
2213 /*
2214  * invalidate_nodes ()
2215  *      Scan the node tree and invalidate all nodes pointing to the
2216  *      given file description (CTAGS case) or free them (ETAGS case).
2217  */
2218 static void
2219 invalidate_nodes (badfdp, npp)
2220      fdesc *badfdp;
2221      node **npp;
2222 {
2223   node *np = *npp;
2224
2225   if (np == NULL)
2226     return;
2227
2228   if (CTAGS)
2229     {
2230       if (np->left != NULL)
2231         invalidate_nodes (badfdp, &np->left);
2232       if (np->fdp == badfdp)
2233         np->valid = FALSE;
2234       if (np->right != NULL)
2235         invalidate_nodes (badfdp, &np->right);
2236     }
2237   else
2238     {
2239       assert (np->fdp != NULL);
2240       if (np->fdp == badfdp)
2241         {
2242           *npp = np->left;      /* detach the sublist from the list */
2243           np->left = NULL;      /* isolate it */
2244           free_tree (np);       /* free it */
2245           invalidate_nodes (badfdp, npp);
2246         }
2247       else
2248         invalidate_nodes (badfdp, &np->left);
2249     }
2250 }
2251
2252 \f
2253 static int total_size_of_entries __P((node *));
2254 static int number_len __P((long));
2255
/* Return the number of digits in the decimal representation of the
   non-negative number NUM.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, plus one for each time NUM can still be
     divided by ten without dropping below one. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2266
2267 /*
2268  * Return total number of characters that put_entries will output for
2269  * the nodes in the linked list at the right of the specified node.
2270  * This count is irrelevant with etags.el since emacs 19.34 at least,
2271  * but is still supplied for backward compatibility.
2272  */
2273 static int
2274 total_size_of_entries (np)
2275      register node *np;
2276 {
2277   register int total = 0;
2278
2279   for (; np != NULL; np = np->right)
2280     if (np->valid)
2281       {
2282         total += strlen (np->regex) + 1;                /* pat\177 */
2283         if (np->name != NULL)
2284           total += strlen (np->name) + 1;               /* name\001 */
2285         total += number_len ((long) np->lno) + 1;       /* lno, */
2286         if (np->cno != invalidcharno)                   /* cno */
2287           total += number_len (np->cno);
2288         total += 1;                                     /* newline */
2289       }
2290
2291   return total;
2292 }
2293
2294 static void
2295 put_entries (np)
2296      register node *np;
2297 {
2298   register char *sp;
2299   static fdesc *fdp = NULL;
2300
2301   if (np == NULL)
2302     return;
2303
2304   /* Output subentries that precede this one */
2305   if (CTAGS)
2306     put_entries (np->left);
2307
2308   /* Output this entry */
2309   if (np->valid)
2310     {
2311       if (!CTAGS)
2312         {
2313           /* Etags mode */
2314           if (fdp != np->fdp)
2315             {
2316               fdp = np->fdp;
2317               fprintf (tagf, "\f\n%s,%d\n",
2318                        fdp->taggedfname, total_size_of_entries (np));
2319               fdp->written = TRUE;
2320             }
2321           fputs (np->regex, tagf);
2322           fputc ('\177', tagf);
2323           if (np->name != NULL)
2324             {
2325               fputs (np->name, tagf);
2326               fputc ('\001', tagf);
2327             }
2328           fprintf (tagf, "%d,", np->lno);
2329           if (np->cno != invalidcharno)
2330             fprintf (tagf, "%ld", np->cno);
2331           fputs ("\n", tagf);
2332         }
2333       else
2334         {
2335           /* Ctags mode */
2336           if (np->name == NULL)
2337             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2338
2339           if (cxref_style)
2340             {
2341               if (vgrind_style)
2342                 fprintf (stdout, "%s %s %d\n",
2343                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2344               else
2345                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2346                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2347             }
2348           else
2349             {
2350               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2351
2352               if (np->is_func)
2353                 {               /* function or #define macro with args */
2354                   putc (searchar, tagf);
2355                   putc ('^', tagf);
2356
2357                   for (sp = np->regex; *sp; sp++)
2358                     {
2359                       if (*sp == '\\' || *sp == searchar)
2360                         putc ('\\', tagf);
2361                       putc (*sp, tagf);
2362                     }
2363                   putc (searchar, tagf);
2364                 }
2365               else
2366                 {               /* anything else; text pattern inadequate */
2367                   fprintf (tagf, "%d", np->lno);
2368                 }
2369               putc ('\n', tagf);
2370             }
2371         }
2372     } /* if this node contains a valid tag */
2373
2374   /* Output subentries that follow this one */
2375   put_entries (np->right);
2376   if (!CTAGS)
2377     put_entries (np->left);
2378 }
2379
2380 \f
/* Flags identifying the dialects of the C family.  Note that the
   values of C_STAR and C_JAVA both include the C_PLPL bit. */
#define C_EXT   0x00fff         /* mask of the C extensions bits */
#define C_PLAIN 0x00000         /* plain C, no extensions */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2389
/*
 * The C symbol tables.
 * Kinds of symbol that the keyword table of the C parser can assign
 * to a word; st_none is the first enumerator.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__ */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef                  /* typedef */
};
2404
2405 static unsigned int hash __P((const char *, unsigned int));
2406 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2407 static enum sym_type C_symtype __P((char *, int, int));
2408
2409 /* Feed stuff between (but not including) %[ and %] lines to:
2410      gperf -m 5
2411 %[
2412 %compare-strncmp
2413 %enum
2414 %struct-type
2415 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2416 %%
2417 if,             0,                      st_C_ignore
2418 for,            0,                      st_C_ignore
2419 while,          0,                      st_C_ignore
2420 switch,         0,                      st_C_ignore
2421 return,         0,                      st_C_ignore
2422 __attribute__,  0,                      st_C_attribute
2423 @interface,     0,                      st_C_objprot
2424 @protocol,      0,                      st_C_objprot
2425 @implementation,0,                      st_C_objimpl
2426 @end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2433 class,          0,                      st_C_class
2434 namespace,      C_PLPL,                 st_C_struct
2435 domain,         C_STAR,                 st_C_struct
2436 union,          0,                      st_C_struct
2437 struct,         0,                      st_C_struct
2438 extern,         0,                      st_C_extern
2439 enum,           0,                      st_C_enum
2440 typedef,        0,                      st_C_typedef
2441 define,         0,                      st_C_define
2442 operator,       C_PLPL,                 st_C_operator
2443 template,       0,                      st_C_template
2444 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2445 DEFUN,          0,                      st_C_gnumacro
2446 SYSCALL,        0,                      st_C_gnumacro
2447 ENTRY,          0,                      st_C_gnumacro
2448 PSEUDO,         0,                      st_C_gnumacro
2449 # These are defined inside C functions, so currently they are not met.
2450 # EXFUN used in glibc, DEFVAR_* in emacs.
2451 #EXFUN,         0,                      st_C_gnumacro
2452 #DEFVAR_,       0,                      st_C_gnumacro
2453 %]
2454 and replace lines between %< and %> with its output, then:
2455  - remove the #if characterset check
2456  - make in_word_set static and not inline. */
2457 /*%<*/
2458 /* C code produced by gperf version 3.0.1 */
2459 /* Command-line: gperf -m 5  */
2460 /* Computed positions: -k'1-2' */
2461
2462 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2463 /* maximum key range = 31, duplicates = 0 */
2464
/* Hash function generated by gperf: the sum of LEN and of the values
   associated with the first two characters of STR.  STR must
   therefore be at least two characters long. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Per-character values; 34 marks the characters that start no
     keyword, and pushes the hash beyond the highest valid key. */
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  register unsigned int hval = len;

  hval += asso_values[(unsigned char)str[1]];
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2508
2509 static struct C_stab_entry *
2510 in_word_set (str, len)
2511      register const char *str;
2512      register unsigned int len;
2513 {
2514   enum
2515     {
2516       TOTAL_KEYWORDS = 31,
2517       MIN_WORD_LENGTH = 2,
2518       MAX_WORD_LENGTH = 15,
2519       MIN_HASH_VALUE = 3,
2520       MAX_HASH_VALUE = 33
2521     };
2522
2523   static struct C_stab_entry wordlist[] =
2524     {
2525       {""}, {""}, {""},
2526       {"if",            0,                      st_C_ignore},
2527       {"enum",          0,                      st_C_enum},
2528       {"@end",          0,                      st_C_objend},
2529       {"extern",                0,                      st_C_extern},
2530       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2531       {"for",           0,                      st_C_ignore},
2532       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2533       {"@protocol",     0,                      st_C_objprot},
2534       {"@interface",    0,                      st_C_objprot},
2535       {"operator",      C_PLPL,                 st_C_operator},
2536       {"return",                0,                      st_C_ignore},
2537       {"friend",                C_PLPL,                 st_C_ignore},
2538       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2539       {"@implementation",0,                     st_C_objimpl},
2540       {"define",                0,                      st_C_define},
2541       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2542       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2543       {"namespace",     C_PLPL,                 st_C_struct},
2544       {"domain",                C_STAR,                 st_C_struct},
2545       {"template",      0,                      st_C_template},
2546       {"typedef",       0,                      st_C_typedef},
2547       {"struct",                0,                      st_C_struct},
2548       {"switch",                0,                      st_C_ignore},
2549       {"union",         0,                      st_C_struct},
2550       {"while",         0,                      st_C_ignore},
2551       {"class",         0,                      st_C_class},
2552       {"__attribute__", 0,                      st_C_attribute},
2553       {"SYSCALL",       0,                      st_C_gnumacro},
2554       {"PSEUDO",                0,                      st_C_gnumacro},
2555       {"ENTRY",         0,                      st_C_gnumacro},
2556       {"DEFUN",         0,                      st_C_gnumacro}
2557     };
2558
2559   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2560     {
2561       register int key = hash (str, len);
2562
2563       if (key <= MAX_HASH_VALUE && key >= 0)
2564         {
2565           register const char *s = wordlist[key].name;
2566
2567           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2568             return &wordlist[key];
2569         }
2570     }
2571   return 0;
2572 }
2573 /*%>*/
2574
2575 static enum sym_type
2576 C_symtype (str, len, c_ext)
2577      char *str;
2578      int len;
2579      int c_ext;
2580 {
2581   register struct C_stab_entry *se = in_word_set (str, len);
2582
2583   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2584     return st_none;
2585   return se->type;
2586 }
2587
2588 \f
2589 /*
2590  * Ignoring __attribute__ ((list))
 *
 * consider_token () sets this flag when it meets the __attribute__
 * keyword; C_entries does not hand tokens to consider_token () while
 * the flag is set.
2591  */
2592 static bool inattribute;        /* looking at an __attribute__ construct */
2593
2594 /*
2595  * C functions and variables are recognized using a simple
2596  * finite automaton.  fvdef is its state variable.
 *
 * consider_token () advances it according to the keyword or identifier
 * it is given; C_entries reads it too (for example, tokens are not
 * considered at all while it is finlist) and resets it to fvnone when
 * it meets a string or character constant in most states.
2597  */
2598 static enum
2599 {
2600   fvnone,                       /* nothing seen */
2601   fdefunkey,                    /* Emacs DEFUN keyword seen */
2602   fdefunname,                   /* Emacs DEFUN name seen */
2603   foperator,                    /* func: operator keyword seen (cplpl) */
2604   fvnameseen,                   /* function or variable name seen */
2605   fstartlist,                   /* func: just after open parenthesis */
2606   finlist,                      /* func: in parameter list */
2607   flistseen,                    /* func: after parameter list */
2608   fignore,                      /* func: before open brace */
2609   vignore                       /* var-like: ignore until ';' */
2610 } fvdef;
2611
2612 static bool fvextern;           /* func or var: extern keyword seen; */
2613
2614 /*
2615  * typedefs are recognized using a simple finite automaton.
2616  * typdef is its state variable.
 *
 * Transitions made by consider_token ():
 *   tnone    -> tkeyseen   on the typedef keyword, only if `typedefs' is set;
 *   tkeyseen -> ttypeseen  on a plain identifier or a class/struct/enum
 *                          keyword.
 * In state tend every token except a class/struct/enum keyword is
 * reported as taggable.  C_entries does not consider tokens at all
 * while the state is tignore.
2617  */
2618 static enum
2619 {
2620   tnone,                        /* nothing seen */
2621   tkeyseen,                     /* typedef keyword seen */
2622   ttypeseen,                    /* defined type seen */
2623   tinbody,                      /* inside typedef body */
2624   tend,                         /* just before typedef tag */
2625   tignore                       /* junk after typedef tag */
2626 } typdef;
2627
2628 /*
2629  * struct-like structures (enum, struct and union) are recognized
2630  * using another simple finite automaton.  `structdef' is its state
2631  * variable.
 *
 * Transitions made by consider_token ():
 *   -> skeyseen    on a class/struct/enum keyword, when the conditions
 *                  tested there hold;
 *   skeyseen -> stagseen    on the next token, which is reported as the tag;
 *   stagseen -> scolonseen  on a Java extends/implements keyword.
 * Outside preprocessor lines, C_entries does not consider tokens while
 * the state is scolonseen.
2632  */
2633 static enum
2634 {
2635   snone,                        /* nothing seen yet,
2636                                    or in struct body if bracelev > 0 */
2637   skeyseen,                     /* struct-like keyword seen */
2638   stagseen,                     /* struct-like tag seen */
2639   scolonseen                    /* colon seen after struct-like tag */
2640 } structdef;
2641
2642 /*
2643  * When objdef is different from onone, objtag is the name of the class.
 *
 * The initial value is a string literal.  consider_token () later
 * replaces it with copies made by savenstr (), which are deliberately
 * never freed (see the comment in its oignore case), so do not free
 * objtag without first checking what it points to.
2644  */
2645 static char *objtag = "<uninited>";
2646
2647 /*
2648  * Yet another little state machine to deal with preprocessor lines.
 *
 * C_entries enters dsharpseen when a '#' is the first significant
 * character of a line.  consider_token () then moves to ddefineseen if
 * the next token is `define' and to dignorerest otherwise; the token
 * after `define' (the macro name) moves ddefineseen to dignorerest.
 * The CNL macro resets the state to dnone when a new line is read;
 * CNL_SAVE_DEFINEDEF reads a new line without touching it.
2649  */
2650 static enum
2651 {
2652   dnone,                        /* nothing seen */
2653   dsharpseen,                   /* '#' seen as first char on line */
2654   ddefineseen,                  /* '#' and 'define' seen */
2655   dignorerest                   /* ignore rest of line */
2656 } definedef;
2657
2658 /*
2659  * State machine for Objective C protocols and implementations.
2660  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 *
 * consider_token () drives the keyword- and identifier-based
 * transitions (see its `switch (objdef)'); while the state is not
 * onone, objtag holds the name of the class being parsed.
2661  */
2662 static enum
2663 {
2664   onone,                        /* nothing seen */
2665   oprotocol,                    /* @interface or @protocol seen */
2666   oimplementation,              /* @implementation seen */
2667   otagseen,                     /* class name seen */
2668   oparenseen,                   /* parenthesis before category seen */
2669   ocatseen,                     /* category name seen */
2670   oinbody,                      /* in @implementation body */
2671   omethodsign,                  /* in @implementation body, after +/- */
2672   omethodtag,                   /* after method name */
2673   omethodcolon,                 /* after method colon */
2674   omethodparm,                  /* after method parameter */
2675   oignore                       /* wait for @end */
2676 } objdef;
2677
2678
2679 /*
2680  * Use this structure to keep info about the token read, and how it
2681  * should be tagged.  Used by the make_C_tag function to build a tag.
 * The name of the tag itself is not stored here: make_C_tag takes it
 * from the token_name line buffer.
2682  */
2683 static struct tok
2684 {
2685   char *line;                   /* string containing the token */
2686   int offset;                   /* where the token starts in LINE */
2687   int length;                   /* token length */
2688   /*
2689     The previous members can be used to pass strings around for generic
2690     purposes.  The following ones specifically refer to creating tags.  In this
2691     case the token contained here is the pattern that will be used to create a
2692     tag.
2693   */
2694   bool valid;                   /* TRUE if a tag may be created from this
2695                                    token (make_C_tag tests it, then clears
2696                                    it); the token should be invalidated
                                   whenever a state machine is reset
                                   prematurely */
2697   bool named;                   /* create a named tag */
2698   int lineno;                   /* source line number of tag */
2699   long linepos;                 /* source char number of tag */
2700 } token;                        /* latest token read */
2701
2702 /*
2703  * Variables and functions for dealing with nested structures.
2704  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 *
 * cstack is a stack of the struct-like scopes currently open.  The two
 * arrays run in parallel: entry I has name cname[I] (NULL when
 * pushclass_above was given no name) and was opened at brace level
 * bracelev[I].  The names are heap copies owned by the stack and freed
 * by popclass_above.  C_entries allocates the arrays on first use and
 * pushclass_above doubles them when they are full.
2705  */
2706 static void pushclass_above __P((int, char *, int));
2707 static void popclass_above __P((int));
2708 static void write_classname __P((linebuffer *, char *qualifier));
2709
2710 static struct {
2711   char **cname;                 /* nested class names */
2712   int *bracelev;                /* nested class brace level */
2713   int nl;                       /* class nesting level (elements used) */
2714   int size;                     /* length of the array */
2715 } cstack;                       /* stack for nested declaration tags */
2716 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2717 #define nestlev         (cstack.nl)
2718 /* After struct keyword or in struct body, not inside a nested function. */
/* Note: expands to an expression using a variable named `bracelev', which
   must be in scope where the macro is used (the current brace level). */
2719 #define instruct        (structdef == snone && nestlev > 0                      \
2720                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2721
2722 static void
2723 pushclass_above (bracelev, str, len)
2724      int bracelev;
2725      char *str;
2726      int len;
2727 {
2728   int nl;
2729
2730   popclass_above (bracelev);
2731   nl = cstack.nl;
2732   if (nl >= cstack.size)
2733     {
2734       int size = cstack.size *= 2;
2735       xrnew (cstack.cname, size, char *);
2736       xrnew (cstack.bracelev, size, int);
2737     }
2738   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2739   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2740   cstack.bracelev[nl] = bracelev;
2741   cstack.nl = nl + 1;
2742 }
2743
2744 static void
2745 popclass_above (bracelev)
2746      int bracelev;
2747 {
2748   int nl;
2749
2750   for (nl = cstack.nl - 1;
2751        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2752        nl--)
2753     {
2754       if (cstack.cname[nl] != NULL)
2755         free (cstack.cname[nl]);
2756       cstack.nl = nl;
2757     }
2758 }
2759
2760 static void
2761 write_classname (cn, qualifier)
2762      linebuffer *cn;
2763      char *qualifier;
2764 {
2765   int i, len;
2766   int qlen = strlen (qualifier);
2767
2768   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2769     {
2770       len = 0;
2771       cn->len = 0;
2772       cn->buffer[0] = '\0';
2773     }
2774   else
2775     {
2776       len = strlen (cstack.cname[0]);
2777       linebuffer_setlen (cn, len);
2778       strcpy (cn->buffer, cstack.cname[0]);
2779     }
2780   for (i = 1; i < cstack.nl; i++)
2781     {
2782       char *s;
2783       int slen;
2784
2785       s = cstack.cname[i];
2786       if (s == NULL)
2787         continue;
2788       slen = strlen (s);
2789       len += slen + qlen;
2790       linebuffer_setlen (cn, len);
2791       strncat (cn->buffer, qualifier, qlen);
2792       strncat (cn->buffer, s, slen);
2793     }
2794 }
2795
2796 \f
2797 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2798 static void make_C_tag __P((bool));
2799
2800 /*
2801  * consider_token ()
2802  *      checks to see if the current token is at the start of a
2803  *      function or variable, or corresponds to a typedef, or
2804  *      is a struct/union/enum tag, or #define, or an enum constant.
2805  *
2806  *      Returns TRUE if the token is a candidate for a tag (C_entries
2807  *      then builds its name), FALSE if it should simply be skipped.
 *
 *      *IS_FUNC_OR_VAR is set to TRUE when the token is a function or
 *      variable name, an operator, the name following a DEFUN-like
 *      macro, an Objective C class or category name, or a #define
 *      macro with args (for a #define without args it is set to
 *      FALSE).  In all other cases it is left as the caller set it.
 *      C_EXTP points to the mask of the language we are looking at;
 *      it is updated when C++ is detected automatically (C_AUTO).
2808  *
2809  * Globals
2810  *      fvdef                   IN OUT
2811  *      structdef               IN OUT
2812  *      definedef               IN OUT
2813  *      typdef                  IN OUT
2814  *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names only)
2815  */
2816
2817 static bool
2818 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2819      register char *str;        /* IN: token pointer */
2820      register int len;          /* IN: token length */
2821      register int c;            /* IN: first char after the token */
2822      int *c_extp;               /* IN, OUT: C extensions mask */
2823      int bracelev;              /* IN: brace level */
2824      int parlev;                /* IN: parenthesis level */
2825      bool *is_func_or_var;      /* OUT: function or variable found */
2826 {
2827   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2828      structtype is the type of the preceding struct-like keyword, and
2829      structbracelev is the brace level where it has been seen. */
  /* These are static because they must survive between calls: they are
     written when a struct-like keyword is seen and read later by the
     enum constant test.  toktype is recomputed on every call. */
2830   static enum sym_type structtype;
2831   static int structbracelev;
2832   static enum sym_type toktype;
2833
2834
2835   toktype = C_symtype (str, len, *c_extp);
2836
2837   /*
2838    * Skip __attribute__
2839    */
2840   if (toktype == st_C_attribute)
2841     {
2842       inattribute = TRUE;
2843       return FALSE;
2844      }
2845
2846    /*
2847     * Advance the definedef state machine.
2848     */
2849    switch (definedef)
2850      {
2851      case dnone:
2852        /* We're not on a preprocessor line. */
2853        if (toktype == st_C_gnumacro)
2854          {
2855            fvdef = fdefunkey;
2856            return FALSE;
2857          }
2858        break;
2859      case dsharpseen:
       /* First token after '#': only `define' is of interest, any
          other directive makes us ignore the rest of the line. */
2860        if (toktype == st_C_define)
2861          {
2862            definedef = ddefineseen;
2863          }
2864        else
2865          {
2866            definedef = dignorerest;
2867          }
2868        return FALSE;
2869      case ddefineseen:
2870        /*
2871         * Make a tag for any macro, unless it is a constant
2872         * and constantypedefs is FALSE.
2873         */
       /* The token is the macro name; a '(' right after it means the
          macro takes arguments. */
2874        definedef = dignorerest;
2875        *is_func_or_var = (c == '(');
2876        if (!*is_func_or_var && !constantypedefs)
2877          return FALSE;
2878        else
2879          return TRUE;
2880      case dignorerest:
2881        return FALSE;
2882      default:
       /* After reporting, control falls out of the switch and the
          token is processed as if we were not on a preprocessor line. */
2883        error ("internal error: definedef value.", (char *)NULL);
2884      }
2885
2886    /*
2887     * Now typedefs
2888     */
2889    switch (typdef)
2890      {
2891      case tnone:
2892        if (toktype == st_C_typedef)
2893          {
           /* The typedef machine only starts when the `typedefs' option
              is on, but the function/variable state is reset either
              way. */
2894            if (typedefs)
2895              typdef = tkeyseen;
2896            fvextern = FALSE;
2897            fvdef = fvnone;
2898            return FALSE;
2899          }
2900        break;
2901      case tkeyseen:
       /* A plain identifier or a struct-like keyword after `typedef'
          starts the type being defined; other keywords leave the state
          unchanged. */
2902        switch (toktype)
2903          {
2904          case st_none:
2905          case st_C_class:
2906          case st_C_struct:
2907          case st_C_enum:
2908            typdef = ttypeseen;
2909          }
2910        break;
2911      case ttypeseen:
       /* A token after the type, with no struct or function/variable
          parsing in progress, is reported as taggable. */
2912        if (structdef == snone && fvdef == fvnone)
2913          {
2914            fvdef = fvnameseen;
2915            return TRUE;
2916          }
2917        break;
2918      case tend:
       /* Struct-like keywords are skipped; any other token seen in
          this state is reported as taggable. */
2919        switch (toktype)
2920          {
2921          case st_C_class:
2922          case st_C_struct:
2923          case st_C_enum:
2924            return FALSE;
2925          }
2926        return TRUE;
2927      }
2928
2929    /*
2930     * This structdef business is NOT invoked when we are ctags and the
2931     * file is plain C.  This is because a struct tag may have the same
2932     * name as another tag, and this loses with ctags.
2933     */
2934    switch (toktype)
2935      {
2936      case st_C_javastruct:
       /* Java `extends' or `implements' after a class tag. */
2937        if (structdef == stagseen)
2938          structdef = scolonseen;
2939        return FALSE;
2940      case st_C_template:
2941      case st_C_class:
2942        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2943            && bracelev == 0
2944            && definedef == dnone && structdef == snone
2945            && typdef == tnone && fvdef == fvnone)
2946          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       /* `template' is only used for the detection above; it is then
          handled by the code after this switch like any other token. */
2947        if (toktype == st_C_template)
2948          break;
2949        /* FALLTHRU */
2950      case st_C_struct:
2951      case st_C_enum:
       /* Start a struct-like definition, remembering which keyword
          introduced it and at which brace level. */
2952        if (parlev == 0
2953            && fvdef != vignore
2954            && (typdef == tkeyseen
2955                || (typedefs_or_cplusplus && structdef == snone)))
2956          {
2957            structdef = skeyseen;
2958            structtype = toktype;
2959            structbracelev = bracelev;
2960            if (fvdef == fvnameseen)
2961              fvdef = fvnone;
2962          }
2963        return FALSE;
2964      }
2965
   /* The first token after a struct-like keyword is its tag. */
2966    if (structdef == skeyseen)
2967      {
2968        structdef = stagseen;
2969        return TRUE;
2970      }
2971
   /* NOTE(review): why definedef is reset while a typedef is being
      parsed is not evident from this function alone. */
2972    if (typdef != tnone)
2973      definedef = dnone;
2974
2975    /* Detect Objective C constructs. */
2976    switch (objdef)
2977      {
2978      case onone:
2979        switch (toktype)
2980          {
2981          case st_C_objprot:
2982            objdef = oprotocol;
2983            return FALSE;
2984          case st_C_objimpl:
2985            objdef = oimplementation;
2986            return FALSE;
2987          }
2988        break;
2989      case oimplementation:
2990        /* Save the class tag for functions or variables defined inside. */
2991        objtag = savenstr (str, len);
2992        objdef = oinbody;
2993        return FALSE;
2994      case oprotocol:
2995        /* Save the class tag for categories. */
2996        objtag = savenstr (str, len);
2997        objdef = otagseen;
2998        *is_func_or_var = TRUE;
2999        return TRUE;
3000      case oparenseen:
       /* The token is the category name. */
3001        objdef = ocatseen;
3002        *is_func_or_var = TRUE;
3003        return TRUE;
3004      case oinbody:
3005        break;
3006      case omethodsign:
       /* First part of a method name: start a fresh name in
          token_name.  Tokens inside parentheses are skipped. */
3007        if (parlev == 0)
3008          {
3009            fvdef = fvnone;
3010            objdef = omethodtag;
3011            linebuffer_setlen (&token_name, len);
3012            strncpy (token_name.buffer, str, len);
3013            token_name.buffer[len] = '\0';
3014            return TRUE;
3015          }
3016        return FALSE;
3017      case omethodcolon:
       /* The token after a colon is a parameter name: skip it. */
3018        if (parlev == 0)
3019          objdef = omethodparm;
3020        return FALSE;
3021      case omethodparm:
       /* A further part of the method name: append it to the name
          collected so far in token_name. */
3022        if (parlev == 0)
3023          {
3024            fvdef = fvnone;
3025            objdef = omethodtag;
3026            linebuffer_setlen (&token_name, token_name.len + len);
3027            strncat (token_name.buffer, str, len);
3028            return TRUE;
3029          }
3030        return FALSE;
3031      case oignore:
3032        if (toktype == st_C_objend)
3033          {
3034            /* Memory leakage here: the string pointed by objtag is
3035               never released, because many tests would be needed to
3036               avoid breaking on incorrect input code.  The amount of
3037               memory leaked here is the sum of the lengths of the
3038               class tags.
3039            free (objtag); */
3040            objdef = onone;
3041          }
3042        return FALSE;
3043      }
3044
3045    /* A function, variable or enum constant? */
3046    switch (toktype)
3047      {
3048      case st_C_extern:
3049        fvextern = TRUE;
       /* `extern' restarts function/variable recognition, except in
          the states listed here, which are left untouched. */
3050        switch  (fvdef)
3051          {
3052          case finlist:
3053          case flistseen:
3054          case fignore:
3055          case vignore:
3056            break;
3057          default:
3058            fvdef = fvnone;
3059          }
3060        return FALSE;
3061      case st_C_ignore:
3062        fvextern = FALSE;
3063        fvdef = vignore;
3064        return FALSE;
3065      case st_C_operator:
3066        fvdef = foperator;
3067        *is_func_or_var = TRUE;
3068        return TRUE;
3069      case st_none:
       /* An identifier inside the braces of the latest enum seen. */
3070        if (constantypedefs
3071            && structdef == snone
3072            && structtype == st_C_enum && bracelev > structbracelev)
3073          return TRUE;           /* enum constant */
3074        switch (fvdef)
3075          {
3076          case fdefunkey:
3077            if (bracelev > 0)
3078              break;
3079            fvdef = fdefunname;  /* GNU macro */
3080            *is_func_or_var = TRUE;
3081            return TRUE;
3082          case fvnone:
3083            switch (typdef)
3084              {
3085              case ttypeseen:
3086                return FALSE;
3087              case tnone:
3088                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3089                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3090                  {
3091                    fvdef = vignore;
3092                    return FALSE;
3093                  }
3094                break;
3095              }
3096           /* FALLTHRU */
3097           case fvnameseen:
          /* 10 is the length of "::operator": a qualified name ending
             with it introduces a C++ operator definition. */
3098           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3099             {
3100               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3101                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3102               fvdef = foperator;
3103               *is_func_or_var = TRUE;
3104               return TRUE;
3105             }
          /* Inside braces, only names directly in a struct body are
             candidates. */
3106           if (bracelev > 0 && !instruct)
3107             break;
3108           fvdef = fvnameseen;   /* function or variable */
3109           *is_func_or_var = TRUE;
3110           return TRUE;
3111         }
3112       break;
3113     }
3114
3115   return FALSE;
3116 }
3117
3118 \f
3119 /*
3120  * C_entries often keeps pointers to tokens or lines which are older than
3121  * the line currently read.  By keeping two line buffers, and switching
3122  * them at end of line, it is possible to use those pointers.
 *
 * All the macros below expand to expressions that use variables of the
 * function where they appear: curndx and newndx (indices into lbs) for
 * the buffer macros, c_ext (the language mask) for plainc, cplpl and
 * cjava.  C_entries declares all three.
3123  */
3124 static struct
3125 {
3126   long linepos;                 /* charno saved just before the line was read */
3127   linebuffer lb;                /* the line itself */
3128 } lbs[2];
3129
3130 #define current_lb_is_new (newndx == curndx)
3131 #define switch_line_buffers() (curndx = 1 - curndx)
3132
3133 #define curlb (lbs[curndx].lb)
3134 #define newlb (lbs[newndx].lb)
3135 #define curlinepos (lbs[curndx].linepos)
3136 #define newlinepos (lbs[newndx].linepos)
3137
/* Tests on the language mask c_ext. */
3138 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3139 #define cplpl (c_ext & C_PLPL)
3140 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3141
/*
 * CNL_SAVE_DEFINEDEF reads the next input line into the current line
 * buffer: it saves charno as the position of that line, makes lp point
 * to the start of the buffer, clears quotednl and marks the current
 * buffer as the new one.  The definedef state is left alone, so a
 * preprocessor line can go on after the line break; C_entries uses it
 * for line ends met inside comments, strings and brackets.
 *
 * Both macros use variables of the function where they are expanded
 * (charno, inf, lp, quotednl, curndx, newndx and, for CNL, savetoken).
 */
3142 #define CNL_SAVE_DEFINEDEF()                                            \
3143 do {                                                                    \
3144   curlinepos = charno;                                                  \
3145   readline (&curlb, inf);                                               \
3146   lp = curlb.buffer;                                                    \
3147   quotednl = FALSE;                                                     \
3148   newndx = curndx;                                                      \
3149 } while (0)
3150
/*
 * CNL does the same as CNL_SAVE_DEFINEDEF, then makes a valid savetoken
 * the current token again and resets definedef to dnone, ending any
 * preprocessor line.
 */
3151 #define CNL()                                                           \
3152 do {                                                                    \
3153   CNL_SAVE_DEFINEDEF();                                                 \
3154   if (savetoken.valid)                                                  \
3155     {                                                                   \
3156       token = savetoken;                                                \
3157       savetoken.valid = FALSE;                                          \
3158     }                                                                   \
3159   definedef = dnone;                                                    \
3160 } while (0)
3161
3162
3163 static void
3164 make_C_tag (isfun)
3165      bool isfun;
3166 {
3167   /* This function should never be called when token.valid is FALSE, but
3168      we must protect against invalid input or internal errors. */
3169   if (!DEBUG && !token.valid)
3170     return;
3171
3172   if (token.valid)
3173     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3174               token.offset+token.length+1, token.lineno, token.linepos);
3175   else                          /* this case is optimised away if !DEBUG */
3176     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3177               token_name.len + 17, isfun, token.line,
3178               token.offset+token.length+1, token.lineno, token.linepos);
3179
3180   token.valid = FALSE;
3181 }
3182
3183
3184 /*
3185  * C_entries ()
3186  *      This routine finds functions, variables, typedefs,
3187  *      #define's, enum constants and struct/union/enum definitions in
3188  *      C syntax and adds them to the list.
3189  */
3190 static void
3191 C_entries (c_ext, inf)
3192      int c_ext;                 /* extension of C */
3193      FILE *inf;                 /* input file */
3194 {
3195   register char c;              /* latest char read; '\0' for end of line */
3196   register char *lp;            /* pointer one beyond the character `c' */
3197   int curndx, newndx;           /* indices for current and new lb */
3198   register int tokoff;          /* offset in line of start of current token */
3199   register int toklen;          /* length of current token */
3200   char *qualifier;              /* string used to qualify names */
3201   int qlen;                     /* length of qualifier */
3202   int bracelev;                 /* current brace level */
3203   int bracketlev;               /* current bracket level */
3204   int parlev;                   /* current parenthesis level */
3205   int attrparlev;               /* __attribute__ parenthesis level */
3206   int templatelev;              /* current template level */
3207   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3208   bool incomm, inquote, inchar, quotednl, midtoken;
3209   bool yacc_rules;              /* in the rules part of a yacc file */
3210   struct tok savetoken;         /* token saved during preprocessor handling */
3211
3212
3213   linebuffer_init (&lbs[0].lb);
3214   linebuffer_init (&lbs[1].lb);
3215   if (cstack.size == 0)
3216     {
3217       cstack.size = (DEBUG) ? 1 : 4;
3218       cstack.nl = 0;
3219       cstack.cname = xnew (cstack.size, char *);
3220       cstack.bracelev = xnew (cstack.size, int);
3221     }
3222
3223   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3224   curndx = newndx = 0;
3225   lp = curlb.buffer;
3226   *lp = 0;
3227
3228   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3229   structdef = snone; definedef = dnone; objdef = onone;
3230   yacc_rules = FALSE;
3231   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3232   token.valid = savetoken.valid = FALSE;
3233   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3234   if (cjava)
3235     { qualifier = "."; qlen = 1; }
3236   else
3237     { qualifier = "::"; qlen = 2; }
3238
3239
3240   while (!feof (inf))
3241     {
3242       c = *lp++;
3243       if (c == '\\')
3244         {
3245           /* If we are at the end of the line, the next character is a
3246              '\0'; do not skip it, because it is what tells us
3247              to read the next line.  */
3248           if (*lp == '\0')
3249             {
3250               quotednl = TRUE;
3251               continue;
3252             }
3253           lp++;
3254           c = ' ';
3255         }
3256       else if (incomm)
3257         {
3258           switch (c)
3259             {
3260             case '*':
3261               if (*lp == '/')
3262                 {
3263                   c = *lp++;
3264                   incomm = FALSE;
3265                 }
3266               break;
3267             case '\0':
3268               /* Newlines inside comments do not end macro definitions in
3269                  traditional cpp. */
3270               CNL_SAVE_DEFINEDEF ();
3271               break;
3272             }
3273           continue;
3274         }
3275       else if (inquote)
3276         {
3277           switch (c)
3278             {
3279             case '"':
3280               inquote = FALSE;
3281               break;
3282             case '\0':
3283               /* Newlines inside strings do not end macro definitions
3284                  in traditional cpp, even though compilers don't
3285                  usually accept them. */
3286               CNL_SAVE_DEFINEDEF ();
3287               break;
3288             }
3289           continue;
3290         }
3291       else if (inchar)
3292         {
3293           switch (c)
3294             {
3295             case '\0':
3296               /* Hmmm, something went wrong. */
3297               CNL ();
3298               /* FALLTHRU */
3299             case '\'':
3300               inchar = FALSE;
3301               break;
3302             }
3303           continue;
3304         }
3305       else if (bracketlev > 0)
3306         {
3307           switch (c)
3308             {
3309             case ']':
3310               if (--bracketlev > 0)
3311                 continue;
3312               break;
3313             case '\0':
3314               CNL_SAVE_DEFINEDEF ();
3315               break;
3316             }
3317           continue;
3318         }
3319       else switch (c)
3320         {
3321         case '"':
3322           inquote = TRUE;
3323           if (inattribute)
3324             break;
3325           switch (fvdef)
3326             {
3327             case fdefunkey:
3328             case fstartlist:
3329             case finlist:
3330             case fignore:
3331             case vignore:
3332               break;
3333             default:
3334               fvextern = FALSE;
3335               fvdef = fvnone;
3336             }
3337           continue;
3338         case '\'':
3339           inchar = TRUE;
3340           if (inattribute)
3341             break;
3342           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3343             {
3344               fvextern = FALSE;
3345               fvdef = fvnone;
3346             }
3347           continue;
3348         case '/':
3349           if (*lp == '*')
3350             {
3351               lp++;
3352               incomm = TRUE;
3353               continue;
3354             }
3355           else if (/* cplpl && */ *lp == '/')
3356             {
3357               c = '\0';
3358               break;
3359             }
3360           else
3361             break;
3362         case '%':
3363           if ((c_ext & YACC) && *lp == '%')
3364             {
3365               /* Entering or exiting rules section in yacc file. */
3366               lp++;
3367               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3368               typdef = tnone; structdef = snone;
3369               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3370               bracelev = 0;
3371               yacc_rules = !yacc_rules;
3372               continue;
3373             }
3374           else
3375             break;
3376         case '#':
3377           if (definedef == dnone)
3378             {
3379               char *cp;
3380               bool cpptoken = TRUE;
3381
3382               /* Look back on this line.  If all blanks, or nonblanks
3383                  followed by an end of comment, this is a preprocessor
3384                  token. */
3385               for (cp = newlb.buffer; cp < lp-1; cp++)
3386                 if (!iswhite (*cp))
3387                   {
3388                     if (*cp == '*' && *(cp+1) == '/')
3389                       {
3390                         cp++;
3391                         cpptoken = TRUE;
3392                       }
3393                     else
3394                       cpptoken = FALSE;
3395                   }
3396               if (cpptoken)
3397                 definedef = dsharpseen;
3398             } /* if (definedef == dnone) */
3399           continue;
3400         case '[':
3401           bracketlev++;
3402             continue;
3403         } /* switch (c) */
3404
3405
3406       /* Consider token only if some involved conditions are satisfied. */
3407       if (typdef != tignore
3408           && definedef != dignorerest
3409           && fvdef != finlist
3410           && templatelev == 0
3411           && (definedef != dnone
3412               || structdef != scolonseen)
3413           && !inattribute)
3414         {
3415           if (midtoken)
3416             {
3417               if (endtoken (c))
3418                 {
3419                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3420                     /* This handles :: in the middle,
3421                        but not at the beginning of an identifier.
3422                        Also, space-separated :: is not recognised. */
3423                     {
3424                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3425                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3426                       lp += 2;
3427                       toklen += 2;
3428                       c = lp[-1];
3429                       goto still_in_token;
3430                     }
3431                   else
3432                     {
3433                       bool funorvar = FALSE;
3434
3435                       if (yacc_rules
3436                           || consider_token (newlb.buffer + tokoff, toklen, c,
3437                                              &c_ext, bracelev, parlev,
3438                                              &funorvar))
3439                         {
3440                           if (fvdef == foperator)
3441                             {
3442                               char *oldlp = lp;
3443                               lp = skip_spaces (lp-1);
3444                               if (*lp != '\0')
3445                                 lp += 1;
3446                               while (*lp != '\0'
3447                                      && !iswhite (*lp) && *lp != '(')
3448                                 lp += 1;
3449                               c = *lp++;
3450                               toklen += lp - oldlp;
3451                             }
3452                           token.named = FALSE;
3453                           if (!plainc
3454                               && nestlev > 0 && definedef == dnone)
3455                             /* in struct body */
3456                             {
3457                               write_classname (&token_name, qualifier);
3458                               linebuffer_setlen (&token_name,
3459                                                  token_name.len+qlen+toklen);
3460                               strcat (token_name.buffer, qualifier);
3461                               strncat (token_name.buffer,
3462                                        newlb.buffer + tokoff, toklen);
3463                               token.named = TRUE;
3464                             }
3465                           else if (objdef == ocatseen)
3466                             /* Objective C category */
3467                             {
3468                               int len = strlen (objtag) + 2 + toklen;
3469                               linebuffer_setlen (&token_name, len);
3470                               strcpy (token_name.buffer, objtag);
3471                               strcat (token_name.buffer, "(");
3472                               strncat (token_name.buffer,
3473                                        newlb.buffer + tokoff, toklen);
3474                               strcat (token_name.buffer, ")");
3475                               token.named = TRUE;
3476                             }
3477                           else if (objdef == omethodtag
3478                                    || objdef == omethodparm)
3479                             /* Objective C method */
3480                             {
3481                               token.named = TRUE;
3482                             }
3483                           else if (fvdef == fdefunname)
3484                             /* GNU DEFUN and similar macros */
3485                             {
3486                               bool defun = (newlb.buffer[tokoff] == 'F');
3487                               int off = tokoff;
3488                               int len = toklen;
3489
3490                               /* Rewrite the tag so that emacs lisp DEFUNs
3491                                  can be found by their elisp name */
3492                               if (defun)
3493                                 {
3494                                   off += 1;
3495                                   len -= 1;
3496                                 }
3497                               len = toklen;
3498                               linebuffer_setlen (&token_name, len);
3499                               strncpy (token_name.buffer,
3500                                        newlb.buffer + off, len);
3501                               token_name.buffer[len] = '\0';
3502                               if (defun)
3503                                 while (--len >= 0)
3504                                   if (token_name.buffer[len] == '_')
3505                                     token_name.buffer[len] = '-';
3506                               token.named = defun;
3507                             }
3508                           else
3509                             {
3510                               linebuffer_setlen (&token_name, toklen);
3511                               strncpy (token_name.buffer,
3512                                        newlb.buffer + tokoff, toklen);
3513                               token_name.buffer[toklen] = '\0';
3514                               /* Name macros and members. */
3515                               token.named = (structdef == stagseen
3516                                              || typdef == ttypeseen
3517                                              || typdef == tend
3518                                              || (funorvar
3519                                                  && definedef == dignorerest)
3520                                              || (funorvar
3521                                                  && definedef == dnone
3522                                                  && structdef == snone
3523                                                  && bracelev > 0));
3524                             }
3525                           token.lineno = lineno;
3526                           token.offset = tokoff;
3527                           token.length = toklen;
3528                           token.line = newlb.buffer;
3529                           token.linepos = newlinepos;
3530                           token.valid = TRUE;
3531
3532                           if (definedef == dnone
3533                               && (fvdef == fvnameseen
3534                                   || fvdef == foperator
3535                                   || structdef == stagseen
3536                                   || typdef == tend
3537                                   || typdef == ttypeseen
3538                                   || objdef != onone))
3539                             {
3540                               if (current_lb_is_new)
3541                                 switch_line_buffers ();
3542                             }
3543                           else if (definedef != dnone
3544                                    || fvdef == fdefunname
3545                                    || instruct)
3546                             make_C_tag (funorvar);
3547                         }
3548                       else /* not yacc and consider_token failed */
3549                         {
3550                           if (inattribute && fvdef == fignore)
3551                             {
3552                               /* We have just met __attribute__ after a
3553                                  function parameter list: do not tag the
3554                                  function again. */
3555                               fvdef = fvnone;
3556                             }
3557                         }
3558                       midtoken = FALSE;
3559                     }
3560                 } /* if (endtoken (c)) */
3561               else if (intoken (c))
3562                 still_in_token:
3563                 {
3564                   toklen++;
3565                   continue;
3566                 }
3567             } /* if (midtoken) */
3568           else if (begtoken (c))
3569             {
3570               switch (definedef)
3571                 {
3572                 case dnone:
3573                   switch (fvdef)
3574                     {
3575                     case fstartlist:
3576                       /* This prevents tagging fb in
3577                          void (__attribute__((noreturn)) *fb) (void);
3578                          Fixing this is not easy and not very important. */
3579                       fvdef = finlist;
3580                       continue;
3581                     case flistseen:
3582                       if (plainc || declarations)
3583                         {
3584                           make_C_tag (TRUE); /* a function */
3585                           fvdef = fignore;
3586                         }
3587                       break;
3588                     }
3589                   if (structdef == stagseen && !cjava)
3590                     {
3591                       popclass_above (bracelev);
3592                       structdef = snone;
3593                     }
3594                   break;
3595                 case dsharpseen:
3596                   savetoken = token;
3597                   break;
3598                 }
3599               if (!yacc_rules || lp == newlb.buffer + 1)
3600                 {
3601                   tokoff = lp - 1 - newlb.buffer;
3602                   toklen = 1;
3603                   midtoken = TRUE;
3604                 }
3605               continue;
3606             } /* if (begtoken) */
3607         } /* if must look at token */
3608
3609
3610       /* Detect end of line, colon, comma, semicolon and various braces
3611          after having handled a token.*/
3612       switch (c)
3613         {
3614         case ':':
3615           if (inattribute)
3616             break;
3617           if (yacc_rules && token.offset == 0 && token.valid)
3618             {
3619               make_C_tag (FALSE); /* a yacc function */
3620               break;
3621             }
3622           if (definedef != dnone)
3623             break;
3624           switch (objdef)
3625             {
3626             case  otagseen:
3627               objdef = oignore;
3628               make_C_tag (TRUE); /* an Objective C class */
3629               break;
3630             case omethodtag:
3631             case omethodparm:
3632               objdef = omethodcolon;
3633               linebuffer_setlen (&token_name, token_name.len + 1);
3634               strcat (token_name.buffer, ":");
3635               break;
3636             }
3637           if (structdef == stagseen)
3638             {
3639               structdef = scolonseen;
3640               break;
3641             }
3642           /* Should be useless, but may work as a safety net. */
3643           if (cplpl && fvdef == flistseen)
3644             {
3645               make_C_tag (TRUE); /* a function */
3646               fvdef = fignore;
3647               break;
3648             }
3649           break;
3650         case ';':
3651           if (definedef != dnone || inattribute)
3652             break;
3653           switch (typdef)
3654             {
3655             case tend:
3656             case ttypeseen:
3657               make_C_tag (FALSE); /* a typedef */
3658               typdef = tnone;
3659               fvdef = fvnone;
3660               break;
3661             case tnone:
3662             case tinbody:
3663             case tignore:
3664               switch (fvdef)
3665                 {
3666                 case fignore:
3667                   if (typdef == tignore || cplpl)
3668                     fvdef = fvnone;
3669                   break;
3670                 case fvnameseen:
3671                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3672                       || (members && instruct))
3673                     make_C_tag (FALSE); /* a variable */
3674                   fvextern = FALSE;
3675                   fvdef = fvnone;
3676                   token.valid = FALSE;
3677                   break;
3678                 case flistseen:
3679                   if ((declarations
3680                        && (cplpl || !instruct)
3681                        && (typdef == tnone || (typdef != tignore && instruct)))
3682                       || (members
3683                           && plainc && instruct))
3684                     make_C_tag (TRUE);  /* a function */
3685                   /* FALLTHRU */
3686                 default:
3687                   fvextern = FALSE;
3688                   fvdef = fvnone;
3689                   if (declarations
3690                        && cplpl && structdef == stagseen)
3691                     make_C_tag (FALSE); /* forward declaration */
3692                   else
3693                     token.valid = FALSE;
3694                 } /* switch (fvdef) */
3695               /* FALLTHRU */
3696             default:
3697               if (!instruct)
3698                 typdef = tnone;
3699             }
3700           if (structdef == stagseen)
3701             structdef = snone;
3702           break;
3703         case ',':
3704           if (definedef != dnone || inattribute)
3705             break;
3706           switch (objdef)
3707             {
3708             case omethodtag:
3709             case omethodparm:
3710               make_C_tag (TRUE); /* an Objective C method */
3711               objdef = oinbody;
3712               break;
3713             }
3714           switch (fvdef)
3715             {
3716             case fdefunkey:
3717             case foperator:
3718             case fstartlist:
3719             case finlist:
3720             case fignore:
3721             case vignore:
3722               break;
3723             case fdefunname:
3724               fvdef = fignore;
3725               break;
3726             case fvnameseen:
3727               if (parlev == 0
3728                   && ((globals
3729                        && bracelev == 0
3730                        && templatelev == 0
3731                        && (!fvextern || declarations))
3732                       || (members && instruct)))
3733                   make_C_tag (FALSE); /* a variable */
3734               break;
3735             case flistseen:
3736               if ((declarations && typdef == tnone && !instruct)
3737                   || (members && typdef != tignore && instruct))
3738                 {
3739                   make_C_tag (TRUE); /* a function */
3740                   fvdef = fvnameseen;
3741                 }
3742               else if (!declarations)
3743                 fvdef = fvnone;
3744               token.valid = FALSE;
3745               break;
3746             default:
3747               fvdef = fvnone;
3748             }
3749           if (structdef == stagseen)
3750             structdef = snone;
3751           break;
3752         case ']':
3753           if (definedef != dnone || inattribute)
3754             break;
3755           if (structdef == stagseen)
3756             structdef = snone;
3757           switch (typdef)
3758             {
3759             case ttypeseen:
3760             case tend:
3761               typdef = tignore;
3762               make_C_tag (FALSE);       /* a typedef */
3763               break;
3764             case tnone:
3765             case tinbody:
3766               switch (fvdef)
3767                 {
3768                 case foperator:
3769                 case finlist:
3770                 case fignore:
3771                 case vignore:
3772                   break;
3773                 case fvnameseen:
3774                   if ((members && bracelev == 1)
3775                       || (globals && bracelev == 0
3776                           && (!fvextern || declarations)))
3777                     make_C_tag (FALSE); /* a variable */
3778                   /* FALLTHRU */
3779                 default:
3780                   fvdef = fvnone;
3781                 }
3782               break;
3783             }
3784           break;
3785         case '(':
3786           if (inattribute)
3787             {
3788               attrparlev++;
3789               break;
3790             }
3791           if (definedef != dnone)
3792             break;
3793           if (objdef == otagseen && parlev == 0)
3794             objdef = oparenseen;
3795           switch (fvdef)
3796             {
3797             case fvnameseen:
3798               if (typdef == ttypeseen
3799                   && *lp != '*'
3800                   && !instruct)
3801                 {
3802                   /* This handles constructs like:
3803                      typedef void OperatorFun (int fun); */
3804                   make_C_tag (FALSE);
3805                   typdef = tignore;
3806                   fvdef = fignore;
3807                   break;
3808                 }
3809               /* FALLTHRU */
3810             case foperator:
3811               fvdef = fstartlist;
3812               break;
3813             case flistseen:
3814               fvdef = finlist;
3815               break;
3816             }
3817           parlev++;
3818           break;
3819         case ')':
3820           if (inattribute)
3821             {
3822               if (--attrparlev == 0)
3823                 inattribute = FALSE;
3824               break;
3825             }
3826           if (definedef != dnone)
3827             break;
3828           if (objdef == ocatseen && parlev == 1)
3829             {
3830               make_C_tag (TRUE); /* an Objective C category */
3831               objdef = oignore;
3832             }
3833           if (--parlev == 0)
3834             {
3835               switch (fvdef)
3836                 {
3837                 case fstartlist:
3838                 case finlist:
3839                   fvdef = flistseen;
3840                   break;
3841                 }
3842               if (!instruct
3843                   && (typdef == tend
3844                       || typdef == ttypeseen))
3845                 {
3846                   typdef = tignore;
3847                   make_C_tag (FALSE); /* a typedef */
3848                 }
3849             }
3850           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3851             parlev = 0;
3852           break;
3853         case '{':
3854           if (definedef != dnone)
3855             break;
3856           if (typdef == ttypeseen)
3857             {
3858               /* Whenever typdef is set to tinbody (currently only
3859                  here), typdefbracelev should be set to bracelev. */
3860               typdef = tinbody;
3861               typdefbracelev = bracelev;
3862             }
3863           switch (fvdef)
3864             {
3865             case flistseen:
3866               make_C_tag (TRUE);    /* a function */
3867               /* FALLTHRU */
3868             case fignore:
3869               fvdef = fvnone;
3870               break;
3871             case fvnone:
3872               switch (objdef)
3873                 {
3874                 case otagseen:
3875                   make_C_tag (TRUE); /* an Objective C class */
3876                   objdef = oignore;
3877                   break;
3878                 case omethodtag:
3879                 case omethodparm:
3880                   make_C_tag (TRUE); /* an Objective C method */
3881                   objdef = oinbody;
3882                   break;
3883                 default:
3884                   /* Neutralize `extern "C" {' grot. */
3885                   if (bracelev == 0 && structdef == snone && nestlev == 0
3886                       && typdef == tnone)
3887                     bracelev = -1;
3888                 }
3889               break;
3890             }
3891           switch (structdef)
3892             {
3893             case skeyseen:         /* unnamed struct */
3894               pushclass_above (bracelev, NULL, 0);
3895               structdef = snone;
3896               break;
3897             case stagseen:         /* named struct or enum */
3898             case scolonseen:       /* a class */
3899               pushclass_above (bracelev,token.line+token.offset, token.length);
3900               structdef = snone;
3901               make_C_tag (FALSE);  /* a struct or enum */
3902               break;
3903             }
3904           bracelev++;
3905           break;
3906         case '*':
3907           if (definedef != dnone)
3908             break;
3909           if (fvdef == fstartlist)
3910             {
3911               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3912               token.valid = FALSE;
3913             }
3914           break;
3915         case '}':
3916           if (definedef != dnone)
3917             break;
3918           if (!ignoreindent && lp == newlb.buffer + 1)
3919             {
3920               if (bracelev != 0)
3921                 token.valid = FALSE;
3922               bracelev = 0;     /* reset brace level if first column */
3923               parlev = 0;       /* also reset paren level, just in case... */
3924             }
3925           else if (bracelev > 0)
3926             bracelev--;
3927           else
3928             token.valid = FALSE; /* something gone amiss, token unreliable */
3929           popclass_above (bracelev);
3930           structdef = snone;
3931           /* Only if typdef == tinbody is typdefbracelev significant. */
3932           if (typdef == tinbody && bracelev <= typdefbracelev)
3933             {
3934               assert (bracelev == typdefbracelev);
3935               typdef = tend;
3936             }
3937           break;
3938         case '=':
3939           if (definedef != dnone)
3940             break;
3941           switch (fvdef)
3942             {
3943             case foperator:
3944             case finlist:
3945             case fignore:
3946             case vignore:
3947               break;
3948             case fvnameseen:
3949               if ((members && bracelev == 1)
3950                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3951                 make_C_tag (FALSE); /* a variable */
3952               /* FALLTHRU */
3953             default:
3954               fvdef = vignore;
3955             }
3956           break;
3957         case '<':
3958           if (cplpl
3959               && (structdef == stagseen || fvdef == fvnameseen))
3960             {
3961               templatelev++;
3962               break;
3963             }
3964           goto resetfvdef;
3965         case '>':
3966           if (templatelev > 0)
3967             {
3968               templatelev--;
3969               break;
3970             }
3971           goto resetfvdef;
3972         case '+':
3973         case '-':
3974           if (objdef == oinbody && bracelev == 0)
3975             {
3976               objdef = omethodsign;
3977               break;
3978             }
3979           /* FALLTHRU */
3980         resetfvdef:
3981         case '#': case '~': case '&': case '%': case '/':
3982         case '|': case '^': case '!': case '.': case '?':
3983           if (definedef != dnone)
3984             break;
3985           /* These surely cannot follow a function tag in C. */
3986           switch (fvdef)
3987             {
3988             case foperator:
3989             case finlist:
3990             case fignore:
3991             case vignore:
3992               break;
3993             default:
3994               fvdef = fvnone;
3995             }
3996           break;
3997         case '\0':
3998           if (objdef == otagseen)
3999             {
4000               make_C_tag (TRUE); /* an Objective C class */
4001               objdef = oignore;
4002             }
4003           /* If a macro spans multiple lines don't reset its state. */
4004           if (quotednl)
4005             CNL_SAVE_DEFINEDEF ();
4006           else
4007             CNL ();
4008           break;
4009         } /* switch (c) */
4010
4011     } /* while not eof */
4012
4013   free (lbs[0].lb.buffer);
4014   free (lbs[1].lb.buffer);
4015 }
4016
4017 /* Parse INF as C++ when `cplusplus' is set, else autodetect C++.  */
4018 static void
4019 default_C_entries (inf)
4020      FILE *inf;
4021 {
4022   if (cplusplus)
4023     C_entries (C_PLPL, inf);    /* global flag set: force C++ */
4024   else
4025     C_entries (C_AUTO, inf);    /* flag clear: C_entries may detect C++ */
4026 }
4027
4028 /* Always do plain C: hand INF to C_entries with an empty (0) flag word. */
4029 static void
4030 plain_C_entries (inf)
4031      FILE *inf;                /* input stream, passed on as is */
4032 {
4033   C_entries (0, inf);
4034 }
4035
4036 /* Always do C++: hand INF to C_entries with the C_PLPL flag. */
4037 static void
4038 Cplusplus_entries (inf)
4039      FILE *inf;                /* input stream, passed on as is */
4040 {
4041   C_entries (C_PLPL, inf);
4042 }
4043
4044 /* Always do Java: hand INF to C_entries with the C_JAVA flag. */
4045 static void
4046 Cjava_entries (inf)
4047      FILE *inf;                /* input stream, passed on as is */
4048 {
4049   C_entries (C_JAVA, inf);
4050 }
4051
4052 /* Always do C*: hand INF to C_entries with the C_STAR flag. */
4053 static void
4054 Cstar_entries (inf)
4055      FILE *inf;                /* input stream, passed on as is */
4056 {
4057   C_entries (C_STAR, inf);
4058 }
4059
4060 /* Always do Yacc: hand INF to C_entries with the YACC flag. */
4061 static void
4062 Yacc_entries (inf)
4063      FILE *inf;                /* input stream, passed on as is */
4064 {
4065   C_entries (YACC, inf);
4066 }
4067
4068 \f
4069 /* Useful macros for the line-by-line parsers below. */
4070 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4071   for (;                        /* no loop initialization */            \
4072        !feof (file_pointer)     /* loop test: stop at end of file */    \
4073        &&                       /* else, at the start of each pass: */  \
4074           (readline (&line_buffer, file_pointer), /* read one line */   \
4075            char_pointer = line_buffer.buffer,     /* aim at its start */ \
4076            TRUE);                   /* comma value: always go on */     \
4077       )                             /* no increment; loop body follows the macro */
4078 #define LOOKING_AT(cp, keyword) /* cp: lvalue; keyword: string literal (sizeof) */ \
4079   (strneq ((cp), keyword, sizeof(keyword)-1) /* cp begins with keyword */ \
4080    && notinname ((cp)[sizeof(keyword)-1])       /* and the keyword ends there */ \
4081    && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* then move cp past it and the spaces */
4082
4083 /* Read INF to the end, one line at a time, into the global line
4084    buffer `lb', without parsing anything here.  This is used to do
4085    regexp matching on files that have no language defined (the
4086    matching itself is presumably done inside readline).  */
4087 static void
4088 just_read_file (inf)
4089      FILE *inf;
4090 {
4091   /* LOOP_ON_INPUT_LINES written out by hand; the line pointer it
4092      would also set is dropped because nothing ever reads it.  */
4093   while (!feof (inf))
4094     readline (&lb, inf);
4095 }
4096
4097 \f
4098 /* Fortran parsing */
4099
4100 static void F_takeprec __P((void));
4101 static void F_getit __P((FILE *));
4102
4103 /* Move the global cursor `dbp' over an optional length suffix:
4104    blanks, `*', blanks, then either `(*)' or a run of digits.
4105    Without a `*' only the leading blanks are consumed.  A `*' that
4106    is followed by neither form steps dbp back one character, which
4107    is intended to make the caller's keyword match fail.  */
4108 static void
4109 F_takeprec ()
4110 {
4111   dbp = skip_spaces (dbp);
4112   if (*dbp == '*')
4113     {
4114       dbp = skip_spaces (dbp + 1);
4115       if (strneq (dbp, "(*)", 3))
4116         dbp += 3;
4117       else if (ISDIGIT (*dbp))
4118         /* Swallow the whole digit run.  */
4119         for (dbp++; ISDIGIT (*dbp); dbp++)
4120           continue;
4121       else
4122         dbp--;                  /* force failure */
4123     }
4124 }
4125 /* Tag the Fortran name at dbp, following one continuation line.  */
4126 static void
4127 F_getit (inf)
4128      FILE *inf;                /* source of the continuation line */
4129 {
4130   register char *cp;           /* scans to the end of the name */
4131
4132   dbp = skip_spaces (dbp);
4133   if (*dbp == '\0')            /* name is not on this line */
4134     {
4135       readline (&lb, inf);
4136       dbp = lb.buffer;
4137       /* Continuation needs `&' at index 5; never index a shorter line. */
4138       if (strlen (dbp) < 6 || dbp[5] != '&')
4139         return;
4140       dbp = skip_spaces (dbp + 6);
4141     }
4142   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4143     return;                    /* not the start of a name: no tag */
4144   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4145     continue;
4146   make_tag (dbp, cp-dbp, TRUE,
4147             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4148 }
4149 /* Tag `function', `subroutine', `entry' and `block data' names found
4150    in INF, line by line; a type keyword may come before the unit keyword. */
4151 static void
4152 Fortran_functions (inf)
4153      FILE *inf;
4154 {
4155   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4156     {
4157       if (*dbp == '%')
4158         dbp++;                  /* Ratfor escape to fortran */
4159       dbp = skip_spaces (dbp);
4160       if (*dbp == '\0')
4161         continue;               /* nothing on this line */
4162       switch (lowcase (*dbp))   /* first: an optional type keyword */
4163         {
4164         case 'i':
4165           if (nocase_tail ("integer"))
4166             F_takeprec ();      /* step over a `*N' or `*(*)' suffix */
4167           break;
4168         case 'r':
4169           if (nocase_tail ("real"))
4170             F_takeprec ();
4171           break;
4172         case 'l':
4173           if (nocase_tail ("logical"))
4174             F_takeprec ();
4175           break;
4176         case 'c':
4177           if (nocase_tail ("complex") || nocase_tail ("character"))
4178             F_takeprec ();
4179           break;
4180         case 'd':
4181           if (nocase_tail ("double"))   /* presumably moves dbp past the match */
4182             {
4183               dbp = skip_spaces (dbp);
4184               if (*dbp == '\0')
4185                 continue;       /* `double' ends the line: next line */
4186               if (nocase_tail ("precision"))
4187                 break;          /* `double precision' is a type keyword */
4188               continue;         /* `double' plus something else: next line */
4189             }
4190           break;
4191         }
4192       dbp = skip_spaces (dbp);
4193       if (*dbp == '\0')
4194         continue;
4195       switch (lowcase (*dbp))   /* then: the program-unit keyword */
4196         {
4197         case 'f':
4198           if (nocase_tail ("function"))
4199             F_getit (inf);      /* tag the name after the keyword */
4200           continue;             /* each case here ends the line */
4201         case 's':
4202           if (nocase_tail ("subroutine"))
4203             F_getit (inf);
4204           continue;
4205         case 'e':
4206           if (nocase_tail ("entry"))
4207             F_getit (inf);
4208           continue;
4209         case 'b':
4210           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4211             {
4212               dbp = skip_spaces (dbp);
4213               if (*dbp == '\0') /* assume un-named */
4214                 make_tag ("blockdata", 9, TRUE,     /* fixed 9-char tag name */
4215                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4216               else
4217                 F_getit (inf);  /* look for name */
4218             }
4219           continue;
4220         }
4221     }
4222 }
4223
4224 \f
4225 /*
4226  * Ada parsing
4227  * Original code by
4228  * Philippe Waroquiers (1998)
4229  */
4230
4231 static void Ada_getit __P((FILE *, char *));
4232
4233 /* Called with dbp just after an "interesting" keyword: tag the name
4234    that follows, with NAME_QUALIFIER appended to form the tag name. */
4235 static void
4236 Ada_getit (inf, name_qualifier)
4237      FILE *inf;                /* read when the name is not on this line */
4238      char *name_qualifier;     /* suffix such as "/f"; `body' turns it into "/b" */
4239 {
4240   register char *cp;           /* ends up just past the name */
4241   char *name;                  /* name + qualifier, freed after tagging */
4242   char c;                      /* saves *cp while it holds a NUL */
4243
4244   while (!feof (inf))          /* NOTE(review): at EOF on entry, no tag is made */
4245     {
4246       dbp = skip_spaces (dbp);
4247       if (*dbp == '\0'
4248           || (dbp[0] == '-' && dbp[1] == '-'))   /* line used up, or only a `--' comment left */
4249         {
4250           readline (&lb, inf);                   /* go on with the next line */
4251           dbp = lb.buffer;
4252         }
4253       switch (lowcase(*dbp))
4254         {
4255         case 'b':
4256           if (nocase_tail ("body"))
4257             {
4258               /* Skipping body of   procedure body   or   package body or ....
4259                  resetting qualifier to body instead of spec. */
4260               name_qualifier = "/b";
4261               continue;        /* rescan from the top of the loop */
4262             }
4263           break;
4264         case 't':
4265           /* Skipping type of   task type   or   protected type ... */
4266           if (nocase_tail ("type"))
4267             continue;
4268           break;
4269         }
4270       if (*dbp == '"')         /* quoted name: runs up to the closing quote */
4271         {
4272           dbp += 1;
4273           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4274             continue;
4275         }
4276       else                     /* plain name: letters, digits, `_' and `.' */
4277         {
4278           dbp = skip_spaces (dbp);
4279           for (cp = dbp;
4280                (*cp != '\0'
4281                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4282                cp++)
4283             continue;
4284           if (cp == dbp)
4285             return;            /* no name here: give up without a tag */
4286         }
4287       c = *cp;
4288       *cp = '\0';              /* cut the line after the name for concat */
4289       name = concat (dbp, name_qualifier, "");
4290       *cp = c;                 /* put the line back as it was */
4291       make_tag (name, strlen (name), TRUE,
4292                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4293       free (name);
4294       if (c == '"')
4295         dbp = cp + 1;          /* step over the closing quote */
4296       return;                  /* one tag per call; only `continue' loops */
4297     }
4298 }
4299
4300 static void
4301 Ada_funcs (inf)
4302      FILE *inf;
4303 {
4304   bool inquote = FALSE;
4305   bool skip_till_semicolumn = FALSE;
4306
4307   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4308     {
4309       while (*dbp != '\0')
4310         {
4311           /* Skip a string i.e. "abcd". */
4312           if (inquote || (*dbp == '"'))
4313             {
4314               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4315               if (dbp != NULL)
4316                 {
4317                   inquote = FALSE;
4318                   dbp += 1;
4319                   continue;     /* advance char */
4320                 }
4321               else
4322                 {
4323                   inquote = TRUE;
4324                   break;        /* advance line */
4325                 }
4326             }
4327
4328           /* Skip comments. */
4329           if (dbp[0] == '-' && dbp[1] == '-')
4330             break;              /* advance line */
4331
4332           /* Skip character enclosed in single quote i.e. 'a'
4333              and skip single quote starting an attribute i.e. 'Image. */
4334           if (*dbp == '\'')
4335             {
4336               dbp++ ;
4337               if (*dbp != '\0')
4338                 dbp++;
4339               continue;
4340             }
4341
4342           if (skip_till_semicolumn)
4343             {
4344               if (*dbp == ';')
4345                 skip_till_semicolumn = FALSE;
4346               dbp++;
4347               continue;         /* advance char */
4348             }
4349
4350           /* Search for beginning of a token.  */
4351           if (!begtoken (*dbp))
4352             {
4353               dbp++;
4354               continue;         /* advance char */
4355             }
4356
4357           /* We are at the beginning of a token. */
4358           switch (lowcase(*dbp))
4359             {
4360             case 'f':
4361               if (!packages_only && nocase_tail ("function"))
4362                 Ada_getit (inf, "/f");
4363               else
4364                 break;          /* from switch */
4365               continue;         /* advance char */
4366             case 'p':
4367               if (!packages_only && nocase_tail ("procedure"))
4368                 Ada_getit (inf, "/p");
4369               else if (nocase_tail ("package"))
4370                 Ada_getit (inf, "/s");
4371               else if (nocase_tail ("protected")) /* protected type */
4372                 Ada_getit (inf, "/t");
4373               else
4374                 break;          /* from switch */
4375               continue;         /* advance char */
4376
4377             case 'u':
4378               if (typedefs && !packages_only && nocase_tail ("use"))
4379                 {
4380                   /* when tagging types, avoid tagging  use type Pack.Typename;
4381                      for this, we will skip everything till a ; */
4382                   skip_till_semicolumn = TRUE;
4383                   continue;     /* advance char */
4384                 }
4385
4386             case 't':
4387               if (!packages_only && nocase_tail ("task"))
4388                 Ada_getit (inf, "/k");
4389               else if (typedefs && !packages_only && nocase_tail ("type"))
4390                 {
4391                   Ada_getit (inf, "/t");
4392                   while (*dbp != '\0')
4393                     dbp += 1;
4394                 }
4395               else
4396                 break;          /* from switch */
4397               continue;         /* advance char */
4398             }
4399
4400           /* Look for the end of the token. */
4401           while (!endtoken (*dbp))
4402             dbp++;
4403
4404         } /* advance char */
4405     } /* advance line */
4406 }
4407
4408 \f
4409 /*
4410  * Unix and microcontroller assembly tag handling
4411  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4412  * Idea by Bob Weiner, Motorola Inc. (1994)
4413  */
4414 static void
4415 Asm_labels (inf)
4416      FILE *inf;
4417 {
4418   register char *cp;
4419
4420   LOOP_ON_INPUT_LINES (inf, lb, cp)
4421     {
4422       /* If first char is alphabetic or one of [_.$], test for colon
4423          following identifier. */
4424       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4425         {
4426           /* Read past label. */
4427           cp++;
4428           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4429             cp++;
4430           if (*cp == ':' || iswhite (*cp))
4431             /* Found end of label, so copy it and add it to the table. */
4432             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4433                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4434         }
4435     }
4436 }
4437
4438 \f
4439 /*
4440  * Perl support
4441  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4442  * Perl variable names: /^(my|local).../
4443  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4444  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4445  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4446  */
4447 static void
4448 Perl_functions (inf)
4449      FILE *inf;
4450 {
4451   char *package = savestr ("main"); /* current package name */
4452   register char *cp;
4453
4454   LOOP_ON_INPUT_LINES (inf, lb, cp)
4455     {
4456       skip_spaces(cp);
4457
4458       if (LOOKING_AT (cp, "package"))
4459         {
4460           free (package);
4461           get_tag (cp, &package);
4462         }
4463       else if (LOOKING_AT (cp, "sub"))
4464         {
4465           char *pos;
4466           char *sp = cp;
4467
4468           while (!notinname (*cp))
4469             cp++;
4470           if (cp == sp)
4471             continue;           /* nothing found */
4472           if ((pos = etags_strchr (sp, ':')) != NULL
4473               && pos < cp && pos[1] == ':')
4474             /* The name is already qualified. */
4475             make_tag (sp, cp - sp, TRUE,
4476                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4477           else
4478             /* Qualify it. */
4479             {
4480               char savechar, *name;
4481
4482               savechar = *cp;
4483               *cp = '\0';
4484               name = concat (package, "::", sp);
4485               *cp = savechar;
4486               make_tag (name, strlen(name), TRUE,
4487                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4488               free (name);
4489             }
4490         }
4491        else if (globals)        /* only if we are tagging global vars */
4492         {
4493           /* Skip a qualifier, if any. */
4494           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4495           /* After "my" or "local", but before any following paren or space. */
4496           char *varstart = cp;
4497
4498           if (qual              /* should this be removed?  If yes, how? */
4499               && (*cp == '$' || *cp == '@' || *cp == '%'))
4500             {
4501               varstart += 1;
4502               do
4503                 cp++;
4504               while (ISALNUM (*cp) || *cp == '_');
4505             }
4506           else if (qual)
4507             {
4508               /* Should be examining a variable list at this point;
4509                  could insist on seeing an open parenthesis. */
4510               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4511                 cp++;
4512             }
4513           else
4514             continue;
4515
4516           make_tag (varstart, cp - varstart, FALSE,
4517                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4518         }
4519     }
4520 }
4521
4522
4523 /*
4524  * Python support
4525  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4526  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4527  * More ideas by seb bacon <seb@jamkit.com> (2002)
4528  */
4529 static void
4530 Python_functions (inf)
4531      FILE *inf;
4532 {
4533   register char *cp;
4534
4535   LOOP_ON_INPUT_LINES (inf, lb, cp)
4536     {
4537       cp = skip_spaces (cp);
4538       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4539         {
4540           char *name = cp;
4541           while (!notinname (*cp) && *cp != ':')
4542             cp++;
4543           make_tag (name, cp - name, TRUE,
4544                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4545         }
4546     }
4547 }
4548
4549 \f
4550 /*
4551  * PHP support
4552  * Look for:
4553  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4554  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4555  *  - /^[ \t]*define\(\"[^\"]+/
4556  * Only with --members:
4557  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4558  * Idea by Diez B. Roggisch (2001)
4559  */
4560 static void
4561 PHP_functions (inf)
4562      FILE *inf;
4563 {
4564   register char *cp, *name;
4565   bool search_identifier = FALSE;
4566
4567   LOOP_ON_INPUT_LINES (inf, lb, cp)
4568     {
4569       cp = skip_spaces (cp);
4570       name = cp;
4571       if (search_identifier
4572           && *cp != '\0')
4573         {
4574           while (!notinname (*cp))
4575             cp++;
4576           make_tag (name, cp - name, TRUE,
4577                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578           search_identifier = FALSE;
4579         }
4580       else if (LOOKING_AT (cp, "function"))
4581         {
4582           if(*cp == '&')
4583             cp = skip_spaces (cp+1);
4584           if(*cp != '\0')
4585             {
4586               name = cp;
4587               while (!notinname (*cp))
4588                 cp++;
4589               make_tag (name, cp - name, TRUE,
4590                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4591             }
4592           else
4593             search_identifier = TRUE;
4594         }
4595       else if (LOOKING_AT (cp, "class"))
4596         {
4597           if (*cp != '\0')
4598             {
4599               name = cp;
4600               while (*cp != '\0' && !iswhite (*cp))
4601                 cp++;
4602               make_tag (name, cp - name, FALSE,
4603                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4604             }
4605           else
4606             search_identifier = TRUE;
4607         }
4608       else if (strneq (cp, "define", 6)
4609                && (cp = skip_spaces (cp+6))
4610                && *cp++ == '('
4611                && (*cp == '"' || *cp == '\''))
4612         {
4613           char quote = *cp++;
4614           name = cp;
4615           while (*cp != quote && *cp != '\0')
4616             cp++;
4617           make_tag (name, cp - name, FALSE,
4618                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4619         }
4620       else if (members
4621                && LOOKING_AT (cp, "var")
4622                && *cp == '$')
4623         {
4624           name = cp;
4625           while (!notinname(*cp))
4626             cp++;
4627           make_tag (name, cp - name, FALSE,
4628                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4629         }
4630     }
4631 }
4632
4633 \f
4634 /*
4635  * Cobol tag functions
4636  * We could look for anything that could be a paragraph name.
4637  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4638  * Idea by Corny de Souza (1993)
4639  */
4640 static void
4641 Cobol_paragraphs (inf)
4642      FILE *inf;
4643 {
4644   register char *bp, *ep;
4645
4646   LOOP_ON_INPUT_LINES (inf, lb, bp)
4647     {
4648       if (lb.len < 9)
4649         continue;
4650       bp += 8;
4651
4652       /* If eoln, compiler option or comment ignore whole line. */
4653       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4654         continue;
4655
4656       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4657         continue;
4658       if (*ep++ == '.')
4659         make_tag (bp, ep - bp, TRUE,
4660                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4661     }
4662 }
4663
4664 \f
4665 /*
4666  * Makefile support
4667  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4668  */
4669 static void
4670 Makefile_targets (inf)
4671      FILE *inf;
4672 {
4673   register char *bp;
4674
4675   LOOP_ON_INPUT_LINES (inf, lb, bp)
4676     {
4677       if (*bp == '\t' || *bp == '#')
4678         continue;
4679       while (*bp != '\0' && *bp != '=' && *bp != ':')
4680         bp++;
4681       if (*bp == ':' || (globals && *bp == '='))
4682         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4683                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4684     }
4685 }
4686
4687 \f
4688 /*
4689  * Pascal parsing
4690  * Original code by Mosur K. Mohan (1989)
4691  *
4692  *  Locates tags for procedures & functions.  Doesn't do any type- or
4693  *  var-definitions.  It does look for the keyword "extern" or
4694  *  "forward" immediately following the procedure statement; if found,
4695  *  the tag is skipped.
4696  */
4697 static void
4698 Pascal_functions (inf)
4699      FILE *inf;
4700 {
4701   linebuffer tline;             /* mostly copied from C_entries */
4702   long save_lcno;
4703   int save_lineno, namelen, taglen;
4704   char c, *name;
4705
4706   bool                          /* each of these flags is TRUE iff: */
4707     incomment,                  /* point is inside a comment */
4708     inquote,                    /* point is inside '..' string */
4709     get_tagname,                /* point is after PROCEDURE/FUNCTION
4710                                    keyword, so next item = potential tag */
4711     found_tag,                  /* point is after a potential tag */
4712     inparms,                    /* point is within parameter-list */
4713     verify_tag;                 /* point has passed the parm-list, so the
4714                                    next token will determine whether this
4715                                    is a FORWARD/EXTERN to be ignored, or
4716                                    whether it is a real tag */
4717
4718   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4719   name = NULL;                  /* keep compiler quiet */
4720   dbp = lb.buffer;
4721   *dbp = '\0';
4722   linebuffer_init (&tline);
4723
4724   incomment = inquote = FALSE;
4725   found_tag = FALSE;            /* have a proc name; check if extern */
4726   get_tagname = FALSE;          /* found "procedure" keyword         */
4727   inparms = FALSE;              /* found '(' after "proc"            */
4728   verify_tag = FALSE;           /* check if "extern" is ahead        */
4729
4730
4731   while (!feof (inf))           /* long main loop to get next char */
4732     {
4733       c = *dbp++;
4734       if (c == '\0')            /* if end of line */
4735         {
4736           readline (&lb, inf);
4737           dbp = lb.buffer;
4738           if (*dbp == '\0')
4739             continue;
4740           if (!((found_tag && verify_tag)
4741                 || get_tagname))
4742             c = *dbp++;         /* only if don't need *dbp pointing
4743                                    to the beginning of the name of
4744                                    the procedure or function */
4745         }
4746       if (incomment)
4747         {
4748           if (c == '}')         /* within { } comments */
4749             incomment = FALSE;
4750           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4751             {
4752               dbp++;
4753               incomment = FALSE;
4754             }
4755           continue;
4756         }
4757       else if (inquote)
4758         {
4759           if (c == '\'')
4760             inquote = FALSE;
4761           continue;
4762         }
4763       else
4764         switch (c)
4765           {
4766           case '\'':
4767             inquote = TRUE;     /* found first quote */
4768             continue;
4769           case '{':             /* found open { comment */
4770             incomment = TRUE;
4771             continue;
4772           case '(':
4773             if (*dbp == '*')    /* found open (* comment */
4774               {
4775                 incomment = TRUE;
4776                 dbp++;
4777               }
4778             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4779               inparms = TRUE;
4780             continue;
4781           case ')':             /* end of parms list */
4782             if (inparms)
4783               inparms = FALSE;
4784             continue;
4785           case ';':
4786             if (found_tag && !inparms) /* end of proc or fn stmt */
4787               {
4788                 verify_tag = TRUE;
4789                 break;
4790               }
4791             continue;
4792           }
4793       if (found_tag && verify_tag && (*dbp != ' '))
4794         {
4795           /* Check if this is an "extern" declaration. */
4796           if (*dbp == '\0')
4797             continue;
4798           if (lowcase (*dbp == 'e'))
4799             {
4800               if (nocase_tail ("extern")) /* superfluous, really! */
4801                 {
4802                   found_tag = FALSE;
4803                   verify_tag = FALSE;
4804                 }
4805             }
4806           else if (lowcase (*dbp) == 'f')
4807             {
4808               if (nocase_tail ("forward")) /* check for forward reference */
4809                 {
4810                   found_tag = FALSE;
4811                   verify_tag = FALSE;
4812                 }
4813             }
4814           if (found_tag && verify_tag) /* not external proc, so make tag */
4815             {
4816               found_tag = FALSE;
4817               verify_tag = FALSE;
4818               make_tag (name, namelen, TRUE,
4819                         tline.buffer, taglen, save_lineno, save_lcno);
4820               continue;
4821             }
4822         }
4823       if (get_tagname)          /* grab name of proc or fn */
4824         {
4825           char *cp;
4826
4827           if (*dbp == '\0')
4828             continue;
4829
4830           /* Find block name. */
4831           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4832             continue;
4833
4834           /* Save all values for later tagging. */
4835           linebuffer_setlen (&tline, lb.len);
4836           strcpy (tline.buffer, lb.buffer);
4837           save_lineno = lineno;
4838           save_lcno = linecharno;
4839           name = tline.buffer + (dbp - lb.buffer);
4840           namelen = cp - dbp;
4841           taglen = cp - lb.buffer + 1;
4842
4843           dbp = cp;             /* set dbp to e-o-token */
4844           get_tagname = FALSE;
4845           found_tag = TRUE;
4846           continue;
4847
4848           /* And proceed to check for "extern". */
4849         }
4850       else if (!incomment && !inquote && !found_tag)
4851         {
4852           /* Check for proc/fn keywords. */
4853           switch (lowcase (c))
4854             {
4855             case 'p':
4856               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4857                 get_tagname = TRUE;
4858               continue;
4859             case 'f':
4860               if (nocase_tail ("unction"))
4861                 get_tagname = TRUE;
4862               continue;
4863             }
4864         }
4865     } /* while not eof */
4866
4867   free (tline.buffer);
4868 }
4869
4870 \f
4871 /*
4872  * Lisp tag functions
4873  *  look for (def or (DEF, quote or QUOTE
4874  */
4875
4876 static void L_getit __P((void));
4877
4878 static void
4879 L_getit ()
4880 {
4881   if (*dbp == '\'')             /* Skip prefix quote */
4882     dbp++;
4883   else if (*dbp == '(')
4884   {
4885     dbp++;
4886     /* Try to skip "(quote " */
4887     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4888       /* Ok, then skip "(" before name in (defstruct (foo)) */
4889       dbp = skip_spaces (dbp);
4890   }
4891   get_tag (dbp, NULL);
4892 }
4893
4894 static void
4895 Lisp_functions (inf)
4896      FILE *inf;
4897 {
4898   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4899     {
4900       if (dbp[0] != '(')
4901         continue;
4902
4903       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4904         {
4905           dbp = skip_non_spaces (dbp);
4906           dbp = skip_spaces (dbp);
4907           L_getit ();
4908         }
4909       else
4910         {
4911           /* Check for (foo::defmumble name-defined ... */
4912           do
4913             dbp++;
4914           while (!notinname (*dbp) && *dbp != ':');
4915           if (*dbp == ':')
4916             {
4917               do
4918                 dbp++;
4919               while (*dbp == ':');
4920
4921               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4922                 {
4923                   dbp = skip_non_spaces (dbp);
4924                   dbp = skip_spaces (dbp);
4925                   L_getit ();
4926                 }
4927             }
4928         }
4929     }
4930 }
4931
4932 \f
4933 /*
4934  * Lua script language parsing
4935  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4936  *
4937  *  "function" and "local function" are tags if they start at column 1.
4938  */
4939 static void
4940 Lua_functions (inf)
4941      FILE *inf;
4942 {
4943   register char *bp;
4944
4945   LOOP_ON_INPUT_LINES (inf, lb, bp)
4946     {
4947       if (bp[0] != 'f' && bp[0] != 'l')
4948         continue;
4949
4950       LOOKING_AT (bp, "local"); /* skip possible "local" */
4951
4952       if (LOOKING_AT (bp, "function"))
4953         get_tag (bp, NULL);
4954     }
4955 }
4956
4957 \f
4958 /*
4959  * Postscript tag functions
4960  * Just look for lines where the first character is '/'
4961  * Also look at "defineps" for PSWrap
4962  * Ideas by:
4963  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4964  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4965  */
4966 static void
4967 PS_functions (inf)
4968      FILE *inf;
4969 {
4970   register char *bp, *ep;
4971
4972   LOOP_ON_INPUT_LINES (inf, lb, bp)
4973     {
4974       if (bp[0] == '/')
4975         {
4976           for (ep = bp+1;
4977                *ep != '\0' && *ep != ' ' && *ep != '{';
4978                ep++)
4979             continue;
4980           make_tag (bp, ep - bp, TRUE,
4981                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4982         }
4983       else if (LOOKING_AT (bp, "defineps"))
4984         get_tag (bp, NULL);
4985     }
4986 }
4987
4988 \f
4989 /*
4990  * Scheme tag functions
4991  * look for (def... xyzzy
4992  *          (def... (xyzzy
4993  *          (def ... ((...(xyzzy ....
4994  *          (set! xyzzy
4995  * Original code by Ken Haase (1985?)
4996  */
4997
4998 static void
4999 Scheme_functions (inf)
5000      FILE *inf;
5001 {
5002   register char *bp;
5003
5004   LOOP_ON_INPUT_LINES (inf, lb, bp)
5005     {
5006       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5007         {
5008           bp = skip_non_spaces (bp+4);
5009           /* Skip over open parens and white space */
5010           while (notinname (*bp))
5011             bp++;
5012           get_tag (bp, NULL);
5013         }
5014       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5015         get_tag (bp, NULL);
5016     }
5017 }
5018
5019 \f
5020 /* Find tags in TeX and LaTeX input files.  */
5021
5022 /* TEX_toktab is a table of TeX control sequences that define tags.
5023  * Each entry records one such control sequence.
5024  *
5025  * Original code from who knows whom.
5026  * Ideas by:
5027  *   Stefan Monnier (2002)
5028  */
5029
5030 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5031
5032 /* Default set of control sequences to put into TEX_toktab.
5033    The value of environment var TEXTAGS is prepended to this.  */
5034 static char *TEX_defenv = "\
5035 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5036 :part:appendix:entry:index:def\
5037 :newcommand:renewcommand:newenvironment:renewenvironment";
5038
5039 static void TEX_mode __P((FILE *));
5040 static void TEX_decode_env __P((char *, char *));
5041
5042 static char TEX_esc = '\\';
5043 static char TEX_opgrp = '{';
5044 static char TEX_clgrp = '}';
5045
5046 /*
5047  * TeX/LaTeX scanning loop.
5048  */
5049 static void
5050 TeX_commands (inf)
5051      FILE *inf;
5052 {
5053   char *cp;
5054   linebuffer *key;
5055
5056   /* Select either \ or ! as escape character.  */
5057   TEX_mode (inf);
5058
5059   /* Initialize token table once from environment. */
5060   if (TEX_toktab == NULL)
5061     TEX_decode_env ("TEXTAGS", TEX_defenv);
5062
5063   LOOP_ON_INPUT_LINES (inf, lb, cp)
5064     {
5065       /* Look at each TEX keyword in line. */
5066       for (;;)
5067         {
5068           /* Look for a TEX escape. */
5069           while (*cp++ != TEX_esc)
5070             if (cp[-1] == '\0' || cp[-1] == '%')
5071               goto tex_next_line;
5072
5073           for (key = TEX_toktab; key->buffer != NULL; key++)
5074             if (strneq (cp, key->buffer, key->len))
5075               {
5076                 register char *p;
5077                 int namelen, linelen;
5078                 bool opgrp = FALSE;
5079
5080                 cp = skip_spaces (cp + key->len);
5081                 if (*cp == TEX_opgrp)
5082                   {
5083                     opgrp = TRUE;
5084                     cp++;
5085                   }
5086                 for (p = cp;
5087                      (!iswhite (*p) && *p != '#' &&
5088                       *p != TEX_opgrp && *p != TEX_clgrp);
5089                      p++)
5090                   continue;
5091                 namelen = p - cp;
5092                 linelen = lb.len;
5093                 if (!opgrp || *p == TEX_clgrp)
5094                   {
5095                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5096                       *p++;
5097                     linelen = p - lb.buffer + 1;
5098                   }
5099                 make_tag (cp, namelen, TRUE,
5100                           lb.buffer, linelen, lineno, linecharno);
5101                 goto tex_next_line; /* We only tag a line once */
5102               }
5103         }
5104     tex_next_line:
5105       ;
5106     }
5107 }
5108
5109 #define TEX_LESC '\\'
5110 #define TEX_SESC '!'
5111
5112 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5113    chars accordingly. */
5114 static void
5115 TEX_mode (inf)
5116      FILE *inf;
5117 {
5118   int c;
5119
5120   while ((c = getc (inf)) != EOF)
5121     {
5122       /* Skip to next line if we hit the TeX comment char. */
5123       if (c == '%')
5124         while (c != '\n')
5125           c = getc (inf);
5126       else if (c == TEX_LESC || c == TEX_SESC )
5127         break;
5128     }
5129
5130   if (c == TEX_LESC)
5131     {
5132       TEX_esc = TEX_LESC;
5133       TEX_opgrp = '{';
5134       TEX_clgrp = '}';
5135     }
5136   else
5137     {
5138       TEX_esc = TEX_SESC;
5139       TEX_opgrp = '<';
5140       TEX_clgrp = '>';
5141     }
5142   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5143      No attempt is made to correct the situation. */
5144   rewind (inf);
5145 }
5146
5147 /* Read environment and prepend it to the default string.
5148    Build token table. */
5149 static void
5150 TEX_decode_env (evarname, defenv)
5151      char *evarname;
5152      char *defenv;
5153 {
5154   register char *env, *p;
5155   int i, len;
5156
5157   /* Append default string to environment. */
5158   env = getenv (evarname);
5159   if (!env)
5160     env = defenv;
5161   else
5162     {
5163       char *oldenv = env;
5164       env = concat (oldenv, defenv, "");
5165     }
5166
5167   /* Allocate a token table */
5168   for (len = 1, p = env; p;)
5169     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5170       len++;
5171   TEX_toktab = xnew (len, linebuffer);
5172
5173   /* Unpack environment string into token table. Be careful about */
5174   /* zero-length strings (leading ':', "::" and trailing ':') */
5175   for (i = 0; *env != '\0';)
5176     {
5177       p = etags_strchr (env, ':');
5178       if (!p)                   /* End of environment string. */
5179         p = env + strlen (env);
5180       if (p - env > 0)
5181         {                       /* Only non-zero strings. */
5182           TEX_toktab[i].buffer = savenstr (env, p - env);
5183           TEX_toktab[i].len = p - env;
5184           i++;
5185         }
5186       if (*p)
5187         env = p + 1;
5188       else
5189         {
5190           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5191           TEX_toktab[i].len = 0;
5192           break;
5193         }
5194     }
5195 }
5196
5197 \f
5198 /* Texinfo support.  Dave Love, Mar. 2000.  */
5199 static void
5200 Texinfo_nodes (inf)
5201      FILE * inf;
5202 {
5203   char *cp, *start;
5204   LOOP_ON_INPUT_LINES (inf, lb, cp)
5205     if (LOOKING_AT (cp, "@node"))
5206       {
5207         start = cp;
5208         while (*cp != '\0' && *cp != ',')
5209           cp++;
5210         make_tag (start, cp - start, TRUE,
5211                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5212       }
5213 }
5214
5215 \f
/* Similar to LOOKING_AT but does not use notinname, does not skip:
   when CP points at KW (compared ignoring case), CP is moved just past
   KW and the value is nonzero; otherwise CP is untouched and the value
   is zero.  */
#define LOOKING_AT_NOCASE(cp, kw)       /* kw is a constant string */   \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   ? ((cp) += sizeof(kw)-1, 1)          /* step over kw */              \
   : 0)
5220
5221 /*
5222  * HTML support.
5223  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5224  * Contents of <a name=xxx> are tags with name xxx.
5225  *
5226  * Francesco Potortì, 2002.
5227  */
5228 static void
5229 HTML_labels (inf)
5230      FILE * inf;
5231 {
5232   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5233   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5234   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5235   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5236   char *end;
5237
5238
5239   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5240
5241   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5242     for (;;)                    /* loop on the same line */
5243       {
5244         if (skiptag)            /* skip HTML tag */
5245           {
5246             while (*dbp != '\0' && *dbp != '>')
5247               dbp++;
5248             if (*dbp == '>')
5249               {
5250                 dbp += 1;
5251                 skiptag = FALSE;
5252                 continue;       /* look on the same line */
5253               }
5254             break;              /* go to next line */
5255           }
5256
5257         else if (intag) /* look for "name=" or "id=" */
5258           {
5259             while (*dbp != '\0' && *dbp != '>'
5260                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5261               dbp++;
5262             if (*dbp == '\0')
5263               break;            /* go to next line */
5264             if (*dbp == '>')
5265               {
5266                 dbp += 1;
5267                 intag = FALSE;
5268                 continue;       /* look on the same line */
5269               }
5270             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5271                 || LOOKING_AT_NOCASE (dbp, "id="))
5272               {
5273                 bool quoted = (dbp[0] == '"');
5274
5275                 if (quoted)
5276                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5277                     continue;
5278                 else
5279                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5280                     continue;
5281                 linebuffer_setlen (&token_name, end - dbp);
5282                 strncpy (token_name.buffer, dbp, end - dbp);
5283                 token_name.buffer[end - dbp] = '\0';
5284
5285                 dbp = end;
5286                 intag = FALSE;  /* we found what we looked for */
5287                 skiptag = TRUE; /* skip to the end of the tag */
5288                 getnext = TRUE; /* then grab the text */
5289                 continue;       /* look on the same line */
5290               }
5291             dbp += 1;
5292           }
5293
5294         else if (getnext)       /* grab next tokens and tag them */
5295           {
5296             dbp = skip_spaces (dbp);
5297             if (*dbp == '\0')
5298               break;            /* go to next line */
5299             if (*dbp == '<')
5300               {
5301                 intag = TRUE;
5302                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5303                 continue;       /* look on the same line */
5304               }
5305
5306             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5307               continue;
5308             make_tag (token_name.buffer, token_name.len, TRUE,
5309                       dbp, end - dbp, lineno, linecharno);
5310             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5311             getnext = FALSE;
5312             break;              /* go to next line */
5313           }
5314
5315         else                    /* look for an interesting HTML tag */
5316           {
5317             while (*dbp != '\0' && *dbp != '<')
5318               dbp++;
5319             if (*dbp == '\0')
5320               break;            /* go to next line */
5321             intag = TRUE;
5322             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5323               {
5324                 inanchor = TRUE;
5325                 continue;       /* look on the same line */
5326               }
5327             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5328                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5329                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5330                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5331               {
5332                 intag = FALSE;
5333                 getnext = TRUE;
5334                 continue;       /* look on the same line */
5335               }
5336             dbp += 1;
5337           }
5338       }
5339 }
5340
5341 \f
5342 /*
5343  * Prolog support
5344  *
5345  * Assumes that the predicate or rule starts at column 0.
5346  * Only the first clause of a predicate or rule is added.
5347  * Original code by Sunichirou Sugou (1989)
5348  * Rewritten by Anders Lindgren (1996)
5349  */
5350 static int prolog_pr __P((char *, char *));
5351 static void prolog_skip_comment __P((linebuffer *, FILE *));
5352 static int prolog_atom __P((char *, int));
5353
5354 static void
5355 Prolog_functions (inf)
5356      FILE *inf;
5357 {
5358   char *cp, *last;
5359   int len;
5360   int allocated;
5361
5362   allocated = 0;
5363   len = 0;
5364   last = NULL;
5365
5366   LOOP_ON_INPUT_LINES (inf, lb, cp)
5367     {
5368       if (cp[0] == '\0')        /* Empty line */
5369         continue;
5370       else if (iswhite (cp[0])) /* Not a predicate */
5371         continue;
5372       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5373         prolog_skip_comment (&lb, inf);
5374       else if ((len = prolog_pr (cp, last)) > 0)
5375         {
5376           /* Predicate or rule.  Store the function name so that we
5377              only generate a tag for the first clause.  */
5378           if (last == NULL)
5379             last = xnew(len + 1, char);
5380           else if (len + 1 > allocated)
5381             xrnew (last, len + 1, char);
5382           allocated = len + 1;
5383           strncpy (last, cp, len);
5384           last[len] = '\0';
5385         }
5386     }
5387 }
5388
5389
5390 static void
5391 prolog_skip_comment (plb, inf)
5392      linebuffer *plb;
5393      FILE *inf;
5394 {
5395   char *cp;
5396
5397   do
5398     {
5399       for (cp = plb->buffer; *cp != '\0'; cp++)
5400         if (cp[0] == '*' && cp[1] == '/')
5401           return;
5402       readline (plb, inf);
5403     }
5404   while (!feof(inf));
5405 }
5406
5407 /*
5408  * A predicate or rule definition is added if it matches:
5409  *     <beginning of line><Prolog Atom><whitespace>(
5410  * or  <beginning of line><Prolog Atom><whitespace>:-
5411  *
5412  * It is added to the tags database if it doesn't match the
5413  * name of the previous clause header.
5414  *
5415  * Return the size of the name of the predicate or rule, or 0 if no
5416  * header was found.
5417  */
5418 static int
5419 prolog_pr (s, last)
5420      char *s;
5421      char *last;                /* Name of last clause. */
5422 {
5423   int pos;
5424   int len;
5425
5426   pos = prolog_atom (s, 0);
5427   if (pos < 1)
5428     return 0;
5429
5430   len = pos;
5431   pos = skip_spaces (s + pos) - s;
5432
5433   if ((s[pos] == '.'
5434        || (s[pos] == '(' && (pos += 1))
5435        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5436       && (last == NULL          /* save only the first clause */
5437           || len != strlen (last)
5438           || !strneq (s, last, len)))
5439         {
5440           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5441           return len;
5442         }
5443   else
5444     return 0;
5445 }
5446
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The string must be closed on the
 *   same line.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* The backslash and the character it quotes are skipped. */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* a doubled quote stands for itself */
        break;

      default:
        p++;
        break;
      }
}
5505
5506 \f
5507 /*
5508  * Support for Erlang
5509  *
5510  * Generates tags for functions, defines, and records.
5511  * Assumes that Erlang functions start at column 0.
5512  * Original code by Anders Lindgren (1996)
5513  */
5514 static int erlang_func __P((char *, char *));
5515 static void erlang_attribute __P((char *));
5516 static int erlang_atom __P((char *));
5517
5518 static void
5519 Erlang_functions (inf)
5520      FILE *inf;
5521 {
5522   char *cp, *last;
5523   int len;
5524   int allocated;
5525
5526   allocated = 0;
5527   len = 0;
5528   last = NULL;
5529
5530   LOOP_ON_INPUT_LINES (inf, lb, cp)
5531     {
5532       if (cp[0] == '\0')        /* Empty line */
5533         continue;
5534       else if (iswhite (cp[0])) /* Not function nor attribute */
5535         continue;
5536       else if (cp[0] == '%')    /* comment */
5537         continue;
5538       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5539         continue;
5540       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5541         {
5542           erlang_attribute (cp);
5543           last = NULL;
5544         }
5545       else if ((len = erlang_func (cp, last)) > 0)
5546         {
5547           /*
5548            * Function.  Store the function name so that we only
5549            * generates a tag for the first clause.
5550            */
5551           if (last == NULL)
5552             last = xnew (len + 1, char);
5553           else if (len + 1 > allocated)
5554             xrnew (last, len + 1, char);
5555           allocated = len + 1;
5556           strncpy (last, cp, len);
5557           last[len] = '\0';
5558         }
5559     }
5560 }
5561
5562
5563 /*
5564  * A function definition is added if it matches:
5565  *     <beginning of line><Erlang Atom><whitespace>(
5566  *
5567  * It is added to the tags database if it doesn't match the
5568  * name of the previous clause header.
5569  *
5570  * Return the size of the name of the function, or 0 if no function
5571  * was found.
5572  */
5573 static int
5574 erlang_func (s, last)
5575      char *s;
5576      char *last;                /* Name of last clause. */
5577 {
5578   int pos;
5579   int len;
5580
5581   pos = erlang_atom (s);
5582   if (pos < 1)
5583     return 0;
5584
5585   len = pos;
5586   pos = skip_spaces (s + pos) - s;
5587
5588   /* Save only the first clause. */
5589   if (s[pos++] == '('
5590       && (last == NULL
5591           || len != (int)strlen (last)
5592           || !strneq (s, last, len)))
5593         {
5594           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5595           return len;
5596         }
5597
5598   return 0;
5599 }
5600
5601
5602 /*
5603  * Handle attributes.  Currently, tags are generated for defines
5604  * and records.
5605  *
5606  * They are on the form:
5607  * -define(foo, bar).
5608  * -define(Foo(M, N), M+N).
5609  * -record(graph, {vtab = notable, cyclic = true}).
5610  */
5611 static void
5612 erlang_attribute (s)
5613      char *s;
5614 {
5615   char *cp = s;
5616
5617   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5618       && *cp++ == '(')
5619     {
5620       int len = erlang_atom (skip_spaces (cp));
5621       if (len > 0)
5622         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5623     }
5624   return;
5625 }
5626
5627
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 *
 * An unquoted name starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes; inside it a backslash quotes the next
 * character.
 *
 * Return the number of bytes consumed.  The result is 0 if S does not
 * start with a name, or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
      return p - s;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\')
        {
          p++;                  /* move to the quoted character */
          if (*p == '\0')
            return 0;
        }
      p++;
    }
  return p + 1 - s;             /* count the closing quote too */
}
5656
5657 \f
5658 #ifdef ETAGS_REGEXPS
5659
5660 static char *scan_separators __P((char *));
5661 static void add_regex __P((char *, language *));
5662 static char *substitute __P((char *, char *, struct re_registers *));
5663
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  A backslash before any other character is
 * preserved together with that character.  Works in place: the result
 * is written starting at NAME[0].  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty NAME has no separator at all: it is
 * left untouched and NULL is returned.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: do not step over the string terminator. */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A quoted character: look at what follows the backslash. */
          ++name;
          if (*name == '\0')
            break;              /* trailing backslash: unterminated */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5723
5724 /* Look at the argument of --regex or --no-regex and do the right
5725    thing.  Same for each line of a regexp file. */
5726 static void
5727 analyse_regex (regex_arg)
5728      char *regex_arg;
5729 {
5730   if (regex_arg == NULL)
5731     {
5732       free_regexps ();          /* --no-regex: remove existing regexps */
5733       return;
5734     }
5735
5736   /* A real --regexp option or a line in a regexp file. */
5737   switch (regex_arg[0])
5738     {
5739       /* Comments in regexp file or null arg to --regex. */
5740     case '\0':
5741     case ' ':
5742     case '\t':
5743       break;
5744
5745       /* Read a regex file.  This is recursive and may result in a
5746          loop, which will stop when the file descriptors are exhausted. */
5747     case '@':
5748       {
5749         FILE *regexfp;
5750         linebuffer regexbuf;
5751         char *regexfile = regex_arg + 1;
5752
5753         /* regexfile is a file containing regexps, one per line. */
5754         regexfp = fopen (regexfile, "r");
5755         if (regexfp == NULL)
5756           {
5757             pfatal (regexfile);
5758             return;
5759           }
5760         linebuffer_init (&regexbuf);
5761         while (readline_internal (&regexbuf, regexfp) > 0)
5762           analyse_regex (regexbuf.buffer);
5763         free (regexbuf.buffer);
5764         fclose (regexfp);
5765       }
5766       break;
5767
5768       /* Regexp to be used for a specific language only. */
5769     case '{':
5770       {
5771         language *lang;
5772         char *lang_name = regex_arg + 1;
5773         char *cp;
5774
5775         for (cp = lang_name; *cp != '}'; cp++)
5776           if (*cp == '\0')
5777             {
5778               error ("unterminated language name in regex: %s", regex_arg);
5779               return;
5780             }
5781         *cp++ = '\0';
5782         lang = get_language_from_langname (lang_name);
5783         if (lang == NULL)
5784           return;
5785         add_regex (cp, lang);
5786       }
5787       break;
5788
5789       /* Regexp to be used for any language. */
5790     default:
5791       add_regex (regex_arg, NULL);
5792       break;
5793     }
5794 }
5795
5796 /* Separate the regexp pattern, compile it,
5797    and care for optional name and modifiers. */
5798 static void
5799 add_regex (regexp_pattern, lang)
5800      char *regexp_pattern;
5801      language *lang;
5802 {
5803   static struct re_pattern_buffer zeropattern;
5804   char sep, *pat, *name, *modifiers;
5805   const char *err;
5806   struct re_pattern_buffer *patbuf;
5807   regexp *rp;
5808   bool
5809     force_explicit_name = TRUE, /* do not use implicit tag names */
5810     ignore_case = FALSE,        /* case is significant */
5811     multi_line = FALSE,         /* matches are done one line at a time */
5812     single_line = FALSE;        /* dot does not match newline */
5813
5814
5815   if (strlen(regexp_pattern) < 3)
5816     {
5817       error ("null regexp", (char *)NULL);
5818       return;
5819     }
5820   sep = regexp_pattern[0];
5821   name = scan_separators (regexp_pattern);
5822   if (name == NULL)
5823     {
5824       error ("%s: unterminated regexp", regexp_pattern);
5825       return;
5826     }
5827   if (name[1] == sep)
5828     {
5829       error ("null name for regexp \"%s\"", regexp_pattern);
5830       return;
5831     }
5832   modifiers = scan_separators (name);
5833   if (modifiers == NULL)        /* no terminating separator --> no name */
5834     {
5835       modifiers = name;
5836       name = "";
5837     }
5838   else
5839     modifiers += 1;             /* skip separator */
5840
5841   /* Parse regex modifiers. */
5842   for (; modifiers[0] != '\0'; modifiers++)
5843     switch (modifiers[0])
5844       {
5845       case 'N':
5846         if (modifiers == name)
5847           error ("forcing explicit tag name but no name, ignoring", NULL);
5848         force_explicit_name = TRUE;
5849         break;
5850       case 'i':
5851         ignore_case = TRUE;
5852         break;
5853       case 's':
5854         single_line = TRUE;
5855         /* FALLTHRU */
5856       case 'm':
5857         multi_line = TRUE;
5858         need_filebuf = TRUE;
5859         break;
5860       default:
5861         {
5862           char wrongmod [2];
5863           wrongmod[0] = modifiers[0];
5864           wrongmod[1] = '\0';
5865           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5866         }
5867         break;
5868       }
5869
5870   patbuf = xnew (1, struct re_pattern_buffer);
5871   *patbuf = zeropattern;
5872   if (ignore_case)
5873     {
5874       static char lc_trans[CHARS];
5875       int i;
5876       for (i = 0; i < CHARS; i++)
5877         lc_trans[i] = lowcase (i);
5878       patbuf->translate = lc_trans;     /* translation table to fold case  */
5879     }
5880
5881   if (multi_line)
5882     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5883   else
5884     pat = regexp_pattern;
5885
5886   if (single_line)
5887     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5888   else
5889     re_set_syntax (RE_SYNTAX_EMACS);
5890
5891   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5892   if (multi_line)
5893     free (pat);
5894   if (err != NULL)
5895     {
5896       error ("%s while compiling pattern", err);
5897       return;
5898     }
5899
5900   rp = p_head;
5901   p_head = xnew (1, regexp);
5902   p_head->pattern = savestr (regexp_pattern);
5903   p_head->p_next = rp;
5904   p_head->lang = lang;
5905   p_head->pat = patbuf;
5906   p_head->name = savestr (name);
5907   p_head->error_signaled = FALSE;
5908   p_head->force_explicit_name = force_explicit_name;
5909   p_head->ignore_case = ignore_case;
5910   p_head->multi_line = multi_line;
5911 }
5912
5913 /*
5914  * Do the substitutions indicated by the regular expression and
5915  * arguments.
5916  */
5917 static char *
5918 substitute (in, out, regs)
5919      char *in, *out;
5920      struct re_registers *regs;
5921 {
5922   char *result, *t;
5923   int size, dig, diglen;
5924
5925   result = NULL;
5926   size = strlen (out);
5927
5928   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5929   if (out[size - 1] == '\\')
5930     fatal ("pattern error in \"%s\"", out);
5931   for (t = etags_strchr (out, '\\');
5932        t != NULL;
5933        t = etags_strchr (t + 2, '\\'))
5934     if (ISDIGIT (t[1]))
5935       {
5936         dig = t[1] - '0';
5937         diglen = regs->end[dig] - regs->start[dig];
5938         size += diglen - 2;
5939       }
5940     else
5941       size -= 1;
5942
5943   /* Allocate space and do the substitutions. */
5944   assert (size >= 0);
5945   result = xnew (size + 1, char);
5946
5947   for (t = result; *out != '\0'; out++)
5948     if (*out == '\\' && ISDIGIT (*++out))
5949       {
5950         dig = *out - '0';
5951         diglen = regs->end[dig] - regs->start[dig];
5952         strncpy (t, in + regs->start[dig], diglen);
5953         t += diglen;
5954       }
5955     else
5956       *t++ = *out;
5957   *t = '\0';
5958
5959   assert (t <= result + size);
5960   assert (t - result == (int)strlen (result));
5961
5962   return result;
5963 }
5964
5965 /* Deallocate all regexps. */
5966 static void
5967 free_regexps ()
5968 {
5969   regexp *rp;
5970   while (p_head != NULL)
5971     {
5972       rp = p_head->p_next;
5973       free (p_head->pattern);
5974       free (p_head->name);
5975       free (p_head);
5976       p_head = rp;
5977     }
5978   return;
5979 }
5980
5981 /*
5982  * Reads the whole file as a single string from `filebuf' and looks for
5983  * multi-line regular expressions, creating tags on matches.
5984  * readline already dealt with normal regexps.
5985  *
5986  * Idea by Ben Wing <ben@666.com> (2002).
5987  */
5988 static void
5989 regex_tag_multiline ()
5990 {
5991   char *buffer = filebuf.buffer;
5992   regexp *rp;
5993   char *name;
5994
5995   for (rp = p_head; rp != NULL; rp = rp->p_next)
5996     {
5997       int match = 0;
5998
5999       if (!rp->multi_line)
6000         continue;               /* skip normal regexps */
6001
6002       /* Generic initialisations before parsing file from memory. */
6003       lineno = 1;               /* reset global line number */
6004       charno = 0;               /* reset global char number */
6005       linecharno = 0;           /* reset global char number of line start */
6006
6007       /* Only use generic regexps or those for the current language. */
6008       if (rp->lang != NULL && rp->lang != curfdp->lang)
6009         continue;
6010
6011       while (match >= 0 && match < filebuf.len)
6012         {
6013           match = re_search (rp->pat, buffer, filebuf.len, charno,
6014                              filebuf.len - match, &rp->regs);
6015           switch (match)
6016             {
6017             case -2:
6018               /* Some error. */
6019               if (!rp->error_signaled)
6020                 {
6021                   error ("regexp stack overflow while matching \"%s\"",
6022                          rp->pattern);
6023                   rp->error_signaled = TRUE;
6024                 }
6025               break;
6026             case -1:
6027               /* No match. */
6028               break;
6029             default:
6030               if (match == rp->regs.end[0])
6031                 {
6032                   if (!rp->error_signaled)
6033                     {
6034                       error ("regexp matches the empty string: \"%s\"",
6035                              rp->pattern);
6036                       rp->error_signaled = TRUE;
6037                     }
6038                   match = -3;   /* exit from while loop */
6039                   break;
6040                 }
6041
6042               /* Match occurred.  Construct a tag. */
6043               while (charno < rp->regs.end[0])
6044                 if (buffer[charno++] == '\n')
6045                   lineno++, linecharno = charno;
6046               name = rp->name;
6047               if (name[0] == '\0')
6048                 name = NULL;
6049               else /* make a named tag */
6050                 name = substitute (buffer, rp->name, &rp->regs);
6051               if (rp->force_explicit_name)
6052                 /* Force explicit tag name, if a name is there. */
6053                 pfnote (name, TRUE, buffer + linecharno,
6054                         charno - linecharno + 1, lineno, linecharno);
6055               else
6056                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6057                           charno - linecharno + 1, lineno, linecharno);
6058               break;
6059             }
6060         }
6061     }
6062 }
6063
6064 #endif /* ETAGS_REGEXPS */
6065
6066 \f
6067 static bool
6068 nocase_tail (cp)
6069      char *cp;
6070 {
6071   register int len = 0;
6072
6073   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6074     cp++, len++;
6075   if (*cp == '\0' && !intoken (dbp[len]))
6076     {
6077       dbp += len;
6078       return TRUE;
6079     }
6080   return FALSE;
6081 }
6082
6083 static void
6084 get_tag (bp, namepp)
6085      register char *bp;
6086      char **namepp;
6087 {
6088   register char *cp = bp;
6089
6090   if (*bp != '\0')
6091     {
6092       /* Go till you get to white space or a syntactic break */
6093       for (cp = bp + 1; !notinname (*cp); cp++)
6094         continue;
6095       make_tag (bp, cp - bp, TRUE,
6096                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6097     }
6098
6099   if (namepp != NULL)
6100     *namepp = savenstr (bp, cp - bp);
6101 }
6102
6103 /*
6104  * Read a line of text from `stream' into `lbp', excluding the
6105  * newline or CR-NL, if any.  Return the number of characters read from
6106  * `stream', which is the length of the line including the newline.
6107  *
6108  * On DOS or Windows we do not count the CR character, if any before the
6109  * NL, in the returned length; this mirrors the behavior of Emacs on those
6110  * platforms (for text files, it translates CR-NL to NL as it reads in the
6111  * file).
6112  *
6113  * If multi-line regular expressions are requested, each line read is
6114  * appended to `filebuf'.
6115  */
6116 static long
6117 readline_internal (lbp, stream)
6118      linebuffer *lbp;
6119      register FILE *stream;
6120 {
6121   char *buffer = lbp->buffer;
6122   register char *p = lbp->buffer;
6123   register char *pend;
6124   int chars_deleted;
6125
6126   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6127
6128   for (;;)
6129     {
6130       register int c = getc (stream);
6131       if (p == pend)
6132         {
6133           /* We're at the end of linebuffer: expand it. */
6134           lbp->size *= 2;
6135           xrnew (buffer, lbp->size, char);
6136           p += buffer - lbp->buffer;
6137           pend = buffer + lbp->size;
6138           lbp->buffer = buffer;
6139         }
6140       if (c == EOF)
6141         {
6142           *p = '\0';
6143           chars_deleted = 0;
6144           break;
6145         }
6146       if (c == '\n')
6147         {
6148           if (p > buffer && p[-1] == '\r')
6149             {
6150               p -= 1;
6151 #ifdef DOS_NT
6152              /* Assume CRLF->LF translation will be performed by Emacs
6153                 when loading this file, so CRs won't appear in the buffer.
6154                 It would be cleaner to compensate within Emacs;
6155                 however, Emacs does not know how many CRs were deleted
6156                 before any given point in the file.  */
6157               chars_deleted = 1;
6158 #else
6159               chars_deleted = 2;
6160 #endif
6161             }
6162           else
6163             {
6164               chars_deleted = 1;
6165             }
6166           *p = '\0';
6167           break;
6168         }
6169       *p++ = c;
6170     }
6171   lbp->len = p - buffer;
6172
6173   if (need_filebuf              /* we need filebuf for multi-line regexps */
6174       && chars_deleted > 0)     /* not at EOF */
6175     {
6176       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6177         {
6178           /* Expand filebuf. */
6179           filebuf.size *= 2;
6180           xrnew (filebuf.buffer, filebuf.size, char);
6181         }
6182       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6183       filebuf.len += lbp->len;
6184       filebuf.buffer[filebuf.len++] = '\n';
6185       filebuf.buffer[filebuf.len] = '\0';
6186     }
6187
6188   return lbp->len + chars_deleted;
6189 }
6190
6191 /*
6192  * Like readline_internal, above, but in addition try to match the
6193  * input line against relevant regular expressions and manage #line
6194  * directives.
6195  */
6196 static void
6197 readline (lbp, stream)
6198      linebuffer *lbp;
6199      FILE *stream;
6200 {
6201   long result;
6202
6203   linecharno = charno;          /* update global char number of line start */
6204   result = readline_internal (lbp, stream); /* read line */
6205   lineno += 1;                  /* increment global line number */
6206   charno += result;             /* increment global char number */
6207
6208   /* Honour #line directives. */
6209   if (!no_line_directive)
6210     {
6211       static bool discard_until_line_directive;
6212
6213       /* Check whether this is a #line directive. */
6214       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6215         {
6216           int start, lno;
6217
6218           if (DEBUG) start = 0; /* shut up the compiler */
6219           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6220             {
6221               char *endp = lbp->buffer + start;
6222
6223               assert (start > 0);
6224               while ((endp = etags_strchr (endp, '"')) != NULL
6225                      && endp[-1] == '\\')
6226                 endp++;
6227               if (endp != NULL)
6228                 /* Ok, this is a real #line directive.  Let's deal with it. */
6229                 {
6230                   char *taggedabsname;  /* absolute name of original file */
6231                   char *taggedfname;    /* name of original file as given */
6232                   char *name;           /* temp var */
6233
6234                   discard_until_line_directive = FALSE; /* found it */
6235                   name = lbp->buffer + start;
6236                   *endp = '\0';
6237                   canonicalize_filename (name); /* for DOS */
6238                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6239                   if (filename_is_absolute (name)
6240                       || filename_is_absolute (curfdp->infname))
6241                     taggedfname = savestr (taggedabsname);
6242                   else
6243                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6244
6245                   if (streq (curfdp->taggedfname, taggedfname))
6246                     /* The #line directive is only a line number change.  We
6247                        deal with this afterwards. */
6248                     free (taggedfname);
6249                   else
6250                     /* The tags following this #line directive should be
6251                        attributed to taggedfname.  In order to do this, set
6252                        curfdp accordingly. */
6253                     {
6254                       fdesc *fdp; /* file description pointer */
6255
6256                       /* Go look for a file description already set up for the
6257                          file indicated in the #line directive.  If there is
6258                          one, use it from now until the next #line
6259                          directive. */
6260                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6261                         if (streq (fdp->infname, curfdp->infname)
6262                             && streq (fdp->taggedfname, taggedfname))
6263                           /* If we remove the second test above (after the &&)
6264                              then all entries pertaining to the same file are
6265                              coalesced in the tags file.  If we use it, then
6266                              entries pertaining to the same file but generated
6267                              from different files (via #line directives) will
6268                              go into separate sections in the tags file.  These
6269                              alternatives look equivalent.  The first one
6270                              destroys some apparently useless information. */
6271                           {
6272                             curfdp = fdp;
6273                             free (taggedfname);
6274                             break;
6275                           }
6276                       /* Else, if we already tagged the real file, skip all
6277                          input lines until the next #line directive. */
6278                       if (fdp == NULL) /* not found */
6279                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6280                           if (streq (fdp->infabsname, taggedabsname))
6281                             {
6282                               discard_until_line_directive = TRUE;
6283                               free (taggedfname);
6284                               break;
6285                             }
6286                       /* Else create a new file description and use that from
6287                          now on, until the next #line directive. */
6288                       if (fdp == NULL) /* not found */
6289                         {
6290                           fdp = fdhead;
6291                           fdhead = xnew (1, fdesc);
6292                           *fdhead = *curfdp; /* copy curr. file description */
6293                           fdhead->next = fdp;
6294                           fdhead->infname = savestr (curfdp->infname);
6295                           fdhead->infabsname = savestr (curfdp->infabsname);
6296                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6297                           fdhead->taggedfname = taggedfname;
6298                           fdhead->usecharno = FALSE;
6299                           fdhead->prop = NULL;
6300                           fdhead->written = FALSE;
6301                           curfdp = fdhead;
6302                         }
6303                     }
6304                   free (taggedabsname);
6305                   lineno = lno - 1;
6306                   readline (lbp, stream);
6307                   return;
6308                 } /* if a real #line directive */
6309             } /* if #line is followed by a a number */
6310         } /* if line begins with "#line " */
6311
6312       /* If we are here, no #line directive was found. */
6313       if (discard_until_line_directive)
6314         {
6315           if (result > 0)
6316             {
6317               /* Do a tail recursion on ourselves, thus discarding the contents
6318                  of the line buffer. */
6319               readline (lbp, stream);
6320               return;
6321             }
6322           /* End of file. */
6323           discard_until_line_directive = FALSE;
6324           return;
6325         }
6326     } /* if #line directives should be considered */
6327
6328 #ifdef ETAGS_REGEXPS
6329   {
6330     int match;
6331     regexp *rp;
6332     char *name;
6333
6334     /* Match against relevant regexps. */
6335     if (lbp->len > 0)
6336       for (rp = p_head; rp != NULL; rp = rp->p_next)
6337         {
6338           /* Only use generic regexps or those for the current language.
6339              Also do not use multiline regexps, which is the job of
6340              regex_tag_multiline. */
6341           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6342               || rp->multi_line)
6343             continue;
6344
6345           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6346           switch (match)
6347             {
6348             case -2:
6349               /* Some error. */
6350               if (!rp->error_signaled)
6351                 {
6352                   error ("regexp stack overflow while matching \"%s\"",
6353                          rp->pattern);
6354                   rp->error_signaled = TRUE;
6355                 }
6356               break;
6357             case -1:
6358               /* No match. */
6359               break;
6360             case 0:
6361               /* Empty string matched. */
6362               if (!rp->error_signaled)
6363                 {
6364                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6365                   rp->error_signaled = TRUE;
6366                 }
6367               break;
6368             default:
6369               /* Match occurred.  Construct a tag. */
6370               name = rp->name;
6371               if (name[0] == '\0')
6372                 name = NULL;
6373               else /* make a named tag */
6374                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6375               if (rp->force_explicit_name)
6376                 /* Force explicit tag name, if a name is there. */
6377                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6378               else
6379                 make_tag (name, strlen (name), TRUE,
6380                           lbp->buffer, match, lineno, linecharno);
6381               break;
6382             }
6383         }
6384   }
6385 #endif /* ETAGS_REGEXPS */
6386 }
6387
6388 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole of CP (strlen (CP) characters) is handed to savenstr, and
 * the pointer that savenstr returns is passed back to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6399
6400 /*
6401  * Return a pointer to a space of size LEN+1 allocated with xnew where
6402  * the string CP has been copied for at most the first LEN characters.
6403  */
6404 static char *
6405 savenstr (cp, len)
6406      char *cp;
6407      int len;
6408 {
6409   register char *dp;
6410
6411   dp = xnew (len + 1, char);
6412   strncpy (dp, cp, len);
6413   dp[len] = '\0';
6414   return dp;
6415 }
6416
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur in it.  The terminating NUL is
 * examined too, so searching for '\0' returns a pointer to the
 * terminator.
 *
 * Behaves like POSIX strrchr, included for portability.  As in strrchr,
 * C is converted to char before comparing, so that a character with the
 * high bit set is found whether the caller passes it as a plain char or
 * as an unsigned value such as 0xE9.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r = NULL;
  const char wanted = (char) c; /* compare char with char, not with int */

  do
    {
      if (*sp == wanted)
        r = sp;                 /* remember it, but keep looking */
    }
  while (*sp++);
  return (char *) r;
}
6438
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur in it.  The terminating NUL is
 * examined too, so searching for '\0' returns a pointer to the
 * terminator.
 *
 * Behaves like POSIX strchr, included for portability.  As in strchr,
 * C is converted to char before comparing, so that a character with the
 * high bit set is found whether the caller passes it as a plain char or
 * as an unsigned value such as 0xE9.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char wanted = (char) c; /* compare char with char, not with int */

  do
    {
      if (*sp == wanted)
        return (char *) sp;
    }
  while (*sp++);
  return NULL;
}
6457
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters.
 *
 * A pair of characters is compared through lowcase only when ISALPHA
 * holds for both of them; any other pair is compared as is.  Scanning
 * stops at the end of S1 or at the first pair that differs, and the
 * difference of that pair is returned, so 0 means the strings match.
 *
 * Stand-in for BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* Report the difference at the position where scanning stopped. */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6478
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.
 *
 * A pair of characters is compared through lowcase only when ISALPHA
 * holds for both of them; any other pair is compared as is.  Returns 0
 * when the first N characters match (or when N is not positive),
 * otherwise the difference of the first pair that differs.
 *
 * Stand-in for BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is decremented
     only for pairs that matched, so N == 0 after the loop means exactly
     "N characters compared equal", even when S1 ends at that point. */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;

  /* Stopped early: either a mismatch or the end of S1. */
  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6504
/* Advance CP over every leading character for which iswhite holds and
   return the resulting pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6514
/* Advance CP up to the first character for which iswhite holds, or to
   the terminating NUL if there is none, and return the resulting
   pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6524
/* Report an error through `error', with S1 as the printf control string
   and S2 as its argument, then terminate with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6533
/* Print the system error message for the failed operation S1 with
   perror, then terminate with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6541
6542 static void
6543 suggest_asking_for_help ()
6544 {
6545   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6546            progname, LONG_OPTIONS ? "--help" : "-h");
6547   exit (EXIT_FAILURE);
6548 }
6549
6550 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6551 static void
6552 error (s1, s2)
6553      const char *s1, *s2;
6554 {
6555   fprintf (stderr, "%s: ", progname);
6556   fprintf (stderr, s1, s2);
6557   fprintf (stderr, "\n");
6558 }
6559
6560 /* Return a newly-allocated string whose contents
6561    concatenate those of s1, s2, s3.  */
6562 static char *
6563 concat (s1, s2, s3)
6564      char *s1, *s2, *s3;
6565 {
6566   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6567   char *result = xnew (len1 + len2 + len3 + 1, char);
6568
6569   strcpy (result, s1);
6570   strcpy (result + len1, s2);
6571   strcpy (result + len1 + len2, s3);
6572   result[len1 + len2 + len3] = '\0';
6573
6574   return result;
6575 }
6576
6577 \f
6578 /* Does the same work as the system V getcwd, but does not need to
6579    guess the buffer size in advance. */
6580 static char *
6581 etags_getcwd ()
6582 {
6583 #ifdef HAVE_GETCWD
6584   int bufsize = 200;
6585   char *path = xnew (bufsize, char);
6586
6587   while (getcwd (path, bufsize) == NULL)
6588     {
6589       if (errno != ERANGE)
6590         pfatal ("getcwd");
6591       bufsize *= 2;
6592       free (path);
6593       path = xnew (bufsize, char);
6594     }
6595
6596   canonicalize_filename (path);
6597   return path;
6598
6599 #else /* not HAVE_GETCWD */
6600 #if MSDOS
6601
6602   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6603
6604   getwd (path);
6605
6606   for (p = path; *p != '\0'; p++)
6607     if (*p == '\\')
6608       *p = '/';
6609     else
6610       *p = lowcase (*p);
6611
6612   return strdup (path);
6613 #else /* not MSDOS */
6614   linebuffer path;
6615   FILE *pipe;
6616
6617   linebuffer_init (&path);
6618   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6619   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6620     pfatal ("pwd");
6621   pclose (pipe);
6622
6623   return path.buffer;
6624 #endif /* not MSDOS */
6625 #endif /* not HAVE_GETCWD */
6626 }
6627
6628 /* Return a newly allocated string containing the file name of FILE
6629    relative to the absolute directory DIR (which should end with a slash). */
6630 static char *
6631 relative_filename (file, dir)
6632      char *file, *dir;
6633 {
6634   char *fp, *dp, *afn, *res;
6635   int i;
6636
6637   /* Find the common root of file and dir (with a trailing slash). */
6638   afn = absolute_filename (file, cwd);
6639   fp = afn;
6640   dp = dir;
6641   while (*fp++ == *dp++)
6642     continue;
6643   fp--, dp--;                   /* back to the first differing char */
6644 #ifdef DOS_NT
6645   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6646     return afn;
6647 #endif
6648   do                            /* look at the equal chars until '/' */
6649     fp--, dp--;
6650   while (*fp != '/');
6651
6652   /* Build a sequence of "../" strings for the resulting relative file name. */
6653   i = 0;
6654   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6655     i += 1;
6656   res = xnew (3*i + strlen (fp + 1) + 1, char);
6657   res[0] = '\0';
6658   while (i-- > 0)
6659     strcat (res, "../");
6660
6661   /* Add the file name relative to the common root of file and dir. */
6662   strcat (res, fp + 1);
6663   free (afn);
6664
6665   return res;
6666 }
6667
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended to
   DIR.  Then every "/dirname/.." and "/." component is deleted from the
   result in place.  If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the component that precedes
                 "/..", i.e. to the nearest place from which the rest of
                 the string reads as an absolute file name. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy would be
                 undefined here; memmove copies the tail, terminator
                 included, safely. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping copy again: drop the "/." in place. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* everything was deleted: just a slash */
    {
      free (res);               /* do not leak the emptied buffer */
      return savestr ("/");
    }
  return res;
}
6728
6729 /* Return a newly allocated string containing the absolute
6730    file name of dir where FILE resides given DIR (which should
6731    end with a slash). */
6732 static char *
6733 absolute_dirname (file, dir)
6734      char *file, *dir;
6735 {
6736   char *slashp, *res;
6737   char save;
6738
6739   canonicalize_filename (file);
6740   slashp = etags_strrchr (file, '/');
6741   if (slashp == NULL)
6742     return savestr (dir);
6743   save = slashp[1];
6744   slashp[1] = '\0';
6745   res = absolute_filename (file, dir);
6746   slashp[1] = save;
6747
6748   return res;
6749 }
6750
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6763
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is made upper case and every backslash is turned into a
   slash; on other systems FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6782
6783 \f
6784 /* Initialize a linebuffer for use */
6785 static void
6786 linebuffer_init (lbp)
6787      linebuffer *lbp;
6788 {
6789   lbp->size = (DEBUG) ? 3 : 200;
6790   lbp->buffer = xnew (lbp->size, char);
6791   lbp->buffer[0] = '\0';
6792   lbp->len = 0;
6793 }
6794
6795 /* Set the minimum size of a string contained in a linebuffer. */
6796 static void
6797 linebuffer_setlen (lbp, toksize)
6798      linebuffer *lbp;
6799      int toksize;
6800 {
6801   while (lbp->size <= toksize)
6802     {
6803       lbp->size *= 2;
6804       xrnew (lbp->buffer, lbp->size, char);
6805     }
6806   lbp->len = toksize;
6807 }
6808
6809 /* Like malloc but get fatal error if memory is exhausted. */
6810 static PTR
6811 xmalloc (size)
6812      unsigned int size;
6813 {
6814   PTR result = (PTR) malloc (size);
6815   if (result == NULL)
6816     fatal ("virtual memory exhausted", (char *)NULL);
6817   return result;
6818 }
6819
6820 static PTR
6821 xrealloc (ptr, size)
6822      char *ptr;
6823      unsigned int size;
6824 {
6825   PTR result = (PTR) realloc (ptr, size);
6826   if (result == NULL)
6827     fatal ("virtual memory exhausted", (char *)NULL);
6828   return result;
6829 }
6830
6831 /*
6832  * Local Variables:
6833  * c-indentation-style: gnu
6834  * indent-tabs-mode: t
6835  * tab-width: 8
6836  * fill-column: 79
6837  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6838  * End:
6839  */
6840
6841 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6842    (do not change this comment) */
6843
6844 /* etags.c ends here */