gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   /* The file_path is the key for identifying a particular file in
  67      the cache.
  68      For libcpp-using code, the underlying buffer for this field is
  69      owned by the corresponding _cpp_file within the cpp_reader.  */
  70   const char *file_path;
  71
  72   FILE *fp;
  73
  74   /* This points to the content of the file that we've read so
  75      far.  */
  76   char *data;
  77
  78   /*  The size of the DATA array above.*/
  79   size_t size;
  80
  81   /* The number of bytes read from the underlying file so far.  This
  82      must be less (or equal) than SIZE above.  */
  83   size_t nb_read;
  84
  85   /* The index of the beginning of the current line.  */
  86   size_t line_start_idx;
  87
  88   /* The number of the previous line read.  This starts at 1.  Zero
  89      means we've read no line so far.  */
  90   size_t line_num;
  91
  92   /* This is the total number of lines of the current file.  At the
  93      moment, we try to get this information from the line map
  94      subsystem.  Note that this is just a hint.  When using the C++
  95      front-end, this hint is correct because the input file is then
  96      completely tokenized before parsing starts; so the line map knows
  97      the number of lines before compilation really starts.  For e.g,
  98      the C front-end, it can happen that we start emitting diagnostics
  99      before the line map has seen the end of the file.  */
 100   size_t total_lines;
 101
 102   /* Could this file be missing a trailing newline on its final line?
 103      Initially true (to cope with empty files), set to true/false
 104      as each line is read.  */
 105   bool missing_trailing_newline;
 106
 107   /* This is a record of the beginning and end of the lines we've seen
 108      while reading the file.  This is useful to avoid walking the data
 109      from the beginning when we are asked to read a line that is
 110      before LINE_START_IDX above.  Note that the maximum size of this
 111      record is fcache_line_record_size, so that the memory consumption
 112      doesn't explode.  We thus scale total_lines down to
 113      fcache_line_record_size.  */
 114   vec<line_info, va_heap> line_record;
 115
 116   fcache ();
 117   ~fcache ();
 118 };
 119
 120 /* Current position in real source file.  */
 121
 122 location_t input_location = UNKNOWN_LOCATION;
 123
 124 struct line_maps *line_table;
 125
 126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
 127    This needs to be a global so that it can be a GC root, and thus
 128    prevent the stashed copy from being garbage-collected if the GC runs
 129    during a line_table_test.  */
 130
 131 struct line_maps *saved_line_table;
 132
 133 static fcache *fcache_tab;
 134 static const size_t fcache_tab_size = 16;
 135 static const size_t fcache_buffer_size = 4 * 1024;
 136 static const size_t fcache_line_record_size = 100;
 137
 138 /* Expand the source location LOC into a human readable location.  If
 139    LOC resolves to a builtin location, the file name of the readable
 140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 141    TRUE and LOC is virtual, then it is resolved to the expansion
 142    point of the involved macro.  Otherwise, it is resolved to the
 143    spelling location of the token.
 144
 145    When resolving to the spelling location of the token, if the
 146    resulting location is for a built-in location (that is, it has no
 147    associated line/column) in the context of a macro expansion, the
 148    returned location is the first one (while unwinding the macro
 149    location towards its expansion point) that is in real source
 150    code.
 151
 152    ASPECT controls which part of the location to use.  */
 153
 154 static expanded_location
 155 expand_location_1 (source_location loc,
 156                    bool expansion_point_p,
 157                    enum location_aspect aspect)
 158 {
 159   expanded_location xloc;
 160   const line_map_ordinary *map;
 161   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 162   tree block = NULL;
 163
 164   if (IS_ADHOC_LOC (loc))
 165     {
 166       block = LOCATION_BLOCK (loc);
 167       loc = LOCATION_LOCUS (loc);
 168     }
 169
 170   memset (&xloc, 0, sizeof (xloc));
 171
 172   if (loc >= RESERVED_LOCATION_COUNT)
 173     {
 174       if (!expansion_point_p)
 175         {
 176           /* We want to resolve LOC to its spelling location.
 177
 178              But if that spelling location is a reserved location that
 179              appears in the context of a macro expansion (like for a
 180              location for a built-in token), let's consider the first
 181              location (toward the expansion point) that is not reserved;
 182              that is, the first location that is in real source code.  */
 183           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 184                                                           loc, NULL);
 185           lrk = LRK_SPELLING_LOCATION;
 186         }
 187       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 188
 189       /* loc is now either in an ordinary map, or is a reserved location.
 190          If it is a compound location, the caret is in a spelling location,
 191          but the start/finish might still be a virtual location.
 192          Depending of what the caller asked for, we may need to recurse
 193          one level in order to resolve any virtual locations in the
 194          end-points.  */
 195       switch (aspect)
 196         {
 197         default:
 198           gcc_unreachable ();
 199           /* Fall through.  */
 200         case LOCATION_ASPECT_CARET:
 201           break;
 202         case LOCATION_ASPECT_START:
 203           {
 204             source_location start = get_start (loc);
 205             if (start != loc)
 206               return expand_location_1 (start, expansion_point_p, aspect);
 207           }
 208           break;
 209         case LOCATION_ASPECT_FINISH:
 210           {
 211             source_location finish = get_finish (loc);
 212             if (finish != loc)
 213               return expand_location_1 (finish, expansion_point_p, aspect);
 214           }
 215           break;
 216         }
 217       xloc = linemap_expand_location (line_table, map, loc);
 218     }
 219
 220   xloc.data = block;
 221   if (loc <= BUILTINS_LOCATION)
 222     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 223
 224   return xloc;
 225 }
 226
 227 /* Initialize the set of cache used for files accessed by caret
 228    diagnostic.  */
 229
 230 static void
 231 diagnostic_file_cache_init (void)
 232 {
 233   if (fcache_tab == NULL)
 234     fcache_tab = new fcache[fcache_tab_size];
 235 }
 236
 237 /* Free the resources used by the set of cache used for files accessed
 238    by caret diagnostic.  */
 239
 240 void
 241 diagnostic_file_cache_fini (void)
 242 {
 243   if (fcache_tab)
 244     {
 245       delete [] (fcache_tab);
 246       fcache_tab = NULL;
 247     }
 248 }
 249
 250 /* Return the total lines number that have been read so far by the
 251    line map (in the preprocessor) so far.  For languages like C++ that
 252    entirely preprocess the input file before starting to parse, this
 253    equals the actual number of lines of the file.  */
 254
 255 static size_t
 256 total_lines_num (const char *file_path)
 257 {
 258   size_t r = 0;
 259   source_location l = 0;
 260   if (linemap_get_file_highest_location (line_table, file_path, &l))
 261     {
 262       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 263       expanded_location xloc = expand_location (l);
 264       r = xloc.line;
 265     }
 266   return r;
 267 }
 268
 269 /* Lookup the cache used for the content of a given file accessed by
 270    caret diagnostic.  Return the found cached file, or NULL if no
 271    cached file was found.  */
 272
 273 static fcache*
 274 lookup_file_in_cache_tab (const char *file_path)
 275 {
 276   if (file_path == NULL)
 277     return NULL;
 278
 279   diagnostic_file_cache_init ();
 280
 281   /* This will contain the found cached file.  */
 282   fcache *r = NULL;
 283   for (unsigned i = 0; i < fcache_tab_size; ++i)
 284     {
 285       fcache *c = &fcache_tab[i];
 286       if (c->file_path && !strcmp (c->file_path, file_path))
 287         {
 288           ++c->use_count;
 289           r = c;
 290         }
 291     }
 292
 293   if (r)
 294     ++r->use_count;
 295
 296   return r;
 297 }
 298
 299 /* Purge any mention of FILENAME from the cache of files used for
 300    printing source code.  For use in selftests when working
 301    with tempfiles.  */
 302
 303 void
 304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 305 {
 306   gcc_assert (file_path);
 307
 308   fcache *r = lookup_file_in_cache_tab (file_path);
 309   if (!r)
 310     /* Not found.  */
 311     return;
 312
 313   r->file_path = NULL;
 314   if (r->fp)
 315     fclose (r->fp);
 316   r->fp = NULL;
 317   r->nb_read = 0;
 318   r->line_start_idx = 0;
 319   r->line_num = 0;
 320   r->line_record.truncate (0);
 321   r->use_count = 0;
 322   r->total_lines = 0;
 323   r->missing_trailing_newline = true;
 324 }
 325
 326 /* Return the file cache that has been less used, recently, or the
 327    first empty one.  If HIGHEST_USE_COUNT is non-null,
 328    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 329    in the cache table.  */
 330
 331 static fcache*
 332 evicted_cache_tab_entry (unsigned *highest_use_count)
 333 {
 334   diagnostic_file_cache_init ();
 335
 336   fcache *to_evict = &fcache_tab[0];
 337   unsigned huc = to_evict->use_count;
 338   for (unsigned i = 1; i < fcache_tab_size; ++i)
 339     {
 340       fcache *c = &fcache_tab[i];
 341       bool c_is_empty = (c->file_path == NULL);
 342
 343       if (c->use_count < to_evict->use_count
 344           || (to_evict->file_path && c_is_empty))
 345         /* We evict C because it's either an entry with a lower use
 346            count or one that is empty.  */
 347         to_evict = c;
 348
 349       if (huc < c->use_count)
 350         huc = c->use_count;
 351
 352       if (c_is_empty)
 353         /* We've reached the end of the cache; subsequent elements are
 354            all empty.  */
 355         break;
 356     }
 357
 358   if (highest_use_count)
 359     *highest_use_count = huc;
 360
 361   return to_evict;
 362 }
 363
 364 /* Create the cache used for the content of a given file to be
 365    accessed by caret diagnostic.  This cache is added to an array of
 366    cache and can be retrieved by lookup_file_in_cache_tab.  This
 367    function returns the created cache.  Note that only the last
 368    fcache_tab_size files are cached.  */
 369
 370 static fcache*
 371 add_file_to_cache_tab (const char *file_path)
 372 {
 373
 374   FILE *fp = fopen (file_path, "r");
 375   if (fp == NULL)
 376     return NULL;
 377
 378   unsigned highest_use_count = 0;
 379   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 380   r->file_path = file_path;
 381   if (r->fp)
 382     fclose (r->fp);
 383   r->fp = fp;
 384   r->nb_read = 0;
 385   r->line_start_idx = 0;
 386   r->line_num = 0;
 387   r->line_record.truncate (0);
 388   /* Ensure that this cache entry doesn't get evicted next time
 389      add_file_to_cache_tab is called.  */
 390   r->use_count = ++highest_use_count;
 391   r->total_lines = total_lines_num (file_path);
 392   r->missing_trailing_newline = true;
 393
 394   return r;
 395 }
 396
 397 /* Lookup the cache used for the content of a given file accessed by
 398    caret diagnostic.  If no cached file was found, create a new cache
 399    for this file, add it to the array of cached file and return
 400    it.  */
 401
 402 static fcache*
 403 lookup_or_add_file_to_cache_tab (const char *file_path)
 404 {
 405   fcache *r = lookup_file_in_cache_tab (file_path);
 406   if (r == NULL)
 407     r = add_file_to_cache_tab (file_path);
 408   return r;
 409 }
 410
 411 /* Default constructor for a cache of file used by caret
 412    diagnostic.  */
 413
 414 fcache::fcache ()
 415 : use_count (0), file_path (NULL), fp (NULL), data (0),
 416   size (0), nb_read (0), line_start_idx (0), line_num (0),
 417   total_lines (0), missing_trailing_newline (true)
 418 {
 419   line_record.create (0);
 420 }
 421
 422 /* Destructor for a cache of file used by caret diagnostic.  */
 423
 424 fcache::~fcache ()
 425 {
 426   if (fp)
 427     {
 428       fclose (fp);
 429       fp = NULL;
 430     }
 431   if (data)
 432     {
 433       XDELETEVEC (data);
 434       data = 0;
 435     }
 436   line_record.release ();
 437 }
 438
 439 /* Returns TRUE iff the cache would need to be filled with data coming
 440    from the file.  That is, either the cache is empty or full or the
 441    current line is empty.  Note that if the cache is full, it would
 442    need to be extended and filled again.  */
 443
 444 static bool
 445 needs_read (fcache *c)
 446 {
 447   return (c->nb_read == 0
 448           || c->nb_read == c->size
 449           || (c->line_start_idx >= c->nb_read - 1));
 450 }
 451
 452 /*  Return TRUE iff the cache is full and thus needs to be
 453     extended.  */
 454
 455 static bool
 456 needs_grow (fcache *c)
 457 {
 458   return c->nb_read == c->size;
 459 }
 460
 461 /* Grow the cache if it needs to be extended.  */
 462
 463 static void
 464 maybe_grow (fcache *c)
 465 {
 466   if (!needs_grow (c))
 467     return;
 468
 469   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 470   c->data = XRESIZEVEC (char, c->data, size);
 471   c->size = size;
 472 }
 473
 474 /*  Read more data into the cache.  Extends the cache if need be.
 475     Returns TRUE iff new data could be read.  */
 476
 477 static bool
 478 read_data (fcache *c)
 479 {
 480   if (feof (c->fp) || ferror (c->fp))
 481     return false;
 482
 483   maybe_grow (c);
 484
 485   char * from = c->data + c->nb_read;
 486   size_t to_read = c->size - c->nb_read;
 487   size_t nb_read = fread (from, 1, to_read, c->fp);
 488
 489   if (ferror (c->fp))
 490     return false;
 491
 492   c->nb_read += nb_read;
 493   return !!nb_read;
 494 }
 495
 496 /* Read new data iff the cache needs to be filled with more data
 497    coming from the file FP.  Return TRUE iff the cache was filled with
 498    mode data.  */
 499
 500 static bool
 501 maybe_read_data (fcache *c)
 502 {
 503   if (!needs_read (c))
 504     return false;
 505   return read_data (c);
 506 }
 507
 508 /* Read a new line from file FP, using C as a cache for the data
 509    coming from the file.  Upon successful completion, *LINE is set to
 510    the beginning of the line found.  *LINE points directly in the
 511    line cache and is only valid until the next call of get_next_line.
 512    *LINE_LEN is set to the length of the line.  Note that the line
 513    does not contain any terminal delimiter.  This function returns
 514    true if some data was read or process from the cache, false
 515    otherwise.  Note that subsequent calls to get_next_line might
 516    make the content of *LINE invalid.  */
 517
 518 static bool
 519 get_next_line (fcache *c, char **line, ssize_t *line_len)
 520 {
 521   /* Fill the cache with data to process.  */
 522   maybe_read_data (c);
 523
 524   size_t remaining_size = c->nb_read - c->line_start_idx;
 525   if (remaining_size == 0)
 526     /* There is no more data to process.  */
 527     return false;
 528
 529   char *line_start = c->data + c->line_start_idx;
 530
 531   char *next_line_start = NULL;
 532   size_t len = 0;
 533   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 534   if (line_end == NULL)
 535     {
 536       /* We haven't found the end-of-line delimiter in the cache.
 537          Fill the cache with more data from the file and look for the
 538          '\n'.  */
 539       while (maybe_read_data (c))
 540         {
 541           line_start = c->data + c->line_start_idx;
 542           remaining_size = c->nb_read - c->line_start_idx;
 543           line_end = (char *) memchr (line_start, '\n', remaining_size);
 544           if (line_end != NULL)
 545             {
 546               next_line_start = line_end + 1;
 547               break;
 548             }
 549         }
 550       if (line_end == NULL)
 551         {
 552           /* We've loadded all the file into the cache and still no
 553              '\n'.  Let's say the line ends up at one byte passed the
 554              end of the file.  This is to stay consistent with the case
 555              of when the line ends up with a '\n' and line_end points to
 556              that terminal '\n'.  That consistency is useful below in
 557              the len calculation.  */
 558           line_end = c->data + c->nb_read ;
 559           c->missing_trailing_newline = true;
 560         }
 561       else
 562         c->missing_trailing_newline = false;
 563     }
 564   else
 565     {
 566       next_line_start = line_end + 1;
 567       c->missing_trailing_newline = false;
 568     }
 569
 570   if (ferror (c->fp))
 571     return false;
 572
 573   /* At this point, we've found the end of the of line.  It either
 574      points to the '\n' or to one byte after the last byte of the
 575      file.  */
 576   gcc_assert (line_end != NULL);
 577
 578   len = line_end - line_start;
 579
 580   if (c->line_start_idx < c->nb_read)
 581     *line = line_start;
 582
 583   ++c->line_num;
 584
 585   /* Before we update our line record, make sure the hint about the
 586      total number of lines of the file is correct.  If it's not, then
 587      we give up recording line boundaries from now on.  */
 588   bool update_line_record = true;
 589   if (c->line_num > c->total_lines)
 590     update_line_record = false;
 591
 592     /* Now update our line record so that re-reading lines from the
 593      before c->line_start_idx is faster.  */
 594   if (update_line_record
 595       && c->line_record.length () < fcache_line_record_size)
 596     {
 597       /* If the file lines fits in the line record, we just record all
 598          its lines ...*/
 599       if (c->total_lines <= fcache_line_record_size
 600           && c->line_num > c->line_record.length ())
 601         c->line_record.safe_push (fcache::line_info (c->line_num,
 602                                                  c->line_start_idx,
 603                                                  line_end - c->data));
 604       else if (c->total_lines > fcache_line_record_size)
 605         {
 606           /* ... otherwise, we just scale total_lines down to
 607              (fcache_line_record_size lines.  */
 608           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 609           if (c->line_record.length () == 0
 610               || n >= c->line_record.length ())
 611             c->line_record.safe_push (fcache::line_info (c->line_num,
 612                                                      c->line_start_idx,
 613                                                      line_end - c->data));
 614         }
 615     }
 616
 617   /* Update c->line_start_idx so that it points to the next line to be
 618      read.  */
 619   if (next_line_start)
 620     c->line_start_idx = next_line_start - c->data;
 621   else
 622     /* We didn't find any terminal '\n'.  Let's consider that the end
 623        of line is the end of the data in the cache.  The next
 624        invocation of get_next_line will either read more data from the
 625        underlying file or return false early because we've reached the
 626        end of the file.  */
 627     c->line_start_idx = c->nb_read;
 628
 629   *line_len = len;
 630
 631   return true;
 632 }
 633
 634 /* Consume the next bytes coming from the cache (or from its
 635    underlying file if there are remaining unread bytes in the file)
 636    until we reach the next end-of-line (or end-of-file).  There is no
 637    copying from the cache involved.  Return TRUE upon successful
 638    completion.  */
 639
 640 static bool
 641 goto_next_line (fcache *cache)
 642 {
 643   char *l;
 644   ssize_t len;
 645
 646   return get_next_line (cache, &l, &len);
 647 }
 648
 649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 650    If the line was read successfully, *LINE points to the beginning
 651    of the line in the file cache and *LINE_LEN is the length of the
 652    line.  *LINE is not nul-terminated, but may contain zero bytes.
 653    *LINE is only valid until the next call of read_line_num.
 654    This function returns bool if a line was read.  */
 655
 656 static bool
 657 read_line_num (fcache *c, size_t line_num,
 658                char **line, ssize_t *line_len)
 659 {
 660   gcc_assert (line_num > 0);
 661
 662   if (line_num <= c->line_num)
 663     {
 664       /* We've been asked to read lines that are before c->line_num.
 665          So lets use our line record (if it's not empty) to try to
 666          avoid re-reading the file from the beginning again.  */
 667
 668       if (c->line_record.is_empty ())
 669         {
 670           c->line_start_idx = 0;
 671           c->line_num = 0;
 672         }
 673       else
 674         {
 675           fcache::line_info *i = NULL;
 676           if (c->total_lines <= fcache_line_record_size)
 677             {
 678               /* In languages where the input file is not totally
 679                  preprocessed up front, the c->total_lines hint
 680                  can be smaller than the number of lines of the
 681                  file.  In that case, only the first
 682                  c->total_lines have been recorded.
 683
 684                  Otherwise, the first c->total_lines we've read have
 685                  their start/end recorded here.  */
 686               i = (line_num <= c->total_lines)
 687                 ? &c->line_record[line_num - 1]
 688                 : &c->line_record[c->total_lines - 1];
 689               gcc_assert (i->line_num <= line_num);
 690             }
 691           else
 692             {
 693               /*  So the file had more lines than our line record
 694                   size.  Thus the number of lines we've recorded has
 695                   been scaled down to fcache_line_reacord_size.  Let's
 696                   pick the start/end of the recorded line that is
 697                   closest to line_num.  */
 698               size_t n = (line_num <= c->total_lines)
 699                 ? line_num * fcache_line_record_size / c->total_lines
 700                 : c ->line_record.length () - 1;
 701               if (n < c->line_record.length ())
 702                 {
 703                   i = &c->line_record[n];
 704                   gcc_assert (i->line_num <= line_num);
 705                 }
 706             }
 707
 708           if (i && i->line_num == line_num)
 709             {
 710               /* We have the start/end of the line.  */
 711               *line = c->data + i->start_pos;
 712               *line_len = i->end_pos - i->start_pos;
 713               return true;
 714             }
 715
 716           if (i)
 717             {
 718               c->line_start_idx = i->start_pos;
 719               c->line_num = i->line_num - 1;
 720             }
 721           else
 722             {
 723               c->line_start_idx = 0;
 724               c->line_num = 0;
 725             }
 726         }
 727     }
 728
 729   /*  Let's walk from line c->line_num up to line_num - 1, without
 730       copying any line.  */
 731   while (c->line_num < line_num - 1)
 732     if (!goto_next_line (c))
 733       return false;
 734
 735   /* The line we want is the next one.  Let's read and copy it back to
 736      the caller.  */
 737   return get_next_line (c, line, line_len);
 738 }
 739
 740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 741    The line is not nul-terminated.  The returned pointer is only
 742    valid until the next call of location_get_source_line.
 743    Note that the line can contain several null characters,
 744    so LINE_LEN, if non-null, points to the actual length of the line.
 745    If the function fails, NULL is returned.  */
 746
 747 const char *
 748 location_get_source_line (const char *file_path, int line,
 749                           int *line_len)
 750 {
 751   char *buffer = NULL;
 752   ssize_t len;
 753
 754   if (line == 0)
 755     return NULL;
 756
 757   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 758   if (c == NULL)
 759     return NULL;
 760
 761   bool read = read_line_num (c, line, &buffer, &len);
 762
 763   if (read && line_len)
 764     *line_len = len;
 765
 766   return read ? buffer : NULL;
 767 }
 768
 769 /* Determine if FILE_PATH missing a trailing newline on its final line.
 770    Only valid to call once all of the file has been loaded, by
 771    requesting a line number beyond the end of the file.  */
 772
 773 bool
 774 location_missing_trailing_newline (const char *file_path)
 775 {
 776   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 777   if (c == NULL)
 778     return false;
 779
 780   return c->missing_trailing_newline;
 781 }
 782
 783 /* Test if the location originates from the spelling location of a
 784    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 785    virtual) location of a built-in token that appears in the expansion
 786    list of a macro.  Please note that this function also works on
 787    tokens that result from built-in tokens.  For instance, the
 788    function would return true if passed a token "4" that is the result
 789    of the expansion of the built-in __LINE__ macro.  */
 790 bool
 791 is_location_from_builtin_token (source_location loc)
 792 {
 793   const line_map_ordinary *map = NULL;
 794   loc = linemap_resolve_location (line_table, loc,
 795                                   LRK_SPELLING_LOCATION, &map);
 796   return loc == BUILTINS_LOCATION;
 797 }
 798
 799 /* Expand the source location LOC into a human readable location.  If
 800    LOC is virtual, it resolves to the expansion point of the involved
 801    macro.  If LOC resolves to a builtin location, the file name of the
 802    readable location is set to the string "<built-in>".  */
 803
 804 expanded_location
 805 expand_location (source_location loc)
 806 {
 807   return expand_location_1 (loc, /*expansion_point_p=*/true,
 808                             LOCATION_ASPECT_CARET);
 809 }
 810
 811 /* Expand the source location LOC into a human readable location.  If
 812    LOC is virtual, it resolves to the expansion location of the
 813    relevant macro.  If LOC resolves to a builtin location, the file
 814    name of the readable location is set to the string
 815    "<built-in>".  */
 816
 817 expanded_location
 818 expand_location_to_spelling_point (source_location loc)
 819 {
 820   return expand_location_1 (loc, /*expansion_point_p=*/false,
 821                             LOCATION_ASPECT_CARET);
 822 }
 823
 824 /* The rich_location class within libcpp requires a way to expand
 825    source_location instances, and relies on the client code
 826    providing a symbol named
 827      linemap_client_expand_location_to_spelling_point
 828    to do this.
 829
 830    This is the implementation for libcommon.a (all host binaries),
 831    which simply calls into expand_location_1.  */
 832
 833 expanded_location
 834 linemap_client_expand_location_to_spelling_point (source_location loc,
 835                                                   enum location_aspect aspect)
 836 {
 837   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 838 }
 839
 840
 841 /* If LOCATION is in a system header and if it is a virtual location for
 842    a token coming from the expansion of a macro, unwind it to the
 843    location of the expansion point of the macro.  Otherwise, just return
 844    LOCATION.
 845
 846    This is used for instance when we want to emit diagnostics about a
 847    token that may be located in a macro that is itself defined in a
 848    system header, for example, for the NULL macro.  In such a case, if
 849    LOCATION were passed directly to diagnostic functions such as
 850    warning_at, the diagnostic would be suppressed (unless
 851    -Wsystem-headers).  */
 852
 853 source_location
 854 expansion_point_location_if_in_system_header (source_location location)
 855 {
 856   if (in_system_header_at (location))
 857     location = linemap_resolve_location (line_table, location,
 858                                          LRK_MACRO_EXPANSION_POINT,
 859                                          NULL);
 860   return location;
 861 }
 862
 863 /* If LOCATION is a virtual location for a token coming from the expansion
 864    of a macro, unwind to the location of the expansion point of the macro.  */
 865
 866 source_location
 867 expansion_point_location (source_location location)
 868 {
 869   return linemap_resolve_location (line_table, location,
 870                                    LRK_MACRO_EXPANSION_POINT, NULL);
 871 }
 872
 873 /* Construct a location with caret at CARET, ranging from START to
 874    finish e.g.
 875
 876                  11111111112
 877         12345678901234567890
 878      522
 879      523   return foo + bar;
 880                   ~~~~^~~~~
 881      524
 882
 883    The location's caret is at the "+", line 523 column 15, but starts
 884    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 885    of "bar" at column 19.  */
 886
 887 location_t
 888 make_location (location_t caret, location_t start, location_t finish)
 889 {
 890   location_t pure_loc = get_pure_location (caret);
 891   source_range src_range;
 892   src_range.m_start = get_start (start);
 893   src_range.m_finish = get_finish (finish);
 894   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 895                                                    pure_loc,
 896                                                    src_range,
 897                                                    NULL);
 898   return combined_loc;
 899 }
 900
 901 #define ONE_K 1024
 902 #define ONE_M (ONE_K * ONE_K)
 903
 904 /* Display a number as an integer multiple of either:
 905    - 1024, if said integer is >= to 10 K (in base 2)
 906    - 1024 * 1024, if said integer is >= 10 M in (base 2)
 907  */
 908 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 909                   ? (x) \
 910                   : ((x) < 10 * ONE_M \
 911                      ? (x) / ONE_K \
 912                      : (x) / ONE_M)))
 913
 914 /* For a given integer, display either:
 915    - the character 'k', if the number is higher than 10 K (in base 2)
 916      but strictly lower than 10 M (in base 2)
 917    - the character 'M' if the number is higher than 10 M (in base2)
 918    - the charcter ' ' if the number is strictly lower  than 10 K  */
 919 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 920
 921 /* Display an integer amount as multiple of 1K or 1M (in base 2).
 922    Display the correct unit (either k, M, or ' ') after the amount, as
 923    well.  */
 924 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 925
 926 /* Dump statistics to stderr about the memory usage of the line_table
 927    set of line maps.  This also displays some statistics about macro
 928    expansion.  */
 929
 930 void
 931 dump_line_table_statistics (void)
 932 {
 933   struct linemap_stats s;
 934   long total_used_map_size,
 935     macro_maps_size,
 936     total_allocated_map_size;
 937
 938   memset (&s, 0, sizeof (s));
 939
 940   linemap_get_statistics (line_table, &s);
 941
 942   macro_maps_size = s.macro_maps_used_size
 943     + s.macro_maps_locations_size;
 944
 945   total_allocated_map_size = s.ordinary_maps_allocated_size
 946     + s.macro_maps_allocated_size
 947     + s.macro_maps_locations_size;
 948
 949   total_used_map_size = s.ordinary_maps_used_size
 950     + s.macro_maps_used_size
 951     + s.macro_maps_locations_size;
 952
 953   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 954            s.num_expanded_macros);
 955   if (s.num_expanded_macros != 0)
 956     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 957              s.num_macro_tokens / s.num_expanded_macros);
 958   fprintf (stderr,
 959            "\nLine Table allocations during the "
 960            "compilation process\n");
 961   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 962            SCALE (s.num_ordinary_maps_used),
 963            STAT_LABEL (s.num_ordinary_maps_used));
 964   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 965            SCALE (s.ordinary_maps_used_size),
 966            STAT_LABEL (s.ordinary_maps_used_size));
 967   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 968            SCALE (s.num_ordinary_maps_allocated),
 969            STAT_LABEL (s.num_ordinary_maps_allocated));
 970   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 971            SCALE (s.ordinary_maps_allocated_size),
 972            STAT_LABEL (s.ordinary_maps_allocated_size));
 973   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 974            SCALE (s.num_macro_maps_used),
 975            STAT_LABEL (s.num_macro_maps_used));
 976   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 977            SCALE (s.macro_maps_used_size),
 978            STAT_LABEL (s.macro_maps_used_size));
 979   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 980            SCALE (s.macro_maps_locations_size),
 981            STAT_LABEL (s.macro_maps_locations_size));
 982   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 983            SCALE (macro_maps_size),
 984            STAT_LABEL (macro_maps_size));
 985   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 986            SCALE (s.duplicated_macro_maps_locations_size),
 987            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 988   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 989            SCALE (total_allocated_map_size),
 990            STAT_LABEL (total_allocated_map_size));
 991   fprintf (stderr, "Total used maps size:                %5ld%c\n",
 992            SCALE (total_used_map_size),
 993            STAT_LABEL (total_used_map_size));
 994   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
 995            SCALE (s.adhoc_table_size),
 996            STAT_LABEL (s.adhoc_table_size));
 997   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
 998            s.adhoc_table_entries_used);
 999   fprintf (stderr, "optimized_ranges: %i\n",
1000            line_table->num_optimized_ranges);
1001   fprintf (stderr, "unoptimized_ranges: %i\n",
1002            line_table->num_unoptimized_ranges);
1003
1004   fprintf (stderr, "\n");
1005 }
1006
1007 /* Get location one beyond the final location in ordinary map IDX.  */
1008
1009 static source_location
1010 get_end_location (struct line_maps *set, unsigned int idx)
1011 {
1012   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1013     return set->highest_location;
1014
1015   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1016   return MAP_START_LOCATION (next_map);
1017 }
1018
/* Helper function for write_digit_row.
   Write the character '0' + (DIGIT % 10) to STREAM; for non-negative
   DIGIT this is its units digit.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1026
1027 /* Helper function for dump_location_info.
1028    Write a row of numbers to STREAM, numbering a source line,
1029    giving the units, tens, hundreds etc of the column number.  */
1030
1031 static void
1032 write_digit_row (FILE *stream, int indent,
1033                  const line_map_ordinary *map,
1034                  source_location loc, int max_col, int divisor)
1035 {
1036   fprintf (stream, "%*c", indent, ' ');
1037   fprintf (stream, "|");
1038   for (int column = 1; column < max_col; column++)
1039     {
1040       source_location column_loc = loc + (column << map->m_range_bits);
1041       write_digit (stream, column_loc / divisor);
1042     }
1043   fprintf (stream, "\n");
1044 }
1045
1046 /* Write a half-closed (START) / half-open (END) interval of
1047    source_location to STREAM.  */
1048
1049 static void
1050 dump_location_range (FILE *stream,
1051                      source_location start, source_location end)
1052 {
1053   fprintf (stream,
1054            "  source_location interval: %u <= loc < %u\n",
1055            start, end);
1056 }
1057
1058 /* Write a labelled description of a half-closed (START) / half-open (END)
1059    interval of source_location to STREAM.  */
1060
1061 static void
1062 dump_labelled_location_range (FILE *stream,
1063                               const char *name,
1064                               source_location start, source_location end)
1065 {
1066   fprintf (stream, "%s\n", name);
1067   dump_location_range (stream, start, end);
1068   fprintf (stream, "\n");
1069 }
1070
1071 /* Write a visualization of the locations in the line_table to STREAM.  */
1072
1073 void
1074 dump_location_info (FILE *stream)
1075 {
1076   /* Visualize the reserved locations.  */
1077   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1078                                 0, RESERVED_LOCATION_COUNT);
1079
1080   /* Visualize the ordinary line_map instances, rendering the sources. */
1081   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1082     {
1083       source_location end_location = get_end_location (line_table, idx);
1084       /* half-closed: doesn't include this one. */
1085
1086       const line_map_ordinary *map
1087         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1088       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1089       dump_location_range (stream,
1090                            MAP_START_LOCATION (map), end_location);
1091       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1092       fprintf (stream, "  starting at line: %i\n",
1093                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1094       fprintf (stream, "  column and range bits: %i\n",
1095                map->m_column_and_range_bits);
1096       fprintf (stream, "  column bits: %i\n",
1097                map->m_column_and_range_bits - map->m_range_bits);
1098       fprintf (stream, "  range bits: %i\n",
1099                map->m_range_bits);
1100
1101       /* Render the span of source lines that this "map" covers.  */
1102       for (source_location loc = MAP_START_LOCATION (map);
1103            loc < end_location;
1104            loc += (1 << map->m_range_bits) )
1105         {
1106           gcc_assert (pure_location_p (line_table, loc) );
1107
1108           expanded_location exploc
1109             = linemap_expand_location (line_table, map, loc);
1110
1111           if (0 == exploc.column)
1112             {
1113               /* Beginning of a new source line: draw the line.  */
1114
1115               int line_size;
1116               const char *line_text = location_get_source_line (exploc.file,
1117                                                                 exploc.line,
1118                                                                 &line_size);
1119               if (!line_text)
1120                 break;
1121               fprintf (stream,
1122                        "%s:%3i|loc:%5i|%.*s\n",
1123                        exploc.file, exploc.line,
1124                        loc,
1125                        line_size, line_text);
1126
1127               /* "loc" is at column 0, which means "the whole line".
1128                  Render the locations *within* the line, by underlining
1129                  it, showing the source_location numeric values
1130                  at each column.  */
1131               int max_col = (1 << map->m_column_and_range_bits) - 1;
1132               if (max_col > line_size)
1133                 max_col = line_size + 1;
1134
1135               int indent = 14 + strlen (exploc.file);
1136
1137               /* Thousands.  */
1138               if (end_location > 999)
1139                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1140
1141               /* Hundreds.  */
1142               if (end_location > 99)
1143                 write_digit_row (stream, indent, map, loc, max_col, 100);
1144
1145               /* Tens.  */
1146               write_digit_row (stream, indent, map, loc, max_col, 10);
1147
1148               /* Units.  */
1149               write_digit_row (stream, indent, map, loc, max_col, 1);
1150             }
1151         }
1152       fprintf (stream, "\n");
1153     }
1154
1155   /* Visualize unallocated values.  */
1156   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1157                                 line_table->highest_location,
1158                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1159
1160   /* Visualize the macro line_map instances, rendering the sources. */
1161   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1162     {
1163       /* Each macro map that is allocated owns source_location values
1164          that are *lower* that the one before them.
1165          Hence it's meaningful to view them either in order of ascending
1166          source locations, or in order of ascending macro map index.  */
1167       const bool ascending_source_locations = true;
1168       unsigned int idx = (ascending_source_locations
1169                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1170                           : i);
1171       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1172       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1173                idx,
1174                linemap_map_get_macro_name (map),
1175                MACRO_MAP_NUM_MACRO_TOKENS (map));
1176       dump_location_range (stream,
1177                            map->start_location,
1178                            (map->start_location
1179                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1180       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1181               "expansion point is location %i",
1182               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1183       fprintf (stream, "  map->start_location: %u\n",
1184                map->start_location);
1185
1186       fprintf (stream, "  macro_locations:\n");
1187       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1188         {
1189           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1190           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1191
1192           /* linemap_add_macro_token encodes token numbers in an expansion
1193              by putting them after MAP_START_LOCATION. */
1194
1195           /* I'm typically seeing 4 uninitialized entries at the end of
1196              0xafafafaf.
1197              This appears to be due to macro.c:replace_args
1198              adding 2 extra args for padding tokens; presumably there may
1199              be a leading and/or trailing padding token injected,
1200              each for 2 more location slots.
1201              This would explain there being up to 4 source_locations slots
1202              that may be uninitialized.  */
1203
1204           fprintf (stream, "    %u: %u, %u\n",
1205                    i,
1206                    x,
1207                    y);
1208           if (x == y)
1209             {
1210               if (x < MAP_START_LOCATION (map))
1211                 inform (x, "token %u has x-location == y-location == %u", i, x);
1212               else
1213                 fprintf (stream,
1214                          "x-location == y-location == %u encodes token # %u\n",
1215                          x, x - MAP_START_LOCATION (map));
1216                 }
1217           else
1218             {
1219               inform (x, "token %u has x-location == %u", i, x);
1220               inform (x, "token %u has y-location == %u", i, y);
1221             }
1222         }
1223       fprintf (stream, "\n");
1224     }
1225
1226   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1227      macro map, presumably due to an off-by-one error somewhere
1228      between the logic in linemap_enter_macro and
1229      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1230   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1231                                 MAX_SOURCE_LOCATION,
1232                                 MAX_SOURCE_LOCATION + 1);
1233
1234   /* Visualize ad-hoc values.  */
1235   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1236                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1237 }
1238
1239 /* string_concat's constructor.  */
1240
1241 string_concat::string_concat (int num, location_t *locs)
1242   : m_num (num)
1243 {
1244   m_locs = ggc_vec_alloc <location_t> (num);
1245   for (int i = 0; i < num; i++)
1246     m_locs[i] = locs[i];
1247 }
1248
1249 /* string_concat_db's constructor.  */
1250
1251 string_concat_db::string_concat_db ()
1252 {
1253   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1254 }
1255
1256 /* Record that a string concatenation occurred, covering NUM
1257    string literal tokens.  LOCS is an array of size NUM, containing the
1258    locations of the tokens.  A copy of LOCS is taken.  */
1259
1260 void
1261 string_concat_db::record_string_concatenation (int num, location_t *locs)
1262 {
1263   gcc_assert (num > 1);
1264   gcc_assert (locs);
1265
1266   location_t key_loc = get_key_loc (locs[0]);
1267
1268   string_concat *concat
1269     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1270   m_table->put (key_loc, concat);
1271 }
1272
1273 /* Determine if LOC was the location of the the initial token of a
1274    concatenation of string literal tokens.
1275    If so, *OUT_NUM is written to with the number of tokens, and
1276    *OUT_LOCS with the location of an array of locations of the
1277    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1278    storage owned by the string_concat_db.
1279    Otherwise, return false.  */
1280
1281 bool
1282 string_concat_db::get_string_concatenation (location_t loc,
1283                                             int *out_num,
1284                                             location_t **out_locs)
1285 {
1286   gcc_assert (out_num);
1287   gcc_assert (out_locs);
1288
1289   location_t key_loc = get_key_loc (loc);
1290
1291   string_concat **concat = m_table->get (key_loc);
1292   if (!concat)
1293     return false;
1294
1295   *out_num = (*concat)->m_num;
1296   *out_locs =(*concat)->m_locs;
1297   return true;
1298 }
1299
1300 /* Internal function.  Canonicalize LOC into a form suitable for
1301    use as a key within the database, stripping away macro expansion,
1302    ad-hoc information, and range information, using the location of
1303    the start of LOC within an ordinary linemap.  */
1304
1305 location_t
1306 string_concat_db::get_key_loc (location_t loc)
1307 {
1308   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1309                                   NULL);
1310
1311   loc = get_range_from_loc (line_table, loc).m_start;
1312
1313   return loc;
1314 }
1315
1316 /* Helper class for use within get_substring_ranges_for_loc.
1317    An vec of cpp_string with responsibility for releasing all of the
1318    str->text for each str in the vector.  */
1319
1320 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1321 {
1322  public:
1323   auto_cpp_string_vec (int alloc)
1324     : auto_vec <cpp_string> (alloc) {}
1325
1326   ~auto_cpp_string_vec ()
1327   {
1328     /* Clean up the copies within this vec.  */
1329     int i;
1330     cpp_string *str;
1331     FOR_EACH_VEC_ELT (*this, i, str)
1332       free (const_cast <unsigned char *> (str->text));
1333   }
1334 };
1335
1336 /* Attempt to populate RANGES with source location information on the
1337    individual characters within the string literal found at STRLOC.
1338    If CONCATS is non-NULL, then any string literals that the token at
1339    STRLOC  was concatenated with are also added to RANGES.
1340
1341    Return NULL if successful, or an error message if any errors occurred (in
1342    which case RANGES may be only partially populated and should not
1343    be used).
1344
1345    This is implemented by re-parsing the relevant source line(s).  */
1346
1347 static const char *
1348 get_substring_ranges_for_loc (cpp_reader *pfile,
1349                               string_concat_db *concats,
1350                               location_t strloc,
1351                               enum cpp_ttype type,
1352                               cpp_substring_ranges &ranges)
1353 {
1354   gcc_assert (pfile);
1355
1356   if (strloc == UNKNOWN_LOCATION)
1357     return "unknown location";
1358
1359   /* Reparsing the strings requires accurate location information.
1360      If -ftrack-macro-expansion has been overridden from its default
1361      of 2, then we might have a location of a macro expansion point,
1362      rather than the location of the literal itself.
1363      Avoid this by requiring that we have full macro expansion tracking
1364      for substring locations to be available.  */
1365   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1366     return "track_macro_expansion != 2";
1367
1368   /* If #line or # 44 "file"-style directives are present, then there's
1369      no guarantee that the line numbers we have can be used to locate
1370      the strings.  For example, we might have a .i file with # directives
1371      pointing back to lines within a .c file, but the .c file might
1372      have been edited since the .i file was created.
1373      In such a case, the safest course is to disable on-demand substring
1374      locations.  */
1375   if (line_table->seen_line_directive)
1376     return "seen line directive";
1377
1378   /* If string concatenation has occurred at STRLOC, get the locations
1379      of all of the literal tokens making up the compound string.
1380      Otherwise, just use STRLOC.  */
1381   int num_locs = 1;
1382   location_t *strlocs = &strloc;
1383   if (concats)
1384     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1385
1386   auto_cpp_string_vec strs (num_locs);
1387   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1388   for (int i = 0; i < num_locs; i++)
1389     {
1390       /* Get range of strloc.  We will use it to locate the start and finish
1391          of the literal token within the line.  */
1392       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1393
1394       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1395         /* If the string is within a macro expansion, we can't get at the
1396            end location.  */
1397         return "macro expansion";
1398
1399       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1400         /* If so, we can't reliably determine where the token started within
1401            its line.  */
1402         return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1403
1404       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1405         /* If so, we can't reliably determine where the token finished within
1406            its line.  */
1407         return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1408
1409       expanded_location start
1410         = expand_location_to_spelling_point (src_range.m_start);
1411       expanded_location finish
1412         = expand_location_to_spelling_point (src_range.m_finish);
1413       if (start.file != finish.file)
1414         return "range endpoints are in different files";
1415       if (start.line != finish.line)
1416         return "range endpoints are on different lines";
1417       if (start.column > finish.column)
1418         return "range endpoints are reversed";
1419
1420       int line_width;
1421       const char *line = location_get_source_line (start.file, start.line,
1422                                                    &line_width);
1423       if (line == NULL)
1424         return "unable to read source line";
1425
1426       /* Determine the location of the literal (including quotes
1427          and leading prefix chars, such as the 'u' in a u""
1428          token).  */
1429       const char *literal = line + start.column - 1;
1430       int literal_length = finish.column - start.column + 1;
1431
1432       /* Ensure that we don't crash if we got the wrong location.  */
1433       if (line_width < (start.column - 1 + literal_length))
1434         return "line is not wide enough";
1435
1436       cpp_string from;
1437       from.len = literal_length;
1438       /* Make a copy of the literal, to avoid having to rely on
1439          the lifetime of the copy of the line within the cache.
1440          This will be released by the auto_cpp_string_vec dtor.  */
1441       from.text = XDUPVEC (unsigned char, literal, literal_length);
1442       strs.safe_push (from);
1443
1444       /* For very long lines, a new linemap could have started
1445          halfway through the token.
1446          Ensure that the loc_reader uses the linemap of the
1447          *end* of the token for its start location.  */
1448       const line_map_ordinary *final_ord_map;
1449       linemap_resolve_location (line_table, src_range.m_finish,
1450                                 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1451       location_t start_loc
1452         = linemap_position_for_line_and_column (line_table, final_ord_map,
1453                                                 start.line, start.column);
1454
1455       cpp_string_location_reader loc_reader (start_loc, line_table);
1456       loc_readers.safe_push (loc_reader);
1457     }
1458
1459   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1460   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1461                                                  loc_readers.address (),
1462                                                  num_locs, &ranges, type);
1463   if (err)
1464     return err;
1465
1466   /* Success: "ranges" should now contain information on the string.  */
1467   return NULL;
1468 }
1469
1470 /* Attempt to populate *OUT_LOC with source location information on the
1471    given characters within the string literal found at STRLOC.
1472    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1473    character set.
1474
1475    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1476    and string literal "012345\n789"
1477    *OUT_LOC is written to with:
1478      "012345\n789"
1479          ~^~~~~
1480
1481    If CONCATS is non-NULL, then any string literals that the token at
1482    STRLOC was concatenated with are also considered.
1483
1484    This is implemented by re-parsing the relevant source line(s).
1485
1486    Return NULL if successful, or an error message if any errors occurred.
1487    Error messages are intended for GCC developers (to help debugging) rather
1488    than for end-users.  */
1489
1490 const char *
1491 get_source_location_for_substring (cpp_reader *pfile,
1492                                    string_concat_db *concats,
1493                                    location_t strloc,
1494                                    enum cpp_ttype type,
1495                                    int caret_idx, int start_idx, int end_idx,
1496                                    source_location *out_loc)
1497 {
1498   gcc_checking_assert (caret_idx >= 0);
1499   gcc_checking_assert (start_idx >= 0);
1500   gcc_checking_assert (end_idx >= 0);
1501   gcc_assert (out_loc);
1502
1503   cpp_substring_ranges ranges;
1504   const char *err
1505     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1506   if (err)
1507     return err;
1508
1509   if (caret_idx >= ranges.get_num_ranges ())
1510     return "caret_idx out of range";
1511   if (start_idx >= ranges.get_num_ranges ())
1512     return "start_idx out of range";
1513   if (end_idx >= ranges.get_num_ranges ())
1514     return "end_idx out of range";
1515
1516   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1517                             ranges.get_range (start_idx).m_start,
1518                             ranges.get_range (end_idx).m_finish);
1519   return NULL;
1520 }
1521
1522 #if CHECKING_P
1523
1524 namespace selftest {
1525
1526 /* Selftests of location handling.  */
1527
1528 /* Attempt to populate *OUT_RANGE with source location information on the
1529    given character within the string literal found at STRLOC.
1530    CHAR_IDX refers to an offset within the execution character set.
1531    If CONCATS is non-NULL, then any string literals that the token at
1532    STRLOC was concatenated with are also considered.
1533
1534    This is implemented by re-parsing the relevant source line(s).
1535
1536    Return NULL if successful, or an error message if any errors occurred.
1537    Error messages are intended for GCC developers (to help debugging) rather
1538    than for end-users.  */
1539
1540 static const char *
1541 get_source_range_for_char (cpp_reader *pfile,
1542                            string_concat_db *concats,
1543                            location_t strloc,
1544                            enum cpp_ttype type,
1545                            int char_idx,
1546                            source_range *out_range)
1547 {
1548   gcc_checking_assert (char_idx >= 0);
1549   gcc_assert (out_range);
1550
1551   cpp_substring_ranges ranges;
1552   const char *err
1553     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1554   if (err)
1555     return err;
1556
1557   if (char_idx >= ranges.get_num_ranges ())
1558     return "char_idx out of range";
1559
1560   *out_range = ranges.get_range (char_idx);
1561   return NULL;
1562 }
1563
1564 /* As get_source_range_for_char, but write to *OUT the number
1565    of ranges that are available.  */
1566
1567 static const char *
1568 get_num_source_ranges_for_substring (cpp_reader *pfile,
1569                                      string_concat_db *concats,
1570                                      location_t strloc,
1571                                      enum cpp_ttype type,
1572                                      int *out)
1573 {
1574   gcc_assert (out);
1575
1576   cpp_substring_ranges ranges;
1577   const char *err
1578     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1579
1580   if (err)
1581     return err;
1582
1583   *out = ranges.get_num_ranges ();
1584   return NULL;
1585 }
1586
1587 /* Selftests of location handling.  */
1588
1589 /* Helper function for verifying location data: when location_t
1590    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1591    as having column 0.  */
1592
1593 static bool
1594 should_have_column_data_p (location_t loc)
1595 {
1596   if (IS_ADHOC_LOC (loc))
1597     loc = get_location_from_adhoc_loc (line_table, loc);
1598   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1599     return false;
1600   return true;
1601 }
1602
/* Selftest for should_have_column_data_p.  Check both sides of the
   LINE_MAP_MAX_LOCATION_WITH_COLS threshold, plus the lowest
   non-reserved location.  */

static void
test_should_have_column_data_p ()
{
  /* A low location value: column data expected.  */
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* Exactly at the threshold: still expected, since the comparison in
     should_have_column_data_p is strict.  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* One past the threshold: no column data.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
1614
1615 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1616    on LOC.  */
1617
1618 static void
1619 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1620               location_t loc)
1621 {
1622   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1623   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1624   /* If location_t values are sufficiently high, then column numbers
1625      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1626      When close to the threshold, column numbers *may* be present: if
1627      the final linemap before the threshold contains a line that straddles
1628      the threshold, locations in that line have column information.  */
1629   if (should_have_column_data_p (loc))
1630     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1631 }
1632
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
     and others use zero
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line start.  */
  int m_base_location;
};
1659
1660 /* Constructor.  Store the old value of line_table, and create a new
1661    one, using sane defaults.  */
1662
1663 line_table_test::line_table_test ()
1664 {
1665   gcc_assert (saved_line_table == NULL);
1666   saved_line_table = line_table;
1667   line_table = ggc_alloc<line_maps> ();
1668   linemap_init (line_table, BUILTINS_LOCATION);
1669   gcc_assert (saved_line_table->reallocator);
1670   line_table->reallocator = saved_line_table->reallocator;
1671   gcc_assert (saved_line_table->round_alloc_size);
1672   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1673   line_table->default_range_bits = 0;
1674 }
1675
1676 /* Constructor.  Store the old value of line_table, and create a new
1677    one, using the sitation described in CASE_.  */
1678
1679 line_table_test::line_table_test (const line_table_case &case_)
1680 {
1681   gcc_assert (saved_line_table == NULL);
1682   saved_line_table = line_table;
1683   line_table = ggc_alloc<line_maps> ();
1684   linemap_init (line_table, BUILTINS_LOCATION);
1685   gcc_assert (saved_line_table->reallocator);
1686   line_table->reallocator = saved_line_table->reallocator;
1687   gcc_assert (saved_line_table->round_alloc_size);
1688   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1689   line_table->default_range_bits = case_.m_default_range_bits;
1690   if (case_.m_base_location)
1691     {
1692       line_table->highest_location = case_.m_base_location;
1693       line_table->highest_line = case_.m_base_location;
1694     }
1695 }
1696
1697 /* Destructor.  Restore the old value of line_table.  */
1698
1699 line_table_test::~line_table_test ()
1700 {
1701   gcc_assert (saved_line_table != NULL);
1702   line_table = saved_line_table;
1703   saved_line_table = NULL;
1704 }
1705
1706 /* Verify basic operation of ordinary linemaps.  */
1707
1708 static void
1709 test_accessing_ordinary_linemaps (const line_table_case &case_)
1710 {
1711   line_table_test ltt (case_);
1712
1713   /* Build a simple linemap describing some locations. */
1714   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1715
1716   linemap_line_start (line_table, 1, 100);
1717   location_t loc_a = linemap_position_for_column (line_table, 1);
1718   location_t loc_b = linemap_position_for_column (line_table, 23);
1719
1720   linemap_line_start (line_table, 2, 100);
1721   location_t loc_c = linemap_position_for_column (line_table, 1);
1722   location_t loc_d = linemap_position_for_column (line_table, 17);
1723
1724   /* Example of a very long line.  */
1725   linemap_line_start (line_table, 3, 2000);
1726   location_t loc_e = linemap_position_for_column (line_table, 700);
1727
1728   /* Transitioning back to a short line.  */
1729   linemap_line_start (line_table, 4, 0);
1730   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1731
1732   if (should_have_column_data_p (loc_back_to_short))
1733     {
1734       /* Verify that we switched to short lines in the linemap.  */
1735       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1736       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1737     }
1738
1739   /* Example of a line that will eventually be seen to be longer
1740      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1741      below that.  */
1742   linemap_line_start (line_table, 5, 2000);
1743
1744   location_t loc_start_of_very_long_line
1745     = linemap_position_for_column (line_table, 2000);
1746   location_t loc_too_wide
1747     = linemap_position_for_column (line_table, 4097);
1748   location_t loc_too_wide_2
1749     = linemap_position_for_column (line_table, 4098);
1750
1751   /* ...and back to a sane line length.  */
1752   linemap_line_start (line_table, 6, 100);
1753   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1754
1755   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1756
1757   /* Multiple files.  */
1758   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1759   linemap_line_start (line_table, 1, 200);
1760   location_t loc_f = linemap_position_for_column (line_table, 150);
1761   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1762
1763   /* Verify that we can recover the location info.  */
1764   assert_loceq ("foo.c", 1, 1, loc_a);
1765   assert_loceq ("foo.c", 1, 23, loc_b);
1766   assert_loceq ("foo.c", 2, 1, loc_c);
1767   assert_loceq ("foo.c", 2, 17, loc_d);
1768   assert_loceq ("foo.c", 3, 700, loc_e);
1769   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1770
1771   /* In the very wide line, the initial location should be fully tracked.  */
1772   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1773   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1774      be disabled.  */
1775   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1776   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1777   /*...and column-tracking should be re-enabled for subsequent lines.  */
1778   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1779
1780   assert_loceq ("bar.c", 1, 150, loc_f);
1781
1782   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1783   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1784
1785   /* Verify using make_location to build a range, and extracting data
1786      back from it.  */
1787   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1788   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1789   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1790   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1791   ASSERT_EQ (loc_b, src_range.m_start);
1792   ASSERT_EQ (loc_d, src_range.m_finish);
1793 }
1794
1795 /* Verify various properties of UNKNOWN_LOCATION.  */
1796
1797 static void
1798 test_unknown_location ()
1799 {
1800   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1801   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1802   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1803 }
1804
1805 /* Verify various properties of BUILTINS_LOCATION.  */
1806
1807 static void
1808 test_builtins ()
1809 {
1810   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1811   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1812 }
1813
1814 /* Regression test for make_location.
1815    Ensure that we use pure locations for the start/finish of the range,
1816    rather than storing a packed or ad-hoc range as the start/finish.  */
1817
1818 static void
1819 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1820 {
1821   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1822      with C++ frontend.
1823      ....................0000000001111111111222.
1824      ....................1234567890123456789012.  */
1825   const char *content = "     r += !aaa == bbb;\n";
1826   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1827   line_table_test ltt (case_);
1828   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1829
1830   const location_t c11 = linemap_position_for_column (line_table, 11);
1831   const location_t c12 = linemap_position_for_column (line_table, 12);
1832   const location_t c13 = linemap_position_for_column (line_table, 13);
1833   const location_t c14 = linemap_position_for_column (line_table, 14);
1834   const location_t c21 = linemap_position_for_column (line_table, 21);
1835
1836   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1837     return;
1838
1839   /* Use column 13 for the caret location, arbitrarily, to verify that we
1840      handle start != caret.  */
1841   const location_t aaa = make_location (c13, c12, c14);
1842   ASSERT_EQ (c13, get_pure_location (aaa));
1843   ASSERT_EQ (c12, get_start (aaa));
1844   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1845   ASSERT_EQ (c14, get_finish (aaa));
1846   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1847
1848   /* Make a location using a location with a range as the start-point.  */
1849   const location_t not_aaa = make_location (c11, aaa, c14);
1850   ASSERT_EQ (c11, get_pure_location (not_aaa));
1851   /* It should use the start location of the range, not store the range
1852      itself.  */
1853   ASSERT_EQ (c12, get_start (not_aaa));
1854   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1855   ASSERT_EQ (c14, get_finish (not_aaa));
1856   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1857
1858   /* Similarly, make a location with a range as the end-point.  */
1859   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1860   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1861   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1862   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1863   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1864   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1865   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1866   /* It should use the finish location of the range, not store the range
1867      itself.  */
1868   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1869   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1870   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1871   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1872   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1873 }
1874
1875 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1876
1877 static void
1878 test_reading_source_line ()
1879 {
1880   /* Create a tempfile and write some text to it.  */
1881   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1882                         "01234567890123456789\n"
1883                         "This is the test text\n"
1884                         "This is the 3rd line");
1885
1886   /* Read back a specific line from the tempfile.  */
1887   int line_size;
1888   const char *source_line = location_get_source_line (tmp.get_filename (),
1889                                                       3, &line_size);
1890   ASSERT_TRUE (source_line != NULL);
1891   ASSERT_EQ (20, line_size);
1892   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1893                          source_line, line_size));
1894
1895   source_line = location_get_source_line (tmp.get_filename (),
1896                                           2, &line_size);
1897   ASSERT_TRUE (source_line != NULL);
1898   ASSERT_EQ (21, line_size);
1899   ASSERT_TRUE (!strncmp ("This is the test text",
1900                          source_line, line_size));
1901
1902   source_line = location_get_source_line (tmp.get_filename (),
1903                                           4, &line_size);
1904   ASSERT_TRUE (source_line == NULL);
1905 }
1906
1907 /* Tests of lexing.  */
1908
/* Assert that the spelling of token TOK from PARSER, as given by
   cpp_token_as_text, is equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *tok_spelling_                                           \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), tok_spelling_);                      \
  SELFTEST_END_STMT
1917
1918 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1919    and ranges from EXP_START_COL to EXP_FINISH_COL.
1920    Use LOC as the effective location of the selftest.  */
1921
1922 static void
1923 assert_token_loc_eq (const location &loc,
1924                      const cpp_token *tok,
1925                      const char *exp_filename, int exp_linenum,
1926                      int exp_start_col, int exp_finish_col)
1927 {
1928   location_t tok_loc = tok->src_loc;
1929   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1930   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1931
1932   /* If location_t values are sufficiently high, then column numbers
1933      will be unavailable.  */
1934   if (!should_have_column_data_p (tok_loc))
1935     return;
1936
1937   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1938   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1939   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1940   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1941 }
1942
/* Check TOK->src_loc via assert_token_loc_eq, passing SELFTEST_LOCATION
   as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME), (EXP_LINENUM),                   \
                       (EXP_START_COL), (EXP_FINISH_COL))
1950
1951 /* Test of lexing a file using libcpp, verifying tokens and their
1952    location information.  */
1953
1954 static void
1955 test_lexer (const line_table_case &case_)
1956 {
1957   /* Create a tempfile and write some text to it.  */
1958   const char *content =
1959     /*00000000011111111112222222222333333.3333444444444.455555555556
1960       12345678901234567890123456789012345.6789012345678.901234567890.  */
1961     ("test_name /* c-style comment */\n"
1962      "                                  \"test literal\"\n"
1963      " // test c++-style comment\n"
1964      "   42\n");
1965   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1966
1967   line_table_test ltt (case_);
1968
1969   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1970
1971   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1972   ASSERT_NE (fname, NULL);
1973
1974   /* Verify that we get the expected tokens back, with the correct
1975      location information.  */
1976
1977   location_t loc;
1978   const cpp_token *tok;
1979   tok = cpp_get_token_with_location (parser, &loc);
1980   ASSERT_NE (tok, NULL);
1981   ASSERT_EQ (tok->type, CPP_NAME);
1982   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1983   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1984
1985   tok = cpp_get_token_with_location (parser, &loc);
1986   ASSERT_NE (tok, NULL);
1987   ASSERT_EQ (tok->type, CPP_STRING);
1988   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1989   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1990
1991   tok = cpp_get_token_with_location (parser, &loc);
1992   ASSERT_NE (tok, NULL);
1993   ASSERT_EQ (tok->type, CPP_NUMBER);
1994   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1995   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1996
1997   tok = cpp_get_token_with_location (parser, &loc);
1998   ASSERT_NE (tok, NULL);
1999   ASSERT_EQ (tok->type, CPP_EOF);
2000
2001   cpp_finish (parser, NULL);
2002   cpp_destroy (parser);
2003 }
2004
2005 /* Forward decls.  */
2006
2007 struct lexer_test;
2008 class lexer_test_options;
2009
2010 /* A class for specifying options of a lexer_test.
2011    The "apply" vfunc is called during the lexer_test constructor.  */
2012
2013 class lexer_test_options
2014 {
2015  public:
2016   virtual void apply (lexer_test &) = 0;
2017 };
2018
2019 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2020    in its dtor.
2021
2022    This is needed by struct lexer_test to ensure that the cleanup of the
2023    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2024
2025 class cpp_reader_ptr
2026 {
2027  public:
2028   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2029
2030   ~cpp_reader_ptr ()
2031   {
2032     cpp_finish (m_ptr, NULL);
2033     cpp_destroy (m_ptr);
2034   }
2035
2036   operator cpp_reader * () const { return m_ptr; }
2037
2038  private:
2039   cpp_reader *m_ptr;
2040 };
2041
2042 /* A struct for writing lexer tests.  */
2043
2044 struct lexer_test
2045 {
2046   lexer_test (const line_table_case &case_, const char *content,
2047               lexer_test_options *options);
2048   ~lexer_test ();
2049
2050   const cpp_token *get_token ();
2051
2052   /* The ordering of these fields matters.
2053      The line_table_test must be first, since the cpp_reader_ptr
2054      uses it.
2055      The cpp_reader must be cleaned up *after* the temp_source_file
2056      since the filenames in input.c's input cache are owned by the
2057      cpp_reader; in particular, when ~temp_source_file evicts the
2058      filename the filenames must still be alive.  */
2059   line_table_test m_ltt;
2060   cpp_reader_ptr m_parser;
2061   temp_source_file m_tempfile;
2062   string_concat_db m_concats;
2063   bool m_implicitly_expect_EOF;
2064 };
2065
2066 /* Use an EBCDIC encoding for the execution charset, specifically
2067    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2068
2069    This exercises iconv integration within libcpp.
2070    Not every build of iconv supports the given charset,
2071    so we need to flag this error and handle it gracefully.  */
2072
2073 class ebcdic_execution_charset : public lexer_test_options
2074 {
2075  public:
2076   ebcdic_execution_charset () : m_num_iconv_errors (0)
2077     {
2078       gcc_assert (s_singleton == NULL);
2079       s_singleton = this;
2080     }
2081   ~ebcdic_execution_charset ()
2082     {
2083       gcc_assert (s_singleton == this);
2084       s_singleton = NULL;
2085     }
2086
2087   void apply (lexer_test &test) FINAL OVERRIDE
2088   {
2089     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2090     cpp_opts->narrow_charset = "IBM1047";
2091
2092     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2093     callbacks->error = on_error;
2094   }
2095
2096   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2097                         int level ATTRIBUTE_UNUSED,
2098                         int reason ATTRIBUTE_UNUSED,
2099                         rich_location *richloc ATTRIBUTE_UNUSED,
2100                         const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2101     ATTRIBUTE_FPTR_PRINTF(5,0)
2102   {
2103     gcc_assert (s_singleton);
2104     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2105     const char *msg = "conversion from %s to %s not supported by iconv";
2106 #ifdef ENABLE_NLS
2107     msg = dgettext ("cpplib", msg);
2108 #endif
2109     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2110        when the local iconv build doesn't support the conversion.  */
2111     if (strcmp (msgid, msg) == 0)
2112       {
2113         s_singleton->m_num_iconv_errors++;
2114         return true;
2115       }
2116
2117     /* Otherwise, we have an unexpected error.  */
2118     abort ();
2119   }
2120
2121   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2122
2123  private:
2124   static ebcdic_execution_charset *s_singleton;
2125   int m_num_iconv_errors;
2126 };
2127
2128 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2129
2130 /* A lexer_test_options subclass that records a list of error
2131    messages emitted by the lexer.  */
2132
2133 class lexer_error_sink : public lexer_test_options
2134 {
2135  public:
2136   lexer_error_sink ()
2137   {
2138     gcc_assert (s_singleton == NULL);
2139     s_singleton = this;
2140   }
2141   ~lexer_error_sink ()
2142   {
2143     gcc_assert (s_singleton == this);
2144     s_singleton = NULL;
2145
2146     int i;
2147     char *str;
2148     FOR_EACH_VEC_ELT (m_errors, i, str)
2149       free (str);
2150   }
2151
2152   void apply (lexer_test &test) FINAL OVERRIDE
2153   {
2154     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2155     callbacks->error = on_error;
2156   }
2157
2158   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2159                         int level ATTRIBUTE_UNUSED,
2160                         int reason ATTRIBUTE_UNUSED,
2161                         rich_location *richloc ATTRIBUTE_UNUSED,
2162                         const char *msgid, va_list *ap)
2163     ATTRIBUTE_FPTR_PRINTF(5,0)
2164   {
2165     char *msg = xvasprintf (msgid, *ap);
2166     s_singleton->m_errors.safe_push (msg);
2167     return true;
2168   }
2169
2170   auto_vec<char *> m_errors;
2171
2172  private:
2173   static lexer_error_sink *s_singleton;
2174 };
2175
2176 lexer_error_sink *lexer_error_sink::s_singleton;
2177
2178 /* Constructor.  Override line_table with a new instance based on CASE_,
2179    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2180    start parsing the tempfile.  */
2181
2182 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2183                         lexer_test_options *options)
2184 : m_ltt (case_),
2185   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2186   /* Create a tempfile and write the text to it.  */
2187   m_tempfile (SELFTEST_LOCATION, ".c", content),
2188   m_concats (),
2189   m_implicitly_expect_EOF (true)
2190 {
2191   if (options)
2192     options->apply (*this);
2193
2194   cpp_init_iconv (m_parser);
2195
2196   /* Parse the file.  */
2197   const char *fname = cpp_read_main_file (m_parser,
2198                                           m_tempfile.get_filename ());
2199   ASSERT_NE (fname, NULL);
2200 }
2201
2202 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2203
2204 lexer_test::~lexer_test ()
2205 {
2206   location_t loc;
2207   const cpp_token *tok;
2208
2209   if (m_implicitly_expect_EOF)
2210     {
2211       tok = cpp_get_token_with_location (m_parser, &loc);
2212       ASSERT_NE (tok, NULL);
2213       ASSERT_EQ (tok->type, CPP_EOF);
2214     }
2215 }
2216
2217 /* Get the next token from m_parser.  */
2218
2219 const cpp_token *
2220 lexer_test::get_token ()
2221 {
2222   location_t loc;
2223   const cpp_token *tok;
2224
2225   tok = cpp_get_token_with_location (m_parser, &loc);
2226   ASSERT_NE (tok, NULL);
2227   return tok;
2228 }
2229
2230 /* Verify that locations within string literals are correctly handled.  */
2231
2232 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2233    using the string concatenation database for TEST.
2234
2235    Assert that the character at index IDX is on EXPECTED_LINE,
2236    and that it begins at column EXPECTED_START_COL and ends at
2237    EXPECTED_FINISH_COL (unless the locations are beyond
2238    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2239    columns).  */
2240
2241 static void
2242 assert_char_at_range (const location &loc,
2243                       lexer_test& test,
2244                       location_t strloc, enum cpp_ttype type, int idx,
2245                       int expected_line, int expected_start_col,
2246                       int expected_finish_col)
2247 {
2248   cpp_reader *pfile = test.m_parser;
2249   string_concat_db *concats = &test.m_concats;
2250
2251   source_range actual_range = source_range();
2252   const char *err
2253     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2254                                  &actual_range);
2255   if (should_have_column_data_p (strloc))
2256     ASSERT_EQ_AT (loc, NULL, err);
2257   else
2258     {
2259       ASSERT_STREQ_AT (loc,
2260                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2261                        err);
2262       return;
2263     }
2264
2265   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2266   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2267   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2268   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2269
2270   if (should_have_column_data_p (actual_range.m_start))
2271     {
2272       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2273       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2274     }
2275   if (should_have_column_data_p (actual_range.m_finish))
2276     {
2277       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2278       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2279     }
2280 }
2281
/* Call assert_char_at_range, with SELFTEST_LOCATION as the effective
   location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2290
2291 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2292    using the string concatenation database for TEST.
2293
2294    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2295
2296 static void
2297 assert_num_substring_ranges (const location &loc,
2298                              lexer_test& test,
2299                              location_t strloc,
2300                              enum cpp_ttype type,
2301                              int expected_num_ranges)
2302 {
2303   cpp_reader *pfile = test.m_parser;
2304   string_concat_db *concats = &test.m_concats;
2305
2306   int actual_num_ranges = -1;
2307   const char *err
2308     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2309                                            &actual_num_ranges);
2310   if (should_have_column_data_p (strloc))
2311     ASSERT_EQ_AT (loc, NULL, err);
2312   else
2313     {
2314       ASSERT_STREQ_AT (loc,
2315                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2316                        err);
2317       return;
2318     }
2319   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2320 }
2321
/* Call assert_num_substring_ranges, with SELFTEST_LOCATION as the
   effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2329
2330
2331 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2332    returns an error (using the string concatenation database for TEST).  */
2333
2334 static void
2335 assert_has_no_substring_ranges (const location &loc,
2336                                 lexer_test& test,
2337                                 location_t strloc,
2338                                 enum cpp_ttype type,
2339                                 const char *expected_err)
2340 {
2341   cpp_reader *pfile = test.m_parser;
2342   string_concat_db *concats = &test.m_concats;
2343   cpp_substring_ranges ranges;
2344   const char *actual_err
2345     = get_substring_ranges_for_loc (pfile, concats, strloc,
2346                                     type, ranges);
2347   if (should_have_column_data_p (strloc))
2348     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2349   else
2350     ASSERT_STREQ_AT (loc,
2351                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2352                      actual_err);
2353 }
2354
/* Call assert_has_no_substring_ranges, with SELFTEST_LOCATION as the
   effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE), (ERR))
2358
2359 /* Lex a simple string literal.  Verify the substring location data, before
2360    and after running cpp_interpret_string on it.  */
2361
2362 static void
2363 test_lexer_string_locations_simple (const line_table_case &case_)
2364 {
2365   /* Digits 0-9 (with 0 at column 10), the simple way.
2366      ....................000000000.11111111112.2222222223333333333
2367      ....................123456789.01234567890.1234567890123456789
2368      We add a trailing comment to ensure that we correctly locate
2369      the end of the string literal token.  */
2370   const char *content = "        \"0123456789\" /* not a string */\n";
2371   lexer_test test (case_, content, NULL);
2372
2373   /* Verify that we get the expected token back, with the correct
2374      location information.  */
2375   const cpp_token *tok = test.get_token ();
2376   ASSERT_EQ (tok->type, CPP_STRING);
2377   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2378   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2379
2380   /* At this point in lexing, the quote characters are treated as part of
2381      the string (they are stripped off by cpp_interpret_string).  */
2382
2383   ASSERT_EQ (tok->val.str.len, 12);
2384
2385   /* Verify that cpp_interpret_string works.  */
2386   cpp_string dst_string;
2387   const enum cpp_ttype type = CPP_STRING;
2388   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2389                                       &dst_string, type);
2390   ASSERT_TRUE (result);
2391   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2392   free (const_cast <unsigned char *> (dst_string.text));
2393
2394   /* Verify ranges of individual characters.  This no longer includes the
2395      opening quote, but does include the closing quote.  */
2396   for (int i = 0; i <= 10; i++)
2397     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2398                           10 + i, 10 + i);
2399
2400   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2401 }
2402
2403 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2404    encoding.  */
2405
2406 static void
2407 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2408 {
2409   /* EBCDIC support requires iconv.  */
2410   if (!HAVE_ICONV)
2411     return;
2412
2413   /* Digits 0-9 (with 0 at column 10), the simple way.
2414      ....................000000000.11111111112.2222222223333333333
2415      ....................123456789.01234567890.1234567890123456789
2416      We add a trailing comment to ensure that we correctly locate
2417      the end of the string literal token.  */
2418   const char *content = "        \"0123456789\" /* not a string */\n";
2419   ebcdic_execution_charset use_ebcdic;
2420   lexer_test test (case_, content, &use_ebcdic);
2421
2422   /* Verify that we get the expected token back, with the correct
2423      location information.  */
2424   const cpp_token *tok = test.get_token ();
2425   ASSERT_EQ (tok->type, CPP_STRING);
2426   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2427   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2428
2429   /* At this point in lexing, the quote characters are treated as part of
2430      the string (they are stripped off by cpp_interpret_string).  */
2431
2432   ASSERT_EQ (tok->val.str.len, 12);
2433
2434   /* The remainder of the test requires an iconv implementation that
2435      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2436   if (use_ebcdic.iconv_errors_occurred_p ())
2437     return;
2438
2439   /* Verify that cpp_interpret_string works.  */
2440   cpp_string dst_string;
2441   const enum cpp_ttype type = CPP_STRING;
2442   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2443                                       &dst_string, type);
2444   ASSERT_TRUE (result);
2445   /* We should now have EBCDIC-encoded text, specifically
2446      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2447      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2448   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2449                 (const char *)dst_string.text);
2450   free (const_cast <unsigned char *> (dst_string.text));
2451
2452   /* Verify that we don't attempt to record substring location information
2453      for such cases.  */
2454   ASSERT_HAS_NO_SUBSTRING_RANGES
2455     (test, tok->src_loc, type,
2456      "execution character set != source character set");
2457 }
2458
2459 /* Lex a string literal containing a hex-escaped character.
2460    Verify the substring location data, before and after running
2461    cpp_interpret_string on it.  */
2462
2463 static void
2464 test_lexer_string_locations_hex (const line_table_case &case_)
2465 {
2466   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2467      and with a space in place of digit 6, to terminate the escaped
2468      hex code.
2469      ....................000000000.111111.11112222.
2470      ....................123456789.012345.67890123.  */
2471   const char *content = "        \"01234\\x35 789\"\n";
2472   lexer_test test (case_, content, NULL);
2473
2474   /* Verify that we get the expected token back, with the correct
2475      location information.  */
2476   const cpp_token *tok = test.get_token ();
2477   ASSERT_EQ (tok->type, CPP_STRING);
2478   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2479   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2480
2481   /* At this point in lexing, the quote characters are treated as part of
2482      the string (they are stripped off by cpp_interpret_string).  */
2483   ASSERT_EQ (tok->val.str.len, 15);
2484
2485   /* Verify that cpp_interpret_string works.  */
2486   cpp_string dst_string;
2487   const enum cpp_ttype type = CPP_STRING;
2488   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2489                                       &dst_string, type);
2490   ASSERT_TRUE (result);
2491   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2492   free (const_cast <unsigned char *> (dst_string.text));
2493
2494   /* Verify ranges of individual characters.  This no longer includes the
2495      opening quote, but does include the closing quote.  */
2496   for (int i = 0; i <= 4; i++)
2497     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2498   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2499   for (int i = 6; i <= 10; i++)
2500     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2501
2502   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2503 }
2504
2505 /* Lex a string literal containing an octal-escaped character.
2506    Verify the substring location data after running cpp_interpret_string
2507    on it.  */
2508
2509 static void
2510 test_lexer_string_locations_oct (const line_table_case &case_)
2511 {
2512   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2513      and with a space in place of digit 6, to terminate the escaped
2514      octal code.
2515      ....................000000000.111111.11112222.2222223333333333444
2516      ....................123456789.012345.67890123.4567890123456789012  */
2517   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2518   lexer_test test (case_, content, NULL);
2519
2520   /* Verify that we get the expected token back, with the correct
2521      location information.  */
2522   const cpp_token *tok = test.get_token ();
2523   ASSERT_EQ (tok->type, CPP_STRING);
2524   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2525
2526   /* Verify that cpp_interpret_string works.  */
2527   cpp_string dst_string;
2528   const enum cpp_ttype type = CPP_STRING;
2529   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2530                                       &dst_string, type);
2531   ASSERT_TRUE (result);
2532   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2533   free (const_cast <unsigned char *> (dst_string.text));
2534
2535   /* Verify ranges of individual characters.  This no longer includes the
2536      opening quote, but does include the closing quote.  */
2537   for (int i = 0; i < 5; i++)
2538     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2539   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2540   for (int i = 6; i <= 10; i++)
2541     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2542
2543   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2544 }
2545
2546 /* Test of string literal containing letter escapes.  */
2547
2548 static void
2549 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2550 {
2551   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2552      .....................000000000.1.11111.1.1.11222.22222223333333
2553      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2554   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2555   lexer_test test (case_, content, NULL);
2556
2557   /* Verify that we get the expected tokens back.  */
2558   const cpp_token *tok = test.get_token ();
2559   ASSERT_EQ (tok->type, CPP_STRING);
2560   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2561
2562   /* Verify ranges of individual characters. */
2563   /* "\t".  */
2564   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2565                         0, 1, 10, 11);
2566   /* "foo". */
2567   for (int i = 1; i <= 3; i++)
2568     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2569                           i, 1, 11 + i, 11 + i);
2570   /* "\\" and "\n".  */
2571   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2572                         4, 1, 15, 16);
2573   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2574                         5, 1, 17, 18);
2575
2576   /* "bar" and closing quote for nul-terminator.  */
2577   for (int i = 6; i <= 9; i++)
2578     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2579                           i, 1, 13 + i, 13 + i);
2580
2581   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2582 }
2583
2584 /* Another test of a string literal containing a letter escape.
2585    Based on string seen in
2586      printf ("%-%\n");
2587    in gcc.dg/format/c90-printf-1.c.  */
2588
2589 static void
2590 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2591 {
2592   /* .....................000000000.1111.11.1111.22222222223.
2593      .....................123456789.0123.45.6789.01234567890.  */
2594   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2595   lexer_test test (case_, content, NULL);
2596
2597   /* Verify that we get the expected tokens back.  */
2598   const cpp_token *tok = test.get_token ();
2599   ASSERT_EQ (tok->type, CPP_STRING);
2600   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2601
2602   /* Verify ranges of individual characters. */
2603   /* "%-%".  */
2604   for (int i = 0; i < 3; i++)
2605     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2606                           i, 1, 10 + i, 10 + i);
2607   /* "\n".  */
2608   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2609                         3, 1, 13, 14);
2610
2611   /* Closing quote for nul-terminator.  */
2612   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2613                         4, 1, 15, 15);
2614
2615   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2616 }
2617
2618 /* Lex a string literal containing UCN 4 characters.
2619    Verify the substring location data after running cpp_interpret_string
2620    on it.  */
2621
2622 static void
2623 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2624 {
2625   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2626      as UCN 4.
2627      ....................000000000.111111.111122.222222223.33333333344444
2628      ....................123456789.012345.678901.234567890.12345678901234  */
2629   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2630   lexer_test test (case_, content, NULL);
2631
2632   /* Verify that we get the expected token back, with the correct
2633      location information.  */
2634   const cpp_token *tok = test.get_token ();
2635   ASSERT_EQ (tok->type, CPP_STRING);
2636   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2637
2638   /* Verify that cpp_interpret_string works.
2639      The string should be encoded in the execution character
2640      set.  Assuming that that is UTF-8, we should have the following:
2641      -----------  ----  -----  -------  ----------------
2642      Byte offset  Byte  Octal  Unicode  Source Column(s)
2643      -----------  ----  -----  -------  ----------------
2644      0            0x30         '0'      10
2645      1            0x31         '1'      11
2646      2            0x32         '2'      12
2647      3            0x33         '3'      13
2648      4            0x34         '4'      14
2649      5            0xE2  \342   U+2174   15-20
2650      6            0x85  \205    (cont)  15-20
2651      7            0xB4  \264    (cont)  15-20
2652      8            0xE2  \342   U+2175   21-26
2653      9            0x85  \205    (cont)  21-26
2654      10           0xB5  \265    (cont)  21-26
2655      11           0x37         '7'      27
2656      12           0x38         '8'      28
2657      13           0x39         '9'      29
2658      14           0x00                  30 (closing quote)
2659      -----------  ----  -----  -------  ---------------.  */
2660
2661   cpp_string dst_string;
2662   const enum cpp_ttype type = CPP_STRING;
2663   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2664                                       &dst_string, type);
2665   ASSERT_TRUE (result);
2666   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2667                 (const char *)dst_string.text);
2668   free (const_cast <unsigned char *> (dst_string.text));
2669
2670   /* Verify ranges of individual characters.  This no longer includes the
2671      opening quote, but does include the closing quote.
2672      '01234'.  */
2673   for (int i = 0; i <= 4; i++)
2674     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2675   /* U+2174.  */
2676   for (int i = 5; i <= 7; i++)
2677     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2678   /* U+2175.  */
2679   for (int i = 8; i <= 10; i++)
2680     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2681   /* '789' and nul terminator  */
2682   for (int i = 11; i <= 14; i++)
2683     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2684
2685   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2686 }
2687
2688 /* Lex a string literal containing UCN 8 characters.
2689    Verify the substring location data after running cpp_interpret_string
2690    on it.  */
2691
2692 static void
2693 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2694 {
2695   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2696      ....................000000000.111111.1111222222.2222333333333.344444
2697      ....................123456789.012345.6789012345.6789012345678.901234  */
2698   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2699   lexer_test test (case_, content, NULL);
2700
2701   /* Verify that we get the expected token back, with the correct
2702      location information.  */
2703   const cpp_token *tok = test.get_token ();
2704   ASSERT_EQ (tok->type, CPP_STRING);
2705   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2706                            "\"01234\\U00002174\\U00002175789\"");
2707
2708   /* Verify that cpp_interpret_string works.
2709      The UTF-8 encoding of the string is identical to that from
2710      the ucn4 testcase above; the only difference is the column
2711      locations.  */
2712   cpp_string dst_string;
2713   const enum cpp_ttype type = CPP_STRING;
2714   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2715                                       &dst_string, type);
2716   ASSERT_TRUE (result);
2717   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2718                 (const char *)dst_string.text);
2719   free (const_cast <unsigned char *> (dst_string.text));
2720
2721   /* Verify ranges of individual characters.  This no longer includes the
2722      opening quote, but does include the closing quote.
2723      '01234'.  */
2724   for (int i = 0; i <= 4; i++)
2725     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2726   /* U+2174.  */
2727   for (int i = 5; i <= 7; i++)
2728     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2729   /* U+2175.  */
2730   for (int i = 8; i <= 10; i++)
2731     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2732   /* '789' at columns 35-37  */
2733   for (int i = 11; i <= 13; i++)
2734     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2735   /* Closing quote/nul-terminator at column 38.  */
2736   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2737
2738   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2739 }
2740
/* Read the four bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 32-bit quantity and return it in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t value = 0;

  /* Fold the bytes in from most to least significant.  */
  for (int idx = 0; idx < 4; ++idx)
    value = (value << 8) | (uint32_t) bytes[idx];

  return value;
}
2752
2753 /* Lex a wide string literal and verify that attempts to read substring
2754    location data from it fail gracefully.  */
2755
2756 static void
2757 test_lexer_string_locations_wide_string (const line_table_case &case_)
2758 {
2759   /* Digits 0-9.
2760      ....................000000000.11111111112.22222222233333
2761      ....................123456789.01234567890.12345678901234  */
2762   const char *content = "       L\"0123456789\" /* non-str */\n";
2763   lexer_test test (case_, content, NULL);
2764
2765   /* Verify that we get the expected token back, with the correct
2766      location information.  */
2767   const cpp_token *tok = test.get_token ();
2768   ASSERT_EQ (tok->type, CPP_WSTRING);
2769   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2770
2771   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2772   cpp_string dst_string;
2773   const enum cpp_ttype type = CPP_WSTRING;
2774   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2775                                       &dst_string, type);
2776   ASSERT_TRUE (result);
2777   /* The cpp_reader defaults to big-endian with
2778      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2779      now be encoded as UTF-32BE.  */
2780   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2781   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2782   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2783   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2784   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2785   free (const_cast <unsigned char *> (dst_string.text));
2786
2787   /* We don't yet support generating substring location information
2788      for L"" strings.  */
2789   ASSERT_HAS_NO_SUBSTRING_RANGES
2790     (test, tok->src_loc, type,
2791      "execution character set != source character set");
2792 }
2793
/* Read the two bytes at PTR_BE_VALUE as a big-endian (most significant
   byte first) 16-bit quantity and return it in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const uint16_t high = bytes[0];
  const uint16_t low = bytes[1];
  return (uint16_t) ((high << 8) | low);
}
2802
2803 /* Lex a u"" string literal and verify that attempts to read substring
2804    location data from it fail gracefully.  */
2805
2806 static void
2807 test_lexer_string_locations_string16 (const line_table_case &case_)
2808 {
2809   /* Digits 0-9.
2810      ....................000000000.11111111112.22222222233333
2811      ....................123456789.01234567890.12345678901234  */
2812   const char *content = "       u\"0123456789\" /* non-str */\n";
2813   lexer_test test (case_, content, NULL);
2814
2815   /* Verify that we get the expected token back, with the correct
2816      location information.  */
2817   const cpp_token *tok = test.get_token ();
2818   ASSERT_EQ (tok->type, CPP_STRING16);
2819   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2820
2821   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2822   cpp_string dst_string;
2823   const enum cpp_ttype type = CPP_STRING16;
2824   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2825                                       &dst_string, type);
2826   ASSERT_TRUE (result);
2827
2828   /* The cpp_reader defaults to big-endian, so dst_string should
2829      now be encoded as UTF-16BE.  */
2830   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2831   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2832   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2833   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2834   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2835   free (const_cast <unsigned char *> (dst_string.text));
2836
2837   /* We don't yet support generating substring location information
2838      for L"" strings.  */
2839   ASSERT_HAS_NO_SUBSTRING_RANGES
2840     (test, tok->src_loc, type,
2841      "execution character set != source character set");
2842 }
2843
2844 /* Lex a U"" string literal and verify that attempts to read substring
2845    location data from it fail gracefully.  */
2846
2847 static void
2848 test_lexer_string_locations_string32 (const line_table_case &case_)
2849 {
2850   /* Digits 0-9.
2851      ....................000000000.11111111112.22222222233333
2852      ....................123456789.01234567890.12345678901234  */
2853   const char *content = "       U\"0123456789\" /* non-str */\n";
2854   lexer_test test (case_, content, NULL);
2855
2856   /* Verify that we get the expected token back, with the correct
2857      location information.  */
2858   const cpp_token *tok = test.get_token ();
2859   ASSERT_EQ (tok->type, CPP_STRING32);
2860   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2861
2862   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2863   cpp_string dst_string;
2864   const enum cpp_ttype type = CPP_STRING32;
2865   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2866                                       &dst_string, type);
2867   ASSERT_TRUE (result);
2868
2869   /* The cpp_reader defaults to big-endian, so dst_string should
2870      now be encoded as UTF-32BE.  */
2871   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2872   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2873   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2874   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2875   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2876   free (const_cast <unsigned char *> (dst_string.text));
2877
2878   /* We don't yet support generating substring location information
2879      for L"" strings.  */
2880   ASSERT_HAS_NO_SUBSTRING_RANGES
2881     (test, tok->src_loc, type,
2882      "execution character set != source character set");
2883 }
2884
2885 /* Lex a u8-string literal.
2886    Verify the substring location data after running cpp_interpret_string
2887    on it.  */
2888
2889 static void
2890 test_lexer_string_locations_u8 (const line_table_case &case_)
2891 {
2892   /* Digits 0-9.
2893      ....................000000000.11111111112.22222222233333
2894      ....................123456789.01234567890.12345678901234  */
2895   const char *content = "      u8\"0123456789\" /* non-str */\n";
2896   lexer_test test (case_, content, NULL);
2897
2898   /* Verify that we get the expected token back, with the correct
2899      location information.  */
2900   const cpp_token *tok = test.get_token ();
2901   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2902   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2903
2904   /* Verify that cpp_interpret_string works.  */
2905   cpp_string dst_string;
2906   const enum cpp_ttype type = CPP_STRING;
2907   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2908                                       &dst_string, type);
2909   ASSERT_TRUE (result);
2910   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2911   free (const_cast <unsigned char *> (dst_string.text));
2912
2913   /* Verify ranges of individual characters.  This no longer includes the
2914      opening quote, but does include the closing quote.  */
2915   for (int i = 0; i <= 10; i++)
2916     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2917 }
2918
2919 /* Lex a string literal containing UTF-8 source characters.
2920    Verify the substring location data after running cpp_interpret_string
2921    on it.  */
2922
2923 static void
2924 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2925 {
2926  /* This string literal is written out to the source file as UTF-8,
2927     and is of the form "before mojibake after", where "mojibake"
2928     is written as the following four unicode code points:
2929        U+6587 CJK UNIFIED IDEOGRAPH-6587
2930        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2931        U+5316 CJK UNIFIED IDEOGRAPH-5316
2932        U+3051 HIRAGANA LETTER KE.
2933      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2934      "before" and "after" are 1 byte per unicode character.
2935
2936      The numbering shown are "columns", which are *byte* numbers within
2937      the line, rather than unicode character numbers.
2938
2939      .................... 000000000.1111111.
2940      .................... 123456789.0123456.  */
2941   const char *content = ("        \"before "
2942                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2943                               UTF-8: 0xE6 0x96 0x87
2944                               C octal escaped UTF-8: \346\226\207
2945                             "column" numbers: 17-19.  */
2946                          "\346\226\207"
2947
2948                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2949                               UTF-8: 0xE5 0xAD 0x97
2950                               C octal escaped UTF-8: \345\255\227
2951                             "column" numbers: 20-22.  */
2952                          "\345\255\227"
2953
2954                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2955                               UTF-8: 0xE5 0x8C 0x96
2956                               C octal escaped UTF-8: \345\214\226
2957                             "column" numbers: 23-25.  */
2958                          "\345\214\226"
2959
2960                          /* U+3051 HIRAGANA LETTER KE
2961                               UTF-8: 0xE3 0x81 0x91
2962                               C octal escaped UTF-8: \343\201\221
2963                             "column" numbers: 26-28.  */
2964                          "\343\201\221"
2965
2966                          /* column numbers 29 onwards
2967                           2333333.33334444444444
2968                           9012345.67890123456789. */
2969                          " after\" /* non-str */\n");
2970   lexer_test test (case_, content, NULL);
2971
2972   /* Verify that we get the expected token back, with the correct
2973      location information.  */
2974   const cpp_token *tok = test.get_token ();
2975   ASSERT_EQ (tok->type, CPP_STRING);
2976   ASSERT_TOKEN_AS_TEXT_EQ
2977     (test.m_parser, tok,
2978      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2979
2980   /* Verify that cpp_interpret_string works.  */
2981   cpp_string dst_string;
2982   const enum cpp_ttype type = CPP_STRING;
2983   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2984                                       &dst_string, type);
2985   ASSERT_TRUE (result);
2986   ASSERT_STREQ
2987     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2988      (const char *)dst_string.text);
2989   free (const_cast <unsigned char *> (dst_string.text));
2990
2991   /* Verify ranges of individual characters.  This no longer includes the
2992      opening quote, but does include the closing quote.
2993      Assuming that both source and execution encodings are UTF-8, we have
2994      a run of 25 octets in each, plus the NUL terminator.  */
2995   for (int i = 0; i < 25; i++)
2996     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2997   /* NUL-terminator should use the closing quote at column 35.  */
2998   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2999
3000   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3001 }
3002
3003 /* Test of string literal concatenation.  */
3004
3005 static void
3006 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3007 {
3008   /* Digits 0-9.
3009      .....................000000000.111111.11112222222222
3010      .....................123456789.012345.67890123456789.  */
3011   const char *content = ("        \"01234\" /* non-str */\n"
3012                          "        \"56789\" /* non-str */\n");
3013   lexer_test test (case_, content, NULL);
3014
3015   location_t input_locs[2];
3016
3017   /* Verify that we get the expected tokens back.  */
3018   auto_vec <cpp_string> input_strings;
3019   const cpp_token *tok_a = test.get_token ();
3020   ASSERT_EQ (tok_a->type, CPP_STRING);
3021   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3022   input_strings.safe_push (tok_a->val.str);
3023   input_locs[0] = tok_a->src_loc;
3024
3025   const cpp_token *tok_b = test.get_token ();
3026   ASSERT_EQ (tok_b->type, CPP_STRING);
3027   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3028   input_strings.safe_push (tok_b->val.str);
3029   input_locs[1] = tok_b->src_loc;
3030
3031   /* Verify that cpp_interpret_string works.  */
3032   cpp_string dst_string;
3033   const enum cpp_ttype type = CPP_STRING;
3034   bool result = cpp_interpret_string (test.m_parser,
3035                                       input_strings.address (), 2,
3036                                       &dst_string, type);
3037   ASSERT_TRUE (result);
3038   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3039   free (const_cast <unsigned char *> (dst_string.text));
3040
3041   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3042   test.m_concats.record_string_concatenation (2, input_locs);
3043
3044   location_t initial_loc = input_locs[0];
3045
3046   /* "01234" on line 1.  */
3047   for (int i = 0; i <= 4; i++)
3048     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3049   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3050   for (int i = 5; i <= 10; i++)
3051     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3052
3053   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3054 }
3055
3056 /* Another test of string literal concatenation.  */
3057
3058 static void
3059 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3060 {
3061   /* Digits 0-9.
3062      .....................000000000.111.11111112222222
3063      .....................123456789.012.34567890123456.  */
3064   const char *content = ("        \"01\" /* non-str */\n"
3065                          "        \"23\" /* non-str */\n"
3066                          "        \"45\" /* non-str */\n"
3067                          "        \"67\" /* non-str */\n"
3068                          "        \"89\" /* non-str */\n");
3069   lexer_test test (case_, content, NULL);
3070
3071   auto_vec <cpp_string> input_strings;
3072   location_t input_locs[5];
3073
3074   /* Verify that we get the expected tokens back.  */
3075   for (int i = 0; i < 5; i++)
3076     {
3077       const cpp_token *tok = test.get_token ();
3078       ASSERT_EQ (tok->type, CPP_STRING);
3079       input_strings.safe_push (tok->val.str);
3080       input_locs[i] = tok->src_loc;
3081     }
3082
3083   /* Verify that cpp_interpret_string works.  */
3084   cpp_string dst_string;
3085   const enum cpp_ttype type = CPP_STRING;
3086   bool result = cpp_interpret_string (test.m_parser,
3087                                       input_strings.address (), 5,
3088                                       &dst_string, type);
3089   ASSERT_TRUE (result);
3090   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3091   free (const_cast <unsigned char *> (dst_string.text));
3092
3093   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3094   test.m_concats.record_string_concatenation (5, input_locs);
3095
3096   location_t initial_loc = input_locs[0];
3097
3098   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3099      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3100      and expect get_source_range_for_substring to fail.
3101      However, for a string concatenation test, we can have a case
3102      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3103      but subsequent strings can be after it.
3104      Attempting to detect this within assert_char_at_range
3105      would overcomplicate the logic for the common test cases, so
3106      we detect it here.  */
3107   if (should_have_column_data_p (input_locs[0])
3108       && !should_have_column_data_p (input_locs[4]))
3109     {
3110       /* Verify that get_source_range_for_substring gracefully rejects
3111          this case.  */
3112       source_range actual_range;
3113       const char *err
3114         = get_source_range_for_char (test.m_parser, &test.m_concats,
3115                                      initial_loc, type, 0, &actual_range);
3116       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3117       return;
3118     }
3119
3120   for (int i = 0; i < 5; i++)
3121     for (int j = 0; j < 2; j++)
3122       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3123                             i + 1, 10 + j, 10 + j);
3124
3125   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3126   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3127
3128   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3129 }
3130
3131 /* Another test of string literal concatenation, this time combined with
3132    various kinds of escaped characters.  */
3133
3134 static void
3135 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3136 {
3137   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3138      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3139   const char *content
3140     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3141        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3142     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3143   lexer_test test (case_, content, NULL);
3144
3145   auto_vec <cpp_string> input_strings;
3146   location_t input_locs[4];
3147
3148   /* Verify that we get the expected tokens back.  */
3149   for (int i = 0; i < 4; i++)
3150     {
3151       const cpp_token *tok = test.get_token ();
3152       ASSERT_EQ (tok->type, CPP_STRING);
3153       input_strings.safe_push (tok->val.str);
3154       input_locs[i] = tok->src_loc;
3155     }
3156
3157   /* Verify that cpp_interpret_string works.  */
3158   cpp_string dst_string;
3159   const enum cpp_ttype type = CPP_STRING;
3160   bool result = cpp_interpret_string (test.m_parser,
3161                                       input_strings.address (), 4,
3162                                       &dst_string, type);
3163   ASSERT_TRUE (result);
3164   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3165   free (const_cast <unsigned char *> (dst_string.text));
3166
3167   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3168   test.m_concats.record_string_concatenation (4, input_locs);
3169
3170   location_t initial_loc = input_locs[0];
3171
3172   for (int i = 0; i <= 4; i++)
3173     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3174   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3175   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3176   for (int i = 7; i <= 9; i++)
3177     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3178
3179   /* NUL-terminator should use the location of the final closing quote.  */
3180   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3181
3182   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3183 }
3184
3185 /* Test of string literal in a macro.  */
3186
3187 static void
3188 test_lexer_string_locations_macro (const line_table_case &case_)
3189 {
3190   /* Digits 0-9.
3191      .....................0000000001111111111.22222222223.
3192      .....................1234567890123456789.01234567890.  */
3193   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3194                          "  MACRO");
3195   lexer_test test (case_, content, NULL);
3196
3197   /* Verify that we get the expected tokens back.  */
3198   const cpp_token *tok = test.get_token ();
3199   ASSERT_EQ (tok->type, CPP_PADDING);
3200
3201   tok = test.get_token ();
3202   ASSERT_EQ (tok->type, CPP_STRING);
3203   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3204
3205   /* Verify ranges of individual characters.  We ought to
3206      see columns within the macro definition.  */
3207   for (int i = 0; i <= 10; i++)
3208     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3209                           i, 1, 20 + i, 20 + i);
3210
3211   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3212
3213   tok = test.get_token ();
3214   ASSERT_EQ (tok->type, CPP_PADDING);
3215 }
3216
3217 /* Test of stringification of a macro argument.  */
3218
3219 static void
3220 test_lexer_string_locations_stringified_macro_argument
3221   (const line_table_case &case_)
3222 {
3223   /* .....................000000000111111111122222222223.
3224      .....................123456789012345678901234567890.  */
3225   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3226                          "MACRO(foo)\n");
3227   lexer_test test (case_, content, NULL);
3228
3229   /* Verify that we get the expected token back.  */
3230   const cpp_token *tok = test.get_token ();
3231   ASSERT_EQ (tok->type, CPP_PADDING);
3232
3233   tok = test.get_token ();
3234   ASSERT_EQ (tok->type, CPP_STRING);
3235   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3236
3237   /* We don't support getting the location of a stringified macro
3238      argument.  Verify that it fails gracefully.  */
3239   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3240                                   "cpp_interpret_string_1 failed");
3241
3242   tok = test.get_token ();
3243   ASSERT_EQ (tok->type, CPP_PADDING);
3244
3245   tok = test.get_token ();
3246   ASSERT_EQ (tok->type, CPP_PADDING);
3247 }
3248
3249 /* Ensure that we are fail gracefully if something attempts to pass
3250    in a location that isn't a string literal token.  Seen on this code:
3251
3252      const char a[] = " %d ";
3253      __builtin_printf (a, 0.5);
3254                        ^
3255
3256    when c-format.c erroneously used the indicated one-character
3257    location as the format string location, leading to a read past the
3258    end of a string buffer in cpp_interpret_string_1.  */
3259
3260 static void
3261 test_lexer_string_locations_non_string (const line_table_case &case_)
3262 {
3263   /* .....................000000000111111111122222222223.
3264      .....................123456789012345678901234567890.  */
3265   const char *content = ("         a\n");
3266   lexer_test test (case_, content, NULL);
3267
3268   /* Verify that we get the expected token back.  */
3269   const cpp_token *tok = test.get_token ();
3270   ASSERT_EQ (tok->type, CPP_NAME);
3271   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3272
3273   /* At this point, libcpp is attempting to interpret the name as a
3274      string literal, despite it not starting with a quote.  We don't detect
3275      that, but we should at least fail gracefully.  */
3276   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3277                                   "cpp_interpret_string_1 failed");
3278 }
3279
3280 /* Ensure that we can read substring information for a token which
3281    starts in one linemap and ends in another .  Adapted from
3282    gcc.dg/cpp/pr69985.c.  */
3283
3284 static void
3285 test_lexer_string_locations_long_line (const line_table_case &case_)
3286 {
3287   /* .....................000000.000111111111
3288      .....................123456.789012346789.  */
3289   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3290                          "     \"0123456789012345678901234567890123456789"
3291                          "0123456789012345678901234567890123456789"
3292                          "0123456789012345678901234567890123456789"
3293                          "0123456789\"\n");
3294
3295   lexer_test test (case_, content, NULL);
3296
3297   /* Verify that we get the expected token back.  */
3298   const cpp_token *tok = test.get_token ();
3299   ASSERT_EQ (tok->type, CPP_STRING);
3300
3301   if (!should_have_column_data_p (line_table->highest_location))
3302     return;
3303
3304   /* Verify ranges of individual characters.  */
3305   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3306   for (int i = 0; i < 131; i++)
3307     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3308                           i, 2, 7 + i, 7 + i);
3309 }
3310
3311 /* Test of locations within a raw string that doesn't contain a newline.  */
3312
3313 static void
3314 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3315 {
3316   /* .....................00.0000000111111111122.
3317      .....................12.3456789012345678901.  */
3318   const char *content = ("R\"foo(0123456789)foo\"\n");
3319   lexer_test test (case_, content, NULL);
3320
3321   /* Verify that we get the expected token back.  */
3322   const cpp_token *tok = test.get_token ();
3323   ASSERT_EQ (tok->type, CPP_STRING);
3324
3325   /* Verify that cpp_interpret_string works.  */
3326   cpp_string dst_string;
3327   const enum cpp_ttype type = CPP_STRING;
3328   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3329                                       &dst_string, type);
3330   ASSERT_TRUE (result);
3331   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3332   free (const_cast <unsigned char *> (dst_string.text));
3333
3334   if (!should_have_column_data_p (line_table->highest_location))
3335     return;
3336
3337   /* 0-9, plus the nil terminator.  */
3338   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3339   for (int i = 0; i < 11; i++)
3340     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3341                           i, 1, 7 + i, 7 + i);
3342 }
3343
3344 /* Test of locations within a raw string that contains a newline.  */
3345
3346 static void
3347 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3348 {
3349   /* .....................00.0000.
3350      .....................12.3456.  */
3351   const char *content = ("R\"foo(\n"
3352   /* .....................00000.
3353      .....................12345.  */
3354                          "hello\n"
3355                          "world\n"
3356   /* .....................00000.
3357      .....................12345.  */
3358                          ")foo\"\n");
3359   lexer_test test (case_, content, NULL);
3360
3361   /* Verify that we get the expected token back.  */
3362   const cpp_token *tok = test.get_token ();
3363   ASSERT_EQ (tok->type, CPP_STRING);
3364
3365   /* Verify that cpp_interpret_string works.  */
3366   cpp_string dst_string;
3367   const enum cpp_ttype type = CPP_STRING;
3368   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3369                                       &dst_string, type);
3370   ASSERT_TRUE (result);
3371   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3372   free (const_cast <unsigned char *> (dst_string.text));
3373
3374   if (!should_have_column_data_p (line_table->highest_location))
3375     return;
3376
3377   /* Currently we don't support locations within raw strings that
3378      contain newlines.  */
3379   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3380                                   "range endpoints are on different lines");
3381 }
3382
3383 /* Test of parsing an unterminated raw string.  */
3384
3385 static void
3386 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3387 {
3388   const char *content = "R\"ouch()ouCh\" /* etc */";
3389
3390   lexer_error_sink errors;
3391   lexer_test test (case_, content, &errors);
3392   test.m_implicitly_expect_EOF = false;
3393
3394   /* Attempt to parse the raw string.  */
3395   const cpp_token *tok = test.get_token ();
3396   ASSERT_EQ (tok->type, CPP_EOF);
3397
3398   ASSERT_EQ (1, errors.m_errors.length ());
3399   /* We expect the message "unterminated raw string"
3400      in the "cpplib" translation domain.
3401      It's not clear that dgettext is available on all supported hosts,
3402      so this assertion is commented-out for now.
3403        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3404                      errors.m_errors[0]);
3405   */
3406 }
3407
3408 /* Test of lexing char constants.  */
3409
3410 static void
3411 test_lexer_char_constants (const line_table_case &case_)
3412 {
3413   /* Various char constants.
3414      .....................0000000001111111111.22222222223.
3415      .....................1234567890123456789.01234567890.  */
3416   const char *content = ("         'a'\n"
3417                          "        u'a'\n"
3418                          "        U'a'\n"
3419                          "        L'a'\n"
3420                          "         'abc'\n");
3421   lexer_test test (case_, content, NULL);
3422
3423   /* Verify that we get the expected tokens back.  */
3424   /* 'a'.  */
3425   const cpp_token *tok = test.get_token ();
3426   ASSERT_EQ (tok->type, CPP_CHAR);
3427   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3428
3429   unsigned int chars_seen;
3430   int unsignedp;
3431   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3432                                           &chars_seen, &unsignedp);
3433   ASSERT_EQ (cc, 'a');
3434   ASSERT_EQ (chars_seen, 1);
3435
3436   /* u'a'.  */
3437   tok = test.get_token ();
3438   ASSERT_EQ (tok->type, CPP_CHAR16);
3439   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3440
3441   /* U'a'.  */
3442   tok = test.get_token ();
3443   ASSERT_EQ (tok->type, CPP_CHAR32);
3444   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3445
3446   /* L'a'.  */
3447   tok = test.get_token ();
3448   ASSERT_EQ (tok->type, CPP_WCHAR);
3449   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3450
3451   /* 'abc' (c-char-sequence).  */
3452   tok = test.get_token ();
3453   ASSERT_EQ (tok->type, CPP_CHAR);
3454   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3455 }
3456 /* A table of interesting location_t values, giving one axis of our test
3457    matrix.  */
3458
3459 static const location_t boundary_locations[] = {
3460   /* Zero means "don't override the default values for a new line_table".  */
3461   0,
3462
3463   /* An arbitrary non-zero value that isn't close to one of
3464      the boundary values below.  */
3465   0x10000,
3466
3467   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3468   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3469   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3470   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3471   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3472   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3473
3474   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3475   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3476   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3477   LINE_MAP_MAX_LOCATION_WITH_COLS,
3478   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3479   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3480 };
3481
3482 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3483
3484 void
3485 for_each_line_table_case (void (*testcase) (const line_table_case &))
3486 {
3487   /* As noted above in the description of struct line_table_case,
3488      we want to explore a test matrix of interesting line_table
3489      situations, running various selftests for each case within the
3490      matrix.  */
3491
3492   /* Run all tests with:
3493      (a) line_table->default_range_bits == 0, and
3494      (b) line_table->default_range_bits == 5.  */
3495   int num_cases_tested = 0;
3496   for (int default_range_bits = 0; default_range_bits <= 5;
3497        default_range_bits += 5)
3498     {
3499       /* ...and use each of the "interesting" location values as
3500          the starting location within line_table.  */
3501       const int num_boundary_locations
3502         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3503       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3504         {
3505           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3506
3507           testcase (c);
3508
3509           num_cases_tested++;
3510         }
3511     }
3512
3513   /* Verify that we fully covered the test matrix.  */
3514   ASSERT_EQ (num_cases_tested, 2 * 12);
3515 }
3516
3517 /* Run all of the selftests within this file.  */
3518
3519 void
3520 input_c_tests ()
3521 {
3522   test_should_have_column_data_p ();
3523   test_unknown_location ();
3524   test_builtins ();
3525   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3526
3527   for_each_line_table_case (test_accessing_ordinary_linemaps);
3528   for_each_line_table_case (test_lexer);
3529   for_each_line_table_case (test_lexer_string_locations_simple);
3530   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3531   for_each_line_table_case (test_lexer_string_locations_hex);
3532   for_each_line_table_case (test_lexer_string_locations_oct);
3533   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3534   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3535   for_each_line_table_case (test_lexer_string_locations_ucn4);
3536   for_each_line_table_case (test_lexer_string_locations_ucn8);
3537   for_each_line_table_case (test_lexer_string_locations_wide_string);
3538   for_each_line_table_case (test_lexer_string_locations_string16);
3539   for_each_line_table_case (test_lexer_string_locations_string32);
3540   for_each_line_table_case (test_lexer_string_locations_u8);
3541   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3542   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3543   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3544   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3545   for_each_line_table_case (test_lexer_string_locations_macro);
3546   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3547   for_each_line_table_case (test_lexer_string_locations_non_string);
3548   for_each_line_table_case (test_lexer_string_locations_long_line);
3549   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3550   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3551   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3552   for_each_line_table_case (test_lexer_char_constants);
3553
3554   test_reading_source_line ();
3555 }
3556
3557 } // namespace selftest
3558
3559 #endif /* CHECKING_P */