gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic.h"
  25 #include "diagnostic-core.h"
  26 #include "selftest.h"
  27 #include "cpplib.h"
  28
  29 #ifndef HAVE_ICONV
  30 #define HAVE_ICONV 0
  31 #endif
  32
/* A cache holding the content of one source file, so that individual
   lines can be fetched from it (see get_next_line and read_line_num)
   without re-reading bytes that have already been loaded.  */
struct fcache
{
  /* The boundaries of one line within the cached file data.  */
  struct line_info
  {
    /* The line number.  It starts from 1.  */
    size_t line_num;

    /* The position (byte count) of the beginning of the line,
       relative to the file data pointer.  This starts at zero.  */
    size_t start_pos;

    /* The position (byte count) of the end of the line.  This
       normally points to the '\n' character, or to one byte after the
       last byte read from the file, if the line has no terminating
       '\n' character.  */
    size_t end_pos;

    line_info (size_t l, size_t s, size_t e)
      : line_num (l), start_pos (s), end_pos (e)
    {}

    line_info ()
      :line_num (0), start_pos (0), end_pos (0)
    {}
  };

  /* A usage counter for this entry.  It is bumped when the entry is
     found by a lookup and is set above the highest count seen when
     the entry is (re)filled with a new file.  It is used to designate
     which entry to evict from the cache array.  */
  unsigned use_count;

  /* The file_path is the key for identifying a particular file in
     the cache.  It is NULL when the entry holds no file.
     For libcpp-using code, the underlying buffer for this field is
     owned by the corresponding _cpp_file within the cpp_reader.  */
  const char *file_path;

  /* The stream the content of FILE_PATH is read from, or NULL when
     the entry holds no file.  */
  FILE *fp;

  /* This points to the content of the file that we've read so
     far.  */
  char *data;

  /* The allocated size of the DATA array above.  */
  size_t size;

  /* The number of bytes read from the underlying file so far.  This
     must be less than or equal to SIZE above.  */
  size_t nb_read;

  /* The index, in DATA, of the beginning of the next line that
     get_next_line will hand out.  */
  size_t line_start_idx;

  /* The number of the previous line read.  This starts at 1.  Zero
     means we've read no line so far.  */
  size_t line_num;

  /* This is the total number of lines of the current file.  At the
     moment, we try to get this information from the line map
     subsystem.  Note that this is just a hint.  When using the C++
     front-end, this hint is correct because the input file is then
     completely tokenized before parsing starts; so the line map knows
     the number of lines before compilation really starts.  For e.g.
     the C front-end, it can happen that we start emitting diagnostics
     before the line map has seen the end of the file.  */
  size_t total_lines;

  /* Could this file be missing a trailing newline on its final line?
     Initially true (to cope with empty files), then set to true or
     false as each line is read.  */
  bool missing_trailing_newline;

  /* This is a record of the beginning and end of the lines we've seen
     while reading the file.  This is useful to avoid walking the data
     from the beginning when we are asked to read a line that is
     before LINE_START_IDX above.  Note that the maximum size of this
     record is fcache_line_record_size, so that the memory consumption
     doesn't explode.  For files with more lines than that, only a
     sample of the lines is recorded: total_lines is scaled down to
     fcache_line_record_size.  */
  vec<line_info, va_heap> line_record;

  fcache ();
  ~fcache ();
};
 120
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps that the location-expansion routines in this
   file resolve locations against.  */

struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* The table of per-file caches used when fetching source lines.  It
   is allocated lazily by diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;

/* The number of entries in FCACHE_TAB.  */
static const size_t fcache_tab_size = 16;

/* The initial size, in bytes, of the data buffer of an fcache; the
   buffer is doubled each time it fills up (see maybe_grow).  */
static const size_t fcache_buffer_size = 4 * 1024;

/* The maximum number of entries kept in the line_record of an
   fcache.  */
static const size_t fcache_line_record_size = 100;
 138
 139 /* Expand the source location LOC into a human readable location.  If
 140    LOC resolves to a builtin location, the file name of the readable
 141    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 142    TRUE and LOC is virtual, then it is resolved to the expansion
 143    point of the involved macro.  Otherwise, it is resolved to the
 144    spelling location of the token.
 145
 146    When resolving to the spelling location of the token, if the
 147    resulting location is for a built-in location (that is, it has no
 148    associated line/column) in the context of a macro expansion, the
 149    returned location is the first one (while unwinding the macro
 150    location towards its expansion point) that is in real source
 151    code.
 152
 153    ASPECT controls which part of the location to use.  */
 154
 155 static expanded_location
 156 expand_location_1 (location_t loc,
 157                    bool expansion_point_p,
 158                    enum location_aspect aspect)
 159 {
 160   expanded_location xloc;
 161   const line_map_ordinary *map;
 162   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 163   tree block = NULL;
 164
 165   if (IS_ADHOC_LOC (loc))
 166     {
 167       block = LOCATION_BLOCK (loc);
 168       loc = LOCATION_LOCUS (loc);
 169     }
 170
 171   memset (&xloc, 0, sizeof (xloc));
 172
 173   if (loc >= RESERVED_LOCATION_COUNT)
 174     {
 175       if (!expansion_point_p)
 176         {
 177           /* We want to resolve LOC to its spelling location.
 178
 179              But if that spelling location is a reserved location that
 180              appears in the context of a macro expansion (like for a
 181              location for a built-in token), let's consider the first
 182              location (toward the expansion point) that is not reserved;
 183              that is, the first location that is in real source code.  */
 184           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 185                                                           loc, NULL);
 186           lrk = LRK_SPELLING_LOCATION;
 187         }
 188       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 189
 190       /* loc is now either in an ordinary map, or is a reserved location.
 191          If it is a compound location, the caret is in a spelling location,
 192          but the start/finish might still be a virtual location.
 193          Depending of what the caller asked for, we may need to recurse
 194          one level in order to resolve any virtual locations in the
 195          end-points.  */
 196       switch (aspect)
 197         {
 198         default:
 199           gcc_unreachable ();
 200           /* Fall through.  */
 201         case LOCATION_ASPECT_CARET:
 202           break;
 203         case LOCATION_ASPECT_START:
 204           {
 205             location_t start = get_start (loc);
 206             if (start != loc)
 207               return expand_location_1 (start, expansion_point_p, aspect);
 208           }
 209           break;
 210         case LOCATION_ASPECT_FINISH:
 211           {
 212             location_t finish = get_finish (loc);
 213             if (finish != loc)
 214               return expand_location_1 (finish, expansion_point_p, aspect);
 215           }
 216           break;
 217         }
 218       xloc = linemap_expand_location (line_table, map, loc);
 219     }
 220
 221   xloc.data = block;
 222   if (loc <= BUILTINS_LOCATION)
 223     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 224
 225   return xloc;
 226 }
 227
 228 /* Initialize the set of cache used for files accessed by caret
 229    diagnostic.  */
 230
 231 static void
 232 diagnostic_file_cache_init (void)
 233 {
 234   if (fcache_tab == NULL)
 235     fcache_tab = new fcache[fcache_tab_size];
 236 }
 237
 238 /* Free the resources used by the set of cache used for files accessed
 239    by caret diagnostic.  */
 240
 241 void
 242 diagnostic_file_cache_fini (void)
 243 {
 244   if (fcache_tab)
 245     {
 246       delete [] (fcache_tab);
 247       fcache_tab = NULL;
 248     }
 249 }
 250
 251 /* Return the total lines number that have been read so far by the
 252    line map (in the preprocessor) so far.  For languages like C++ that
 253    entirely preprocess the input file before starting to parse, this
 254    equals the actual number of lines of the file.  */
 255
 256 static size_t
 257 total_lines_num (const char *file_path)
 258 {
 259   size_t r = 0;
 260   location_t l = 0;
 261   if (linemap_get_file_highest_location (line_table, file_path, &l))
 262     {
 263       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 264       expanded_location xloc = expand_location (l);
 265       r = xloc.line;
 266     }
 267   return r;
 268 }
 269
 270 /* Lookup the cache used for the content of a given file accessed by
 271    caret diagnostic.  Return the found cached file, or NULL if no
 272    cached file was found.  */
 273
 274 static fcache*
 275 lookup_file_in_cache_tab (const char *file_path)
 276 {
 277   if (file_path == NULL)
 278     return NULL;
 279
 280   diagnostic_file_cache_init ();
 281
 282   /* This will contain the found cached file.  */
 283   fcache *r = NULL;
 284   for (unsigned i = 0; i < fcache_tab_size; ++i)
 285     {
 286       fcache *c = &fcache_tab[i];
 287       if (c->file_path && !strcmp (c->file_path, file_path))
 288         {
 289           ++c->use_count;
 290           r = c;
 291         }
 292     }
 293
 294   if (r)
 295     ++r->use_count;
 296
 297   return r;
 298 }
 299
 300 /* Purge any mention of FILENAME from the cache of files used for
 301    printing source code.  For use in selftests when working
 302    with tempfiles.  */
 303
 304 void
 305 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 306 {
 307   gcc_assert (file_path);
 308
 309   fcache *r = lookup_file_in_cache_tab (file_path);
 310   if (!r)
 311     /* Not found.  */
 312     return;
 313
 314   r->file_path = NULL;
 315   if (r->fp)
 316     fclose (r->fp);
 317   r->fp = NULL;
 318   r->nb_read = 0;
 319   r->line_start_idx = 0;
 320   r->line_num = 0;
 321   r->line_record.truncate (0);
 322   r->use_count = 0;
 323   r->total_lines = 0;
 324   r->missing_trailing_newline = true;
 325 }
 326
 327 /* Return the file cache that has been less used, recently, or the
 328    first empty one.  If HIGHEST_USE_COUNT is non-null,
 329    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 330    in the cache table.  */
 331
 332 static fcache*
 333 evicted_cache_tab_entry (unsigned *highest_use_count)
 334 {
 335   diagnostic_file_cache_init ();
 336
 337   fcache *to_evict = &fcache_tab[0];
 338   unsigned huc = to_evict->use_count;
 339   for (unsigned i = 1; i < fcache_tab_size; ++i)
 340     {
 341       fcache *c = &fcache_tab[i];
 342       bool c_is_empty = (c->file_path == NULL);
 343
 344       if (c->use_count < to_evict->use_count
 345           || (to_evict->file_path && c_is_empty))
 346         /* We evict C because it's either an entry with a lower use
 347            count or one that is empty.  */
 348         to_evict = c;
 349
 350       if (huc < c->use_count)
 351         huc = c->use_count;
 352
 353       if (c_is_empty)
 354         /* We've reached the end of the cache; subsequent elements are
 355            all empty.  */
 356         break;
 357     }
 358
 359   if (highest_use_count)
 360     *highest_use_count = huc;
 361
 362   return to_evict;
 363 }
 364
 365 /* Create the cache used for the content of a given file to be
 366    accessed by caret diagnostic.  This cache is added to an array of
 367    cache and can be retrieved by lookup_file_in_cache_tab.  This
 368    function returns the created cache.  Note that only the last
 369    fcache_tab_size files are cached.  */
 370
 371 static fcache*
 372 add_file_to_cache_tab (const char *file_path)
 373 {
 374
 375   FILE *fp = fopen (file_path, "r");
 376   if (fp == NULL)
 377     return NULL;
 378
 379   unsigned highest_use_count = 0;
 380   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 381   r->file_path = file_path;
 382   if (r->fp)
 383     fclose (r->fp);
 384   r->fp = fp;
 385   r->nb_read = 0;
 386   r->line_start_idx = 0;
 387   r->line_num = 0;
 388   r->line_record.truncate (0);
 389   /* Ensure that this cache entry doesn't get evicted next time
 390      add_file_to_cache_tab is called.  */
 391   r->use_count = ++highest_use_count;
 392   r->total_lines = total_lines_num (file_path);
 393   r->missing_trailing_newline = true;
 394
 395   return r;
 396 }
 397
 398 /* Lookup the cache used for the content of a given file accessed by
 399    caret diagnostic.  If no cached file was found, create a new cache
 400    for this file, add it to the array of cached file and return
 401    it.  */
 402
 403 static fcache*
 404 lookup_or_add_file_to_cache_tab (const char *file_path)
 405 {
 406   fcache *r = lookup_file_in_cache_tab (file_path);
 407   if (r == NULL)
 408     r = add_file_to_cache_tab (file_path);
 409   return r;
 410 }
 411
 412 /* Default constructor for a cache of file used by caret
 413    diagnostic.  */
 414
 415 fcache::fcache ()
 416 : use_count (0), file_path (NULL), fp (NULL), data (0),
 417   size (0), nb_read (0), line_start_idx (0), line_num (0),
 418   total_lines (0), missing_trailing_newline (true)
 419 {
 420   line_record.create (0);
 421 }
 422
 423 /* Destructor for a cache of file used by caret diagnostic.  */
 424
 425 fcache::~fcache ()
 426 {
 427   if (fp)
 428     {
 429       fclose (fp);
 430       fp = NULL;
 431     }
 432   if (data)
 433     {
 434       XDELETEVEC (data);
 435       data = 0;
 436     }
 437   line_record.release ();
 438 }
 439
 440 /* Returns TRUE iff the cache would need to be filled with data coming
 441    from the file.  That is, either the cache is empty or full or the
 442    current line is empty.  Note that if the cache is full, it would
 443    need to be extended and filled again.  */
 444
 445 static bool
 446 needs_read (fcache *c)
 447 {
 448   return (c->nb_read == 0
 449           || c->nb_read == c->size
 450           || (c->line_start_idx >= c->nb_read - 1));
 451 }
 452
 453 /*  Return TRUE iff the cache is full and thus needs to be
 454     extended.  */
 455
 456 static bool
 457 needs_grow (fcache *c)
 458 {
 459   return c->nb_read == c->size;
 460 }
 461
 462 /* Grow the cache if it needs to be extended.  */
 463
 464 static void
 465 maybe_grow (fcache *c)
 466 {
 467   if (!needs_grow (c))
 468     return;
 469
 470   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 471   c->data = XRESIZEVEC (char, c->data, size);
 472   c->size = size;
 473 }
 474
 475 /*  Read more data into the cache.  Extends the cache if need be.
 476     Returns TRUE iff new data could be read.  */
 477
 478 static bool
 479 read_data (fcache *c)
 480 {
 481   if (feof (c->fp) || ferror (c->fp))
 482     return false;
 483
 484   maybe_grow (c);
 485
 486   char * from = c->data + c->nb_read;
 487   size_t to_read = c->size - c->nb_read;
 488   size_t nb_read = fread (from, 1, to_read, c->fp);
 489
 490   if (ferror (c->fp))
 491     return false;
 492
 493   c->nb_read += nb_read;
 494   return !!nb_read;
 495 }
 496
 497 /* Read new data iff the cache needs to be filled with more data
 498    coming from the file FP.  Return TRUE iff the cache was filled with
 499    mode data.  */
 500
 501 static bool
 502 maybe_read_data (fcache *c)
 503 {
 504   if (!needs_read (c))
 505     return false;
 506   return read_data (c);
 507 }
 508
 509 /* Read a new line from file FP, using C as a cache for the data
 510    coming from the file.  Upon successful completion, *LINE is set to
 511    the beginning of the line found.  *LINE points directly in the
 512    line cache and is only valid until the next call of get_next_line.
 513    *LINE_LEN is set to the length of the line.  Note that the line
 514    does not contain any terminal delimiter.  This function returns
 515    true if some data was read or process from the cache, false
 516    otherwise.  Note that subsequent calls to get_next_line might
 517    make the content of *LINE invalid.  */
 518
 519 static bool
 520 get_next_line (fcache *c, char **line, ssize_t *line_len)
 521 {
 522   /* Fill the cache with data to process.  */
 523   maybe_read_data (c);
 524
 525   size_t remaining_size = c->nb_read - c->line_start_idx;
 526   if (remaining_size == 0)
 527     /* There is no more data to process.  */
 528     return false;
 529
 530   char *line_start = c->data + c->line_start_idx;
 531
 532   char *next_line_start = NULL;
 533   size_t len = 0;
 534   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 535   if (line_end == NULL)
 536     {
 537       /* We haven't found the end-of-line delimiter in the cache.
 538          Fill the cache with more data from the file and look for the
 539          '\n'.  */
 540       while (maybe_read_data (c))
 541         {
 542           line_start = c->data + c->line_start_idx;
 543           remaining_size = c->nb_read - c->line_start_idx;
 544           line_end = (char *) memchr (line_start, '\n', remaining_size);
 545           if (line_end != NULL)
 546             {
 547               next_line_start = line_end + 1;
 548               break;
 549             }
 550         }
 551       if (line_end == NULL)
 552         {
 553           /* We've loadded all the file into the cache and still no
 554              '\n'.  Let's say the line ends up at one byte passed the
 555              end of the file.  This is to stay consistent with the case
 556              of when the line ends up with a '\n' and line_end points to
 557              that terminal '\n'.  That consistency is useful below in
 558              the len calculation.  */
 559           line_end = c->data + c->nb_read ;
 560           c->missing_trailing_newline = true;
 561         }
 562       else
 563         c->missing_trailing_newline = false;
 564     }
 565   else
 566     {
 567       next_line_start = line_end + 1;
 568       c->missing_trailing_newline = false;
 569     }
 570
 571   if (ferror (c->fp))
 572     return false;
 573
 574   /* At this point, we've found the end of the of line.  It either
 575      points to the '\n' or to one byte after the last byte of the
 576      file.  */
 577   gcc_assert (line_end != NULL);
 578
 579   len = line_end - line_start;
 580
 581   if (c->line_start_idx < c->nb_read)
 582     *line = line_start;
 583
 584   ++c->line_num;
 585
 586   /* Before we update our line record, make sure the hint about the
 587      total number of lines of the file is correct.  If it's not, then
 588      we give up recording line boundaries from now on.  */
 589   bool update_line_record = true;
 590   if (c->line_num > c->total_lines)
 591     update_line_record = false;
 592
 593     /* Now update our line record so that re-reading lines from the
 594      before c->line_start_idx is faster.  */
 595   if (update_line_record
 596       && c->line_record.length () < fcache_line_record_size)
 597     {
 598       /* If the file lines fits in the line record, we just record all
 599          its lines ...*/
 600       if (c->total_lines <= fcache_line_record_size
 601           && c->line_num > c->line_record.length ())
 602         c->line_record.safe_push (fcache::line_info (c->line_num,
 603                                                  c->line_start_idx,
 604                                                  line_end - c->data));
 605       else if (c->total_lines > fcache_line_record_size)
 606         {
 607           /* ... otherwise, we just scale total_lines down to
 608              (fcache_line_record_size lines.  */
 609           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 610           if (c->line_record.length () == 0
 611               || n >= c->line_record.length ())
 612             c->line_record.safe_push (fcache::line_info (c->line_num,
 613                                                      c->line_start_idx,
 614                                                      line_end - c->data));
 615         }
 616     }
 617
 618   /* Update c->line_start_idx so that it points to the next line to be
 619      read.  */
 620   if (next_line_start)
 621     c->line_start_idx = next_line_start - c->data;
 622   else
 623     /* We didn't find any terminal '\n'.  Let's consider that the end
 624        of line is the end of the data in the cache.  The next
 625        invocation of get_next_line will either read more data from the
 626        underlying file or return false early because we've reached the
 627        end of the file.  */
 628     c->line_start_idx = c->nb_read;
 629
 630   *line_len = len;
 631
 632   return true;
 633 }
 634
 635 /* Consume the next bytes coming from the cache (or from its
 636    underlying file if there are remaining unread bytes in the file)
 637    until we reach the next end-of-line (or end-of-file).  There is no
 638    copying from the cache involved.  Return TRUE upon successful
 639    completion.  */
 640
 641 static bool
 642 goto_next_line (fcache *cache)
 643 {
 644   char *l;
 645   ssize_t len;
 646
 647   return get_next_line (cache, &l, &len);
 648 }
 649
 650 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 651    If the line was read successfully, *LINE points to the beginning
 652    of the line in the file cache and *LINE_LEN is the length of the
 653    line.  *LINE is not nul-terminated, but may contain zero bytes.
 654    *LINE is only valid until the next call of read_line_num.
 655    This function returns bool if a line was read.  */
 656
 657 static bool
 658 read_line_num (fcache *c, size_t line_num,
 659                char **line, ssize_t *line_len)
 660 {
 661   gcc_assert (line_num > 0);
 662
 663   if (line_num <= c->line_num)
 664     {
 665       /* We've been asked to read lines that are before c->line_num.
 666          So lets use our line record (if it's not empty) to try to
 667          avoid re-reading the file from the beginning again.  */
 668
 669       if (c->line_record.is_empty ())
 670         {
 671           c->line_start_idx = 0;
 672           c->line_num = 0;
 673         }
 674       else
 675         {
 676           fcache::line_info *i = NULL;
 677           if (c->total_lines <= fcache_line_record_size)
 678             {
 679               /* In languages where the input file is not totally
 680                  preprocessed up front, the c->total_lines hint
 681                  can be smaller than the number of lines of the
 682                  file.  In that case, only the first
 683                  c->total_lines have been recorded.
 684
 685                  Otherwise, the first c->total_lines we've read have
 686                  their start/end recorded here.  */
 687               i = (line_num <= c->total_lines)
 688                 ? &c->line_record[line_num - 1]
 689                 : &c->line_record[c->total_lines - 1];
 690               gcc_assert (i->line_num <= line_num);
 691             }
 692           else
 693             {
 694               /*  So the file had more lines than our line record
 695                   size.  Thus the number of lines we've recorded has
 696                   been scaled down to fcache_line_reacord_size.  Let's
 697                   pick the start/end of the recorded line that is
 698                   closest to line_num.  */
 699               size_t n = (line_num <= c->total_lines)
 700                 ? line_num * fcache_line_record_size / c->total_lines
 701                 : c ->line_record.length () - 1;
 702               if (n < c->line_record.length ())
 703                 {
 704                   i = &c->line_record[n];
 705                   gcc_assert (i->line_num <= line_num);
 706                 }
 707             }
 708
 709           if (i && i->line_num == line_num)
 710             {
 711               /* We have the start/end of the line.  */
 712               *line = c->data + i->start_pos;
 713               *line_len = i->end_pos - i->start_pos;
 714               return true;
 715             }
 716
 717           if (i)
 718             {
 719               c->line_start_idx = i->start_pos;
 720               c->line_num = i->line_num - 1;
 721             }
 722           else
 723             {
 724               c->line_start_idx = 0;
 725               c->line_num = 0;
 726             }
 727         }
 728     }
 729
 730   /*  Let's walk from line c->line_num up to line_num - 1, without
 731       copying any line.  */
 732   while (c->line_num < line_num - 1)
 733     if (!goto_next_line (c))
 734       return false;
 735
 736   /* The line we want is the next one.  Let's read and copy it back to
 737      the caller.  */
 738   return get_next_line (c, line, line_len);
 739 }
 740
 741 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 742    The line is not nul-terminated.  The returned pointer is only
 743    valid until the next call of location_get_source_line.
 744    Note that the line can contain several null characters,
 745    so the returned value's length has the actual length of the line.
 746    If the function fails, a NULL char_span is returned.  */
 747
 748 char_span
 749 location_get_source_line (const char *file_path, int line)
 750 {
 751   char *buffer = NULL;
 752   ssize_t len;
 753
 754   if (line == 0)
 755     return char_span (NULL, 0);
 756
 757   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 758   if (c == NULL)
 759     return char_span (NULL, 0);
 760
 761   bool read = read_line_num (c, line, &buffer, &len);
 762   if (!read)
 763     return char_span (NULL, 0);
 764
 765   return char_span (buffer, len);
 766 }
 767
 768 /* Determine if FILE_PATH missing a trailing newline on its final line.
 769    Only valid to call once all of the file has been loaded, by
 770    requesting a line number beyond the end of the file.  */
 771
 772 bool
 773 location_missing_trailing_newline (const char *file_path)
 774 {
 775   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 776   if (c == NULL)
 777     return false;
 778
 779   return c->missing_trailing_newline;
 780 }
 781
 782 /* Test if the location originates from the spelling location of a
 783    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 784    virtual) location of a built-in token that appears in the expansion
 785    list of a macro.  Please note that this function also works on
 786    tokens that result from built-in tokens.  For instance, the
 787    function would return true if passed a token "4" that is the result
 788    of the expansion of the built-in __LINE__ macro.  */
 789 bool
 790 is_location_from_builtin_token (location_t loc)
 791 {
 792   const line_map_ordinary *map = NULL;
 793   loc = linemap_resolve_location (line_table, loc,
 794                                   LRK_SPELLING_LOCATION, &map);
 795   return loc == BUILTINS_LOCATION;
 796 }
 797
 798 /* Expand the source location LOC into a human readable location.  If
 799    LOC is virtual, it resolves to the expansion point of the involved
 800    macro.  If LOC resolves to a builtin location, the file name of the
 801    readable location is set to the string "<built-in>".  */
 802
 803 expanded_location
 804 expand_location (location_t loc)
 805 {
 806   return expand_location_1 (loc, /*expansion_point_p=*/true,
 807                             LOCATION_ASPECT_CARET);
 808 }
 809
 810 /* Expand the source location LOC into a human readable location.  If
 811    LOC is virtual, it resolves to the expansion location of the
 812    relevant macro.  If LOC resolves to a builtin location, the file
 813    name of the readable location is set to the string
 814    "<built-in>".  */
 815
 816 expanded_location
 817 expand_location_to_spelling_point (location_t loc,
 818                                    enum location_aspect aspect)
 819 {
 820   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 821 }
 822
 823 /* The rich_location class within libcpp requires a way to expand
 824    location_t instances, and relies on the client code
 825    providing a symbol named
 826      linemap_client_expand_location_to_spelling_point
 827    to do this.
 828
 829    This is the implementation for libcommon.a (all host binaries),
 830    which simply calls into expand_location_1.  */
 831
 832 expanded_location
 833 linemap_client_expand_location_to_spelling_point (location_t loc,
 834                                                   enum location_aspect aspect)
 835 {
 836   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 837 }
 838
 839
 840 /* If LOCATION is in a system header and if it is a virtual location for
 841    a token coming from the expansion of a macro, unwind it to the
 842    location of the expansion point of the macro.  Otherwise, just return
 843    LOCATION.
 844
 845    This is used for instance when we want to emit diagnostics about a
 846    token that may be located in a macro that is itself defined in a
 847    system header, for example, for the NULL macro.  In such a case, if
 848    LOCATION were passed directly to diagnostic functions such as
 849    warning_at, the diagnostic would be suppressed (unless
 850    -Wsystem-headers).  */
 851
 852 location_t
 853 expansion_point_location_if_in_system_header (location_t location)
 854 {
 855   if (in_system_header_at (location))
 856     location = linemap_resolve_location (line_table, location,
 857                                          LRK_MACRO_EXPANSION_POINT,
 858                                          NULL);
 859   return location;
 860 }
 861
 862 /* If LOCATION is a virtual location for a token coming from the expansion
 863    of a macro, unwind to the location of the expansion point of the macro.  */
 864
 865 location_t
 866 expansion_point_location (location_t location)
 867 {
 868   return linemap_resolve_location (line_table, location,
 869                                    LRK_MACRO_EXPANSION_POINT, NULL);
 870 }
 871
 872 /* Construct a location with caret at CARET, ranging from START to
 873    finish e.g.
 874
 875                  11111111112
 876         12345678901234567890
 877      522
 878      523   return foo + bar;
 879                   ~~~~^~~~~
 880      524
 881
 882    The location's caret is at the "+", line 523 column 15, but starts
 883    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 884    of "bar" at column 19.  */
 885
 886 location_t
 887 make_location (location_t caret, location_t start, location_t finish)
 888 {
 889   location_t pure_loc = get_pure_location (caret);
 890   source_range src_range;
 891   src_range.m_start = get_start (start);
 892   src_range.m_finish = get_finish (finish);
 893   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 894                                                    pure_loc,
 895                                                    src_range,
 896                                                    NULL);
 897   return combined_loc;
 898 }
 899
 900 /* Same as above, but taking a source range rather than two locations.  */
 901
 902 location_t
 903 make_location (location_t caret, source_range src_range)
 904 {
 905   location_t pure_loc = get_pure_location (caret);
 906   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 907 }
 908
 909 /* Dump statistics to stderr about the memory usage of the line_table
 910    set of line maps.  This also displays some statistics about macro
 911    expansion.  */
 912
 913 void
 914 dump_line_table_statistics (void)
 915 {
 916   struct linemap_stats s;
 917   long total_used_map_size,
 918     macro_maps_size,
 919     total_allocated_map_size;
 920
 921   memset (&s, 0, sizeof (s));
 922
 923   linemap_get_statistics (line_table, &s);
 924
 925   macro_maps_size = s.macro_maps_used_size
 926     + s.macro_maps_locations_size;
 927
 928   total_allocated_map_size = s.ordinary_maps_allocated_size
 929     + s.macro_maps_allocated_size
 930     + s.macro_maps_locations_size;
 931
 932   total_used_map_size = s.ordinary_maps_used_size
 933     + s.macro_maps_used_size
 934     + s.macro_maps_locations_size;
 935
 936   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 937            s.num_expanded_macros);
 938   if (s.num_expanded_macros != 0)
 939     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 940              s.num_macro_tokens / s.num_expanded_macros);
 941   fprintf (stderr,
 942            "\nLine Table allocations during the "
 943            "compilation process\n");
 944   fprintf (stderr, "Number of ordinary maps used:        " PRsa (5) "\n",
 945            SIZE_AMOUNT (s.num_ordinary_maps_used));
 946   fprintf (stderr, "Ordinary map used size:              " PRsa (5) "\n",
 947            SIZE_AMOUNT (s.ordinary_maps_used_size));
 948   fprintf (stderr, "Number of ordinary maps allocated:   " PRsa (5) "\n",
 949            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
 950   fprintf (stderr, "Ordinary maps allocated size:        " PRsa (5) "\n",
 951            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
 952   fprintf (stderr, "Number of macro maps used:           " PRsa (5) "\n",
 953            SIZE_AMOUNT (s.num_macro_maps_used));
 954   fprintf (stderr, "Macro maps used size:                " PRsa (5) "\n",
 955            SIZE_AMOUNT (s.macro_maps_used_size));
 956   fprintf (stderr, "Macro maps locations size:           " PRsa (5) "\n",
 957            SIZE_AMOUNT (s.macro_maps_locations_size));
 958   fprintf (stderr, "Macro maps size:                     " PRsa (5) "\n",
 959            SIZE_AMOUNT (macro_maps_size));
 960   fprintf (stderr, "Duplicated maps locations size:      " PRsa (5) "\n",
 961            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
 962   fprintf (stderr, "Total allocated maps size:           " PRsa (5) "\n",
 963            SIZE_AMOUNT (total_allocated_map_size));
 964   fprintf (stderr, "Total used maps size:                " PRsa (5) "\n",
 965            SIZE_AMOUNT (total_used_map_size));
 966   fprintf (stderr, "Ad-hoc table size:                   " PRsa (5) "\n",
 967            SIZE_AMOUNT (s.adhoc_table_size));
 968   fprintf (stderr, "Ad-hoc table entries used:           " PRsa (5) "\n",
 969            SIZE_AMOUNT (s.adhoc_table_entries_used));
 970   fprintf (stderr, "optimized_ranges:                    " PRsa (5) "\n",
 971            SIZE_AMOUNT (line_table->num_optimized_ranges));
 972   fprintf (stderr, "unoptimized_ranges:                  " PRsa (5) "\n",
 973            SIZE_AMOUNT (line_table->num_unoptimized_ranges));
 974
 975   fprintf (stderr, "\n");
 976 }
 977
 978 /* Get location one beyond the final location in ordinary map IDX.  */
 979
 980 static location_t
 981 get_end_location (struct line_maps *set, unsigned int idx)
 982 {
 983   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
 984     return set->highest_location;
 985
 986   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
 987   return MAP_START_LOCATION (next_map);
 988 }
 989
 990 /* Helper function for write_digit_row.  */
 991
 992 static void
 993 write_digit (FILE *stream, int digit)
 994 {
 995   fputc ('0' + (digit % 10), stream);
 996 }
 997
 998 /* Helper function for dump_location_info.
 999    Write a row of numbers to STREAM, numbering a source line,
1000    giving the units, tens, hundreds etc of the column number.  */
1001
1002 static void
1003 write_digit_row (FILE *stream, int indent,
1004                  const line_map_ordinary *map,
1005                  location_t loc, int max_col, int divisor)
1006 {
1007   fprintf (stream, "%*c", indent, ' ');
1008   fprintf (stream, "|");
1009   for (int column = 1; column < max_col; column++)
1010     {
1011       location_t column_loc = loc + (column << map->m_range_bits);
1012       write_digit (stream, column_loc / divisor);
1013     }
1014   fprintf (stream, "\n");
1015 }
1016
1017 /* Write a half-closed (START) / half-open (END) interval of
1018    location_t to STREAM.  */
1019
1020 static void
1021 dump_location_range (FILE *stream,
1022                      location_t start, location_t end)
1023 {
1024   fprintf (stream,
1025            "  location_t interval: %u <= loc < %u\n",
1026            start, end);
1027 }
1028
1029 /* Write a labelled description of a half-closed (START) / half-open (END)
1030    interval of location_t to STREAM.  */
1031
1032 static void
1033 dump_labelled_location_range (FILE *stream,
1034                               const char *name,
1035                               location_t start, location_t end)
1036 {
1037   fprintf (stream, "%s\n", name);
1038   dump_location_range (stream, start, end);
1039   fprintf (stream, "\n");
1040 }
1041
1042 /* Write a visualization of the locations in the line_table to STREAM.  */
1043
1044 void
1045 dump_location_info (FILE *stream)
1046 {
1047   /* Visualize the reserved locations.  */
1048   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1049                                 0, RESERVED_LOCATION_COUNT);
1050
1051   /* Visualize the ordinary line_map instances, rendering the sources. */
1052   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1053     {
1054       location_t end_location = get_end_location (line_table, idx);
1055       /* half-closed: doesn't include this one. */
1056
1057       const line_map_ordinary *map
1058         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1059       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1060       dump_location_range (stream,
1061                            MAP_START_LOCATION (map), end_location);
1062       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1063       fprintf (stream, "  starting at line: %i\n",
1064                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1065       fprintf (stream, "  column and range bits: %i\n",
1066                map->m_column_and_range_bits);
1067       fprintf (stream, "  column bits: %i\n",
1068                map->m_column_and_range_bits - map->m_range_bits);
1069       fprintf (stream, "  range bits: %i\n",
1070                map->m_range_bits);
1071       const char * reason;
1072       switch (map->reason) {
1073       case LC_ENTER:
1074         reason = "LC_ENTER";
1075         break;
1076       case LC_LEAVE:
1077         reason = "LC_LEAVE";
1078         break;
1079       case LC_RENAME:
1080         reason = "LC_RENAME";
1081         break;
1082       case LC_RENAME_VERBATIM:
1083         reason = "LC_RENAME_VERBATIM";
1084         break;
1085       case LC_ENTER_MACRO:
1086         reason = "LC_RENAME_MACRO";
1087         break;
1088       default:
1089         reason = "Unknown";
1090       }
1091       fprintf (stream, "  reason: %d (%s)\n", map->reason, reason);
1092
1093       const line_map_ordinary *includer_map
1094         = linemap_included_from_linemap (line_table, map);
1095       fprintf (stream, "  included from location: %d",
1096                linemap_included_from (map));
1097       if (includer_map) {
1098         fprintf (stream, " (in ordinary map %d)",
1099                  int (includer_map - line_table->info_ordinary.maps));
1100       }
1101       fprintf (stream, "\n");
1102
1103       /* Render the span of source lines that this "map" covers.  */
1104       for (location_t loc = MAP_START_LOCATION (map);
1105            loc < end_location;
1106            loc += (1 << map->m_range_bits) )
1107         {
1108           gcc_assert (pure_location_p (line_table, loc) );
1109
1110           expanded_location exploc
1111             = linemap_expand_location (line_table, map, loc);
1112
1113           if (exploc.column == 0)
1114             {
1115               /* Beginning of a new source line: draw the line.  */
1116
1117               char_span line_text = location_get_source_line (exploc.file,
1118                                                               exploc.line);
1119               if (!line_text)
1120                 break;
1121               fprintf (stream,
1122                        "%s:%3i|loc:%5i|%.*s\n",
1123                        exploc.file, exploc.line,
1124                        loc,
1125                        (int)line_text.length (), line_text.get_buffer ());
1126
1127               /* "loc" is at column 0, which means "the whole line".
1128                  Render the locations *within* the line, by underlining
1129                  it, showing the location_t numeric values
1130                  at each column.  */
1131               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1132               if (max_col > line_text.length ())
1133                 max_col = line_text.length () + 1;
1134
1135               int len_lnum = num_digits (exploc.line);
1136               if (len_lnum < 3)
1137                 len_lnum = 3;
1138               int len_loc = num_digits (loc);
1139               if (len_loc < 5)
1140                 len_loc = 5;
1141
1142               int indent = 6 + strlen (exploc.file) + len_lnum + len_loc;
1143
1144               /* Thousands.  */
1145               if (end_location > 999)
1146                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1147
1148               /* Hundreds.  */
1149               if (end_location > 99)
1150                 write_digit_row (stream, indent, map, loc, max_col, 100);
1151
1152               /* Tens.  */
1153               write_digit_row (stream, indent, map, loc, max_col, 10);
1154
1155               /* Units.  */
1156               write_digit_row (stream, indent, map, loc, max_col, 1);
1157             }
1158         }
1159       fprintf (stream, "\n");
1160     }
1161
1162   /* Visualize unallocated values.  */
1163   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1164                                 line_table->highest_location,
1165                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1166
1167   /* Visualize the macro line_map instances, rendering the sources. */
1168   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1169     {
1170       /* Each macro map that is allocated owns location_t values
1171          that are *lower* that the one before them.
1172          Hence it's meaningful to view them either in order of ascending
1173          source locations, or in order of ascending macro map index.  */
1174       const bool ascending_location_ts = true;
1175       unsigned int idx = (ascending_location_ts
1176                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1177                           : i);
1178       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1179       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1180                idx,
1181                linemap_map_get_macro_name (map),
1182                MACRO_MAP_NUM_MACRO_TOKENS (map));
1183       dump_location_range (stream,
1184                            map->start_location,
1185                            (map->start_location
1186                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1187       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1188               "expansion point is location %i",
1189               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1190       fprintf (stream, "  map->start_location: %u\n",
1191                map->start_location);
1192
1193       fprintf (stream, "  macro_locations:\n");
1194       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1195         {
1196           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1197           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1198
1199           /* linemap_add_macro_token encodes token numbers in an expansion
1200              by putting them after MAP_START_LOCATION. */
1201
1202           /* I'm typically seeing 4 uninitialized entries at the end of
1203              0xafafafaf.
1204              This appears to be due to macro.c:replace_args
1205              adding 2 extra args for padding tokens; presumably there may
1206              be a leading and/or trailing padding token injected,
1207              each for 2 more location slots.
1208              This would explain there being up to 4 location_ts slots
1209              that may be uninitialized.  */
1210
1211           fprintf (stream, "    %u: %u, %u\n",
1212                    i,
1213                    x,
1214                    y);
1215           if (x == y)
1216             {
1217               if (x < MAP_START_LOCATION (map))
1218                 inform (x, "token %u has x-location == y-location == %u", i, x);
1219               else
1220                 fprintf (stream,
1221                          "x-location == y-location == %u encodes token # %u\n",
1222                          x, x - MAP_START_LOCATION (map));
1223                 }
1224           else
1225             {
1226               inform (x, "token %u has x-location == %u", i, x);
1227               inform (x, "token %u has y-location == %u", i, y);
1228             }
1229         }
1230       fprintf (stream, "\n");
1231     }
1232
1233   /* It appears that MAX_LOCATION_T itself is never assigned to a
1234      macro map, presumably due to an off-by-one error somewhere
1235      between the logic in linemap_enter_macro and
1236      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1237   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1238                                 MAX_LOCATION_T,
1239                                 MAX_LOCATION_T + 1);
1240
1241   /* Visualize ad-hoc values.  */
1242   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1243                                 MAX_LOCATION_T + 1, UINT_MAX);
1244 }
1245
1246 /* string_concat's constructor.  */
1247
1248 string_concat::string_concat (int num, location_t *locs)
1249   : m_num (num)
1250 {
1251   m_locs = ggc_vec_alloc <location_t> (num);
1252   for (int i = 0; i < num; i++)
1253     m_locs[i] = locs[i];
1254 }
1255
1256 /* string_concat_db's constructor.  */
1257
1258 string_concat_db::string_concat_db ()
1259 {
1260   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1261 }
1262
1263 /* Record that a string concatenation occurred, covering NUM
1264    string literal tokens.  LOCS is an array of size NUM, containing the
1265    locations of the tokens.  A copy of LOCS is taken.  */
1266
1267 void
1268 string_concat_db::record_string_concatenation (int num, location_t *locs)
1269 {
1270   gcc_assert (num > 1);
1271   gcc_assert (locs);
1272
1273   location_t key_loc = get_key_loc (locs[0]);
1274
1275   string_concat *concat
1276     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1277   m_table->put (key_loc, concat);
1278 }
1279
1280 /* Determine if LOC was the location of the the initial token of a
1281    concatenation of string literal tokens.
1282    If so, *OUT_NUM is written to with the number of tokens, and
1283    *OUT_LOCS with the location of an array of locations of the
1284    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1285    storage owned by the string_concat_db.
1286    Otherwise, return false.  */
1287
1288 bool
1289 string_concat_db::get_string_concatenation (location_t loc,
1290                                             int *out_num,
1291                                             location_t **out_locs)
1292 {
1293   gcc_assert (out_num);
1294   gcc_assert (out_locs);
1295
1296   location_t key_loc = get_key_loc (loc);
1297
1298   string_concat **concat = m_table->get (key_loc);
1299   if (!concat)
1300     return false;
1301
1302   *out_num = (*concat)->m_num;
1303   *out_locs =(*concat)->m_locs;
1304   return true;
1305 }
1306
1307 /* Internal function.  Canonicalize LOC into a form suitable for
1308    use as a key within the database, stripping away macro expansion,
1309    ad-hoc information, and range information, using the location of
1310    the start of LOC within an ordinary linemap.  */
1311
1312 location_t
1313 string_concat_db::get_key_loc (location_t loc)
1314 {
1315   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1316                                   NULL);
1317
1318   loc = get_range_from_loc (line_table, loc).m_start;
1319
1320   return loc;
1321 }
1322
1323 /* Helper class for use within get_substring_ranges_for_loc.
1324    An vec of cpp_string with responsibility for releasing all of the
1325    str->text for each str in the vector.  */
1326
1327 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1328 {
1329  public:
1330   auto_cpp_string_vec (int alloc)
1331     : auto_vec <cpp_string> (alloc) {}
1332
1333   ~auto_cpp_string_vec ()
1334   {
1335     /* Clean up the copies within this vec.  */
1336     int i;
1337     cpp_string *str;
1338     FOR_EACH_VEC_ELT (*this, i, str)
1339       free (const_cast <unsigned char *> (str->text));
1340   }
1341 };
1342
1343 /* Attempt to populate RANGES with source location information on the
1344    individual characters within the string literal found at STRLOC.
1345    If CONCATS is non-NULL, then any string literals that the token at
1346    STRLOC  was concatenated with are also added to RANGES.
1347
1348    Return NULL if successful, or an error message if any errors occurred (in
1349    which case RANGES may be only partially populated and should not
1350    be used).
1351
1352    This is implemented by re-parsing the relevant source line(s).  */
1353
1354 static const char *
1355 get_substring_ranges_for_loc (cpp_reader *pfile,
1356                               string_concat_db *concats,
1357                               location_t strloc,
1358                               enum cpp_ttype type,
1359                               cpp_substring_ranges &ranges)
1360 {
1361   gcc_assert (pfile);
1362
1363   if (strloc == UNKNOWN_LOCATION)
1364     return "unknown location";
1365
1366   /* Reparsing the strings requires accurate location information.
1367      If -ftrack-macro-expansion has been overridden from its default
1368      of 2, then we might have a location of a macro expansion point,
1369      rather than the location of the literal itself.
1370      Avoid this by requiring that we have full macro expansion tracking
1371      for substring locations to be available.  */
1372   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1373     return "track_macro_expansion != 2";
1374
1375   /* If #line or # 44 "file"-style directives are present, then there's
1376      no guarantee that the line numbers we have can be used to locate
1377      the strings.  For example, we might have a .i file with # directives
1378      pointing back to lines within a .c file, but the .c file might
1379      have been edited since the .i file was created.
1380      In such a case, the safest course is to disable on-demand substring
1381      locations.  */
1382   if (line_table->seen_line_directive)
1383     return "seen line directive";
1384
1385   /* If string concatenation has occurred at STRLOC, get the locations
1386      of all of the literal tokens making up the compound string.
1387      Otherwise, just use STRLOC.  */
1388   int num_locs = 1;
1389   location_t *strlocs = &strloc;
1390   if (concats)
1391     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1392
1393   auto_cpp_string_vec strs (num_locs);
1394   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1395   for (int i = 0; i < num_locs; i++)
1396     {
1397       /* Get range of strloc.  We will use it to locate the start and finish
1398          of the literal token within the line.  */
1399       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1400
1401       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1402         {
1403           /* If the string token was within a macro expansion, then we can
1404              cope with it for the simple case where we have a single token.
1405              Otherwise, bail out.  */
1406           if (src_range.m_start != src_range.m_finish)
1407             return "macro expansion";
1408         }
1409       else
1410         {
1411           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1412             /* If so, we can't reliably determine where the token started within
1413                its line.  */
1414             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1415
1416           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1417             /* If so, we can't reliably determine where the token finished
1418                within its line.  */
1419             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1420         }
1421
1422       expanded_location start
1423         = expand_location_to_spelling_point (src_range.m_start,
1424                                              LOCATION_ASPECT_START);
1425       expanded_location finish
1426         = expand_location_to_spelling_point (src_range.m_finish,
1427                                              LOCATION_ASPECT_FINISH);
1428       if (start.file != finish.file)
1429         return "range endpoints are in different files";
1430       if (start.line != finish.line)
1431         return "range endpoints are on different lines";
1432       if (start.column > finish.column)
1433         return "range endpoints are reversed";
1434
1435       char_span line = location_get_source_line (start.file, start.line);
1436       if (!line)
1437         return "unable to read source line";
1438
1439       /* Determine the location of the literal (including quotes
1440          and leading prefix chars, such as the 'u' in a u""
1441          token).  */
1442       size_t literal_length = finish.column - start.column + 1;
1443
1444       /* Ensure that we don't crash if we got the wrong location.  */
1445       if (line.length () < (start.column - 1 + literal_length))
1446         return "line is not wide enough";
1447
1448       char_span literal = line.subspan (start.column - 1, literal_length);
1449
1450       cpp_string from;
1451       from.len = literal_length;
1452       /* Make a copy of the literal, to avoid having to rely on
1453          the lifetime of the copy of the line within the cache.
1454          This will be released by the auto_cpp_string_vec dtor.  */
1455       from.text = (unsigned char *)literal.xstrdup ();
1456       strs.safe_push (from);
1457
1458       /* For very long lines, a new linemap could have started
1459          halfway through the token.
1460          Ensure that the loc_reader uses the linemap of the
1461          *end* of the token for its start location.  */
1462       const line_map_ordinary *start_ord_map;
1463       linemap_resolve_location (line_table, src_range.m_start,
1464                                 LRK_SPELLING_LOCATION, &start_ord_map);
1465       const line_map_ordinary *final_ord_map;
1466       linemap_resolve_location (line_table, src_range.m_finish,
1467                                 LRK_SPELLING_LOCATION, &final_ord_map);
1468       if (start_ord_map == NULL || final_ord_map == NULL)
1469         return "failed to get ordinary maps";
1470       /* Bulletproofing.  We ought to only have different ordinary maps
1471          for start vs finish due to line-length jumps.  */
1472       if (start_ord_map != final_ord_map
1473           && start_ord_map->to_file != final_ord_map->to_file)
1474           return "start and finish are spelled in different ordinary maps";
1475       location_t start_loc
1476         = linemap_position_for_line_and_column (line_table, final_ord_map,
1477                                                 start.line, start.column);
1478
1479       cpp_string_location_reader loc_reader (start_loc, line_table);
1480       loc_readers.safe_push (loc_reader);
1481     }
1482
1483   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1484   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1485                                                  loc_readers.address (),
1486                                                  num_locs, &ranges, type);
1487   if (err)
1488     return err;
1489
1490   /* Success: "ranges" should now contain information on the string.  */
1491   return NULL;
1492 }
1493
1494 /* Attempt to populate *OUT_LOC with source location information on the
1495    given characters within the string literal found at STRLOC.
1496    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1497    character set.
1498
1499    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1500    and string literal "012345\n789"
1501    *OUT_LOC is written to with:
1502      "012345\n789"
1503          ~^~~~~
1504
1505    If CONCATS is non-NULL, then any string literals that the token at
1506    STRLOC was concatenated with are also considered.
1507
1508    This is implemented by re-parsing the relevant source line(s).
1509
1510    Return NULL if successful, or an error message if any errors occurred.
1511    Error messages are intended for GCC developers (to help debugging) rather
1512    than for end-users.  */
1513
1514 const char *
1515 get_location_within_string (cpp_reader *pfile,
1516                             string_concat_db *concats,
1517                             location_t strloc,
1518                             enum cpp_ttype type,
1519                             int caret_idx, int start_idx, int end_idx,
1520                             location_t *out_loc)
1521 {
1522   gcc_checking_assert (caret_idx >= 0);
1523   gcc_checking_assert (start_idx >= 0);
1524   gcc_checking_assert (end_idx >= 0);
1525   gcc_assert (out_loc);
1526
1527   cpp_substring_ranges ranges;
1528   const char *err
1529     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1530   if (err)
1531     return err;
1532
1533   if (caret_idx >= ranges.get_num_ranges ())
1534     return "caret_idx out of range";
1535   if (start_idx >= ranges.get_num_ranges ())
1536     return "start_idx out of range";
1537   if (end_idx >= ranges.get_num_ranges ())
1538     return "end_idx out of range";
1539
1540   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1541                             ranges.get_range (start_idx).m_start,
1542                             ranges.get_range (end_idx).m_finish);
1543   return NULL;
1544 }
1545
1546 #if CHECKING_P
1547
1548 namespace selftest {
1549
1550 /* Selftests of location handling.  */
1551
1552 /* Attempt to populate *OUT_RANGE with source location information on the
1553    given character within the string literal found at STRLOC.
1554    CHAR_IDX refers to an offset within the execution character set.
1555    If CONCATS is non-NULL, then any string literals that the token at
1556    STRLOC was concatenated with are also considered.
1557
1558    This is implemented by re-parsing the relevant source line(s).
1559
1560    Return NULL if successful, or an error message if any errors occurred.
1561    Error messages are intended for GCC developers (to help debugging) rather
1562    than for end-users.  */
1563
1564 static const char *
1565 get_source_range_for_char (cpp_reader *pfile,
1566                            string_concat_db *concats,
1567                            location_t strloc,
1568                            enum cpp_ttype type,
1569                            int char_idx,
1570                            source_range *out_range)
1571 {
1572   gcc_checking_assert (char_idx >= 0);
1573   gcc_assert (out_range);
1574
1575   cpp_substring_ranges ranges;
1576   const char *err
1577     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1578   if (err)
1579     return err;
1580
1581   if (char_idx >= ranges.get_num_ranges ())
1582     return "char_idx out of range";
1583
1584   *out_range = ranges.get_range (char_idx);
1585   return NULL;
1586 }
1587
1588 /* As get_source_range_for_char, but write to *OUT the number
1589    of ranges that are available.  */
1590
1591 static const char *
1592 get_num_source_ranges_for_substring (cpp_reader *pfile,
1593                                      string_concat_db *concats,
1594                                      location_t strloc,
1595                                      enum cpp_ttype type,
1596                                      int *out)
1597 {
1598   gcc_assert (out);
1599
1600   cpp_substring_ranges ranges;
1601   const char *err
1602     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1603
1604   if (err)
1605     return err;
1606
1607   *out = ranges.get_num_ranges ();
1608   return NULL;
1609 }
1610
1611 /* Selftests of location handling.  */
1612
1613 /* Verify that compare() on linenum_type handles comparisons over the full
1614    range of the type.  */
1615
1616 static void
1617 test_linenum_comparisons ()
1618 {
1619   linenum_type min_line (0);
1620   linenum_type max_line (0xffffffff);
1621   ASSERT_EQ (0, compare (min_line, min_line));
1622   ASSERT_EQ (0, compare (max_line, max_line));
1623
1624   ASSERT_GT (compare (max_line, min_line), 0);
1625   ASSERT_LT (compare (min_line, max_line), 0);
1626 }
1627
1628 /* Helper function for verifying location data: when location_t
1629    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1630    as having column 0.  */
1631
1632 static bool
1633 should_have_column_data_p (location_t loc)
1634 {
1635   if (IS_ADHOC_LOC (loc))
1636     loc = get_location_from_adhoc_loc (line_table, loc);
1637   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1638     return false;
1639   return true;
1640 }
1641
1642 /* Selftest for should_have_column_data_p.  */
1643
1644 static void
1645 test_should_have_column_data_p ()
1646 {
1647   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1648   ASSERT_TRUE
1649     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1650   ASSERT_FALSE
1651     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1652 }
1653
1654 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1655    on LOC.  */
1656
1657 static void
1658 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1659               location_t loc)
1660 {
1661   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1662   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1663   /* If location_t values are sufficiently high, then column numbers
1664      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1665      When close to the threshold, column numbers *may* be present: if
1666      the final linemap before the threshold contains a line that straddles
1667      the threshold, locations in that line have column information.  */
1668   if (should_have_column_data_p (loc))
1669     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1670 }
1671
/* Many of the selftests below build a line table containing one or
   more line maps.

   To get good coverage they are run over a matrix of configurations,
   varying:
   - line_table->default_range_bits: some frontends use a non-zero
     value and others use zero
   - the fallback modes within line-map.c: there are threshold values
     of location_t beyond which line-map.c changes behavior (disabling
     of the range-packing optimization, disabling of column-tracking).
     These are exercised by starting the line_table at interesting
     values at or near those thresholds.

   A line_table_case describes one cell of that test matrix.  */

struct line_table_case
{
  /* Value to install as default_range_bits of the test line_table.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the test line_table's
     highest_location and highest_line are started.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {
  }
};
1698
1699 /* Constructor.  Store the old value of line_table, and create a new
1700    one, using sane defaults.  */
1701
1702 line_table_test::line_table_test ()
1703 {
1704   gcc_assert (saved_line_table == NULL);
1705   saved_line_table = line_table;
1706   line_table = ggc_alloc<line_maps> ();
1707   linemap_init (line_table, BUILTINS_LOCATION);
1708   gcc_assert (saved_line_table->reallocator);
1709   line_table->reallocator = saved_line_table->reallocator;
1710   gcc_assert (saved_line_table->round_alloc_size);
1711   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1712   line_table->default_range_bits = 0;
1713 }
1714
1715 /* Constructor.  Store the old value of line_table, and create a new
1716    one, using the sitation described in CASE_.  */
1717
1718 line_table_test::line_table_test (const line_table_case &case_)
1719 {
1720   gcc_assert (saved_line_table == NULL);
1721   saved_line_table = line_table;
1722   line_table = ggc_alloc<line_maps> ();
1723   linemap_init (line_table, BUILTINS_LOCATION);
1724   gcc_assert (saved_line_table->reallocator);
1725   line_table->reallocator = saved_line_table->reallocator;
1726   gcc_assert (saved_line_table->round_alloc_size);
1727   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1728   line_table->default_range_bits = case_.m_default_range_bits;
1729   if (case_.m_base_location)
1730     {
1731       line_table->highest_location = case_.m_base_location;
1732       line_table->highest_line = case_.m_base_location;
1733     }
1734 }
1735
1736 /* Destructor.  Restore the old value of line_table.  */
1737
1738 line_table_test::~line_table_test ()
1739 {
1740   gcc_assert (saved_line_table != NULL);
1741   line_table = saved_line_table;
1742   saved_line_table = NULL;
1743 }
1744
1745 /* Verify basic operation of ordinary linemaps.  */
1746
1747 static void
1748 test_accessing_ordinary_linemaps (const line_table_case &case_)
1749 {
1750   line_table_test ltt (case_);
1751
1752   /* Build a simple linemap describing some locations. */
1753   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1754
1755   linemap_line_start (line_table, 1, 100);
1756   location_t loc_a = linemap_position_for_column (line_table, 1);
1757   location_t loc_b = linemap_position_for_column (line_table, 23);
1758
1759   linemap_line_start (line_table, 2, 100);
1760   location_t loc_c = linemap_position_for_column (line_table, 1);
1761   location_t loc_d = linemap_position_for_column (line_table, 17);
1762
1763   /* Example of a very long line.  */
1764   linemap_line_start (line_table, 3, 2000);
1765   location_t loc_e = linemap_position_for_column (line_table, 700);
1766
1767   /* Transitioning back to a short line.  */
1768   linemap_line_start (line_table, 4, 0);
1769   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1770
1771   if (should_have_column_data_p (loc_back_to_short))
1772     {
1773       /* Verify that we switched to short lines in the linemap.  */
1774       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1775       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1776     }
1777
1778   /* Example of a line that will eventually be seen to be longer
1779      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1780      below that.  */
1781   linemap_line_start (line_table, 5, 2000);
1782
1783   location_t loc_start_of_very_long_line
1784     = linemap_position_for_column (line_table, 2000);
1785   location_t loc_too_wide
1786     = linemap_position_for_column (line_table, 4097);
1787   location_t loc_too_wide_2
1788     = linemap_position_for_column (line_table, 4098);
1789
1790   /* ...and back to a sane line length.  */
1791   linemap_line_start (line_table, 6, 100);
1792   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1793
1794   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1795
1796   /* Multiple files.  */
1797   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1798   linemap_line_start (line_table, 1, 200);
1799   location_t loc_f = linemap_position_for_column (line_table, 150);
1800   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1801
1802   /* Verify that we can recover the location info.  */
1803   assert_loceq ("foo.c", 1, 1, loc_a);
1804   assert_loceq ("foo.c", 1, 23, loc_b);
1805   assert_loceq ("foo.c", 2, 1, loc_c);
1806   assert_loceq ("foo.c", 2, 17, loc_d);
1807   assert_loceq ("foo.c", 3, 700, loc_e);
1808   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1809
1810   /* In the very wide line, the initial location should be fully tracked.  */
1811   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1812   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1813      be disabled.  */
1814   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1815   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1816   /*...and column-tracking should be re-enabled for subsequent lines.  */
1817   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1818
1819   assert_loceq ("bar.c", 1, 150, loc_f);
1820
1821   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1822   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1823
1824   /* Verify using make_location to build a range, and extracting data
1825      back from it.  */
1826   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1827   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1828   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1829   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1830   ASSERT_EQ (loc_b, src_range.m_start);
1831   ASSERT_EQ (loc_d, src_range.m_finish);
1832 }
1833
1834 /* Verify various properties of UNKNOWN_LOCATION.  */
1835
1836 static void
1837 test_unknown_location ()
1838 {
1839   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1840   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1841   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1842 }
1843
1844 /* Verify various properties of BUILTINS_LOCATION.  */
1845
1846 static void
1847 test_builtins ()
1848 {
1849   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1850   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1851 }
1852
1853 /* Regression test for make_location.
1854    Ensure that we use pure locations for the start/finish of the range,
1855    rather than storing a packed or ad-hoc range as the start/finish.  */
1856
1857 static void
1858 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1859 {
1860   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1861      with C++ frontend.
1862      ....................0000000001111111111222.
1863      ....................1234567890123456789012.  */
1864   const char *content = "     r += !aaa == bbb;\n";
1865   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1866   line_table_test ltt (case_);
1867   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1868
1869   const location_t c11 = linemap_position_for_column (line_table, 11);
1870   const location_t c12 = linemap_position_for_column (line_table, 12);
1871   const location_t c13 = linemap_position_for_column (line_table, 13);
1872   const location_t c14 = linemap_position_for_column (line_table, 14);
1873   const location_t c21 = linemap_position_for_column (line_table, 21);
1874
1875   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1876     return;
1877
1878   /* Use column 13 for the caret location, arbitrarily, to verify that we
1879      handle start != caret.  */
1880   const location_t aaa = make_location (c13, c12, c14);
1881   ASSERT_EQ (c13, get_pure_location (aaa));
1882   ASSERT_EQ (c12, get_start (aaa));
1883   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1884   ASSERT_EQ (c14, get_finish (aaa));
1885   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1886
1887   /* Make a location using a location with a range as the start-point.  */
1888   const location_t not_aaa = make_location (c11, aaa, c14);
1889   ASSERT_EQ (c11, get_pure_location (not_aaa));
1890   /* It should use the start location of the range, not store the range
1891      itself.  */
1892   ASSERT_EQ (c12, get_start (not_aaa));
1893   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1894   ASSERT_EQ (c14, get_finish (not_aaa));
1895   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1896
1897   /* Similarly, make a location with a range as the end-point.  */
1898   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1899   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1900   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1901   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1902   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1903   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1904   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1905   /* It should use the finish location of the range, not store the range
1906      itself.  */
1907   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1908   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1909   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1910   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1911   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1912 }
1913
1914 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1915
1916 static void
1917 test_reading_source_line ()
1918 {
1919   /* Create a tempfile and write some text to it.  */
1920   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1921                         "01234567890123456789\n"
1922                         "This is the test text\n"
1923                         "This is the 3rd line");
1924
1925   /* Read back a specific line from the tempfile.  */
1926   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1927   ASSERT_TRUE (source_line);
1928   ASSERT_TRUE (source_line.get_buffer () != NULL);
1929   ASSERT_EQ (20, source_line.length ());
1930   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1931                          source_line.get_buffer (), source_line.length ()));
1932
1933   source_line = location_get_source_line (tmp.get_filename (), 2);
1934   ASSERT_TRUE (source_line);
1935   ASSERT_TRUE (source_line.get_buffer () != NULL);
1936   ASSERT_EQ (21, source_line.length ());
1937   ASSERT_TRUE (!strncmp ("This is the test text",
1938                          source_line.get_buffer (), source_line.length ()));
1939
1940   source_line = location_get_source_line (tmp.get_filename (), 4);
1941   ASSERT_FALSE (source_line);
1942   ASSERT_TRUE (source_line.get_buffer () == NULL);
1943 }
1944
1945 /* Tests of lexing.  */
1946
/* Assert that the spelling of token TOK, as given by cpp_token_as_text
   using PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)               \
  SELFTEST_BEGIN_STMT                                                     \
    const unsigned char *spelling_ = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *) spelling_);             \
  SELFTEST_END_STMT
1955
1956 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1957    and ranges from EXP_START_COL to EXP_FINISH_COL.
1958    Use LOC as the effective location of the selftest.  */
1959
1960 static void
1961 assert_token_loc_eq (const location &loc,
1962                      const cpp_token *tok,
1963                      const char *exp_filename, int exp_linenum,
1964                      int exp_start_col, int exp_finish_col)
1965 {
1966   location_t tok_loc = tok->src_loc;
1967   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1968   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1969
1970   /* If location_t values are sufficiently high, then column numbers
1971      will be unavailable.  */
1972   if (!should_have_column_data_p (tok_loc))
1973     return;
1974
1975   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1976   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1977   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1978   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1979 }
1980
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc, which passes SELFTEST_LOCATION (i.e. the point of use
   of this macro) as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK), (EXP_FILENAME), (EXP_LINENUM),            \
                       (EXP_START_COL), (EXP_FINISH_COL))
1988
1989 /* Test of lexing a file using libcpp, verifying tokens and their
1990    location information.  */
1991
1992 static void
1993 test_lexer (const line_table_case &case_)
1994 {
1995   /* Create a tempfile and write some text to it.  */
1996   const char *content =
1997     /*00000000011111111112222222222333333.3333444444444.455555555556
1998       12345678901234567890123456789012345.6789012345678.901234567890.  */
1999     ("test_name /* c-style comment */\n"
2000      "                                  \"test literal\"\n"
2001      " // test c++-style comment\n"
2002      "   42\n");
2003   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2004
2005   line_table_test ltt (case_);
2006
2007   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2008
2009   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2010   ASSERT_NE (fname, NULL);
2011
2012   /* Verify that we get the expected tokens back, with the correct
2013      location information.  */
2014
2015   location_t loc;
2016   const cpp_token *tok;
2017   tok = cpp_get_token_with_location (parser, &loc);
2018   ASSERT_NE (tok, NULL);
2019   ASSERT_EQ (tok->type, CPP_NAME);
2020   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2021   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2022
2023   tok = cpp_get_token_with_location (parser, &loc);
2024   ASSERT_NE (tok, NULL);
2025   ASSERT_EQ (tok->type, CPP_STRING);
2026   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2027   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2028
2029   tok = cpp_get_token_with_location (parser, &loc);
2030   ASSERT_NE (tok, NULL);
2031   ASSERT_EQ (tok->type, CPP_NUMBER);
2032   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2033   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2034
2035   tok = cpp_get_token_with_location (parser, &loc);
2036   ASSERT_NE (tok, NULL);
2037   ASSERT_EQ (tok->type, CPP_EOF);
2038
2039   cpp_finish (parser, NULL);
2040   cpp_destroy (parser);
2041 }
2042
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract base class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor,
   and is passed the lexer_test under construction.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class whose subclasses have non-trivial
     destructors, so give it a virtual destructor: destroying a subclass
     instance through a lexer_test_options * is then well-defined.  */
  virtual ~lexer_test_options () {}

  virtual void apply (lexer_test &) = 0;
};
2056
2057 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2058    in its dtor.
2059
2060    This is needed by struct lexer_test to ensure that the cleanup of the
2061    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2062
2063 class cpp_reader_ptr
2064 {
2065  public:
2066   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2067
2068   ~cpp_reader_ptr ()
2069   {
2070     cpp_finish (m_ptr, NULL);
2071     cpp_destroy (m_ptr);
2072   }
2073
2074   operator cpp_reader * () const { return m_ptr; }
2075
2076  private:
2077   cpp_reader *m_ptr;
2078 };
2079
2080 /* A struct for writing lexer tests.  */
2081
2082 struct lexer_test
2083 {
2084   lexer_test (const line_table_case &case_, const char *content,
2085               lexer_test_options *options);
2086   ~lexer_test ();
2087
2088   const cpp_token *get_token ();
2089
2090   /* The ordering of these fields matters.
2091      The line_table_test must be first, since the cpp_reader_ptr
2092      uses it.
2093      The cpp_reader must be cleaned up *after* the temp_source_file
2094      since the filenames in input.c's input cache are owned by the
2095      cpp_reader; in particular, when ~temp_source_file evicts the
2096      filename the filenames must still be alive.  */
2097   line_table_test m_ltt;
2098   cpp_reader_ptr m_parser;
2099   temp_source_file m_tempfile;
2100   string_concat_db m_concats;
2101   bool m_implicitly_expect_EOF;
2102 };
2103
2104 /* Use an EBCDIC encoding for the execution charset, specifically
2105    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2106
2107    This exercises iconv integration within libcpp.
2108    Not every build of iconv supports the given charset,
2109    so we need to flag this error and handle it gracefully.  */
2110
2111 class ebcdic_execution_charset : public lexer_test_options
2112 {
2113  public:
2114   ebcdic_execution_charset () : m_num_iconv_errors (0)
2115     {
2116       gcc_assert (s_singleton == NULL);
2117       s_singleton = this;
2118     }
2119   ~ebcdic_execution_charset ()
2120     {
2121       gcc_assert (s_singleton == this);
2122       s_singleton = NULL;
2123     }
2124
2125   void apply (lexer_test &test) FINAL OVERRIDE
2126   {
2127     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2128     cpp_opts->narrow_charset = "IBM1047";
2129
2130     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2131     callbacks->diagnostic = on_diagnostic;
2132   }
2133
2134   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2135                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2136                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2137                              rich_location *richloc ATTRIBUTE_UNUSED,
2138                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2139     ATTRIBUTE_FPTR_PRINTF(5,0)
2140   {
2141     gcc_assert (s_singleton);
2142     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2143     const char *msg = "conversion from %s to %s not supported by iconv";
2144 #ifdef ENABLE_NLS
2145     msg = dgettext ("cpplib", msg);
2146 #endif
2147     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2148        when the local iconv build doesn't support the conversion.  */
2149     if (strcmp (msgid, msg) == 0)
2150       {
2151         s_singleton->m_num_iconv_errors++;
2152         return true;
2153       }
2154
2155     /* Otherwise, we have an unexpected error.  */
2156     abort ();
2157   }
2158
2159   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2160
2161  private:
2162   static ebcdic_execution_charset *s_singleton;
2163   int m_num_iconv_errors;
2164 };
2165
2166 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2167
2168 /* A lexer_test_options subclass that records a list of diagnostic
2169    messages emitted by the lexer.  */
2170
2171 class lexer_diagnostic_sink : public lexer_test_options
2172 {
2173  public:
2174   lexer_diagnostic_sink ()
2175   {
2176     gcc_assert (s_singleton == NULL);
2177     s_singleton = this;
2178   }
2179   ~lexer_diagnostic_sink ()
2180   {
2181     gcc_assert (s_singleton == this);
2182     s_singleton = NULL;
2183
2184     int i;
2185     char *str;
2186     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2187       free (str);
2188   }
2189
2190   void apply (lexer_test &test) FINAL OVERRIDE
2191   {
2192     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2193     callbacks->diagnostic = on_diagnostic;
2194   }
2195
2196   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2197                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2198                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2199                              rich_location *richloc ATTRIBUTE_UNUSED,
2200                              const char *msgid, va_list *ap)
2201     ATTRIBUTE_FPTR_PRINTF(5,0)
2202   {
2203     char *msg = xvasprintf (msgid, *ap);
2204     s_singleton->m_diagnostics.safe_push (msg);
2205     return true;
2206   }
2207
2208   auto_vec<char *> m_diagnostics;
2209
2210  private:
2211   static lexer_diagnostic_sink *s_singleton;
2212 };
2213
2214 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2215
2216 /* Constructor.  Override line_table with a new instance based on CASE_,
2217    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2218    start parsing the tempfile.  */
2219
2220 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2221                         lexer_test_options *options)
2222 : m_ltt (case_),
2223   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2224   /* Create a tempfile and write the text to it.  */
2225   m_tempfile (SELFTEST_LOCATION, ".c", content),
2226   m_concats (),
2227   m_implicitly_expect_EOF (true)
2228 {
2229   if (options)
2230     options->apply (*this);
2231
2232   cpp_init_iconv (m_parser);
2233
2234   /* Parse the file.  */
2235   const char *fname = cpp_read_main_file (m_parser,
2236                                           m_tempfile.get_filename ());
2237   ASSERT_NE (fname, NULL);
2238 }
2239
2240 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2241
2242 lexer_test::~lexer_test ()
2243 {
2244   location_t loc;
2245   const cpp_token *tok;
2246
2247   if (m_implicitly_expect_EOF)
2248     {
2249       tok = cpp_get_token_with_location (m_parser, &loc);
2250       ASSERT_NE (tok, NULL);
2251       ASSERT_EQ (tok->type, CPP_EOF);
2252     }
2253 }
2254
2255 /* Get the next token from m_parser.  */
2256
2257 const cpp_token *
2258 lexer_test::get_token ()
2259 {
2260   location_t loc;
2261   const cpp_token *tok;
2262
2263   tok = cpp_get_token_with_location (m_parser, &loc);
2264   ASSERT_NE (tok, NULL);
2265   return tok;
2266 }
2267
2268 /* Verify that locations within string literals are correctly handled.  */
2269
2270 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2271    using the string concatenation database for TEST.
2272
2273    Assert that the character at index IDX is on EXPECTED_LINE,
2274    and that it begins at column EXPECTED_START_COL and ends at
2275    EXPECTED_FINISH_COL (unless the locations are beyond
2276    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2277    columns).  */
2278
2279 static void
2280 assert_char_at_range (const location &loc,
2281                       lexer_test& test,
2282                       location_t strloc, enum cpp_ttype type, int idx,
2283                       int expected_line, int expected_start_col,
2284                       int expected_finish_col)
2285 {
2286   cpp_reader *pfile = test.m_parser;
2287   string_concat_db *concats = &test.m_concats;
2288
2289   source_range actual_range = source_range();
2290   const char *err
2291     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2292                                  &actual_range);
2293   if (should_have_column_data_p (strloc))
2294     ASSERT_EQ_AT (loc, NULL, err);
2295   else
2296     {
2297       ASSERT_STREQ_AT (loc,
2298                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2299                        err);
2300       return;
2301     }
2302
2303   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2304   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2305   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2306   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2307
2308   if (should_have_column_data_p (actual_range.m_start))
2309     {
2310       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2311       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2312     }
2313   if (should_have_column_data_p (actual_range.m_finish))
2314     {
2315       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2316       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2317     }
2318 }
2319
/* Convenience wrapper around assert_char_at_range, which passes
   SELFTEST_LOCATION (i.e. the point of use of this macro) as the
   effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE), (EXPECTED_START_COL),          \
                        (EXPECTED_FINISH_COL))
2328
2329 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2330    using the string concatenation database for TEST.
2331
2332    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2333
2334 static void
2335 assert_num_substring_ranges (const location &loc,
2336                              lexer_test& test,
2337                              location_t strloc,
2338                              enum cpp_ttype type,
2339                              int expected_num_ranges)
2340 {
2341   cpp_reader *pfile = test.m_parser;
2342   string_concat_db *concats = &test.m_concats;
2343
2344   int actual_num_ranges = -1;
2345   const char *err
2346     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2347                                            &actual_num_ranges);
2348   if (should_have_column_data_p (strloc))
2349     ASSERT_EQ_AT (loc, NULL, err);
2350   else
2351     {
2352       ASSERT_STREQ_AT (loc,
2353                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2354                        err);
2355       return;
2356     }
2357   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2358 }
2359
/* Convenience wrapper around assert_num_substring_ranges, which passes
   SELFTEST_LOCATION (i.e. the point of use of this macro) as the
   effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2367
2368
2369 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2370    returns an error (using the string concatenation database for TEST).  */
2371
2372 static void
2373 assert_has_no_substring_ranges (const location &loc,
2374                                 lexer_test& test,
2375                                 location_t strloc,
2376                                 enum cpp_ttype type,
2377                                 const char *expected_err)
2378 {
2379   cpp_reader *pfile = test.m_parser;
2380   string_concat_db *concats = &test.m_concats;
2381   cpp_substring_ranges ranges;
2382   const char *actual_err
2383     = get_substring_ranges_for_loc (pfile, concats, strloc,
2384                                     type, ranges);
2385   if (should_have_column_data_p (strloc))
2386     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2387   else
2388     ASSERT_STREQ_AT (loc,
2389                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2390                      actual_err);
2391 }
2392
/* Convenience wrapper around assert_has_no_substring_ranges, which
   passes SELFTEST_LOCATION (i.e. the point of use of this macro) as the
   effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE), (ERR))
2396
2397 /* Lex a simple string literal.  Verify the substring location data, before
2398    and after running cpp_interpret_string on it.  */
2399
2400 static void
2401 test_lexer_string_locations_simple (const line_table_case &case_)
2402 {
2403   /* Digits 0-9 (with 0 at column 10), the simple way.
2404      ....................000000000.11111111112.2222222223333333333
2405      ....................123456789.01234567890.1234567890123456789
2406      We add a trailing comment to ensure that we correctly locate
2407      the end of the string literal token.  */
2408   const char *content = "        \"0123456789\" /* not a string */\n";
2409   lexer_test test (case_, content, NULL);
2410
2411   /* Verify that we get the expected token back, with the correct
2412      location information.  */
2413   const cpp_token *tok = test.get_token ();
2414   ASSERT_EQ (tok->type, CPP_STRING);
2415   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2416   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2417
2418   /* At this point in lexing, the quote characters are treated as part of
2419      the string (they are stripped off by cpp_interpret_string).  */
2420
2421   ASSERT_EQ (tok->val.str.len, 12);
2422
2423   /* Verify that cpp_interpret_string works.  */
2424   cpp_string dst_string;
2425   const enum cpp_ttype type = CPP_STRING;
2426   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2427                                       &dst_string, type);
2428   ASSERT_TRUE (result);
2429   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2430   free (const_cast <unsigned char *> (dst_string.text));
2431
2432   /* Verify ranges of individual characters.  This no longer includes the
2433      opening quote, but does include the closing quote.  */
2434   for (int i = 0; i <= 10; i++)
2435     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2436                           10 + i, 10 + i);
2437
2438   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2439 }
2440
2441 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2442    encoding.  */
2443
2444 static void
2445 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2446 {
2447   /* EBCDIC support requires iconv.  */
2448   if (!HAVE_ICONV)
2449     return;
2450
2451   /* Digits 0-9 (with 0 at column 10), the simple way.
2452      ....................000000000.11111111112.2222222223333333333
2453      ....................123456789.01234567890.1234567890123456789
2454      We add a trailing comment to ensure that we correctly locate
2455      the end of the string literal token.  */
2456   const char *content = "        \"0123456789\" /* not a string */\n";
2457   ebcdic_execution_charset use_ebcdic;
2458   lexer_test test (case_, content, &use_ebcdic);
2459
2460   /* Verify that we get the expected token back, with the correct
2461      location information.  */
2462   const cpp_token *tok = test.get_token ();
2463   ASSERT_EQ (tok->type, CPP_STRING);
2464   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2465   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2466
2467   /* At this point in lexing, the quote characters are treated as part of
2468      the string (they are stripped off by cpp_interpret_string).  */
2469
2470   ASSERT_EQ (tok->val.str.len, 12);
2471
2472   /* The remainder of the test requires an iconv implementation that
2473      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2474   if (use_ebcdic.iconv_errors_occurred_p ())
2475     return;
2476
2477   /* Verify that cpp_interpret_string works.  */
2478   cpp_string dst_string;
2479   const enum cpp_ttype type = CPP_STRING;
2480   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2481                                       &dst_string, type);
2482   ASSERT_TRUE (result);
2483   /* We should now have EBCDIC-encoded text, specifically
2484      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2485      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2486   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2487                 (const char *)dst_string.text);
2488   free (const_cast <unsigned char *> (dst_string.text));
2489
2490   /* Verify that we don't attempt to record substring location information
2491      for such cases.  */
2492   ASSERT_HAS_NO_SUBSTRING_RANGES
2493     (test, tok->src_loc, type,
2494      "execution character set != source character set");
2495 }
2496
2497 /* Lex a string literal containing a hex-escaped character.
2498    Verify the substring location data, before and after running
2499    cpp_interpret_string on it.  */
2500
2501 static void
2502 test_lexer_string_locations_hex (const line_table_case &case_)
2503 {
2504   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2505      and with a space in place of digit 6, to terminate the escaped
2506      hex code.
2507      ....................000000000.111111.11112222.
2508      ....................123456789.012345.67890123.  */
2509   const char *content = "        \"01234\\x35 789\"\n";
2510   lexer_test test (case_, content, NULL);
2511
2512   /* Verify that we get the expected token back, with the correct
2513      location information.  */
2514   const cpp_token *tok = test.get_token ();
2515   ASSERT_EQ (tok->type, CPP_STRING);
2516   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2517   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2518
2519   /* At this point in lexing, the quote characters are treated as part of
2520      the string (they are stripped off by cpp_interpret_string).  */
2521   ASSERT_EQ (tok->val.str.len, 15);
2522
2523   /* Verify that cpp_interpret_string works.  */
2524   cpp_string dst_string;
2525   const enum cpp_ttype type = CPP_STRING;
2526   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2527                                       &dst_string, type);
2528   ASSERT_TRUE (result);
2529   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2530   free (const_cast <unsigned char *> (dst_string.text));
2531
2532   /* Verify ranges of individual characters.  This no longer includes the
2533      opening quote, but does include the closing quote.  */
2534   for (int i = 0; i <= 4; i++)
2535     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2536   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2537   for (int i = 6; i <= 10; i++)
2538     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2539
2540   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2541 }
2542
2543 /* Lex a string literal containing an octal-escaped character.
2544    Verify the substring location data after running cpp_interpret_string
2545    on it.  */
2546
2547 static void
2548 test_lexer_string_locations_oct (const line_table_case &case_)
2549 {
2550   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2551      and with a space in place of digit 6, to terminate the escaped
2552      octal code.
2553      ....................000000000.111111.11112222.2222223333333333444
2554      ....................123456789.012345.67890123.4567890123456789012  */
2555   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2556   lexer_test test (case_, content, NULL);
2557
2558   /* Verify that we get the expected token back, with the correct
2559      location information.  */
2560   const cpp_token *tok = test.get_token ();
2561   ASSERT_EQ (tok->type, CPP_STRING);
2562   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2563
2564   /* Verify that cpp_interpret_string works.  */
2565   cpp_string dst_string;
2566   const enum cpp_ttype type = CPP_STRING;
2567   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2568                                       &dst_string, type);
2569   ASSERT_TRUE (result);
2570   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2571   free (const_cast <unsigned char *> (dst_string.text));
2572
2573   /* Verify ranges of individual characters.  This no longer includes the
2574      opening quote, but does include the closing quote.  */
2575   for (int i = 0; i < 5; i++)
2576     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2577   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2578   for (int i = 6; i <= 10; i++)
2579     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2580
2581   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2582 }
2583
2584 /* Test of string literal containing letter escapes.  */
2585
2586 static void
2587 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2588 {
2589   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2590      .....................000000000.1.11111.1.1.11222.22222223333333
2591      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2592   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2593   lexer_test test (case_, content, NULL);
2594
2595   /* Verify that we get the expected tokens back.  */
2596   const cpp_token *tok = test.get_token ();
2597   ASSERT_EQ (tok->type, CPP_STRING);
2598   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2599
2600   /* Verify ranges of individual characters. */
2601   /* "\t".  */
2602   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603                         0, 1, 10, 11);
2604   /* "foo". */
2605   for (int i = 1; i <= 3; i++)
2606     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2607                           i, 1, 11 + i, 11 + i);
2608   /* "\\" and "\n".  */
2609   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2610                         4, 1, 15, 16);
2611   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612                         5, 1, 17, 18);
2613
2614   /* "bar" and closing quote for nul-terminator.  */
2615   for (int i = 6; i <= 9; i++)
2616     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2617                           i, 1, 13 + i, 13 + i);
2618
2619   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2620 }
2621
2622 /* Another test of a string literal containing a letter escape.
2623    Based on string seen in
2624      printf ("%-%\n");
2625    in gcc.dg/format/c90-printf-1.c.  */
2626
2627 static void
2628 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2629 {
2630   /* .....................000000000.1111.11.1111.22222222223.
2631      .....................123456789.0123.45.6789.01234567890.  */
2632   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2633   lexer_test test (case_, content, NULL);
2634
2635   /* Verify that we get the expected tokens back.  */
2636   const cpp_token *tok = test.get_token ();
2637   ASSERT_EQ (tok->type, CPP_STRING);
2638   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2639
2640   /* Verify ranges of individual characters. */
2641   /* "%-%".  */
2642   for (int i = 0; i < 3; i++)
2643     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2644                           i, 1, 10 + i, 10 + i);
2645   /* "\n".  */
2646   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2647                         3, 1, 13, 14);
2648
2649   /* Closing quote for nul-terminator.  */
2650   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2651                         4, 1, 15, 15);
2652
2653   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2654 }
2655
2656 /* Lex a string literal containing UCN 4 characters.
2657    Verify the substring location data after running cpp_interpret_string
2658    on it.  */
2659
2660 static void
2661 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2662 {
2663   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2664      as UCN 4.
2665      ....................000000000.111111.111122.222222223.33333333344444
2666      ....................123456789.012345.678901.234567890.12345678901234  */
2667   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2668   lexer_test test (case_, content, NULL);
2669
2670   /* Verify that we get the expected token back, with the correct
2671      location information.  */
2672   const cpp_token *tok = test.get_token ();
2673   ASSERT_EQ (tok->type, CPP_STRING);
2674   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2675
2676   /* Verify that cpp_interpret_string works.
2677      The string should be encoded in the execution character
2678      set.  Assuming that that is UTF-8, we should have the following:
2679      -----------  ----  -----  -------  ----------------
2680      Byte offset  Byte  Octal  Unicode  Source Column(s)
2681      -----------  ----  -----  -------  ----------------
2682      0            0x30         '0'      10
2683      1            0x31         '1'      11
2684      2            0x32         '2'      12
2685      3            0x33         '3'      13
2686      4            0x34         '4'      14
2687      5            0xE2  \342   U+2174   15-20
2688      6            0x85  \205    (cont)  15-20
2689      7            0xB4  \264    (cont)  15-20
2690      8            0xE2  \342   U+2175   21-26
2691      9            0x85  \205    (cont)  21-26
2692      10           0xB5  \265    (cont)  21-26
2693      11           0x37         '7'      27
2694      12           0x38         '8'      28
2695      13           0x39         '9'      29
2696      14           0x00                  30 (closing quote)
2697      -----------  ----  -----  -------  ---------------.  */
2698
2699   cpp_string dst_string;
2700   const enum cpp_ttype type = CPP_STRING;
2701   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2702                                       &dst_string, type);
2703   ASSERT_TRUE (result);
2704   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2705                 (const char *)dst_string.text);
2706   free (const_cast <unsigned char *> (dst_string.text));
2707
2708   /* Verify ranges of individual characters.  This no longer includes the
2709      opening quote, but does include the closing quote.
2710      '01234'.  */
2711   for (int i = 0; i <= 4; i++)
2712     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2713   /* U+2174.  */
2714   for (int i = 5; i <= 7; i++)
2715     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2716   /* U+2175.  */
2717   for (int i = 8; i <= 10; i++)
2718     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2719   /* '789' and nul terminator  */
2720   for (int i = 11; i <= 14; i++)
2721     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2722
2723   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2724 }
2725
2726 /* Lex a string literal containing UCN 8 characters.
2727    Verify the substring location data after running cpp_interpret_string
2728    on it.  */
2729
2730 static void
2731 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2732 {
2733   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2734      ....................000000000.111111.1111222222.2222333333333.344444
2735      ....................123456789.012345.6789012345.6789012345678.901234  */
2736   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2737   lexer_test test (case_, content, NULL);
2738
2739   /* Verify that we get the expected token back, with the correct
2740      location information.  */
2741   const cpp_token *tok = test.get_token ();
2742   ASSERT_EQ (tok->type, CPP_STRING);
2743   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2744                            "\"01234\\U00002174\\U00002175789\"");
2745
2746   /* Verify that cpp_interpret_string works.
2747      The UTF-8 encoding of the string is identical to that from
2748      the ucn4 testcase above; the only difference is the column
2749      locations.  */
2750   cpp_string dst_string;
2751   const enum cpp_ttype type = CPP_STRING;
2752   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2753                                       &dst_string, type);
2754   ASSERT_TRUE (result);
2755   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2756                 (const char *)dst_string.text);
2757   free (const_cast <unsigned char *> (dst_string.text));
2758
2759   /* Verify ranges of individual characters.  This no longer includes the
2760      opening quote, but does include the closing quote.
2761      '01234'.  */
2762   for (int i = 0; i <= 4; i++)
2763     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2764   /* U+2174.  */
2765   for (int i = 5; i <= 7; i++)
2766     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2767   /* U+2175.  */
2768   for (int i = 8; i <= 10; i++)
2769     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2770   /* '789' at columns 35-37  */
2771   for (int i = 11; i <= 13; i++)
2772     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2773   /* Closing quote/nul-terminator at column 38.  */
2774   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2775
2776   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2777 }
2778
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   and return it as a value in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  uint32_t host_value = 0;

  /* Fold the bytes in from most significant to least significant.  */
  for (unsigned int idx = 0; idx < 4; idx++)
    host_value = (host_value << 8) | bytes[idx];

  return host_value;
}
2790
2791 /* Lex a wide string literal and verify that attempts to read substring
2792    location data from it fail gracefully.  */
2793
2794 static void
2795 test_lexer_string_locations_wide_string (const line_table_case &case_)
2796 {
2797   /* Digits 0-9.
2798      ....................000000000.11111111112.22222222233333
2799      ....................123456789.01234567890.12345678901234  */
2800   const char *content = "       L\"0123456789\" /* non-str */\n";
2801   lexer_test test (case_, content, NULL);
2802
2803   /* Verify that we get the expected token back, with the correct
2804      location information.  */
2805   const cpp_token *tok = test.get_token ();
2806   ASSERT_EQ (tok->type, CPP_WSTRING);
2807   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2808
2809   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2810   cpp_string dst_string;
2811   const enum cpp_ttype type = CPP_WSTRING;
2812   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2813                                       &dst_string, type);
2814   ASSERT_TRUE (result);
2815   /* The cpp_reader defaults to big-endian with
2816      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2817      now be encoded as UTF-32BE.  */
2818   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2819   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2820   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2821   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2822   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2823   free (const_cast <unsigned char *> (dst_string.text));
2824
2825   /* We don't yet support generating substring location information
2826      for L"" strings.  */
2827   ASSERT_HAS_NO_SUBSTRING_RANGES
2828     (test, tok->src_loc, type,
2829      "execution character set != source character set");
2830 }
2831
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   and return it as a value in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *) ptr_be_value;
  const unsigned int high_byte = bytes[0];
  const unsigned int low_byte = bytes[1];

  return (uint16_t) ((high_byte << 8) | low_byte);
}
2840
2841 /* Lex a u"" string literal and verify that attempts to read substring
2842    location data from it fail gracefully.  */
2843
2844 static void
2845 test_lexer_string_locations_string16 (const line_table_case &case_)
2846 {
2847   /* Digits 0-9.
2848      ....................000000000.11111111112.22222222233333
2849      ....................123456789.01234567890.12345678901234  */
2850   const char *content = "       u\"0123456789\" /* non-str */\n";
2851   lexer_test test (case_, content, NULL);
2852
2853   /* Verify that we get the expected token back, with the correct
2854      location information.  */
2855   const cpp_token *tok = test.get_token ();
2856   ASSERT_EQ (tok->type, CPP_STRING16);
2857   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2858
2859   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2860   cpp_string dst_string;
2861   const enum cpp_ttype type = CPP_STRING16;
2862   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2863                                       &dst_string, type);
2864   ASSERT_TRUE (result);
2865
2866   /* The cpp_reader defaults to big-endian, so dst_string should
2867      now be encoded as UTF-16BE.  */
2868   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2869   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2870   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2871   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2872   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2873   free (const_cast <unsigned char *> (dst_string.text));
2874
2875   /* We don't yet support generating substring location information
2876      for L"" strings.  */
2877   ASSERT_HAS_NO_SUBSTRING_RANGES
2878     (test, tok->src_loc, type,
2879      "execution character set != source character set");
2880 }
2881
2882 /* Lex a U"" string literal and verify that attempts to read substring
2883    location data from it fail gracefully.  */
2884
2885 static void
2886 test_lexer_string_locations_string32 (const line_table_case &case_)
2887 {
2888   /* Digits 0-9.
2889      ....................000000000.11111111112.22222222233333
2890      ....................123456789.01234567890.12345678901234  */
2891   const char *content = "       U\"0123456789\" /* non-str */\n";
2892   lexer_test test (case_, content, NULL);
2893
2894   /* Verify that we get the expected token back, with the correct
2895      location information.  */
2896   const cpp_token *tok = test.get_token ();
2897   ASSERT_EQ (tok->type, CPP_STRING32);
2898   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2899
2900   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2901   cpp_string dst_string;
2902   const enum cpp_ttype type = CPP_STRING32;
2903   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2904                                       &dst_string, type);
2905   ASSERT_TRUE (result);
2906
2907   /* The cpp_reader defaults to big-endian, so dst_string should
2908      now be encoded as UTF-32BE.  */
2909   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2910   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2911   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2912   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2913   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2914   free (const_cast <unsigned char *> (dst_string.text));
2915
2916   /* We don't yet support generating substring location information
2917      for L"" strings.  */
2918   ASSERT_HAS_NO_SUBSTRING_RANGES
2919     (test, tok->src_loc, type,
2920      "execution character set != source character set");
2921 }
2922
2923 /* Lex a u8-string literal.
2924    Verify the substring location data after running cpp_interpret_string
2925    on it.  */
2926
2927 static void
2928 test_lexer_string_locations_u8 (const line_table_case &case_)
2929 {
2930   /* Digits 0-9.
2931      ....................000000000.11111111112.22222222233333
2932      ....................123456789.01234567890.12345678901234  */
2933   const char *content = "      u8\"0123456789\" /* non-str */\n";
2934   lexer_test test (case_, content, NULL);
2935
2936   /* Verify that we get the expected token back, with the correct
2937      location information.  */
2938   const cpp_token *tok = test.get_token ();
2939   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2940   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2941
2942   /* Verify that cpp_interpret_string works.  */
2943   cpp_string dst_string;
2944   const enum cpp_ttype type = CPP_STRING;
2945   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2946                                       &dst_string, type);
2947   ASSERT_TRUE (result);
2948   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2949   free (const_cast <unsigned char *> (dst_string.text));
2950
2951   /* Verify ranges of individual characters.  This no longer includes the
2952      opening quote, but does include the closing quote.  */
2953   for (int i = 0; i <= 10; i++)
2954     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2955 }
2956
2957 /* Lex a string literal containing UTF-8 source characters.
2958    Verify the substring location data after running cpp_interpret_string
2959    on it.  */
2960
2961 static void
2962 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2963 {
2964  /* This string literal is written out to the source file as UTF-8,
2965     and is of the form "before mojibake after", where "mojibake"
2966     is written as the following four unicode code points:
2967        U+6587 CJK UNIFIED IDEOGRAPH-6587
2968        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2969        U+5316 CJK UNIFIED IDEOGRAPH-5316
2970        U+3051 HIRAGANA LETTER KE.
2971      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2972      "before" and "after" are 1 byte per unicode character.
2973
2974      The numbering shown are "columns", which are *byte* numbers within
2975      the line, rather than unicode character numbers.
2976
2977      .................... 000000000.1111111.
2978      .................... 123456789.0123456.  */
2979   const char *content = ("        \"before "
2980                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2981                               UTF-8: 0xE6 0x96 0x87
2982                               C octal escaped UTF-8: \346\226\207
2983                             "column" numbers: 17-19.  */
2984                          "\346\226\207"
2985
2986                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2987                               UTF-8: 0xE5 0xAD 0x97
2988                               C octal escaped UTF-8: \345\255\227
2989                             "column" numbers: 20-22.  */
2990                          "\345\255\227"
2991
2992                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2993                               UTF-8: 0xE5 0x8C 0x96
2994                               C octal escaped UTF-8: \345\214\226
2995                             "column" numbers: 23-25.  */
2996                          "\345\214\226"
2997
2998                          /* U+3051 HIRAGANA LETTER KE
2999                               UTF-8: 0xE3 0x81 0x91
3000                               C octal escaped UTF-8: \343\201\221
3001                             "column" numbers: 26-28.  */
3002                          "\343\201\221"
3003
3004                          /* column numbers 29 onwards
3005                           2333333.33334444444444
3006                           9012345.67890123456789. */
3007                          " after\" /* non-str */\n");
3008   lexer_test test (case_, content, NULL);
3009
3010   /* Verify that we get the expected token back, with the correct
3011      location information.  */
3012   const cpp_token *tok = test.get_token ();
3013   ASSERT_EQ (tok->type, CPP_STRING);
3014   ASSERT_TOKEN_AS_TEXT_EQ
3015     (test.m_parser, tok,
3016      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3017
3018   /* Verify that cpp_interpret_string works.  */
3019   cpp_string dst_string;
3020   const enum cpp_ttype type = CPP_STRING;
3021   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3022                                       &dst_string, type);
3023   ASSERT_TRUE (result);
3024   ASSERT_STREQ
3025     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3026      (const char *)dst_string.text);
3027   free (const_cast <unsigned char *> (dst_string.text));
3028
3029   /* Verify ranges of individual characters.  This no longer includes the
3030      opening quote, but does include the closing quote.
3031      Assuming that both source and execution encodings are UTF-8, we have
3032      a run of 25 octets in each, plus the NUL terminator.  */
3033   for (int i = 0; i < 25; i++)
3034     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3035   /* NUL-terminator should use the closing quote at column 35.  */
3036   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3037
3038   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3039 }
3040
3041 /* Test of string literal concatenation.  */
3042
3043 static void
3044 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3045 {
3046   /* Digits 0-9.
3047      .....................000000000.111111.11112222222222
3048      .....................123456789.012345.67890123456789.  */
3049   const char *content = ("        \"01234\" /* non-str */\n"
3050                          "        \"56789\" /* non-str */\n");
3051   lexer_test test (case_, content, NULL);
3052
3053   location_t input_locs[2];
3054
3055   /* Verify that we get the expected tokens back.  */
3056   auto_vec <cpp_string> input_strings;
3057   const cpp_token *tok_a = test.get_token ();
3058   ASSERT_EQ (tok_a->type, CPP_STRING);
3059   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3060   input_strings.safe_push (tok_a->val.str);
3061   input_locs[0] = tok_a->src_loc;
3062
3063   const cpp_token *tok_b = test.get_token ();
3064   ASSERT_EQ (tok_b->type, CPP_STRING);
3065   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3066   input_strings.safe_push (tok_b->val.str);
3067   input_locs[1] = tok_b->src_loc;
3068
3069   /* Verify that cpp_interpret_string works.  */
3070   cpp_string dst_string;
3071   const enum cpp_ttype type = CPP_STRING;
3072   bool result = cpp_interpret_string (test.m_parser,
3073                                       input_strings.address (), 2,
3074                                       &dst_string, type);
3075   ASSERT_TRUE (result);
3076   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3077   free (const_cast <unsigned char *> (dst_string.text));
3078
3079   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3080   test.m_concats.record_string_concatenation (2, input_locs);
3081
3082   location_t initial_loc = input_locs[0];
3083
3084   /* "01234" on line 1.  */
3085   for (int i = 0; i <= 4; i++)
3086     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3087   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3088   for (int i = 5; i <= 10; i++)
3089     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3090
3091   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3092 }
3093
3094 /* Another test of string literal concatenation.  */
3095
3096 static void
3097 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3098 {
3099   /* Digits 0-9.
3100      .....................000000000.111.11111112222222
3101      .....................123456789.012.34567890123456.  */
3102   const char *content = ("        \"01\" /* non-str */\n"
3103                          "        \"23\" /* non-str */\n"
3104                          "        \"45\" /* non-str */\n"
3105                          "        \"67\" /* non-str */\n"
3106                          "        \"89\" /* non-str */\n");
3107   lexer_test test (case_, content, NULL);
3108
3109   auto_vec <cpp_string> input_strings;
3110   location_t input_locs[5];
3111
3112   /* Verify that we get the expected tokens back.  */
3113   for (int i = 0; i < 5; i++)
3114     {
3115       const cpp_token *tok = test.get_token ();
3116       ASSERT_EQ (tok->type, CPP_STRING);
3117       input_strings.safe_push (tok->val.str);
3118       input_locs[i] = tok->src_loc;
3119     }
3120
3121   /* Verify that cpp_interpret_string works.  */
3122   cpp_string dst_string;
3123   const enum cpp_ttype type = CPP_STRING;
3124   bool result = cpp_interpret_string (test.m_parser,
3125                                       input_strings.address (), 5,
3126                                       &dst_string, type);
3127   ASSERT_TRUE (result);
3128   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3129   free (const_cast <unsigned char *> (dst_string.text));
3130
3131   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3132   test.m_concats.record_string_concatenation (5, input_locs);
3133
3134   location_t initial_loc = input_locs[0];
3135
3136   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3137      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3138      and expect get_source_range_for_substring to fail.
3139      However, for a string concatenation test, we can have a case
3140      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3141      but subsequent strings can be after it.
3142      Attempting to detect this within assert_char_at_range
3143      would overcomplicate the logic for the common test cases, so
3144      we detect it here.  */
3145   if (should_have_column_data_p (input_locs[0])
3146       && !should_have_column_data_p (input_locs[4]))
3147     {
3148       /* Verify that get_source_range_for_substring gracefully rejects
3149          this case.  */
3150       source_range actual_range;
3151       const char *err
3152         = get_source_range_for_char (test.m_parser, &test.m_concats,
3153                                      initial_loc, type, 0, &actual_range);
3154       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3155       return;
3156     }
3157
3158   for (int i = 0; i < 5; i++)
3159     for (int j = 0; j < 2; j++)
3160       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3161                             i + 1, 10 + j, 10 + j);
3162
3163   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3164   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3165
3166   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3167 }
3168
3169 /* Another test of string literal concatenation, this time combined with
3170    various kinds of escaped characters.  */
3171
3172 static void
3173 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3174 {
3175   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3176      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3177   const char *content
3178     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3179        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3180     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3181   lexer_test test (case_, content, NULL);
3182
3183   auto_vec <cpp_string> input_strings;
3184   location_t input_locs[4];
3185
3186   /* Verify that we get the expected tokens back.  */
3187   for (int i = 0; i < 4; i++)
3188     {
3189       const cpp_token *tok = test.get_token ();
3190       ASSERT_EQ (tok->type, CPP_STRING);
3191       input_strings.safe_push (tok->val.str);
3192       input_locs[i] = tok->src_loc;
3193     }
3194
3195   /* Verify that cpp_interpret_string works.  */
3196   cpp_string dst_string;
3197   const enum cpp_ttype type = CPP_STRING;
3198   bool result = cpp_interpret_string (test.m_parser,
3199                                       input_strings.address (), 4,
3200                                       &dst_string, type);
3201   ASSERT_TRUE (result);
3202   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3203   free (const_cast <unsigned char *> (dst_string.text));
3204
3205   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3206   test.m_concats.record_string_concatenation (4, input_locs);
3207
3208   location_t initial_loc = input_locs[0];
3209
3210   for (int i = 0; i <= 4; i++)
3211     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3212   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3213   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3214   for (int i = 7; i <= 9; i++)
3215     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3216
3217   /* NUL-terminator should use the location of the final closing quote.  */
3218   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3219
3220   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3221 }
3222
3223 /* Test of string literal in a macro.  */
3224
3225 static void
3226 test_lexer_string_locations_macro (const line_table_case &case_)
3227 {
3228   /* Digits 0-9.
3229      .....................0000000001111111111.22222222223.
3230      .....................1234567890123456789.01234567890.  */
3231   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3232                          "  MACRO");
3233   lexer_test test (case_, content, NULL);
3234
3235   /* Verify that we get the expected tokens back.  */
3236   const cpp_token *tok = test.get_token ();
3237   ASSERT_EQ (tok->type, CPP_PADDING);
3238
3239   tok = test.get_token ();
3240   ASSERT_EQ (tok->type, CPP_STRING);
3241   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3242
3243   /* Verify ranges of individual characters.  We ought to
3244      see columns within the macro definition.  */
3245   for (int i = 0; i <= 10; i++)
3246     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3247                           i, 1, 20 + i, 20 + i);
3248
3249   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3250
3251   tok = test.get_token ();
3252   ASSERT_EQ (tok->type, CPP_PADDING);
3253 }
3254
3255 /* Test of stringification of a macro argument.  */
3256
3257 static void
3258 test_lexer_string_locations_stringified_macro_argument
3259   (const line_table_case &case_)
3260 {
3261   /* .....................000000000111111111122222222223.
3262      .....................123456789012345678901234567890.  */
3263   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3264                          "MACRO(foo)\n");
3265   lexer_test test (case_, content, NULL);
3266
3267   /* Verify that we get the expected token back.  */
3268   const cpp_token *tok = test.get_token ();
3269   ASSERT_EQ (tok->type, CPP_PADDING);
3270
3271   tok = test.get_token ();
3272   ASSERT_EQ (tok->type, CPP_STRING);
3273   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3274
3275   /* We don't support getting the location of a stringified macro
3276      argument.  Verify that it fails gracefully.  */
3277   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3278                                   "cpp_interpret_string_1 failed");
3279
3280   tok = test.get_token ();
3281   ASSERT_EQ (tok->type, CPP_PADDING);
3282
3283   tok = test.get_token ();
3284   ASSERT_EQ (tok->type, CPP_PADDING);
3285 }
3286
3287 /* Ensure that we are fail gracefully if something attempts to pass
3288    in a location that isn't a string literal token.  Seen on this code:
3289
3290      const char a[] = " %d ";
3291      __builtin_printf (a, 0.5);
3292                        ^
3293
3294    when c-format.c erroneously used the indicated one-character
3295    location as the format string location, leading to a read past the
3296    end of a string buffer in cpp_interpret_string_1.  */
3297
3298 static void
3299 test_lexer_string_locations_non_string (const line_table_case &case_)
3300 {
3301   /* .....................000000000111111111122222222223.
3302      .....................123456789012345678901234567890.  */
3303   const char *content = ("         a\n");
3304   lexer_test test (case_, content, NULL);
3305
3306   /* Verify that we get the expected token back.  */
3307   const cpp_token *tok = test.get_token ();
3308   ASSERT_EQ (tok->type, CPP_NAME);
3309   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3310
3311   /* At this point, libcpp is attempting to interpret the name as a
3312      string literal, despite it not starting with a quote.  We don't detect
3313      that, but we should at least fail gracefully.  */
3314   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3315                                   "cpp_interpret_string_1 failed");
3316 }
3317
3318 /* Ensure that we can read substring information for a token which
3319    starts in one linemap and ends in another .  Adapted from
3320    gcc.dg/cpp/pr69985.c.  */
3321
3322 static void
3323 test_lexer_string_locations_long_line (const line_table_case &case_)
3324 {
3325   /* .....................000000.000111111111
3326      .....................123456.789012346789.  */
3327   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3328                          "     \"0123456789012345678901234567890123456789"
3329                          "0123456789012345678901234567890123456789"
3330                          "0123456789012345678901234567890123456789"
3331                          "0123456789\"\n");
3332
3333   lexer_test test (case_, content, NULL);
3334
3335   /* Verify that we get the expected token back.  */
3336   const cpp_token *tok = test.get_token ();
3337   ASSERT_EQ (tok->type, CPP_STRING);
3338
3339   if (!should_have_column_data_p (line_table->highest_location))
3340     return;
3341
3342   /* Verify ranges of individual characters.  */
3343   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3344   for (int i = 0; i < 131; i++)
3345     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3346                           i, 2, 7 + i, 7 + i);
3347 }
3348
3349 /* Test of locations within a raw string that doesn't contain a newline.  */
3350
3351 static void
3352 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3353 {
3354   /* .....................00.0000000111111111122.
3355      .....................12.3456789012345678901.  */
3356   const char *content = ("R\"foo(0123456789)foo\"\n");
3357   lexer_test test (case_, content, NULL);
3358
3359   /* Verify that we get the expected token back.  */
3360   const cpp_token *tok = test.get_token ();
3361   ASSERT_EQ (tok->type, CPP_STRING);
3362
3363   /* Verify that cpp_interpret_string works.  */
3364   cpp_string dst_string;
3365   const enum cpp_ttype type = CPP_STRING;
3366   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3367                                       &dst_string, type);
3368   ASSERT_TRUE (result);
3369   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3370   free (const_cast <unsigned char *> (dst_string.text));
3371
3372   if (!should_have_column_data_p (line_table->highest_location))
3373     return;
3374
3375   /* 0-9, plus the nil terminator.  */
3376   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3377   for (int i = 0; i < 11; i++)
3378     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3379                           i, 1, 7 + i, 7 + i);
3380 }
3381
3382 /* Test of locations within a raw string that contains a newline.  */
3383
3384 static void
3385 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3386 {
3387   /* .....................00.0000.
3388      .....................12.3456.  */
3389   const char *content = ("R\"foo(\n"
3390   /* .....................00000.
3391      .....................12345.  */
3392                          "hello\n"
3393                          "world\n"
3394   /* .....................00000.
3395      .....................12345.  */
3396                          ")foo\"\n");
3397   lexer_test test (case_, content, NULL);
3398
3399   /* Verify that we get the expected token back.  */
3400   const cpp_token *tok = test.get_token ();
3401   ASSERT_EQ (tok->type, CPP_STRING);
3402
3403   /* Verify that cpp_interpret_string works.  */
3404   cpp_string dst_string;
3405   const enum cpp_ttype type = CPP_STRING;
3406   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3407                                       &dst_string, type);
3408   ASSERT_TRUE (result);
3409   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3410   free (const_cast <unsigned char *> (dst_string.text));
3411
3412   if (!should_have_column_data_p (line_table->highest_location))
3413     return;
3414
3415   /* Currently we don't support locations within raw strings that
3416      contain newlines.  */
3417   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3418                                   "range endpoints are on different lines");
3419 }
3420
3421 /* Test of parsing an unterminated raw string.  */
3422
3423 static void
3424 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3425 {
3426   const char *content = "R\"ouch()ouCh\" /* etc */";
3427
3428   lexer_diagnostic_sink diagnostics;
3429   lexer_test test (case_, content, &diagnostics);
3430   test.m_implicitly_expect_EOF = false;
3431
3432   /* Attempt to parse the raw string.  */
3433   const cpp_token *tok = test.get_token ();
3434   ASSERT_EQ (tok->type, CPP_EOF);
3435
3436   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3437   /* We expect the message "unterminated raw string"
3438      in the "cpplib" translation domain.
3439      It's not clear that dgettext is available on all supported hosts,
3440      so this assertion is commented-out for now.
3441        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3442                      diagnostics.m_diagnostics[0]);
3443   */
3444 }
3445
3446 /* Test of lexing char constants.  */
3447
3448 static void
3449 test_lexer_char_constants (const line_table_case &case_)
3450 {
3451   /* Various char constants.
3452      .....................0000000001111111111.22222222223.
3453      .....................1234567890123456789.01234567890.  */
3454   const char *content = ("         'a'\n"
3455                          "        u'a'\n"
3456                          "        U'a'\n"
3457                          "        L'a'\n"
3458                          "         'abc'\n");
3459   lexer_test test (case_, content, NULL);
3460
3461   /* Verify that we get the expected tokens back.  */
3462   /* 'a'.  */
3463   const cpp_token *tok = test.get_token ();
3464   ASSERT_EQ (tok->type, CPP_CHAR);
3465   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3466
3467   unsigned int chars_seen;
3468   int unsignedp;
3469   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3470                                           &chars_seen, &unsignedp);
3471   ASSERT_EQ (cc, 'a');
3472   ASSERT_EQ (chars_seen, 1);
3473
3474   /* u'a'.  */
3475   tok = test.get_token ();
3476   ASSERT_EQ (tok->type, CPP_CHAR16);
3477   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3478
3479   /* U'a'.  */
3480   tok = test.get_token ();
3481   ASSERT_EQ (tok->type, CPP_CHAR32);
3482   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3483
3484   /* L'a'.  */
3485   tok = test.get_token ();
3486   ASSERT_EQ (tok->type, CPP_WCHAR);
3487   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3488
3489   /* 'abc' (c-char-sequence).  */
3490   tok = test.get_token ();
3491   ASSERT_EQ (tok->type, CPP_CHAR);
3492   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3493 }
3494 /* A table of interesting location_t values, giving one axis of our test
3495    matrix.  */
3496
3497 static const location_t boundary_locations[] = {
3498   /* Zero means "don't override the default values for a new line_table".  */
3499   0,
3500
3501   /* An arbitrary non-zero value that isn't close to one of
3502      the boundary values below.  */
3503   0x10000,
3504
3505   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3506   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3507   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3508   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3509   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3510   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3511
3512   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3513   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3514   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3515   LINE_MAP_MAX_LOCATION_WITH_COLS,
3516   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3517   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3518 };
3519
3520 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3521
3522 void
3523 for_each_line_table_case (void (*testcase) (const line_table_case &))
3524 {
3525   /* As noted above in the description of struct line_table_case,
3526      we want to explore a test matrix of interesting line_table
3527      situations, running various selftests for each case within the
3528      matrix.  */
3529
3530   /* Run all tests with:
3531      (a) line_table->default_range_bits == 0, and
3532      (b) line_table->default_range_bits == 5.  */
3533   int num_cases_tested = 0;
3534   for (int default_range_bits = 0; default_range_bits <= 5;
3535        default_range_bits += 5)
3536     {
3537       /* ...and use each of the "interesting" location values as
3538          the starting location within line_table.  */
3539       const int num_boundary_locations
3540         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3541       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3542         {
3543           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3544
3545           testcase (c);
3546
3547           num_cases_tested++;
3548         }
3549     }
3550
3551   /* Verify that we fully covered the test matrix.  */
3552   ASSERT_EQ (num_cases_tested, 2 * 12);
3553 }
3554
3555 /* Run all of the selftests within this file.  */
3556
3557 void
3558 input_c_tests ()
3559 {
3560   test_linenum_comparisons ();
3561   test_should_have_column_data_p ();
3562   test_unknown_location ();
3563   test_builtins ();
3564   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3565
3566   for_each_line_table_case (test_accessing_ordinary_linemaps);
3567   for_each_line_table_case (test_lexer);
3568   for_each_line_table_case (test_lexer_string_locations_simple);
3569   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3570   for_each_line_table_case (test_lexer_string_locations_hex);
3571   for_each_line_table_case (test_lexer_string_locations_oct);
3572   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3573   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3574   for_each_line_table_case (test_lexer_string_locations_ucn4);
3575   for_each_line_table_case (test_lexer_string_locations_ucn8);
3576   for_each_line_table_case (test_lexer_string_locations_wide_string);
3577   for_each_line_table_case (test_lexer_string_locations_string16);
3578   for_each_line_table_case (test_lexer_string_locations_string32);
3579   for_each_line_table_case (test_lexer_string_locations_u8);
3580   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3581   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3582   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3583   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3584   for_each_line_table_case (test_lexer_string_locations_macro);
3585   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3586   for_each_line_table_case (test_lexer_string_locations_non_string);
3587   for_each_line_table_case (test_lexer_string_locations_long_line);
3588   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3589   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3590   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3591   for_each_line_table_case (test_lexer_char_constants);
3592
3593   test_reading_source_line ();
3594 }
3595
3596 } // namespace selftest
3597
3598 #endif /* CHECKING_P */