gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   /* The file_path is the key for identifying a particular file in
  67      the cache.
  68      For libcpp-using code, the underlying buffer for this field is
  69      owned by the corresponding _cpp_file within the cpp_reader.  */
  70   const char *file_path;
  71
  72   FILE *fp;
  73
  74   /* This points to the content of the file that we've read so
  75      far.  */
  76   char *data;
  77
  78   /*  The size of the DATA array above.*/
  79   size_t size;
  80
  81   /* The number of bytes read from the underlying file so far.  This
  82      must be less (or equal) than SIZE above.  */
  83   size_t nb_read;
  84
  85   /* The index of the beginning of the current line.  */
  86   size_t line_start_idx;
  87
  88   /* The number of the previous line read.  This starts at 1.  Zero
  89      means we've read no line so far.  */
  90   size_t line_num;
  91
  92   /* This is the total number of lines of the current file.  At the
  93      moment, we try to get this information from the line map
  94      subsystem.  Note that this is just a hint.  When using the C++
  95      front-end, this hint is correct because the input file is then
  96      completely tokenized before parsing starts; so the line map knows
  97      the number of lines before compilation really starts.  For e.g,
  98      the C front-end, it can happen that we start emitting diagnostics
  99      before the line map has seen the end of the file.  */
 100   size_t total_lines;
 101
 102   /* Could this file be missing a trailing newline on its final line?
 103      Initially true (to cope with empty files), set to true/false
 104      as each line is read.  */
 105   bool missing_trailing_newline;
 106
 107   /* This is a record of the beginning and end of the lines we've seen
 108      while reading the file.  This is useful to avoid walking the data
 109      from the beginning when we are asked to read a line that is
 110      before LINE_START_IDX above.  Note that the maximum size of this
 111      record is fcache_line_record_size, so that the memory consumption
 112      doesn't explode.  We thus scale total_lines down to
 113      fcache_line_record_size.  */
 114   vec<line_info, va_heap> line_record;
 115
 116   fcache ();
 117   ~fcache ();
 118 };
 119
 120 /* Current position in real source file.  */
 121
 122 location_t input_location = UNKNOWN_LOCATION;
 123
 124 struct line_maps *line_table;
 125
 126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
 127    This needs to be a global so that it can be a GC root, and thus
 128    prevent the stashed copy from being garbage-collected if the GC runs
 129    during a line_table_test.  */
 130
 131 struct line_maps *saved_line_table;
 132
 133 static fcache *fcache_tab;
 134 static const size_t fcache_tab_size = 16;
 135 static const size_t fcache_buffer_size = 4 * 1024;
 136 static const size_t fcache_line_record_size = 100;
 137
 138 /* Expand the source location LOC into a human readable location.  If
 139    LOC resolves to a builtin location, the file name of the readable
 140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 141    TRUE and LOC is virtual, then it is resolved to the expansion
 142    point of the involved macro.  Otherwise, it is resolved to the
 143    spelling location of the token.
 144
 145    When resolving to the spelling location of the token, if the
 146    resulting location is for a built-in location (that is, it has no
 147    associated line/column) in the context of a macro expansion, the
 148    returned location is the first one (while unwinding the macro
 149    location towards its expansion point) that is in real source
 150    code.
 151
 152    ASPECT controls which part of the location to use.  */
 153
 154 static expanded_location
 155 expand_location_1 (source_location loc,
 156                    bool expansion_point_p,
 157                    enum location_aspect aspect)
 158 {
 159   expanded_location xloc;
 160   const line_map_ordinary *map;
 161   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 162   tree block = NULL;
 163
 164   if (IS_ADHOC_LOC (loc))
 165     {
 166       block = LOCATION_BLOCK (loc);
 167       loc = LOCATION_LOCUS (loc);
 168     }
 169
 170   memset (&xloc, 0, sizeof (xloc));
 171
 172   if (loc >= RESERVED_LOCATION_COUNT)
 173     {
 174       if (!expansion_point_p)
 175         {
 176           /* We want to resolve LOC to its spelling location.
 177
 178              But if that spelling location is a reserved location that
 179              appears in the context of a macro expansion (like for a
 180              location for a built-in token), let's consider the first
 181              location (toward the expansion point) that is not reserved;
 182              that is, the first location that is in real source code.  */
 183           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 184                                                           loc, NULL);
 185           lrk = LRK_SPELLING_LOCATION;
 186         }
 187       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 188
 189       /* loc is now either in an ordinary map, or is a reserved location.
 190          If it is a compound location, the caret is in a spelling location,
 191          but the start/finish might still be a virtual location.
 192          Depending of what the caller asked for, we may need to recurse
 193          one level in order to resolve any virtual locations in the
 194          end-points.  */
 195       switch (aspect)
 196         {
 197         default:
 198           gcc_unreachable ();
 199           /* Fall through.  */
 200         case LOCATION_ASPECT_CARET:
 201           break;
 202         case LOCATION_ASPECT_START:
 203           {
 204             source_location start = get_start (loc);
 205             if (start != loc)
 206               return expand_location_1 (start, expansion_point_p, aspect);
 207           }
 208           break;
 209         case LOCATION_ASPECT_FINISH:
 210           {
 211             source_location finish = get_finish (loc);
 212             if (finish != loc)
 213               return expand_location_1 (finish, expansion_point_p, aspect);
 214           }
 215           break;
 216         }
 217       xloc = linemap_expand_location (line_table, map, loc);
 218     }
 219
 220   xloc.data = block;
 221   if (loc <= BUILTINS_LOCATION)
 222     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 223
 224   return xloc;
 225 }
 226
 227 /* Initialize the set of cache used for files accessed by caret
 228    diagnostic.  */
 229
 230 static void
 231 diagnostic_file_cache_init (void)
 232 {
 233   if (fcache_tab == NULL)
 234     fcache_tab = new fcache[fcache_tab_size];
 235 }
 236
 237 /* Free the resources used by the set of cache used for files accessed
 238    by caret diagnostic.  */
 239
 240 void
 241 diagnostic_file_cache_fini (void)
 242 {
 243   if (fcache_tab)
 244     {
 245       delete [] (fcache_tab);
 246       fcache_tab = NULL;
 247     }
 248 }
 249
 250 /* Return the total lines number that have been read so far by the
 251    line map (in the preprocessor) so far.  For languages like C++ that
 252    entirely preprocess the input file before starting to parse, this
 253    equals the actual number of lines of the file.  */
 254
 255 static size_t
 256 total_lines_num (const char *file_path)
 257 {
 258   size_t r = 0;
 259   source_location l = 0;
 260   if (linemap_get_file_highest_location (line_table, file_path, &l))
 261     {
 262       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 263       expanded_location xloc = expand_location (l);
 264       r = xloc.line;
 265     }
 266   return r;
 267 }
 268
 269 /* Lookup the cache used for the content of a given file accessed by
 270    caret diagnostic.  Return the found cached file, or NULL if no
 271    cached file was found.  */
 272
 273 static fcache*
 274 lookup_file_in_cache_tab (const char *file_path)
 275 {
 276   if (file_path == NULL)
 277     return NULL;
 278
 279   diagnostic_file_cache_init ();
 280
 281   /* This will contain the found cached file.  */
 282   fcache *r = NULL;
 283   for (unsigned i = 0; i < fcache_tab_size; ++i)
 284     {
 285       fcache *c = &fcache_tab[i];
 286       if (c->file_path && !strcmp (c->file_path, file_path))
 287         {
 288           ++c->use_count;
 289           r = c;
 290         }
 291     }
 292
 293   if (r)
 294     ++r->use_count;
 295
 296   return r;
 297 }
 298
 299 /* Purge any mention of FILENAME from the cache of files used for
 300    printing source code.  For use in selftests when working
 301    with tempfiles.  */
 302
 303 void
 304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 305 {
 306   gcc_assert (file_path);
 307
 308   fcache *r = lookup_file_in_cache_tab (file_path);
 309   if (!r)
 310     /* Not found.  */
 311     return;
 312
 313   r->file_path = NULL;
 314   if (r->fp)
 315     fclose (r->fp);
 316   r->fp = NULL;
 317   r->nb_read = 0;
 318   r->line_start_idx = 0;
 319   r->line_num = 0;
 320   r->line_record.truncate (0);
 321   r->use_count = 0;
 322   r->total_lines = 0;
 323   r->missing_trailing_newline = true;
 324 }
 325
 326 /* Return the file cache that has been less used, recently, or the
 327    first empty one.  If HIGHEST_USE_COUNT is non-null,
 328    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 329    in the cache table.  */
 330
 331 static fcache*
 332 evicted_cache_tab_entry (unsigned *highest_use_count)
 333 {
 334   diagnostic_file_cache_init ();
 335
 336   fcache *to_evict = &fcache_tab[0];
 337   unsigned huc = to_evict->use_count;
 338   for (unsigned i = 1; i < fcache_tab_size; ++i)
 339     {
 340       fcache *c = &fcache_tab[i];
 341       bool c_is_empty = (c->file_path == NULL);
 342
 343       if (c->use_count < to_evict->use_count
 344           || (to_evict->file_path && c_is_empty))
 345         /* We evict C because it's either an entry with a lower use
 346            count or one that is empty.  */
 347         to_evict = c;
 348
 349       if (huc < c->use_count)
 350         huc = c->use_count;
 351
 352       if (c_is_empty)
 353         /* We've reached the end of the cache; subsequent elements are
 354            all empty.  */
 355         break;
 356     }
 357
 358   if (highest_use_count)
 359     *highest_use_count = huc;
 360
 361   return to_evict;
 362 }
 363
 364 /* Create the cache used for the content of a given file to be
 365    accessed by caret diagnostic.  This cache is added to an array of
 366    cache and can be retrieved by lookup_file_in_cache_tab.  This
 367    function returns the created cache.  Note that only the last
 368    fcache_tab_size files are cached.  */
 369
 370 static fcache*
 371 add_file_to_cache_tab (const char *file_path)
 372 {
 373
 374   FILE *fp = fopen (file_path, "r");
 375   if (fp == NULL)
 376     return NULL;
 377
 378   unsigned highest_use_count = 0;
 379   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 380   r->file_path = file_path;
 381   if (r->fp)
 382     fclose (r->fp);
 383   r->fp = fp;
 384   r->nb_read = 0;
 385   r->line_start_idx = 0;
 386   r->line_num = 0;
 387   r->line_record.truncate (0);
 388   /* Ensure that this cache entry doesn't get evicted next time
 389      add_file_to_cache_tab is called.  */
 390   r->use_count = ++highest_use_count;
 391   r->total_lines = total_lines_num (file_path);
 392   r->missing_trailing_newline = true;
 393
 394   return r;
 395 }
 396
 397 /* Lookup the cache used for the content of a given file accessed by
 398    caret diagnostic.  If no cached file was found, create a new cache
 399    for this file, add it to the array of cached file and return
 400    it.  */
 401
 402 static fcache*
 403 lookup_or_add_file_to_cache_tab (const char *file_path)
 404 {
 405   fcache *r = lookup_file_in_cache_tab (file_path);
 406   if (r == NULL)
 407     r = add_file_to_cache_tab (file_path);
 408   return r;
 409 }
 410
 411 /* Default constructor for a cache of file used by caret
 412    diagnostic.  */
 413
 414 fcache::fcache ()
 415 : use_count (0), file_path (NULL), fp (NULL), data (0),
 416   size (0), nb_read (0), line_start_idx (0), line_num (0),
 417   total_lines (0), missing_trailing_newline (true)
 418 {
 419   line_record.create (0);
 420 }
 421
 422 /* Destructor for a cache of file used by caret diagnostic.  */
 423
 424 fcache::~fcache ()
 425 {
 426   if (fp)
 427     {
 428       fclose (fp);
 429       fp = NULL;
 430     }
 431   if (data)
 432     {
 433       XDELETEVEC (data);
 434       data = 0;
 435     }
 436   line_record.release ();
 437 }
 438
 439 /* Returns TRUE iff the cache would need to be filled with data coming
 440    from the file.  That is, either the cache is empty or full or the
 441    current line is empty.  Note that if the cache is full, it would
 442    need to be extended and filled again.  */
 443
 444 static bool
 445 needs_read (fcache *c)
 446 {
 447   return (c->nb_read == 0
 448           || c->nb_read == c->size
 449           || (c->line_start_idx >= c->nb_read - 1));
 450 }
 451
 452 /*  Return TRUE iff the cache is full and thus needs to be
 453     extended.  */
 454
 455 static bool
 456 needs_grow (fcache *c)
 457 {
 458   return c->nb_read == c->size;
 459 }
 460
 461 /* Grow the cache if it needs to be extended.  */
 462
 463 static void
 464 maybe_grow (fcache *c)
 465 {
 466   if (!needs_grow (c))
 467     return;
 468
 469   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 470   c->data = XRESIZEVEC (char, c->data, size);
 471   c->size = size;
 472 }
 473
 474 /*  Read more data into the cache.  Extends the cache if need be.
 475     Returns TRUE iff new data could be read.  */
 476
 477 static bool
 478 read_data (fcache *c)
 479 {
 480   if (feof (c->fp) || ferror (c->fp))
 481     return false;
 482
 483   maybe_grow (c);
 484
 485   char * from = c->data + c->nb_read;
 486   size_t to_read = c->size - c->nb_read;
 487   size_t nb_read = fread (from, 1, to_read, c->fp);
 488
 489   if (ferror (c->fp))
 490     return false;
 491
 492   c->nb_read += nb_read;
 493   return !!nb_read;
 494 }
 495
 496 /* Read new data iff the cache needs to be filled with more data
 497    coming from the file FP.  Return TRUE iff the cache was filled with
 498    mode data.  */
 499
 500 static bool
 501 maybe_read_data (fcache *c)
 502 {
 503   if (!needs_read (c))
 504     return false;
 505   return read_data (c);
 506 }
 507
 508 /* Read a new line from file FP, using C as a cache for the data
 509    coming from the file.  Upon successful completion, *LINE is set to
 510    the beginning of the line found.  *LINE points directly in the
 511    line cache and is only valid until the next call of get_next_line.
 512    *LINE_LEN is set to the length of the line.  Note that the line
 513    does not contain any terminal delimiter.  This function returns
 514    true if some data was read or process from the cache, false
 515    otherwise.  Note that subsequent calls to get_next_line might
 516    make the content of *LINE invalid.  */
 517
 518 static bool
 519 get_next_line (fcache *c, char **line, ssize_t *line_len)
 520 {
 521   /* Fill the cache with data to process.  */
 522   maybe_read_data (c);
 523
 524   size_t remaining_size = c->nb_read - c->line_start_idx;
 525   if (remaining_size == 0)
 526     /* There is no more data to process.  */
 527     return false;
 528
 529   char *line_start = c->data + c->line_start_idx;
 530
 531   char *next_line_start = NULL;
 532   size_t len = 0;
 533   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 534   if (line_end == NULL)
 535     {
 536       /* We haven't found the end-of-line delimiter in the cache.
 537          Fill the cache with more data from the file and look for the
 538          '\n'.  */
 539       while (maybe_read_data (c))
 540         {
 541           line_start = c->data + c->line_start_idx;
 542           remaining_size = c->nb_read - c->line_start_idx;
 543           line_end = (char *) memchr (line_start, '\n', remaining_size);
 544           if (line_end != NULL)
 545             {
 546               next_line_start = line_end + 1;
 547               break;
 548             }
 549         }
 550       if (line_end == NULL)
 551         {
 552           /* We've loadded all the file into the cache and still no
 553              '\n'.  Let's say the line ends up at one byte passed the
 554              end of the file.  This is to stay consistent with the case
 555              of when the line ends up with a '\n' and line_end points to
 556              that terminal '\n'.  That consistency is useful below in
 557              the len calculation.  */
 558           line_end = c->data + c->nb_read ;
 559           c->missing_trailing_newline = true;
 560         }
 561       else
 562         c->missing_trailing_newline = false;
 563     }
 564   else
 565     {
 566       next_line_start = line_end + 1;
 567       c->missing_trailing_newline = false;
 568     }
 569
 570   if (ferror (c->fp))
 571     return false;
 572
 573   /* At this point, we've found the end of the of line.  It either
 574      points to the '\n' or to one byte after the last byte of the
 575      file.  */
 576   gcc_assert (line_end != NULL);
 577
 578   len = line_end - line_start;
 579
 580   if (c->line_start_idx < c->nb_read)
 581     *line = line_start;
 582
 583   ++c->line_num;
 584
 585   /* Before we update our line record, make sure the hint about the
 586      total number of lines of the file is correct.  If it's not, then
 587      we give up recording line boundaries from now on.  */
 588   bool update_line_record = true;
 589   if (c->line_num > c->total_lines)
 590     update_line_record = false;
 591
 592     /* Now update our line record so that re-reading lines from the
 593      before c->line_start_idx is faster.  */
 594   if (update_line_record
 595       && c->line_record.length () < fcache_line_record_size)
 596     {
 597       /* If the file lines fits in the line record, we just record all
 598          its lines ...*/
 599       if (c->total_lines <= fcache_line_record_size
 600           && c->line_num > c->line_record.length ())
 601         c->line_record.safe_push (fcache::line_info (c->line_num,
 602                                                  c->line_start_idx,
 603                                                  line_end - c->data));
 604       else if (c->total_lines > fcache_line_record_size)
 605         {
 606           /* ... otherwise, we just scale total_lines down to
 607              (fcache_line_record_size lines.  */
 608           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 609           if (c->line_record.length () == 0
 610               || n >= c->line_record.length ())
 611             c->line_record.safe_push (fcache::line_info (c->line_num,
 612                                                      c->line_start_idx,
 613                                                      line_end - c->data));
 614         }
 615     }
 616
 617   /* Update c->line_start_idx so that it points to the next line to be
 618      read.  */
 619   if (next_line_start)
 620     c->line_start_idx = next_line_start - c->data;
 621   else
 622     /* We didn't find any terminal '\n'.  Let's consider that the end
 623        of line is the end of the data in the cache.  The next
 624        invocation of get_next_line will either read more data from the
 625        underlying file or return false early because we've reached the
 626        end of the file.  */
 627     c->line_start_idx = c->nb_read;
 628
 629   *line_len = len;
 630
 631   return true;
 632 }
 633
 634 /* Consume the next bytes coming from the cache (or from its
 635    underlying file if there are remaining unread bytes in the file)
 636    until we reach the next end-of-line (or end-of-file).  There is no
 637    copying from the cache involved.  Return TRUE upon successful
 638    completion.  */
 639
 640 static bool
 641 goto_next_line (fcache *cache)
 642 {
 643   char *l;
 644   ssize_t len;
 645
 646   return get_next_line (cache, &l, &len);
 647 }
 648
 649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 650    If the line was read successfully, *LINE points to the beginning
 651    of the line in the file cache and *LINE_LEN is the length of the
 652    line.  *LINE is not nul-terminated, but may contain zero bytes.
 653    *LINE is only valid until the next call of read_line_num.
 654    This function returns bool if a line was read.  */
 655
 656 static bool
 657 read_line_num (fcache *c, size_t line_num,
 658                char **line, ssize_t *line_len)
 659 {
 660   gcc_assert (line_num > 0);
 661
 662   if (line_num <= c->line_num)
 663     {
 664       /* We've been asked to read lines that are before c->line_num.
 665          So lets use our line record (if it's not empty) to try to
 666          avoid re-reading the file from the beginning again.  */
 667
 668       if (c->line_record.is_empty ())
 669         {
 670           c->line_start_idx = 0;
 671           c->line_num = 0;
 672         }
 673       else
 674         {
 675           fcache::line_info *i = NULL;
 676           if (c->total_lines <= fcache_line_record_size)
 677             {
 678               /* In languages where the input file is not totally
 679                  preprocessed up front, the c->total_lines hint
 680                  can be smaller than the number of lines of the
 681                  file.  In that case, only the first
 682                  c->total_lines have been recorded.
 683
 684                  Otherwise, the first c->total_lines we've read have
 685                  their start/end recorded here.  */
 686               i = (line_num <= c->total_lines)
 687                 ? &c->line_record[line_num - 1]
 688                 : &c->line_record[c->total_lines - 1];
 689               gcc_assert (i->line_num <= line_num);
 690             }
 691           else
 692             {
 693               /*  So the file had more lines than our line record
 694                   size.  Thus the number of lines we've recorded has
 695                   been scaled down to fcache_line_reacord_size.  Let's
 696                   pick the start/end of the recorded line that is
 697                   closest to line_num.  */
 698               size_t n = (line_num <= c->total_lines)
 699                 ? line_num * fcache_line_record_size / c->total_lines
 700                 : c ->line_record.length () - 1;
 701               if (n < c->line_record.length ())
 702                 {
 703                   i = &c->line_record[n];
 704                   gcc_assert (i->line_num <= line_num);
 705                 }
 706             }
 707
 708           if (i && i->line_num == line_num)
 709             {
 710               /* We have the start/end of the line.  */
 711               *line = c->data + i->start_pos;
 712               *line_len = i->end_pos - i->start_pos;
 713               return true;
 714             }
 715
 716           if (i)
 717             {
 718               c->line_start_idx = i->start_pos;
 719               c->line_num = i->line_num - 1;
 720             }
 721           else
 722             {
 723               c->line_start_idx = 0;
 724               c->line_num = 0;
 725             }
 726         }
 727     }
 728
 729   /*  Let's walk from line c->line_num up to line_num - 1, without
 730       copying any line.  */
 731   while (c->line_num < line_num - 1)
 732     if (!goto_next_line (c))
 733       return false;
 734
 735   /* The line we want is the next one.  Let's read and copy it back to
 736      the caller.  */
 737   return get_next_line (c, line, line_len);
 738 }
 739
 740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 741    The line is not nul-terminated.  The returned pointer is only
 742    valid until the next call of location_get_source_line.
 743    Note that the line can contain several null characters,
 744    so LINE_LEN, if non-null, points to the actual length of the line.
 745    If the function fails, NULL is returned.  */
 746
 747 const char *
 748 location_get_source_line (const char *file_path, int line,
 749                           int *line_len)
 750 {
 751   char *buffer = NULL;
 752   ssize_t len;
 753
 754   if (line == 0)
 755     return NULL;
 756
 757   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 758   if (c == NULL)
 759     return NULL;
 760
 761   bool read = read_line_num (c, line, &buffer, &len);
 762
 763   if (read && line_len)
 764     *line_len = len;
 765
 766   return read ? buffer : NULL;
 767 }
 768
 769 /* Determine if FILE_PATH missing a trailing newline on its final line.
 770    Only valid to call once all of the file has been loaded, by
 771    requesting a line number beyond the end of the file.  */
 772
 773 bool
 774 location_missing_trailing_newline (const char *file_path)
 775 {
 776   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 777   if (c == NULL)
 778     return false;
 779
 780   return c->missing_trailing_newline;
 781 }
 782
 783 /* Test if the location originates from the spelling location of a
 784    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 785    virtual) location of a built-in token that appears in the expansion
 786    list of a macro.  Please note that this function also works on
 787    tokens that result from built-in tokens.  For instance, the
 788    function would return true if passed a token "4" that is the result
 789    of the expansion of the built-in __LINE__ macro.  */
 790 bool
 791 is_location_from_builtin_token (source_location loc)
 792 {
 793   const line_map_ordinary *map = NULL;
 794   loc = linemap_resolve_location (line_table, loc,
 795                                   LRK_SPELLING_LOCATION, &map);
 796   return loc == BUILTINS_LOCATION;
 797 }
 798
 799 /* Expand the source location LOC into a human readable location.  If
 800    LOC is virtual, it resolves to the expansion point of the involved
 801    macro.  If LOC resolves to a builtin location, the file name of the
 802    readable location is set to the string "<built-in>".  */
 803
 804 expanded_location
 805 expand_location (source_location loc)
 806 {
 807   return expand_location_1 (loc, /*expansion_point_p=*/true,
 808                             LOCATION_ASPECT_CARET);
 809 }
 810
 811 /* Expand the source location LOC into a human readable location.  If
 812    LOC is virtual, it resolves to the expansion location of the
 813    relevant macro.  If LOC resolves to a builtin location, the file
 814    name of the readable location is set to the string
 815    "<built-in>".  */
 816
 817 expanded_location
 818 expand_location_to_spelling_point (source_location loc)
 819 {
 820   return expand_location_1 (loc, /*expansion_point_p=*/false,
 821                             LOCATION_ASPECT_CARET);
 822 }
 823
 824 /* The rich_location class within libcpp requires a way to expand
 825    source_location instances, and relies on the client code
 826    providing a symbol named
 827      linemap_client_expand_location_to_spelling_point
 828    to do this.
 829
 830    This is the implementation for libcommon.a (all host binaries),
 831    which simply calls into expand_location_1.  */
 832
 833 expanded_location
 834 linemap_client_expand_location_to_spelling_point (source_location loc,
 835                                                   enum location_aspect aspect)
 836 {
 837   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 838 }
 839
 840
 841 /* If LOCATION is in a system header and if it is a virtual location for
 842    a token coming from the expansion of a macro, unwind it to the
 843    location of the expansion point of the macro.  Otherwise, just return
 844    LOCATION.
 845
 846    This is used for instance when we want to emit diagnostics about a
 847    token that may be located in a macro that is itself defined in a
 848    system header, for example, for the NULL macro.  In such a case, if
 849    LOCATION were passed directly to diagnostic functions such as
 850    warning_at, the diagnostic would be suppressed (unless
 851    -Wsystem-headers).  */
 852
 853 source_location
 854 expansion_point_location_if_in_system_header (source_location location)
 855 {
 856   if (in_system_header_at (location))
 857     location = linemap_resolve_location (line_table, location,
 858                                          LRK_MACRO_EXPANSION_POINT,
 859                                          NULL);
 860   return location;
 861 }
 862
 863 /* If LOCATION is a virtual location for a token coming from the expansion
 864    of a macro, unwind to the location of the expansion point of the macro.  */
 865
 866 source_location
 867 expansion_point_location (source_location location)
 868 {
 869   return linemap_resolve_location (line_table, location,
 870                                    LRK_MACRO_EXPANSION_POINT, NULL);
 871 }
 872
 873 /* Construct a location with caret at CARET, ranging from START to
 874    FINISH (taken via get_start (START) and get_finish (FINISH)), e.g.
 875
 876                  11111111112
 877         12345678901234567890
 878      522
 879      523   return foo + bar;
 880                   ~~~~^~~~~
 881      524
 882
 883    The location's caret is at the "+", line 523 column 15, but starts
 884    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 885    of "bar" at column 19.  */
 886
 887 location_t
 888 make_location (location_t caret, location_t start, location_t finish)
 889 {
 890   location_t pure_loc = get_pure_location (caret);
 891   source_range src_range;
 892   src_range.m_start = get_start (start);
 893   src_range.m_finish = get_finish (finish);
 894   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 895                                                    pure_loc,
 896                                                    src_range,
 897                                                    NULL);
 898   return combined_loc;
 899 }
 900
 901 /* As make_location (CARET, START, FINISH), but taking SRC_RANGE as-is.  */
 902
 903 location_t
 904 make_location (location_t caret, source_range src_range)
 905 {
 906   location_t pure_loc = get_pure_location (caret);
 907   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 908 }
 909
 910 #define ONE_K 1024
 911 #define ONE_M (ONE_K * ONE_K)
 912
 913 /* Scale X for display: X itself if X < 10 K, X / 1024 if
 914    10 K <= X < 10 M, and X / (1024 * 1024) if X >= 10 M (K and M being
 915    the base-2 units ONE_K and ONE_M).  The result is cast to unsigned
 916    long.  X is evaluated more than once.  */
 917 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 918                   ? (x) \
 919                   : ((x) < 10 * ONE_M \
 920                      ? (x) / ONE_K \
 921                      : (x) / ONE_M)))
 922
 923 /* Return the unit suffix matching SCALE (X):
 924    - the character 'k' if X is at least 10 K (in base 2) but strictly
 925      lower than 10 M (in base 2)
 926    - the character 'M' if X is at least 10 M (in base 2)
 927    - the character ' ' if X is strictly lower than 10 K.  */
 928 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 929
 930 /* Expand to two comma-separated arguments for a printf-style call: SIZE
 931    scaled to a multiple of 1K or 1M (in base 2), then the matching unit
 932    character (k, M, or ' ').  SIZE is evaluated more than once.  */
 933 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 934
 935 /* Dump statistics to stderr about the memory usage of the line_table
 936    set of line maps.  This also displays some statistics about macro
 937    expansion.  Scaled amounts are followed by a unit: ' ', 'k' or 'M'.  */
 938
 939 void
 940 dump_line_table_statistics (void)
 941 {
 942   struct linemap_stats s;
 943   long total_used_map_size,
 944     macro_maps_size,
 945     total_allocated_map_size;
 946
 947   memset (&s, 0, sizeof (s));
 948
 949   linemap_get_statistics (line_table, &s);
 950
 951   macro_maps_size = s.macro_maps_used_size
 952     + s.macro_maps_locations_size;
 953
 954   total_allocated_map_size = s.ordinary_maps_allocated_size
 955     + s.macro_maps_allocated_size
 956     + s.macro_maps_locations_size;
 957
 958   total_used_map_size = s.ordinary_maps_used_size
 959     + s.macro_maps_used_size
 960     + s.macro_maps_locations_size;
 961   /* NB: SCALE () casts to unsigned long, so scaled amounts use %lu.  */
 962   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 963            s.num_expanded_macros);
 964   if (s.num_expanded_macros != 0)   /* guard the division below */
 965     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 966              s.num_macro_tokens / s.num_expanded_macros);
 967   fprintf (stderr,
 968            "\nLine Table allocations during the "
 969            "compilation process\n");
 970   fprintf (stderr, "Number of ordinary maps used:        %5lu%c\n",
 971            SCALE (s.num_ordinary_maps_used),
 972            STAT_LABEL (s.num_ordinary_maps_used));
 973   fprintf (stderr, "Ordinary map used size:              %5lu%c\n",
 974            SCALE (s.ordinary_maps_used_size),
 975            STAT_LABEL (s.ordinary_maps_used_size));
 976   fprintf (stderr, "Number of ordinary maps allocated:   %5lu%c\n",
 977            SCALE (s.num_ordinary_maps_allocated),
 978            STAT_LABEL (s.num_ordinary_maps_allocated));
 979   fprintf (stderr, "Ordinary maps allocated size:        %5lu%c\n",
 980            SCALE (s.ordinary_maps_allocated_size),
 981            STAT_LABEL (s.ordinary_maps_allocated_size));
 982   fprintf (stderr, "Number of macro maps used:           %5lu%c\n",
 983            SCALE (s.num_macro_maps_used),
 984            STAT_LABEL (s.num_macro_maps_used));
 985   fprintf (stderr, "Macro maps used size:                %5lu%c\n",
 986            SCALE (s.macro_maps_used_size),
 987            STAT_LABEL (s.macro_maps_used_size));
 988   fprintf (stderr, "Macro maps locations size:           %5lu%c\n",
 989            SCALE (s.macro_maps_locations_size),
 990            STAT_LABEL (s.macro_maps_locations_size));
 991   fprintf (stderr, "Macro maps size:                     %5lu%c\n",
 992            SCALE (macro_maps_size),
 993            STAT_LABEL (macro_maps_size));
 994   fprintf (stderr, "Duplicated maps locations size:      %5lu%c\n",
 995            SCALE (s.duplicated_macro_maps_locations_size),
 996            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 997   fprintf (stderr, "Total allocated maps size:           %5lu%c\n",
 998            SCALE (total_allocated_map_size),
 999            STAT_LABEL (total_allocated_map_size));
1000   fprintf (stderr, "Total used maps size:                %5lu%c\n",
1001            SCALE (total_used_map_size),
1002            STAT_LABEL (total_used_map_size));
1003   fprintf (stderr, "Ad-hoc table size:                   %5lu%c\n",
1004            SCALE (s.adhoc_table_size),
1005            STAT_LABEL (s.adhoc_table_size));
1006   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
1007            s.adhoc_table_entries_used);
1008   fprintf (stderr, "optimized_ranges: %i\n",
1009            line_table->num_optimized_ranges);
1010   fprintf (stderr, "unoptimized_ranges: %i\n",
1011            line_table->num_unoptimized_ranges);
1012
1013   fprintf (stderr, "\n");
1014 }
1015
1016 /* Return the end of the location interval of ordinary map IDX in SET.  */
1017
1018 static source_location
1019 get_end_location (struct line_maps *set, unsigned int idx)
1020 {
1021   const bool is_last_map = (idx == LINEMAPS_ORDINARY_USED (set) - 1);
1022   /* The last map is bounded by SET's highest_location; every other
1023      map stops where its successor begins.  */
1024   return (is_last_map
1025           ? set->highest_location
1026           : MAP_START_LOCATION (LINEMAPS_ORDINARY_MAP_AT (set, idx + 1))); }
1027
1028 /* Helper for write_digit_row: write DIGIT % 10 to STREAM as a character.  */
1029
1030 static void
1031 write_digit (FILE *stream, int digit)
1032 {
1033   fputc ('0' + (digit % 10), stream);   /* NB: a negative DIGIT is not handled specially */
1034 }
1035
1036 /* Helper function for dump_location_info.
1037    Write a row of digits to STREAM, one per column from 1 up to (but not
1038    including) MAX_COL: the digit of (location value / DIVISOR) modulo 10.  */
1039
1040 static void
1041 write_digit_row (FILE *stream, int indent,
1042                  const line_map_ordinary *map,
1043                  source_location loc, int max_col, int divisor)
1044 {
1045   fprintf (stream, "%*c", indent, ' ');   /* pad to a width of INDENT */
1046   fprintf (stream, "|");
1047   for (int column = 1; column < max_col; column++)
1048     {   /* Location of COLUMN: LOC plus COLUMN shifted left by m_range_bits.  */
1049       source_location column_loc = loc + (column << map->m_range_bits);
1050       write_digit (stream, column_loc / divisor);
1051     }
1052   fprintf (stream, "\n");
1053 }
1054
1055 /* Write the interval [START, END) of source_location values to STREAM,
1056    i.e. START is included and END is excluded.  */
1057
1058 static void
1059 dump_location_range (FILE *stream,
1060                      source_location start, source_location end)
1061 {
1062   fprintf (stream,
1063            "  source_location interval: %u <= loc < %u\n",
1064            start, end);
1065 }
1066
1067 /* Write NAME on a line of its own, then the interval [START, END) of
1068    source_location values, then an empty line, all to STREAM.  */
1069
1070 static void
1071 dump_labelled_location_range (FILE *stream,
1072                               const char *name,
1073                               source_location start, source_location end)
1074 {
1075   fprintf (stream, "%s\n", name);
1076   dump_location_range (stream, start, end);
1077   fprintf (stream, "\n");
1078 }
1079
1080 /* Write a visualization of the locations in the line_table to STREAM.  */
1081
1082 void
1083 dump_location_info (FILE *stream)
1084 {
1085   /* Visualize the reserved locations.  */
1086   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1087                                 0, RESERVED_LOCATION_COUNT);
1088
1089   /* Visualize the ordinary line_map instances, rendering the sources. */
1090   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1091     {
1092       source_location end_location = get_end_location (line_table, idx);
1093       /* half-closed: doesn't include this one. */
1094
1095       const line_map_ordinary *map
1096         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1097       fprintf (stream, "ORDINARY MAP: %u\n", idx);
1098       dump_location_range (stream,
1099                            MAP_START_LOCATION (map), end_location);
1100       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1101       fprintf (stream, "  starting at line: %i\n",
1102                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1103       fprintf (stream, "  column and range bits: %i\n",
1104                map->m_column_and_range_bits);
1105       fprintf (stream, "  column bits: %i\n",
1106                map->m_column_and_range_bits - map->m_range_bits);
1107       fprintf (stream, "  range bits: %i\n",
1108                map->m_range_bits);
1109
1110       /* Render the span of source lines that this "map" covers.  */
1111       for (source_location loc = MAP_START_LOCATION (map);
1112            loc < end_location;
1113            loc += (1 << map->m_range_bits) )
1114         {
1115           gcc_assert (pure_location_p (line_table, loc) );
1116
1117           expanded_location exploc
1118             = linemap_expand_location (line_table, map, loc);
1119
1120           if (exploc.column == 0)
1121             {
1122               /* Beginning of a new source line: draw the line.  */
1123
1124               int line_size;
1125               const char *line_text = location_get_source_line (exploc.file,
1126                                                                 exploc.line,
1127                                                                 &line_size);
1128               if (!line_text)
1129                 break;
1130               fprintf (stream,
1131                        "%s:%3i|loc:%5u|%.*s\n",
1132                        exploc.file, exploc.line,
1133                        loc,
1134                        line_size, line_text);
1135
1136               /* "loc" is at column 0, which means "the whole line".
1137                  Render the locations *within* the line, by underlining
1138                  it, showing the source_location numeric values
1139                  at each column.  */
1140               int max_col = (1 << map->m_column_and_range_bits) - 1;
1141               if (max_col > line_size)
1142                 max_col = line_size + 1;
1143               /* 14 == width of ":%3i|loc:%5u" printed after the file name.  */
1144               int indent = 14 + strlen (exploc.file);
1145
1146               /* Thousands.  */
1147               if (end_location > 999)
1148                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1149
1150               /* Hundreds.  */
1151               if (end_location > 99)
1152                 write_digit_row (stream, indent, map, loc, max_col, 100);
1153
1154               /* Tens.  */
1155               write_digit_row (stream, indent, map, loc, max_col, 10);
1156
1157               /* Units.  */
1158               write_digit_row (stream, indent, map, loc, max_col, 1);
1159             }
1160         }
1161       fprintf (stream, "\n");
1162     }
1163
1164   /* Visualize unallocated values.  */
1165   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166                                 line_table->highest_location,
1167                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169   /* Visualize the macro line_map instances (partly via inform ()).  */
1170   for (unsigned int pos = 0; pos < LINEMAPS_MACRO_USED (line_table); pos++)
1171     {
1172       /* Each macro map that is allocated owns source_location values
1173          that are *lower* than the one before them.
1174          Hence it's meaningful to view them either in order of ascending
1175          source locations, or in order of ascending macro map index.  */
1176       const bool ascending_source_locations = true;
1177       unsigned int idx = (ascending_source_locations
1178                           ? (LINEMAPS_MACRO_USED (line_table) - (pos + 1))
1179                           : pos);
1180       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1181       fprintf (stream, "MACRO %u: %s (%u tokens)\n",
1182                idx,
1183                linemap_map_get_macro_name (map),
1184                MACRO_MAP_NUM_MACRO_TOKENS (map));
1185       dump_location_range (stream,
1186                            map->start_location,
1187                            (map->start_location
1188                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190               "expansion point is location %u",
1191               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192       fprintf (stream, "  map->start_location: %u\n",
1193                map->start_location);
1194
1195       fprintf (stream, "  macro_locations:\n");
1196       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197         {
1198           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1200
1201           /* linemap_add_macro_token encodes token numbers in an expansion
1202              by putting them after MAP_START_LOCATION. */
1203
1204           /* I'm typically seeing 4 uninitialized entries at the end of
1205              0xafafafaf.
1206              This appears to be due to macro.c:replace_args
1207              adding 2 extra args for padding tokens; presumably there may
1208              be a leading and/or trailing padding token injected,
1209              each for 2 more location slots.
1210              This would explain there being up to 4 source_locations slots
1211              that may be uninitialized.  */
1212
1213           fprintf (stream, "    %u: %u, %u\n",
1214                    i,
1215                    x,
1216                    y);
1217           if (x == y)
1218             {
1219               if (x < MAP_START_LOCATION (map))
1220                 inform (x, "token %u has x-location == y-location == %u", i, x);
1221               else
1222                 fprintf (stream,
1223                          "x-location == y-location == %u encodes token # %u\n",
1224                          x, x - MAP_START_LOCATION (map));
1225             }
1226           else
1227             {
1228               inform (x, "token %u has x-location == %u", i, x);
1229               inform (x, "token %u has y-location == %u", i, y);
1230             }
1231         }
1232       fprintf (stream, "\n");
1233     }
1234
1235   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1236      macro map, presumably due to an off-by-one error somewhere
1237      between the logic in linemap_enter_macro and
1238      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1239   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1240                                 MAX_SOURCE_LOCATION,
1241                                 MAX_SOURCE_LOCATION + 1);
1242
1243   /* Visualize ad-hoc values.  */
1244   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1245                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1246 }
1247
1248 /* string_concat's constructor.  Copy the NUM locations at LOCS into m_locs.  */
1249
1250 string_concat::string_concat (int num, location_t *locs)
1251   : m_num (num)
1252 {
1253   m_locs = ggc_vec_alloc <location_t> (num);   /* room for NUM entries */
1254   for (int i = 0; i < num; i++)
1255     m_locs[i] = locs[i];
1256 }
1257
1258 /* string_concat_db's constructor: create the location-keyed m_table.  */
1259
1260 string_concat_db::string_concat_db ()
1261 {   /* 64: presumably the initial table size -- TODO confirm in hash_map.  */
1262   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1263 }
1264
1265 /* Record that a string concatenation occurred, covering NUM (> 1)
1266    string literal tokens.  LOCS is a non-NULL array of size NUM, containing
1267    the locations of the tokens.  A copy of LOCS is taken.  */
1268
1269 void
1270 string_concat_db::record_string_concatenation (int num, location_t *locs)
1271 {
1272   gcc_assert (num > 1);   /* one token alone is not a concatenation */
1273   gcc_assert (locs);
1274
1275   location_t key_loc = get_key_loc (locs[0]);   /* keyed on the first token */
1276
1277   string_concat *concat
1278     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1279   m_table->put (key_loc, concat);
1280 }
1281
1282 /* Determine if LOC was the location of the initial token of a
1283    concatenation of string literal tokens.
1284    If so, *OUT_NUM is written to with the number of tokens, and
1285    *OUT_LOCS with the location of an array of locations of the
1286    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1287    storage owned by the string_concat_db.
1288    Otherwise, return false, leaving *OUT_NUM and *OUT_LOCS untouched.  */
1289
1290 bool
1291 string_concat_db::get_string_concatenation (location_t loc,
1292                                             int *out_num,
1293                                             location_t **out_locs)
1294 {
1295   gcc_assert (out_num);
1296   gcc_assert (out_locs);
1297
1298   location_t key_loc = get_key_loc (loc);
1299
1300   string_concat **concat = m_table->get (key_loc);
1301   if (!concat)
1302     return false;
1303
1304   *out_num = (*concat)->m_num;
1305   *out_locs =(*concat)->m_locs;
1306   return true;
1307 }
1308
1309 /* Internal function.  Reduce LOC to the canonical form used for keys in
1310    the database: macro expansion, ad-hoc information and range
1311    information are all stripped away, leaving the location of the start
1312    of LOC within an ordinary linemap.  */
1313
1314 location_t
1315 string_concat_db::get_key_loc (location_t loc)
1316 {
1317   /* Resolve to the spelling location, then keep only the range start.  */
1318   const location_t spelling_loc
1319     = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1320                                 NULL);
1321
1322   return get_range_from_loc (line_table, spelling_loc).m_start;
1323 }
1324
1325 /* Helper class for use within get_substring_ranges_for_loc.
1326    A vec of cpp_string with responsibility for releasing all of the
1327    str->text for each str in the vector.  */
1328
1329 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1330 {
1331  public:
1332   auto_cpp_string_vec (int alloc)
1333     : auto_vec <cpp_string> (alloc) {}
1334
1335   ~auto_cpp_string_vec ()
1336   {
1337     /* Clean up the copies within this vec; each text must be free ()-able.  */
1338     int i;
1339     cpp_string *str;
1340     FOR_EACH_VEC_ELT (*this, i, str)
1341       free (const_cast <unsigned char *> (str->text));
1342   }
1343 };
1344
1345 /* Attempt to populate RANGES with source location information on the
1346    individual characters within the string literal found at STRLOC.
1347    If CONCATS is non-NULL, then any string literals that the token at
1348    STRLOC was concatenated with are also added to RANGES.  PFILE must be
1349    non-NULL; TYPE is handed on to cpp_interpret_string_ranges.
1350    Return NULL if successful, or an error message if any errors occurred (in
1351    which case RANGES may be only partially populated and should not
1352    be used).
1353
1354    This is implemented by re-parsing the relevant source line(s).  */
1355
1356 static const char *
1357 get_substring_ranges_for_loc (cpp_reader *pfile,
1358                               string_concat_db *concats,
1359                               location_t strloc,
1360                               enum cpp_ttype type,
1361                               cpp_substring_ranges &ranges)
1362 {
1363   gcc_assert (pfile);
1364
1365   if (strloc == UNKNOWN_LOCATION)
1366     return "unknown location";
1367
1368   /* Reparsing the strings requires accurate location information.
1369      If -ftrack-macro-expansion has been overridden from its default
1370      of 2, then we might have a location of a macro expansion point,
1371      rather than the location of the literal itself.
1372      Avoid this by requiring that we have full macro expansion tracking
1373      for substring locations to be available.  */
1374   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1375     return "track_macro_expansion != 2";
1376
1377   /* If #line or # 44 "file"-style directives are present, then there's
1378      no guarantee that the line numbers we have can be used to locate
1379      the strings.  For example, we might have a .i file with # directives
1380      pointing back to lines within a .c file, but the .c file might
1381      have been edited since the .i file was created.
1382      In such a case, the safest course is to disable on-demand substring
1383      locations.  */
1384   if (line_table->seen_line_directive)
1385     return "seen line directive";
1386
1387   /* If string concatenation has occurred at STRLOC, get the locations
1388      of all of the literal tokens making up the compound string.
1389      Otherwise the defaults below are left untouched and just STRLOC is used.  */
1390   int num_locs = 1;
1391   location_t *strlocs = &strloc;
1392   if (concats)
1393     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1394
1395   auto_cpp_string_vec strs (num_locs);
1396   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1397   for (int i = 0; i < num_locs; i++)
1398     {
1399       /* Get range of strloc.  We will use it to locate the start and finish
1400          of the literal token within the line.  */
1401       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1402
1403       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1404         /* If the string is within a macro expansion, we can't get at the
1405            end location.  */
1406         return "macro expansion";
1407
1408       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1409         /* If so, we can't reliably determine where the token started within
1410            its line.  */
1411         return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1412
1413       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1414         /* If so, we can't reliably determine where the token finished within
1415            its line.  */
1416         return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1417
1418       expanded_location start
1419         = expand_location_to_spelling_point (src_range.m_start);
1420       expanded_location finish
1421         = expand_location_to_spelling_point (src_range.m_finish);
1422       if (start.file != finish.file)
1423         return "range endpoints are in different files";
1424       if (start.line != finish.line)
1425         return "range endpoints are on different lines";
1426       if (start.column > finish.column)
1427         return "range endpoints are reversed";
1428
1429       int line_width;
1430       const char *line = location_get_source_line (start.file, start.line,
1431                                                    &line_width);
1432       if (line == NULL)
1433         return "unable to read source line";
1434
1435       /* Determine the extent of the literal (including quotes and
1436          leading prefix chars, such as the 'u' in a u"" token).  */
1437       int literal_length = finish.column - start.column + 1;
1438       /* Ensure that we don't crash if we got the wrong location: only form
1439          LITERAL once the span is known to lie within the line.  */
1440       if (start.column < 1
1441           || line_width < (start.column - 1 + literal_length))
1442         return "line is not wide enough";
1443       const char *literal = line + start.column - 1;
1444
1445       cpp_string from;
1446       from.len = literal_length;
1447       /* Make a copy of the literal, to avoid having to rely on
1448          the lifetime of the copy of the line within the cache.
1449          This will be released by the auto_cpp_string_vec dtor.  */
1450       from.text = XDUPVEC (unsigned char, literal, literal_length);
1451       strs.safe_push (from);
1452
1453       /* For very long lines, a new linemap could have started
1454          halfway through the token.
1455          Ensure that the loc_reader uses the linemap of the
1456          *end* of the token for its start location.  */
1457       const line_map_ordinary *final_ord_map;
1458       linemap_resolve_location (line_table, src_range.m_finish,
1459                                 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1460       location_t start_loc
1461         = linemap_position_for_line_and_column (line_table, final_ord_map,
1462                                                 start.line, start.column);
1463
1464       cpp_string_location_reader loc_reader (start_loc, line_table);
1465       loc_readers.safe_push (loc_reader);
1466     }
1467
1468   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1469   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1470                                                  loc_readers.address (),
1471                                                  num_locs, &ranges, type);
1472   if (err)
1473     return err;
1474
1475   /* Success: "ranges" should now contain information on the string.  */
1476   return NULL;
1477 }
1478
1479 /* Attempt to populate *OUT_LOC with source location information on the
1480    given characters within the string literal found at STRLOC.
1481    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1482    character set, and must be non-negative.
1483
1484    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1485    and string literal "012345\n789"
1486    *OUT_LOC is written to with:
1487      "012345\n789"
1488          ~^~~~~
1489
1490    If CONCATS is non-NULL, then any string literals that the token at
1491    STRLOC was concatenated with are also considered.
1492
1493    This is implemented by re-parsing the relevant source line(s).
1494
1495    Return NULL if successful, or an error message if any errors occurred.
1496    Error messages are intended for GCC developers (to help debugging) rather
1497    than for end-users.  */
1498
1499 const char *
1500 get_source_location_for_substring (cpp_reader *pfile,
1501                                    string_concat_db *concats,
1502                                    location_t strloc,
1503                                    enum cpp_ttype type,
1504                                    int caret_idx, int start_idx, int end_idx,
1505                                    source_location *out_loc)
1506 {
1507   gcc_checking_assert (caret_idx >= 0);
1508   gcc_checking_assert (start_idx >= 0);
1509   gcc_checking_assert (end_idx >= 0);
1510   gcc_assert (out_loc);
1511
1512   cpp_substring_ranges ranges;
1513   const char *err
1514     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1515   if (err)
1516     return err;
1517
1518   if (caret_idx >= ranges.get_num_ranges ())
1519     return "caret_idx out of range";
1520   if (start_idx >= ranges.get_num_ranges ())
1521     return "start_idx out of range";
1522   if (end_idx >= ranges.get_num_ranges ())
1523     return "end_idx out of range";
1524   /* The caret sits at the start of the CARET_IDX character's range.  */
1525   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1526                             ranges.get_range (start_idx).m_start,
1527                             ranges.get_range (end_idx).m_finish);
1528   return NULL;
1529 }
1530
1531 #if CHECKING_P
1532
1533 namespace selftest {
1534
1535 /* Selftests of location handling.  */
1536
1537 /* Attempt to populate *OUT_RANGE with source location information on the
1538    given character within the string literal found at STRLOC.
1539    CHAR_IDX (>= 0) refers to an offset within the execution character set.
1540    If CONCATS is non-NULL, then any string literals that the token at
1541    STRLOC was concatenated with are also considered.
1542
1543    This is implemented by re-parsing the relevant source line(s).
1544
1545    Return NULL if successful, or an error message (leaving *OUT_RANGE unwritten).
1546    Error messages are intended for GCC developers (to help debugging) rather
1547    than for end-users.  */
1548
1549 static const char *
1550 get_source_range_for_char (cpp_reader *pfile,
1551                            string_concat_db *concats,
1552                            location_t strloc,
1553                            enum cpp_ttype type,
1554                            int char_idx,
1555                            source_range *out_range)
1556 {
1557   gcc_checking_assert (char_idx >= 0);
1558   gcc_assert (out_range);
1559
1560   cpp_substring_ranges ranges;
1561   const char *err
1562     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1563   if (err)
1564     return err;
1565
1566   if (char_idx >= ranges.get_num_ranges ())
1567     return "char_idx out of range";
1568
1569   *out_range = ranges.get_range (char_idx);
1570   return NULL;
1571 }
1572
1573 /* As get_source_range_for_char, but write to *OUT the number of ranges
1574    that are available.  *OUT is only written on success.  */
1575
1576 static const char *
1577 get_num_source_ranges_for_substring (cpp_reader *pfile,
1578                                      string_concat_db *concats,
1579                                      location_t strloc,
1580                                      enum cpp_ttype type,
1581                                      int *out)
1582 {
1583   gcc_assert (out);
1584
1585   cpp_substring_ranges ranges;
1586   const char *err
1587     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1588
1589   if (err)
1590     return err;
1591
1592   *out = ranges.get_num_ranges ();
1593   return NULL;
1594 }
1595
1596 /* Selftests of location handling.  */
1597
1598 /* Verify that compare() on linenum_type handles comparisons over the full
1599    range of the type, using the values 0 and 0xffffffff.  */
1600
1601 static void
1602 test_linenum_comparisons ()
1603 {
1604   linenum_type min_line (0);
1605   linenum_type max_line (0xffffffff);   /* assumes a 32-bit unsigned linenum_type -- confirm */
1606   ASSERT_EQ (0, compare (min_line, min_line));
1607   ASSERT_EQ (0, compare (max_line, max_line));
1608
1609   ASSERT_GT (compare (max_line, min_line), 0);
1610   ASSERT_LT (compare (min_line, max_line), 0);
1611 }
1612
1613 /* Helper function for verifying location data: return false for a LOC
1614    above LINE_MAP_MAX_LOCATION_WITH_COLS (such location_t values are
1615    treated as having column 0), and true otherwise.  */
1616
1617 static bool
1618 should_have_column_data_p (location_t loc)
1619 {
1620   /* Look through any ad-hoc wrapper first.  */
1621   const location_t underlying
1622     = (IS_ADHOC_LOC (loc)
1623        ? get_location_from_adhoc_loc (line_table, loc) : loc);
1624   return underlying <= LINE_MAP_MAX_LOCATION_WITH_COLS;
1625 }
1626
1627 /* Selftest for should_have_column_data_p, checking both sides of the limit.  */
1628
1629 static void
1630 test_should_have_column_data_p ()
1631 {
1632   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1633   ASSERT_TRUE   /* the limit itself still counts as having column data */
1634     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1635   ASSERT_FALSE
1636     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1637 }
1638
1639 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1640    on LOC against EXP_FILENAME, EXP_LINENUM and EXP_COLNUM.  */
1641
1642 static void
1643 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1644               location_t loc)
1645 {
1646   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1647   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1648   /* If location_t values are sufficiently high, then column numbers
1649      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1650      When close to the threshold, column numbers *may* be present: if
1651      the final linemap before the threshold contains a line that straddles
1652      the threshold, locations in that line have column information.
         Hence the column is only checked when should_have_column_data_p.  */
1653   if (should_have_column_data_p (loc))
1654     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1655 }
1656
/* Many selftests build a line table containing one or more line maps.

   To maximize coverage, such tests are run under a matrix of
   configurations:
   - line_table->default_range_bits: some frontends use a non-zero
   value and others use zero
   - the fallback modes within line-map.c: beyond certain threshold
   values of source_location/location_t, line-map.c changes behavior
   (the range-packing optimization is disabled, then column-tracking
   is disabled).  These can be exercised by starting the line_table
   at interesting values at or near those thresholds.

   A line_table_case describes one cell of that test matrix.  */

struct line_table_case
{
  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;

  /* If non-zero, the value at which the line_table's highest_location
     and highest_line start out.  */
  int m_base_location;

  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }
};
1683
1684 /* Constructor.  Store the old value of line_table, and create a new
1685    one, using sane defaults.  */
1686
1687 line_table_test::line_table_test ()
1688 {
1689   gcc_assert (saved_line_table == NULL);
1690   saved_line_table = line_table;
1691   line_table = ggc_alloc<line_maps> ();
1692   linemap_init (line_table, BUILTINS_LOCATION);
1693   gcc_assert (saved_line_table->reallocator);
1694   line_table->reallocator = saved_line_table->reallocator;
1695   gcc_assert (saved_line_table->round_alloc_size);
1696   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1697   line_table->default_range_bits = 0;
1698 }
1699
1700 /* Constructor.  Store the old value of line_table, and create a new
1701    one, using the sitation described in CASE_.  */
1702
1703 line_table_test::line_table_test (const line_table_case &case_)
1704 {
1705   gcc_assert (saved_line_table == NULL);
1706   saved_line_table = line_table;
1707   line_table = ggc_alloc<line_maps> ();
1708   linemap_init (line_table, BUILTINS_LOCATION);
1709   gcc_assert (saved_line_table->reallocator);
1710   line_table->reallocator = saved_line_table->reallocator;
1711   gcc_assert (saved_line_table->round_alloc_size);
1712   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1713   line_table->default_range_bits = case_.m_default_range_bits;
1714   if (case_.m_base_location)
1715     {
1716       line_table->highest_location = case_.m_base_location;
1717       line_table->highest_line = case_.m_base_location;
1718     }
1719 }
1720
1721 /* Destructor.  Restore the old value of line_table.  */
1722
1723 line_table_test::~line_table_test ()
1724 {
1725   gcc_assert (saved_line_table != NULL);
1726   line_table = saved_line_table;
1727   saved_line_table = NULL;
1728 }
1729
1730 /* Verify basic operation of ordinary linemaps.  */
1731
1732 static void
1733 test_accessing_ordinary_linemaps (const line_table_case &case_)
1734 {
1735   line_table_test ltt (case_);
1736
1737   /* Build a simple linemap describing some locations. */
1738   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1739
1740   linemap_line_start (line_table, 1, 100);
1741   location_t loc_a = linemap_position_for_column (line_table, 1);
1742   location_t loc_b = linemap_position_for_column (line_table, 23);
1743
1744   linemap_line_start (line_table, 2, 100);
1745   location_t loc_c = linemap_position_for_column (line_table, 1);
1746   location_t loc_d = linemap_position_for_column (line_table, 17);
1747
1748   /* Example of a very long line.  */
1749   linemap_line_start (line_table, 3, 2000);
1750   location_t loc_e = linemap_position_for_column (line_table, 700);
1751
1752   /* Transitioning back to a short line.  */
1753   linemap_line_start (line_table, 4, 0);
1754   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1755
1756   if (should_have_column_data_p (loc_back_to_short))
1757     {
1758       /* Verify that we switched to short lines in the linemap.  */
1759       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1760       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1761     }
1762
1763   /* Example of a line that will eventually be seen to be longer
1764      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1765      below that.  */
1766   linemap_line_start (line_table, 5, 2000);
1767
1768   location_t loc_start_of_very_long_line
1769     = linemap_position_for_column (line_table, 2000);
1770   location_t loc_too_wide
1771     = linemap_position_for_column (line_table, 4097);
1772   location_t loc_too_wide_2
1773     = linemap_position_for_column (line_table, 4098);
1774
1775   /* ...and back to a sane line length.  */
1776   linemap_line_start (line_table, 6, 100);
1777   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1778
1779   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1780
1781   /* Multiple files.  */
1782   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1783   linemap_line_start (line_table, 1, 200);
1784   location_t loc_f = linemap_position_for_column (line_table, 150);
1785   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1786
1787   /* Verify that we can recover the location info.  */
1788   assert_loceq ("foo.c", 1, 1, loc_a);
1789   assert_loceq ("foo.c", 1, 23, loc_b);
1790   assert_loceq ("foo.c", 2, 1, loc_c);
1791   assert_loceq ("foo.c", 2, 17, loc_d);
1792   assert_loceq ("foo.c", 3, 700, loc_e);
1793   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1794
1795   /* In the very wide line, the initial location should be fully tracked.  */
1796   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1797   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1798      be disabled.  */
1799   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1800   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1801   /*...and column-tracking should be re-enabled for subsequent lines.  */
1802   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1803
1804   assert_loceq ("bar.c", 1, 150, loc_f);
1805
1806   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1807   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1808
1809   /* Verify using make_location to build a range, and extracting data
1810      back from it.  */
1811   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1812   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1813   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1814   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1815   ASSERT_EQ (loc_b, src_range.m_start);
1816   ASSERT_EQ (loc_d, src_range.m_finish);
1817 }
1818
1819 /* Verify various properties of UNKNOWN_LOCATION.  */
1820
1821 static void
1822 test_unknown_location ()
1823 {
1824   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1825   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1826   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1827 }
1828
1829 /* Verify various properties of BUILTINS_LOCATION.  */
1830
1831 static void
1832 test_builtins ()
1833 {
1834   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1835   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1836 }
1837
1838 /* Regression test for make_location.
1839    Ensure that we use pure locations for the start/finish of the range,
1840    rather than storing a packed or ad-hoc range as the start/finish.  */
1841
1842 static void
1843 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1844 {
1845   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1846      with C++ frontend.
1847      ....................0000000001111111111222.
1848      ....................1234567890123456789012.  */
1849   const char *content = "     r += !aaa == bbb;\n";
1850   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1851   line_table_test ltt (case_);
1852   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1853
1854   const location_t c11 = linemap_position_for_column (line_table, 11);
1855   const location_t c12 = linemap_position_for_column (line_table, 12);
1856   const location_t c13 = linemap_position_for_column (line_table, 13);
1857   const location_t c14 = linemap_position_for_column (line_table, 14);
1858   const location_t c21 = linemap_position_for_column (line_table, 21);
1859
1860   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1861     return;
1862
1863   /* Use column 13 for the caret location, arbitrarily, to verify that we
1864      handle start != caret.  */
1865   const location_t aaa = make_location (c13, c12, c14);
1866   ASSERT_EQ (c13, get_pure_location (aaa));
1867   ASSERT_EQ (c12, get_start (aaa));
1868   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1869   ASSERT_EQ (c14, get_finish (aaa));
1870   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1871
1872   /* Make a location using a location with a range as the start-point.  */
1873   const location_t not_aaa = make_location (c11, aaa, c14);
1874   ASSERT_EQ (c11, get_pure_location (not_aaa));
1875   /* It should use the start location of the range, not store the range
1876      itself.  */
1877   ASSERT_EQ (c12, get_start (not_aaa));
1878   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1879   ASSERT_EQ (c14, get_finish (not_aaa));
1880   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1881
1882   /* Similarly, make a location with a range as the end-point.  */
1883   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1884   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1885   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1886   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1887   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1888   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1889   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1890   /* It should use the finish location of the range, not store the range
1891      itself.  */
1892   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1893   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1894   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1895   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1896   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1897 }
1898
1899 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1900
1901 static void
1902 test_reading_source_line ()
1903 {
1904   /* Create a tempfile and write some text to it.  */
1905   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1906                         "01234567890123456789\n"
1907                         "This is the test text\n"
1908                         "This is the 3rd line");
1909
1910   /* Read back a specific line from the tempfile.  */
1911   int line_size;
1912   const char *source_line = location_get_source_line (tmp.get_filename (),
1913                                                       3, &line_size);
1914   ASSERT_TRUE (source_line != NULL);
1915   ASSERT_EQ (20, line_size);
1916   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1917                          source_line, line_size));
1918
1919   source_line = location_get_source_line (tmp.get_filename (),
1920                                           2, &line_size);
1921   ASSERT_TRUE (source_line != NULL);
1922   ASSERT_EQ (21, line_size);
1923   ASSERT_TRUE (!strncmp ("This is the test text",
1924                          source_line, line_size));
1925
1926   source_line = location_get_source_line (tmp.get_filename (),
1927                                           4, &line_size);
1928   ASSERT_TRUE (source_line == NULL);
1929 }
1930
1931 /* Tests of lexing.  */
1932
/* Assert that the spelling of token TOK, as produced by
   cpp_token_as_text for PARSER, is the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *token_spelling                                          \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), token_spelling);                     \
  SELFTEST_END_STMT
1941
1942 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1943    and ranges from EXP_START_COL to EXP_FINISH_COL.
1944    Use LOC as the effective location of the selftest.  */
1945
1946 static void
1947 assert_token_loc_eq (const location &loc,
1948                      const cpp_token *tok,
1949                      const char *exp_filename, int exp_linenum,
1950                      int exp_start_col, int exp_finish_col)
1951 {
1952   location_t tok_loc = tok->src_loc;
1953   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1954   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1955
1956   /* If location_t values are sufficiently high, then column numbers
1957      will be unavailable.  */
1958   if (!should_have_column_data_p (tok_loc))
1959     return;
1960
1961   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1962   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1963   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1964   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1965 }
1966
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc; SELFTEST_LOCATION (i.e. the point of use) is passed as
   the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME), (EXP_LINENUM),                   \
                       (EXP_START_COL), (EXP_FINISH_COL))
1974
1975 /* Test of lexing a file using libcpp, verifying tokens and their
1976    location information.  */
1977
1978 static void
1979 test_lexer (const line_table_case &case_)
1980 {
1981   /* Create a tempfile and write some text to it.  */
1982   const char *content =
1983     /*00000000011111111112222222222333333.3333444444444.455555555556
1984       12345678901234567890123456789012345.6789012345678.901234567890.  */
1985     ("test_name /* c-style comment */\n"
1986      "                                  \"test literal\"\n"
1987      " // test c++-style comment\n"
1988      "   42\n");
1989   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1990
1991   line_table_test ltt (case_);
1992
1993   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1994
1995   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1996   ASSERT_NE (fname, NULL);
1997
1998   /* Verify that we get the expected tokens back, with the correct
1999      location information.  */
2000
2001   location_t loc;
2002   const cpp_token *tok;
2003   tok = cpp_get_token_with_location (parser, &loc);
2004   ASSERT_NE (tok, NULL);
2005   ASSERT_EQ (tok->type, CPP_NAME);
2006   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2007   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
2008
2009   tok = cpp_get_token_with_location (parser, &loc);
2010   ASSERT_NE (tok, NULL);
2011   ASSERT_EQ (tok->type, CPP_STRING);
2012   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2013   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
2014
2015   tok = cpp_get_token_with_location (parser, &loc);
2016   ASSERT_NE (tok, NULL);
2017   ASSERT_EQ (tok->type, CPP_NUMBER);
2018   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2019   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2020
2021   tok = cpp_get_token_with_location (parser, &loc);
2022   ASSERT_NE (tok, NULL);
2023   ASSERT_EQ (tok->type, CPP_EOF);
2024
2025   cpp_finish (parser, NULL);
2026   cpp_destroy (parser);
2027 }
2028
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract base class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor,
   giving the subclass a chance to configure the test (e.g. its
   cpp_reader) before the main file is read.  */

class lexer_test_options
{
 public:
  /* This is a polymorphic base class whose subclasses have non-trivial
     destructors, so give it a virtual destructor: that keeps
     destruction through a lexer_test_options pointer well-defined.  */
  virtual ~lexer_test_options () {}

  /* Apply these options to the lexer_test under construction.  */
  virtual void apply (lexer_test &) = 0;
};
2042
2043 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2044    in its dtor.
2045
2046    This is needed by struct lexer_test to ensure that the cleanup of the
2047    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2048
2049 class cpp_reader_ptr
2050 {
2051  public:
2052   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2053
2054   ~cpp_reader_ptr ()
2055   {
2056     cpp_finish (m_ptr, NULL);
2057     cpp_destroy (m_ptr);
2058   }
2059
2060   operator cpp_reader * () const { return m_ptr; }
2061
2062  private:
2063   cpp_reader *m_ptr;
2064 };
2065
/* A struct for writing lexer tests.  It bundles together a temporary
   line_table, a cpp_reader, a temporary source file holding the text
   to be lexed, and a string concatenation database.  */

struct lexer_test
{
  /* Set up the test for CASE_, lexing CONTENT; OPTIONS may be NULL.  */
  lexer_test (const line_table_case &case_, const char *content,
              lexer_test_options *options);
  /* By default, checks that the lexer has reached EOF; see
     m_implicitly_expect_EOF.  */
  ~lexer_test ();

  /* Fetch the next token from m_parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.c's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  string_concat_db m_concats;
  /* If true (the default set by the constructor), the destructor
     asserts that the next token from m_parser is CPP_EOF.  */
  bool m_implicitly_expect_EOF;
};
2089
2090 /* Use an EBCDIC encoding for the execution charset, specifically
2091    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2092
2093    This exercises iconv integration within libcpp.
2094    Not every build of iconv supports the given charset,
2095    so we need to flag this error and handle it gracefully.  */
2096
2097 class ebcdic_execution_charset : public lexer_test_options
2098 {
2099  public:
2100   ebcdic_execution_charset () : m_num_iconv_errors (0)
2101     {
2102       gcc_assert (s_singleton == NULL);
2103       s_singleton = this;
2104     }
2105   ~ebcdic_execution_charset ()
2106     {
2107       gcc_assert (s_singleton == this);
2108       s_singleton = NULL;
2109     }
2110
2111   void apply (lexer_test &test) FINAL OVERRIDE
2112   {
2113     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2114     cpp_opts->narrow_charset = "IBM1047";
2115
2116     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2117     callbacks->error = on_error;
2118   }
2119
2120   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2121                         int level ATTRIBUTE_UNUSED,
2122                         int reason ATTRIBUTE_UNUSED,
2123                         rich_location *richloc ATTRIBUTE_UNUSED,
2124                         const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2125     ATTRIBUTE_FPTR_PRINTF(5,0)
2126   {
2127     gcc_assert (s_singleton);
2128     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2129     const char *msg = "conversion from %s to %s not supported by iconv";
2130 #ifdef ENABLE_NLS
2131     msg = dgettext ("cpplib", msg);
2132 #endif
2133     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2134        when the local iconv build doesn't support the conversion.  */
2135     if (strcmp (msgid, msg) == 0)
2136       {
2137         s_singleton->m_num_iconv_errors++;
2138         return true;
2139       }
2140
2141     /* Otherwise, we have an unexpected error.  */
2142     abort ();
2143   }
2144
2145   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2146
2147  private:
2148   static ebcdic_execution_charset *s_singleton;
2149   int m_num_iconv_errors;
2150 };
2151
2152 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2153
2154 /* A lexer_test_options subclass that records a list of error
2155    messages emitted by the lexer.  */
2156
2157 class lexer_error_sink : public lexer_test_options
2158 {
2159  public:
2160   lexer_error_sink ()
2161   {
2162     gcc_assert (s_singleton == NULL);
2163     s_singleton = this;
2164   }
2165   ~lexer_error_sink ()
2166   {
2167     gcc_assert (s_singleton == this);
2168     s_singleton = NULL;
2169
2170     int i;
2171     char *str;
2172     FOR_EACH_VEC_ELT (m_errors, i, str)
2173       free (str);
2174   }
2175
2176   void apply (lexer_test &test) FINAL OVERRIDE
2177   {
2178     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2179     callbacks->error = on_error;
2180   }
2181
2182   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2183                         int level ATTRIBUTE_UNUSED,
2184                         int reason ATTRIBUTE_UNUSED,
2185                         rich_location *richloc ATTRIBUTE_UNUSED,
2186                         const char *msgid, va_list *ap)
2187     ATTRIBUTE_FPTR_PRINTF(5,0)
2188   {
2189     char *msg = xvasprintf (msgid, *ap);
2190     s_singleton->m_errors.safe_push (msg);
2191     return true;
2192   }
2193
2194   auto_vec<char *> m_errors;
2195
2196  private:
2197   static lexer_error_sink *s_singleton;
2198 };
2199
2200 lexer_error_sink *lexer_error_sink::s_singleton;
2201
2202 /* Constructor.  Override line_table with a new instance based on CASE_,
2203    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2204    start parsing the tempfile.  */
2205
2206 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2207                         lexer_test_options *options)
2208 : m_ltt (case_),
2209   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2210   /* Create a tempfile and write the text to it.  */
2211   m_tempfile (SELFTEST_LOCATION, ".c", content),
2212   m_concats (),
2213   m_implicitly_expect_EOF (true)
2214 {
2215   if (options)
2216     options->apply (*this);
2217
2218   cpp_init_iconv (m_parser);
2219
2220   /* Parse the file.  */
2221   const char *fname = cpp_read_main_file (m_parser,
2222                                           m_tempfile.get_filename ());
2223   ASSERT_NE (fname, NULL);
2224 }
2225
2226 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2227
2228 lexer_test::~lexer_test ()
2229 {
2230   location_t loc;
2231   const cpp_token *tok;
2232
2233   if (m_implicitly_expect_EOF)
2234     {
2235       tok = cpp_get_token_with_location (m_parser, &loc);
2236       ASSERT_NE (tok, NULL);
2237       ASSERT_EQ (tok->type, CPP_EOF);
2238     }
2239 }
2240
2241 /* Get the next token from m_parser.  */
2242
2243 const cpp_token *
2244 lexer_test::get_token ()
2245 {
2246   location_t loc;
2247   const cpp_token *tok;
2248
2249   tok = cpp_get_token_with_location (m_parser, &loc);
2250   ASSERT_NE (tok, NULL);
2251   return tok;
2252 }
2253
2254 /* Verify that locations within string literals are correctly handled.  */
2255
2256 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2257    using the string concatenation database for TEST.
2258
2259    Assert that the character at index IDX is on EXPECTED_LINE,
2260    and that it begins at column EXPECTED_START_COL and ends at
2261    EXPECTED_FINISH_COL (unless the locations are beyond
2262    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2263    columns).  */
2264
2265 static void
2266 assert_char_at_range (const location &loc,
2267                       lexer_test& test,
2268                       location_t strloc, enum cpp_ttype type, int idx,
2269                       int expected_line, int expected_start_col,
2270                       int expected_finish_col)
2271 {
2272   cpp_reader *pfile = test.m_parser;
2273   string_concat_db *concats = &test.m_concats;
2274
2275   source_range actual_range = source_range();
2276   const char *err
2277     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2278                                  &actual_range);
2279   if (should_have_column_data_p (strloc))
2280     ASSERT_EQ_AT (loc, NULL, err);
2281   else
2282     {
2283       ASSERT_STREQ_AT (loc,
2284                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2285                        err);
2286       return;
2287     }
2288
2289   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2290   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2291   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2292   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2293
2294   if (should_have_column_data_p (actual_range.m_start))
2295     {
2296       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2297       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2298     }
2299   if (should_have_column_data_p (actual_range.m_finish))
2300     {
2301       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2302       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2303     }
2304 }
2305
/* Convenience wrapper around assert_char_at_range; SELFTEST_LOCATION
   (i.e. the point of use) is passed as the effective location of any
   errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE, EXPECTED_START_COL,         \
                             EXPECTED_FINISH_COL)                       \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2314
2315 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2316    using the string concatenation database for TEST.
2317
2318    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2319
2320 static void
2321 assert_num_substring_ranges (const location &loc,
2322                              lexer_test& test,
2323                              location_t strloc,
2324                              enum cpp_ttype type,
2325                              int expected_num_ranges)
2326 {
2327   cpp_reader *pfile = test.m_parser;
2328   string_concat_db *concats = &test.m_concats;
2329
2330   int actual_num_ranges = -1;
2331   const char *err
2332     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2333                                            &actual_num_ranges);
2334   if (should_have_column_data_p (strloc))
2335     ASSERT_EQ_AT (loc, NULL, err);
2336   else
2337     {
2338       ASSERT_STREQ_AT (loc,
2339                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2340                        err);
2341       return;
2342     }
2343   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2344 }
2345
/* Convenience wrapper around assert_num_substring_ranges;
   SELFTEST_LOCATION (i.e. the point of use) is passed as the effective
   location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2353
2354
2355 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2356    returns an error (using the string concatenation database for TEST).  */
2357
2358 static void
2359 assert_has_no_substring_ranges (const location &loc,
2360                                 lexer_test& test,
2361                                 location_t strloc,
2362                                 enum cpp_ttype type,
2363                                 const char *expected_err)
2364 {
2365   cpp_reader *pfile = test.m_parser;
2366   string_concat_db *concats = &test.m_concats;
2367   cpp_substring_ranges ranges;
2368   const char *actual_err
2369     = get_substring_ranges_for_loc (pfile, concats, strloc,
2370                                     type, ranges);
2371   if (should_have_column_data_p (strloc))
2372     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2373   else
2374     ASSERT_STREQ_AT (loc,
2375                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2376                      actual_err);
2377 }
2378
/* Convenience wrapper around assert_has_no_substring_ranges;
   SELFTEST_LOCATION (i.e. the point of use) is passed as the effective
   location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
  assert_has_no_substring_ranges (SELFTEST_LOCATION,                    \
                                  (LEXER_TEST), (STRLOC), (TYPE),       \
                                  (ERR))
2382
2383 /* Lex a simple string literal.  Verify the substring location data, before
2384    and after running cpp_interpret_string on it.  */
2385
2386 static void
2387 test_lexer_string_locations_simple (const line_table_case &case_)
2388 {
2389   /* Digits 0-9 (with 0 at column 10), the simple way.
2390      ....................000000000.11111111112.2222222223333333333
2391      ....................123456789.01234567890.1234567890123456789
2392      We add a trailing comment to ensure that we correctly locate
2393      the end of the string literal token.  */
2394   const char *content = "        \"0123456789\" /* not a string */\n";
2395   lexer_test test (case_, content, NULL);
2396
2397   /* Verify that we get the expected token back, with the correct
2398      location information.  */
2399   const cpp_token *tok = test.get_token ();
2400   ASSERT_EQ (tok->type, CPP_STRING);
2401   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2402   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2403
2404   /* At this point in lexing, the quote characters are treated as part of
2405      the string (they are stripped off by cpp_interpret_string).  */
2406
2407   ASSERT_EQ (tok->val.str.len, 12);
2408
2409   /* Verify that cpp_interpret_string works.  */
2410   cpp_string dst_string;
2411   const enum cpp_ttype type = CPP_STRING;
2412   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2413                                       &dst_string, type);
2414   ASSERT_TRUE (result);
2415   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2416   free (const_cast <unsigned char *> (dst_string.text));
2417
2418   /* Verify ranges of individual characters.  This no longer includes the
2419      opening quote, but does include the closing quote.  */
2420   for (int i = 0; i <= 10; i++)
2421     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2422                           10 + i, 10 + i);
2423
2424   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2425 }
2426
2427 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2428    encoding, for which no substring location data should be available.  */
2429
2430 static void
2431 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2432 {
2433   /* EBCDIC support requires iconv.  */
2434   if (!HAVE_ICONV)
2435     return;
2436
2437   /* Digits 0-9 (with 0 at column 10), the simple way.
2438      ....................000000000.11111111112.2222222223333333333
2439      ....................123456789.01234567890.1234567890123456789
2440      We add a trailing comment to ensure that we correctly locate
2441      the end of the string literal token.  */
2442   const char *content = "        \"0123456789\" /* not a string */\n";
2443   ebcdic_execution_charset use_ebcdic;
2444   lexer_test test (case_, content, &use_ebcdic);
2445
2446   /* Verify that we get the expected token back, with the correct
2447      location information.  */
2448   const cpp_token *tok = test.get_token ();
2449   ASSERT_EQ (tok->type, CPP_STRING);
2450   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2451   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2452
2453   /* At this point in lexing, the quote characters are treated as part of
2454      the string (cpp_interpret_string strips them): 10 digits + 2 quotes.  */
2455
2456   ASSERT_EQ (tok->val.str.len, 12);
2457
2458   /* The remainder of the test requires an iconv implementation that
2459      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2460   if (use_ebcdic.iconv_errors_occurred_p ())
2461     return;
2462
2463   /* Verify that cpp_interpret_string works.  */
2464   cpp_string dst_string;
2465   const enum cpp_ttype type = CPP_STRING;
2466   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2467                                       &dst_string, type);
2468   ASSERT_TRUE (result);
2469   /* We should now have EBCDIC-encoded text, specifically
2470      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2471      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2472   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2473                 (const char *)dst_string.text);
2474   free (const_cast <unsigned char *> (dst_string.text));
2475
2476   /* Verify that we don't attempt to record substring location information
2477      when the execution and source character sets differ.  */
2478   ASSERT_HAS_NO_SUBSTRING_RANGES
2479     (test, tok->src_loc, type,
2480      "execution character set != source character set");
2481 }
2482
2483 /* Lex a string literal containing a hex-escaped character.
2484    Verify the substring location data, before and after running
2485    cpp_interpret_string on it.  */
2486
2487 static void
2488 test_lexer_string_locations_hex (const line_table_case &case_)
2489 {
2490   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2491      and with a space in place of digit 6, to terminate the escaped
2492      hex code.
2493      ....................000000000.111111.11112222.
2494      ....................123456789.012345.67890123.  */
2495   const char *content = "        \"01234\\x35 789\"\n";
2496   lexer_test test (case_, content, NULL);
2497
2498   /* Verify that we get the expected token back, with the correct
2499      location information.  */
2500   const cpp_token *tok = test.get_token ();
2501   ASSERT_EQ (tok->type, CPP_STRING);
2502   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2503   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2504
2505   /* At this point in lexing, the quote characters are treated as part of
2506      the string (cpp_interpret_string strips them): 13 chars + 2 quotes.  */
2507   ASSERT_EQ (tok->val.str.len, 15);
2508
2509   /* Verify that cpp_interpret_string works.  */
2510   cpp_string dst_string;
2511   const enum cpp_ttype type = CPP_STRING;
2512   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2513                                       &dst_string, type);
2514   ASSERT_TRUE (result);
2515   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2516   free (const_cast <unsigned char *> (dst_string.text));
2517
2518   /* Verify ranges of individual characters.  The opening quote is not
2519      included; the closing quote is.  "\x35" (char 5) spans columns 15-18.  */
2520   for (int i = 0; i <= 4; i++)
2521     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2522   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2523   for (int i = 6; i <= 10; i++)
2524     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2525
2526   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2527 }
2528
2529 /* Lex a string literal containing an octal-escaped character.
2530    Verify the substring location data after running cpp_interpret_string
2531    on it.  */
2532
2533 static void
2534 test_lexer_string_locations_oct (const line_table_case &case_)
2535 {
2536   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2537      and with a space in place of digit 6, to terminate the escaped
2538      octal code.
2539      ....................000000000.111111.11112222.2222223333333333444
2540      ....................123456789.012345.67890123.4567890123456789012  */
2541   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2542   lexer_test test (case_, content, NULL);
2543
2544   /* Verify that we get the expected token back, with the correct
2545      location information.  */
2546   const cpp_token *tok = test.get_token ();
2547   ASSERT_EQ (tok->type, CPP_STRING);
2548   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2549
2550   /* Verify that cpp_interpret_string works.  */
2551   cpp_string dst_string;
2552   const enum cpp_ttype type = CPP_STRING;
2553   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2554                                       &dst_string, type);
2555   ASSERT_TRUE (result);
2556   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2557   free (const_cast <unsigned char *> (dst_string.text));
2558
2559   /* Verify ranges of individual characters.  The opening quote is not
2560      included; the closing quote is.  "\065" (char 5) spans columns 15-18.  */
2561   for (int i = 0; i < 5; i++)
2562     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2563   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2564   for (int i = 6; i <= 10; i++)
2565     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2566
2567   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2568 }
2569
2570 /* Test of a string literal containing the letter escapes \t, \\ and \n.  */
2571
2572 static void
2573 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2574 {
2575   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2576      .....................000000000.1.11111.1.1.11222.22222223333333
2577      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2578   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2579   lexer_test test (case_, content, NULL);
2580
2581   /* Verify that we get the expected tokens back.  */
2582   const cpp_token *tok = test.get_token ();
2583   ASSERT_EQ (tok->type, CPP_STRING);
2584   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2585
2586   /* Verify ranges of individual characters; each escape spans 2 columns.  */
2587   /* "\t".  */
2588   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2589                         0, 1, 10, 11);
2590   /* "foo".  */
2591   for (int i = 1; i <= 3; i++)
2592     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2593                           i, 1, 11 + i, 11 + i);
2594   /* "\\" and "\n".  */
2595   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2596                         4, 1, 15, 16);
2597   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2598                         5, 1, 17, 18);
2599
2600   /* "bar" and closing quote for nul-terminator.  */
2601   for (int i = 6; i <= 9; i++)
2602     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2603                           i, 1, 13 + i, 13 + i);
2604
2605   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2606 }
2607
2608 /* Another test of a string literal containing a letter escape.
2609    Based on string seen in
2610      printf ("%-%\n");
2611    in gcc.dg/format/c90-printf-1.c.  */
2612
2613 static void
2614 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2615 {
2616   /* .....................000000000.1111.11.1111.22222222223.
2617      .....................123456789.0123.45.6789.01234567890.  */
2618   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2619   lexer_test test (case_, content, NULL);
2620
2621   /* Verify that we get the expected tokens back.  */
2622   const cpp_token *tok = test.get_token ();
2623   ASSERT_EQ (tok->type, CPP_STRING);
2624   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2625
2626   /* Verify ranges of individual characters.  */
2627   /* "%-%".  */
2628   for (int i = 0; i < 3; i++)
2629     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2630                           i, 1, 10 + i, 10 + i);
2631   /* "\n", which spans two source columns.  */
2632   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2633                         3, 1, 13, 14);
2634
2635   /* Closing quote for nul-terminator.  */
2636   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2637                         4, 1, 15, 15);
2638
2639   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2640 }
2641
2642 /* Lex a string literal containing UCN 4 characters.
2643    Verify the substring location data after running cpp_interpret_string
2644    on it.  */
2645
2646 static void
2647 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2648 {
2649   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2650      as UCN 4.
2651      ....................000000000.111111.111122.222222223.33333333344444
2652      ....................123456789.012345.678901.234567890.12345678901234  */
2653   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2654   lexer_test test (case_, content, NULL);
2655
2656   /* Verify that we get the expected token back, with the correct
2657      location information.  */
2658   const cpp_token *tok = test.get_token ();
2659   ASSERT_EQ (tok->type, CPP_STRING);
2660   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2661
2662   /* Verify that cpp_interpret_string works.
2663      The string should be encoded in the execution character
2664      set.  Assuming that this is UTF-8, we should have the following:
2665      -----------  ----  -----  -------  ----------------
2666      Byte offset  Byte  Octal  Unicode  Source Column(s)
2667      -----------  ----  -----  -------  ----------------
2668      0            0x30         '0'      10
2669      1            0x31         '1'      11
2670      2            0x32         '2'      12
2671      3            0x33         '3'      13
2672      4            0x34         '4'      14
2673      5            0xE2  \342   U+2174   15-20
2674      6            0x85  \205    (cont)  15-20
2675      7            0xB4  \264    (cont)  15-20
2676      8            0xE2  \342   U+2175   21-26
2677      9            0x85  \205    (cont)  21-26
2678      10           0xB5  \265    (cont)  21-26
2679      11           0x37         '7'      27
2680      12           0x38         '8'      28
2681      13           0x39         '9'      29
2682      14           0x00                  30 (closing quote)
2683      -----------  ----  -----  -------  ---------------.  */
2684
2685   cpp_string dst_string;
2686   const enum cpp_ttype type = CPP_STRING;
2687   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2688                                       &dst_string, type);
2689   ASSERT_TRUE (result);
2690   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2691                 (const char *)dst_string.text);
2692   free (const_cast <unsigned char *> (dst_string.text));
2693
2694   /* Verify ranges of individual characters.  This no longer includes the
2695      opening quote, but does include the closing quote.
2696      '01234'.  */
2697   for (int i = 0; i <= 4; i++)
2698     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2699   /* U+2174: each of its three UTF-8 bytes maps to the whole UCN.  */
2700   for (int i = 5; i <= 7; i++)
2701     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2702   /* U+2175: likewise.  */
2703   for (int i = 8; i <= 10; i++)
2704     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2705   /* '789' and the closing quote (for the nul terminator).  */
2706   for (int i = 11; i <= 14; i++)
2707     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2708
2709   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2710 }
2711
2712 /* Lex a string literal containing UCN 8 characters.
2713    Verify the substring location data after running cpp_interpret_string
2714    on it.  */
2715
2716 static void
2717 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2718 {
2719   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2720      ....................000000000.111111.1111222222.2222333333333.344444
2721      ....................123456789.012345.6789012345.6789012345678.901234  */
2722   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2723   lexer_test test (case_, content, NULL);
2724
2725   /* Verify that we get the expected token back, with the correct
2726      location information.  */
2727   const cpp_token *tok = test.get_token ();
2728   ASSERT_EQ (tok->type, CPP_STRING);
2729   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2730                            "\"01234\\U00002174\\U00002175789\"");
2731
2732   /* Verify that cpp_interpret_string works.
2733      The UTF-8 encoding of the string is identical to that from
2734      the ucn4 testcase above; the only difference is the column
2735      locations.  */
2736   cpp_string dst_string;
2737   const enum cpp_ttype type = CPP_STRING;
2738   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2739                                       &dst_string, type);
2740   ASSERT_TRUE (result);
2741   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2742                 (const char *)dst_string.text);
2743   free (const_cast <unsigned char *> (dst_string.text));
2744
2745   /* Verify ranges of individual characters.  This no longer includes the
2746      opening quote, but does include the closing quote.
2747      '01234'.  */
2748   for (int i = 0; i <= 4; i++)
2749     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2750   /* U+2174: three UTF-8 bytes, all mapping to the UCN at columns 15-24.  */
2751   for (int i = 5; i <= 7; i++)
2752     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2753   /* U+2175: likewise, at columns 25-34.  */
2754   for (int i = 8; i <= 10; i++)
2755     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2756   /* '789' at columns 35-37.  */
2757   for (int i = 11; i <= 13; i++)
2758     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2759   /* Closing quote/nul-terminator at column 38.  */
2760   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2761
2762   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2763 }
2764
2765 /* Return the 32-bit big-endian value at PTR_BE_VALUE, in host endianness.  */
2766
2767 static uint32_t
2768 uint32_from_big_endian (const uint32_t *ptr_be_value)
2769 {
2770   const unsigned char *buf = (const unsigned char *)ptr_be_value;
2771   return (((uint32_t) buf[0] << 24)  /* BUF[0] is the most significant.  */
2772           | ((uint32_t) buf[1] << 16)
2773           | ((uint32_t) buf[2] << 8)
2774           | (uint32_t) buf[3]);
2775 }
2776
2777 /* Lex a wide string literal and verify that attempts to read substring
2778    location data from it fail gracefully.  */
2779
2780 static void
2781 test_lexer_string_locations_wide_string (const line_table_case &case_)
2782 {
2783   /* Digits 0-9.
2784      ....................000000000.11111111112.22222222233333
2785      ....................123456789.01234567890.12345678901234  */
2786   const char *content = "       L\"0123456789\" /* non-str */\n";
2787   lexer_test test (case_, content, NULL);
2788
2789   /* Verify that we get the expected token back, with the correct
2790      location information.  */
2791   const cpp_token *tok = test.get_token ();
2792   ASSERT_EQ (tok->type, CPP_WSTRING);
2793   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2794
2795   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2796   cpp_string dst_string;
2797   const enum cpp_ttype type = CPP_WSTRING;
2798   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2799                                       &dst_string, type);
2800   ASSERT_TRUE (result);
2801   /* The cpp_reader defaults to big-endian with
2802      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2803      now be encoded as UTF-32BE.  Spot-check chars 0, 5, 9 and the nul.  */
2804   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2805   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2806   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2807   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2808   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2809   free (const_cast <unsigned char *> (dst_string.text));
2810
2811   /* We don't yet support generating substring location information
2812      for L"" strings.  */
2813   ASSERT_HAS_NO_SUBSTRING_RANGES
2814     (test, tok->src_loc, type,
2815      "execution character set != source character set");
2816 }
2817
2818 /* Return the 16-bit big-endian value at PTR_BE_VALUE, in host endianness.  */
2819
2820 static uint16_t
2821 uint16_from_big_endian (const uint16_t *ptr_be_value)
2822 {
2823   const unsigned char *buf = (const unsigned char *)ptr_be_value;
2824   return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];  /* BUF[0] is high.  */
2825 }
2826
2827 /* Lex a u"" string literal and verify that attempts to read substring
2828    location data from it fail gracefully.  */
2829
2830 static void
2831 test_lexer_string_locations_string16 (const line_table_case &case_)
2832 {
2833   /* Digits 0-9.
2834      ....................000000000.11111111112.22222222233333
2835      ....................123456789.01234567890.12345678901234  */
2836   const char *content = "       u\"0123456789\" /* non-str */\n";
2837   lexer_test test (case_, content, NULL);
2838
2839   /* Verify that we get the expected token back, with the correct
2840      location information.  */
2841   const cpp_token *tok = test.get_token ();
2842   ASSERT_EQ (tok->type, CPP_STRING16);
2843   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2844
2845   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2846   cpp_string dst_string;
2847   const enum cpp_ttype type = CPP_STRING16;
2848   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2849                                       &dst_string, type);
2850   ASSERT_TRUE (result);
2851
2852   /* The cpp_reader defaults to big-endian, so dst_string should
2853      now be encoded as UTF-16BE.  Spot-check chars 0, 5, 9 and the nul.  */
2854   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2855   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2856   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2857   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2858   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2859   free (const_cast <unsigned char *> (dst_string.text));
2860
2861   /* We don't yet support generating substring location information
2862      for u"" strings.  */
2863   ASSERT_HAS_NO_SUBSTRING_RANGES
2864     (test, tok->src_loc, type,
2865      "execution character set != source character set");
2866 }
2867
2868 /* Lex a U"" string literal and verify that attempts to read substring
2869    location data from it fail gracefully.  */
2870
2871 static void
2872 test_lexer_string_locations_string32 (const line_table_case &case_)
2873 {
2874   /* Digits 0-9.
2875      ....................000000000.11111111112.22222222233333
2876      ....................123456789.01234567890.12345678901234  */
2877   const char *content = "       U\"0123456789\" /* non-str */\n";
2878   lexer_test test (case_, content, NULL);
2879
2880   /* Verify that we get the expected token back, with the correct
2881      location information.  */
2882   const cpp_token *tok = test.get_token ();
2883   ASSERT_EQ (tok->type, CPP_STRING32);
2884   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2885
2886   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2887   cpp_string dst_string;
2888   const enum cpp_ttype type = CPP_STRING32;
2889   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2890                                       &dst_string, type);
2891   ASSERT_TRUE (result);
2892
2893   /* The cpp_reader defaults to big-endian, so dst_string should
2894      now be encoded as UTF-32BE.  Spot-check chars 0, 5, 9 and the nul.  */
2895   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2896   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2897   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2898   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2899   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2900   free (const_cast <unsigned char *> (dst_string.text));
2901
2902   /* We don't yet support generating substring location information
2903      for U"" strings.  */
2904   ASSERT_HAS_NO_SUBSTRING_RANGES
2905     (test, tok->src_loc, type,
2906      "execution character set != source character set");
2907 }
2908
2909 /* Lex a u8-string literal.
2910    Verify the substring location data after running cpp_interpret_string
2911    on it.  */
2912
2913 static void
2914 test_lexer_string_locations_u8 (const line_table_case &case_)
2915 {
2916   /* Digits 0-9.
2917      ....................000000000.11111111112.22222222233333
2918      ....................123456789.01234567890.12345678901234  */
2919   const char *content = "      u8\"0123456789\" /* non-str */\n";
2920   lexer_test test (case_, content, NULL);
2921
2922   /* Verify that we get the expected token back, with the correct
2923      location information.  */
2924   const cpp_token *tok = test.get_token ();
2925   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2926   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2927
2928   /* Verify that cpp_interpret_string works (interpreting as CPP_STRING).  */
2929   cpp_string dst_string;
2930   const enum cpp_ttype type = CPP_STRING;
2931   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2932                                       &dst_string, type);
2933   ASSERT_TRUE (result);
2934   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2935   free (const_cast <unsigned char *> (dst_string.text));
2936
2937   /* Verify ranges of individual characters (closing quote included).
2938      NOTE(review): TYPE is CPP_STRING, not CPP_UTF8STRING; confirm.  */
2939   for (int i = 0; i <= 10; i++)
2940     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2941 }
2942
2943 /* Lex a string literal containing UTF-8 source characters.
2944    Verify the substring location data after running cpp_interpret_string
2945    on it.  */
2946
2947 static void
2948 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2949 {
2950   /* This string literal is written out to the source file as UTF-8,
2951      and is of the form "before mojibake after", where "mojibake"
2952      is written as the following four Unicode code points:
2953         U+6587 CJK UNIFIED IDEOGRAPH-6587
2954         U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2955         U+5316 CJK UNIFIED IDEOGRAPH-5316
2956         U+3051 HIRAGANA LETTER KE.
2957      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2958      "before" and "after" are 1 byte per Unicode character.
2959
2960      The numbers shown are "columns", which are *byte* numbers within
2961      the line, rather than Unicode character numbers.
2962
2963      .................... 000000000.1111111.
2964      .................... 123456789.0123456.  */
2965   const char *content = ("        \"before "
2966                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2967                               UTF-8: 0xE6 0x96 0x87
2968                               C octal escaped UTF-8: \346\226\207
2969                             "column" numbers: 17-19.  */
2970                          "\346\226\207"
2971
2972                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2973                               UTF-8: 0xE5 0xAD 0x97
2974                               C octal escaped UTF-8: \345\255\227
2975                             "column" numbers: 20-22.  */
2976                          "\345\255\227"
2977
2978                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2979                               UTF-8: 0xE5 0x8C 0x96
2980                               C octal escaped UTF-8: \345\214\226
2981                             "column" numbers: 23-25.  */
2982                          "\345\214\226"
2983
2984                          /* U+3051 HIRAGANA LETTER KE
2985                               UTF-8: 0xE3 0x81 0x91
2986                               C octal escaped UTF-8: \343\201\221
2987                             "column" numbers: 26-28.  */
2988                          "\343\201\221"
2989
2990                          /* column numbers 29 onwards
2991                           2333333.33334444444444
2992                           9012345.67890123456789. */
2993                          " after\" /* non-str */\n");
2994   lexer_test test (case_, content, NULL);
2995
2996   /* Verify that we get the expected token back, with the correct
2997      location information.  */
2998   const cpp_token *tok = test.get_token ();
2999   ASSERT_EQ (tok->type, CPP_STRING);
3000   ASSERT_TOKEN_AS_TEXT_EQ
3001     (test.m_parser, tok,
3002      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3003
3004   /* Verify that cpp_interpret_string works.  */
3005   cpp_string dst_string;
3006   const enum cpp_ttype type = CPP_STRING;
3007   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3008                                       &dst_string, type);
3009   ASSERT_TRUE (result);
3010   ASSERT_STREQ
3011     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3012      (const char *)dst_string.text);
3013   free (const_cast <unsigned char *> (dst_string.text));
3014
3015   /* Verify ranges of individual characters.  This no longer includes the
3016      opening quote, but does include the closing quote.
3017      Assuming that both source and execution encodings are UTF-8, we have
3018      a run of 25 octets in each, plus the NUL terminator.  */
3019   for (int i = 0; i < 25; i++)
3020     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3021   /* NUL-terminator should use the closing quote at column 35.  */
3022   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3023
3024   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3025 }
3026
3027 /* Test of string literal concatenation, with two literals on separate lines.  */
3028
3029 static void
3030 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3031 {
3032   /* Digits 0-9, five per line; the column ruler applies to both lines.
3033      .....................000000000.111111.11112222222222
3034      .....................123456789.012345.67890123456789.  */
3035   const char *content = ("        \"01234\" /* non-str */\n"
3036                          "        \"56789\" /* non-str */\n");
3037   lexer_test test (case_, content, NULL);
3038
3039   location_t input_locs[2];
3040
3041   /* Verify that we get the expected tokens back.  */
3042   auto_vec <cpp_string> input_strings;
3043   const cpp_token *tok_a = test.get_token ();
3044   ASSERT_EQ (tok_a->type, CPP_STRING);
3045   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3046   input_strings.safe_push (tok_a->val.str);
3047   input_locs[0] = tok_a->src_loc;
3048
3049   const cpp_token *tok_b = test.get_token ();
3050   ASSERT_EQ (tok_b->type, CPP_STRING);
3051   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3052   input_strings.safe_push (tok_b->val.str);
3053   input_locs[1] = tok_b->src_loc;
3054
3055   /* Verify that cpp_interpret_string works on the pair of literals.  */
3056   cpp_string dst_string;
3057   const enum cpp_ttype type = CPP_STRING;
3058   bool result = cpp_interpret_string (test.m_parser,
3059                                       input_strings.address (), 2,
3060                                       &dst_string, type);
3061   ASSERT_TRUE (result);
3062   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3063   free (const_cast <unsigned char *> (dst_string.text));
3064
3065   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3066   test.m_concats.record_string_concatenation (2, input_locs);
3067
3068   location_t initial_loc = input_locs[0];
3069
3070   /* "01234" on line 1.  */
3071   for (int i = 0; i <= 4; i++)
3072     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3073   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3074   for (int i = 5; i <= 10; i++)
3075     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3076
3077   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3078 }
3079
3080 /* Another test of string literal concatenation: five literals, one per line.  */
3081
3082 static void
3083 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3084 {
3085   /* Digits 0-9, two per line; the column ruler applies to every line.
3086      .....................000000000.111.11111112222222
3087      .....................123456789.012.34567890123456.  */
3088   const char *content = ("        \"01\" /* non-str */\n"
3089                          "        \"23\" /* non-str */\n"
3090                          "        \"45\" /* non-str */\n"
3091                          "        \"67\" /* non-str */\n"
3092                          "        \"89\" /* non-str */\n");
3093   lexer_test test (case_, content, NULL);
3094
3095   auto_vec <cpp_string> input_strings;
3096   location_t input_locs[5];
3097
3098   /* Verify that we get the expected tokens back.  */
3099   for (int i = 0; i < 5; i++)
3100     {
3101       const cpp_token *tok = test.get_token ();
3102       ASSERT_EQ (tok->type, CPP_STRING);
3103       input_strings.safe_push (tok->val.str);
3104       input_locs[i] = tok->src_loc;
3105     }
3106
3107   /* Verify that cpp_interpret_string works on all five literals.  */
3108   cpp_string dst_string;
3109   const enum cpp_ttype type = CPP_STRING;
3110   bool result = cpp_interpret_string (test.m_parser,
3111                                       input_strings.address (), 5,
3112                                       &dst_string, type);
3113   ASSERT_TRUE (result);
3114   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3115   free (const_cast <unsigned char *> (dst_string.text));
3116
3117   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3118   test.m_concats.record_string_concatenation (5, input_locs);
3119
3120   location_t initial_loc = input_locs[0];
3121
3122   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3123      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3124      and expect get_source_range_for_substring to fail.
3125      However, for a string concatenation test, we can have a case
3126      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3127      but subsequent strings can be after it.
3128      Attempting to detect this within assert_char_at_range
3129      would overcomplicate the logic for the common test cases, so
3130      we detect it here.  */
3131   if (should_have_column_data_p (input_locs[0])
3132       && !should_have_column_data_p (input_locs[4]))
3133     {
3134       /* Verify that get_source_range_for_substring gracefully rejects
3135          this case.  */
3136       source_range actual_range;
3137       const char *err
3138         = get_source_range_for_char (test.m_parser, &test.m_concats,
3139                                      initial_loc, type, 0, &actual_range);
3140       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3141       return;
3142     }
3143
3144   for (int i = 0; i < 5; i++)  /* Literal I is on line I + 1...  */
3145     for (int j = 0; j < 2; j++)  /* ...with its digits at columns 10-11.  */
3146       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3147                             i + 1, 10 + j, 10 + j);
3148
3149   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3150   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3151
3152   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3153 }
3154
3155 /* Another test of string literal concatenation, this time combined with
3156    various kinds of escaped characters.  */
3157
3158 static void
3159 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3160 {
3161   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35" and
3162      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3163   const char *content
3164     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3165        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3166     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3167   lexer_test test (case_, content, NULL);
3168
3169   auto_vec <cpp_string> input_strings;
3170   location_t input_locs[4];
3171
3172   /* Verify that we get four CPP_STRING tokens, saving each string and loc.  */
3173   for (int i = 0; i < 4; i++)
3174     {
3175       const cpp_token *tok = test.get_token ();
3176       ASSERT_EQ (tok->type, CPP_STRING);
3177       input_strings.safe_push (tok->val.str);
3178       input_locs[i] = tok->src_loc;
3179     }
3180
3181   /* Verify that cpp_interpret_string works.  */
3182   cpp_string dst_string;
3183   const enum cpp_ttype type = CPP_STRING;
3184   bool result = cpp_interpret_string (test.m_parser,
3185                                       input_strings.address (), 4,
3186                                       &dst_string, type);
3187   ASSERT_TRUE (result);
3188   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3189   free (const_cast <unsigned char *> (dst_string.text));
3190
3191   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3192   test.m_concats.record_string_concatenation (4, input_locs);
3193
3194   location_t initial_loc = input_locs[0];
3195
3196   for (int i = 0; i <= 4; i++)
3197     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3198   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22); /* "\x35".  */
3199   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30); /* "\066".  */
3200   for (int i = 7; i <= 9; i++)
3201     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3202
3203   /* NUL-terminator should use the location of the final closing quote.  */
3204   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3205
3206   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3207 }
3208
3209 /* Test of string literal in a macro.  */
3210
3211 static void
3212 test_lexer_string_locations_macro (const line_table_case &case_)
3213 {
3214   /* Digits 0-9.
3215      .....................0000000001111111111.22222222223.
3216      .....................1234567890123456789.01234567890.  */
3217   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3218                          "  MACRO");
3219   lexer_test test (case_, content, NULL);
3220
3221   /* Verify that we get the expected tokens back.  */
3222   const cpp_token *tok = test.get_token ();
3223   ASSERT_EQ (tok->type, CPP_PADDING);
3224
3225   tok = test.get_token ();
3226   ASSERT_EQ (tok->type, CPP_STRING);
3227   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3228
3229   /* Verify ranges of individual characters.  We ought to see columns
3230      within the macro definition (line 1), not its expansion on line 2.  */
3231   for (int i = 0; i <= 10; i++)
3232     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3233                           i, 1, 20 + i, 20 + i);
3234
3235   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3236
3237   tok = test.get_token ();
3238   ASSERT_EQ (tok->type, CPP_PADDING);
3239 }
3240
3241 /* Test of stringification of a macro argument.  */
3242
3243 static void
3244 test_lexer_string_locations_stringified_macro_argument
3245   (const line_table_case &case_)
3246 {
3247   /* .....................000000000111111111122222222223.
3248      .....................123456789012345678901234567890.  */
3249   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3250                          "MACRO(foo)\n");
3251   lexer_test test (case_, content, NULL);
3252
3253   /* Verify that we get padding, then the stringified argument "foo".  */
3254   const cpp_token *tok = test.get_token ();
3255   ASSERT_EQ (tok->type, CPP_PADDING);
3256
3257   tok = test.get_token ();
3258   ASSERT_EQ (tok->type, CPP_STRING);
3259   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3260
3261   /* We don't support getting the location of a stringified macro
3262      argument.  Verify that it fails gracefully.  */
3263   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3264                                   "cpp_interpret_string_1 failed");
3265
3266   tok = test.get_token ();
3267   ASSERT_EQ (tok->type, CPP_PADDING);
3268
3269   tok = test.get_token ();
3270   ASSERT_EQ (tok->type, CPP_PADDING);
3271 }
3272
3273 /* Ensure that we fail gracefully if something attempts to pass
3274    in a location that isn't a string literal token.  Seen on this code:
3275
3276      const char a[] = " %d ";
3277      __builtin_printf (a, 0.5);
3278                        ^
3279
3280    when c-format.c erroneously used the indicated one-character
3281    location as the format string location, leading to a read past the
3282    end of a string buffer in cpp_interpret_string_1.  */
3283
3284 static void
3285 test_lexer_string_locations_non_string (const line_table_case &case_)
3286 {
3287   /* .....................000000000111111111122222222223.
3288      .....................123456789012345678901234567890.  */
3289   const char *content = ("         a\n");
3290   lexer_test test (case_, content, NULL);
3291
3292   /* Verify that we get the expected token back.  */
3293   const cpp_token *tok = test.get_token ();
3294   ASSERT_EQ (tok->type, CPP_NAME);
3295   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3296
3297   /* At this point, libcpp is attempting to interpret the name as a
3298      string literal, despite it not starting with a quote.  We don't detect
3299      that, but we should at least fail gracefully.  */
3300   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3301                                   "cpp_interpret_string_1 failed");
3302 }
3303
3304 /* Ensure that we can read substring information for a token which
3305    starts in one linemap and ends in another.  Adapted from
3306    gcc.dg/cpp/pr69985.c.  */
3307
3308 static void
3309 test_lexer_string_locations_long_line (const line_table_case &case_)
3310 {
3311   /* .....................000000.0001111111111
3312      .....................123456.7890123456789.  */
3313   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3314                          "     \"0123456789012345678901234567890123456789"
3315                          "0123456789012345678901234567890123456789"
3316                          "0123456789012345678901234567890123456789"
3317                          "0123456789\"\n");
3318
3319   lexer_test test (case_, content, NULL);
3320
3321   /* Verify that we get the expected token back.  */
3322   const cpp_token *tok = test.get_token ();
3323   ASSERT_EQ (tok->type, CPP_STRING);
3324
3325   if (!should_have_column_data_p (line_table->highest_location))
3326     return;
3327
3328   /* Verify ranges of individual characters: 130 digits plus the terminator.  */
3329   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3330   for (int i = 0; i < 131; i++)
3331     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3332                           i, 2, 7 + i, 7 + i);
3333 }
3334
3335 /* Test of locations within a raw string that doesn't contain a newline.  */
3336
3337 static void
3338 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3339 {
3340   /* .....................00.0000000111111111122.
3341      .....................12.3456789012345678901.  */
3342   const char *content = ("R\"foo(0123456789)foo\"\n");
3343   lexer_test test (case_, content, NULL);
3344
3345   /* Verify that we get the expected token back.  */
3346   const cpp_token *tok = test.get_token ();
3347   ASSERT_EQ (tok->type, CPP_STRING);
3348
3349   /* Verify that cpp_interpret_string works.  */
3350   cpp_string dst_string;
3351   const enum cpp_ttype type = CPP_STRING;
3352   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3353                                       &dst_string, type);
3354   ASSERT_TRUE (result);
3355   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3356   free (const_cast <unsigned char *> (dst_string.text));
3357
3358   if (!should_have_column_data_p (line_table->highest_location))
3359     return;
3360
3361   /* 0-9, plus the NUL terminator.  */
3362   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3363   for (int i = 0; i < 11; i++)
3364     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3365                           i, 1, 7 + i, 7 + i);
3366 }
3367
3368 /* Test of locations within a raw string that contains a newline.  */
3369
3370 static void
3371 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3372 {
3373   /* .....................00.0000.
3374      .....................12.3456.  */
3375   const char *content = ("R\"foo(\n"
3376   /* .....................00000.
3377      .....................12345.  */
3378                          "hello\n"
3379                          "world\n"
3380   /* .....................00000.
3381      .....................12345.  */
3382                          ")foo\"\n");
3383   lexer_test test (case_, content, NULL);
3384
3385   /* Verify that we get the expected token back.  */
3386   const cpp_token *tok = test.get_token ();
3387   ASSERT_EQ (tok->type, CPP_STRING);
3388
3389   /* Verify that cpp_interpret_string works, keeping the embedded newlines.  */
3390   cpp_string dst_string;
3391   const enum cpp_ttype type = CPP_STRING;
3392   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3393                                       &dst_string, type);
3394   ASSERT_TRUE (result);
3395   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3396   free (const_cast <unsigned char *> (dst_string.text));
3397
3398   if (!should_have_column_data_p (line_table->highest_location))
3399     return;
3400
3401   /* Currently we don't support locations within raw strings that
3402      contain newlines.  */
3403   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3404                                   "range endpoints are on different lines");
3405 }
3406
3407 /* Test of parsing an unterminated raw string.  */
3408
3409 static void
3410 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3411 {
3412   const char *content = "R\"ouch()ouCh\" /* etc */";
3413
3414   lexer_error_sink errors;
3415   lexer_test test (case_, content, &errors);
3416   test.m_implicitly_expect_EOF = false;
3417
3418   /* Attempt to parse the raw string; delimiters "ouch" and "ouCh" differ.  */
3419   const cpp_token *tok = test.get_token ();
3420   ASSERT_EQ (tok->type, CPP_EOF);
3421
3422   ASSERT_EQ (1, errors.m_errors.length ());
3423   /* We expect the message "unterminated raw string"
3424      in the "cpplib" translation domain.
3425      It's not clear that dgettext is available on all supported hosts,
3426      so this assertion is commented-out for now.
3427        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3428                      errors.m_errors[0]);
3429   */
3430 }
3431
3432 /* Test of lexing char constants.  */
3433
3434 static void
3435 test_lexer_char_constants (const line_table_case &case_)
3436 {
3437   /* Various char constants.
3438      .....................0000000001111111111.22222222223.
3439      .....................1234567890123456789.01234567890.  */
3440   const char *content = ("         'a'\n"
3441                          "        u'a'\n"
3442                          "        U'a'\n"
3443                          "        L'a'\n"
3444                          "         'abc'\n");
3445   lexer_test test (case_, content, NULL);
3446
3447   /* Verify that we get the expected tokens back.  */
3448   /* 'a': also check its interpreted value and character count.  */
3449   const cpp_token *tok = test.get_token ();
3450   ASSERT_EQ (tok->type, CPP_CHAR);
3451   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3452
3453   unsigned int chars_seen;
3454   int unsignedp;
3455   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3456                                           &chars_seen, &unsignedp);
3457   ASSERT_EQ (cc, 'a');
3458   ASSERT_EQ (chars_seen, 1);
3459
3460   /* u'a'.  */
3461   tok = test.get_token ();
3462   ASSERT_EQ (tok->type, CPP_CHAR16);
3463   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3464
3465   /* U'a'.  */
3466   tok = test.get_token ();
3467   ASSERT_EQ (tok->type, CPP_CHAR32);
3468   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3469
3470   /* L'a'.  */
3471   tok = test.get_token ();
3472   ASSERT_EQ (tok->type, CPP_WCHAR);
3473   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3474
3475   /* 'abc' (c-char-sequence).  */
3476   tok = test.get_token ();
3477   ASSERT_EQ (tok->type, CPP_CHAR);
3478   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3479 }
3480 /* A table of interesting location_t values, giving one axis of the test
3481    matrix explored by for_each_line_table_case.  */
3482
3483 static const location_t boundary_locations[] = {
3484   /* Zero means "don't override the default values for a new line_table".  */
3485   0,
3486
3487   /* An arbitrary non-zero value that isn't close to one of
3488      the boundary values below.  */
3489   0x10000,
3490
3491   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3492   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3493   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3494   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3495   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3496   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3497
3498   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3499   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3500   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3501   LINE_MAP_MAX_LOCATION_WITH_COLS,
3502   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3503   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3504 };
3505
3506 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3507
3508 void
3509 for_each_line_table_case (void (*testcase) (const line_table_case &))
3510 {
3511   /* As noted above in the description of struct line_table_case,
3512      we want to explore a test matrix of interesting line_table
3513      situations, running various selftests for each case within the
3514      matrix.  */
3515
3516   /* Run all tests with:
3517      (a) line_table->default_range_bits == 0, and
3518      (b) line_table->default_range_bits == 5.  */
3519   int num_cases_tested = 0;
3520   for (int default_range_bits = 0; default_range_bits <= 5;
3521        default_range_bits += 5)
3522     {
3523       /* ...and use each of the "interesting" location values as
3524          the starting location within line_table.  */
3525       const int num_boundary_locations
3526         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3527       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3528         {
3529           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3530
3531           testcase (c);
3532
3533           num_cases_tested++;
3534         }
3535     }
3536
3537   /* Verify full coverage: 2 default_range_bits x 12 boundary_locations.  */
3538   ASSERT_EQ (num_cases_tested, 2 * 12);
3539 }
3540
3541 /* Run all selftests in this file; most run once per line_table_case.  */
3542
3543 void
3544 input_c_tests ()
3545 {
3546   test_linenum_comparisons ();
3547   test_should_have_column_data_p ();
3548   test_unknown_location ();
3549   test_builtins ();
3550   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3551
3552   for_each_line_table_case (test_accessing_ordinary_linemaps);
3553   for_each_line_table_case (test_lexer);
3554   for_each_line_table_case (test_lexer_string_locations_simple);
3555   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3556   for_each_line_table_case (test_lexer_string_locations_hex);
3557   for_each_line_table_case (test_lexer_string_locations_oct);
3558   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3559   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3560   for_each_line_table_case (test_lexer_string_locations_ucn4);
3561   for_each_line_table_case (test_lexer_string_locations_ucn8);
3562   for_each_line_table_case (test_lexer_string_locations_wide_string);
3563   for_each_line_table_case (test_lexer_string_locations_string16);
3564   for_each_line_table_case (test_lexer_string_locations_string32);
3565   for_each_line_table_case (test_lexer_string_locations_u8);
3566   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3567   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3568   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3569   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3570   for_each_line_table_case (test_lexer_string_locations_macro);
3571   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3572   for_each_line_table_case (test_lexer_string_locations_non_string);
3573   for_each_line_table_case (test_lexer_string_locations_long_line);
3574   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3575   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3576   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3577   for_each_line_table_case (test_lexer_char_constants);
3578
3579   test_reading_source_line ();
3580 }
3581
3582 } // namespace selftest
3583
3584 #endif /* CHECKING_P */