gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   /* The file_path is the key for identifying a particular file in
  67      the cache.
  68      For libcpp-using code, the underlying buffer for this field is
  69      owned by the corresponding _cpp_file within the cpp_reader.  */
  70   const char *file_path;
  71
  72   FILE *fp;
  73
  74   /* This points to the content of the file that we've read so
  75      far.  */
  76   char *data;
  77
  78   /*  The size of the DATA array above.*/
  79   size_t size;
  80
  81   /* The number of bytes read from the underlying file so far.  This
  82      must be less (or equal) than SIZE above.  */
  83   size_t nb_read;
  84
  85   /* The index of the beginning of the current line.  */
  86   size_t line_start_idx;
  87
  88   /* The number of the previous line read.  This starts at 1.  Zero
  89      means we've read no line so far.  */
  90   size_t line_num;
  91
  92   /* This is the total number of lines of the current file.  At the
  93      moment, we try to get this information from the line map
  94      subsystem.  Note that this is just a hint.  When using the C++
  95      front-end, this hint is correct because the input file is then
  96      completely tokenized before parsing starts; so the line map knows
  97      the number of lines before compilation really starts.  For e.g,
  98      the C front-end, it can happen that we start emitting diagnostics
  99      before the line map has seen the end of the file.  */
 100   size_t total_lines;
 101
 102   /* Could this file be missing a trailing newline on its final line?
 103      Initially true (to cope with empty files), set to true/false
 104      as each line is read.  */
 105   bool missing_trailing_newline;
 106
 107   /* This is a record of the beginning and end of the lines we've seen
 108      while reading the file.  This is useful to avoid walking the data
 109      from the beginning when we are asked to read a line that is
 110      before LINE_START_IDX above.  Note that the maximum size of this
 111      record is fcache_line_record_size, so that the memory consumption
 112      doesn't explode.  We thus scale total_lines down to
 113      fcache_line_record_size.  */
 114   vec<line_info, va_heap> line_record;
 115
 116   fcache ();
 117   ~fcache ();
 118 };
 119
 120 /* Current position in real source file.  */
 121
 122 location_t input_location = UNKNOWN_LOCATION;
 123
 124 struct line_maps *line_table;
 125
 126 /* A stashed copy of "line_table" for use by selftest::line_table_test.
 127    This needs to be a global so that it can be a GC root, and thus
 128    prevent the stashed copy from being garbage-collected if the GC runs
 129    during a line_table_test.  */
 130
 131 struct line_maps *saved_line_table;
 132
 133 static fcache *fcache_tab;
 134 static const size_t fcache_tab_size = 16;
 135 static const size_t fcache_buffer_size = 4 * 1024;
 136 static const size_t fcache_line_record_size = 100;
 137
 138 /* Expand the source location LOC into a human readable location.  If
 139    LOC resolves to a builtin location, the file name of the readable
 140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 141    TRUE and LOC is virtual, then it is resolved to the expansion
 142    point of the involved macro.  Otherwise, it is resolved to the
 143    spelling location of the token.
 144
 145    When resolving to the spelling location of the token, if the
 146    resulting location is for a built-in location (that is, it has no
 147    associated line/column) in the context of a macro expansion, the
 148    returned location is the first one (while unwinding the macro
 149    location towards its expansion point) that is in real source
 150    code.
 151
 152    ASPECT controls which part of the location to use.  */
 153
 154 static expanded_location
 155 expand_location_1 (source_location loc,
 156                    bool expansion_point_p,
 157                    enum location_aspect aspect)
 158 {
 159   expanded_location xloc;
 160   const line_map_ordinary *map;
 161   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 162   tree block = NULL;
 163
 164   if (IS_ADHOC_LOC (loc))
 165     {
 166       block = LOCATION_BLOCK (loc);
 167       loc = LOCATION_LOCUS (loc);
 168     }
 169
 170   memset (&xloc, 0, sizeof (xloc));
 171
 172   if (loc >= RESERVED_LOCATION_COUNT)
 173     {
 174       if (!expansion_point_p)
 175         {
 176           /* We want to resolve LOC to its spelling location.
 177
 178              But if that spelling location is a reserved location that
 179              appears in the context of a macro expansion (like for a
 180              location for a built-in token), let's consider the first
 181              location (toward the expansion point) that is not reserved;
 182              that is, the first location that is in real source code.  */
 183           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 184                                                           loc, NULL);
 185           lrk = LRK_SPELLING_LOCATION;
 186         }
 187       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 188
 189       /* loc is now either in an ordinary map, or is a reserved location.
 190          If it is a compound location, the caret is in a spelling location,
 191          but the start/finish might still be a virtual location.
 192          Depending of what the caller asked for, we may need to recurse
 193          one level in order to resolve any virtual locations in the
 194          end-points.  */
 195       switch (aspect)
 196         {
 197         default:
 198           gcc_unreachable ();
 199           /* Fall through.  */
 200         case LOCATION_ASPECT_CARET:
 201           break;
 202         case LOCATION_ASPECT_START:
 203           {
 204             source_location start = get_start (loc);
 205             if (start != loc)
 206               return expand_location_1 (start, expansion_point_p, aspect);
 207           }
 208           break;
 209         case LOCATION_ASPECT_FINISH:
 210           {
 211             source_location finish = get_finish (loc);
 212             if (finish != loc)
 213               return expand_location_1 (finish, expansion_point_p, aspect);
 214           }
 215           break;
 216         }
 217       xloc = linemap_expand_location (line_table, map, loc);
 218     }
 219
 220   xloc.data = block;
 221   if (loc <= BUILTINS_LOCATION)
 222     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 223
 224   return xloc;
 225 }
 226
 227 /* Initialize the set of cache used for files accessed by caret
 228    diagnostic.  */
 229
 230 static void
 231 diagnostic_file_cache_init (void)
 232 {
 233   if (fcache_tab == NULL)
 234     fcache_tab = new fcache[fcache_tab_size];
 235 }
 236
 237 /* Free the resources used by the set of cache used for files accessed
 238    by caret diagnostic.  */
 239
 240 void
 241 diagnostic_file_cache_fini (void)
 242 {
 243   if (fcache_tab)
 244     {
 245       delete [] (fcache_tab);
 246       fcache_tab = NULL;
 247     }
 248 }
 249
 250 /* Return the total lines number that have been read so far by the
 251    line map (in the preprocessor) so far.  For languages like C++ that
 252    entirely preprocess the input file before starting to parse, this
 253    equals the actual number of lines of the file.  */
 254
 255 static size_t
 256 total_lines_num (const char *file_path)
 257 {
 258   size_t r = 0;
 259   source_location l = 0;
 260   if (linemap_get_file_highest_location (line_table, file_path, &l))
 261     {
 262       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 263       expanded_location xloc = expand_location (l);
 264       r = xloc.line;
 265     }
 266   return r;
 267 }
 268
 269 /* Lookup the cache used for the content of a given file accessed by
 270    caret diagnostic.  Return the found cached file, or NULL if no
 271    cached file was found.  */
 272
 273 static fcache*
 274 lookup_file_in_cache_tab (const char *file_path)
 275 {
 276   if (file_path == NULL)
 277     return NULL;
 278
 279   diagnostic_file_cache_init ();
 280
 281   /* This will contain the found cached file.  */
 282   fcache *r = NULL;
 283   for (unsigned i = 0; i < fcache_tab_size; ++i)
 284     {
 285       fcache *c = &fcache_tab[i];
 286       if (c->file_path && !strcmp (c->file_path, file_path))
 287         {
 288           ++c->use_count;
 289           r = c;
 290         }
 291     }
 292
 293   if (r)
 294     ++r->use_count;
 295
 296   return r;
 297 }
 298
 299 /* Purge any mention of FILENAME from the cache of files used for
 300    printing source code.  For use in selftests when working
 301    with tempfiles.  */
 302
 303 void
 304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 305 {
 306   gcc_assert (file_path);
 307
 308   fcache *r = lookup_file_in_cache_tab (file_path);
 309   if (!r)
 310     /* Not found.  */
 311     return;
 312
 313   r->file_path = NULL;
 314   if (r->fp)
 315     fclose (r->fp);
 316   r->fp = NULL;
 317   r->nb_read = 0;
 318   r->line_start_idx = 0;
 319   r->line_num = 0;
 320   r->line_record.truncate (0);
 321   r->use_count = 0;
 322   r->total_lines = 0;
 323   r->missing_trailing_newline = true;
 324 }
 325
 326 /* Return the file cache that has been less used, recently, or the
 327    first empty one.  If HIGHEST_USE_COUNT is non-null,
 328    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 329    in the cache table.  */
 330
 331 static fcache*
 332 evicted_cache_tab_entry (unsigned *highest_use_count)
 333 {
 334   diagnostic_file_cache_init ();
 335
 336   fcache *to_evict = &fcache_tab[0];
 337   unsigned huc = to_evict->use_count;
 338   for (unsigned i = 1; i < fcache_tab_size; ++i)
 339     {
 340       fcache *c = &fcache_tab[i];
 341       bool c_is_empty = (c->file_path == NULL);
 342
 343       if (c->use_count < to_evict->use_count
 344           || (to_evict->file_path && c_is_empty))
 345         /* We evict C because it's either an entry with a lower use
 346            count or one that is empty.  */
 347         to_evict = c;
 348
 349       if (huc < c->use_count)
 350         huc = c->use_count;
 351
 352       if (c_is_empty)
 353         /* We've reached the end of the cache; subsequent elements are
 354            all empty.  */
 355         break;
 356     }
 357
 358   if (highest_use_count)
 359     *highest_use_count = huc;
 360
 361   return to_evict;
 362 }
 363
 364 /* Create the cache used for the content of a given file to be
 365    accessed by caret diagnostic.  This cache is added to an array of
 366    cache and can be retrieved by lookup_file_in_cache_tab.  This
 367    function returns the created cache.  Note that only the last
 368    fcache_tab_size files are cached.  */
 369
 370 static fcache*
 371 add_file_to_cache_tab (const char *file_path)
 372 {
 373
 374   FILE *fp = fopen (file_path, "r");
 375   if (fp == NULL)
 376     return NULL;
 377
 378   unsigned highest_use_count = 0;
 379   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 380   r->file_path = file_path;
 381   if (r->fp)
 382     fclose (r->fp);
 383   r->fp = fp;
 384   r->nb_read = 0;
 385   r->line_start_idx = 0;
 386   r->line_num = 0;
 387   r->line_record.truncate (0);
 388   /* Ensure that this cache entry doesn't get evicted next time
 389      add_file_to_cache_tab is called.  */
 390   r->use_count = ++highest_use_count;
 391   r->total_lines = total_lines_num (file_path);
 392   r->missing_trailing_newline = true;
 393
 394   return r;
 395 }
 396
 397 /* Lookup the cache used for the content of a given file accessed by
 398    caret diagnostic.  If no cached file was found, create a new cache
 399    for this file, add it to the array of cached file and return
 400    it.  */
 401
 402 static fcache*
 403 lookup_or_add_file_to_cache_tab (const char *file_path)
 404 {
 405   fcache *r = lookup_file_in_cache_tab (file_path);
 406   if (r == NULL)
 407     r = add_file_to_cache_tab (file_path);
 408   return r;
 409 }
 410
 411 /* Default constructor for a cache of file used by caret
 412    diagnostic.  */
 413
 414 fcache::fcache ()
 415 : use_count (0), file_path (NULL), fp (NULL), data (0),
 416   size (0), nb_read (0), line_start_idx (0), line_num (0),
 417   total_lines (0), missing_trailing_newline (true)
 418 {
 419   line_record.create (0);
 420 }
 421
 422 /* Destructor for a cache of file used by caret diagnostic.  */
 423
 424 fcache::~fcache ()
 425 {
 426   if (fp)
 427     {
 428       fclose (fp);
 429       fp = NULL;
 430     }
 431   if (data)
 432     {
 433       XDELETEVEC (data);
 434       data = 0;
 435     }
 436   line_record.release ();
 437 }
 438
 439 /* Returns TRUE iff the cache would need to be filled with data coming
 440    from the file.  That is, either the cache is empty or full or the
 441    current line is empty.  Note that if the cache is full, it would
 442    need to be extended and filled again.  */
 443
 444 static bool
 445 needs_read (fcache *c)
 446 {
 447   return (c->nb_read == 0
 448           || c->nb_read == c->size
 449           || (c->line_start_idx >= c->nb_read - 1));
 450 }
 451
 452 /*  Return TRUE iff the cache is full and thus needs to be
 453     extended.  */
 454
 455 static bool
 456 needs_grow (fcache *c)
 457 {
 458   return c->nb_read == c->size;
 459 }
 460
 461 /* Grow the cache if it needs to be extended.  */
 462
 463 static void
 464 maybe_grow (fcache *c)
 465 {
 466   if (!needs_grow (c))
 467     return;
 468
 469   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 470   c->data = XRESIZEVEC (char, c->data, size);
 471   c->size = size;
 472 }
 473
 474 /*  Read more data into the cache.  Extends the cache if need be.
 475     Returns TRUE iff new data could be read.  */
 476
 477 static bool
 478 read_data (fcache *c)
 479 {
 480   if (feof (c->fp) || ferror (c->fp))
 481     return false;
 482
 483   maybe_grow (c);
 484
 485   char * from = c->data + c->nb_read;
 486   size_t to_read = c->size - c->nb_read;
 487   size_t nb_read = fread (from, 1, to_read, c->fp);
 488
 489   if (ferror (c->fp))
 490     return false;
 491
 492   c->nb_read += nb_read;
 493   return !!nb_read;
 494 }
 495
 496 /* Read new data iff the cache needs to be filled with more data
 497    coming from the file FP.  Return TRUE iff the cache was filled with
 498    mode data.  */
 499
 500 static bool
 501 maybe_read_data (fcache *c)
 502 {
 503   if (!needs_read (c))
 504     return false;
 505   return read_data (c);
 506 }
 507
 508 /* Read a new line from file FP, using C as a cache for the data
 509    coming from the file.  Upon successful completion, *LINE is set to
 510    the beginning of the line found.  *LINE points directly in the
 511    line cache and is only valid until the next call of get_next_line.
 512    *LINE_LEN is set to the length of the line.  Note that the line
 513    does not contain any terminal delimiter.  This function returns
 514    true if some data was read or process from the cache, false
 515    otherwise.  Note that subsequent calls to get_next_line might
 516    make the content of *LINE invalid.  */
 517
 518 static bool
 519 get_next_line (fcache *c, char **line, ssize_t *line_len)
 520 {
 521   /* Fill the cache with data to process.  */
 522   maybe_read_data (c);
 523
 524   size_t remaining_size = c->nb_read - c->line_start_idx;
 525   if (remaining_size == 0)
 526     /* There is no more data to process.  */
 527     return false;
 528
 529   char *line_start = c->data + c->line_start_idx;
 530
 531   char *next_line_start = NULL;
 532   size_t len = 0;
 533   char *line_end = (char *) memchr (line_start, '\n', remaining_size);
 534   if (line_end == NULL)
 535     {
 536       /* We haven't found the end-of-line delimiter in the cache.
 537          Fill the cache with more data from the file and look for the
 538          '\n'.  */
 539       while (maybe_read_data (c))
 540         {
 541           line_start = c->data + c->line_start_idx;
 542           remaining_size = c->nb_read - c->line_start_idx;
 543           line_end = (char *) memchr (line_start, '\n', remaining_size);
 544           if (line_end != NULL)
 545             {
 546               next_line_start = line_end + 1;
 547               break;
 548             }
 549         }
 550       if (line_end == NULL)
 551         {
 552           /* We've loadded all the file into the cache and still no
 553              '\n'.  Let's say the line ends up at one byte passed the
 554              end of the file.  This is to stay consistent with the case
 555              of when the line ends up with a '\n' and line_end points to
 556              that terminal '\n'.  That consistency is useful below in
 557              the len calculation.  */
 558           line_end = c->data + c->nb_read ;
 559           c->missing_trailing_newline = true;
 560         }
 561       else
 562         c->missing_trailing_newline = false;
 563     }
 564   else
 565     {
 566       next_line_start = line_end + 1;
 567       c->missing_trailing_newline = false;
 568     }
 569
 570   if (ferror (c->fp))
 571     return false;
 572
 573   /* At this point, we've found the end of the of line.  It either
 574      points to the '\n' or to one byte after the last byte of the
 575      file.  */
 576   gcc_assert (line_end != NULL);
 577
 578   len = line_end - line_start;
 579
 580   if (c->line_start_idx < c->nb_read)
 581     *line = line_start;
 582
 583   ++c->line_num;
 584
 585   /* Before we update our line record, make sure the hint about the
 586      total number of lines of the file is correct.  If it's not, then
 587      we give up recording line boundaries from now on.  */
 588   bool update_line_record = true;
 589   if (c->line_num > c->total_lines)
 590     update_line_record = false;
 591
 592     /* Now update our line record so that re-reading lines from the
 593      before c->line_start_idx is faster.  */
 594   if (update_line_record
 595       && c->line_record.length () < fcache_line_record_size)
 596     {
 597       /* If the file lines fits in the line record, we just record all
 598          its lines ...*/
 599       if (c->total_lines <= fcache_line_record_size
 600           && c->line_num > c->line_record.length ())
 601         c->line_record.safe_push (fcache::line_info (c->line_num,
 602                                                  c->line_start_idx,
 603                                                  line_end - c->data));
 604       else if (c->total_lines > fcache_line_record_size)
 605         {
 606           /* ... otherwise, we just scale total_lines down to
 607              (fcache_line_record_size lines.  */
 608           size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
 609           if (c->line_record.length () == 0
 610               || n >= c->line_record.length ())
 611             c->line_record.safe_push (fcache::line_info (c->line_num,
 612                                                      c->line_start_idx,
 613                                                      line_end - c->data));
 614         }
 615     }
 616
 617   /* Update c->line_start_idx so that it points to the next line to be
 618      read.  */
 619   if (next_line_start)
 620     c->line_start_idx = next_line_start - c->data;
 621   else
 622     /* We didn't find any terminal '\n'.  Let's consider that the end
 623        of line is the end of the data in the cache.  The next
 624        invocation of get_next_line will either read more data from the
 625        underlying file or return false early because we've reached the
 626        end of the file.  */
 627     c->line_start_idx = c->nb_read;
 628
 629   *line_len = len;
 630
 631   return true;
 632 }
 633
 634 /* Consume the next bytes coming from the cache (or from its
 635    underlying file if there are remaining unread bytes in the file)
 636    until we reach the next end-of-line (or end-of-file).  There is no
 637    copying from the cache involved.  Return TRUE upon successful
 638    completion.  */
 639
 640 static bool
 641 goto_next_line (fcache *cache)
 642 {
 643   char *l;
 644   ssize_t len;
 645
 646   return get_next_line (cache, &l, &len);
 647 }
 648
 649 /* Read an arbitrary line number LINE_NUM from the file cached in C.
 650    If the line was read successfully, *LINE points to the beginning
 651    of the line in the file cache and *LINE_LEN is the length of the
 652    line.  *LINE is not nul-terminated, but may contain zero bytes.
 653    *LINE is only valid until the next call of read_line_num.
 654    This function returns bool if a line was read.  */
 655
 656 static bool
 657 read_line_num (fcache *c, size_t line_num,
 658                char **line, ssize_t *line_len)
 659 {
 660   gcc_assert (line_num > 0);
 661
 662   if (line_num <= c->line_num)
 663     {
 664       /* We've been asked to read lines that are before c->line_num.
 665          So lets use our line record (if it's not empty) to try to
 666          avoid re-reading the file from the beginning again.  */
 667
 668       if (c->line_record.is_empty ())
 669         {
 670           c->line_start_idx = 0;
 671           c->line_num = 0;
 672         }
 673       else
 674         {
 675           fcache::line_info *i = NULL;
 676           if (c->total_lines <= fcache_line_record_size)
 677             {
 678               /* In languages where the input file is not totally
 679                  preprocessed up front, the c->total_lines hint
 680                  can be smaller than the number of lines of the
 681                  file.  In that case, only the first
 682                  c->total_lines have been recorded.
 683
 684                  Otherwise, the first c->total_lines we've read have
 685                  their start/end recorded here.  */
 686               i = (line_num <= c->total_lines)
 687                 ? &c->line_record[line_num - 1]
 688                 : &c->line_record[c->total_lines - 1];
 689               gcc_assert (i->line_num <= line_num);
 690             }
 691           else
 692             {
 693               /*  So the file had more lines than our line record
 694                   size.  Thus the number of lines we've recorded has
 695                   been scaled down to fcache_line_reacord_size.  Let's
 696                   pick the start/end of the recorded line that is
 697                   closest to line_num.  */
 698               size_t n = (line_num <= c->total_lines)
 699                 ? line_num * fcache_line_record_size / c->total_lines
 700                 : c ->line_record.length () - 1;
 701               if (n < c->line_record.length ())
 702                 {
 703                   i = &c->line_record[n];
 704                   gcc_assert (i->line_num <= line_num);
 705                 }
 706             }
 707
 708           if (i && i->line_num == line_num)
 709             {
 710               /* We have the start/end of the line.  */
 711               *line = c->data + i->start_pos;
 712               *line_len = i->end_pos - i->start_pos;
 713               return true;
 714             }
 715
 716           if (i)
 717             {
 718               c->line_start_idx = i->start_pos;
 719               c->line_num = i->line_num - 1;
 720             }
 721           else
 722             {
 723               c->line_start_idx = 0;
 724               c->line_num = 0;
 725             }
 726         }
 727     }
 728
 729   /*  Let's walk from line c->line_num up to line_num - 1, without
 730       copying any line.  */
 731   while (c->line_num < line_num - 1)
 732     if (!goto_next_line (c))
 733       return false;
 734
 735   /* The line we want is the next one.  Let's read and copy it back to
 736      the caller.  */
 737   return get_next_line (c, line, line_len);
 738 }
 739
 740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 741    The line is not nul-terminated.  The returned pointer is only
 742    valid until the next call of location_get_source_line.
 743    Note that the line can contain several null characters,
 744    so LINE_LEN, if non-null, points to the actual length of the line.
 745    If the function fails, NULL is returned.  */
 746
 747 const char *
 748 location_get_source_line (const char *file_path, int line,
 749                           int *line_len)
 750 {
 751   char *buffer = NULL;
 752   ssize_t len;
 753
 754   if (line == 0)
 755     return NULL;
 756
 757   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 758   if (c == NULL)
 759     return NULL;
 760
 761   bool read = read_line_num (c, line, &buffer, &len);
 762
 763   if (read && line_len)
 764     *line_len = len;
 765
 766   return read ? buffer : NULL;
 767 }
 768
 769 /* Determine if FILE_PATH missing a trailing newline on its final line.
 770    Only valid to call once all of the file has been loaded, by
 771    requesting a line number beyond the end of the file.  */
 772
 773 bool
 774 location_missing_trailing_newline (const char *file_path)
 775 {
 776   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 777   if (c == NULL)
 778     return false;
 779
 780   return c->missing_trailing_newline;
 781 }
 782
 783 /* Test if the location originates from the spelling location of a
 784    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 785    virtual) location of a built-in token that appears in the expansion
 786    list of a macro.  Please note that this function also works on
 787    tokens that result from built-in tokens.  For instance, the
 788    function would return true if passed a token "4" that is the result
 789    of the expansion of the built-in __LINE__ macro.  */
 790 bool
 791 is_location_from_builtin_token (source_location loc)
 792 {
 793   const line_map_ordinary *map = NULL;
 794   loc = linemap_resolve_location (line_table, loc,
 795                                   LRK_SPELLING_LOCATION, &map);
 796   return loc == BUILTINS_LOCATION;
 797 }
 798
 799 /* Expand the source location LOC into a human readable location.  If
 800    LOC is virtual, it resolves to the expansion point of the involved
 801    macro.  If LOC resolves to a builtin location, the file name of the
 802    readable location is set to the string "<built-in>".  */
 803
 804 expanded_location
 805 expand_location (source_location loc)
 806 {
 807   return expand_location_1 (loc, /*expansion_point_p=*/true,
 808                             LOCATION_ASPECT_CARET);
 809 }
 810
 811 /* Expand the source location LOC into a human readable location.  If
 812    LOC is virtual, it resolves to the expansion location of the
 813    relevant macro.  If LOC resolves to a builtin location, the file
 814    name of the readable location is set to the string
 815    "<built-in>".  */
 816
 817 expanded_location
 818 expand_location_to_spelling_point (source_location loc)
 819 {
 820   return expand_location_1 (loc, /*expansion_point_p=*/false,
 821                             LOCATION_ASPECT_CARET);
 822 }
 823
 824 /* The rich_location class within libcpp requires a way to expand
 825    source_location instances, and relies on the client code
 826    providing a symbol named
 827      linemap_client_expand_location_to_spelling_point
 828    to do this.
 829
 830    This is the implementation for libcommon.a (all host binaries),
 831    which simply calls into expand_location_1.  */
 832
 833 expanded_location
 834 linemap_client_expand_location_to_spelling_point (source_location loc,
 835                                                   enum location_aspect aspect)
 836 {
 837   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 838 }
 839
 840
 841 /* If LOCATION is in a system header and if it is a virtual location for
 842    a token coming from the expansion of a macro, unwind it to the
 843    location of the expansion point of the macro.  Otherwise, just return
 844    LOCATION.
 845
 846    This is used for instance when we want to emit diagnostics about a
 847    token that may be located in a macro that is itself defined in a
 848    system header, for example, for the NULL macro.  In such a case, if
 849    LOCATION were passed directly to diagnostic functions such as
 850    warning_at, the diagnostic would be suppressed (unless
 851    -Wsystem-headers).  */
 852
 853 source_location
 854 expansion_point_location_if_in_system_header (source_location location)
 855 {
 856   if (in_system_header_at (location))
 857     location = linemap_resolve_location (line_table, location,
 858                                          LRK_MACRO_EXPANSION_POINT,
 859                                          NULL);
 860   return location;
 861 }
 862
 863 /* If LOCATION is a virtual location for a token coming from the expansion
 864    of a macro, unwind to the location of the expansion point of the macro.  */
 865
 866 source_location
 867 expansion_point_location (source_location location)
 868 {
 869   return linemap_resolve_location (line_table, location,
 870                                    LRK_MACRO_EXPANSION_POINT, NULL);
 871 }
 872
 873 /* Construct a location with caret at CARET, ranging from START to
 874    finish e.g.
 875
 876                  11111111112
 877         12345678901234567890
 878      522
 879      523   return foo + bar;
 880                   ~~~~^~~~~
 881      524
 882
 883    The location's caret is at the "+", line 523 column 15, but starts
 884    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 885    of "bar" at column 19.  */
 886
 887 location_t
 888 make_location (location_t caret, location_t start, location_t finish)
 889 {
 890   location_t pure_loc = get_pure_location (caret);
 891   source_range src_range;
 892   src_range.m_start = get_start (start);
 893   src_range.m_finish = get_finish (finish);
 894   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 895                                                    pure_loc,
 896                                                    src_range,
 897                                                    NULL);
 898   return combined_loc;
 899 }
 900
 901 /* Same as above, but taking a source range rather than two locations.  */
 902
 903 location_t
 904 make_location (location_t caret, source_range src_range)
 905 {
 906   location_t pure_loc = get_pure_location (caret);
 907   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 908 }
 909
 910 #define ONE_K 1024
 911 #define ONE_M (ONE_K * ONE_K)
 912
 913 /* Display a number as an integer multiple of either:
 914    - 1024, if said integer is >= to 10 K (in base 2)
 915    - 1024 * 1024, if said integer is >= 10 M in (base 2)
 916  */
 917 #define SCALE(x) ((unsigned long) ((x) < 10 * ONE_K \
 918                   ? (x) \
 919                   : ((x) < 10 * ONE_M \
 920                      ? (x) / ONE_K \
 921                      : (x) / ONE_M)))
 922
 923 /* For a given integer, display either:
 924    - the character 'k', if the number is higher than 10 K (in base 2)
 925      but strictly lower than 10 M (in base 2)
 926    - the character 'M' if the number is higher than 10 M (in base2)
 927    - the charcter ' ' if the number is strictly lower  than 10 K  */
 928 #define STAT_LABEL(x) ((x) < 10 * ONE_K ? ' ' : ((x) < 10 * ONE_M ? 'k' : 'M'))
 929
 930 /* Display an integer amount as multiple of 1K or 1M (in base 2).
 931    Display the correct unit (either k, M, or ' ') after the amount, as
 932    well.  */
 933 #define FORMAT_AMOUNT(size) SCALE (size), STAT_LABEL (size)
 934
 935 /* Dump statistics to stderr about the memory usage of the line_table
 936    set of line maps.  This also displays some statistics about macro
 937    expansion.  */
 938
 939 void
 940 dump_line_table_statistics (void)
 941 {
 942   struct linemap_stats s;
 943   long total_used_map_size,
 944     macro_maps_size,
 945     total_allocated_map_size;
 946
 947   memset (&s, 0, sizeof (s));
 948
 949   linemap_get_statistics (line_table, &s);
 950
 951   macro_maps_size = s.macro_maps_used_size
 952     + s.macro_maps_locations_size;
 953
 954   total_allocated_map_size = s.ordinary_maps_allocated_size
 955     + s.macro_maps_allocated_size
 956     + s.macro_maps_locations_size;
 957
 958   total_used_map_size = s.ordinary_maps_used_size
 959     + s.macro_maps_used_size
 960     + s.macro_maps_locations_size;
 961
 962   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 963            s.num_expanded_macros);
 964   if (s.num_expanded_macros != 0)
 965     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 966              s.num_macro_tokens / s.num_expanded_macros);
 967   fprintf (stderr,
 968            "\nLine Table allocations during the "
 969            "compilation process\n");
 970   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 971            SCALE (s.num_ordinary_maps_used),
 972            STAT_LABEL (s.num_ordinary_maps_used));
 973   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 974            SCALE (s.ordinary_maps_used_size),
 975            STAT_LABEL (s.ordinary_maps_used_size));
 976   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 977            SCALE (s.num_ordinary_maps_allocated),
 978            STAT_LABEL (s.num_ordinary_maps_allocated));
 979   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 980            SCALE (s.ordinary_maps_allocated_size),
 981            STAT_LABEL (s.ordinary_maps_allocated_size));
 982   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 983            SCALE (s.num_macro_maps_used),
 984            STAT_LABEL (s.num_macro_maps_used));
 985   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 986            SCALE (s.macro_maps_used_size),
 987            STAT_LABEL (s.macro_maps_used_size));
 988   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 989            SCALE (s.macro_maps_locations_size),
 990            STAT_LABEL (s.macro_maps_locations_size));
 991   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 992            SCALE (macro_maps_size),
 993            STAT_LABEL (macro_maps_size));
 994   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 995            SCALE (s.duplicated_macro_maps_locations_size),
 996            STAT_LABEL (s.duplicated_macro_maps_locations_size));
 997   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 998            SCALE (total_allocated_map_size),
 999            STAT_LABEL (total_allocated_map_size));
1000   fprintf (stderr, "Total used maps size:                %5ld%c\n",
1001            SCALE (total_used_map_size),
1002            STAT_LABEL (total_used_map_size));
1003   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
1004            SCALE (s.adhoc_table_size),
1005            STAT_LABEL (s.adhoc_table_size));
1006   fprintf (stderr, "Ad-hoc table entries used:           %5ld\n",
1007            s.adhoc_table_entries_used);
1008   fprintf (stderr, "optimized_ranges: %i\n",
1009            line_table->num_optimized_ranges);
1010   fprintf (stderr, "unoptimized_ranges: %i\n",
1011            line_table->num_unoptimized_ranges);
1012
1013   fprintf (stderr, "\n");
1014 }
1015
1016 /* Get location one beyond the final location in ordinary map IDX.  */
1017
1018 static source_location
1019 get_end_location (struct line_maps *set, unsigned int idx)
1020 {
1021   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
1022     return set->highest_location;
1023
1024   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
1025   return MAP_START_LOCATION (next_map);
1026 }
1027
/* Helper function for write_digit_row.
   Write the last decimal digit of DIGIT (i.e. DIGIT % 10) to STREAM as
   a single character.  */

static void
write_digit (FILE *stream, int digit)
{
  const int units = digit % 10;
  fputc ('0' + units, stream);
}
1035
1036 /* Helper function for dump_location_info.
1037    Write a row of numbers to STREAM, numbering a source line,
1038    giving the units, tens, hundreds etc of the column number.  */
1039
1040 static void
1041 write_digit_row (FILE *stream, int indent,
1042                  const line_map_ordinary *map,
1043                  source_location loc, int max_col, int divisor)
1044 {
1045   fprintf (stream, "%*c", indent, ' ');
1046   fprintf (stream, "|");
1047   for (int column = 1; column < max_col; column++)
1048     {
1049       source_location column_loc = loc + (column << map->m_range_bits);
1050       write_digit (stream, column_loc / divisor);
1051     }
1052   fprintf (stream, "\n");
1053 }
1054
1055 /* Write a half-closed (START) / half-open (END) interval of
1056    source_location to STREAM.  */
1057
1058 static void
1059 dump_location_range (FILE *stream,
1060                      source_location start, source_location end)
1061 {
1062   fprintf (stream,
1063            "  source_location interval: %u <= loc < %u\n",
1064            start, end);
1065 }
1066
1067 /* Write a labelled description of a half-closed (START) / half-open (END)
1068    interval of source_location to STREAM.  */
1069
1070 static void
1071 dump_labelled_location_range (FILE *stream,
1072                               const char *name,
1073                               source_location start, source_location end)
1074 {
1075   fprintf (stream, "%s\n", name);
1076   dump_location_range (stream, start, end);
1077   fprintf (stream, "\n");
1078 }
1079
1080 /* Write a visualization of the locations in the line_table to STREAM.  */
1081
1082 void
1083 dump_location_info (FILE *stream)
1084 {
1085   /* Visualize the reserved locations.  */
1086   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1087                                 0, RESERVED_LOCATION_COUNT);
1088
1089   /* Visualize the ordinary line_map instances, rendering the sources. */
1090   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1091     {
1092       source_location end_location = get_end_location (line_table, idx);
1093       /* half-closed: doesn't include this one. */
1094
1095       const line_map_ordinary *map
1096         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1097       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1098       dump_location_range (stream,
1099                            MAP_START_LOCATION (map), end_location);
1100       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1101       fprintf (stream, "  starting at line: %i\n",
1102                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1103       fprintf (stream, "  column and range bits: %i\n",
1104                map->m_column_and_range_bits);
1105       fprintf (stream, "  column bits: %i\n",
1106                map->m_column_and_range_bits - map->m_range_bits);
1107       fprintf (stream, "  range bits: %i\n",
1108                map->m_range_bits);
1109
1110       /* Render the span of source lines that this "map" covers.  */
1111       for (source_location loc = MAP_START_LOCATION (map);
1112            loc < end_location;
1113            loc += (1 << map->m_range_bits) )
1114         {
1115           gcc_assert (pure_location_p (line_table, loc) );
1116
1117           expanded_location exploc
1118             = linemap_expand_location (line_table, map, loc);
1119
1120           if (0 == exploc.column)
1121             {
1122               /* Beginning of a new source line: draw the line.  */
1123
1124               int line_size;
1125               const char *line_text = location_get_source_line (exploc.file,
1126                                                                 exploc.line,
1127                                                                 &line_size);
1128               if (!line_text)
1129                 break;
1130               fprintf (stream,
1131                        "%s:%3i|loc:%5i|%.*s\n",
1132                        exploc.file, exploc.line,
1133                        loc,
1134                        line_size, line_text);
1135
1136               /* "loc" is at column 0, which means "the whole line".
1137                  Render the locations *within* the line, by underlining
1138                  it, showing the source_location numeric values
1139                  at each column.  */
1140               int max_col = (1 << map->m_column_and_range_bits) - 1;
1141               if (max_col > line_size)
1142                 max_col = line_size + 1;
1143
1144               int indent = 14 + strlen (exploc.file);
1145
1146               /* Thousands.  */
1147               if (end_location > 999)
1148                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1149
1150               /* Hundreds.  */
1151               if (end_location > 99)
1152                 write_digit_row (stream, indent, map, loc, max_col, 100);
1153
1154               /* Tens.  */
1155               write_digit_row (stream, indent, map, loc, max_col, 10);
1156
1157               /* Units.  */
1158               write_digit_row (stream, indent, map, loc, max_col, 1);
1159             }
1160         }
1161       fprintf (stream, "\n");
1162     }
1163
1164   /* Visualize unallocated values.  */
1165   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1166                                 line_table->highest_location,
1167                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1168
1169   /* Visualize the macro line_map instances, rendering the sources. */
1170   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1171     {
1172       /* Each macro map that is allocated owns source_location values
1173          that are *lower* that the one before them.
1174          Hence it's meaningful to view them either in order of ascending
1175          source locations, or in order of ascending macro map index.  */
1176       const bool ascending_source_locations = true;
1177       unsigned int idx = (ascending_source_locations
1178                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1179                           : i);
1180       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1181       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1182                idx,
1183                linemap_map_get_macro_name (map),
1184                MACRO_MAP_NUM_MACRO_TOKENS (map));
1185       dump_location_range (stream,
1186                            map->start_location,
1187                            (map->start_location
1188                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1189       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1190               "expansion point is location %i",
1191               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1192       fprintf (stream, "  map->start_location: %u\n",
1193                map->start_location);
1194
1195       fprintf (stream, "  macro_locations:\n");
1196       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1197         {
1198           source_location x = MACRO_MAP_LOCATIONS (map)[2 * i];
1199           source_location y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1200
1201           /* linemap_add_macro_token encodes token numbers in an expansion
1202              by putting them after MAP_START_LOCATION. */
1203
1204           /* I'm typically seeing 4 uninitialized entries at the end of
1205              0xafafafaf.
1206              This appears to be due to macro.c:replace_args
1207              adding 2 extra args for padding tokens; presumably there may
1208              be a leading and/or trailing padding token injected,
1209              each for 2 more location slots.
1210              This would explain there being up to 4 source_locations slots
1211              that may be uninitialized.  */
1212
1213           fprintf (stream, "    %u: %u, %u\n",
1214                    i,
1215                    x,
1216                    y);
1217           if (x == y)
1218             {
1219               if (x < MAP_START_LOCATION (map))
1220                 inform (x, "token %u has x-location == y-location == %u", i, x);
1221               else
1222                 fprintf (stream,
1223                          "x-location == y-location == %u encodes token # %u\n",
1224                          x, x - MAP_START_LOCATION (map));
1225                 }
1226           else
1227             {
1228               inform (x, "token %u has x-location == %u", i, x);
1229               inform (x, "token %u has y-location == %u", i, y);
1230             }
1231         }
1232       fprintf (stream, "\n");
1233     }
1234
1235   /* It appears that MAX_SOURCE_LOCATION itself is never assigned to a
1236      macro map, presumably due to an off-by-one error somewhere
1237      between the logic in linemap_enter_macro and
1238      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1239   dump_labelled_location_range (stream, "MAX_SOURCE_LOCATION",
1240                                 MAX_SOURCE_LOCATION,
1241                                 MAX_SOURCE_LOCATION + 1);
1242
1243   /* Visualize ad-hoc values.  */
1244   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1245                                 MAX_SOURCE_LOCATION + 1, UINT_MAX);
1246 }
1247
1248 /* string_concat's constructor.  */
1249
1250 string_concat::string_concat (int num, location_t *locs)
1251   : m_num (num)
1252 {
1253   m_locs = ggc_vec_alloc <location_t> (num);
1254   for (int i = 0; i < num; i++)
1255     m_locs[i] = locs[i];
1256 }
1257
1258 /* string_concat_db's constructor.  */
1259
1260 string_concat_db::string_concat_db ()
1261 {
1262   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1263 }
1264
1265 /* Record that a string concatenation occurred, covering NUM
1266    string literal tokens.  LOCS is an array of size NUM, containing the
1267    locations of the tokens.  A copy of LOCS is taken.  */
1268
1269 void
1270 string_concat_db::record_string_concatenation (int num, location_t *locs)
1271 {
1272   gcc_assert (num > 1);
1273   gcc_assert (locs);
1274
1275   location_t key_loc = get_key_loc (locs[0]);
1276
1277   string_concat *concat
1278     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1279   m_table->put (key_loc, concat);
1280 }
1281
1282 /* Determine if LOC was the location of the the initial token of a
1283    concatenation of string literal tokens.
1284    If so, *OUT_NUM is written to with the number of tokens, and
1285    *OUT_LOCS with the location of an array of locations of the
1286    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1287    storage owned by the string_concat_db.
1288    Otherwise, return false.  */
1289
1290 bool
1291 string_concat_db::get_string_concatenation (location_t loc,
1292                                             int *out_num,
1293                                             location_t **out_locs)
1294 {
1295   gcc_assert (out_num);
1296   gcc_assert (out_locs);
1297
1298   location_t key_loc = get_key_loc (loc);
1299
1300   string_concat **concat = m_table->get (key_loc);
1301   if (!concat)
1302     return false;
1303
1304   *out_num = (*concat)->m_num;
1305   *out_locs =(*concat)->m_locs;
1306   return true;
1307 }
1308
1309 /* Internal function.  Canonicalize LOC into a form suitable for
1310    use as a key within the database, stripping away macro expansion,
1311    ad-hoc information, and range information, using the location of
1312    the start of LOC within an ordinary linemap.  */
1313
1314 location_t
1315 string_concat_db::get_key_loc (location_t loc)
1316 {
1317   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1318                                   NULL);
1319
1320   loc = get_range_from_loc (line_table, loc).m_start;
1321
1322   return loc;
1323 }
1324
1325 /* Helper class for use within get_substring_ranges_for_loc.
1326    An vec of cpp_string with responsibility for releasing all of the
1327    str->text for each str in the vector.  */
1328
1329 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1330 {
1331  public:
1332   auto_cpp_string_vec (int alloc)
1333     : auto_vec <cpp_string> (alloc) {}
1334
1335   ~auto_cpp_string_vec ()
1336   {
1337     /* Clean up the copies within this vec.  */
1338     int i;
1339     cpp_string *str;
1340     FOR_EACH_VEC_ELT (*this, i, str)
1341       free (const_cast <unsigned char *> (str->text));
1342   }
1343 };
1344
1345 /* Attempt to populate RANGES with source location information on the
1346    individual characters within the string literal found at STRLOC.
1347    If CONCATS is non-NULL, then any string literals that the token at
1348    STRLOC  was concatenated with are also added to RANGES.
1349
1350    Return NULL if successful, or an error message if any errors occurred (in
1351    which case RANGES may be only partially populated and should not
1352    be used).
1353
1354    This is implemented by re-parsing the relevant source line(s).  */
1355
1356 static const char *
1357 get_substring_ranges_for_loc (cpp_reader *pfile,
1358                               string_concat_db *concats,
1359                               location_t strloc,
1360                               enum cpp_ttype type,
1361                               cpp_substring_ranges &ranges)
1362 {
1363   gcc_assert (pfile);
1364
1365   if (strloc == UNKNOWN_LOCATION)
1366     return "unknown location";
1367
1368   /* Reparsing the strings requires accurate location information.
1369      If -ftrack-macro-expansion has been overridden from its default
1370      of 2, then we might have a location of a macro expansion point,
1371      rather than the location of the literal itself.
1372      Avoid this by requiring that we have full macro expansion tracking
1373      for substring locations to be available.  */
1374   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1375     return "track_macro_expansion != 2";
1376
1377   /* If #line or # 44 "file"-style directives are present, then there's
1378      no guarantee that the line numbers we have can be used to locate
1379      the strings.  For example, we might have a .i file with # directives
1380      pointing back to lines within a .c file, but the .c file might
1381      have been edited since the .i file was created.
1382      In such a case, the safest course is to disable on-demand substring
1383      locations.  */
1384   if (line_table->seen_line_directive)
1385     return "seen line directive";
1386
1387   /* If string concatenation has occurred at STRLOC, get the locations
1388      of all of the literal tokens making up the compound string.
1389      Otherwise, just use STRLOC.  */
1390   int num_locs = 1;
1391   location_t *strlocs = &strloc;
1392   if (concats)
1393     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1394
1395   auto_cpp_string_vec strs (num_locs);
1396   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1397   for (int i = 0; i < num_locs; i++)
1398     {
1399       /* Get range of strloc.  We will use it to locate the start and finish
1400          of the literal token within the line.  */
1401       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1402
1403       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1404         /* If the string is within a macro expansion, we can't get at the
1405            end location.  */
1406         return "macro expansion";
1407
1408       if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1409         /* If so, we can't reliably determine where the token started within
1410            its line.  */
1411         return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1412
1413       if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1414         /* If so, we can't reliably determine where the token finished within
1415            its line.  */
1416         return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1417
1418       expanded_location start
1419         = expand_location_to_spelling_point (src_range.m_start);
1420       expanded_location finish
1421         = expand_location_to_spelling_point (src_range.m_finish);
1422       if (start.file != finish.file)
1423         return "range endpoints are in different files";
1424       if (start.line != finish.line)
1425         return "range endpoints are on different lines";
1426       if (start.column > finish.column)
1427         return "range endpoints are reversed";
1428
1429       int line_width;
1430       const char *line = location_get_source_line (start.file, start.line,
1431                                                    &line_width);
1432       if (line == NULL)
1433         return "unable to read source line";
1434
1435       /* Determine the location of the literal (including quotes
1436          and leading prefix chars, such as the 'u' in a u""
1437          token).  */
1438       const char *literal = line + start.column - 1;
1439       int literal_length = finish.column - start.column + 1;
1440
1441       /* Ensure that we don't crash if we got the wrong location.  */
1442       if (line_width < (start.column - 1 + literal_length))
1443         return "line is not wide enough";
1444
1445       cpp_string from;
1446       from.len = literal_length;
1447       /* Make a copy of the literal, to avoid having to rely on
1448          the lifetime of the copy of the line within the cache.
1449          This will be released by the auto_cpp_string_vec dtor.  */
1450       from.text = XDUPVEC (unsigned char, literal, literal_length);
1451       strs.safe_push (from);
1452
1453       /* For very long lines, a new linemap could have started
1454          halfway through the token.
1455          Ensure that the loc_reader uses the linemap of the
1456          *end* of the token for its start location.  */
1457       const line_map_ordinary *final_ord_map;
1458       linemap_resolve_location (line_table, src_range.m_finish,
1459                                 LRK_MACRO_EXPANSION_POINT, &final_ord_map);
1460       location_t start_loc
1461         = linemap_position_for_line_and_column (line_table, final_ord_map,
1462                                                 start.line, start.column);
1463
1464       cpp_string_location_reader loc_reader (start_loc, line_table);
1465       loc_readers.safe_push (loc_reader);
1466     }
1467
1468   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1469   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1470                                                  loc_readers.address (),
1471                                                  num_locs, &ranges, type);
1472   if (err)
1473     return err;
1474
1475   /* Success: "ranges" should now contain information on the string.  */
1476   return NULL;
1477 }
1478
1479 /* Attempt to populate *OUT_LOC with source location information on the
1480    given characters within the string literal found at STRLOC.
1481    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1482    character set.
1483
1484    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1485    and string literal "012345\n789"
1486    *OUT_LOC is written to with:
1487      "012345\n789"
1488          ~^~~~~
1489
1490    If CONCATS is non-NULL, then any string literals that the token at
1491    STRLOC was concatenated with are also considered.
1492
1493    This is implemented by re-parsing the relevant source line(s).
1494
1495    Return NULL if successful, or an error message if any errors occurred.
1496    Error messages are intended for GCC developers (to help debugging) rather
1497    than for end-users.  */
1498
1499 const char *
1500 get_source_location_for_substring (cpp_reader *pfile,
1501                                    string_concat_db *concats,
1502                                    location_t strloc,
1503                                    enum cpp_ttype type,
1504                                    int caret_idx, int start_idx, int end_idx,
1505                                    source_location *out_loc)
1506 {
1507   gcc_checking_assert (caret_idx >= 0);
1508   gcc_checking_assert (start_idx >= 0);
1509   gcc_checking_assert (end_idx >= 0);
1510   gcc_assert (out_loc);
1511
1512   cpp_substring_ranges ranges;
1513   const char *err
1514     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1515   if (err)
1516     return err;
1517
1518   if (caret_idx >= ranges.get_num_ranges ())
1519     return "caret_idx out of range";
1520   if (start_idx >= ranges.get_num_ranges ())
1521     return "start_idx out of range";
1522   if (end_idx >= ranges.get_num_ranges ())
1523     return "end_idx out of range";
1524
1525   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1526                             ranges.get_range (start_idx).m_start,
1527                             ranges.get_range (end_idx).m_finish);
1528   return NULL;
1529 }
1530
1531 #if CHECKING_P
1532
1533 namespace selftest {
1534
1535 /* Selftests of location handling.  */
1536
1537 /* Attempt to populate *OUT_RANGE with source location information on the
1538    given character within the string literal found at STRLOC.
1539    CHAR_IDX refers to an offset within the execution character set.
1540    If CONCATS is non-NULL, then any string literals that the token at
1541    STRLOC was concatenated with are also considered.
1542
1543    This is implemented by re-parsing the relevant source line(s).
1544
1545    Return NULL if successful, or an error message if any errors occurred.
1546    Error messages are intended for GCC developers (to help debugging) rather
1547    than for end-users.  */
1548
1549 static const char *
1550 get_source_range_for_char (cpp_reader *pfile,
1551                            string_concat_db *concats,
1552                            location_t strloc,
1553                            enum cpp_ttype type,
1554                            int char_idx,
1555                            source_range *out_range)
1556 {
1557   gcc_checking_assert (char_idx >= 0);
1558   gcc_assert (out_range);
1559
1560   cpp_substring_ranges ranges;
1561   const char *err
1562     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1563   if (err)
1564     return err;
1565
1566   if (char_idx >= ranges.get_num_ranges ())
1567     return "char_idx out of range";
1568
1569   *out_range = ranges.get_range (char_idx);
1570   return NULL;
1571 }
1572
1573 /* As get_source_range_for_char, but write to *OUT the number
1574    of ranges that are available.  */
1575
1576 static const char *
1577 get_num_source_ranges_for_substring (cpp_reader *pfile,
1578                                      string_concat_db *concats,
1579                                      location_t strloc,
1580                                      enum cpp_ttype type,
1581                                      int *out)
1582 {
1583   gcc_assert (out);
1584
1585   cpp_substring_ranges ranges;
1586   const char *err
1587     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1588
1589   if (err)
1590     return err;
1591
1592   *out = ranges.get_num_ranges ();
1593   return NULL;
1594 }
1595
1596 /* Selftests of location handling.  */
1597
1598 /* Helper function for verifying location data: when location_t
1599    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1600    as having column 0.  */
1601
1602 static bool
1603 should_have_column_data_p (location_t loc)
1604 {
1605   if (IS_ADHOC_LOC (loc))
1606     loc = get_location_from_adhoc_loc (line_table, loc);
1607   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1608     return false;
1609   return true;
1610 }
1611
1612 /* Selftest for should_have_column_data_p.  */
1613
1614 static void
1615 test_should_have_column_data_p ()
1616 {
1617   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1618   ASSERT_TRUE
1619     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1620   ASSERT_FALSE
1621     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1622 }
1623
1624 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1625    on LOC.  */
1626
1627 static void
1628 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1629               location_t loc)
1630 {
1631   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1632   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1633   /* If location_t values are sufficiently high, then column numbers
1634      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1635      When close to the threshold, column numbers *may* be present: if
1636      the final linemap before the threshold contains a line that straddles
1637      the threshold, locations in that line have column information.  */
1638   if (should_have_column_data_p (loc))
1639     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1640 }
1641
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests in a variety
   of situations:
   - different values of line_table->default_range_bits: some frontends
     use a non-zero value and others use zero;
   - the fallback modes within line-map.c: there are various threshold
     values for source_location/location_t beyond which line-map.c
     changes behavior (disabling of the range-packing optimization,
     disabling of column-tracking).  We can exercise these by starting
     the line_table at interesting values at or near these thresholds.

   The following struct describes one particular case within our test
   matrix: a value for default_range_bits and a base location.  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to use for line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Base location for the case.  */
  int m_base_location;
};
1668
1669 /* Constructor.  Store the old value of line_table, and create a new
1670    one, using sane defaults.  */
1671
1672 line_table_test::line_table_test ()
1673 {
1674   gcc_assert (saved_line_table == NULL);
1675   saved_line_table = line_table;
1676   line_table = ggc_alloc<line_maps> ();
1677   linemap_init (line_table, BUILTINS_LOCATION);
1678   gcc_assert (saved_line_table->reallocator);
1679   line_table->reallocator = saved_line_table->reallocator;
1680   gcc_assert (saved_line_table->round_alloc_size);
1681   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1682   line_table->default_range_bits = 0;
1683 }
1684
1685 /* Constructor.  Store the old value of line_table, and create a new
1686    one, using the sitation described in CASE_.  */
1687
1688 line_table_test::line_table_test (const line_table_case &case_)
1689 {
1690   gcc_assert (saved_line_table == NULL);
1691   saved_line_table = line_table;
1692   line_table = ggc_alloc<line_maps> ();
1693   linemap_init (line_table, BUILTINS_LOCATION);
1694   gcc_assert (saved_line_table->reallocator);
1695   line_table->reallocator = saved_line_table->reallocator;
1696   gcc_assert (saved_line_table->round_alloc_size);
1697   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1698   line_table->default_range_bits = case_.m_default_range_bits;
1699   if (case_.m_base_location)
1700     {
1701       line_table->highest_location = case_.m_base_location;
1702       line_table->highest_line = case_.m_base_location;
1703     }
1704 }
1705
1706 /* Destructor.  Restore the old value of line_table.  */
1707
1708 line_table_test::~line_table_test ()
1709 {
1710   gcc_assert (saved_line_table != NULL);
1711   line_table = saved_line_table;
1712   saved_line_table = NULL;
1713 }
1714
1715 /* Verify basic operation of ordinary linemaps.  */
1716
1717 static void
1718 test_accessing_ordinary_linemaps (const line_table_case &case_)
1719 {
1720   line_table_test ltt (case_);
1721
1722   /* Build a simple linemap describing some locations. */
1723   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1724
1725   linemap_line_start (line_table, 1, 100);
1726   location_t loc_a = linemap_position_for_column (line_table, 1);
1727   location_t loc_b = linemap_position_for_column (line_table, 23);
1728
1729   linemap_line_start (line_table, 2, 100);
1730   location_t loc_c = linemap_position_for_column (line_table, 1);
1731   location_t loc_d = linemap_position_for_column (line_table, 17);
1732
1733   /* Example of a very long line.  */
1734   linemap_line_start (line_table, 3, 2000);
1735   location_t loc_e = linemap_position_for_column (line_table, 700);
1736
1737   /* Transitioning back to a short line.  */
1738   linemap_line_start (line_table, 4, 0);
1739   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1740
1741   if (should_have_column_data_p (loc_back_to_short))
1742     {
1743       /* Verify that we switched to short lines in the linemap.  */
1744       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1745       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1746     }
1747
1748   /* Example of a line that will eventually be seen to be longer
1749      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1750      below that.  */
1751   linemap_line_start (line_table, 5, 2000);
1752
1753   location_t loc_start_of_very_long_line
1754     = linemap_position_for_column (line_table, 2000);
1755   location_t loc_too_wide
1756     = linemap_position_for_column (line_table, 4097);
1757   location_t loc_too_wide_2
1758     = linemap_position_for_column (line_table, 4098);
1759
1760   /* ...and back to a sane line length.  */
1761   linemap_line_start (line_table, 6, 100);
1762   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1763
1764   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1765
1766   /* Multiple files.  */
1767   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1768   linemap_line_start (line_table, 1, 200);
1769   location_t loc_f = linemap_position_for_column (line_table, 150);
1770   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1771
1772   /* Verify that we can recover the location info.  */
1773   assert_loceq ("foo.c", 1, 1, loc_a);
1774   assert_loceq ("foo.c", 1, 23, loc_b);
1775   assert_loceq ("foo.c", 2, 1, loc_c);
1776   assert_loceq ("foo.c", 2, 17, loc_d);
1777   assert_loceq ("foo.c", 3, 700, loc_e);
1778   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1779
1780   /* In the very wide line, the initial location should be fully tracked.  */
1781   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1782   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1783      be disabled.  */
1784   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1785   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1786   /*...and column-tracking should be re-enabled for subsequent lines.  */
1787   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1788
1789   assert_loceq ("bar.c", 1, 150, loc_f);
1790
1791   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1792   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1793
1794   /* Verify using make_location to build a range, and extracting data
1795      back from it.  */
1796   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1797   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1798   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1799   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1800   ASSERT_EQ (loc_b, src_range.m_start);
1801   ASSERT_EQ (loc_d, src_range.m_finish);
1802 }
1803
1804 /* Verify various properties of UNKNOWN_LOCATION.  */
1805
1806 static void
1807 test_unknown_location ()
1808 {
1809   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1810   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1811   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1812 }
1813
1814 /* Verify various properties of BUILTINS_LOCATION.  */
1815
1816 static void
1817 test_builtins ()
1818 {
1819   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1820   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1821 }
1822
1823 /* Regression test for make_location.
1824    Ensure that we use pure locations for the start/finish of the range,
1825    rather than storing a packed or ad-hoc range as the start/finish.  */
1826
1827 static void
1828 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1829 {
1830   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1831      with C++ frontend.
1832      ....................0000000001111111111222.
1833      ....................1234567890123456789012.  */
1834   const char *content = "     r += !aaa == bbb;\n";
1835   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1836   line_table_test ltt (case_);
1837   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1838
1839   const location_t c11 = linemap_position_for_column (line_table, 11);
1840   const location_t c12 = linemap_position_for_column (line_table, 12);
1841   const location_t c13 = linemap_position_for_column (line_table, 13);
1842   const location_t c14 = linemap_position_for_column (line_table, 14);
1843   const location_t c21 = linemap_position_for_column (line_table, 21);
1844
1845   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1846     return;
1847
1848   /* Use column 13 for the caret location, arbitrarily, to verify that we
1849      handle start != caret.  */
1850   const location_t aaa = make_location (c13, c12, c14);
1851   ASSERT_EQ (c13, get_pure_location (aaa));
1852   ASSERT_EQ (c12, get_start (aaa));
1853   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1854   ASSERT_EQ (c14, get_finish (aaa));
1855   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1856
1857   /* Make a location using a location with a range as the start-point.  */
1858   const location_t not_aaa = make_location (c11, aaa, c14);
1859   ASSERT_EQ (c11, get_pure_location (not_aaa));
1860   /* It should use the start location of the range, not store the range
1861      itself.  */
1862   ASSERT_EQ (c12, get_start (not_aaa));
1863   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1864   ASSERT_EQ (c14, get_finish (not_aaa));
1865   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1866
1867   /* Similarly, make a location with a range as the end-point.  */
1868   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1869   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1870   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1871   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1872   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1873   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1874   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1875   /* It should use the finish location of the range, not store the range
1876      itself.  */
1877   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1878   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1879   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1880   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1881   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1882 }
1883
1884 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1885
1886 static void
1887 test_reading_source_line ()
1888 {
1889   /* Create a tempfile and write some text to it.  */
1890   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1891                         "01234567890123456789\n"
1892                         "This is the test text\n"
1893                         "This is the 3rd line");
1894
1895   /* Read back a specific line from the tempfile.  */
1896   int line_size;
1897   const char *source_line = location_get_source_line (tmp.get_filename (),
1898                                                       3, &line_size);
1899   ASSERT_TRUE (source_line != NULL);
1900   ASSERT_EQ (20, line_size);
1901   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1902                          source_line, line_size));
1903
1904   source_line = location_get_source_line (tmp.get_filename (),
1905                                           2, &line_size);
1906   ASSERT_TRUE (source_line != NULL);
1907   ASSERT_EQ (21, line_size);
1908   ASSERT_TRUE (!strncmp ("This is the test text",
1909                          source_line, line_size));
1910
1911   source_line = location_get_source_line (tmp.get_filename (),
1912                                           4, &line_size);
1913   ASSERT_TRUE (source_line == NULL);
1914 }
1915
/* Tests of lexing.  */

/* Assert that cpp_token_as_text for token TOK of PARSER yields a string
   equal to EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    unsigned char *token_text_                                          \
      = cpp_token_as_text ((PARSER), (TOK));                            \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)token_text_);          \
  SELFTEST_END_STMT
1926
1927 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1928    and ranges from EXP_START_COL to EXP_FINISH_COL.
1929    Use LOC as the effective location of the selftest.  */
1930
1931 static void
1932 assert_token_loc_eq (const location &loc,
1933                      const cpp_token *tok,
1934                      const char *exp_filename, int exp_linenum,
1935                      int exp_start_col, int exp_finish_col)
1936 {
1937   location_t tok_loc = tok->src_loc;
1938   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1939   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1940
1941   /* If location_t values are sufficiently high, then column numbers
1942      will be unavailable.  */
1943   if (!should_have_column_data_p (tok_loc))
1944     return;
1945
1946   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1947   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1948   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1949   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1950 }
1951
/* Call assert_token_loc_eq on TOK, passing SELFTEST_LOCATION as the
   effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME), (EXP_LINENUM),                   \
                       (EXP_START_COL), (EXP_FINISH_COL))
1959
1960 /* Test of lexing a file using libcpp, verifying tokens and their
1961    location information.  */
1962
1963 static void
1964 test_lexer (const line_table_case &case_)
1965 {
1966   /* Create a tempfile and write some text to it.  */
1967   const char *content =
1968     /*00000000011111111112222222222333333.3333444444444.455555555556
1969       12345678901234567890123456789012345.6789012345678.901234567890.  */
1970     ("test_name /* c-style comment */\n"
1971      "                                  \"test literal\"\n"
1972      " // test c++-style comment\n"
1973      "   42\n");
1974   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1975
1976   line_table_test ltt (case_);
1977
1978   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1979
1980   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1981   ASSERT_NE (fname, NULL);
1982
1983   /* Verify that we get the expected tokens back, with the correct
1984      location information.  */
1985
1986   location_t loc;
1987   const cpp_token *tok;
1988   tok = cpp_get_token_with_location (parser, &loc);
1989   ASSERT_NE (tok, NULL);
1990   ASSERT_EQ (tok->type, CPP_NAME);
1991   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1992   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1993
1994   tok = cpp_get_token_with_location (parser, &loc);
1995   ASSERT_NE (tok, NULL);
1996   ASSERT_EQ (tok->type, CPP_STRING);
1997   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1998   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1999
2000   tok = cpp_get_token_with_location (parser, &loc);
2001   ASSERT_NE (tok, NULL);
2002   ASSERT_EQ (tok->type, CPP_NUMBER);
2003   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2004   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
2005
2006   tok = cpp_get_token_with_location (parser, &loc);
2007   ASSERT_NE (tok, NULL);
2008   ASSERT_EQ (tok->type, CPP_EOF);
2009
2010   cpp_finish (parser, NULL);
2011   cpp_destroy (parser);
2012 }
2013
/* Forward decls.  */

struct lexer_test;
class lexer_test_options;

/* Abstract base class for specifying options of a lexer_test.
   The lexer_test constructor calls the "apply" vfunc on the options
   instance it is given (if any), passing itself as TEST.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &test) = 0;
};
2027
2028 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2029    in its dtor.
2030
2031    This is needed by struct lexer_test to ensure that the cleanup of the
2032    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2033
2034 class cpp_reader_ptr
2035 {
2036  public:
2037   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2038
2039   ~cpp_reader_ptr ()
2040   {
2041     cpp_finish (m_ptr, NULL);
2042     cpp_destroy (m_ptr);
2043   }
2044
2045   operator cpp_reader * () const { return m_ptr; }
2046
2047  private:
2048   cpp_reader *m_ptr;
2049 };
2050
2051 /* A struct for writing lexer tests.  */
2052
2053 struct lexer_test
2054 {
2055   lexer_test (const line_table_case &case_, const char *content,
2056               lexer_test_options *options);
2057   ~lexer_test ();
2058
2059   const cpp_token *get_token ();
2060
2061   /* The ordering of these fields matters.
2062      The line_table_test must be first, since the cpp_reader_ptr
2063      uses it.
2064      The cpp_reader must be cleaned up *after* the temp_source_file
2065      since the filenames in input.c's input cache are owned by the
2066      cpp_reader; in particular, when ~temp_source_file evicts the
2067      filename the filenames must still be alive.  */
2068   line_table_test m_ltt;
2069   cpp_reader_ptr m_parser;
2070   temp_source_file m_tempfile;
2071   string_concat_db m_concats;
2072   bool m_implicitly_expect_EOF;
2073 };
2074
2075 /* Use an EBCDIC encoding for the execution charset, specifically
2076    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2077
2078    This exercises iconv integration within libcpp.
2079    Not every build of iconv supports the given charset,
2080    so we need to flag this error and handle it gracefully.  */
2081
2082 class ebcdic_execution_charset : public lexer_test_options
2083 {
2084  public:
2085   ebcdic_execution_charset () : m_num_iconv_errors (0)
2086     {
2087       gcc_assert (s_singleton == NULL);
2088       s_singleton = this;
2089     }
2090   ~ebcdic_execution_charset ()
2091     {
2092       gcc_assert (s_singleton == this);
2093       s_singleton = NULL;
2094     }
2095
2096   void apply (lexer_test &test) FINAL OVERRIDE
2097   {
2098     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2099     cpp_opts->narrow_charset = "IBM1047";
2100
2101     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2102     callbacks->error = on_error;
2103   }
2104
2105   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2106                         int level ATTRIBUTE_UNUSED,
2107                         int reason ATTRIBUTE_UNUSED,
2108                         rich_location *richloc ATTRIBUTE_UNUSED,
2109                         const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2110     ATTRIBUTE_FPTR_PRINTF(5,0)
2111   {
2112     gcc_assert (s_singleton);
2113     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2114     const char *msg = "conversion from %s to %s not supported by iconv";
2115 #ifdef ENABLE_NLS
2116     msg = dgettext ("cpplib", msg);
2117 #endif
2118     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2119        when the local iconv build doesn't support the conversion.  */
2120     if (strcmp (msgid, msg) == 0)
2121       {
2122         s_singleton->m_num_iconv_errors++;
2123         return true;
2124       }
2125
2126     /* Otherwise, we have an unexpected error.  */
2127     abort ();
2128   }
2129
2130   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2131
2132  private:
2133   static ebcdic_execution_charset *s_singleton;
2134   int m_num_iconv_errors;
2135 };
2136
2137 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2138
2139 /* A lexer_test_options subclass that records a list of error
2140    messages emitted by the lexer.  */
2141
2142 class lexer_error_sink : public lexer_test_options
2143 {
2144  public:
2145   lexer_error_sink ()
2146   {
2147     gcc_assert (s_singleton == NULL);
2148     s_singleton = this;
2149   }
2150   ~lexer_error_sink ()
2151   {
2152     gcc_assert (s_singleton == this);
2153     s_singleton = NULL;
2154
2155     int i;
2156     char *str;
2157     FOR_EACH_VEC_ELT (m_errors, i, str)
2158       free (str);
2159   }
2160
2161   void apply (lexer_test &test) FINAL OVERRIDE
2162   {
2163     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2164     callbacks->error = on_error;
2165   }
2166
2167   static bool on_error (cpp_reader *pfile ATTRIBUTE_UNUSED,
2168                         int level ATTRIBUTE_UNUSED,
2169                         int reason ATTRIBUTE_UNUSED,
2170                         rich_location *richloc ATTRIBUTE_UNUSED,
2171                         const char *msgid, va_list *ap)
2172     ATTRIBUTE_FPTR_PRINTF(5,0)
2173   {
2174     char *msg = xvasprintf (msgid, *ap);
2175     s_singleton->m_errors.safe_push (msg);
2176     return true;
2177   }
2178
2179   auto_vec<char *> m_errors;
2180
2181  private:
2182   static lexer_error_sink *s_singleton;
2183 };
2184
2185 lexer_error_sink *lexer_error_sink::s_singleton;
2186
2187 /* Constructor.  Override line_table with a new instance based on CASE_,
2188    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2189    start parsing the tempfile.  */
2190
2191 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2192                         lexer_test_options *options)
2193 : m_ltt (case_),
2194   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2195   /* Create a tempfile and write the text to it.  */
2196   m_tempfile (SELFTEST_LOCATION, ".c", content),
2197   m_concats (),
2198   m_implicitly_expect_EOF (true)
2199 {
2200   if (options)
2201     options->apply (*this);
2202
2203   cpp_init_iconv (m_parser);
2204
2205   /* Parse the file.  */
2206   const char *fname = cpp_read_main_file (m_parser,
2207                                           m_tempfile.get_filename ());
2208   ASSERT_NE (fname, NULL);
2209 }
2210
2211 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2212
2213 lexer_test::~lexer_test ()
2214 {
2215   location_t loc;
2216   const cpp_token *tok;
2217
2218   if (m_implicitly_expect_EOF)
2219     {
2220       tok = cpp_get_token_with_location (m_parser, &loc);
2221       ASSERT_NE (tok, NULL);
2222       ASSERT_EQ (tok->type, CPP_EOF);
2223     }
2224 }
2225
2226 /* Get the next token from m_parser.  */
2227
2228 const cpp_token *
2229 lexer_test::get_token ()
2230 {
2231   location_t loc;
2232   const cpp_token *tok;
2233
2234   tok = cpp_get_token_with_location (m_parser, &loc);
2235   ASSERT_NE (tok, NULL);
2236   return tok;
2237 }
2238
2239 /* Verify that locations within string literals are correctly handled.  */
2240
2241 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2242    using the string concatenation database for TEST.
2243
2244    Assert that the character at index IDX is on EXPECTED_LINE,
2245    and that it begins at column EXPECTED_START_COL and ends at
2246    EXPECTED_FINISH_COL (unless the locations are beyond
2247    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2248    columns).  */
2249
2250 static void
2251 assert_char_at_range (const location &loc,
2252                       lexer_test& test,
2253                       location_t strloc, enum cpp_ttype type, int idx,
2254                       int expected_line, int expected_start_col,
2255                       int expected_finish_col)
2256 {
2257   cpp_reader *pfile = test.m_parser;
2258   string_concat_db *concats = &test.m_concats;
2259
2260   source_range actual_range = source_range();
2261   const char *err
2262     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2263                                  &actual_range);
2264   if (should_have_column_data_p (strloc))
2265     ASSERT_EQ_AT (loc, NULL, err);
2266   else
2267     {
2268       ASSERT_STREQ_AT (loc,
2269                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2270                        err);
2271       return;
2272     }
2273
2274   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2275   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2276   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2277   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2278
2279   if (should_have_column_data_p (actual_range.m_start))
2280     {
2281       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2282       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2283     }
2284   if (should_have_column_data_p (actual_range.m_finish))
2285     {
2286       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2287       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2288     }
2289 }
2290
/* Call assert_char_at_range, supplying SELFTEST_LOCATION as the
   effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX,             \
                             EXPECTED_LINE,                             \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST), (STRLOC), (TYPE), (IDX),          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL), (EXPECTED_FINISH_COL))
2299
2300 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2301    using the string concatenation database for TEST.
2302
2303    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2304
2305 static void
2306 assert_num_substring_ranges (const location &loc,
2307                              lexer_test& test,
2308                              location_t strloc,
2309                              enum cpp_ttype type,
2310                              int expected_num_ranges)
2311 {
2312   cpp_reader *pfile = test.m_parser;
2313   string_concat_db *concats = &test.m_concats;
2314
2315   int actual_num_ranges = -1;
2316   const char *err
2317     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2318                                            &actual_num_ranges);
2319   if (should_have_column_data_p (strloc))
2320     ASSERT_EQ_AT (loc, NULL, err);
2321   else
2322     {
2323       ASSERT_STREQ_AT (loc,
2324                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2325                        err);
2326       return;
2327     }
2328   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2329 }
2330
/* Call assert_num_substring_ranges, supplying SELFTEST_LOCATION as the
   effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST), (STRLOC), (TYPE),          \
                               (EXPECTED_NUM_RANGES))
2338
2339
2340 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2341    returns an error (using the string concatenation database for TEST).  */
2342
2343 static void
2344 assert_has_no_substring_ranges (const location &loc,
2345                                 lexer_test& test,
2346                                 location_t strloc,
2347                                 enum cpp_ttype type,
2348                                 const char *expected_err)
2349 {
2350   cpp_reader *pfile = test.m_parser;
2351   string_concat_db *concats = &test.m_concats;
2352   cpp_substring_ranges ranges;
2353   const char *actual_err
2354     = get_substring_ranges_for_loc (pfile, concats, strloc,
2355                                     type, ranges);
2356   if (should_have_column_data_p (strloc))
2357     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2358   else
2359     ASSERT_STREQ_AT (loc,
2360                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2361                      actual_err);
2362 }
2363
/* Call assert_has_no_substring_ranges, supplying SELFTEST_LOCATION as
   the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)    \
    assert_has_no_substring_ranges (SELFTEST_LOCATION,                   \
                                    (LEXER_TEST), (STRLOC), (TYPE),      \
                                    (ERR))
2367
2368 /* Lex a simple string literal.  Verify the substring location data, before
2369    and after running cpp_interpret_string on it.  */
2370
2371 static void
2372 test_lexer_string_locations_simple (const line_table_case &case_)
2373 {
2374   /* Digits 0-9 (with 0 at column 10), the simple way.
2375      ....................000000000.11111111112.2222222223333333333
2376      ....................123456789.01234567890.1234567890123456789
2377      We add a trailing comment to ensure that we correctly locate
2378      the end of the string literal token.  */
2379   const char *content = "        \"0123456789\" /* not a string */\n";
2380   lexer_test test (case_, content, NULL);
2381
2382   /* Verify that we get the expected token back, with the correct
2383      location information.  */
2384   const cpp_token *tok = test.get_token ();
2385   ASSERT_EQ (tok->type, CPP_STRING);
2386   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2387   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2388
2389   /* At this point in lexing, the quote characters are treated as part of
2390      the string (they are stripped off by cpp_interpret_string).  */
2391
2392   ASSERT_EQ (tok->val.str.len, 12);
2393
2394   /* Verify that cpp_interpret_string works.  */
2395   cpp_string dst_string;
2396   const enum cpp_ttype type = CPP_STRING;
2397   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2398                                       &dst_string, type);
2399   ASSERT_TRUE (result);
2400   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2401   free (const_cast <unsigned char *> (dst_string.text));
2402
2403   /* Verify ranges of individual characters.  This no longer includes the
2404      opening quote, but does include the closing quote.  */
2405   for (int i = 0; i <= 10; i++)
2406     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2407                           10 + i, 10 + i);
2408
2409   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2410 }
2411
2412 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2413    encoding.  */
2414
2415 static void
2416 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2417 {
2418   /* EBCDIC support requires iconv.  */
2419   if (!HAVE_ICONV)
2420     return;
2421
2422   /* Digits 0-9 (with 0 at column 10), the simple way.
2423      ....................000000000.11111111112.2222222223333333333
2424      ....................123456789.01234567890.1234567890123456789
2425      We add a trailing comment to ensure that we correctly locate
2426      the end of the string literal token.  */
2427   const char *content = "        \"0123456789\" /* not a string */\n";
2428   ebcdic_execution_charset use_ebcdic;
2429   lexer_test test (case_, content, &use_ebcdic);
2430
2431   /* Verify that we get the expected token back, with the correct
2432      location information.  */
2433   const cpp_token *tok = test.get_token ();
2434   ASSERT_EQ (tok->type, CPP_STRING);
2435   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2436   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2437
2438   /* At this point in lexing, the quote characters are treated as part of
2439      the string (they are stripped off by cpp_interpret_string).  */
2440
2441   ASSERT_EQ (tok->val.str.len, 12);
2442
2443   /* The remainder of the test requires an iconv implementation that
2444      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2445   if (use_ebcdic.iconv_errors_occurred_p ())
2446     return;
2447
2448   /* Verify that cpp_interpret_string works.  */
2449   cpp_string dst_string;
2450   const enum cpp_ttype type = CPP_STRING;
2451   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2452                                       &dst_string, type);
2453   ASSERT_TRUE (result);
2454   /* We should now have EBCDIC-encoded text, specifically
2455      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2456      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2457   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2458                 (const char *)dst_string.text);
2459   free (const_cast <unsigned char *> (dst_string.text));
2460
2461   /* Verify that we don't attempt to record substring location information
2462      for such cases.  */
2463   ASSERT_HAS_NO_SUBSTRING_RANGES
2464     (test, tok->src_loc, type,
2465      "execution character set != source character set");
2466 }
2467
2468 /* Lex a string literal containing a hex-escaped character.
2469    Verify the substring location data, before and after running
2470    cpp_interpret_string on it.  */
2471
2472 static void
2473 test_lexer_string_locations_hex (const line_table_case &case_)
2474 {
2475   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2476      and with a space in place of digit 6, to terminate the escaped
2477      hex code.
2478      ....................000000000.111111.11112222.
2479      ....................123456789.012345.67890123.  */
2480   const char *content = "        \"01234\\x35 789\"\n";
2481   lexer_test test (case_, content, NULL);
2482
2483   /* Verify that we get the expected token back, with the correct
2484      location information.  */
2485   const cpp_token *tok = test.get_token ();
2486   ASSERT_EQ (tok->type, CPP_STRING);
2487   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2488   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2489
2490   /* At this point in lexing, the quote characters are treated as part of
2491      the string (they are stripped off by cpp_interpret_string).  */
2492   ASSERT_EQ (tok->val.str.len, 15);
2493
2494   /* Verify that cpp_interpret_string works.  */
2495   cpp_string dst_string;
2496   const enum cpp_ttype type = CPP_STRING;
2497   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2498                                       &dst_string, type);
2499   ASSERT_TRUE (result);
2500   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2501   free (const_cast <unsigned char *> (dst_string.text));
2502
2503   /* Verify ranges of individual characters.  This no longer includes the
2504      opening quote, but does include the closing quote.  */
2505   for (int i = 0; i <= 4; i++)
2506     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2507   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2508   for (int i = 6; i <= 10; i++)
2509     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2510
2511   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2512 }
2513
2514 /* Lex a string literal containing an octal-escaped character.
2515    Verify the substring location data after running cpp_interpret_string
2516    on it.  */
2517
2518 static void
2519 test_lexer_string_locations_oct (const line_table_case &case_)
2520 {
2521   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2522      and with a space in place of digit 6, to terminate the escaped
2523      octal code.
2524      ....................000000000.111111.11112222.2222223333333333444
2525      ....................123456789.012345.67890123.4567890123456789012  */
2526   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2527   lexer_test test (case_, content, NULL);
2528
2529   /* Verify that we get the expected token back, with the correct
2530      location information.  */
2531   const cpp_token *tok = test.get_token ();
2532   ASSERT_EQ (tok->type, CPP_STRING);
2533   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2534
2535   /* Verify that cpp_interpret_string works.  */
2536   cpp_string dst_string;
2537   const enum cpp_ttype type = CPP_STRING;
2538   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2539                                       &dst_string, type);
2540   ASSERT_TRUE (result);
2541   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2542   free (const_cast <unsigned char *> (dst_string.text));
2543
2544   /* Verify ranges of individual characters.  This no longer includes the
2545      opening quote, but does include the closing quote.  */
2546   for (int i = 0; i < 5; i++)
2547     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2548   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2549   for (int i = 6; i <= 10; i++)
2550     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2551
2552   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2553 }
2554
2555 /* Test of string literal containing letter escapes.  */
2556
2557 static void
2558 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2559 {
2560   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2561      .....................000000000.1.11111.1.1.11222.22222223333333
2562      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2563   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2564   lexer_test test (case_, content, NULL);
2565
2566   /* Verify that we get the expected tokens back.  */
2567   const cpp_token *tok = test.get_token ();
2568   ASSERT_EQ (tok->type, CPP_STRING);
2569   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2570
2571   /* Verify ranges of individual characters. */
2572   /* "\t".  */
2573   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2574                         0, 1, 10, 11);
2575   /* "foo". */
2576   for (int i = 1; i <= 3; i++)
2577     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2578                           i, 1, 11 + i, 11 + i);
2579   /* "\\" and "\n".  */
2580   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2581                         4, 1, 15, 16);
2582   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2583                         5, 1, 17, 18);
2584
2585   /* "bar" and closing quote for nul-terminator.  */
2586   for (int i = 6; i <= 9; i++)
2587     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2588                           i, 1, 13 + i, 13 + i);
2589
2590   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2591 }
2592
2593 /* Another test of a string literal containing a letter escape.
2594    Based on string seen in
2595      printf ("%-%\n");
2596    in gcc.dg/format/c90-printf-1.c.  */
2597
2598 static void
2599 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2600 {
2601   /* .....................000000000.1111.11.1111.22222222223.
2602      .....................123456789.0123.45.6789.01234567890.  */
2603   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2604   lexer_test test (case_, content, NULL);
2605
2606   /* Verify that we get the expected tokens back.  */
2607   const cpp_token *tok = test.get_token ();
2608   ASSERT_EQ (tok->type, CPP_STRING);
2609   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2610
2611   /* Verify ranges of individual characters. */
2612   /* "%-%".  */
2613   for (int i = 0; i < 3; i++)
2614     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2615                           i, 1, 10 + i, 10 + i);
2616   /* "\n".  */
2617   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2618                         3, 1, 13, 14);
2619
2620   /* Closing quote for nul-terminator.  */
2621   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2622                         4, 1, 15, 15);
2623
2624   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2625 }
2626
2627 /* Lex a string literal containing UCN 4 characters.
2628    Verify the substring location data after running cpp_interpret_string
2629    on it.  */
2630
2631 static void
2632 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2633 {
2634   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2635      as UCN 4.
2636      ....................000000000.111111.111122.222222223.33333333344444
2637      ....................123456789.012345.678901.234567890.12345678901234  */
2638   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2639   lexer_test test (case_, content, NULL);
2640
2641   /* Verify that we get the expected token back, with the correct
2642      location information.  */
2643   const cpp_token *tok = test.get_token ();
2644   ASSERT_EQ (tok->type, CPP_STRING);
2645   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2646
2647   /* Verify that cpp_interpret_string works.
2648      The string should be encoded in the execution character
2649      set.  Assuming that that is UTF-8, we should have the following:
2650      -----------  ----  -----  -------  ----------------
2651      Byte offset  Byte  Octal  Unicode  Source Column(s)
2652      -----------  ----  -----  -------  ----------------
2653      0            0x30         '0'      10
2654      1            0x31         '1'      11
2655      2            0x32         '2'      12
2656      3            0x33         '3'      13
2657      4            0x34         '4'      14
2658      5            0xE2  \342   U+2174   15-20
2659      6            0x85  \205    (cont)  15-20
2660      7            0xB4  \264    (cont)  15-20
2661      8            0xE2  \342   U+2175   21-26
2662      9            0x85  \205    (cont)  21-26
2663      10           0xB5  \265    (cont)  21-26
2664      11           0x37         '7'      27
2665      12           0x38         '8'      28
2666      13           0x39         '9'      29
2667      14           0x00                  30 (closing quote)
2668      -----------  ----  -----  -------  ---------------.  */
2669
2670   cpp_string dst_string;
2671   const enum cpp_ttype type = CPP_STRING;
2672   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2673                                       &dst_string, type);
2674   ASSERT_TRUE (result);
2675   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2676                 (const char *)dst_string.text);
2677   free (const_cast <unsigned char *> (dst_string.text));
2678
2679   /* Verify ranges of individual characters.  This no longer includes the
2680      opening quote, but does include the closing quote.
2681      '01234'.  */
2682   for (int i = 0; i <= 4; i++)
2683     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2684   /* U+2174.  */
2685   for (int i = 5; i <= 7; i++)
2686     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2687   /* U+2175.  */
2688   for (int i = 8; i <= 10; i++)
2689     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2690   /* '789' and nul terminator  */
2691   for (int i = 11; i <= 14; i++)
2692     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2693
2694   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2695 }
2696
2697 /* Lex a string literal containing UCN 8 characters.
2698    Verify the substring location data after running cpp_interpret_string
2699    on it.  */
2700
2701 static void
2702 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2703 {
2704   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2705      ....................000000000.111111.1111222222.2222333333333.344444
2706      ....................123456789.012345.6789012345.6789012345678.901234  */
2707   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2708   lexer_test test (case_, content, NULL);
2709
2710   /* Verify that we get the expected token back, with the correct
2711      location information.  */
2712   const cpp_token *tok = test.get_token ();
2713   ASSERT_EQ (tok->type, CPP_STRING);
2714   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2715                            "\"01234\\U00002174\\U00002175789\"");
2716
2717   /* Verify that cpp_interpret_string works.
2718      The UTF-8 encoding of the string is identical to that from
2719      the ucn4 testcase above; the only difference is the column
2720      locations.  */
2721   cpp_string dst_string;
2722   const enum cpp_ttype type = CPP_STRING;
2723   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2724                                       &dst_string, type);
2725   ASSERT_TRUE (result);
2726   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2727                 (const char *)dst_string.text);
2728   free (const_cast <unsigned char *> (dst_string.text));
2729
2730   /* Verify ranges of individual characters.  This no longer includes the
2731      opening quote, but does include the closing quote.
2732      '01234'.  */
2733   for (int i = 0; i <= 4; i++)
2734     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2735   /* U+2174.  */
2736   for (int i = 5; i <= 7; i++)
2737     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2738   /* U+2175.  */
2739   for (int i = 8; i <= 10; i++)
2740     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2741   /* '789' at columns 35-37  */
2742   for (int i = 11; i <= 13; i++)
2743     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2744   /* Closing quote/nul-terminator at column 38.  */
2745   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2746
2747   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2748 }
2749
/* Read the 32-bit big-endian value stored at PTR_BE_VALUE and return it
   in host byte order.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *)ptr_be_value;
  uint32_t value = 0;

  /* Accumulate the four bytes, most significant first.  */
  for (int i = 0; i < 4; i++)
    value = (value << 8) | (uint32_t) octets[i];

  return value;
}
2761
2762 /* Lex a wide string literal and verify that attempts to read substring
2763    location data from it fail gracefully.  */
2764
2765 static void
2766 test_lexer_string_locations_wide_string (const line_table_case &case_)
2767 {
2768   /* Digits 0-9.
2769      ....................000000000.11111111112.22222222233333
2770      ....................123456789.01234567890.12345678901234  */
2771   const char *content = "       L\"0123456789\" /* non-str */\n";
2772   lexer_test test (case_, content, NULL);
2773
2774   /* Verify that we get the expected token back, with the correct
2775      location information.  */
2776   const cpp_token *tok = test.get_token ();
2777   ASSERT_EQ (tok->type, CPP_WSTRING);
2778   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2779
2780   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2781   cpp_string dst_string;
2782   const enum cpp_ttype type = CPP_WSTRING;
2783   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2784                                       &dst_string, type);
2785   ASSERT_TRUE (result);
2786   /* The cpp_reader defaults to big-endian with
2787      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2788      now be encoded as UTF-32BE.  */
2789   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2790   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2791   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2792   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2793   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2794   free (const_cast <unsigned char *> (dst_string.text));
2795
2796   /* We don't yet support generating substring location information
2797      for L"" strings.  */
2798   ASSERT_HAS_NO_SUBSTRING_RANGES
2799     (test, tok->src_loc, type,
2800      "execution character set != source character set");
2801 }
2802
/* Read the 16-bit big-endian value stored at PTR_BE_VALUE and return it
   in host byte order.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *octets = (const unsigned char *)ptr_be_value;
  const uint16_t high_byte = octets[0];
  const uint16_t low_byte = octets[1];

  /* The most significant byte comes first in memory.  */
  return (uint16_t) ((high_byte << 8) | low_byte);
}
2811
2812 /* Lex a u"" string literal and verify that attempts to read substring
2813    location data from it fail gracefully.  */
2814
2815 static void
2816 test_lexer_string_locations_string16 (const line_table_case &case_)
2817 {
2818   /* Digits 0-9.
2819      ....................000000000.11111111112.22222222233333
2820      ....................123456789.01234567890.12345678901234  */
2821   const char *content = "       u\"0123456789\" /* non-str */\n";
2822   lexer_test test (case_, content, NULL);
2823
2824   /* Verify that we get the expected token back, with the correct
2825      location information.  */
2826   const cpp_token *tok = test.get_token ();
2827   ASSERT_EQ (tok->type, CPP_STRING16);
2828   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2829
2830   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2831   cpp_string dst_string;
2832   const enum cpp_ttype type = CPP_STRING16;
2833   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2834                                       &dst_string, type);
2835   ASSERT_TRUE (result);
2836
2837   /* The cpp_reader defaults to big-endian, so dst_string should
2838      now be encoded as UTF-16BE.  */
2839   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2840   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2841   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2842   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2843   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2844   free (const_cast <unsigned char *> (dst_string.text));
2845
2846   /* We don't yet support generating substring location information
2847      for L"" strings.  */
2848   ASSERT_HAS_NO_SUBSTRING_RANGES
2849     (test, tok->src_loc, type,
2850      "execution character set != source character set");
2851 }
2852
2853 /* Lex a U"" string literal and verify that attempts to read substring
2854    location data from it fail gracefully.  */
2855
2856 static void
2857 test_lexer_string_locations_string32 (const line_table_case &case_)
2858 {
2859   /* Digits 0-9.
2860      ....................000000000.11111111112.22222222233333
2861      ....................123456789.01234567890.12345678901234  */
2862   const char *content = "       U\"0123456789\" /* non-str */\n";
2863   lexer_test test (case_, content, NULL);
2864
2865   /* Verify that we get the expected token back, with the correct
2866      location information.  */
2867   const cpp_token *tok = test.get_token ();
2868   ASSERT_EQ (tok->type, CPP_STRING32);
2869   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2870
2871   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2872   cpp_string dst_string;
2873   const enum cpp_ttype type = CPP_STRING32;
2874   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2875                                       &dst_string, type);
2876   ASSERT_TRUE (result);
2877
2878   /* The cpp_reader defaults to big-endian, so dst_string should
2879      now be encoded as UTF-32BE.  */
2880   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2881   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2882   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2883   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2884   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2885   free (const_cast <unsigned char *> (dst_string.text));
2886
2887   /* We don't yet support generating substring location information
2888      for L"" strings.  */
2889   ASSERT_HAS_NO_SUBSTRING_RANGES
2890     (test, tok->src_loc, type,
2891      "execution character set != source character set");
2892 }
2893
2894 /* Lex a u8-string literal.
2895    Verify the substring location data after running cpp_interpret_string
2896    on it.  */
2897
2898 static void
2899 test_lexer_string_locations_u8 (const line_table_case &case_)
2900 {
2901   /* Digits 0-9.
2902      ....................000000000.11111111112.22222222233333
2903      ....................123456789.01234567890.12345678901234  */
2904   const char *content = "      u8\"0123456789\" /* non-str */\n";
2905   lexer_test test (case_, content, NULL);
2906
2907   /* Verify that we get the expected token back, with the correct
2908      location information.  */
2909   const cpp_token *tok = test.get_token ();
2910   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2911   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2912
2913   /* Verify that cpp_interpret_string works.  */
2914   cpp_string dst_string;
2915   const enum cpp_ttype type = CPP_STRING;
2916   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2917                                       &dst_string, type);
2918   ASSERT_TRUE (result);
2919   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2920   free (const_cast <unsigned char *> (dst_string.text));
2921
2922   /* Verify ranges of individual characters.  This no longer includes the
2923      opening quote, but does include the closing quote.  */
2924   for (int i = 0; i <= 10; i++)
2925     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2926 }
2927
2928 /* Lex a string literal containing UTF-8 source characters.
2929    Verify the substring location data after running cpp_interpret_string
2930    on it.  */
2931
2932 static void
2933 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2934 {
2935  /* This string literal is written out to the source file as UTF-8,
2936     and is of the form "before mojibake after", where "mojibake"
2937     is written as the following four unicode code points:
2938        U+6587 CJK UNIFIED IDEOGRAPH-6587
2939        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2940        U+5316 CJK UNIFIED IDEOGRAPH-5316
2941        U+3051 HIRAGANA LETTER KE.
2942      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2943      "before" and "after" are 1 byte per unicode character.
2944
2945      The numbering shown are "columns", which are *byte* numbers within
2946      the line, rather than unicode character numbers.
2947
2948      .................... 000000000.1111111.
2949      .................... 123456789.0123456.  */
2950   const char *content = ("        \"before "
2951                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2952                               UTF-8: 0xE6 0x96 0x87
2953                               C octal escaped UTF-8: \346\226\207
2954                             "column" numbers: 17-19.  */
2955                          "\346\226\207"
2956
2957                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2958                               UTF-8: 0xE5 0xAD 0x97
2959                               C octal escaped UTF-8: \345\255\227
2960                             "column" numbers: 20-22.  */
2961                          "\345\255\227"
2962
2963                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2964                               UTF-8: 0xE5 0x8C 0x96
2965                               C octal escaped UTF-8: \345\214\226
2966                             "column" numbers: 23-25.  */
2967                          "\345\214\226"
2968
2969                          /* U+3051 HIRAGANA LETTER KE
2970                               UTF-8: 0xE3 0x81 0x91
2971                               C octal escaped UTF-8: \343\201\221
2972                             "column" numbers: 26-28.  */
2973                          "\343\201\221"
2974
2975                          /* column numbers 29 onwards
2976                           2333333.33334444444444
2977                           9012345.67890123456789. */
2978                          " after\" /* non-str */\n");
2979   lexer_test test (case_, content, NULL);
2980
2981   /* Verify that we get the expected token back, with the correct
2982      location information.  */
2983   const cpp_token *tok = test.get_token ();
2984   ASSERT_EQ (tok->type, CPP_STRING);
2985   ASSERT_TOKEN_AS_TEXT_EQ
2986     (test.m_parser, tok,
2987      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2988
2989   /* Verify that cpp_interpret_string works.  */
2990   cpp_string dst_string;
2991   const enum cpp_ttype type = CPP_STRING;
2992   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2993                                       &dst_string, type);
2994   ASSERT_TRUE (result);
2995   ASSERT_STREQ
2996     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2997      (const char *)dst_string.text);
2998   free (const_cast <unsigned char *> (dst_string.text));
2999
3000   /* Verify ranges of individual characters.  This no longer includes the
3001      opening quote, but does include the closing quote.
3002      Assuming that both source and execution encodings are UTF-8, we have
3003      a run of 25 octets in each, plus the NUL terminator.  */
3004   for (int i = 0; i < 25; i++)
3005     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
3006   /* NUL-terminator should use the closing quote at column 35.  */
3007   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
3008
3009   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3010 }
3011
3012 /* Test of string literal concatenation.  */
3013
3014 static void
3015 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3016 {
3017   /* Digits 0-9.
3018      .....................000000000.111111.11112222222222
3019      .....................123456789.012345.67890123456789.  */
3020   const char *content = ("        \"01234\" /* non-str */\n"
3021                          "        \"56789\" /* non-str */\n");
3022   lexer_test test (case_, content, NULL);
3023
3024   location_t input_locs[2];
3025
3026   /* Verify that we get the expected tokens back.  */
3027   auto_vec <cpp_string> input_strings;
3028   const cpp_token *tok_a = test.get_token ();
3029   ASSERT_EQ (tok_a->type, CPP_STRING);
3030   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3031   input_strings.safe_push (tok_a->val.str);
3032   input_locs[0] = tok_a->src_loc;
3033
3034   const cpp_token *tok_b = test.get_token ();
3035   ASSERT_EQ (tok_b->type, CPP_STRING);
3036   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3037   input_strings.safe_push (tok_b->val.str);
3038   input_locs[1] = tok_b->src_loc;
3039
3040   /* Verify that cpp_interpret_string works.  */
3041   cpp_string dst_string;
3042   const enum cpp_ttype type = CPP_STRING;
3043   bool result = cpp_interpret_string (test.m_parser,
3044                                       input_strings.address (), 2,
3045                                       &dst_string, type);
3046   ASSERT_TRUE (result);
3047   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3048   free (const_cast <unsigned char *> (dst_string.text));
3049
3050   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3051   test.m_concats.record_string_concatenation (2, input_locs);
3052
3053   location_t initial_loc = input_locs[0];
3054
3055   /* "01234" on line 1.  */
3056   for (int i = 0; i <= 4; i++)
3057     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3058   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3059   for (int i = 5; i <= 10; i++)
3060     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3061
3062   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3063 }
3064
3065 /* Another test of string literal concatenation.  */
3066
3067 static void
3068 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3069 {
3070   /* Digits 0-9.
3071      .....................000000000.111.11111112222222
3072      .....................123456789.012.34567890123456.  */
3073   const char *content = ("        \"01\" /* non-str */\n"
3074                          "        \"23\" /* non-str */\n"
3075                          "        \"45\" /* non-str */\n"
3076                          "        \"67\" /* non-str */\n"
3077                          "        \"89\" /* non-str */\n");
3078   lexer_test test (case_, content, NULL);
3079
3080   auto_vec <cpp_string> input_strings;
3081   location_t input_locs[5];
3082
3083   /* Verify that we get the expected tokens back.  */
3084   for (int i = 0; i < 5; i++)
3085     {
3086       const cpp_token *tok = test.get_token ();
3087       ASSERT_EQ (tok->type, CPP_STRING);
3088       input_strings.safe_push (tok->val.str);
3089       input_locs[i] = tok->src_loc;
3090     }
3091
3092   /* Verify that cpp_interpret_string works.  */
3093   cpp_string dst_string;
3094   const enum cpp_ttype type = CPP_STRING;
3095   bool result = cpp_interpret_string (test.m_parser,
3096                                       input_strings.address (), 5,
3097                                       &dst_string, type);
3098   ASSERT_TRUE (result);
3099   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3100   free (const_cast <unsigned char *> (dst_string.text));
3101
3102   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3103   test.m_concats.record_string_concatenation (5, input_locs);
3104
3105   location_t initial_loc = input_locs[0];
3106
3107   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3108      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3109      and expect get_source_range_for_substring to fail.
3110      However, for a string concatenation test, we can have a case
3111      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3112      but subsequent strings can be after it.
3113      Attempting to detect this within assert_char_at_range
3114      would overcomplicate the logic for the common test cases, so
3115      we detect it here.  */
3116   if (should_have_column_data_p (input_locs[0])
3117       && !should_have_column_data_p (input_locs[4]))
3118     {
3119       /* Verify that get_source_range_for_substring gracefully rejects
3120          this case.  */
3121       source_range actual_range;
3122       const char *err
3123         = get_source_range_for_char (test.m_parser, &test.m_concats,
3124                                      initial_loc, type, 0, &actual_range);
3125       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3126       return;
3127     }
3128
3129   for (int i = 0; i < 5; i++)
3130     for (int j = 0; j < 2; j++)
3131       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3132                             i + 1, 10 + j, 10 + j);
3133
3134   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3135   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3136
3137   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3138 }
3139
3140 /* Another test of string literal concatenation, this time combined with
3141    various kinds of escaped characters.  */
3142
3143 static void
3144 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3145 {
3146   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3147      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3148   const char *content
3149     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3150        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3151     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3152   lexer_test test (case_, content, NULL);
3153
3154   auto_vec <cpp_string> input_strings;
3155   location_t input_locs[4];
3156
3157   /* Verify that we get the expected tokens back.  */
3158   for (int i = 0; i < 4; i++)
3159     {
3160       const cpp_token *tok = test.get_token ();
3161       ASSERT_EQ (tok->type, CPP_STRING);
3162       input_strings.safe_push (tok->val.str);
3163       input_locs[i] = tok->src_loc;
3164     }
3165
3166   /* Verify that cpp_interpret_string works.  */
3167   cpp_string dst_string;
3168   const enum cpp_ttype type = CPP_STRING;
3169   bool result = cpp_interpret_string (test.m_parser,
3170                                       input_strings.address (), 4,
3171                                       &dst_string, type);
3172   ASSERT_TRUE (result);
3173   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3174   free (const_cast <unsigned char *> (dst_string.text));
3175
3176   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3177   test.m_concats.record_string_concatenation (4, input_locs);
3178
3179   location_t initial_loc = input_locs[0];
3180
3181   for (int i = 0; i <= 4; i++)
3182     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3183   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3184   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3185   for (int i = 7; i <= 9; i++)
3186     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3187
3188   /* NUL-terminator should use the location of the final closing quote.  */
3189   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3190
3191   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3192 }
3193
3194 /* Test of string literal in a macro.  */
3195
3196 static void
3197 test_lexer_string_locations_macro (const line_table_case &case_)
3198 {
3199   /* Digits 0-9.
3200      .....................0000000001111111111.22222222223.
3201      .....................1234567890123456789.01234567890.  */
3202   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3203                          "  MACRO");
3204   lexer_test test (case_, content, NULL);
3205
3206   /* Verify that we get the expected tokens back.  */
3207   const cpp_token *tok = test.get_token ();
3208   ASSERT_EQ (tok->type, CPP_PADDING);
3209
3210   tok = test.get_token ();
3211   ASSERT_EQ (tok->type, CPP_STRING);
3212   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3213
3214   /* Verify ranges of individual characters.  We ought to
3215      see columns within the macro definition.  */
3216   for (int i = 0; i <= 10; i++)
3217     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3218                           i, 1, 20 + i, 20 + i);
3219
3220   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3221
3222   tok = test.get_token ();
3223   ASSERT_EQ (tok->type, CPP_PADDING);
3224 }
3225
3226 /* Test of stringification of a macro argument.  */
3227
3228 static void
3229 test_lexer_string_locations_stringified_macro_argument
3230   (const line_table_case &case_)
3231 {
3232   /* .....................000000000111111111122222222223.
3233      .....................123456789012345678901234567890.  */
3234   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3235                          "MACRO(foo)\n");
3236   lexer_test test (case_, content, NULL);
3237
3238   /* Verify that we get the expected token back.  */
3239   const cpp_token *tok = test.get_token ();
3240   ASSERT_EQ (tok->type, CPP_PADDING);
3241
3242   tok = test.get_token ();
3243   ASSERT_EQ (tok->type, CPP_STRING);
3244   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3245
3246   /* We don't support getting the location of a stringified macro
3247      argument.  Verify that it fails gracefully.  */
3248   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3249                                   "cpp_interpret_string_1 failed");
3250
3251   tok = test.get_token ();
3252   ASSERT_EQ (tok->type, CPP_PADDING);
3253
3254   tok = test.get_token ();
3255   ASSERT_EQ (tok->type, CPP_PADDING);
3256 }
3257
3258 /* Ensure that we are fail gracefully if something attempts to pass
3259    in a location that isn't a string literal token.  Seen on this code:
3260
3261      const char a[] = " %d ";
3262      __builtin_printf (a, 0.5);
3263                        ^
3264
3265    when c-format.c erroneously used the indicated one-character
3266    location as the format string location, leading to a read past the
3267    end of a string buffer in cpp_interpret_string_1.  */
3268
3269 static void
3270 test_lexer_string_locations_non_string (const line_table_case &case_)
3271 {
3272   /* .....................000000000111111111122222222223.
3273      .....................123456789012345678901234567890.  */
3274   const char *content = ("         a\n");
3275   lexer_test test (case_, content, NULL);
3276
3277   /* Verify that we get the expected token back.  */
3278   const cpp_token *tok = test.get_token ();
3279   ASSERT_EQ (tok->type, CPP_NAME);
3280   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3281
3282   /* At this point, libcpp is attempting to interpret the name as a
3283      string literal, despite it not starting with a quote.  We don't detect
3284      that, but we should at least fail gracefully.  */
3285   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3286                                   "cpp_interpret_string_1 failed");
3287 }
3288
3289 /* Ensure that we can read substring information for a token which
3290    starts in one linemap and ends in another .  Adapted from
3291    gcc.dg/cpp/pr69985.c.  */
3292
3293 static void
3294 test_lexer_string_locations_long_line (const line_table_case &case_)
3295 {
3296   /* .....................000000.000111111111
3297      .....................123456.789012346789.  */
3298   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3299                          "     \"0123456789012345678901234567890123456789"
3300                          "0123456789012345678901234567890123456789"
3301                          "0123456789012345678901234567890123456789"
3302                          "0123456789\"\n");
3303
3304   lexer_test test (case_, content, NULL);
3305
3306   /* Verify that we get the expected token back.  */
3307   const cpp_token *tok = test.get_token ();
3308   ASSERT_EQ (tok->type, CPP_STRING);
3309
3310   if (!should_have_column_data_p (line_table->highest_location))
3311     return;
3312
3313   /* Verify ranges of individual characters.  */
3314   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3315   for (int i = 0; i < 131; i++)
3316     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3317                           i, 2, 7 + i, 7 + i);
3318 }
3319
3320 /* Test of locations within a raw string that doesn't contain a newline.  */
3321
3322 static void
3323 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3324 {
3325   /* .....................00.0000000111111111122.
3326      .....................12.3456789012345678901.  */
3327   const char *content = ("R\"foo(0123456789)foo\"\n");
3328   lexer_test test (case_, content, NULL);
3329
3330   /* Verify that we get the expected token back.  */
3331   const cpp_token *tok = test.get_token ();
3332   ASSERT_EQ (tok->type, CPP_STRING);
3333
3334   /* Verify that cpp_interpret_string works.  */
3335   cpp_string dst_string;
3336   const enum cpp_ttype type = CPP_STRING;
3337   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3338                                       &dst_string, type);
3339   ASSERT_TRUE (result);
3340   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3341   free (const_cast <unsigned char *> (dst_string.text));
3342
3343   if (!should_have_column_data_p (line_table->highest_location))
3344     return;
3345
3346   /* 0-9, plus the nil terminator.  */
3347   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3348   for (int i = 0; i < 11; i++)
3349     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3350                           i, 1, 7 + i, 7 + i);
3351 }
3352
3353 /* Test of locations within a raw string that contains a newline.  */
3354
3355 static void
3356 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3357 {
3358   /* .....................00.0000.
3359      .....................12.3456.  */
3360   const char *content = ("R\"foo(\n"
3361   /* .....................00000.
3362      .....................12345.  */
3363                          "hello\n"
3364                          "world\n"
3365   /* .....................00000.
3366      .....................12345.  */
3367                          ")foo\"\n");
3368   lexer_test test (case_, content, NULL);
3369
3370   /* Verify that we get the expected token back.  */
3371   const cpp_token *tok = test.get_token ();
3372   ASSERT_EQ (tok->type, CPP_STRING);
3373
3374   /* Verify that cpp_interpret_string works.  */
3375   cpp_string dst_string;
3376   const enum cpp_ttype type = CPP_STRING;
3377   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3378                                       &dst_string, type);
3379   ASSERT_TRUE (result);
3380   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3381   free (const_cast <unsigned char *> (dst_string.text));
3382
3383   if (!should_have_column_data_p (line_table->highest_location))
3384     return;
3385
3386   /* Currently we don't support locations within raw strings that
3387      contain newlines.  */
3388   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3389                                   "range endpoints are on different lines");
3390 }
3391
3392 /* Test of parsing an unterminated raw string.  */
3393
3394 static void
3395 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3396 {
3397   const char *content = "R\"ouch()ouCh\" /* etc */";
3398
3399   lexer_error_sink errors;
3400   lexer_test test (case_, content, &errors);
3401   test.m_implicitly_expect_EOF = false;
3402
3403   /* Attempt to parse the raw string.  */
3404   const cpp_token *tok = test.get_token ();
3405   ASSERT_EQ (tok->type, CPP_EOF);
3406
3407   ASSERT_EQ (1, errors.m_errors.length ());
3408   /* We expect the message "unterminated raw string"
3409      in the "cpplib" translation domain.
3410      It's not clear that dgettext is available on all supported hosts,
3411      so this assertion is commented-out for now.
3412        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3413                      errors.m_errors[0]);
3414   */
3415 }
3416
3417 /* Test of lexing char constants.  */
3418
3419 static void
3420 test_lexer_char_constants (const line_table_case &case_)
3421 {
3422   /* Various char constants.
3423      .....................0000000001111111111.22222222223.
3424      .....................1234567890123456789.01234567890.  */
3425   const char *content = ("         'a'\n"
3426                          "        u'a'\n"
3427                          "        U'a'\n"
3428                          "        L'a'\n"
3429                          "         'abc'\n");
3430   lexer_test test (case_, content, NULL);
3431
3432   /* Verify that we get the expected tokens back.  */
3433   /* 'a'.  */
3434   const cpp_token *tok = test.get_token ();
3435   ASSERT_EQ (tok->type, CPP_CHAR);
3436   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3437
3438   unsigned int chars_seen;
3439   int unsignedp;
3440   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3441                                           &chars_seen, &unsignedp);
3442   ASSERT_EQ (cc, 'a');
3443   ASSERT_EQ (chars_seen, 1);
3444
3445   /* u'a'.  */
3446   tok = test.get_token ();
3447   ASSERT_EQ (tok->type, CPP_CHAR16);
3448   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3449
3450   /* U'a'.  */
3451   tok = test.get_token ();
3452   ASSERT_EQ (tok->type, CPP_CHAR32);
3453   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3454
3455   /* L'a'.  */
3456   tok = test.get_token ();
3457   ASSERT_EQ (tok->type, CPP_WCHAR);
3458   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3459
3460   /* 'abc' (c-char-sequence).  */
3461   tok = test.get_token ();
3462   ASSERT_EQ (tok->type, CPP_CHAR);
3463   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3464 }
3465 /* A table of interesting location_t values, giving one axis of our test
3466    matrix.  */
3467
3468 static const location_t boundary_locations[] = {
3469   /* Zero means "don't override the default values for a new line_table".  */
3470   0,
3471
3472   /* An arbitrary non-zero value that isn't close to one of
3473      the boundary values below.  */
3474   0x10000,
3475
3476   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3477   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3478   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3479   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3480   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3481   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3482
3483   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3484   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3485   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3486   LINE_MAP_MAX_LOCATION_WITH_COLS,
3487   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3488   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3489 };
3490
3491 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3492
3493 void
3494 for_each_line_table_case (void (*testcase) (const line_table_case &))
3495 {
3496   /* As noted above in the description of struct line_table_case,
3497      we want to explore a test matrix of interesting line_table
3498      situations, running various selftests for each case within the
3499      matrix.  */
3500
3501   /* Run all tests with:
3502      (a) line_table->default_range_bits == 0, and
3503      (b) line_table->default_range_bits == 5.  */
3504   int num_cases_tested = 0;
3505   for (int default_range_bits = 0; default_range_bits <= 5;
3506        default_range_bits += 5)
3507     {
3508       /* ...and use each of the "interesting" location values as
3509          the starting location within line_table.  */
3510       const int num_boundary_locations
3511         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3512       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3513         {
3514           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3515
3516           testcase (c);
3517
3518           num_cases_tested++;
3519         }
3520     }
3521
3522   /* Verify that we fully covered the test matrix.  */
3523   ASSERT_EQ (num_cases_tested, 2 * 12);
3524 }
3525
3526 /* Run all of the selftests within this file.  */
3527
3528 void
3529 input_c_tests ()
3530 {
3531   test_should_have_column_data_p ();
3532   test_unknown_location ();
3533   test_builtins ();
3534   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3535
3536   for_each_line_table_case (test_accessing_ordinary_linemaps);
3537   for_each_line_table_case (test_lexer);
3538   for_each_line_table_case (test_lexer_string_locations_simple);
3539   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3540   for_each_line_table_case (test_lexer_string_locations_hex);
3541   for_each_line_table_case (test_lexer_string_locations_oct);
3542   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3543   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3544   for_each_line_table_case (test_lexer_string_locations_ucn4);
3545   for_each_line_table_case (test_lexer_string_locations_ucn8);
3546   for_each_line_table_case (test_lexer_string_locations_wide_string);
3547   for_each_line_table_case (test_lexer_string_locations_string16);
3548   for_each_line_table_case (test_lexer_string_locations_string32);
3549   for_each_line_table_case (test_lexer_string_locations_u8);
3550   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3551   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3552   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3553   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3554   for_each_line_table_case (test_lexer_string_locations_macro);
3555   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3556   for_each_line_table_case (test_lexer_string_locations_non_string);
3557   for_each_line_table_case (test_lexer_string_locations_long_line);
3558   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3559   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3560   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3561   for_each_line_table_case (test_lexer_char_constants);
3562
3563   test_reading_source_line ();
3564 }
3565
3566 } // namespace selftest
3567
3568 #endif /* CHECKING_P */