gcc/input.c

   1 /* Data and functions related to line maps and input files.
   2    Copyright (C) 2004-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "intl.h"
  24 #include "diagnostic-core.h"
  25 #include "selftest.h"
  26 #include "cpplib.h"
  27
  28 #ifndef HAVE_ICONV
  29 #define HAVE_ICONV 0
  30 #endif
  31
  32 /* This is a cache used by get_next_line to store the content of a
  33    file to be searched for file lines.  */
  34 struct fcache
  35 {
  36   /* These are information used to store a line boundary.  */
  37   struct line_info
  38   {
  39     /* The line number.  It starts from 1.  */
  40     size_t line_num;
  41
  42     /* The position (byte count) of the beginning of the line,
  43        relative to the file data pointer.  This starts at zero.  */
  44     size_t start_pos;
  45
  46     /* The position (byte count) of the last byte of the line.  This
  47        normally points to the '\n' character, or to one byte after the
  48        last byte of the file, if the file doesn't contain a '\n'
  49        character.  */
  50     size_t end_pos;
  51
  52     line_info (size_t l, size_t s, size_t e)
  53       : line_num (l), start_pos (s), end_pos (e)
  54     {}
  55
  56     line_info ()
  57       :line_num (0), start_pos (0), end_pos (0)
  58     {}
  59   };
  60
  61   /* The number of time this file has been accessed.  This is used
  62      to designate which file cache to evict from the cache
  63      array.  */
  64   unsigned use_count;
  65
  66   /* The file_path is the key for identifying a particular file in
  67      the cache.
  68      For libcpp-using code, the underlying buffer for this field is
  69      owned by the corresponding _cpp_file within the cpp_reader.  */
  70   const char *file_path;
  71
  72   FILE *fp;
  73
  74   /* This points to the content of the file that we've read so
  75      far.  */
  76   char *data;
  77
  78   /*  The size of the DATA array above.*/
  79   size_t size;
  80
  81   /* The number of bytes read from the underlying file so far.  This
  82      must be less (or equal) than SIZE above.  */
  83   size_t nb_read;
  84
  85   /* The index of the beginning of the current line.  */
  86   size_t line_start_idx;
  87
  88   /* The number of the previous line read.  This starts at 1.  Zero
  89      means we've read no line so far.  */
  90   size_t line_num;
  91
  92   /* This is the total number of lines of the current file.  At the
  93      moment, we try to get this information from the line map
  94      subsystem.  Note that this is just a hint.  When using the C++
  95      front-end, this hint is correct because the input file is then
  96      completely tokenized before parsing starts; so the line map knows
  97      the number of lines before compilation really starts.  For e.g,
  98      the C front-end, it can happen that we start emitting diagnostics
  99      before the line map has seen the end of the file.  */
 100   size_t total_lines;
 101
 102   /* Could this file be missing a trailing newline on its final line?
 103      Initially true (to cope with empty files), set to true/false
 104      as each line is read.  */
 105   bool missing_trailing_newline;
 106
 107   /* This is a record of the beginning and end of the lines we've seen
 108      while reading the file.  This is useful to avoid walking the data
 109      from the beginning when we are asked to read a line that is
 110      before LINE_START_IDX above.  Note that the maximum size of this
 111      record is fcache_line_record_size, so that the memory consumption
 112      doesn't explode.  We thus scale total_lines down to
 113      fcache_line_record_size.  */
 114   vec<line_info, va_heap> line_record;
 115
 116   fcache ();
 117   ~fcache ();
 118 };
 119
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The set of line maps that the functions in this file resolve and
   expand locations against.  */
struct line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

struct line_maps *saved_line_table;

/* The table of per-file caches.  It is allocated on first use by
   diagnostic_file_cache_init and released by
   diagnostic_file_cache_fini.  */
static fcache *fcache_tab;
/* The number of entries in fcache_tab.  */
static const size_t fcache_tab_size = 16;
/* The initial size, in bytes, of the data buffer of a cache entry;
   maybe_grow doubles the buffer each time it fills up.  */
static const size_t fcache_buffer_size = 4 * 1024;
/* The maximum number of entries get_next_line stores in the
   line_record of a cache entry.  */
static const size_t fcache_line_record_size = 100;
 137
 138 /* Expand the source location LOC into a human readable location.  If
 139    LOC resolves to a builtin location, the file name of the readable
 140    location is set to the string "<built-in>". If EXPANSION_POINT_P is
 141    TRUE and LOC is virtual, then it is resolved to the expansion
 142    point of the involved macro.  Otherwise, it is resolved to the
 143    spelling location of the token.
 144
 145    When resolving to the spelling location of the token, if the
 146    resulting location is for a built-in location (that is, it has no
 147    associated line/column) in the context of a macro expansion, the
 148    returned location is the first one (while unwinding the macro
 149    location towards its expansion point) that is in real source
 150    code.
 151
 152    ASPECT controls which part of the location to use.  */
 153
 154 static expanded_location
 155 expand_location_1 (location_t loc,
 156                    bool expansion_point_p,
 157                    enum location_aspect aspect)
 158 {
 159   expanded_location xloc;
 160   const line_map_ordinary *map;
 161   enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
 162   tree block = NULL;
 163
 164   if (IS_ADHOC_LOC (loc))
 165     {
 166       block = LOCATION_BLOCK (loc);
 167       loc = LOCATION_LOCUS (loc);
 168     }
 169
 170   memset (&xloc, 0, sizeof (xloc));
 171
 172   if (loc >= RESERVED_LOCATION_COUNT)
 173     {
 174       if (!expansion_point_p)
 175         {
 176           /* We want to resolve LOC to its spelling location.
 177
 178              But if that spelling location is a reserved location that
 179              appears in the context of a macro expansion (like for a
 180              location for a built-in token), let's consider the first
 181              location (toward the expansion point) that is not reserved;
 182              that is, the first location that is in real source code.  */
 183           loc = linemap_unwind_to_first_non_reserved_loc (line_table,
 184                                                           loc, NULL);
 185           lrk = LRK_SPELLING_LOCATION;
 186         }
 187       loc = linemap_resolve_location (line_table, loc, lrk, &map);
 188
 189       /* loc is now either in an ordinary map, or is a reserved location.
 190          If it is a compound location, the caret is in a spelling location,
 191          but the start/finish might still be a virtual location.
 192          Depending of what the caller asked for, we may need to recurse
 193          one level in order to resolve any virtual locations in the
 194          end-points.  */
 195       switch (aspect)
 196         {
 197         default:
 198           gcc_unreachable ();
 199           /* Fall through.  */
 200         case LOCATION_ASPECT_CARET:
 201           break;
 202         case LOCATION_ASPECT_START:
 203           {
 204             location_t start = get_start (loc);
 205             if (start != loc)
 206               return expand_location_1 (start, expansion_point_p, aspect);
 207           }
 208           break;
 209         case LOCATION_ASPECT_FINISH:
 210           {
 211             location_t finish = get_finish (loc);
 212             if (finish != loc)
 213               return expand_location_1 (finish, expansion_point_p, aspect);
 214           }
 215           break;
 216         }
 217       xloc = linemap_expand_location (line_table, map, loc);
 218     }
 219
 220   xloc.data = block;
 221   if (loc <= BUILTINS_LOCATION)
 222     xloc.file = loc == UNKNOWN_LOCATION ? NULL : _("<built-in>");
 223
 224   return xloc;
 225 }
 226
 227 /* Initialize the set of cache used for files accessed by caret
 228    diagnostic.  */
 229
 230 static void
 231 diagnostic_file_cache_init (void)
 232 {
 233   if (fcache_tab == NULL)
 234     fcache_tab = new fcache[fcache_tab_size];
 235 }
 236
 237 /* Free the resources used by the set of cache used for files accessed
 238    by caret diagnostic.  */
 239
 240 void
 241 diagnostic_file_cache_fini (void)
 242 {
 243   if (fcache_tab)
 244     {
 245       delete [] (fcache_tab);
 246       fcache_tab = NULL;
 247     }
 248 }
 249
 250 /* Return the total lines number that have been read so far by the
 251    line map (in the preprocessor) so far.  For languages like C++ that
 252    entirely preprocess the input file before starting to parse, this
 253    equals the actual number of lines of the file.  */
 254
 255 static size_t
 256 total_lines_num (const char *file_path)
 257 {
 258   size_t r = 0;
 259   location_t l = 0;
 260   if (linemap_get_file_highest_location (line_table, file_path, &l))
 261     {
 262       gcc_assert (l >= RESERVED_LOCATION_COUNT);
 263       expanded_location xloc = expand_location (l);
 264       r = xloc.line;
 265     }
 266   return r;
 267 }
 268
 269 /* Lookup the cache used for the content of a given file accessed by
 270    caret diagnostic.  Return the found cached file, or NULL if no
 271    cached file was found.  */
 272
 273 static fcache*
 274 lookup_file_in_cache_tab (const char *file_path)
 275 {
 276   if (file_path == NULL)
 277     return NULL;
 278
 279   diagnostic_file_cache_init ();
 280
 281   /* This will contain the found cached file.  */
 282   fcache *r = NULL;
 283   for (unsigned i = 0; i < fcache_tab_size; ++i)
 284     {
 285       fcache *c = &fcache_tab[i];
 286       if (c->file_path && !strcmp (c->file_path, file_path))
 287         {
 288           ++c->use_count;
 289           r = c;
 290         }
 291     }
 292
 293   if (r)
 294     ++r->use_count;
 295
 296   return r;
 297 }
 298
 299 /* Purge any mention of FILENAME from the cache of files used for
 300    printing source code.  For use in selftests when working
 301    with tempfiles.  */
 302
 303 void
 304 diagnostics_file_cache_forcibly_evict_file (const char *file_path)
 305 {
 306   gcc_assert (file_path);
 307
 308   fcache *r = lookup_file_in_cache_tab (file_path);
 309   if (!r)
 310     /* Not found.  */
 311     return;
 312
 313   r->file_path = NULL;
 314   if (r->fp)
 315     fclose (r->fp);
 316   r->fp = NULL;
 317   r->nb_read = 0;
 318   r->line_start_idx = 0;
 319   r->line_num = 0;
 320   r->line_record.truncate (0);
 321   r->use_count = 0;
 322   r->total_lines = 0;
 323   r->missing_trailing_newline = true;
 324 }
 325
 326 /* Return the file cache that has been less used, recently, or the
 327    first empty one.  If HIGHEST_USE_COUNT is non-null,
 328    *HIGHEST_USE_COUNT is set to the highest use count of the entries
 329    in the cache table.  */
 330
 331 static fcache*
 332 evicted_cache_tab_entry (unsigned *highest_use_count)
 333 {
 334   diagnostic_file_cache_init ();
 335
 336   fcache *to_evict = &fcache_tab[0];
 337   unsigned huc = to_evict->use_count;
 338   for (unsigned i = 1; i < fcache_tab_size; ++i)
 339     {
 340       fcache *c = &fcache_tab[i];
 341       bool c_is_empty = (c->file_path == NULL);
 342
 343       if (c->use_count < to_evict->use_count
 344           || (to_evict->file_path && c_is_empty))
 345         /* We evict C because it's either an entry with a lower use
 346            count or one that is empty.  */
 347         to_evict = c;
 348
 349       if (huc < c->use_count)
 350         huc = c->use_count;
 351
 352       if (c_is_empty)
 353         /* We've reached the end of the cache; subsequent elements are
 354            all empty.  */
 355         break;
 356     }
 357
 358   if (highest_use_count)
 359     *highest_use_count = huc;
 360
 361   return to_evict;
 362 }
 363
 364 /* Create the cache used for the content of a given file to be
 365    accessed by caret diagnostic.  This cache is added to an array of
 366    cache and can be retrieved by lookup_file_in_cache_tab.  This
 367    function returns the created cache.  Note that only the last
 368    fcache_tab_size files are cached.  */
 369
 370 static fcache*
 371 add_file_to_cache_tab (const char *file_path)
 372 {
 373
 374   FILE *fp = fopen (file_path, "r");
 375   if (fp == NULL)
 376     return NULL;
 377
 378   unsigned highest_use_count = 0;
 379   fcache *r = evicted_cache_tab_entry (&highest_use_count);
 380   r->file_path = file_path;
 381   if (r->fp)
 382     fclose (r->fp);
 383   r->fp = fp;
 384   r->nb_read = 0;
 385   r->line_start_idx = 0;
 386   r->line_num = 0;
 387   r->line_record.truncate (0);
 388   /* Ensure that this cache entry doesn't get evicted next time
 389      add_file_to_cache_tab is called.  */
 390   r->use_count = ++highest_use_count;
 391   r->total_lines = total_lines_num (file_path);
 392   r->missing_trailing_newline = true;
 393
 394   return r;
 395 }
 396
 397 /* Lookup the cache used for the content of a given file accessed by
 398    caret diagnostic.  If no cached file was found, create a new cache
 399    for this file, add it to the array of cached file and return
 400    it.  */
 401
 402 static fcache*
 403 lookup_or_add_file_to_cache_tab (const char *file_path)
 404 {
 405   fcache *r = lookup_file_in_cache_tab (file_path);
 406   if (r == NULL)
 407     r = add_file_to_cache_tab (file_path);
 408   return r;
 409 }
 410
 411 /* Default constructor for a cache of file used by caret
 412    diagnostic.  */
 413
 414 fcache::fcache ()
 415 : use_count (0), file_path (NULL), fp (NULL), data (0),
 416   size (0), nb_read (0), line_start_idx (0), line_num (0),
 417   total_lines (0), missing_trailing_newline (true)
 418 {
 419   line_record.create (0);
 420 }
 421
 422 /* Destructor for a cache of file used by caret diagnostic.  */
 423
 424 fcache::~fcache ()
 425 {
 426   if (fp)
 427     {
 428       fclose (fp);
 429       fp = NULL;
 430     }
 431   if (data)
 432     {
 433       XDELETEVEC (data);
 434       data = 0;
 435     }
 436   line_record.release ();
 437 }
 438
 439 /* Returns TRUE iff the cache would need to be filled with data coming
 440    from the file.  That is, either the cache is empty or full or the
 441    current line is empty.  Note that if the cache is full, it would
 442    need to be extended and filled again.  */
 443
 444 static bool
 445 needs_read (fcache *c)
 446 {
 447   return (c->nb_read == 0
 448           || c->nb_read == c->size
 449           || (c->line_start_idx >= c->nb_read - 1));
 450 }
 451
 452 /*  Return TRUE iff the cache is full and thus needs to be
 453     extended.  */
 454
 455 static bool
 456 needs_grow (fcache *c)
 457 {
 458   return c->nb_read == c->size;
 459 }
 460
 461 /* Grow the cache if it needs to be extended.  */
 462
 463 static void
 464 maybe_grow (fcache *c)
 465 {
 466   if (!needs_grow (c))
 467     return;
 468
 469   size_t size = c->size == 0 ? fcache_buffer_size : c->size * 2;
 470   c->data = XRESIZEVEC (char, c->data, size);
 471   c->size = size;
 472 }
 473
 474 /*  Read more data into the cache.  Extends the cache if need be.
 475     Returns TRUE iff new data could be read.  */
 476
 477 static bool
 478 read_data (fcache *c)
 479 {
 480   if (feof (c->fp) || ferror (c->fp))
 481     return false;
 482
 483   maybe_grow (c);
 484
 485   char * from = c->data + c->nb_read;
 486   size_t to_read = c->size - c->nb_read;
 487   size_t nb_read = fread (from, 1, to_read, c->fp);
 488
 489   if (ferror (c->fp))
 490     return false;
 491
 492   c->nb_read += nb_read;
 493   return !!nb_read;
 494 }
 495
 496 /* Read new data iff the cache needs to be filled with more data
 497    coming from the file FP.  Return TRUE iff the cache was filled with
 498    mode data.  */
 499
 500 static bool
 501 maybe_read_data (fcache *c)
 502 {
 503   if (!needs_read (c))
 504     return false;
 505   return read_data (c);
 506 }
 507
/* Read the next line of the file cached in C, reading more data from
   the underlying file into the cache when needed.  Upon successful
   completion, *LINE is set to the beginning of the line found and
   *LINE_LEN to its length; the line does not include any terminal
   delimiter.  *LINE points directly into the cache and is only valid
   until the next call of get_next_line, which might make its content
   invalid.

   Besides returning the line, this function advances C to the
   following line, increments C's line number and, as long as the
   total_lines hint holds, records the boundaries of the line in C's
   line record.

   Return true if a line was read or processed from the cache; false
   if there is no data left or if the file is in an error state.  */

static bool
get_next_line (fcache *c, char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data (c);

  size_t remaining_size = c->nb_read - c->line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = c->data + c->line_start_idx;

  /* Stays NULL when the line is not terminated by a '\n'.  */
  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = (char *) memchr (line_start, '\n', remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found the end-of-line delimiter in the cache.
         Fill the cache with more data from the file and look for the
         '\n'.  */
      while (maybe_read_data (c))
        {
          /* Reading may have reallocated the buffer, so the start of
             the line has to be derived from c->data again.  */
          line_start = c->data + c->line_start_idx;
          remaining_size = c->nb_read - c->line_start_idx;
          line_end = (char *) memchr (line_start, '\n', remaining_size);
          if (line_end != NULL)
            {
              next_line_start = line_end + 1;
              break;
            }
        }
      if (line_end == NULL)
        {
          /* We've loaded all the file into the cache and still no
             '\n'.  Let's say the line ends one byte past the end of
             the file.  This is to stay consistent with the case of a
             line that ends with a '\n', where line_end points to that
             terminal '\n'.  That consistency is useful below in the
             len calculation.  */
          line_end = c->data + c->nb_read ;
          c->missing_trailing_newline = true;
        }
      else
        c->missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      c->missing_trailing_newline = false;
    }

  /* Don't hand out a line if reading the file failed.  */
  if (ferror (c->fp))
    return false;

  /* At this point, we've found the end of the line.  It either
     points to the '\n' or to one byte after the last byte of the
     file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  /* Only hand back a pointer when the line starts inside the data
     read so far.  */
  if (c->line_start_idx < c->nb_read)
    *line = line_start;

  ++c->line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (c->line_num > c->total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines located
     before c->line_start_idx is faster.  */
  if (update_line_record
      && c->line_record.length () < fcache_line_record_size)
    {
      /* If the lines of the file fit in the line record, we just
         record all of them; the length test keeps a line that is
         read again from being recorded twice ...  */
      if (c->total_lines <= fcache_line_record_size
          && c->line_num > c->line_record.length ())
        c->line_record.safe_push (fcache::line_info (c->line_num,
                                                 c->line_start_idx,
                                                 line_end - c->data));
      else if (c->total_lines > fcache_line_record_size)
        {
          /* ... otherwise, we scale total_lines down to
             fcache_line_record_size lines: N is the slot this line
             maps to, and the line is recorded only if that slot has
             not been filled yet.  */
          size_t n = (c->line_num * fcache_line_record_size) / c->total_lines;
          if (c->line_record.length () == 0
              || n >= c->line_record.length ())
            c->line_record.safe_push (fcache::line_info (c->line_num,
                                                     c->line_start_idx,
                                                     line_end - c->data));
        }
    }

  /* Update c->line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    c->line_start_idx = next_line_start - c->data;
  else
    /* We didn't find any terminal '\n'.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    c->line_start_idx = c->nb_read;

  *line_len = len;

  return true;
}
 633
 634 /* Consume the next bytes coming from the cache (or from its
 635    underlying file if there are remaining unread bytes in the file)
 636    until we reach the next end-of-line (or end-of-file).  There is no
 637    copying from the cache involved.  Return TRUE upon successful
 638    completion.  */
 639
 640 static bool
 641 goto_next_line (fcache *cache)
 642 {
 643   char *l;
 644   ssize_t len;
 645
 646   return get_next_line (cache, &l, &len);
 647 }
 648
/* Read an arbitrary line number LINE_NUM (which must be greater than
   zero) from the file cached in C.  If the line was read
   successfully, *LINE points to the beginning of the line in the
   file cache and *LINE_LEN is the length of the line.  *LINE is not
   nul-terminated, but may contain zero bytes.  *LINE is only valid
   until the next call of read_line_num.
   This function returns true if a line was read, false otherwise.  */

static bool
read_line_num (fcache *c, size_t line_num,
               char **line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= c->line_num)
    {
      /* We've been asked to read lines that are before c->line_num.
         So let's use our line record (if it's not empty) to try to
         avoid re-reading the file from the beginning again.  */

      if (c->line_record.is_empty ())
        {
          /* Nothing was recorded: start over from the first line.  */
          c->line_start_idx = 0;
          c->line_num = 0;
        }
      else
        {
          /* The recorded line to restart from, if there is one.  */
          fcache::line_info *i = NULL;
          if (c->total_lines <= fcache_line_record_size)
            {
              /* In languages where the input file is not totally
                 preprocessed up front, the c->total_lines hint
                 can be smaller than the number of lines of the
                 file.  In that case, only the first
                 c->total_lines have been recorded.

                 Otherwise, the first c->total_lines we've read have
                 their start/end recorded here.  */
              i = (line_num <= c->total_lines)
                ? &c->line_record[line_num - 1]
                : &c->line_record[c->total_lines - 1];
              gcc_assert (i->line_num <= line_num);
            }
          else
            {
              /*  So the file had more lines than our line record
                  size.  Thus the number of lines we've recorded has
                  been scaled down to fcache_line_record_size.  Let's
                  pick the start/end of the recorded line that is
                  closest to line_num.  */
              size_t n = (line_num <= c->total_lines)
                ? line_num * fcache_line_record_size / c->total_lines
                : c ->line_record.length () - 1;
              if (n < c->line_record.length ())
                {
                  i = &c->line_record[n];
                  gcc_assert (i->line_num <= line_num);
                }
            }

          if (i && i->line_num == line_num)
            {
              /* We have the start/end of the line.  */
              *line = c->data + i->start_pos;
              *line_len = i->end_pos - i->start_pos;
              return true;
            }

          if (i)
            {
              /* Restart from the recorded line, which is at or before
                 the one we want; the walk below reads forward from
                 there.  */
              c->line_start_idx = i->start_pos;
              c->line_num = i->line_num - 1;
            }
          else
            {
              /* No usable record: start over from the first line.  */
              c->line_start_idx = 0;
              c->line_num = 0;
            }
        }
    }

  /*  Let's walk from line c->line_num up to line_num - 1, without
      copying any line.  */
  while (c->line_num < line_num - 1)
    if (!goto_next_line (c))
      return false;

  /* The line we want is the next one.  Let's read it and hand it back
     to the caller.  */
  return get_next_line (c, line, line_len);
}
 739
 740 /* Return the physical source line that corresponds to FILE_PATH/LINE.
 741    The line is not nul-terminated.  The returned pointer is only
 742    valid until the next call of location_get_source_line.
 743    Note that the line can contain several null characters,
 744    so the returned value's length has the actual length of the line.
 745    If the function fails, a NULL char_span is returned.  */
 746
 747 char_span
 748 location_get_source_line (const char *file_path, int line)
 749 {
 750   char *buffer = NULL;
 751   ssize_t len;
 752
 753   if (line == 0)
 754     return char_span (NULL, 0);
 755
 756   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 757   if (c == NULL)
 758     return char_span (NULL, 0);
 759
 760   bool read = read_line_num (c, line, &buffer, &len);
 761   if (!read)
 762     return char_span (NULL, 0);
 763
 764   return char_span (buffer, len);
 765 }
 766
 767 /* Determine if FILE_PATH missing a trailing newline on its final line.
 768    Only valid to call once all of the file has been loaded, by
 769    requesting a line number beyond the end of the file.  */
 770
 771 bool
 772 location_missing_trailing_newline (const char *file_path)
 773 {
 774   fcache *c = lookup_or_add_file_to_cache_tab (file_path);
 775   if (c == NULL)
 776     return false;
 777
 778   return c->missing_trailing_newline;
 779 }
 780
 781 /* Test if the location originates from the spelling location of a
 782    builtin-tokens.  That is, return TRUE if LOC is a (possibly
 783    virtual) location of a built-in token that appears in the expansion
 784    list of a macro.  Please note that this function also works on
 785    tokens that result from built-in tokens.  For instance, the
 786    function would return true if passed a token "4" that is the result
 787    of the expansion of the built-in __LINE__ macro.  */
 788 bool
 789 is_location_from_builtin_token (location_t loc)
 790 {
 791   const line_map_ordinary *map = NULL;
 792   loc = linemap_resolve_location (line_table, loc,
 793                                   LRK_SPELLING_LOCATION, &map);
 794   return loc == BUILTINS_LOCATION;
 795 }
 796
 797 /* Expand the source location LOC into a human readable location.  If
 798    LOC is virtual, it resolves to the expansion point of the involved
 799    macro.  If LOC resolves to a builtin location, the file name of the
 800    readable location is set to the string "<built-in>".  */
 801
 802 expanded_location
 803 expand_location (location_t loc)
 804 {
 805   return expand_location_1 (loc, /*expansion_point_p=*/true,
 806                             LOCATION_ASPECT_CARET);
 807 }
 808
 809 /* Expand the source location LOC into a human readable location.  If
 810    LOC is virtual, it resolves to the expansion location of the
 811    relevant macro.  If LOC resolves to a builtin location, the file
 812    name of the readable location is set to the string
 813    "<built-in>".  */
 814
 815 expanded_location
 816 expand_location_to_spelling_point (location_t loc,
 817                                    enum location_aspect aspect)
 818 {
 819   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 820 }
 821
 822 /* The rich_location class within libcpp requires a way to expand
 823    location_t instances, and relies on the client code
 824    providing a symbol named
 825      linemap_client_expand_location_to_spelling_point
 826    to do this.
 827
 828    This is the implementation for libcommon.a (all host binaries),
 829    which simply calls into expand_location_1.  */
 830
 831 expanded_location
 832 linemap_client_expand_location_to_spelling_point (location_t loc,
 833                                                   enum location_aspect aspect)
 834 {
 835   return expand_location_1 (loc, /*expansion_point_p=*/false, aspect);
 836 }
 837
 838
 839 /* If LOCATION is in a system header and if it is a virtual location for
 840    a token coming from the expansion of a macro, unwind it to the
 841    location of the expansion point of the macro.  Otherwise, just return
 842    LOCATION.
 843
 844    This is used for instance when we want to emit diagnostics about a
 845    token that may be located in a macro that is itself defined in a
 846    system header, for example, for the NULL macro.  In such a case, if
 847    LOCATION were passed directly to diagnostic functions such as
 848    warning_at, the diagnostic would be suppressed (unless
 849    -Wsystem-headers).  */
 850
 851 location_t
 852 expansion_point_location_if_in_system_header (location_t location)
 853 {
 854   if (in_system_header_at (location))
 855     location = linemap_resolve_location (line_table, location,
 856                                          LRK_MACRO_EXPANSION_POINT,
 857                                          NULL);
 858   return location;
 859 }
 860
 861 /* If LOCATION is a virtual location for a token coming from the expansion
 862    of a macro, unwind to the location of the expansion point of the macro.  */
 863
 864 location_t
 865 expansion_point_location (location_t location)
 866 {
 867   return linemap_resolve_location (line_table, location,
 868                                    LRK_MACRO_EXPANSION_POINT, NULL);
 869 }
 870
 871 /* Construct a location with caret at CARET, ranging from START to
 872    finish e.g.
 873
 874                  11111111112
 875         12345678901234567890
 876      522
 877      523   return foo + bar;
 878                   ~~~~^~~~~
 879      524
 880
 881    The location's caret is at the "+", line 523 column 15, but starts
 882    earlier, at the "f" of "foo" at column 11.  The finish is at the "r"
 883    of "bar" at column 19.  */
 884
 885 location_t
 886 make_location (location_t caret, location_t start, location_t finish)
 887 {
 888   location_t pure_loc = get_pure_location (caret);
 889   source_range src_range;
 890   src_range.m_start = get_start (start);
 891   src_range.m_finish = get_finish (finish);
 892   location_t combined_loc = COMBINE_LOCATION_DATA (line_table,
 893                                                    pure_loc,
 894                                                    src_range,
 895                                                    NULL);
 896   return combined_loc;
 897 }
 898
 899 /* Same as above, but taking a source range rather than two locations.  */
 900
 901 location_t
 902 make_location (location_t caret, source_range src_range)
 903 {
 904   location_t pure_loc = get_pure_location (caret);
 905   return COMBINE_LOCATION_DATA (line_table, pure_loc, src_range, NULL);
 906 }
 907
 908 /* Dump statistics to stderr about the memory usage of the line_table
 909    set of line maps.  This also displays some statistics about macro
 910    expansion.  */
 911
 912 void
 913 dump_line_table_statistics (void)
 914 {
 915   struct linemap_stats s;
 916   long total_used_map_size,
 917     macro_maps_size,
 918     total_allocated_map_size;
 919
 920   memset (&s, 0, sizeof (s));
 921
 922   linemap_get_statistics (line_table, &s);
 923
 924   macro_maps_size = s.macro_maps_used_size
 925     + s.macro_maps_locations_size;
 926
 927   total_allocated_map_size = s.ordinary_maps_allocated_size
 928     + s.macro_maps_allocated_size
 929     + s.macro_maps_locations_size;
 930
 931   total_used_map_size = s.ordinary_maps_used_size
 932     + s.macro_maps_used_size
 933     + s.macro_maps_locations_size;
 934
 935   fprintf (stderr, "Number of expanded macros:                     %5ld\n",
 936            s.num_expanded_macros);
 937   if (s.num_expanded_macros != 0)
 938     fprintf (stderr, "Average number of tokens per macro expansion:  %5ld\n",
 939              s.num_macro_tokens / s.num_expanded_macros);
 940   fprintf (stderr,
 941            "\nLine Table allocations during the "
 942            "compilation process\n");
 943   fprintf (stderr, "Number of ordinary maps used:        %5ld%c\n",
 944            SIZE_AMOUNT (s.num_ordinary_maps_used));
 945   fprintf (stderr, "Ordinary map used size:              %5ld%c\n",
 946            SIZE_AMOUNT (s.ordinary_maps_used_size));
 947   fprintf (stderr, "Number of ordinary maps allocated:   %5ld%c\n",
 948            SIZE_AMOUNT (s.num_ordinary_maps_allocated));
 949   fprintf (stderr, "Ordinary maps allocated size:        %5ld%c\n",
 950            SIZE_AMOUNT (s.ordinary_maps_allocated_size));
 951   fprintf (stderr, "Number of macro maps used:           %5ld%c\n",
 952            SIZE_AMOUNT (s.num_macro_maps_used));
 953   fprintf (stderr, "Macro maps used size:                %5ld%c\n",
 954            SIZE_AMOUNT (s.macro_maps_used_size));
 955   fprintf (stderr, "Macro maps locations size:           %5ld%c\n",
 956            SIZE_AMOUNT (s.macro_maps_locations_size));
 957   fprintf (stderr, "Macro maps size:                     %5ld%c\n",
 958            SIZE_AMOUNT (macro_maps_size));
 959   fprintf (stderr, "Duplicated maps locations size:      %5ld%c\n",
 960            SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
 961   fprintf (stderr, "Total allocated maps size:           %5ld%c\n",
 962            SIZE_AMOUNT (total_allocated_map_size));
 963   fprintf (stderr, "Total used maps size:                %5ld%c\n",
 964            SIZE_AMOUNT (total_used_map_size));
 965   fprintf (stderr, "Ad-hoc table size:                   %5ld%c\n",
 966            SIZE_AMOUNT (s.adhoc_table_size));
 967   fprintf (stderr, "Ad-hoc table entries used:           %5ld%c\n",
 968            SIZE_AMOUNT (s.adhoc_table_entries_used));
 969   fprintf (stderr, "optimized_ranges:                    %5xu%c\n",
 970            SIZE_AMOUNT (line_table->num_optimized_ranges));
 971   fprintf (stderr, "unoptimized_ranges:                  %5xu%c\n",
 972            SIZE_AMOUNT (line_table->num_unoptimized_ranges));
 973
 974   fprintf (stderr, "\n");
 975 }
 976
 977 /* Get location one beyond the final location in ordinary map IDX.  */
 978
 979 static location_t
 980 get_end_location (struct line_maps *set, unsigned int idx)
 981 {
 982   if (idx == LINEMAPS_ORDINARY_USED (set) - 1)
 983     return set->highest_location;
 984
 985   struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, idx + 1);
 986   return MAP_START_LOCATION (next_map);
 987 }
 988
 989 /* Helper function for write_digit_row.  */
 990
 991 static void
 992 write_digit (FILE *stream, int digit)
 993 {
 994   fputc ('0' + (digit % 10), stream);
 995 }
 996
 997 /* Helper function for dump_location_info.
 998    Write a row of numbers to STREAM, numbering a source line,
 999    giving the units, tens, hundreds etc of the column number.  */
1000
1001 static void
1002 write_digit_row (FILE *stream, int indent,
1003                  const line_map_ordinary *map,
1004                  location_t loc, int max_col, int divisor)
1005 {
1006   fprintf (stream, "%*c", indent, ' ');
1007   fprintf (stream, "|");
1008   for (int column = 1; column < max_col; column++)
1009     {
1010       location_t column_loc = loc + (column << map->m_range_bits);
1011       write_digit (stream, column_loc / divisor);
1012     }
1013   fprintf (stream, "\n");
1014 }
1015
1016 /* Write a half-closed (START) / half-open (END) interval of
1017    location_t to STREAM.  */
1018
1019 static void
1020 dump_location_range (FILE *stream,
1021                      location_t start, location_t end)
1022 {
1023   fprintf (stream,
1024            "  location_t interval: %u <= loc < %u\n",
1025            start, end);
1026 }
1027
1028 /* Write a labelled description of a half-closed (START) / half-open (END)
1029    interval of location_t to STREAM.  */
1030
1031 static void
1032 dump_labelled_location_range (FILE *stream,
1033                               const char *name,
1034                               location_t start, location_t end)
1035 {
1036   fprintf (stream, "%s\n", name);
1037   dump_location_range (stream, start, end);
1038   fprintf (stream, "\n");
1039 }
1040
1041 /* Write a visualization of the locations in the line_table to STREAM.  */
1042
1043 void
1044 dump_location_info (FILE *stream)
1045 {
1046   /* Visualize the reserved locations.  */
1047   dump_labelled_location_range (stream, "RESERVED LOCATIONS",
1048                                 0, RESERVED_LOCATION_COUNT);
1049
1050   /* Visualize the ordinary line_map instances, rendering the sources. */
1051   for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (line_table); idx++)
1052     {
1053       location_t end_location = get_end_location (line_table, idx);
1054       /* half-closed: doesn't include this one. */
1055
1056       const line_map_ordinary *map
1057         = LINEMAPS_ORDINARY_MAP_AT (line_table, idx);
1058       fprintf (stream, "ORDINARY MAP: %i\n", idx);
1059       dump_location_range (stream,
1060                            MAP_START_LOCATION (map), end_location);
1061       fprintf (stream, "  file: %s\n", ORDINARY_MAP_FILE_NAME (map));
1062       fprintf (stream, "  starting at line: %i\n",
1063                ORDINARY_MAP_STARTING_LINE_NUMBER (map));
1064       fprintf (stream, "  column and range bits: %i\n",
1065                map->m_column_and_range_bits);
1066       fprintf (stream, "  column bits: %i\n",
1067                map->m_column_and_range_bits - map->m_range_bits);
1068       fprintf (stream, "  range bits: %i\n",
1069                map->m_range_bits);
1070
1071       /* Render the span of source lines that this "map" covers.  */
1072       for (location_t loc = MAP_START_LOCATION (map);
1073            loc < end_location;
1074            loc += (1 << map->m_range_bits) )
1075         {
1076           gcc_assert (pure_location_p (line_table, loc) );
1077
1078           expanded_location exploc
1079             = linemap_expand_location (line_table, map, loc);
1080
1081           if (exploc.column == 0)
1082             {
1083               /* Beginning of a new source line: draw the line.  */
1084
1085               char_span line_text = location_get_source_line (exploc.file,
1086                                                               exploc.line);
1087               if (!line_text)
1088                 break;
1089               fprintf (stream,
1090                        "%s:%3i|loc:%5i|%.*s\n",
1091                        exploc.file, exploc.line,
1092                        loc,
1093                        (int)line_text.length (), line_text.get_buffer ());
1094
1095               /* "loc" is at column 0, which means "the whole line".
1096                  Render the locations *within* the line, by underlining
1097                  it, showing the location_t numeric values
1098                  at each column.  */
1099               size_t max_col = (1 << map->m_column_and_range_bits) - 1;
1100               if (max_col > line_text.length ())
1101                 max_col = line_text.length () + 1;
1102
1103               int indent = 14 + strlen (exploc.file);
1104
1105               /* Thousands.  */
1106               if (end_location > 999)
1107                 write_digit_row (stream, indent, map, loc, max_col, 1000);
1108
1109               /* Hundreds.  */
1110               if (end_location > 99)
1111                 write_digit_row (stream, indent, map, loc, max_col, 100);
1112
1113               /* Tens.  */
1114               write_digit_row (stream, indent, map, loc, max_col, 10);
1115
1116               /* Units.  */
1117               write_digit_row (stream, indent, map, loc, max_col, 1);
1118             }
1119         }
1120       fprintf (stream, "\n");
1121     }
1122
1123   /* Visualize unallocated values.  */
1124   dump_labelled_location_range (stream, "UNALLOCATED LOCATIONS",
1125                                 line_table->highest_location,
1126                                 LINEMAPS_MACRO_LOWEST_LOCATION (line_table));
1127
1128   /* Visualize the macro line_map instances, rendering the sources. */
1129   for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (line_table); i++)
1130     {
1131       /* Each macro map that is allocated owns location_t values
1132          that are *lower* that the one before them.
1133          Hence it's meaningful to view them either in order of ascending
1134          source locations, or in order of ascending macro map index.  */
1135       const bool ascending_location_ts = true;
1136       unsigned int idx = (ascending_location_ts
1137                           ? (LINEMAPS_MACRO_USED (line_table) - (i + 1))
1138                           : i);
1139       const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (line_table, idx);
1140       fprintf (stream, "MACRO %i: %s (%u tokens)\n",
1141                idx,
1142                linemap_map_get_macro_name (map),
1143                MACRO_MAP_NUM_MACRO_TOKENS (map));
1144       dump_location_range (stream,
1145                            map->start_location,
1146                            (map->start_location
1147                             + MACRO_MAP_NUM_MACRO_TOKENS (map)));
1148       inform (MACRO_MAP_EXPANSION_POINT_LOCATION (map),
1149               "expansion point is location %i",
1150               MACRO_MAP_EXPANSION_POINT_LOCATION (map));
1151       fprintf (stream, "  map->start_location: %u\n",
1152                map->start_location);
1153
1154       fprintf (stream, "  macro_locations:\n");
1155       for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (map); i++)
1156         {
1157           location_t x = MACRO_MAP_LOCATIONS (map)[2 * i];
1158           location_t y = MACRO_MAP_LOCATIONS (map)[(2 * i) + 1];
1159
1160           /* linemap_add_macro_token encodes token numbers in an expansion
1161              by putting them after MAP_START_LOCATION. */
1162
1163           /* I'm typically seeing 4 uninitialized entries at the end of
1164              0xafafafaf.
1165              This appears to be due to macro.c:replace_args
1166              adding 2 extra args for padding tokens; presumably there may
1167              be a leading and/or trailing padding token injected,
1168              each for 2 more location slots.
1169              This would explain there being up to 4 location_ts slots
1170              that may be uninitialized.  */
1171
1172           fprintf (stream, "    %u: %u, %u\n",
1173                    i,
1174                    x,
1175                    y);
1176           if (x == y)
1177             {
1178               if (x < MAP_START_LOCATION (map))
1179                 inform (x, "token %u has x-location == y-location == %u", i, x);
1180               else
1181                 fprintf (stream,
1182                          "x-location == y-location == %u encodes token # %u\n",
1183                          x, x - MAP_START_LOCATION (map));
1184                 }
1185           else
1186             {
1187               inform (x, "token %u has x-location == %u", i, x);
1188               inform (x, "token %u has y-location == %u", i, y);
1189             }
1190         }
1191       fprintf (stream, "\n");
1192     }
1193
1194   /* It appears that MAX_LOCATION_T itself is never assigned to a
1195      macro map, presumably due to an off-by-one error somewhere
1196      between the logic in linemap_enter_macro and
1197      LINEMAPS_MACRO_LOWEST_LOCATION.  */
1198   dump_labelled_location_range (stream, "MAX_LOCATION_T",
1199                                 MAX_LOCATION_T,
1200                                 MAX_LOCATION_T + 1);
1201
1202   /* Visualize ad-hoc values.  */
1203   dump_labelled_location_range (stream, "AD-HOC LOCATIONS",
1204                                 MAX_LOCATION_T + 1, UINT_MAX);
1205 }
1206
1207 /* string_concat's constructor.  */
1208
1209 string_concat::string_concat (int num, location_t *locs)
1210   : m_num (num)
1211 {
1212   m_locs = ggc_vec_alloc <location_t> (num);
1213   for (int i = 0; i < num; i++)
1214     m_locs[i] = locs[i];
1215 }
1216
1217 /* string_concat_db's constructor.  */
1218
1219 string_concat_db::string_concat_db ()
1220 {
1221   m_table = hash_map <location_hash, string_concat *>::create_ggc (64);
1222 }
1223
1224 /* Record that a string concatenation occurred, covering NUM
1225    string literal tokens.  LOCS is an array of size NUM, containing the
1226    locations of the tokens.  A copy of LOCS is taken.  */
1227
1228 void
1229 string_concat_db::record_string_concatenation (int num, location_t *locs)
1230 {
1231   gcc_assert (num > 1);
1232   gcc_assert (locs);
1233
1234   location_t key_loc = get_key_loc (locs[0]);
1235
1236   string_concat *concat
1237     = new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1238   m_table->put (key_loc, concat);
1239 }
1240
1241 /* Determine if LOC was the location of the the initial token of a
1242    concatenation of string literal tokens.
1243    If so, *OUT_NUM is written to with the number of tokens, and
1244    *OUT_LOCS with the location of an array of locations of the
1245    tokens, and return true.  *OUT_LOCS is a borrowed pointer to
1246    storage owned by the string_concat_db.
1247    Otherwise, return false.  */
1248
1249 bool
1250 string_concat_db::get_string_concatenation (location_t loc,
1251                                             int *out_num,
1252                                             location_t **out_locs)
1253 {
1254   gcc_assert (out_num);
1255   gcc_assert (out_locs);
1256
1257   location_t key_loc = get_key_loc (loc);
1258
1259   string_concat **concat = m_table->get (key_loc);
1260   if (!concat)
1261     return false;
1262
1263   *out_num = (*concat)->m_num;
1264   *out_locs =(*concat)->m_locs;
1265   return true;
1266 }
1267
1268 /* Internal function.  Canonicalize LOC into a form suitable for
1269    use as a key within the database, stripping away macro expansion,
1270    ad-hoc information, and range information, using the location of
1271    the start of LOC within an ordinary linemap.  */
1272
1273 location_t
1274 string_concat_db::get_key_loc (location_t loc)
1275 {
1276   loc = linemap_resolve_location (line_table, loc, LRK_SPELLING_LOCATION,
1277                                   NULL);
1278
1279   loc = get_range_from_loc (line_table, loc).m_start;
1280
1281   return loc;
1282 }
1283
1284 /* Helper class for use within get_substring_ranges_for_loc.
1285    An vec of cpp_string with responsibility for releasing all of the
1286    str->text for each str in the vector.  */
1287
1288 class auto_cpp_string_vec :  public auto_vec <cpp_string>
1289 {
1290  public:
1291   auto_cpp_string_vec (int alloc)
1292     : auto_vec <cpp_string> (alloc) {}
1293
1294   ~auto_cpp_string_vec ()
1295   {
1296     /* Clean up the copies within this vec.  */
1297     int i;
1298     cpp_string *str;
1299     FOR_EACH_VEC_ELT (*this, i, str)
1300       free (const_cast <unsigned char *> (str->text));
1301   }
1302 };
1303
1304 /* Attempt to populate RANGES with source location information on the
1305    individual characters within the string literal found at STRLOC.
1306    If CONCATS is non-NULL, then any string literals that the token at
1307    STRLOC  was concatenated with are also added to RANGES.
1308
1309    Return NULL if successful, or an error message if any errors occurred (in
1310    which case RANGES may be only partially populated and should not
1311    be used).
1312
1313    This is implemented by re-parsing the relevant source line(s).  */
1314
1315 static const char *
1316 get_substring_ranges_for_loc (cpp_reader *pfile,
1317                               string_concat_db *concats,
1318                               location_t strloc,
1319                               enum cpp_ttype type,
1320                               cpp_substring_ranges &ranges)
1321 {
1322   gcc_assert (pfile);
1323
1324   if (strloc == UNKNOWN_LOCATION)
1325     return "unknown location";
1326
1327   /* Reparsing the strings requires accurate location information.
1328      If -ftrack-macro-expansion has been overridden from its default
1329      of 2, then we might have a location of a macro expansion point,
1330      rather than the location of the literal itself.
1331      Avoid this by requiring that we have full macro expansion tracking
1332      for substring locations to be available.  */
1333   if (cpp_get_options (pfile)->track_macro_expansion != 2)
1334     return "track_macro_expansion != 2";
1335
1336   /* If #line or # 44 "file"-style directives are present, then there's
1337      no guarantee that the line numbers we have can be used to locate
1338      the strings.  For example, we might have a .i file with # directives
1339      pointing back to lines within a .c file, but the .c file might
1340      have been edited since the .i file was created.
1341      In such a case, the safest course is to disable on-demand substring
1342      locations.  */
1343   if (line_table->seen_line_directive)
1344     return "seen line directive";
1345
1346   /* If string concatenation has occurred at STRLOC, get the locations
1347      of all of the literal tokens making up the compound string.
1348      Otherwise, just use STRLOC.  */
1349   int num_locs = 1;
1350   location_t *strlocs = &strloc;
1351   if (concats)
1352     concats->get_string_concatenation (strloc, &num_locs, &strlocs);
1353
1354   auto_cpp_string_vec strs (num_locs);
1355   auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1356   for (int i = 0; i < num_locs; i++)
1357     {
1358       /* Get range of strloc.  We will use it to locate the start and finish
1359          of the literal token within the line.  */
1360       source_range src_range = get_range_from_loc (line_table, strlocs[i]);
1361
1362       if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (line_table))
1363         {
1364           /* If the string token was within a macro expansion, then we can
1365              cope with it for the simple case where we have a single token.
1366              Otherwise, bail out.  */
1367           if (src_range.m_start != src_range.m_finish)
1368             return "macro expansion";
1369         }
1370       else
1371         {
1372           if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1373             /* If so, we can't reliably determine where the token started within
1374                its line.  */
1375             return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1376
1377           if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1378             /* If so, we can't reliably determine where the token finished
1379                within its line.  */
1380             return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1381         }
1382
1383       expanded_location start
1384         = expand_location_to_spelling_point (src_range.m_start,
1385                                              LOCATION_ASPECT_START);
1386       expanded_location finish
1387         = expand_location_to_spelling_point (src_range.m_finish,
1388                                              LOCATION_ASPECT_FINISH);
1389       if (start.file != finish.file)
1390         return "range endpoints are in different files";
1391       if (start.line != finish.line)
1392         return "range endpoints are on different lines";
1393       if (start.column > finish.column)
1394         return "range endpoints are reversed";
1395
1396       char_span line = location_get_source_line (start.file, start.line);
1397       if (!line)
1398         return "unable to read source line";
1399
1400       /* Determine the location of the literal (including quotes
1401          and leading prefix chars, such as the 'u' in a u""
1402          token).  */
1403       size_t literal_length = finish.column - start.column + 1;
1404
1405       /* Ensure that we don't crash if we got the wrong location.  */
1406       if (line.length () < (start.column - 1 + literal_length))
1407         return "line is not wide enough";
1408
1409       char_span literal = line.subspan (start.column - 1, literal_length);
1410
1411       cpp_string from;
1412       from.len = literal_length;
1413       /* Make a copy of the literal, to avoid having to rely on
1414          the lifetime of the copy of the line within the cache.
1415          This will be released by the auto_cpp_string_vec dtor.  */
1416       from.text = (unsigned char *)literal.xstrdup ();
1417       strs.safe_push (from);
1418
1419       /* For very long lines, a new linemap could have started
1420          halfway through the token.
1421          Ensure that the loc_reader uses the linemap of the
1422          *end* of the token for its start location.  */
1423       const line_map_ordinary *start_ord_map;
1424       linemap_resolve_location (line_table, src_range.m_start,
1425                                 LRK_SPELLING_LOCATION, &start_ord_map);
1426       const line_map_ordinary *final_ord_map;
1427       linemap_resolve_location (line_table, src_range.m_finish,
1428                                 LRK_SPELLING_LOCATION, &final_ord_map);
1429       if (start_ord_map == NULL || final_ord_map == NULL)
1430         return "failed to get ordinary maps";
1431       /* Bulletproofing.  We ought to only have different ordinary maps
1432          for start vs finish due to line-length jumps.  */
1433       if (start_ord_map != final_ord_map
1434           && start_ord_map->to_file != final_ord_map->to_file)
1435           return "start and finish are spelled in different ordinary maps";
1436       location_t start_loc
1437         = linemap_position_for_line_and_column (line_table, final_ord_map,
1438                                                 start.line, start.column);
1439
1440       cpp_string_location_reader loc_reader (start_loc, line_table);
1441       loc_readers.safe_push (loc_reader);
1442     }
1443
1444   /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
1445   const char *err = cpp_interpret_string_ranges (pfile, strs.address (),
1446                                                  loc_readers.address (),
1447                                                  num_locs, &ranges, type);
1448   if (err)
1449     return err;
1450
1451   /* Success: "ranges" should now contain information on the string.  */
1452   return NULL;
1453 }
1454
1455 /* Attempt to populate *OUT_LOC with source location information on the
1456    given characters within the string literal found at STRLOC.
1457    CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1458    character set.
1459
1460    For example, given CARET_IDX = 4, START_IDX = 3, END_IDX  = 7
1461    and string literal "012345\n789"
1462    *OUT_LOC is written to with:
1463      "012345\n789"
1464          ~^~~~~
1465
1466    If CONCATS is non-NULL, then any string literals that the token at
1467    STRLOC was concatenated with are also considered.
1468
1469    This is implemented by re-parsing the relevant source line(s).
1470
1471    Return NULL if successful, or an error message if any errors occurred.
1472    Error messages are intended for GCC developers (to help debugging) rather
1473    than for end-users.  */
1474
1475 const char *
1476 get_location_within_string (cpp_reader *pfile,
1477                             string_concat_db *concats,
1478                             location_t strloc,
1479                             enum cpp_ttype type,
1480                             int caret_idx, int start_idx, int end_idx,
1481                             location_t *out_loc)
1482 {
1483   gcc_checking_assert (caret_idx >= 0);
1484   gcc_checking_assert (start_idx >= 0);
1485   gcc_checking_assert (end_idx >= 0);
1486   gcc_assert (out_loc);
1487
1488   cpp_substring_ranges ranges;
1489   const char *err
1490     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1491   if (err)
1492     return err;
1493
1494   if (caret_idx >= ranges.get_num_ranges ())
1495     return "caret_idx out of range";
1496   if (start_idx >= ranges.get_num_ranges ())
1497     return "start_idx out of range";
1498   if (end_idx >= ranges.get_num_ranges ())
1499     return "end_idx out of range";
1500
1501   *out_loc = make_location (ranges.get_range (caret_idx).m_start,
1502                             ranges.get_range (start_idx).m_start,
1503                             ranges.get_range (end_idx).m_finish);
1504   return NULL;
1505 }
1506
1507 #if CHECKING_P
1508
1509 namespace selftest {
1510
1511 /* Selftests of location handling.  */
1512
1513 /* Attempt to populate *OUT_RANGE with source location information on the
1514    given character within the string literal found at STRLOC.
1515    CHAR_IDX refers to an offset within the execution character set.
1516    If CONCATS is non-NULL, then any string literals that the token at
1517    STRLOC was concatenated with are also considered.
1518
1519    This is implemented by re-parsing the relevant source line(s).
1520
1521    Return NULL if successful, or an error message if any errors occurred.
1522    Error messages are intended for GCC developers (to help debugging) rather
1523    than for end-users.  */
1524
1525 static const char *
1526 get_source_range_for_char (cpp_reader *pfile,
1527                            string_concat_db *concats,
1528                            location_t strloc,
1529                            enum cpp_ttype type,
1530                            int char_idx,
1531                            source_range *out_range)
1532 {
1533   gcc_checking_assert (char_idx >= 0);
1534   gcc_assert (out_range);
1535
1536   cpp_substring_ranges ranges;
1537   const char *err
1538     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1539   if (err)
1540     return err;
1541
1542   if (char_idx >= ranges.get_num_ranges ())
1543     return "char_idx out of range";
1544
1545   *out_range = ranges.get_range (char_idx);
1546   return NULL;
1547 }
1548
1549 /* As get_source_range_for_char, but write to *OUT the number
1550    of ranges that are available.  */
1551
1552 static const char *
1553 get_num_source_ranges_for_substring (cpp_reader *pfile,
1554                                      string_concat_db *concats,
1555                                      location_t strloc,
1556                                      enum cpp_ttype type,
1557                                      int *out)
1558 {
1559   gcc_assert (out);
1560
1561   cpp_substring_ranges ranges;
1562   const char *err
1563     = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1564
1565   if (err)
1566     return err;
1567
1568   *out = ranges.get_num_ranges ();
1569   return NULL;
1570 }
1571
1572 /* Selftests of location handling.  */
1573
1574 /* Verify that compare() on linenum_type handles comparisons over the full
1575    range of the type.  */
1576
1577 static void
1578 test_linenum_comparisons ()
1579 {
1580   linenum_type min_line (0);
1581   linenum_type max_line (0xffffffff);
1582   ASSERT_EQ (0, compare (min_line, min_line));
1583   ASSERT_EQ (0, compare (max_line, max_line));
1584
1585   ASSERT_GT (compare (max_line, min_line), 0);
1586   ASSERT_LT (compare (min_line, max_line), 0);
1587 }
1588
1589 /* Helper function for verifying location data: when location_t
1590    values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
1591    as having column 0.  */
1592
1593 static bool
1594 should_have_column_data_p (location_t loc)
1595 {
1596   if (IS_ADHOC_LOC (loc))
1597     loc = get_location_from_adhoc_loc (line_table, loc);
1598   if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
1599     return false;
1600   return true;
1601 }
1602
1603 /* Selftest for should_have_column_data_p.  */
1604
1605 static void
1606 test_should_have_column_data_p ()
1607 {
1608   ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
1609   ASSERT_TRUE
1610     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
1611   ASSERT_FALSE
1612     (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
1613 }
1614
1615 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
1616    on LOC.  */
1617
1618 static void
1619 assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
1620               location_t loc)
1621 {
1622   ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
1623   ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
1624   /* If location_t values are sufficiently high, then column numbers
1625      will be unavailable and LOCATION_COLUMN (loc) will be 0.
1626      When close to the threshold, column numbers *may* be present: if
1627      the final linemap before the threshold contains a line that straddles
1628      the threshold, locations in that line have column information.  */
1629   if (should_have_column_data_p (loc))
1630     ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
1631 }
1632
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.c: there are various threshold
   values for location_t beyond which line-map.c changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following struct describes a particular case within our test
   matrix: the default_range_bits to use, and the location_t value at
   which to start the line table (0 for "leave it alone").  */

struct line_table_case
{
  line_table_case (int default_range_bits, int base_location)
  {
    m_default_range_bits = default_range_bits;
    m_base_location = base_location;
  }

  int m_default_range_bits;
  int m_base_location;
};
1659
1660 /* Constructor.  Store the old value of line_table, and create a new
1661    one, using sane defaults.  */
1662
1663 line_table_test::line_table_test ()
1664 {
1665   gcc_assert (saved_line_table == NULL);
1666   saved_line_table = line_table;
1667   line_table = ggc_alloc<line_maps> ();
1668   linemap_init (line_table, BUILTINS_LOCATION);
1669   gcc_assert (saved_line_table->reallocator);
1670   line_table->reallocator = saved_line_table->reallocator;
1671   gcc_assert (saved_line_table->round_alloc_size);
1672   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1673   line_table->default_range_bits = 0;
1674 }
1675
1676 /* Constructor.  Store the old value of line_table, and create a new
1677    one, using the sitation described in CASE_.  */
1678
1679 line_table_test::line_table_test (const line_table_case &case_)
1680 {
1681   gcc_assert (saved_line_table == NULL);
1682   saved_line_table = line_table;
1683   line_table = ggc_alloc<line_maps> ();
1684   linemap_init (line_table, BUILTINS_LOCATION);
1685   gcc_assert (saved_line_table->reallocator);
1686   line_table->reallocator = saved_line_table->reallocator;
1687   gcc_assert (saved_line_table->round_alloc_size);
1688   line_table->round_alloc_size = saved_line_table->round_alloc_size;
1689   line_table->default_range_bits = case_.m_default_range_bits;
1690   if (case_.m_base_location)
1691     {
1692       line_table->highest_location = case_.m_base_location;
1693       line_table->highest_line = case_.m_base_location;
1694     }
1695 }
1696
1697 /* Destructor.  Restore the old value of line_table.  */
1698
1699 line_table_test::~line_table_test ()
1700 {
1701   gcc_assert (saved_line_table != NULL);
1702   line_table = saved_line_table;
1703   saved_line_table = NULL;
1704 }
1705
1706 /* Verify basic operation of ordinary linemaps.  */
1707
1708 static void
1709 test_accessing_ordinary_linemaps (const line_table_case &case_)
1710 {
1711   line_table_test ltt (case_);
1712
1713   /* Build a simple linemap describing some locations. */
1714   linemap_add (line_table, LC_ENTER, false, "foo.c", 0);
1715
1716   linemap_line_start (line_table, 1, 100);
1717   location_t loc_a = linemap_position_for_column (line_table, 1);
1718   location_t loc_b = linemap_position_for_column (line_table, 23);
1719
1720   linemap_line_start (line_table, 2, 100);
1721   location_t loc_c = linemap_position_for_column (line_table, 1);
1722   location_t loc_d = linemap_position_for_column (line_table, 17);
1723
1724   /* Example of a very long line.  */
1725   linemap_line_start (line_table, 3, 2000);
1726   location_t loc_e = linemap_position_for_column (line_table, 700);
1727
1728   /* Transitioning back to a short line.  */
1729   linemap_line_start (line_table, 4, 0);
1730   location_t loc_back_to_short = linemap_position_for_column (line_table, 100);
1731
1732   if (should_have_column_data_p (loc_back_to_short))
1733     {
1734       /* Verify that we switched to short lines in the linemap.  */
1735       line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (line_table);
1736       ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
1737     }
1738
1739   /* Example of a line that will eventually be seen to be longer
1740      than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
1741      below that.  */
1742   linemap_line_start (line_table, 5, 2000);
1743
1744   location_t loc_start_of_very_long_line
1745     = linemap_position_for_column (line_table, 2000);
1746   location_t loc_too_wide
1747     = linemap_position_for_column (line_table, 4097);
1748   location_t loc_too_wide_2
1749     = linemap_position_for_column (line_table, 4098);
1750
1751   /* ...and back to a sane line length.  */
1752   linemap_line_start (line_table, 6, 100);
1753   location_t loc_sane_again = linemap_position_for_column (line_table, 10);
1754
1755   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1756
1757   /* Multiple files.  */
1758   linemap_add (line_table, LC_ENTER, false, "bar.c", 0);
1759   linemap_line_start (line_table, 1, 200);
1760   location_t loc_f = linemap_position_for_column (line_table, 150);
1761   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
1762
1763   /* Verify that we can recover the location info.  */
1764   assert_loceq ("foo.c", 1, 1, loc_a);
1765   assert_loceq ("foo.c", 1, 23, loc_b);
1766   assert_loceq ("foo.c", 2, 1, loc_c);
1767   assert_loceq ("foo.c", 2, 17, loc_d);
1768   assert_loceq ("foo.c", 3, 700, loc_e);
1769   assert_loceq ("foo.c", 4, 100, loc_back_to_short);
1770
1771   /* In the very wide line, the initial location should be fully tracked.  */
1772   assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
1773   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
1774      be disabled.  */
1775   assert_loceq ("foo.c", 5, 0, loc_too_wide);
1776   assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
1777   /*...and column-tracking should be re-enabled for subsequent lines.  */
1778   assert_loceq ("foo.c", 6, 10, loc_sane_again);
1779
1780   assert_loceq ("bar.c", 1, 150, loc_f);
1781
1782   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
1783   ASSERT_TRUE (pure_location_p (line_table, loc_a));
1784
1785   /* Verify using make_location to build a range, and extracting data
1786      back from it.  */
1787   location_t range_c_b_d = make_location (loc_c, loc_b, loc_d);
1788   ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
1789   ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
1790   source_range src_range = get_range_from_loc (line_table, range_c_b_d);
1791   ASSERT_EQ (loc_b, src_range.m_start);
1792   ASSERT_EQ (loc_d, src_range.m_finish);
1793 }
1794
1795 /* Verify various properties of UNKNOWN_LOCATION.  */
1796
1797 static void
1798 test_unknown_location ()
1799 {
1800   ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
1801   ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
1802   ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
1803 }
1804
1805 /* Verify various properties of BUILTINS_LOCATION.  */
1806
1807 static void
1808 test_builtins ()
1809 {
1810   assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
1811   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
1812 }
1813
1814 /* Regression test for make_location.
1815    Ensure that we use pure locations for the start/finish of the range,
1816    rather than storing a packed or ad-hoc range as the start/finish.  */
1817
1818 static void
1819 test_make_location_nonpure_range_endpoints (const line_table_case &case_)
1820 {
1821   /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
1822      with C++ frontend.
1823      ....................0000000001111111111222.
1824      ....................1234567890123456789012.  */
1825   const char *content = "     r += !aaa == bbb;\n";
1826   temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
1827   line_table_test ltt (case_);
1828   linemap_add (line_table, LC_ENTER, false, tmp.get_filename (), 1);
1829
1830   const location_t c11 = linemap_position_for_column (line_table, 11);
1831   const location_t c12 = linemap_position_for_column (line_table, 12);
1832   const location_t c13 = linemap_position_for_column (line_table, 13);
1833   const location_t c14 = linemap_position_for_column (line_table, 14);
1834   const location_t c21 = linemap_position_for_column (line_table, 21);
1835
1836   if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
1837     return;
1838
1839   /* Use column 13 for the caret location, arbitrarily, to verify that we
1840      handle start != caret.  */
1841   const location_t aaa = make_location (c13, c12, c14);
1842   ASSERT_EQ (c13, get_pure_location (aaa));
1843   ASSERT_EQ (c12, get_start (aaa));
1844   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
1845   ASSERT_EQ (c14, get_finish (aaa));
1846   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
1847
1848   /* Make a location using a location with a range as the start-point.  */
1849   const location_t not_aaa = make_location (c11, aaa, c14);
1850   ASSERT_EQ (c11, get_pure_location (not_aaa));
1851   /* It should use the start location of the range, not store the range
1852      itself.  */
1853   ASSERT_EQ (c12, get_start (not_aaa));
1854   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
1855   ASSERT_EQ (c14, get_finish (not_aaa));
1856   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
1857
1858   /* Similarly, make a location with a range as the end-point.  */
1859   const location_t aaa_eq_bbb = make_location (c12, c12, c21);
1860   ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
1861   ASSERT_EQ (c12, get_start (aaa_eq_bbb));
1862   ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
1863   ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
1864   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
1865   const location_t not_aaa_eq_bbb = make_location (c11, c12, aaa_eq_bbb);
1866   /* It should use the finish location of the range, not store the range
1867      itself.  */
1868   ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
1869   ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
1870   ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
1871   ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
1872   ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
1873 }
1874
1875 /* Verify reading of input files (e.g. for caret-based diagnostics).  */
1876
1877 static void
1878 test_reading_source_line ()
1879 {
1880   /* Create a tempfile and write some text to it.  */
1881   temp_source_file tmp (SELFTEST_LOCATION, ".txt",
1882                         "01234567890123456789\n"
1883                         "This is the test text\n"
1884                         "This is the 3rd line");
1885
1886   /* Read back a specific line from the tempfile.  */
1887   char_span source_line = location_get_source_line (tmp.get_filename (), 3);
1888   ASSERT_TRUE (source_line);
1889   ASSERT_TRUE (source_line.get_buffer () != NULL);
1890   ASSERT_EQ (20, source_line.length ());
1891   ASSERT_TRUE (!strncmp ("This is the 3rd line",
1892                          source_line.get_buffer (), source_line.length ()));
1893
1894   source_line = location_get_source_line (tmp.get_filename (), 2);
1895   ASSERT_TRUE (source_line);
1896   ASSERT_TRUE (source_line.get_buffer () != NULL);
1897   ASSERT_EQ (21, source_line.length ());
1898   ASSERT_TRUE (!strncmp ("This is the test text",
1899                          source_line.get_buffer (), source_line.length ()));
1900
1901   source_line = location_get_source_line (tmp.get_filename (), 4);
1902   ASSERT_FALSE (source_line);
1903   ASSERT_TRUE (source_line.get_buffer () == NULL);
1904 }
1905
1906 /* Tests of lexing.  */
1907
/* Assert that cpp_token_as_text for token TOK, as lexed by PARSER,
   yields the string EXPECTED_TEXT.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)             \
  SELFTEST_BEGIN_STMT                                                   \
    const char *tok_as_text_                                            \
      = (const char *) cpp_token_as_text ((PARSER), (TOK));             \
    ASSERT_STREQ ((EXPECTED_TEXT), tok_as_text_);                       \
  SELFTEST_END_STMT
1916
1917 /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,
1918    and ranges from EXP_START_COL to EXP_FINISH_COL.
1919    Use LOC as the effective location of the selftest.  */
1920
1921 static void
1922 assert_token_loc_eq (const location &loc,
1923                      const cpp_token *tok,
1924                      const char *exp_filename, int exp_linenum,
1925                      int exp_start_col, int exp_finish_col)
1926 {
1927   location_t tok_loc = tok->src_loc;
1928   ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
1929   ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
1930
1931   /* If location_t values are sufficiently high, then column numbers
1932      will be unavailable.  */
1933   if (!should_have_column_data_p (tok_loc))
1934     return;
1935
1936   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
1937   source_range tok_range = get_range_from_loc (line_table, tok_loc);
1938   ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
1939   ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
1940 }
1941
/* Convenience wrapper around assert_token_loc_eq for checking
   TOK->src_loc, passing SELFTEST_LOCATION as the effective location of
   the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,             \
                            EXP_START_COL, EXP_FINISH_COL)              \
  assert_token_loc_eq (SELFTEST_LOCATION,                               \
                       (TOK),                                           \
                       (EXP_FILENAME),                                  \
                       (EXP_LINENUM),                                   \
                       (EXP_START_COL),                                 \
                       (EXP_FINISH_COL))
1949
1950 /* Test of lexing a file using libcpp, verifying tokens and their
1951    location information.  */
1952
1953 static void
1954 test_lexer (const line_table_case &case_)
1955 {
1956   /* Create a tempfile and write some text to it.  */
1957   const char *content =
1958     /*00000000011111111112222222222333333.3333444444444.455555555556
1959       12345678901234567890123456789012345.6789012345678.901234567890.  */
1960     ("test_name /* c-style comment */\n"
1961      "                                  \"test literal\"\n"
1962      " // test c++-style comment\n"
1963      "   42\n");
1964   temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
1965
1966   line_table_test ltt (case_);
1967
1968   cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
1969
1970   const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
1971   ASSERT_NE (fname, NULL);
1972
1973   /* Verify that we get the expected tokens back, with the correct
1974      location information.  */
1975
1976   location_t loc;
1977   const cpp_token *tok;
1978   tok = cpp_get_token_with_location (parser, &loc);
1979   ASSERT_NE (tok, NULL);
1980   ASSERT_EQ (tok->type, CPP_NAME);
1981   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
1982   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);
1983
1984   tok = cpp_get_token_with_location (parser, &loc);
1985   ASSERT_NE (tok, NULL);
1986   ASSERT_EQ (tok->type, CPP_STRING);
1987   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
1988   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);
1989
1990   tok = cpp_get_token_with_location (parser, &loc);
1991   ASSERT_NE (tok, NULL);
1992   ASSERT_EQ (tok->type, CPP_NUMBER);
1993   ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
1994   ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);
1995
1996   tok = cpp_get_token_with_location (parser, &loc);
1997   ASSERT_NE (tok, NULL);
1998   ASSERT_EQ (tok->type, CPP_EOF);
1999
2000   cpp_finish (parser, NULL);
2001   cpp_destroy (parser);
2002 }
2003
2004 /* Forward decls.  */
2005
2006 struct lexer_test;
2007 class lexer_test_options;
2008
2009 /* A class for specifying options of a lexer_test.
2010    The "apply" vfunc is called during the lexer_test constructor.  */
2011
2012 class lexer_test_options
2013 {
2014  public:
2015   virtual void apply (lexer_test &) = 0;
2016 };
2017
2018 /* Wrapper around an cpp_reader *, which calls cpp_finish and cpp_destroy
2019    in its dtor.
2020
2021    This is needed by struct lexer_test to ensure that the cleanup of the
2022    cpp_reader happens *after* the cleanup of the temp_source_file.  */
2023
2024 class cpp_reader_ptr
2025 {
2026  public:
2027   cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2028
2029   ~cpp_reader_ptr ()
2030   {
2031     cpp_finish (m_ptr, NULL);
2032     cpp_destroy (m_ptr);
2033   }
2034
2035   operator cpp_reader * () const { return m_ptr; }
2036
2037  private:
2038   cpp_reader *m_ptr;
2039 };
2040
2041 /* A struct for writing lexer tests.  */
2042
2043 struct lexer_test
2044 {
2045   lexer_test (const line_table_case &case_, const char *content,
2046               lexer_test_options *options);
2047   ~lexer_test ();
2048
2049   const cpp_token *get_token ();
2050
2051   /* The ordering of these fields matters.
2052      The line_table_test must be first, since the cpp_reader_ptr
2053      uses it.
2054      The cpp_reader must be cleaned up *after* the temp_source_file
2055      since the filenames in input.c's input cache are owned by the
2056      cpp_reader; in particular, when ~temp_source_file evicts the
2057      filename the filenames must still be alive.  */
2058   line_table_test m_ltt;
2059   cpp_reader_ptr m_parser;
2060   temp_source_file m_tempfile;
2061   string_concat_db m_concats;
2062   bool m_implicitly_expect_EOF;
2063 };
2064
2065 /* Use an EBCDIC encoding for the execution charset, specifically
2066    IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2067
2068    This exercises iconv integration within libcpp.
2069    Not every build of iconv supports the given charset,
2070    so we need to flag this error and handle it gracefully.  */
2071
2072 class ebcdic_execution_charset : public lexer_test_options
2073 {
2074  public:
2075   ebcdic_execution_charset () : m_num_iconv_errors (0)
2076     {
2077       gcc_assert (s_singleton == NULL);
2078       s_singleton = this;
2079     }
2080   ~ebcdic_execution_charset ()
2081     {
2082       gcc_assert (s_singleton == this);
2083       s_singleton = NULL;
2084     }
2085
2086   void apply (lexer_test &test) FINAL OVERRIDE
2087   {
2088     cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2089     cpp_opts->narrow_charset = "IBM1047";
2090
2091     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2092     callbacks->diagnostic = on_diagnostic;
2093   }
2094
2095   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2096                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2097                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2098                              rich_location *richloc ATTRIBUTE_UNUSED,
2099                              const char *msgid, va_list *ap ATTRIBUTE_UNUSED)
2100     ATTRIBUTE_FPTR_PRINTF(5,0)
2101   {
2102     gcc_assert (s_singleton);
2103     /* Avoid exgettext from picking this up, it is translated in libcpp.  */
2104     const char *msg = "conversion from %s to %s not supported by iconv";
2105 #ifdef ENABLE_NLS
2106     msg = dgettext ("cpplib", msg);
2107 #endif
2108     /* Detect and record errors emitted by libcpp/charset.c:init_iconv_desc
2109        when the local iconv build doesn't support the conversion.  */
2110     if (strcmp (msgid, msg) == 0)
2111       {
2112         s_singleton->m_num_iconv_errors++;
2113         return true;
2114       }
2115
2116     /* Otherwise, we have an unexpected error.  */
2117     abort ();
2118   }
2119
2120   bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; }
2121
2122  private:
2123   static ebcdic_execution_charset *s_singleton;
2124   int m_num_iconv_errors;
2125 };
2126
2127 ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2128
2129 /* A lexer_test_options subclass that records a list of diagnostic
2130    messages emitted by the lexer.  */
2131
2132 class lexer_diagnostic_sink : public lexer_test_options
2133 {
2134  public:
2135   lexer_diagnostic_sink ()
2136   {
2137     gcc_assert (s_singleton == NULL);
2138     s_singleton = this;
2139   }
2140   ~lexer_diagnostic_sink ()
2141   {
2142     gcc_assert (s_singleton == this);
2143     s_singleton = NULL;
2144
2145     int i;
2146     char *str;
2147     FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2148       free (str);
2149   }
2150
2151   void apply (lexer_test &test) FINAL OVERRIDE
2152   {
2153     cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2154     callbacks->diagnostic = on_diagnostic;
2155   }
2156
2157   static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2158                              enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2159                              enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2160                              rich_location *richloc ATTRIBUTE_UNUSED,
2161                              const char *msgid, va_list *ap)
2162     ATTRIBUTE_FPTR_PRINTF(5,0)
2163   {
2164     char *msg = xvasprintf (msgid, *ap);
2165     s_singleton->m_diagnostics.safe_push (msg);
2166     return true;
2167   }
2168
2169   auto_vec<char *> m_diagnostics;
2170
2171  private:
2172   static lexer_diagnostic_sink *s_singleton;
2173 };
2174
2175 lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2176
2177 /* Constructor.  Override line_table with a new instance based on CASE_,
2178    and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
2179    start parsing the tempfile.  */
2180
2181 lexer_test::lexer_test (const line_table_case &case_, const char *content,
2182                         lexer_test_options *options)
2183 : m_ltt (case_),
2184   m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2185   /* Create a tempfile and write the text to it.  */
2186   m_tempfile (SELFTEST_LOCATION, ".c", content),
2187   m_concats (),
2188   m_implicitly_expect_EOF (true)
2189 {
2190   if (options)
2191     options->apply (*this);
2192
2193   cpp_init_iconv (m_parser);
2194
2195   /* Parse the file.  */
2196   const char *fname = cpp_read_main_file (m_parser,
2197                                           m_tempfile.get_filename ());
2198   ASSERT_NE (fname, NULL);
2199 }
2200
2201 /* Destructor.  By default, verify that the next token in m_parser is EOF.  */
2202
2203 lexer_test::~lexer_test ()
2204 {
2205   location_t loc;
2206   const cpp_token *tok;
2207
2208   if (m_implicitly_expect_EOF)
2209     {
2210       tok = cpp_get_token_with_location (m_parser, &loc);
2211       ASSERT_NE (tok, NULL);
2212       ASSERT_EQ (tok->type, CPP_EOF);
2213     }
2214 }
2215
2216 /* Get the next token from m_parser.  */
2217
2218 const cpp_token *
2219 lexer_test::get_token ()
2220 {
2221   location_t loc;
2222   const cpp_token *tok;
2223
2224   tok = cpp_get_token_with_location (m_parser, &loc);
2225   ASSERT_NE (tok, NULL);
2226   return tok;
2227 }
2228
2229 /* Verify that locations within string literals are correctly handled.  */
2230
2231 /* Verify get_source_range_for_substring for token(s) at STRLOC,
2232    using the string concatenation database for TEST.
2233
2234    Assert that the character at index IDX is on EXPECTED_LINE,
2235    and that it begins at column EXPECTED_START_COL and ends at
2236    EXPECTED_FINISH_COL (unless the locations are beyond
2237    LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2238    columns).  */
2239
2240 static void
2241 assert_char_at_range (const location &loc,
2242                       lexer_test& test,
2243                       location_t strloc, enum cpp_ttype type, int idx,
2244                       int expected_line, int expected_start_col,
2245                       int expected_finish_col)
2246 {
2247   cpp_reader *pfile = test.m_parser;
2248   string_concat_db *concats = &test.m_concats;
2249
2250   source_range actual_range = source_range();
2251   const char *err
2252     = get_source_range_for_char (pfile, concats, strloc, type, idx,
2253                                  &actual_range);
2254   if (should_have_column_data_p (strloc))
2255     ASSERT_EQ_AT (loc, NULL, err);
2256   else
2257     {
2258       ASSERT_STREQ_AT (loc,
2259                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2260                        err);
2261       return;
2262     }
2263
2264   int actual_start_line = LOCATION_LINE (actual_range.m_start);
2265   ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2266   int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2267   ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2268
2269   if (should_have_column_data_p (actual_range.m_start))
2270     {
2271       int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2272       ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2273     }
2274   if (should_have_column_data_p (actual_range.m_finish))
2275     {
2276       int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2277       ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2278     }
2279 }
2280
/* Convenience wrapper around assert_char_at_range, passing
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
                             EXPECTED_START_COL, EXPECTED_FINISH_COL)   \
  assert_char_at_range (SELFTEST_LOCATION,                              \
                        (LEXER_TEST),                                   \
                        (STRLOC),                                       \
                        (TYPE),                                         \
                        (IDX),                                          \
                        (EXPECTED_LINE),                                \
                        (EXPECTED_START_COL),                           \
                        (EXPECTED_FINISH_COL))
2289
2290 /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC,
2291    using the string concatenation database for TEST.
2292
2293    Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES.  */
2294
2295 static void
2296 assert_num_substring_ranges (const location &loc,
2297                              lexer_test& test,
2298                              location_t strloc,
2299                              enum cpp_ttype type,
2300                              int expected_num_ranges)
2301 {
2302   cpp_reader *pfile = test.m_parser;
2303   string_concat_db *concats = &test.m_concats;
2304
2305   int actual_num_ranges = -1;
2306   const char *err
2307     = get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2308                                            &actual_num_ranges);
2309   if (should_have_column_data_p (strloc))
2310     ASSERT_EQ_AT (loc, NULL, err);
2311   else
2312     {
2313       ASSERT_STREQ_AT (loc,
2314                        "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2315                        err);
2316       return;
2317     }
2318   ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2319 }
2320
/* Convenience wrapper around assert_num_substring_ranges, passing
   SELFTEST_LOCATION as the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE,           \
                                    EXPECTED_NUM_RANGES)                \
  assert_num_substring_ranges (SELFTEST_LOCATION,                       \
                               (LEXER_TEST),                            \
                               (STRLOC),                                \
                               (TYPE),                                  \
                               (EXPECTED_NUM_RANGES))
2328
2329
2330 /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC
2331    returns an error (using the string concatenation database for TEST).  */
2332
2333 static void
2334 assert_has_no_substring_ranges (const location &loc,
2335                                 lexer_test& test,
2336                                 location_t strloc,
2337                                 enum cpp_ttype type,
2338                                 const char *expected_err)
2339 {
2340   cpp_reader *pfile = test.m_parser;
2341   string_concat_db *concats = &test.m_concats;
2342   cpp_substring_ranges ranges;
2343   const char *actual_err
2344     = get_substring_ranges_for_loc (pfile, concats, strloc,
2345                                     type, ranges);
2346   if (should_have_column_data_p (strloc))
2347     ASSERT_STREQ_AT (loc, expected_err, actual_err);
2348   else
2349     ASSERT_STREQ_AT (loc,
2350                      "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2351                      actual_err);
2352 }
2353
/* Convenience wrapper around assert_has_no_substring_ranges, passing
   SELFTEST_LOCATION as the effective location of any errors.  */
#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR)   \
    assert_has_no_substring_ranges (SELFTEST_LOCATION,                  \
                                    (LEXER_TEST),                       \
                                    (STRLOC),                           \
                                    (TYPE),                             \
                                    (ERR))
2357
2358 /* Lex a simple string literal.  Verify the substring location data, before
2359    and after running cpp_interpret_string on it.  */
2360
2361 static void
2362 test_lexer_string_locations_simple (const line_table_case &case_)
2363 {
2364   /* Digits 0-9 (with 0 at column 10), the simple way.
2365      ....................000000000.11111111112.2222222223333333333
2366      ....................123456789.01234567890.1234567890123456789
2367      We add a trailing comment to ensure that we correctly locate
2368      the end of the string literal token.  */
2369   const char *content = "        \"0123456789\" /* not a string */\n";
2370   lexer_test test (case_, content, NULL);
2371
2372   /* Verify that we get the expected token back, with the correct
2373      location information.  */
2374   const cpp_token *tok = test.get_token ();
2375   ASSERT_EQ (tok->type, CPP_STRING);
2376   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2377   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2378
2379   /* At this point in lexing, the quote characters are treated as part of
2380      the string (they are stripped off by cpp_interpret_string).  */
2381
2382   ASSERT_EQ (tok->val.str.len, 12);
2383
2384   /* Verify that cpp_interpret_string works.  */
2385   cpp_string dst_string;
2386   const enum cpp_ttype type = CPP_STRING;
2387   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2388                                       &dst_string, type);
2389   ASSERT_TRUE (result);
2390   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2391   free (const_cast <unsigned char *> (dst_string.text));
2392
2393   /* Verify ranges of individual characters.  This no longer includes the
2394      opening quote, but does include the closing quote.  */
2395   for (int i = 0; i <= 10; i++)
2396     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1,
2397                           10 + i, 10 + i);
2398
2399   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2400 }
2401
2402 /* As test_lexer_string_locations_simple, but use an EBCDIC execution
2403    encoding.  */
2404
2405 static void
2406 test_lexer_string_locations_ebcdic (const line_table_case &case_)
2407 {
2408   /* EBCDIC support requires iconv.  */
2409   if (!HAVE_ICONV)
2410     return;
2411
2412   /* Digits 0-9 (with 0 at column 10), the simple way.
2413      ....................000000000.11111111112.2222222223333333333
2414      ....................123456789.01234567890.1234567890123456789
2415      We add a trailing comment to ensure that we correctly locate
2416      the end of the string literal token.  */
2417   const char *content = "        \"0123456789\" /* not a string */\n";
2418   ebcdic_execution_charset use_ebcdic;
2419   lexer_test test (case_, content, &use_ebcdic);
2420
2421   /* Verify that we get the expected token back, with the correct
2422      location information.  */
2423   const cpp_token *tok = test.get_token ();
2424   ASSERT_EQ (tok->type, CPP_STRING);
2425   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2426   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20);
2427
2428   /* At this point in lexing, the quote characters are treated as part of
2429      the string (they are stripped off by cpp_interpret_string).  */
2430
2431   ASSERT_EQ (tok->val.str.len, 12);
2432
2433   /* The remainder of the test requires an iconv implementation that
2434      can convert from UTF-8 to the EBCDIC encoding requested above.  */
2435   if (use_ebcdic.iconv_errors_occurred_p ())
2436     return;
2437
2438   /* Verify that cpp_interpret_string works.  */
2439   cpp_string dst_string;
2440   const enum cpp_ttype type = CPP_STRING;
2441   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2442                                       &dst_string, type);
2443   ASSERT_TRUE (result);
2444   /* We should now have EBCDIC-encoded text, specifically
2445      IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2446      The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9.  */
2447   ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2448                 (const char *)dst_string.text);
2449   free (const_cast <unsigned char *> (dst_string.text));
2450
2451   /* Verify that we don't attempt to record substring location information
2452      for such cases.  */
2453   ASSERT_HAS_NO_SUBSTRING_RANGES
2454     (test, tok->src_loc, type,
2455      "execution character set != source character set");
2456 }
2457
2458 /* Lex a string literal containing a hex-escaped character.
2459    Verify the substring location data, before and after running
2460    cpp_interpret_string on it.  */
2461
2462 static void
2463 test_lexer_string_locations_hex (const line_table_case &case_)
2464 {
2465   /* Digits 0-9, expressing digit 5 in ASCII as "\x35"
2466      and with a space in place of digit 6, to terminate the escaped
2467      hex code.
2468      ....................000000000.111111.11112222.
2469      ....................123456789.012345.67890123.  */
2470   const char *content = "        \"01234\\x35 789\"\n";
2471   lexer_test test (case_, content, NULL);
2472
2473   /* Verify that we get the expected token back, with the correct
2474      location information.  */
2475   const cpp_token *tok = test.get_token ();
2476   ASSERT_EQ (tok->type, CPP_STRING);
2477   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2478   ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23);
2479
2480   /* At this point in lexing, the quote characters are treated as part of
2481      the string (they are stripped off by cpp_interpret_string).  */
2482   ASSERT_EQ (tok->val.str.len, 15);
2483
2484   /* Verify that cpp_interpret_string works.  */
2485   cpp_string dst_string;
2486   const enum cpp_ttype type = CPP_STRING;
2487   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2488                                       &dst_string, type);
2489   ASSERT_TRUE (result);
2490   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2491   free (const_cast <unsigned char *> (dst_string.text));
2492
2493   /* Verify ranges of individual characters.  This no longer includes the
2494      opening quote, but does include the closing quote.  */
2495   for (int i = 0; i <= 4; i++)
2496     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2497   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2498   for (int i = 6; i <= 10; i++)
2499     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2500
2501   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2502 }
2503
2504 /* Lex a string literal containing an octal-escaped character.
2505    Verify the substring location data after running cpp_interpret_string
2506    on it.  */
2507
2508 static void
2509 test_lexer_string_locations_oct (const line_table_case &case_)
2510 {
2511   /* Digits 0-9, expressing digit 5 in ASCII as "\065"
2512      and with a space in place of digit 6, to terminate the escaped
2513      octal code.
2514      ....................000000000.111111.11112222.2222223333333333444
2515      ....................123456789.012345.67890123.4567890123456789012  */
2516   const char *content = "        \"01234\\065 789\" /* not a string */\n";
2517   lexer_test test (case_, content, NULL);
2518
2519   /* Verify that we get the expected token back, with the correct
2520      location information.  */
2521   const cpp_token *tok = test.get_token ();
2522   ASSERT_EQ (tok->type, CPP_STRING);
2523   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2524
2525   /* Verify that cpp_interpret_string works.  */
2526   cpp_string dst_string;
2527   const enum cpp_ttype type = CPP_STRING;
2528   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2529                                       &dst_string, type);
2530   ASSERT_TRUE (result);
2531   ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2532   free (const_cast <unsigned char *> (dst_string.text));
2533
2534   /* Verify ranges of individual characters.  This no longer includes the
2535      opening quote, but does include the closing quote.  */
2536   for (int i = 0; i < 5; i++)
2537     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2538   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18);
2539   for (int i = 6; i <= 10; i++)
2540     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i);
2541
2542   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11);
2543 }
2544
2545 /* Test of string literal containing letter escapes.  */
2546
2547 static void
2548 test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2549 {
2550   /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.
2551      .....................000000000.1.11111.1.1.11222.22222223333333
2552      .....................123456789.0.12345.6.7.89012.34567890123456.  */
2553   const char *content = ("        \"\\tfoo\\\\\\nbar\" /* non-str */\n");
2554   lexer_test test (case_, content, NULL);
2555
2556   /* Verify that we get the expected tokens back.  */
2557   const cpp_token *tok = test.get_token ();
2558   ASSERT_EQ (tok->type, CPP_STRING);
2559   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2560
2561   /* Verify ranges of individual characters. */
2562   /* "\t".  */
2563   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2564                         0, 1, 10, 11);
2565   /* "foo". */
2566   for (int i = 1; i <= 3; i++)
2567     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2568                           i, 1, 11 + i, 11 + i);
2569   /* "\\" and "\n".  */
2570   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2571                         4, 1, 15, 16);
2572   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2573                         5, 1, 17, 18);
2574
2575   /* "bar" and closing quote for nul-terminator.  */
2576   for (int i = 6; i <= 9; i++)
2577     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2578                           i, 1, 13 + i, 13 + i);
2579
2580   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10);
2581 }
2582
2583 /* Another test of a string literal containing a letter escape.
2584    Based on string seen in
2585      printf ("%-%\n");
2586    in gcc.dg/format/c90-printf-1.c.  */
2587
2588 static void
2589 test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
2590 {
2591   /* .....................000000000.1111.11.1111.22222222223.
2592      .....................123456789.0123.45.6789.01234567890.  */
2593   const char *content = ("        \"%-%\\n\" /* non-str */\n");
2594   lexer_test test (case_, content, NULL);
2595
2596   /* Verify that we get the expected tokens back.  */
2597   const cpp_token *tok = test.get_token ();
2598   ASSERT_EQ (tok->type, CPP_STRING);
2599   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
2600
2601   /* Verify ranges of individual characters. */
2602   /* "%-%".  */
2603   for (int i = 0; i < 3; i++)
2604     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2605                           i, 1, 10 + i, 10 + i);
2606   /* "\n".  */
2607   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2608                         3, 1, 13, 14);
2609
2610   /* Closing quote for nul-terminator.  */
2611   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2612                         4, 1, 15, 15);
2613
2614   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5);
2615 }
2616
2617 /* Lex a string literal containing UCN 4 characters.
2618    Verify the substring location data after running cpp_interpret_string
2619    on it.  */
2620
2621 static void
2622 test_lexer_string_locations_ucn4 (const line_table_case &case_)
2623 {
2624   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
2625      as UCN 4.
2626      ....................000000000.111111.111122.222222223.33333333344444
2627      ....................123456789.012345.678901.234567890.12345678901234  */
2628   const char *content = "        \"01234\\u2174\\u2175789\" /* non-str */\n";
2629   lexer_test test (case_, content, NULL);
2630
2631   /* Verify that we get the expected token back, with the correct
2632      location information.  */
2633   const cpp_token *tok = test.get_token ();
2634   ASSERT_EQ (tok->type, CPP_STRING);
2635   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
2636
2637   /* Verify that cpp_interpret_string works.
2638      The string should be encoded in the execution character
2639      set.  Assuming that that is UTF-8, we should have the following:
2640      -----------  ----  -----  -------  ----------------
2641      Byte offset  Byte  Octal  Unicode  Source Column(s)
2642      -----------  ----  -----  -------  ----------------
2643      0            0x30         '0'      10
2644      1            0x31         '1'      11
2645      2            0x32         '2'      12
2646      3            0x33         '3'      13
2647      4            0x34         '4'      14
2648      5            0xE2  \342   U+2174   15-20
2649      6            0x85  \205    (cont)  15-20
2650      7            0xB4  \264    (cont)  15-20
2651      8            0xE2  \342   U+2175   21-26
2652      9            0x85  \205    (cont)  21-26
2653      10           0xB5  \265    (cont)  21-26
2654      11           0x37         '7'      27
2655      12           0x38         '8'      28
2656      13           0x39         '9'      29
2657      14           0x00                  30 (closing quote)
2658      -----------  ----  -----  -------  ---------------.  */
2659
2660   cpp_string dst_string;
2661   const enum cpp_ttype type = CPP_STRING;
2662   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2663                                       &dst_string, type);
2664   ASSERT_TRUE (result);
2665   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2666                 (const char *)dst_string.text);
2667   free (const_cast <unsigned char *> (dst_string.text));
2668
2669   /* Verify ranges of individual characters.  This no longer includes the
2670      opening quote, but does include the closing quote.
2671      '01234'.  */
2672   for (int i = 0; i <= 4; i++)
2673     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2674   /* U+2174.  */
2675   for (int i = 5; i <= 7; i++)
2676     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20);
2677   /* U+2175.  */
2678   for (int i = 8; i <= 10; i++)
2679     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26);
2680   /* '789' and nul terminator  */
2681   for (int i = 11; i <= 14; i++)
2682     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i);
2683
2684   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2685 }
2686
2687 /* Lex a string literal containing UCN 8 characters.
2688    Verify the substring location data after running cpp_interpret_string
2689    on it.  */
2690
2691 static void
2692 test_lexer_string_locations_ucn8 (const line_table_case &case_)
2693 {
2694   /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
2695      ....................000000000.111111.1111222222.2222333333333.344444
2696      ....................123456789.012345.6789012345.6789012345678.901234  */
2697   const char *content = "        \"01234\\U00002174\\U00002175789\" /* */\n";
2698   lexer_test test (case_, content, NULL);
2699
2700   /* Verify that we get the expected token back, with the correct
2701      location information.  */
2702   const cpp_token *tok = test.get_token ();
2703   ASSERT_EQ (tok->type, CPP_STRING);
2704   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
2705                            "\"01234\\U00002174\\U00002175789\"");
2706
2707   /* Verify that cpp_interpret_string works.
2708      The UTF-8 encoding of the string is identical to that from
2709      the ucn4 testcase above; the only difference is the column
2710      locations.  */
2711   cpp_string dst_string;
2712   const enum cpp_ttype type = CPP_STRING;
2713   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2714                                       &dst_string, type);
2715   ASSERT_TRUE (result);
2716   ASSERT_STREQ ("01234\342\205\264\342\205\265789",
2717                 (const char *)dst_string.text);
2718   free (const_cast <unsigned char *> (dst_string.text));
2719
2720   /* Verify ranges of individual characters.  This no longer includes the
2721      opening quote, but does include the closing quote.
2722      '01234'.  */
2723   for (int i = 0; i <= 4; i++)
2724     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2725   /* U+2174.  */
2726   for (int i = 5; i <= 7; i++)
2727     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24);
2728   /* U+2175.  */
2729   for (int i = 8; i <= 10; i++)
2730     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34);
2731   /* '789' at columns 35-37  */
2732   for (int i = 11; i <= 13; i++)
2733     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i);
2734   /* Closing quote/nul-terminator at column 38.  */
2735   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38);
2736
2737   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15);
2738 }
2739
/* Read the four bytes at PTR_BE_VALUE as a big-endian 32-bit quantity
   and return it as a host-endian value.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  uint32_t value = 0;
  /* Most significant byte first: shift in one byte per iteration.  */
  for (int idx = 0; idx < 4; idx++)
    value = (value << 8) | (uint32_t) bytes[idx];
  return value;
}
2751
2752 /* Lex a wide string literal and verify that attempts to read substring
2753    location data from it fail gracefully.  */
2754
2755 static void
2756 test_lexer_string_locations_wide_string (const line_table_case &case_)
2757 {
2758   /* Digits 0-9.
2759      ....................000000000.11111111112.22222222233333
2760      ....................123456789.01234567890.12345678901234  */
2761   const char *content = "       L\"0123456789\" /* non-str */\n";
2762   lexer_test test (case_, content, NULL);
2763
2764   /* Verify that we get the expected token back, with the correct
2765      location information.  */
2766   const cpp_token *tok = test.get_token ();
2767   ASSERT_EQ (tok->type, CPP_WSTRING);
2768   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
2769
2770   /* Verify that cpp_interpret_string works, using CPP_WSTRING.  */
2771   cpp_string dst_string;
2772   const enum cpp_ttype type = CPP_WSTRING;
2773   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2774                                       &dst_string, type);
2775   ASSERT_TRUE (result);
2776   /* The cpp_reader defaults to big-endian with
2777      CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should
2778      now be encoded as UTF-32BE.  */
2779   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2780   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2781   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2782   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2783   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2784   free (const_cast <unsigned char *> (dst_string.text));
2785
2786   /* We don't yet support generating substring location information
2787      for L"" strings.  */
2788   ASSERT_HAS_NO_SUBSTRING_RANGES
2789     (test, tok->src_loc, type,
2790      "execution character set != source character set");
2791 }
2792
/* Read the two bytes at PTR_BE_VALUE as a big-endian 16-bit quantity
   and return it as a host-endian value.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return (uint16_t) ((high << 8) | low);
}
2801
2802 /* Lex a u"" string literal and verify that attempts to read substring
2803    location data from it fail gracefully.  */
2804
2805 static void
2806 test_lexer_string_locations_string16 (const line_table_case &case_)
2807 {
2808   /* Digits 0-9.
2809      ....................000000000.11111111112.22222222233333
2810      ....................123456789.01234567890.12345678901234  */
2811   const char *content = "       u\"0123456789\" /* non-str */\n";
2812   lexer_test test (case_, content, NULL);
2813
2814   /* Verify that we get the expected token back, with the correct
2815      location information.  */
2816   const cpp_token *tok = test.get_token ();
2817   ASSERT_EQ (tok->type, CPP_STRING16);
2818   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
2819
2820   /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
2821   cpp_string dst_string;
2822   const enum cpp_ttype type = CPP_STRING16;
2823   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2824                                       &dst_string, type);
2825   ASSERT_TRUE (result);
2826
2827   /* The cpp_reader defaults to big-endian, so dst_string should
2828      now be encoded as UTF-16BE.  */
2829   const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
2830   ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
2831   ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
2832   ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
2833   ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
2834   free (const_cast <unsigned char *> (dst_string.text));
2835
2836   /* We don't yet support generating substring location information
2837      for L"" strings.  */
2838   ASSERT_HAS_NO_SUBSTRING_RANGES
2839     (test, tok->src_loc, type,
2840      "execution character set != source character set");
2841 }
2842
2843 /* Lex a U"" string literal and verify that attempts to read substring
2844    location data from it fail gracefully.  */
2845
2846 static void
2847 test_lexer_string_locations_string32 (const line_table_case &case_)
2848 {
2849   /* Digits 0-9.
2850      ....................000000000.11111111112.22222222233333
2851      ....................123456789.01234567890.12345678901234  */
2852   const char *content = "       U\"0123456789\" /* non-str */\n";
2853   lexer_test test (case_, content, NULL);
2854
2855   /* Verify that we get the expected token back, with the correct
2856      location information.  */
2857   const cpp_token *tok = test.get_token ();
2858   ASSERT_EQ (tok->type, CPP_STRING32);
2859   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
2860
2861   /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
2862   cpp_string dst_string;
2863   const enum cpp_ttype type = CPP_STRING32;
2864   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2865                                       &dst_string, type);
2866   ASSERT_TRUE (result);
2867
2868   /* The cpp_reader defaults to big-endian, so dst_string should
2869      now be encoded as UTF-32BE.  */
2870   const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
2871   ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
2872   ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
2873   ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
2874   ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
2875   free (const_cast <unsigned char *> (dst_string.text));
2876
2877   /* We don't yet support generating substring location information
2878      for L"" strings.  */
2879   ASSERT_HAS_NO_SUBSTRING_RANGES
2880     (test, tok->src_loc, type,
2881      "execution character set != source character set");
2882 }
2883
2884 /* Lex a u8-string literal.
2885    Verify the substring location data after running cpp_interpret_string
2886    on it.  */
2887
2888 static void
2889 test_lexer_string_locations_u8 (const line_table_case &case_)
2890 {
2891   /* Digits 0-9.
2892      ....................000000000.11111111112.22222222233333
2893      ....................123456789.01234567890.12345678901234  */
2894   const char *content = "      u8\"0123456789\" /* non-str */\n";
2895   lexer_test test (case_, content, NULL);
2896
2897   /* Verify that we get the expected token back, with the correct
2898      location information.  */
2899   const cpp_token *tok = test.get_token ();
2900   ASSERT_EQ (tok->type, CPP_UTF8STRING);
2901   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
2902
2903   /* Verify that cpp_interpret_string works.  */
2904   cpp_string dst_string;
2905   const enum cpp_ttype type = CPP_STRING;
2906   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2907                                       &dst_string, type);
2908   ASSERT_TRUE (result);
2909   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2910   free (const_cast <unsigned char *> (dst_string.text));
2911
2912   /* Verify ranges of individual characters.  This no longer includes the
2913      opening quote, but does include the closing quote.  */
2914   for (int i = 0; i <= 10; i++)
2915     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2916 }
2917
2918 /* Lex a string literal containing UTF-8 source characters.
2919    Verify the substring location data after running cpp_interpret_string
2920    on it.  */
2921
2922 static void
2923 test_lexer_string_locations_utf8_source (const line_table_case &case_)
2924 {
2925  /* This string literal is written out to the source file as UTF-8,
2926     and is of the form "before mojibake after", where "mojibake"
2927     is written as the following four unicode code points:
2928        U+6587 CJK UNIFIED IDEOGRAPH-6587
2929        U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2930        U+5316 CJK UNIFIED IDEOGRAPH-5316
2931        U+3051 HIRAGANA LETTER KE.
2932      Each of these is 3 bytes wide when encoded in UTF-8, whereas the
2933      "before" and "after" are 1 byte per unicode character.
2934
2935      The numbering shown are "columns", which are *byte* numbers within
2936      the line, rather than unicode character numbers.
2937
2938      .................... 000000000.1111111.
2939      .................... 123456789.0123456.  */
2940   const char *content = ("        \"before "
2941                          /* U+6587 CJK UNIFIED IDEOGRAPH-6587
2942                               UTF-8: 0xE6 0x96 0x87
2943                               C octal escaped UTF-8: \346\226\207
2944                             "column" numbers: 17-19.  */
2945                          "\346\226\207"
2946
2947                          /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
2948                               UTF-8: 0xE5 0xAD 0x97
2949                               C octal escaped UTF-8: \345\255\227
2950                             "column" numbers: 20-22.  */
2951                          "\345\255\227"
2952
2953                          /* U+5316 CJK UNIFIED IDEOGRAPH-5316
2954                               UTF-8: 0xE5 0x8C 0x96
2955                               C octal escaped UTF-8: \345\214\226
2956                             "column" numbers: 23-25.  */
2957                          "\345\214\226"
2958
2959                          /* U+3051 HIRAGANA LETTER KE
2960                               UTF-8: 0xE3 0x81 0x91
2961                               C octal escaped UTF-8: \343\201\221
2962                             "column" numbers: 26-28.  */
2963                          "\343\201\221"
2964
2965                          /* column numbers 29 onwards
2966                           2333333.33334444444444
2967                           9012345.67890123456789. */
2968                          " after\" /* non-str */\n");
2969   lexer_test test (case_, content, NULL);
2970
2971   /* Verify that we get the expected token back, with the correct
2972      location information.  */
2973   const cpp_token *tok = test.get_token ();
2974   ASSERT_EQ (tok->type, CPP_STRING);
2975   ASSERT_TOKEN_AS_TEXT_EQ
2976     (test.m_parser, tok,
2977      "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
2978
2979   /* Verify that cpp_interpret_string works.  */
2980   cpp_string dst_string;
2981   const enum cpp_ttype type = CPP_STRING;
2982   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
2983                                       &dst_string, type);
2984   ASSERT_TRUE (result);
2985   ASSERT_STREQ
2986     ("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
2987      (const char *)dst_string.text);
2988   free (const_cast <unsigned char *> (dst_string.text));
2989
2990   /* Verify ranges of individual characters.  This no longer includes the
2991      opening quote, but does include the closing quote.
2992      Assuming that both source and execution encodings are UTF-8, we have
2993      a run of 25 octets in each, plus the NUL terminator.  */
2994   for (int i = 0; i < 25; i++)
2995     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
2996   /* NUL-terminator should use the closing quote at column 35.  */
2997   ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);
2998
2999   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
3000 }
3001
3002 /* Test of string literal concatenation.  */
3003
3004 static void
3005 test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3006 {
3007   /* Digits 0-9.
3008      .....................000000000.111111.11112222222222
3009      .....................123456789.012345.67890123456789.  */
3010   const char *content = ("        \"01234\" /* non-str */\n"
3011                          "        \"56789\" /* non-str */\n");
3012   lexer_test test (case_, content, NULL);
3013
3014   location_t input_locs[2];
3015
3016   /* Verify that we get the expected tokens back.  */
3017   auto_vec <cpp_string> input_strings;
3018   const cpp_token *tok_a = test.get_token ();
3019   ASSERT_EQ (tok_a->type, CPP_STRING);
3020   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3021   input_strings.safe_push (tok_a->val.str);
3022   input_locs[0] = tok_a->src_loc;
3023
3024   const cpp_token *tok_b = test.get_token ();
3025   ASSERT_EQ (tok_b->type, CPP_STRING);
3026   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3027   input_strings.safe_push (tok_b->val.str);
3028   input_locs[1] = tok_b->src_loc;
3029
3030   /* Verify that cpp_interpret_string works.  */
3031   cpp_string dst_string;
3032   const enum cpp_ttype type = CPP_STRING;
3033   bool result = cpp_interpret_string (test.m_parser,
3034                                       input_strings.address (), 2,
3035                                       &dst_string, type);
3036   ASSERT_TRUE (result);
3037   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3038   free (const_cast <unsigned char *> (dst_string.text));
3039
3040   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3041   test.m_concats.record_string_concatenation (2, input_locs);
3042
3043   location_t initial_loc = input_locs[0];
3044
3045   /* "01234" on line 1.  */
3046   for (int i = 0; i <= 4; i++)
3047     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3048   /* "56789" in line 2, plus its closing quote for the nul terminator.  */
3049   for (int i = 5; i <= 10; i++)
3050     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);
3051
3052   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3053 }
3054
3055 /* Another test of string literal concatenation.  */
3056
3057 static void
3058 test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3059 {
3060   /* Digits 0-9.
3061      .....................000000000.111.11111112222222
3062      .....................123456789.012.34567890123456.  */
3063   const char *content = ("        \"01\" /* non-str */\n"
3064                          "        \"23\" /* non-str */\n"
3065                          "        \"45\" /* non-str */\n"
3066                          "        \"67\" /* non-str */\n"
3067                          "        \"89\" /* non-str */\n");
3068   lexer_test test (case_, content, NULL);
3069
3070   auto_vec <cpp_string> input_strings;
3071   location_t input_locs[5];
3072
3073   /* Verify that we get the expected tokens back.  */
3074   for (int i = 0; i < 5; i++)
3075     {
3076       const cpp_token *tok = test.get_token ();
3077       ASSERT_EQ (tok->type, CPP_STRING);
3078       input_strings.safe_push (tok->val.str);
3079       input_locs[i] = tok->src_loc;
3080     }
3081
3082   /* Verify that cpp_interpret_string works.  */
3083   cpp_string dst_string;
3084   const enum cpp_ttype type = CPP_STRING;
3085   bool result = cpp_interpret_string (test.m_parser,
3086                                       input_strings.address (), 5,
3087                                       &dst_string, type);
3088   ASSERT_TRUE (result);
3089   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3090   free (const_cast <unsigned char *> (dst_string.text));
3091
3092   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3093   test.m_concats.record_string_concatenation (5, input_locs);
3094
3095   location_t initial_loc = input_locs[0];
3096
3097   /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
3098      detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3099      and expect get_source_range_for_substring to fail.
3100      However, for a string concatenation test, we can have a case
3101      where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3102      but subsequent strings can be after it.
3103      Attempting to detect this within assert_char_at_range
3104      would overcomplicate the logic for the common test cases, so
3105      we detect it here.  */
3106   if (should_have_column_data_p (input_locs[0])
3107       && !should_have_column_data_p (input_locs[4]))
3108     {
3109       /* Verify that get_source_range_for_substring gracefully rejects
3110          this case.  */
3111       source_range actual_range;
3112       const char *err
3113         = get_source_range_for_char (test.m_parser, &test.m_concats,
3114                                      initial_loc, type, 0, &actual_range);
3115       ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3116       return;
3117     }
3118
3119   for (int i = 0; i < 5; i++)
3120     for (int j = 0; j < 2; j++)
3121       ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
3122                             i + 1, 10 + j, 10 + j);
3123
3124   /* NUL-terminator should use the final closing quote at line 5 column 12.  */
3125   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);
3126
3127   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3128 }
3129
3130 /* Another test of string literal concatenation, this time combined with
3131    various kinds of escaped characters.  */
3132
3133 static void
3134 test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3135 {
3136   /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
3137      digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
3138   const char *content
3139     /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
3140        .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. */
3141     = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n");
3142   lexer_test test (case_, content, NULL);
3143
3144   auto_vec <cpp_string> input_strings;
3145   location_t input_locs[4];
3146
3147   /* Verify that we get the expected tokens back.  */
3148   for (int i = 0; i < 4; i++)
3149     {
3150       const cpp_token *tok = test.get_token ();
3151       ASSERT_EQ (tok->type, CPP_STRING);
3152       input_strings.safe_push (tok->val.str);
3153       input_locs[i] = tok->src_loc;
3154     }
3155
3156   /* Verify that cpp_interpret_string works.  */
3157   cpp_string dst_string;
3158   const enum cpp_ttype type = CPP_STRING;
3159   bool result = cpp_interpret_string (test.m_parser,
3160                                       input_strings.address (), 4,
3161                                       &dst_string, type);
3162   ASSERT_TRUE (result);
3163   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3164   free (const_cast <unsigned char *> (dst_string.text));
3165
3166   /* Simulate c-lex.c's lex_string in order to record concatenation.  */
3167   test.m_concats.record_string_concatenation (4, input_locs);
3168
3169   location_t initial_loc = input_locs[0];
3170
3171   for (int i = 0; i <= 4; i++)
3172     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
3173   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
3174   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
3175   for (int i = 7; i <= 9; i++)
3176     ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);
3177
3178   /* NUL-terminator should use the location of the final closing quote.  */
3179   ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);
3180
3181   ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
3182 }
3183
3184 /* Test of string literal in a macro.  */
3185
3186 static void
3187 test_lexer_string_locations_macro (const line_table_case &case_)
3188 {
3189   /* Digits 0-9.
3190      .....................0000000001111111111.22222222223.
3191      .....................1234567890123456789.01234567890.  */
3192   const char *content = ("#define MACRO     \"0123456789\" /* non-str */\n"
3193                          "  MACRO");
3194   lexer_test test (case_, content, NULL);
3195
3196   /* Verify that we get the expected tokens back.  */
3197   const cpp_token *tok = test.get_token ();
3198   ASSERT_EQ (tok->type, CPP_PADDING);
3199
3200   tok = test.get_token ();
3201   ASSERT_EQ (tok->type, CPP_STRING);
3202   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3203
3204   /* Verify ranges of individual characters.  We ought to
3205      see columns within the macro definition.  */
3206   for (int i = 0; i <= 10; i++)
3207     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3208                           i, 1, 20 + i, 20 + i);
3209
3210   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3211
3212   tok = test.get_token ();
3213   ASSERT_EQ (tok->type, CPP_PADDING);
3214 }
3215
3216 /* Test of stringification of a macro argument.  */
3217
3218 static void
3219 test_lexer_string_locations_stringified_macro_argument
3220   (const line_table_case &case_)
3221 {
3222   /* .....................000000000111111111122222222223.
3223      .....................123456789012345678901234567890.  */
3224   const char *content = ("#define MACRO(X) #X /* non-str */\n"
3225                          "MACRO(foo)\n");
3226   lexer_test test (case_, content, NULL);
3227
3228   /* Verify that we get the expected token back.  */
3229   const cpp_token *tok = test.get_token ();
3230   ASSERT_EQ (tok->type, CPP_PADDING);
3231
3232   tok = test.get_token ();
3233   ASSERT_EQ (tok->type, CPP_STRING);
3234   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3235
3236   /* We don't support getting the location of a stringified macro
3237      argument.  Verify that it fails gracefully.  */
3238   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3239                                   "cpp_interpret_string_1 failed");
3240
3241   tok = test.get_token ();
3242   ASSERT_EQ (tok->type, CPP_PADDING);
3243
3244   tok = test.get_token ();
3245   ASSERT_EQ (tok->type, CPP_PADDING);
3246 }
3247
3248 /* Ensure that we are fail gracefully if something attempts to pass
3249    in a location that isn't a string literal token.  Seen on this code:
3250
3251      const char a[] = " %d ";
3252      __builtin_printf (a, 0.5);
3253                        ^
3254
3255    when c-format.c erroneously used the indicated one-character
3256    location as the format string location, leading to a read past the
3257    end of a string buffer in cpp_interpret_string_1.  */
3258
3259 static void
3260 test_lexer_string_locations_non_string (const line_table_case &case_)
3261 {
3262   /* .....................000000000111111111122222222223.
3263      .....................123456789012345678901234567890.  */
3264   const char *content = ("         a\n");
3265   lexer_test test (case_, content, NULL);
3266
3267   /* Verify that we get the expected token back.  */
3268   const cpp_token *tok = test.get_token ();
3269   ASSERT_EQ (tok->type, CPP_NAME);
3270   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3271
3272   /* At this point, libcpp is attempting to interpret the name as a
3273      string literal, despite it not starting with a quote.  We don't detect
3274      that, but we should at least fail gracefully.  */
3275   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3276                                   "cpp_interpret_string_1 failed");
3277 }
3278
3279 /* Ensure that we can read substring information for a token which
3280    starts in one linemap and ends in another .  Adapted from
3281    gcc.dg/cpp/pr69985.c.  */
3282
3283 static void
3284 test_lexer_string_locations_long_line (const line_table_case &case_)
3285 {
3286   /* .....................000000.000111111111
3287      .....................123456.789012346789.  */
3288   const char *content = ("/* A very long line, so that we start a new line map.  */\n"
3289                          "     \"0123456789012345678901234567890123456789"
3290                          "0123456789012345678901234567890123456789"
3291                          "0123456789012345678901234567890123456789"
3292                          "0123456789\"\n");
3293
3294   lexer_test test (case_, content, NULL);
3295
3296   /* Verify that we get the expected token back.  */
3297   const cpp_token *tok = test.get_token ();
3298   ASSERT_EQ (tok->type, CPP_STRING);
3299
3300   if (!should_have_column_data_p (line_table->highest_location))
3301     return;
3302
3303   /* Verify ranges of individual characters.  */
3304   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
3305   for (int i = 0; i < 131; i++)
3306     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3307                           i, 2, 7 + i, 7 + i);
3308 }
3309
3310 /* Test of locations within a raw string that doesn't contain a newline.  */
3311
3312 static void
3313 test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3314 {
3315   /* .....................00.0000000111111111122.
3316      .....................12.3456789012345678901.  */
3317   const char *content = ("R\"foo(0123456789)foo\"\n");
3318   lexer_test test (case_, content, NULL);
3319
3320   /* Verify that we get the expected token back.  */
3321   const cpp_token *tok = test.get_token ();
3322   ASSERT_EQ (tok->type, CPP_STRING);
3323
3324   /* Verify that cpp_interpret_string works.  */
3325   cpp_string dst_string;
3326   const enum cpp_ttype type = CPP_STRING;
3327   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3328                                       &dst_string, type);
3329   ASSERT_TRUE (result);
3330   ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3331   free (const_cast <unsigned char *> (dst_string.text));
3332
3333   if (!should_have_column_data_p (line_table->highest_location))
3334     return;
3335
3336   /* 0-9, plus the nil terminator.  */
3337   ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11);
3338   for (int i = 0; i < 11; i++)
3339     ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3340                           i, 1, 7 + i, 7 + i);
3341 }
3342
3343 /* Test of locations within a raw string that contains a newline.  */
3344
3345 static void
3346 test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3347 {
3348   /* .....................00.0000.
3349      .....................12.3456.  */
3350   const char *content = ("R\"foo(\n"
3351   /* .....................00000.
3352      .....................12345.  */
3353                          "hello\n"
3354                          "world\n"
3355   /* .....................00000.
3356      .....................12345.  */
3357                          ")foo\"\n");
3358   lexer_test test (case_, content, NULL);
3359
3360   /* Verify that we get the expected token back.  */
3361   const cpp_token *tok = test.get_token ();
3362   ASSERT_EQ (tok->type, CPP_STRING);
3363
3364   /* Verify that cpp_interpret_string works.  */
3365   cpp_string dst_string;
3366   const enum cpp_ttype type = CPP_STRING;
3367   bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
3368                                       &dst_string, type);
3369   ASSERT_TRUE (result);
3370   ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3371   free (const_cast <unsigned char *> (dst_string.text));
3372
3373   if (!should_have_column_data_p (line_table->highest_location))
3374     return;
3375
3376   /* Currently we don't support locations within raw strings that
3377      contain newlines.  */
3378   ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3379                                   "range endpoints are on different lines");
3380 }
3381
3382 /* Test of parsing an unterminated raw string.  */
3383
3384 static void
3385 test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3386 {
3387   const char *content = "R\"ouch()ouCh\" /* etc */";
3388
3389   lexer_diagnostic_sink diagnostics;
3390   lexer_test test (case_, content, &diagnostics);
3391   test.m_implicitly_expect_EOF = false;
3392
3393   /* Attempt to parse the raw string.  */
3394   const cpp_token *tok = test.get_token ();
3395   ASSERT_EQ (tok->type, CPP_EOF);
3396
3397   ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
3398   /* We expect the message "unterminated raw string"
3399      in the "cpplib" translation domain.
3400      It's not clear that dgettext is available on all supported hosts,
3401      so this assertion is commented-out for now.
3402        ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3403                      diagnostics.m_diagnostics[0]);
3404   */
3405 }
3406
3407 /* Test of lexing char constants.  */
3408
3409 static void
3410 test_lexer_char_constants (const line_table_case &case_)
3411 {
3412   /* Various char constants.
3413      .....................0000000001111111111.22222222223.
3414      .....................1234567890123456789.01234567890.  */
3415   const char *content = ("         'a'\n"
3416                          "        u'a'\n"
3417                          "        U'a'\n"
3418                          "        L'a'\n"
3419                          "         'abc'\n");
3420   lexer_test test (case_, content, NULL);
3421
3422   /* Verify that we get the expected tokens back.  */
3423   /* 'a'.  */
3424   const cpp_token *tok = test.get_token ();
3425   ASSERT_EQ (tok->type, CPP_CHAR);
3426   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3427
3428   unsigned int chars_seen;
3429   int unsignedp;
3430   cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3431                                           &chars_seen, &unsignedp);
3432   ASSERT_EQ (cc, 'a');
3433   ASSERT_EQ (chars_seen, 1);
3434
3435   /* u'a'.  */
3436   tok = test.get_token ();
3437   ASSERT_EQ (tok->type, CPP_CHAR16);
3438   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3439
3440   /* U'a'.  */
3441   tok = test.get_token ();
3442   ASSERT_EQ (tok->type, CPP_CHAR32);
3443   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3444
3445   /* L'a'.  */
3446   tok = test.get_token ();
3447   ASSERT_EQ (tok->type, CPP_WCHAR);
3448   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3449
3450   /* 'abc' (c-char-sequence).  */
3451   tok = test.get_token ();
3452   ASSERT_EQ (tok->type, CPP_CHAR);
3453   ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3454 }
3455 /* A table of interesting location_t values, giving one axis of our test
3456    matrix.  */
3457
3458 static const location_t boundary_locations[] = {
3459   /* Zero means "don't override the default values for a new line_table".  */
3460   0,
3461
3462   /* An arbitrary non-zero value that isn't close to one of
3463      the boundary values below.  */
3464   0x10000,
3465
3466   /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
3467   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
3468   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
3469   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3470   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
3471   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,
3472
3473   /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
3474   LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
3475   LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
3476   LINE_MAP_MAX_LOCATION_WITH_COLS,
3477   LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
3478   LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
3479 };
3480
3481 /* Run TESTCASE multiple times, once for each case in our test matrix.  */
3482
3483 void
3484 for_each_line_table_case (void (*testcase) (const line_table_case &))
3485 {
3486   /* As noted above in the description of struct line_table_case,
3487      we want to explore a test matrix of interesting line_table
3488      situations, running various selftests for each case within the
3489      matrix.  */
3490
3491   /* Run all tests with:
3492      (a) line_table->default_range_bits == 0, and
3493      (b) line_table->default_range_bits == 5.  */
3494   int num_cases_tested = 0;
3495   for (int default_range_bits = 0; default_range_bits <= 5;
3496        default_range_bits += 5)
3497     {
3498       /* ...and use each of the "interesting" location values as
3499          the starting location within line_table.  */
3500       const int num_boundary_locations
3501         = sizeof (boundary_locations) / sizeof (boundary_locations[0]);
3502       for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++)
3503         {
3504           line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3505
3506           testcase (c);
3507
3508           num_cases_tested++;
3509         }
3510     }
3511
3512   /* Verify that we fully covered the test matrix.  */
3513   ASSERT_EQ (num_cases_tested, 2 * 12);
3514 }
3515
3516 /* Run all of the selftests within this file.  */
3517
3518 void
3519 input_c_tests ()
3520 {
3521   test_linenum_comparisons ();
3522   test_should_have_column_data_p ();
3523   test_unknown_location ();
3524   test_builtins ();
3525   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
3526
3527   for_each_line_table_case (test_accessing_ordinary_linemaps);
3528   for_each_line_table_case (test_lexer);
3529   for_each_line_table_case (test_lexer_string_locations_simple);
3530   for_each_line_table_case (test_lexer_string_locations_ebcdic);
3531   for_each_line_table_case (test_lexer_string_locations_hex);
3532   for_each_line_table_case (test_lexer_string_locations_oct);
3533   for_each_line_table_case (test_lexer_string_locations_letter_escape_1);
3534   for_each_line_table_case (test_lexer_string_locations_letter_escape_2);
3535   for_each_line_table_case (test_lexer_string_locations_ucn4);
3536   for_each_line_table_case (test_lexer_string_locations_ucn8);
3537   for_each_line_table_case (test_lexer_string_locations_wide_string);
3538   for_each_line_table_case (test_lexer_string_locations_string16);
3539   for_each_line_table_case (test_lexer_string_locations_string32);
3540   for_each_line_table_case (test_lexer_string_locations_u8);
3541   for_each_line_table_case (test_lexer_string_locations_utf8_source);
3542   for_each_line_table_case (test_lexer_string_locations_concatenation_1);
3543   for_each_line_table_case (test_lexer_string_locations_concatenation_2);
3544   for_each_line_table_case (test_lexer_string_locations_concatenation_3);
3545   for_each_line_table_case (test_lexer_string_locations_macro);
3546   for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument);
3547   for_each_line_table_case (test_lexer_string_locations_non_string);
3548   for_each_line_table_case (test_lexer_string_locations_long_line);
3549   for_each_line_table_case (test_lexer_string_locations_raw_string_one_line);
3550   for_each_line_table_case (test_lexer_string_locations_raw_string_multiline);
3551   for_each_line_table_case (test_lexer_string_locations_raw_string_unterminated);
3552   for_each_line_table_case (test_lexer_char_constants);
3553
3554   test_reading_source_line ();
3555 }
3556
3557 } // namespace selftest
3558
3559 #endif /* CHECKING_P */